diff --git a/core/api.py b/core/api.py index 20b961c..5fea590 100644 --- a/core/api.py +++ b/core/api.py @@ -64,8 +64,15 @@ async def readiness_check(): # Initialize telemetry telemetry = TelemetryService() -# Add OpenTelemetry instrumentation -FastAPIInstrumentor.instrument_app(app) +# Add OpenTelemetry instrumentation - exclude HTTP send/receive spans +FastAPIInstrumentor.instrument_app( + app, + excluded_urls="health,health/.*", # Exclude health check endpoints + exclude_spans=["send", "receive"], # Exclude HTTP send/receive spans to reduce telemetry volume + http_capture_headers_server_request=None, # Don't capture request headers + http_capture_headers_server_response=None, # Don't capture response headers + tracer_provider=None # Use the global tracer provider +) # Add CORS middleware app.add_middleware( diff --git a/core/database/postgres_database.py b/core/database/postgres_database.py index 903ca29..6b1d1a3 100644 --- a/core/database/postgres_database.py +++ b/core/database/postgres_database.py @@ -750,16 +750,36 @@ class PostgresDatabase(BaseDatabase): filter_conditions = [] for key, value in filters.items(): - # Convert boolean values to string 'true' or 'false' - if isinstance(value, bool): - value = str(value).lower() - - # Use proper SQL escaping for string values - if isinstance(value, str): - # Replace single quotes with double single quotes to escape them - value = value.replace("'", "''") - - filter_conditions.append(f"doc_metadata->>'{key}' = '{value}'") + # Handle list of values (IN operator) + if isinstance(value, list): + if not value: # Skip empty lists + continue + + # Build a list of properly escaped values + escaped_values = [] + for item in value: + if isinstance(item, bool): + escaped_values.append(str(item).lower()) + elif isinstance(item, str): + escaped_values.append(f"'{item.replace('\'', '\'\'')}'") + else: + escaped_values.append(f"'{item}'") + + # Join with commas for IN clause + values_str = ", ".join(escaped_values) + filter_conditions.append(f"doc_metadata->>'{key}' IN ({values_str})") + else: + # Handle single value (equality) + # Convert boolean values to string 'true' or 'false' + if isinstance(value, bool): + value = str(value).lower() + + # Use proper SQL escaping for string values + if isinstance(value, str): + # Replace single quotes with double single quotes to escape them + value = value.replace("'", "''") + + filter_conditions.append(f"doc_metadata->>'{key}' = '{value}'") return " AND ".join(filter_conditions) @@ -773,12 +793,34 @@ class PostgresDatabase(BaseDatabase): if value is None: continue - if isinstance(value, str): - # Replace single quotes with double single quotes to escape them - escaped_value = value.replace("'", "''") - conditions.append(f"system_metadata->>'{key}' = '{escaped_value}'") + # Handle list of values (IN operator) + if isinstance(value, list): + if not value: # Skip empty lists + continue + + # Build a list of properly escaped values + escaped_values = [] + for item in value: + if isinstance(item, bool): + escaped_values.append(str(item).lower()) + elif isinstance(item, str): + escaped_values.append(f"'{item.replace('\'', '\'\'')}'") + else: + escaped_values.append(f"'{item}'") + + # Join with commas for IN clause + values_str = ", ".join(escaped_values) + conditions.append(f"system_metadata->>'{key}' IN ({values_str})") else: - conditions.append(f"system_metadata->>'{key}' = '{value}'") + # Handle single value (equality) + if isinstance(value, str): + # Replace single quotes with double single quotes to escape them + escaped_value = value.replace("'", "''") + conditions.append(f"system_metadata->>'{key}' = '{escaped_value}'") + elif isinstance(value, bool): + conditions.append(f"system_metadata->>'{key}' = '{str(value).lower()}'") + else: + conditions.append(f"system_metadata->>'{key}' = '{value}'") return " AND ".join(conditions) diff --git a/core/services/document_service.py b/core/services/document_service.py index ad890b1..48b7161 100644 --- a/core/services/document_service.py +++ b/core/services/document_service.py @@ -1409,6 +1409,9 @@ class DocumentService: # Update metadata if provided - additive but replacing existing keys if metadata: doc.metadata.update(metadata) + + # Ensure external_id is preserved in metadata + doc.metadata["external_id"] = doc.external_id # Increment version current_version = doc.system_metadata.get("version", 1) diff --git a/core/workers/ingestion_worker.py b/core/workers/ingestion_worker.py index 4ac6352..b4a658e 100644 --- a/core/workers/ingestion_worker.py +++ b/core/workers/ingestion_worker.py @@ -171,8 +171,13 @@ async def process_ingestion_job( raise ValueError(f"Document {document_id} not found in database after multiple retries") # Prepare updates for the document + # Merge new metadata with existing metadata to preserve external_id + merged_metadata = {**doc.metadata, **metadata} + # Make sure external_id is preserved in the metadata + merged_metadata["external_id"] = doc.external_id + updates = { - "metadata": metadata, + "metadata": merged_metadata, "additional_metadata": additional_metadata, "system_metadata": {**doc.system_metadata, "content": text} } diff --git a/ui-component/app/page.tsx b/ui-component/app/page.tsx index 8594b85..2ae9d4d 100644 --- a/ui-component/app/page.tsx +++ b/ui-component/app/page.tsx @@ -7,8 +7,9 @@ import { useSearchParams } from 'next/navigation'; function HomeContent() { const searchParams = useSearchParams(); const folderParam = searchParams.get('folder'); + const sectionParam = searchParams.get('section'); - return ; + return ; } export default function Home() { diff --git a/ui-component/components/MorphikUI.tsx b/ui-component/components/MorphikUI.tsx index cf996bb..7afb9dd 100644 --- a/ui-component/components/MorphikUI.tsx +++ b/ui-component/components/MorphikUI.tsx @@ -22,7 +22,8 @@ const MorphikUI: React.FC = ({ isReadOnlyUri = false, // Default to editable URI onUriChange, onBackClick, - initialFolder = null + initialFolder = null, + initialSection = 'documents' }) => { // State to manage connectionUri internally if needed const [currentUri, setCurrentUri] = useState(connectionUri); @@ -40,7 +41,7 @@ const MorphikUI: React.FC = ({ onUriChange(newUri); } }; - const [activeSection, setActiveSection] = useState('documents'); + const [activeSection, setActiveSection] = useState(initialSection); const [selectedGraphName, setSelectedGraphName] = useState(undefined); const [isSidebarCollapsed, setIsSidebarCollapsed] = useState(false); diff --git a/ui-component/components/chat/ChatMessage.tsx b/ui-component/components/chat/ChatMessage.tsx index c571099..5da070f 100644 --- a/ui-component/components/chat/ChatMessage.tsx +++ b/ui-component/components/chat/ChatMessage.tsx @@ -1,14 +1,54 @@ "use client"; import React from 'react'; +import Image from 'next/image'; +import { Accordion, AccordionContent, AccordionItem, AccordionTrigger } from '@/components/ui/accordion'; +import { Badge } from '@/components/ui/badge'; +import { Source } from '@/components/types'; // Define our own props interface to avoid empty interface error interface ChatMessageProps { role: 'user' | 'assistant'; content: string; + sources?: Source[]; } -const ChatMessageComponent: React.FC = ({ role, content }) => { +const ChatMessageComponent: React.FC = ({ role, content, sources }) => { + // Helper to render content based on content type + const renderContent = (content: string, contentType: string) => { + if (contentType.startsWith('image/')) { + return ( +
+ Document content +
+ ); + } else if (content.startsWith('data:image/png;base64,') || content.startsWith('data:image/jpeg;base64,')) { + return ( +
+ Base64 image content +
+ ); + } else { + return ( +
+ {content} +
+ ); + } + }; + return (
= ({ role, content }) => }`} >
{content}
+ + {sources && sources.length > 0 && role === 'assistant' && ( + + + Sources ({sources.length}) + +
+ {sources.map((source, index) => ( +
+
+
+
+ + {source.filename || `Document ${source.document_id.substring(0, 8)}...`} + + + Chunk {source.chunk_number} {source.score !== undefined && `• Score: ${source.score.toFixed(2)}`} + +
+ {source.content_type && ( + + {source.content_type} + + )} +
+
+ + {source.content && ( + renderContent(source.content, source.content_type || 'text/plain') + )} + + + + Metadata + +
+                              {JSON.stringify(source.metadata, null, 2)}
+                            
+
+
+
+
+ ))} +
+
+
+
+ )}
); diff --git a/ui-component/components/chat/ChatSection.tsx b/ui-component/components/chat/ChatSection.tsx index 3f28aed..4cd066f 100644 --- a/ui-component/components/chat/ChatSection.tsx +++ b/ui-component/components/chat/ChatSection.tsx @@ -10,7 +10,7 @@ import { showAlert } from '@/components/ui/alert-system'; import ChatOptionsDialog from './ChatOptionsDialog'; import ChatMessageComponent from './ChatMessage'; -import { ChatMessage, QueryOptions, Folder } from '@/components/types'; +import { ChatMessage, QueryOptions, Folder, Source } from '@/components/types'; interface ChatSectionProps { apiBaseUrl: string; @@ -33,6 +33,82 @@ const ChatSection: React.FC = ({ apiBaseUrl, authToken }) => { max_tokens: 500, temperature: 0.7 }); + + // Handle URL parameters for folder and filters + useEffect(() => { + if (typeof window !== 'undefined') { + const params = new URLSearchParams(window.location.search); + const folderParam = params.get('folder'); + const filtersParam = params.get('filters'); + const documentIdsParam = params.get('document_ids'); + + let shouldShowChatOptions = false; + + // Update folder if provided + if (folderParam) { + try { + const folderName = decodeURIComponent(folderParam); + if (folderName) { + console.log(`Setting folder from URL parameter: ${folderName}`); + updateQueryOption('folder_name', folderName); + shouldShowChatOptions = true; + } + } catch (error) { + console.error('Error parsing folder parameter:', error); + } + } + + // Handle document_ids (selected documents) parameter - for backward compatibility + if (documentIdsParam) { + try { + const documentIdsJson = decodeURIComponent(documentIdsParam); + const documentIds = JSON.parse(documentIdsJson); + + // Create a filter object with external_id filter (correct field name) + const filtersObj = { external_id: documentIds }; + const validFiltersJson = JSON.stringify(filtersObj); + + console.log(`Setting document_ids filter from URL parameter:`, filtersObj); + updateQueryOption('filters', validFiltersJson); + shouldShowChatOptions = true; + } catch (error) { + console.error('Error parsing document_ids parameter:', error); + } + } + // Handle general filters parameter + if (filtersParam) { + try { + const filtersJson = decodeURIComponent(filtersParam); + // Parse the JSON to confirm it's valid + const filtersObj = JSON.parse(filtersJson); + + console.log(`Setting filters from URL parameter:`, filtersObj); + + // Store the filters directly as a JSON string + updateQueryOption('filters', filtersJson); + shouldShowChatOptions = true; + + // Log a more helpful message about what's happening + if (filtersObj.external_id) { + console.log(`Chat will filter by ${Array.isArray(filtersObj.external_id) ? filtersObj.external_id.length : 1} document(s)`); + } + } catch (error) { + console.error('Error parsing filters parameter:', error); + } + } + + // Only show the chat options panel on initial parameter load + if (shouldShowChatOptions) { + setShowChatAdvanced(true); + + // Clear URL parameters after processing them to prevent modal from re-appearing on refresh + if (window.history.replaceState) { + const newUrl = window.location.pathname + window.location.hash; + window.history.replaceState({}, document.title, newUrl); + } + } + } + }, []); // Update query options const updateQueryOption = (key: K, value: QueryOptions[K]) => { @@ -141,8 +217,59 @@ const ChatSection: React.FC = ({ apiBaseUrl, authToken }) => { const data = await response.json(); // Add assistant response to chat - const assistantMessage: ChatMessage = { role: 'assistant', content: data.completion }; + const assistantMessage: ChatMessage = { + role: 'assistant', + content: data.completion, + sources: data.sources + }; setChatMessages(prev => [...prev, assistantMessage]); + + // If sources are available, retrieve the full source content + if (data.sources && data.sources.length > 0) { + try { + // Fetch full source details + const sourcesResponse = await fetch(`${apiBaseUrl}/batch/chunks`, { + method: 'POST', + headers: { + 'Authorization': authToken ? `Bearer ${authToken}` : '', + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + sources: data.sources, + folder_name: queryOptions.folder_name + }) + }); + + if (sourcesResponse.ok) { + const sourcesData = await sourcesResponse.json(); + + // Process source data + + // Update the message with detailed source information + const updatedMessage = { + ...assistantMessage, + sources: sourcesData.map((source: Source) => ({ + document_id: source.document_id, + chunk_number: source.chunk_number, + score: source.score, + content: source.content, + content_type: source.content_type || 'text/plain', + filename: source.filename, + metadata: source.metadata, + download_url: source.download_url + })) + }; + + // Update the message with detailed sources + setChatMessages(prev => prev.map((msg, idx) => + idx === prev.length - 1 ? updatedMessage : msg + )); + } + } catch (err) { + console.error('Error fetching source details:', err); + // Continue with basic sources if detailed fetch fails + } + } setChatQuery(''); // Clear input } catch (err) { const errorMsg = err instanceof Error ? err.message : 'An unknown error occurred'; @@ -173,6 +300,7 @@ const ChatSection: React.FC = ({ apiBaseUrl, authToken }) => { key={index} role={message.role} content={message.content} + sources={message.sources} /> ))} diff --git a/ui-component/components/documents/DocumentList.tsx b/ui-component/components/documents/DocumentList.tsx index ef60426..b738aa7 100644 --- a/ui-component/components/documents/DocumentList.tsx +++ b/ui-component/components/documents/DocumentList.tsx @@ -1,6 +1,6 @@ "use client"; -import React, { useState } from 'react'; +import React, { useState, useEffect } from 'react'; import { Checkbox } from "@/components/ui/checkbox"; import { Badge } from '@/components/ui/badge'; import { ScrollArea } from '@/components/ui/scroll-area'; @@ -8,7 +8,7 @@ import { Button } from '@/components/ui/button'; import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from '@/components/ui/dialog'; import { Input } from '@/components/ui/input'; import { Textarea } from '@/components/ui/textarea'; -import { Plus, Wand2, Upload } from 'lucide-react'; +import { Plus, Wand2, Upload, Filter } from 'lucide-react'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; import { showAlert } from '@/components/ui/alert-system'; @@ -43,6 +43,76 @@ interface DocumentListProps { selectedFolder?: string | null; } +// Filter Dialog Component +const FilterDialog = ({ + isOpen, + onClose, + columns, + filterValues, + setFilterValues +}: { + isOpen: boolean; + onClose: () => void; + columns: CustomColumn[]; + filterValues: Record; + setFilterValues: React.Dispatch>>; +}) => { + const [localFilters, setLocalFilters] = useState>(filterValues); + + const handleApplyFilters = () => { + setFilterValues(localFilters); + onClose(); + }; + + const handleClearFilters = () => { + setLocalFilters({}); + setFilterValues({}); + onClose(); + }; + + const handleFilterChange = (column: string, value: string) => { + setLocalFilters(prev => ({ + ...prev, + [column]: value + })); + }; + + return ( + !open && onClose()}> + e.preventDefault()}> + + Filter Documents + + Filter documents by their metadata values + + +
+ {columns.map(column => ( +
+ + handleFilterChange(column.name, e.target.value)} + /> +
+ ))} +
+ + +
+ + +
+
+
+
+ ); +}; + // Create a separate Column Dialog component to isolate its state const AddColumnDialog = ({ isOpen, @@ -198,6 +268,9 @@ const DocumentList: React.FC = ({ const [customColumns, setCustomColumns] = useState([]); const [showAddColumnDialog, setShowAddColumnDialog] = useState(false); const [isExtracting, setIsExtracting] = useState(false); + const [showFilterDialog, setShowFilterDialog] = useState(false); + const [filterValues, setFilterValues] = useState>({}); + const [filteredDocuments, setFilteredDocuments] = useState([]); // Get unique metadata fields from all documents const existingMetadataFields = React.useMemo(() => { @@ -209,6 +282,29 @@ const DocumentList: React.FC = ({ }); return Array.from(fields); }, [documents]); + + // Apply filter logic + useEffect(() => { + if (Object.keys(filterValues).length === 0) { + setFilteredDocuments(documents); + return; + } + + const filtered = documents.filter(doc => { + // Check if document matches all filter criteria + return Object.entries(filterValues).every(([key, value]) => { + if (!value || value.trim() === '') return true; // Skip empty filters + + const docValue = doc.metadata?.[key]; + if (docValue === undefined) return false; + + // String comparison (case-insensitive) + return String(docValue).toLowerCase().includes(value.toLowerCase()); + }); + }); + + setFilteredDocuments(filtered); + }, [documents, filterValues]); // Combine existing metadata fields with custom columns const allColumns = React.useMemo(() => { @@ -237,6 +333,7 @@ const DocumentList: React.FC = ({ }; // Handle data extraction + const handleExtract = async () => { // First, find the folder object to get its ID if (!selectedFolder || customColumns.length === 0) { @@ -408,46 +505,34 @@ const DocumentList: React.FC = ({ } }; - const DocumentListHeader = () => ( -
-
'140px').join(' ')}` - }}> -
- { - if (checked) { - setSelectedDocuments(documents.map(doc => doc.external_id)); - } else { - setSelectedDocuments([]); - } - }} - aria-label="Select all documents" - /> -
-
Filename
-
Type
-
-
- Status - - - - - - - -
- Documents with "Processing" status are queryable, but visual features like direct visual context will only be available after processing completes. -
+ // Calculate how many filters are currently active + const activeFilterCount = Object.values(filterValues).filter(v => v && v.trim() !== '').length; + + const DocumentListHeader = () => { + return ( +
+
'140px').join(' ')}` + }}> +
+ { + if (checked) { + setSelectedDocuments(documents.map(doc => doc.external_id)); + } else { + setSelectedDocuments([]); + } + }} + aria-label="Select all documents" + />
-
- {allColumns.map((column) => ( -
+
Filename
+
Type
+
- {column.name} + Status @@ -456,38 +541,50 @@ const DocumentList: React.FC = ({
-

{column.description}

-

Type: {column._type}

- {column.schema && ( -

Schema provided

- )} + Documents with "Processing" status are queryable, but visual features like direct visual context will only be available after processing completes.
- ))} -
- -
- + {allColumns.map((column) => ( +
+
+ {column.name} + + + + + + + +
+

{column.description}

+

Type: {column._type}

+ {column.schema && ( +

Schema provided

+ )} +
+
+
+ ))} +
- {/* Render the dialog separately */} + {/* Render dialogs separately */} setShowAddColumnDialog(false)} onAddColumn={handleAddColumn} /> + + setShowFilterDialog(false)} + columns={allColumns} + filterValues={filterValues} + setFilterValues={setFilterValues} + />
-
- ); + ); + }; if (loading && !documents.length) { return ( @@ -507,7 +604,7 @@ const DocumentList: React.FC = ({
- {documents.map((doc) => ( + {filteredDocuments.map((doc) => (
handleDocumentClick(doc)} @@ -569,6 +666,24 @@ const DocumentList: React.FC = ({
))} + {filteredDocuments.length === 0 && documents.length > 0 && ( +
+
+ +
+

+ No documents match the current filters. +

+ +
+ )} + {documents.length === 0 && (
@@ -588,18 +703,66 @@ const DocumentList: React.FC = ({ )} - {customColumns.length > 0 && ( -
- +
+ {/* Filter stats */} +
+ {Object.keys(filterValues).length > 0 ? ( +
+ + + {filteredDocuments.length} of {documents.length} documents + {Object.keys(filterValues).length > 0 && ( + + )} + +
+ ) : null}
- )} + + {/* Action buttons */} +
+ {/* Filter button */} + + + {/* Add column button */} + + + {customColumns.length > 0 && selectedFolder && ( + + )} +
+
); }; diff --git a/ui-component/components/documents/DocumentsSection.tsx b/ui-component/components/documents/DocumentsSection.tsx index c224717..d4da950 100644 --- a/ui-component/components/documents/DocumentsSection.tsx +++ b/ui-component/components/documents/DocumentsSection.tsx @@ -1326,14 +1326,7 @@ const DocumentsSection: React.FC = ({ /> )} - {documents.length === 0 && !loading && folders.length === 0 && !foldersLoading ? ( -
-
- -

No documents found. Upload your first document.

-
-
- ) : selectedFolder && documents.length === 0 && !loading ? ( + {selectedFolder && documents.length === 0 && !loading ? (
diff --git a/ui-component/components/documents/FolderList.tsx b/ui-component/components/documents/FolderList.tsx index c7a1f32..d57bcb8 100644 --- a/ui-component/components/documents/FolderList.tsx +++ b/ui-component/components/documents/FolderList.tsx @@ -2,7 +2,7 @@ import React from 'react'; import { Button } from '@/components/ui/button'; -import { PlusCircle, ArrowLeft } from 'lucide-react'; +import { PlusCircle, ArrowLeft, MessageSquare } from 'lucide-react'; import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle, DialogTrigger } from '@/components/ui/dialog'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; @@ -131,15 +131,51 @@ const FolderList: React.FC = ({
- {/* Show delete button if documents are selected */} - {selectedDocuments.length > 0 && handleDeleteMultipleDocuments && ( - + {/* Show action buttons if documents are selected */} + {selectedDocuments && selectedDocuments.length > 0 && ( +
+ {/* Chat with selected button */} + + + {/* Delete button */} + {handleDeleteMultipleDocuments && ( + + )} +
)}
diff --git a/ui-component/components/search/SearchResultCard.tsx b/ui-component/components/search/SearchResultCard.tsx index 816c7ab..3f3491b 100644 --- a/ui-component/components/search/SearchResultCard.tsx +++ b/ui-component/components/search/SearchResultCard.tsx @@ -83,4 +83,4 @@ const SearchResultCard: React.FC = ({ result }) => { ); }; -export default SearchResultCard; \ No newline at end of file +export default SearchResultCard; diff --git a/ui-component/components/types.ts b/ui-component/components/types.ts index 89c8b33..bded248 100644 --- a/ui-component/components/types.ts +++ b/ui-component/components/types.ts @@ -8,6 +8,7 @@ export interface MorphikUIProps { onBackClick?: () => void; // Callback when back button is clicked appName?: string; // Name of the app to display in UI initialFolder?: string | null; // Initial folder to show + initialSection?: string; // Initial section to show (documents, search, chat, etc.) } export interface Document { @@ -41,9 +42,21 @@ export interface SearchResult { metadata: Record; } +export interface Source { + document_id: string; + chunk_number: number; + score?: number; + filename?: string; + content?: string; + content_type?: string; + metadata?: Record; + download_url?: string; +} + export interface ChatMessage { role: 'user' | 'assistant'; content: string; + sources?: Source[]; } export interface SearchOptions {