From e56691a1c57a0341fde08ff284986074299534c4 Mon Sep 17 00:00:00 2001 From: LukeZekes <41444778+LukeZekes@users.noreply.github.com> Date: Wed, 5 Mar 2025 09:56:02 -0600 Subject: [PATCH] add filename option for text documents (#47) --- core/api.py | 2 ++ core/models/request.py | 1 + core/services/document_service.py | 3 ++- sdks/python/databridge/models.py | 1 + sdks/python/databridge/sync.py | 2 ++ 5 files changed, 8 insertions(+), 1 deletion(-) diff --git a/core/api.py b/core/api.py index c5df9fe..ab5b161 100644 --- a/core/api.py +++ b/core/api.py @@ -261,6 +261,7 @@ async def ingest_text( Args: request: IngestTextRequest containing: - content: Text content to ingest + - filename: Optional filename to help determine content type - metadata: Optional metadata dictionary - rules: Optional list of rules. Each rule should be either: - MetadataExtractionRule: {"type": "metadata_extraction", "schema": {...}} @@ -283,6 +284,7 @@ async def ingest_text( ): return await document_service.ingest_text( content=request.content, + filename=request.filename, metadata=request.metadata, rules=request.rules, use_colpali=request.use_colpali, diff --git a/core/models/request.py b/core/models/request.py index 97facc2..203c5a6 100644 --- a/core/models/request.py +++ b/core/models/request.py @@ -24,6 +24,7 @@ class IngestTextRequest(BaseModel): """Request model for ingesting text content""" content: str + filename: Optional[str] = None metadata: Dict[str, Any] = Field(default_factory=dict) rules: List[Dict[str, Any]] = Field(default_factory=list) use_colpali: Optional[bool] = None diff --git a/core/services/document_service.py b/core/services/document_service.py index 483f3f2..8a6e5d6 100644 --- a/core/services/document_service.py +++ b/core/services/document_service.py @@ -36,7 +36,6 @@ import os logger = logging.getLogger(__name__) IMAGE = {im.mime for im in IMAGE} - class DocumentService: def __init__( self, @@ -185,6 +184,7 @@ class DocumentService: async def ingest_text( self, content: str, + filename: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, auth: AuthContext = None, rules: Optional[List[str]] = None, @@ -197,6 +197,7 @@ class DocumentService: doc = Document( content_type="text/plain", + filename=filename, metadata=metadata or {}, owner={"type": auth.entity_type, "id": auth.entity_id}, access_control={ diff --git a/sdks/python/databridge/models.py b/sdks/python/databridge/models.py index cbcd8d8..6f62f96 100644 --- a/sdks/python/databridge/models.py +++ b/sdks/python/databridge/models.py @@ -70,6 +70,7 @@ class IngestTextRequest(BaseModel): """Request model for ingesting text content""" content: str + filename: Optional[str] = None metadata: Dict[str, Any] = Field(default_factory=dict) rules: List[Dict[str, Any]] = Field(default_factory=list) use_colpali: bool = Field(default=False) diff --git a/sdks/python/databridge/sync.py b/sdks/python/databridge/sync.py index 405726f..0fd751e 100644 --- a/sdks/python/databridge/sync.py +++ b/sdks/python/databridge/sync.py @@ -150,6 +150,7 @@ class DataBridge: def ingest_text( self, content: str, + filename: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, rules: Optional[List[RuleOrDict]] = None, use_colpali: bool = True, @@ -191,6 +192,7 @@ class DataBridge: """ request = IngestTextRequest( content=content, + filename=filename, metadata=metadata or {}, rules=[self._convert_rule(r) for r in (rules or [])], use_colpali=use_colpali,