add filename option for text documents (#47)

This commit is contained in:
LukeZekes 2025-03-05 09:56:02 -06:00 committed by GitHub
parent 186c76a799
commit e56691a1c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 8 additions and 1 deletions

View File

@ -261,6 +261,7 @@ async def ingest_text(
Args:
request: IngestTextRequest containing:
- content: Text content to ingest
- filename: Optional filename to associate with the ingested text document
- metadata: Optional metadata dictionary
- rules: Optional list of rules. Each rule should be either:
- MetadataExtractionRule: {"type": "metadata_extraction", "schema": {...}}
@ -283,6 +284,7 @@ async def ingest_text(
):
return await document_service.ingest_text(
content=request.content,
filename=request.filename,
metadata=request.metadata,
rules=request.rules,
use_colpali=request.use_colpali,

View File

@ -24,6 +24,7 @@ class IngestTextRequest(BaseModel):
"""Request model for ingesting text content"""
content: str
filename: Optional[str] = None
metadata: Dict[str, Any] = Field(default_factory=dict)
rules: List[Dict[str, Any]] = Field(default_factory=list)
use_colpali: Optional[bool] = None

View File

@ -36,7 +36,6 @@ import os
logger = logging.getLogger(__name__)
IMAGE = {im.mime for im in IMAGE}
class DocumentService:
def __init__(
self,
@ -185,6 +184,7 @@ class DocumentService:
async def ingest_text(
self,
content: str,
filename: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
auth: AuthContext = None,
rules: Optional[List[str]] = None,
@ -197,6 +197,7 @@ class DocumentService:
doc = Document(
content_type="text/plain",
filename=filename,
metadata=metadata or {},
owner={"type": auth.entity_type, "id": auth.entity_id},
access_control={

View File

@ -70,6 +70,7 @@ class IngestTextRequest(BaseModel):
"""Request model for ingesting text content"""
content: str
filename: Optional[str] = None
metadata: Dict[str, Any] = Field(default_factory=dict)
rules: List[Dict[str, Any]] = Field(default_factory=list)
use_colpali: bool = Field(default=False)

View File

@ -150,6 +150,7 @@ class DataBridge:
def ingest_text(
self,
content: str,
filename: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
rules: Optional[List[RuleOrDict]] = None,
use_colpali: bool = True,
@ -191,6 +192,7 @@ class DataBridge:
"""
request = IngestTextRequest(
content=content,
filename=filename,
metadata=metadata or {},
rules=[self._convert_rule(r) for r in (rules or [])],
use_colpali=use_colpali,