mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
add filename option for text documents (#47)
This commit is contained in:
parent
186c76a799
commit
e56691a1c5
@@ -261,6 +261,7 @@ async def ingest_text(
|
||||
Args:
|
||||
request: IngestTextRequest containing:
|
||||
- content: Text content to ingest
|
||||
- filename: Optional filename to help determine content type
|
||||
- metadata: Optional metadata dictionary
|
||||
- rules: Optional list of rules. Each rule should be either:
|
||||
- MetadataExtractionRule: {"type": "metadata_extraction", "schema": {...}}
|
||||
@@ -283,6 +284,7 @@ async def ingest_text(
|
||||
):
|
||||
return await document_service.ingest_text(
|
||||
content=request.content,
|
||||
filename=request.filename,
|
||||
metadata=request.metadata,
|
||||
rules=request.rules,
|
||||
use_colpali=request.use_colpali,
|
||||
|
@@ -24,6 +24,7 @@ class IngestTextRequest(BaseModel):
|
||||
"""Request model for ingesting text content"""
|
||||
|
||||
content: str
|
||||
filename: Optional[str] = None
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
rules: List[Dict[str, Any]] = Field(default_factory=list)
|
||||
use_colpali: Optional[bool] = None
|
||||
|
@@ -36,7 +36,6 @@ import os
|
||||
logger = logging.getLogger(__name__)
|
||||
IMAGE = {im.mime for im in IMAGE}
|
||||
|
||||
|
||||
class DocumentService:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -185,6 +184,7 @@ class DocumentService:
|
||||
async def ingest_text(
|
||||
self,
|
||||
content: str,
|
||||
filename: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
auth: AuthContext = None,
|
||||
rules: Optional[List[str]] = None,
|
||||
@@ -197,6 +197,7 @@ class DocumentService:
|
||||
|
||||
doc = Document(
|
||||
content_type="text/plain",
|
||||
filename=filename,
|
||||
metadata=metadata or {},
|
||||
owner={"type": auth.entity_type, "id": auth.entity_id},
|
||||
access_control={
|
||||
|
@@ -70,6 +70,7 @@ class IngestTextRequest(BaseModel):
|
||||
"""Request model for ingesting text content"""
|
||||
|
||||
content: str
|
||||
filename: Optional[str] = None
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
rules: List[Dict[str, Any]] = Field(default_factory=list)
|
||||
use_colpali: bool = Field(default=False)
|
||||
|
@@ -150,6 +150,7 @@ class DataBridge:
|
||||
def ingest_text(
|
||||
self,
|
||||
content: str,
|
||||
filename: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
rules: Optional[List[RuleOrDict]] = None,
|
||||
use_colpali: bool = True,
|
||||
@@ -191,6 +192,7 @@ class DataBridge:
|
||||
"""
|
||||
request = IngestTextRequest(
|
||||
content=content,
|
||||
filename=filename,
|
||||
metadata=metadata or {},
|
||||
rules=[self._convert_rule(r) for r in (rules or [])],
|
||||
use_colpali=use_colpali,
|
||||
|
Loading…
x
Reference in New Issue
Block a user