88 lines
3.1 KiB
Python
Raw Normal View History

2024-12-22 19:46:53 -05:00
from typing import Dict, Any, List, Literal, Optional
from pydantic import BaseModel, Field, field_validator
class Document(BaseModel):
    """Document metadata model"""

    # Required fields: construction fails validation if either is missing.
    external_id: str = Field(..., description="Unique document identifier")
    content_type: str = Field(..., description="Content type of the document")

    # Optional; defaults to None when not supplied.
    filename: Optional[str] = Field(
        default=None,
        description="Original filename if available",
    )

    # Mapping fields: each instance gets its own fresh empty dict by default
    # (default_factory), so instances never share a mutable default.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="User-defined metadata",
    )
    storage_info: Dict[str, str] = Field(
        default_factory=dict,
        description="Storage-related information",
    )
    system_metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="System-managed metadata",
    )
    access_control: Dict[str, Any] = Field(
        default_factory=dict,
        description="Access control information",
    )

    # Defaults to a fresh empty list per instance.
    chunk_ids: List[str] = Field(
        default_factory=list,
        description="IDs of document chunks",
    )
2024-12-22 19:46:53 -05:00
class IngestTextRequest(BaseModel):
    """Request model for text ingestion"""

    # Required: the text payload itself.
    content: str = Field(..., description="Text content to ingest")

    # Optional mapping; a new empty dict is created per request when omitted.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Optional metadata",
    )
2024-12-22 19:46:53 -05:00
class ChunkResult(BaseModel):
    """Query result at chunk level"""

    # Required fields describing the matched chunk.
    content: str = Field(..., description="Chunk content")
    score: float = Field(..., description="Relevance score")
    document_id: str = Field(..., description="Parent document ID")
    chunk_number: int = Field(..., description="Chunk sequence number")

    # Defaults to a fresh empty dict per instance.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Document metadata",
    )

    # content_type is required; filename and download_url default to None.
    content_type: str = Field(..., description="Content type")
    filename: Optional[str] = Field(default=None, description="Original filename")
    download_url: Optional[str] = Field(
        default=None,
        description="URL to download full document",
    )
2024-12-22 19:46:53 -05:00
class DocumentContent(BaseModel):
    """Represents either a URL or content string"""

    # Discriminator: only the literal values "url" and "string" are accepted.
    # Declared before `filename` so it is already validated (and available in
    # the validation info's `data`) when the filename validator runs.
    type: Literal["url", "string"] = Field(
        ..., description="Content type (url or string)"
    )
    value: str = Field(..., description="The actual content or URL")

    # validate_default=True is required for the rule "filename is required
    # when type is url": without it, pydantic skips field validators when the
    # caller omits the field and the default (None) is used, so a url entry
    # with no filename would be accepted silently.
    filename: Optional[str] = Field(
        None,
        validate_default=True,
        description="Filename when type is url",
    )

    @field_validator("filename")
    @classmethod
    def filename_only_for_url(cls, v, values):
        """Enforce that `filename` is present exactly when `type` is "url".

        Args:
            v: The filename value being validated (may be None).
            values: Pydantic validation info; `values.data` holds the fields
                validated so far, including `type` when it was valid.

        Returns:
            The filename unchanged when the combination is allowed.

        Raises:
            ValueError: If a filename is given for type "string", or no
                filename is given for type "url".
        """
        # `.get` returns None when `type` itself failed validation; in that
        # case neither rule applies and the `type` error is reported alone.
        content_kind = values.data.get("type")
        if content_kind == "string" and v is not None:
            raise ValueError("filename can only be set when type is url")
        if content_kind == "url" and v is None:
            raise ValueError("filename is required when type is url")
        return v
class DocumentResult(BaseModel):
    """Query result at document level"""

    # Required scalar fields.
    score: float = Field(..., description="Relevance score")
    document_id: str = Field(..., description="Document ID")

    # Defaults to a fresh empty dict per instance.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Document metadata",
    )

    # Required nested model (DocumentContent) carrying the content or URL.
    content: DocumentContent = Field(..., description="Document content or URL")
2024-12-26 08:52:25 -05:00
class CompletionResponse(BaseModel):
    """Completion response model"""

    # Both fields are required (no defaults are declared).
    completion: str
    # Mapping of string keys to integer counts; the specific keys are not
    # defined in this model.
    usage: Dict[str, int]