254 lines
7.2 KiB
Python
Raw Normal View History

import json
2024-11-22 18:56:22 -05:00
from datetime import datetime, UTC
2024-11-28 19:09:40 -05:00
from typing import List, Optional, Union, Dict, Set
from fastapi import (
FastAPI,
Form,
HTTPException,
Depends,
Header,
APIRouter,
UploadFile
)
from fastapi.middleware.cors import CORSMiddleware
import jwt
2024-11-28 19:09:40 -05:00
import logging
from core.models.request import IngestTextRequest, QueryRequest
from core.models.documents import (
2024-11-22 18:56:22 -05:00
Document,
DocumentResult,
ChunkResult,
EntityType
)
from core.models.auth import AuthContext
from core.services.document_service import DocumentService
from core.config import get_settings
from core.database.mongo_database import MongoDatabase
from core.vector_store.mongo_vector_store import MongoDBAtlasVectorStore
from core.storage.s3_storage import S3Storage
from core.parser.unstructured_parser import UnstructuredAPIParser
from core.embedding_model.openai_embedding_model import OpenAIEmbeddingModel
from core.services.uri_service import get_uri_service
# Application object and module-level logger
app = FastAPI(title="DataBridge API")
logger = logging.getLogger(__name__)

# Cross-origin policy: every origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive policy -- confirm it is intended outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Load configuration once; every component below is built from it
settings = get_settings()

# Back-end components
database = MongoDatabase(**settings.get_mongodb_settings())

vector_store = MongoDBAtlasVectorStore(
    settings.MONGODB_URI,
    settings.DATABRIDGE_DB,
    # presumably the collection name and the vector index name -- confirm
    # against MongoDBAtlasVectorStore's signature
    "document_chunks",
    "vector_index",
)

storage = S3Storage(**settings.get_storage_settings())

parser = UnstructuredAPIParser(
    api_key=settings.UNSTRUCTURED_API_KEY,
    chunk_size=settings.CHUNK_SIZE,
    chunk_overlap=settings.CHUNK_OVERLAP,
)

embedding_model = OpenAIEmbeddingModel(
    api_key=settings.OPENAI_API_KEY,
    model_name=settings.EMBEDDING_MODEL,
)

# Document service that ties all of the components above together
document_service = DocumentService(
    database=database,
    vector_store=vector_store,
    storage=storage,
    parser=parser,
    embedding_model=embedding_model,
)
def _unauthorized(detail: str) -> HTTPException:
    """Build a 401 response that always carries the Bearer challenge header."""
    return HTTPException(
        status_code=401,
        detail=detail,
        headers={"WWW-Authenticate": "Bearer"},
    )


async def verify_token(authorization: str = Header(None)) -> AuthContext:
    """Verify a JWT Bearer token and build the caller's auth context.

    Args:
        authorization: Raw ``Authorization`` header value, expected in the
            form ``Bearer <token>``.

    Returns:
        An ``AuthContext`` built from the token claims ``type``,
        ``entity_id``, ``app_id`` and ``permissions`` (``{"read"}`` when the
        ``permissions`` claim is absent).

    Raises:
        HTTPException: 401 when the header is missing or not a Bearer
            header, when the token fails JWT validation, when it has
            expired, or when a required claim is missing or invalid.
    """
    if not authorization:
        raise _unauthorized("Missing authorization header")

    if not authorization.startswith("Bearer "):
        raise _unauthorized("Invalid authorization header")

    token = authorization[7:]  # Remove "Bearer "

    try:
        payload = jwt.decode(
            token,
            settings.JWT_SECRET_KEY,
            algorithms=[settings.JWT_ALGORITHM],
        )
    except jwt.InvalidTokenError as e:
        raise _unauthorized(str(e)) from e

    # A correctly signed token can still lack claims or carry malformed ones;
    # report that as an authentication failure rather than a server error.
    try:
        expires_at = datetime.fromtimestamp(payload["exp"], UTC)
    except (KeyError, TypeError, ValueError, OverflowError, OSError) as e:
        raise _unauthorized("Invalid token claims") from e

    if expires_at < datetime.now(UTC):
        raise _unauthorized("Token expired")

    try:
        return AuthContext(
            entity_type=EntityType(payload["type"]),
            entity_id=payload["entity_id"],
            app_id=payload.get("app_id"),
            permissions=set(payload.get("permissions", ["read"])),
        )
    except (KeyError, TypeError, ValueError) as e:
        raise _unauthorized("Invalid token claims") from e
2024-11-20 18:42:19 -05:00
2024-11-28 19:09:40 -05:00
@app.post("/ingest/text", response_model=Document)
async def ingest_text(
    request: IngestTextRequest,
    auth: AuthContext = Depends(verify_token),
) -> Document:
    """Ingest a text document on behalf of the authenticated caller.

    A ``PermissionError`` from the document service is reported as 403;
    any other exception is reported as 400 with the exception text.
    """
    try:
        document = await document_service.ingest_text(request, auth)
    except PermissionError as denied:
        raise HTTPException(status_code=403, detail=str(denied))
    except Exception as failure:
        raise HTTPException(status_code=400, detail=str(failure))
    return document
2024-11-28 19:09:40 -05:00
@app.post("/ingest/file", response_model=Document)
async def ingest_file(
    file: UploadFile,
    metadata: str = Form("{}"),  # JSON string of metadata
    auth: AuthContext = Depends(verify_token),
) -> Document:
    """Ingest an uploaded file together with its JSON-encoded metadata.

    Args:
        file: The uploaded file.
        metadata: Form field holding a JSON object; defaults to ``"{}"``.
        auth: Auth context produced by ``verify_token``.

    Returns:
        The value returned by ``document_service.ingest_file``.

    Raises:
        HTTPException: 400 when ``metadata`` is not valid JSON or is not a
            JSON object, 403 when the service raises ``PermissionError``,
            and 400 for any other service failure.
    """
    # Parse the metadata on its own so that only a bad form field is
    # reported as "Invalid metadata JSON", never a decode error raised
    # somewhere inside the ingestion pipeline.
    try:
        metadata_dict = json.loads(metadata)
    except (json.JSONDecodeError, RecursionError):
        raise HTTPException(400, "Invalid metadata JSON")

    # json.loads also accepts lists, numbers, strings and null; the service
    # is handed a dict, so reject anything else up front.
    if not isinstance(metadata_dict, dict):
        raise HTTPException(400, "Metadata must be a JSON object")

    try:
        # TODO: discuss with bhau whether a plain success response should be
        # returned here instead of the full document.
        return await document_service.ingest_file(file, metadata_dict, auth)
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
2024-11-22 18:56:22 -05:00
@app.post("/query", response_model=Union[List[ChunkResult], List[DocumentResult]])
async def query_documents(
    request: QueryRequest,
    auth: AuthContext = Depends(verify_token),
):
    """Run a query through the document service.

    The response model allows either a list of chunk results or a list of
    document results. Any exception raised by the service is reported as
    400 with the exception text.
    """
    try:
        results = await document_service.query(request, auth)
    except Exception as failure:
        raise HTTPException(status_code=400, detail=str(failure))
    return results
2024-11-22 18:56:22 -05:00
@app.get("/documents", response_model=List[Document])
async def list_documents(
    auth: AuthContext = Depends(verify_token),
    skip: int = 0,
    limit: int = 100,
):
    """List documents accessible to the authenticated caller.

    ``skip`` and ``limit`` are forwarded unchanged to the database layer
    (presumably an offset and a page size -- confirm against
    ``get_documents``). Any exception is reported as 400.
    """
    try:
        documents = await document_service.db.get_documents(auth, skip, limit)
    except Exception as failure:
        raise HTTPException(status_code=400, detail=str(failure))
    return documents
2024-11-22 18:56:22 -05:00
@app.get("/documents/{document_id}", response_model=Document)
async def get_document(
    document_id: str,
    auth: AuthContext = Depends(verify_token),
):
    """Get a single document by ID.

    Args:
        document_id: Identifier taken from the URL path.
        auth: Auth context produced by ``verify_token``.

    Returns:
        The document returned by the database layer.

    Raises:
        HTTPException: 404 when the lookup yields a falsy result, 400 when
            the lookup raises any other exception. An ``HTTPException``
            raised by the lookup itself is propagated unchanged.
    """
    try:
        doc = await document_service.db.get_document(document_id, auth)
    except HTTPException:
        raise  # Propagate the HTTPException as is
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    # Log only after the lookup is known to have succeeded, and log the ID
    # rather than the whole document so its contents stay out of the logs.
    logger.info("Found document: %s", document_id)
    return doc
2024-11-23 13:32:47 -05:00
# Router for the token-issuing endpoints; all of its routes live under /auth
auth_router = APIRouter(prefix="/auth", tags=["auth"])
@auth_router.post("/developer-token")
async def create_developer_token(
    dev_id: str,
    app_id: Optional[str] = None,
    expiry_days: int = 30,
    permissions: Optional[Set[str]] = None,
    auth: AuthContext = Depends(verify_token),
) -> Dict[str, str]:
    """Create a developer access URI.

    Only callers whose auth context includes the ``admin`` permission may
    use this endpoint; everyone else receives 403. The URI is produced by
    the URI service and returned under the ``uri`` key.
    """
    # Guard clause: reject non-admin callers before doing any work
    is_admin = "admin" in auth.permissions
    if not is_admin:
        raise HTTPException(status_code=403, detail="Admin permissions required")

    developer_uri = get_uri_service().create_developer_uri(
        dev_id=dev_id,
        app_id=app_id,
        expiry_days=expiry_days,
        permissions=permissions,
    )
    return {"uri": developer_uri}
@auth_router.post("/user-token")
async def create_user_token(
    user_id: str,
    expiry_days: int = 30,
    permissions: Optional[Set[str]] = None,
    auth: AuthContext = Depends(verify_token),
) -> Dict[str, str]:
    """Create a user access URI.

    Only callers whose auth context includes the ``admin`` permission may
    use this endpoint; everyone else receives 403. The URI is produced by
    the URI service and returned under the ``uri`` key.
    """
    # Guard clause: reject non-admin callers before doing any work
    is_admin = "admin" in auth.permissions
    if not is_admin:
        raise HTTPException(status_code=403, detail="Admin permissions required")

    user_uri = get_uri_service().create_user_uri(
        user_id=user_id,
        expiry_days=expiry_days,
        permissions=permissions,
    )
    return {"uri": user_uri}
# Register the /auth routes on the main FastAPI application
app.include_router(auth_router)