Add reranking (#14)

2025-05-09 19:32:38 +00:00 · 2025-01-02 03:42:47 -05:00 · 2025-01-02 03:42:47 -05:00 · 20faae8903
commit 20faae8903
parent c936aa91a4
12 changed files with 728 additions and 61 deletions
--- a/config.toml
+++ b/config.toml
@ -11,6 +11,7 @@ vector_store = "mongodb"
 embedding = "ollama" # "openai", "ollama"
 completion = "ollama" # "openai", "ollama"
 parser = "combined" # "combined", "unstructured", "contextual"
 reranker = "bge" # "bge"
 # Storage Configuration
 [storage.local]
@ -45,12 +46,20 @@ default_temperature = 0.7
 [models.ollama]
 base_url = "http://localhost:11434"
 [models.reranker]
 model_name = "BAAI/bge-reranker-large" # "BAAI/bge-reranker-v2-gemma", "BAAI/bge-reranker-large"
 device = "mps" # "cuda:0" # Optional: Set to null or remove for CPU
 use_fp16 = true
 query_max_length = 256
 passage_max_length = 512
 # Document Processing
 [processing]
 [processing.text]
 chunk_size = 1000
 chunk_overlap = 200
 default_k = 4
 use_reranking = true  # Whether to use reranking by default
 [processing.video]
 frame_sample_rate = 120
--- a/core/api.py
+++ b/core/api.py
@ -28,6 +28,7 @@ from core.storage.local_storage import LocalStorage
 from core.embedding.openai_embedding_model import OpenAIEmbeddingModel
 from core.completion.ollama_completion import OllamaCompletionModel
 from core.parser.contextual_parser import ContextualParser
 from core.reranker.bge_reranker import BGEReranker
 # Initialize FastAPI app
 app = FastAPI(title="DataBridge API")
@ -158,14 +159,28 @@ match settings.COMPLETION_PROVIDER:
    case _:
        raise ValueError(f"Unsupported completion provider: {settings.COMPLETION_PROVIDER}")
 # Initialize reranker
 match settings.RERANKER_PROVIDER:
    case "bge":
        reranker = BGEReranker(
            model_name=settings.RERANKER_MODEL,
            device=settings.RERANKER_DEVICE,
            use_fp16=settings.RERANKER_USE_FP16,
            query_max_length=settings.RERANKER_QUERY_MAX_LENGTH,
            passage_max_length=settings.RERANKER_PASSAGE_MAX_LENGTH,
        )
    case _:
        raise ValueError(f"Unsupported reranker provider: {settings.RERANKER_PROVIDER}")
 # Initialize document service with configured components
 document_service = DocumentService(
    storage=storage,
    database=database,
    vector_store=vector_store,
    storage=storage,
    parser=parser,
    embedding_model=embedding_model,
    completion_model=completion_model,
    parser=parser,
    reranker=reranker,
 )
@ -243,27 +258,51 @@ async def ingest_file(
@app.post("/retrieve/chunks", response_model=List[ChunkResult])
 async def retrieve_chunks(request: RetrieveRequest, auth: AuthContext = Depends(verify_token)):
    """Retrieve relevant chunks."""
-    async with telemetry.track_operation(
+    try:
-        operation_type="retrieve_chunks",
+        async with telemetry.track_operation(
-        user_id=auth.entity_id,
+            operation_type="retrieve_chunks",
-        metadata=request.model_dump(),
+            user_id=auth.entity_id,
-    ):
+            metadata={
-        return await document_service.retrieve_chunks(
+                "k": request.k,
-            request.query, auth, request.filters, request.k, request.min_score
+                "min_score": request.min_score,
-        )
+                "use_reranking": request.use_reranking,
            },
        ):
            return await document_service.retrieve_chunks(
                request.query,
                auth,
                request.filters,
                request.k,
                request.min_score,
                request.use_reranking,
            )
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
@app.post("/retrieve/docs", response_model=List[DocumentResult])
 async def retrieve_documents(request: RetrieveRequest, auth: AuthContext = Depends(verify_token)):
    """Retrieve relevant documents."""
-    async with telemetry.track_operation(
+    try:
-        operation_type="retrieve_docs",
+        async with telemetry.track_operation(
-        user_id=auth.entity_id,
+            operation_type="retrieve_docs",
-        metadata=request.model_dump(),
+            user_id=auth.entity_id,
-    ):
+            metadata={
-        return await document_service.retrieve_docs(
+                "k": request.k,
-            request.query, auth, request.filters, request.k, request.min_score
+                "min_score": request.min_score,
-        )
+                "use_reranking": request.use_reranking,
            },
        ):
            return await document_service.retrieve_docs(
                request.query,
                auth,
                request.filters,
                request.k,
                request.min_score,
                request.use_reranking,
            )
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
@app.post("/query", response_model=CompletionResponse)
@ -271,27 +310,30 @@ async def query_completion(
    request: CompletionQueryRequest, auth: AuthContext = Depends(verify_token)
 ):
    """Generate completion using relevant chunks as context."""
-    async with telemetry.track_operation(
+    try:
-        operation_type="query",
+        async with telemetry.track_operation(
-        user_id=auth.entity_id,
+            operation_type="query",
-        metadata=request.model_dump(),
+            user_id=auth.entity_id,
-    ) as span:
+            metadata={
-        response = await document_service.query(
+                "k": request.k,
-            request.query,
+                "min_score": request.min_score,
-            auth,
+                "max_tokens": request.max_tokens,
-            request.filters,
+                "temperature": request.temperature,
-            request.k,
+                "use_reranking": request.use_reranking,
-            request.min_score,
+            },
-            request.max_tokens,
+        ):
-            request.temperature,
+            return await document_service.query(
-        )
+                request.query,
-        if isinstance(response, dict) and "usage" in response:
+                auth,
-            usage = response["usage"]
+                request.filters,
-            if isinstance(usage, dict):
+                request.k,
-                span.set_attribute("tokens.completion", usage.get("completion_tokens", 0))
+                request.min_score,
-                span.set_attribute("tokens.prompt", usage.get("prompt_tokens", 0))
+                request.max_tokens,
-                span.set_attribute("tokens.total", usage.get("total_tokens", 0))
+                request.temperature,
-        return response
+                request.use_reranking,
            )
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
@app.get("/documents", response_model=List[Document])
--- a/core/config.py
+++ b/core/config.py
@ -32,6 +32,7 @@ class Settings(BaseSettings):
    EMBEDDING_PROVIDER: str = "openai"
    COMPLETION_PROVIDER: str = "ollama"
    PARSER_PROVIDER: str = "combined"
    RERANKER_PROVIDER: str = "bge"
    # Storage settings
    STORAGE_PATH: str = "./storage"
@ -53,6 +54,11 @@ class Settings(BaseSettings):
    COMPLETION_MAX_TOKENS: int = 1000
    COMPLETION_TEMPERATURE: float = 0.7
    OLLAMA_BASE_URL: str = "http://localhost:11434"
    RERANKER_MODEL: str = "BAAI/bge-reranker-v2-gemma"
    RERANKER_DEVICE: Optional[str] = None
    RERANKER_USE_FP16: bool = True
    RERANKER_QUERY_MAX_LENGTH: int = 256
    RERANKER_PASSAGE_MAX_LENGTH: int = 512
    # Processing settings
    CHUNK_SIZE: int = 1000
@ -60,6 +66,7 @@ class Settings(BaseSettings):
    DEFAULT_K: int = 4
    FRAME_SAMPLE_RATE: int = 120
    USE_UNSTRUCTURED_API: bool = False
    USE_RERANKING: bool = True
    # Auth settings
    JWT_ALGORITHM: str = "HS256"
@ -87,6 +94,7 @@ def get_settings() -> Settings:
        "EMBEDDING_PROVIDER": config["service"]["components"]["embedding"],
        "COMPLETION_PROVIDER": config["service"]["components"]["completion"],
        "PARSER_PROVIDER": config["service"]["components"]["parser"],
        "RERANKER_PROVIDER": config["service"]["components"]["reranker"],
        # Storage settings
        "STORAGE_PATH": config["storage"]["local"]["path"],
        "AWS_REGION": config["storage"]["aws"]["region"],
@ -104,10 +112,16 @@ def get_settings() -> Settings:
        "COMPLETION_MAX_TOKENS": config["models"]["completion"]["default_max_tokens"],
        "COMPLETION_TEMPERATURE": config["models"]["completion"]["default_temperature"],
        "OLLAMA_BASE_URL": config["models"]["ollama"]["base_url"],
        "RERANKER_MODEL": config["models"]["reranker"]["model_name"],
        "RERANKER_DEVICE": config["models"]["reranker"].get("device"),
        "RERANKER_USE_FP16": config["models"]["reranker"].get("use_fp16", True),
        "RERANKER_QUERY_MAX_LENGTH": config["models"]["reranker"].get("query_max_length", 256),
        "RERANKER_PASSAGE_MAX_LENGTH": config["models"]["reranker"].get("passage_max_length", 512),
        # Processing settings
        "CHUNK_SIZE": config["processing"]["text"]["chunk_size"],
        "CHUNK_OVERLAP": config["processing"]["text"]["chunk_overlap"],
        "DEFAULT_K": config["processing"]["text"]["default_k"],
        "USE_RERANKING": config["processing"]["text"]["use_reranking"],
        "FRAME_SAMPLE_RATE": config["processing"]["video"]["frame_sample_rate"],
        "USE_UNSTRUCTURED_API": config["processing"]["unstructured"]["use_api"],
        # Auth settings
--- a/core/database/mongo_database.py
+++ b/core/database/mongo_database.py
@ -52,6 +52,7 @@ class MongoDatabase(BaseDatabase):
            # Ensure system metadata
            doc_dict["system_metadata"]["created_at"] = datetime.now(UTC)
            doc_dict["system_metadata"]["updated_at"] = datetime.now(UTC)
            doc_dict["metadata"]["external_id"] = doc_dict["external_id"]
            result = await self.collection.insert_one(doc_dict)
            return bool(result.inserted_id)
--- a/core/models/request.py
+++ b/core/models/request.py
@ -16,6 +16,7 @@ class RetrieveRequest(BaseModel):
    filters: Optional[Dict[str, Any]] = None
    k: int = Field(default=4, gt=0)
    min_score: float = Field(default=0.0)
    use_reranking: Optional[bool] = None  # If None, use default from config
 class CompletionQueryRequest(RetrieveRequest):
--- a/core/reranker/init.py
+++ b/core/reranker/init.py
@ -0,0 +1 @@
 """Reranker package for reranking search results."""
--- a/core/reranker/base_reranker.py
+++ b/core/reranker/base_reranker.py
@ -0,0 +1,26 @@
 from abc import ABC, abstractmethod
 from typing import List, Union
 from core.models.chunk import DocumentChunk
 class BaseReranker(ABC):
    """Base class for reranking search results"""
    @abstractmethod
    async def rerank(
        self,
        query: str,
        chunks: List[DocumentChunk],
    ) -> List[DocumentChunk]:
        """Rerank chunks based on their relevance to the query"""
        pass
    @abstractmethod
    async def compute_score(
        self,
        query: str,
        text: Union[str, List[str]],
    ) -> Union[float, List[float]]:
        """Compute relevance scores between query and text"""
        pass
--- a/core/reranker/bge_reranker.py
+++ b/core/reranker/bge_reranker.py
@ -0,0 +1,59 @@
 from typing import List, Union, Optional
 from FlagEmbedding import FlagAutoReranker
 from core.models.chunk import DocumentChunk
 from core.reranker.base_reranker import BaseReranker
 class BGEReranker(BaseReranker):
    """BGE reranker implementation using FlagEmbedding"""
    def __init__(
        self,
        model_name: str = "BAAI/bge-reranker-v2-gemma",
        query_max_length: int = 256,
        passage_max_length: int = 512,
        use_fp16: bool = True,
        device: Optional[str] = None,
    ):
        """Initialize BGE reranker"""
        devices = [device] if device else None
        self.reranker = FlagAutoReranker.from_finetuned(
            model_name_or_path=model_name,
            query_max_length=query_max_length,
            passage_max_length=passage_max_length,
            use_fp16=use_fp16,
            devices=devices,
        )
    async def rerank(
        self,
        query: str,
        chunks: List[DocumentChunk],
    ) -> List[DocumentChunk]:
        """Rerank chunks based on their relevance to the query"""
        if not chunks:
            return []
        # Get scores for all chunks
        passages = [chunk.content for chunk in chunks]
        scores = await self.compute_score(query, passages)
        # Update scores and sort chunks
        for chunk, score in zip(chunks, scores):
            chunk.score = float(score)
        return sorted(chunks, key=lambda x: x.score, reverse=True)
    async def compute_score(
        self,
        query: str,
        text: Union[str, List[str]],
    ) -> Union[float, List[float]]:
        """Compute relevance scores between query and text"""
        if isinstance(text, str):
            text = [text]
            scores = self.reranker.compute_score([[query, t] for t in text], normalize=True)
            return scores[0] if len(scores) == 1 else scores
        else:
            return self.reranker.compute_score([[query, t] for t in text], normalize=True)
--- a/core/services/document_service.py
+++ b/core/services/document_service.py
@ -19,6 +19,8 @@ from core.parser.base_parser import BaseParser
 from core.completion.base_completion import BaseCompletionModel
 from core.completion.base_completion import CompletionRequest, CompletionResponse
 import logging
 from core.reranker.base_reranker import BaseReranker
 from core.config import get_settings
 logger = logging.getLogger(__name__)
@ -32,6 +34,7 @@ class DocumentService:
        parser: BaseParser,
        embedding_model: BaseEmbeddingModel,
        completion_model: BaseCompletionModel,
        reranker: BaseReranker,
    ):
        self.db = database
        self.vector_store = vector_store
@ -39,16 +42,21 @@ class DocumentService:
        self.parser = parser
        self.embedding_model = embedding_model
        self.completion_model = completion_model
        self.reranker = reranker
    async def retrieve_chunks(
        self,
        query: str,
        auth: AuthContext,
        filters: Optional[Dict[str, Any]] = None,
-        k: int = 4,
+        k: int = 5,
        min_score: float = 0.0,
        use_reranking: Optional[bool] = None,
    ) -> List[ChunkResult]:
        """Retrieve relevant chunks."""
        settings = get_settings()
        should_rerank = use_reranking if use_reranking is not None else settings.USE_RERANKING
        # Get embedding for query
        query_embedding = await self.embedding_model.embed_for_query(query)
        logger.info("Generated query embedding")
@ -61,9 +69,18 @@ class DocumentService:
        logger.info(f"Found {len(doc_ids)} authorized documents")
        # Search chunks with vector similarity
-        chunks = await self.vector_store.query_similar(query_embedding, k=k, doc_ids=doc_ids)
+        chunks = await self.vector_store.query_similar(
            query_embedding, k=10 * k if should_rerank else k, doc_ids=doc_ids
        )
        logger.info(f"Found {len(chunks)} similar chunks")
        # Rerank chunks using the reranker if enabled
        if chunks and should_rerank:
            chunks = await self.reranker.rerank(query, chunks)
            chunks.sort(key=lambda x: x.score, reverse=True)
            chunks = chunks[:k]
            logger.info(f"Reranked {k*10} chunks and selected the top {k}")
        # Create and return chunk results
        results = await self._create_chunk_results(auth, chunks)
        logger.info(f"Returning {len(results)} chunk results")
@ -74,12 +91,13 @@ class DocumentService:
        query: str,
        auth: AuthContext,
        filters: Optional[Dict[str, Any]] = None,
-        k: int = 4,
+        k: int = 5,
        min_score: float = 0.0,
        use_reranking: Optional[bool] = None,
    ) -> List[DocumentResult]:
        """Retrieve relevant documents."""
        # Get chunks first
-        chunks = await self.retrieve_chunks(query, auth, filters, k, min_score)
+        chunks = await self.retrieve_chunks(query, auth, filters, k, min_score, use_reranking)
        # Convert to document results
        results = await self._create_document_results(auth, chunks)
        documents = list(results.values())
@ -95,10 +113,11 @@ class DocumentService:
        min_score: float = 0.0,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        use_reranking: Optional[bool] = None,
    ) -> CompletionResponse:
        """Generate completion using relevant chunks as context."""
        # Get relevant chunks
-        chunks = await self.retrieve_chunks(query, auth, filters, k, min_score)
+        chunks = await self.retrieve_chunks(query, auth, filters, k, min_score, use_reranking)
        documents = await self._create_document_results(auth, chunks)
        chunk_contents = [chunk.augmented_content(documents[chunk.document_id]) for chunk in chunks]
--- a/core/tests/integration/test_api.py
+++ b/core/tests/integration/test_api.py
@ -194,6 +194,10 @@ async def test_ingest_invalid_metadata(client: AsyncClient):
@pytest.mark.asyncio
@pytest.mark.skipif(
    get_settings().EMBEDDING_PROVIDER == "ollama",
    reason="local embedding models do not have size limits",
 )
 async def test_ingest_oversized_content(client: AsyncClient):
    """Test ingestion with oversized content"""
    headers = create_auth_header()
@ -285,26 +289,28 @@ async def test_invalid_document_id(client: AsyncClient):
@pytest.mark.asyncio
 async def test_retrieve_chunks(client: AsyncClient):
    """Test retrieving document chunks"""
    upload_string = "The quick brown fox jumps over the lazy dog"
    # First ingest a document to search
-    doc_id = await test_ingest_text_document(
+    doc_id = await test_ingest_text_document(client, content=upload_string)
        client, content="The quick brown fox jumps over the lazy dog"
    )
    headers = create_auth_header()
    # Sleep to allow time for document to be indexed
    await asyncio.sleep(1)
    response = await client.post(
        "/retrieve/chunks",
-        json={"query": "jumping fox", "k": 1, "min_score": 0.0},
+        json={
            "query": "jumping fox",
            "k": 1,
            "min_score": 0.0,
            "filters": {"external_id": doc_id},  # Add filter for specific document
        },
        headers=headers,
    )
    assert response.status_code == 200
    results = list(response.json())
-    assert len(results) == 1
+    assert len(results) > 0
    assert results[0]["score"] > 0.5
-    assert results[0]["document_id"] == doc_id
+    assert results[0]["content"] == upload_string
@pytest.mark.asyncio
@ -324,7 +330,10 @@ async def test_retrieve_docs(client: AsyncClient):
    headers = create_auth_header()
    response = await client.post(
        "/retrieve/docs",
-        json={"query": "Headaches, dehydration", "filters": {"test": True}},
+        json={
            "query": "Headaches, dehydration",
            "filters": {"test": True, "external_id": doc_id},  # Add filter for specific document
        },
        headers=headers,
    )
@ -398,3 +407,287 @@ async def test_invalid_completion_params(client: AsyncClient):
        headers=headers,
    )
    assert response.status_code == 422
@pytest.mark.asyncio
 async def test_retrieve_chunks_default_reranking(client: AsyncClient):
    """Test retrieving chunks with default reranking behavior"""
    # First ingest some test documents
    _ = await test_ingest_text_document(
        client, "The quick brown fox jumps over the lazy dog. This is a test document."
    )
    _ = await test_ingest_text_document(
        client, "The lazy dog sleeps while the quick brown fox runs. Another test document."
    )
    headers = create_auth_header()
    response = await client.post(
        "/retrieve/chunks",
        json={
            "query": "What does the fox do?",
            "k": 2,
            "min_score": 0.0,
            # Not specifying use_reranking - should use default from config
        },
        headers=headers,
    )
    assert response.status_code == 200
    chunks = response.json()
    assert len(chunks) > 0
    # Verify chunks are ordered by score
    scores = [chunk["score"] for chunk in chunks]
    assert all(scores[i] >= scores[i + 1] for i in range(len(scores) - 1))
@pytest.mark.asyncio
 async def test_retrieve_chunks_explicit_reranking(client: AsyncClient):
    """Test retrieving chunks with explicitly enabled reranking"""
    # First ingest some test documents
    _ = await test_ingest_text_document(
        client, "The quick brown fox jumps over the lazy dog. This is a test document."
    )
    _ = await test_ingest_text_document(
        client, "The lazy dog sleeps while the quick brown fox runs. Another test document."
    )
    headers = create_auth_header()
    response = await client.post(
        "/retrieve/chunks",
        json={
            "query": "What does the fox do?",
            "k": 2,
            "min_score": 0.0,
            "use_reranking": True,
        },
        headers=headers,
    )
    assert response.status_code == 200
    chunks = response.json()
    assert len(chunks) > 0
    # Verify chunks are ordered by score
    scores = [chunk["score"] for chunk in chunks]
    assert all(scores[i] >= scores[i + 1] for i in range(len(scores) - 1))
@pytest.mark.asyncio
 async def test_retrieve_chunks_disabled_reranking(client: AsyncClient):
    """Test retrieving chunks with explicitly disabled reranking"""
    # First ingest some test documents
    await test_ingest_text_document(
        client, "The quick brown fox jumps over the lazy dog. This is a test document."
    )
    await test_ingest_text_document(
        client, "The lazy dog sleeps while the quick brown fox runs. Another test document."
    )
    headers = create_auth_header()
    response = await client.post(
        "/retrieve/chunks",
        json={
            "query": "What does the fox do?",
            "k": 2,
            "min_score": 0.0,
            "use_reranking": False,
        },
        headers=headers,
    )
    assert response.status_code == 200
    chunks = response.json()
    assert len(chunks) > 0
@pytest.mark.asyncio
 async def test_reranking_affects_results(client: AsyncClient):
    """Test that reranking actually changes the order of results"""
    # First ingest documents with clearly different semantic relevance
    await test_ingest_text_document(
        client, "The capital of France is Paris. The city is known for the Eiffel Tower."
    )
    await test_ingest_text_document(
        client, "Paris is a city in France. It has many famous landmarks and museums."
    )
    await test_ingest_text_document(
        client, "Paris Hilton is a celebrity and businesswoman. She has nothing to do with France."
    )
    headers = create_auth_header()
    # Get results without reranking
    response_no_rerank = await client.post(
        "/retrieve/chunks",
        json={
            "query": "Tell me about the capital city of France",
            "k": 3,
            "min_score": 0.0,
            "use_reranking": False,
        },
        headers=headers,
    )
    # Get results with reranking
    response_with_rerank = await client.post(
        "/retrieve/chunks",
        json={
            "query": "Tell me about the capital city of France",
            "k": 3,
            "min_score": 0.0,
            "use_reranking": True,
        },
        headers=headers,
    )
    assert response_no_rerank.status_code == 200
    assert response_with_rerank.status_code == 200
    chunks_no_rerank = response_no_rerank.json()
    chunks_with_rerank = response_with_rerank.json()
    # Verify we got results in both cases
    assert len(chunks_no_rerank) > 0
    assert len(chunks_with_rerank) > 0
    # The order or scores should be different between reranked and non-reranked results
    # This test might be a bit flaky depending on the exact scoring, but it should work most of the time
    # given our carefully crafted test data
    scores_no_rerank = [c["score"] for c in chunks_no_rerank]
    scores_with_rerank = [c["score"] for c in chunks_with_rerank]
    assert scores_no_rerank != scores_with_rerank, "Reranking should affect the scores"
@pytest.mark.asyncio
 async def test_retrieve_docs_with_reranking(client: AsyncClient):
    """Test document retrieval with reranking options"""
    # First ingest documents with clearly different semantic relevance
    await test_ingest_text_document(
        client, "The capital of France is Paris. The city is known for the Eiffel Tower."
    )
    await test_ingest_text_document(
        client, "Paris is a city in France. It has many famous landmarks and museums."
    )
    await test_ingest_text_document(
        client, "Paris Hilton is a celebrity and businesswoman. She has nothing to do with France."
    )
    headers = create_auth_header()
    # Test with default reranking (from config)
    response_default = await client.post(
        "/retrieve/docs",
        json={
            "query": "Tell me about the capital city of France",
            "k": 3,
            "min_score": 0.0,
        },
        headers=headers,
    )
    assert response_default.status_code == 200
    docs_default = response_default.json()
    assert len(docs_default) > 0
    # Test with explicit reranking enabled
    response_rerank = await client.post(
        "/retrieve/docs",
        json={
            "query": "Tell me about the capital city of France",
            "k": 3,
            "min_score": 0.0,
            "use_reranking": True,
        },
        headers=headers,
    )
    assert response_rerank.status_code == 200
    docs_rerank = response_rerank.json()
    assert len(docs_rerank) > 0
    # Test with reranking disabled
    response_no_rerank = await client.post(
        "/retrieve/docs",
        json={
            "query": "Tell me about the capital city of France",
            "k": 3,
            "min_score": 0.0,
            "use_reranking": False,
        },
        headers=headers,
    )
    assert response_no_rerank.status_code == 200
    docs_no_rerank = response_no_rerank.json()
    assert len(docs_no_rerank) > 0
    # Verify that reranking affects the order
    scores_rerank = [doc["score"] for doc in docs_rerank]
    scores_no_rerank = [doc["score"] for doc in docs_no_rerank]
    assert scores_rerank != scores_no_rerank, "Reranking should affect document scores"
@pytest.mark.asyncio
 async def test_query_with_reranking(client: AsyncClient):
    """Test query completion with reranking options"""
    # First ingest documents with clearly different semantic relevance
    await test_ingest_text_document(
        client, "The capital of France is Paris. The city is known for the Eiffel Tower."
    )
    await test_ingest_text_document(
        client, "Paris is a city in France. It has many famous landmarks and museums."
    )
    await test_ingest_text_document(
        client, "Paris Hilton is a celebrity and businesswoman. She has nothing to do with France."
    )
    headers = create_auth_header()
    # Test with default reranking (from config)
    response_default = await client.post(
        "/query",
        json={
            "query": "What is the capital of France?",
            "k": 3,
            "min_score": 0.0,
            "max_tokens": 50,
        },
        headers=headers,
    )
    assert response_default.status_code == 200
    completion_default = response_default.json()
    assert "completion" in completion_default
    # Test with explicit reranking enabled
    response_rerank = await client.post(
        "/query",
        json={
            "query": "What is the capital of France?",
            "k": 3,
            "min_score": 0.0,
            "max_tokens": 50,
            "use_reranking": True,
        },
        headers=headers,
    )
    assert response_rerank.status_code == 200
    completion_rerank = response_rerank.json()
    assert "completion" in completion_rerank
    # Test with reranking disabled
    response_no_rerank = await client.post(
        "/query",
        json={
            "query": "What is the capital of France?",
            "k": 3,
            "min_score": 0.0,
            "max_tokens": 50,
            "use_reranking": False,
        },
        headers=headers,
    )
    assert response_no_rerank.status_code == 200
    completion_no_rerank = response_no_rerank.json()
    assert "completion" in completion_no_rerank
    # The actual responses might be different due to different chunk ordering,
    # but all should mention Paris as the capital
    assert "Paris" in completion_default["completion"]
    assert "Paris" in completion_rerank["completion"]
    assert "Paris" in completion_no_rerank["completion"]
--- a/core/tests/unit/test_reranker.py
+++ b/core/tests/unit/test_reranker.py
@ -0,0 +1,166 @@
 import pytest
 from typing import List
 from core.models.chunk import DocumentChunk
 from core.reranker.bge_reranker import BGEReranker
 from core.config import get_settings
@pytest.fixture
 def sample_chunks() -> List[DocumentChunk]:
    return [
        DocumentChunk(
            document_id="1",
            content="The quick brown fox jumps over the lazy dog",
            embedding=[0.1] * 10,
            chunk_number=1,
            score=0.5,
        ),
        DocumentChunk(
            document_id="2",
            content="Python is a popular programming language",
            embedding=[0.2] * 10,
            chunk_number=1,
            score=0.7,
        ),
        DocumentChunk(
            document_id="3",
            content="Machine learning models help analyze data",
            embedding=[0.3] * 10,
            chunk_number=1,
            score=0.3,
        ),
    ]
@pytest.fixture
 def reranker():
    """Fixture to create and reuse a BGE reranker instance"""
    settings = get_settings()
    return BGEReranker(
        model_name=settings.RERANKER_MODEL,
        device=settings.RERANKER_DEVICE,
        use_fp16=settings.RERANKER_USE_FP16,
        query_max_length=settings.RERANKER_QUERY_MAX_LENGTH,
        passage_max_length=settings.RERANKER_PASSAGE_MAX_LENGTH,
    )
@pytest.mark.asyncio
 async def test_reranker_relevance(reranker, sample_chunks):
    """Test that reranker improves relevance for programming-related query"""
    print("\n=== Testing Reranker Relevance ===")
    query = "What is Python programming language?"
    # Get reranked results
    reranked_chunks = await reranker.rerank(query, sample_chunks)
    print(f"\nQuery: {query}")
    for i, chunk in enumerate(reranked_chunks):
        print(f"{i+1}. Score: {chunk.score:.3f} - {chunk.content}")
    # The most relevant chunks should be about Python
    assert "Python" in reranked_chunks[0].content
    assert reranked_chunks[0].score > reranked_chunks[-1].score
    # Check that irrelevant content (fox/dog) is ranked lower
    fox_chunk_idx = next(i for i, c in enumerate(reranked_chunks) if "fox" in c.content.lower())
    assert fox_chunk_idx > 0  # Should not be first
@pytest.mark.asyncio
 async def test_reranker_score_distribution(reranker, sample_chunks):
    """Test that reranker produces reasonable score distribution"""
    print("\n=== Testing Score Distribution ===")
    query = "Tell me about machine learning and data science"
    # Get reranked results
    reranked_chunks = await reranker.rerank(query, sample_chunks)
    print(f"\nQuery: {query}")
    for i, chunk in enumerate(reranked_chunks):
        print(f"{i+1}. Score: {chunk.score:.3f} - {chunk.content}")
    # Check score properties
    scores = [c.score for c in reranked_chunks]
    assert all(0 <= s <= 1 for s in scores)  # Scores should be between 0 and 1
    assert len(set(scores)) > 1  # Should have different scores (not all same)
    # Verify ordering
    assert scores == sorted(scores, reverse=True)  # Should be in descending order
    # Most relevant chunk should be about ML/data science
    top_chunk = reranked_chunks[0]
    assert any(term in top_chunk.content.lower() for term in ["machine learning", "data science"])
@pytest.mark.asyncio
 async def test_reranker_batch_scoring(reranker):
    """Test that reranker can handle multiple queries/passages efficiently"""
    print("\n=== Testing Batch Scoring ===")
    texts = [
        "Python is a programming language",
        "Machine learning is a field of AI",
        "The quick brown fox jumps",
        "Data science uses statistical methods",
    ]
    queries = ["What is Python?", "Explain artificial intelligence", "Tell me about data analysis"]
    # Test multiple queries against multiple texts
    for query in queries:
        scores = await reranker.compute_score(query, texts)
        print(f"\nQuery: {query}")
        for text, score in zip(texts, scores):
            print(f"Score: {score:.3f} - {text}")
        assert len(scores) == len(texts)
        assert all(isinstance(s, float) for s in scores)
        assert all(0 <= s <= 1 for s in scores)
@pytest.mark.asyncio
 async def test_reranker_empty_and_edge_cases(reranker, sample_chunks):
    """Test reranker behavior with empty or edge case inputs"""
    print("\n=== Testing Edge Cases ===")
    # Empty chunks list
    result = await reranker.rerank("test query", [])
    assert result == []
    print("Empty chunks test passed")
    # Single chunk
    single_chunk = DocumentChunk(
        document_id="1",
        content="Test content",
        embedding=[0.1] * 768,
        chunk_number=1,
        score=0.5,
    )
    result = await reranker.rerank("test query", [single_chunk])
    assert len(result) == 1
    assert isinstance(result[0].score, float)
    print(f"Single chunk test passed - Score: {result[0].score:.3f}")
    # Empty query
    result = await reranker.rerank("", sample_chunks)
    assert len(result) == len(sample_chunks)
    print("Empty query test passed")
@pytest.mark.asyncio
 async def test_reranker_consistency(reranker, sample_chunks):
    """Test that reranker produces consistent results for same input"""
    print("\n=== Testing Consistency ===")
    query = "What is Python programming?"
    # Run reranking multiple times
    results1 = await reranker.rerank(query, sample_chunks)
    results2 = await reranker.rerank(query, sample_chunks)
    # Scores should be the same across runs
    scores1 = [c.score for c in results1]
    scores2 = [c.score for c in results2]
    print("\nScores from first run:", [f"{s:.3f}" for s in scores1])
    print("Scores from second run:", [f"{s:.3f}" for s in scores2])
    assert scores1 == scores2
    # Order should be preserved
    assert [c.document_id for c in results1] == [c.document_id for c in results2]
    print("Order consistency test passed")
--- a/requirements.txt
+++ b/requirements.txt
@ -1,10 +1,13 @@
 accelerate==1.2.1
 aiofiles==24.1.0
 aiohttp==3.9.5
 aiosignal==1.3.1
 annotated-types==0.6.0
 anthropic==0.42.0
 antlr4-python3-runtime==4.9.3
 anyio==4.3.0
 appnope==0.1.4
 asgiref==3.8.1
 assemblyai==0.36.0
 asttokens==2.4.1
 attrs==23.2.0
@ -18,6 +21,7 @@ botocore==1.34.103
 botocore-stubs==1.34.150
 build==1.2.2.post1
 cachetools==5.3.3
 cbor==1.0.0
 certifi==2024.2.2
 cffi==1.17.0
 cfgv==3.4.0
@ -30,7 +34,7 @@ contourpy==1.2.1
 cryptography==43.0.0
 cycler==0.12.1
 dataclasses-json==0.6.7
-datasets==2.21.0
+datasets==2.19.0
 debugpy==1.8.5
 decorator==5.1.1
 deepdiff==7.0.1
@ -51,10 +55,11 @@ fastapi-cli==0.0.2
 ffmpeg-python==0.2.0
 filelock==3.15.4
 filetype==1.2.0
 FlagEmbedding==1.3.3
 flatbuffers==24.3.25
 fonttools==4.53.1
 frozenlist==1.4.1
-fsspec==2024.6.1
+fsspec==2024.3.1
 future==1.0.0
 google-api-core==2.19.1
 google-auth==2.29.0
@ -67,14 +72,18 @@ h11==0.14.0
 httpcore==1.0.5
 httptools==0.6.1
 httpx==0.27.0
-huggingface-hub==0.24.5
+huggingface-hub==0.27.0
 humanfriendly==10.0
 identify==2.6.3
 idna==3.7
 ijson==3.3.0
 importlib_metadata==8.5.0
 iniconfig==2.0.0
 inscriptis==2.5.0
 iopath==0.1.10
 ipykernel==6.29.5
 ipython==8.26.0
 ir_datasets==0.5.9
 jaraco.classes==3.4.0
 jaraco.context==6.0.1
 jaraco.functools==4.1.0
@ -99,9 +108,11 @@ langchain-text-splitters==0.2.2
 langchain-unstructured==0.1.1
 langdetect==1.0.9
 langsmith==0.1.98
 lap==0.5.12
 layoutparser==0.3.4
 llvmlite==0.43.0
 lxml==5.2.2
 lz4==4.3.3
 Markdown==3.6
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
@ -131,6 +142,18 @@ onnxruntime==1.18.1
 openai==1.43.0
 opencv-python==4.10.0.84
 openpyxl==3.1.5
 opentelemetry-api==1.29.0
 opentelemetry-exporter-otlp==1.29.0
 opentelemetry-exporter-otlp-proto-common==1.29.0
 opentelemetry-exporter-otlp-proto-grpc==1.29.0
 opentelemetry-exporter-otlp-proto-http==1.29.0
 opentelemetry-instrumentation==0.50b0
 opentelemetry-instrumentation-asgi==0.50b0
 opentelemetry-instrumentation-fastapi==0.50b0
 opentelemetry-proto==1.29.0
 opentelemetry-sdk==1.29.0
 opentelemetry-semantic-conventions==0.50b0
 opentelemetry-util-http==0.50b0
 ordered-set==4.1.0
 orjson==3.10.3
 packaging==24.0
@ -141,6 +164,7 @@ pathspec==0.12.1
 pdf2image==1.17.0
 pdfminer.six==20231228
 pdfplumber==0.11.3
 peft==0.14.0
 pexpect==4.9.0
 pikepdf==9.1.1
 pillow==10.4.0
@ -156,7 +180,9 @@ protobuf==5.27.3
 psutil==6.0.0
 ptyprocess==0.7.0
 pure_eval==0.2.3
 py-cpuinfo==9.0.0
 pyarrow==17.0.0
 pyarrow-hotfix==0.6
 pyasn1==0.6.0
 pyasn1_modules==0.4.0
 pycocotools==2.0.8
@ -184,6 +210,7 @@ python-magic==0.4.27
 python-multipart==0.0.9
 python-oxmsg==0.0.1
 python-pptx==0.6.23
 pytubefix==8.8.5
 pytz==2024.1
 PyYAML==6.0.1
 pyzmq==26.2.0
@ -198,7 +225,11 @@ rich==13.7.1
 rsa==4.9
 s3transfer==0.10.1
 safetensors==0.4.4
 scikit-learn==1.6.0
 scipy==1.14.0
 seaborn==0.13.2
 sentence-transformers==3.3.1
 sentencepiece==0.2.0
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.1
@ -209,6 +240,7 @@ starlette==0.37.2
 sympy==1.13.2
 tabulate==0.9.0
 tenacity==8.5.0
 threadpoolctl==3.5.0
 tiktoken==0.7.0
 timm==1.0.8
 tokenizers==0.19.1
@ -218,7 +250,8 @@ torchvision==0.17.2
 tornado==6.4.1
 tqdm==4.66.4
 traitlets==5.14.3
-transformers==4.44.0
+transformers==4.44.2
 trec-car-tools==2.6
 twine==6.0.1
 typer==0.12.3
 types-awscrt==0.21.2
@ -227,6 +260,9 @@ typing-inspect==0.9.0
 typing_extensions==4.11.0
 tzdata==2024.1
 ujson==5.9.0
 ultralytics==8.3.55
 ultralytics-thop==2.0.13
 unlzw3==0.2.3
 unstructured==0.15.0
 unstructured-client==0.24.1
 unstructured-inference==0.7.36
@ -235,6 +271,8 @@ urllib3==2.2.1
 uvicorn==0.29.0
 uvloop==0.19.0
 virtualenv==20.28.0
 warc3-wet==0.2.5
 warc3-wet-clueweb09==0.2.5
 watchfiles==0.21.0
 wcwidth==0.2.13
 websockets==12.0
@ -243,7 +281,5 @@ xlrd==2.0.1
 XlsxWriter==3.2.0
 xxhash==3.4.1
 yarl==1.9.4
-opentelemetry-api>=1.21.0
+zipp==3.21.0
-opentelemetry-sdk>=1.21.0
+zlib-state==0.1.9
 opentelemetry-instrumentation-fastapi>=0.42b0
 opentelemetry-exporter-otlp>=1.21.0
		`@ -0,0 +1 @@`
							`"""Reranker package for reranking search results."""`