Mirror of https://github.com/james-m-jordan/morphik-core.git
Synced 2025-05-09 19:32:38 +00:00
Reduce extra logging, change to debugs
This commit is contained in:
parent 7eb5887d2f
commit 6ef3ec207e
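For anyone who still wants to see these messages while developing, here is a minimal sketch using only the standard library logging module; the actual logger configuration in morphik-core may differ, and the "core" logger name is an assumption based on the core/api.py path below:

import logging

# Surface the messages demoted to DEBUG in this commit during local debugging;
# a default INFO-level configuration keeps them hidden.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
)

# Or raise verbosity only for the package touched here ("core" is an assumption).
logging.getLogger("core").setLevel(logging.DEBUG)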
core/api.py (10 changed lines)
@@ -355,7 +355,7 @@ async def ingest_file(
             "use_colpali": use_colpali,
         },
     ):
-        logger.info(f"API: Ingesting file with use_colpali: {use_colpali}")
+        logger.debug(f"API: Ingesting file with use_colpali: {use_colpali}")
         return await document_service.ingest_file(
             file=file,
             metadata=metadata_dict,
@@ -631,7 +631,7 @@ async def get_document(document_id: str, auth: AuthContext = Depends(verify_toke
     """Get document by ID."""
     try:
         doc = await document_service.db.get_document(document_id, auth)
-        logger.info(f"Found document: {doc}")
+        logger.debug(f"Found document: {doc}")
         if not doc:
             raise HTTPException(status_code=404, detail="Document not found")
         return doc
@@ -645,7 +645,7 @@ async def get_document_by_filename(filename: str, auth: AuthContext = Depends(ve
     """Get document by filename."""
     try:
         doc = await document_service.db.get_document_by_filename(filename, auth)
-        logger.info(f"Found document by filename: {doc}")
+        logger.debug(f"Found document by filename: {doc}")
         if not doc:
             raise HTTPException(status_code=404, detail=f"Document with filename '{filename}' not found")
         return doc
@@ -1152,7 +1152,7 @@ async def generate_cloud_uri(
     user_id = request.user_id
     expiry_days = request.expiry_days

-    logger.info(f"Generating cloud URI for app_id={app_id}, name={name}, user_id={user_id}")
+    logger.debug(f"Generating cloud URI for app_id={app_id}, name={name}, user_id={user_id}")

     # Verify authorization header before proceeding
     if not authorization:
@@ -1199,7 +1199,7 @@ async def generate_cloud_uri(
     uri = await user_service.generate_cloud_uri(user_id, app_id, name, expiry_days)

     if not uri:
-        logger.info("Application limit reached for this account tier with user_id: %s", user_id)
+        logger.debug("Application limit reached for this account tier with user_id: %s", user_id)
         raise HTTPException(
             status_code=403,
             detail="Application limit reached for this account tier"

@@ -121,9 +121,9 @@ class DocumentService:
             ) if search_multi else []
         )

-        logger.info(f"Found {len(chunks)} similar chunks via regular embedding")
+        logger.debug(f"Found {len(chunks)} similar chunks via regular embedding")
         if use_colpali:
-            logger.info(
+            logger.debug(
                 f"Found {len(chunks_multivector)} similar chunks via multivector embedding since we are also using colpali"
             )

@@ -132,7 +132,7 @@ class DocumentService:
             chunks = await self.reranker.rerank(query, chunks)
             chunks.sort(key=lambda x: x.score, reverse=True)
             chunks = chunks[:k]
-            logger.info(f"Reranked {k*10} chunks and selected the top {k}")
+            logger.debug(f"Reranked {k*10} chunks and selected the top {k}")

         chunks = chunks_multivector + chunks

@@ -350,7 +350,7 @@ class DocumentService:
                 "user_id": [auth.user_id] if auth.user_id else [],  # Add user_id to access control for filtering (as a list)
             },
         )
-        logger.info(f"Created text document record with ID {doc.external_id}")
+        logger.debug(f"Created text document record with ID {doc.external_id}")

         # Apply rules if provided
         if rules:
@@ -370,13 +370,13 @@ class DocumentService:
         chunks = await self.parser.split_text(content)
         if not chunks:
             raise ValueError("No content chunks extracted")
-        logger.info(f"Split processed text into {len(chunks)} chunks")
+        logger.debug(f"Split processed text into {len(chunks)} chunks")

         # Generate embeddings for chunks
         embeddings = await self.embedding_model.embed_for_ingestion(chunks)
-        logger.info(f"Generated {len(embeddings)} embeddings")
+        logger.debug(f"Generated {len(embeddings)} embeddings")
         chunk_objects = self._create_chunk_objects(doc.external_id, chunks, embeddings)
-        logger.info(f"Created {len(chunk_objects)} chunk objects")
+        logger.debug(f"Created {len(chunk_objects)} chunk objects")

         chunk_objects_multivector = []

@@ -396,7 +396,7 @@ class DocumentService:

         # Store everything
         await self._store_chunks_and_doc(chunk_objects, doc, use_colpali, chunk_objects_multivector)
-        logger.info(f"Successfully stored text document {doc.external_id}")
+        logger.debug(f"Successfully stored text document {doc.external_id}")

         return doc

@@ -452,7 +452,7 @@ class DocumentService:
         additional_metadata, text = await self.parser.parse_file_to_text(
             file_content, file.filename
         )
-        logger.info(f"Parsed file into text of length {len(text)}")
+        logger.debug(f"Parsed file into text of length {len(text)}")

         # Apply rules if provided
         if rules:
@@ -479,7 +479,7 @@ class DocumentService:

         # Store full content
         doc.system_metadata["content"] = text
-        logger.info(f"Created file document record with ID {doc.external_id}")
+        logger.debug(f"Created file document record with ID {doc.external_id}")

         file_content_base64 = base64.b64encode(file_content).decode()
         # Store the original file
@@ -487,33 +487,33 @@ class DocumentService:
             file_content_base64, doc.external_id, file.content_type
         )
         doc.storage_info = {"bucket": storage_info[0], "key": storage_info[1]}
-        logger.info(f"Stored file in bucket `{storage_info[0]}` with key `{storage_info[1]}`")
+        logger.debug(f"Stored file in bucket `{storage_info[0]}` with key `{storage_info[1]}`")

         # Split into chunks after all processing is done
         chunks = await self.parser.split_text(text)
         if not chunks:
             raise ValueError("No content chunks extracted")
-        logger.info(f"Split processed text into {len(chunks)} chunks")
+        logger.debug(f"Split processed text into {len(chunks)} chunks")

         # Generate embeddings for chunks
         embeddings = await self.embedding_model.embed_for_ingestion(chunks)
-        logger.info(f"Generated {len(embeddings)} embeddings")
+        logger.debug(f"Generated {len(embeddings)} embeddings")

         # Create and store chunk objects
         chunk_objects = self._create_chunk_objects(doc.external_id, chunks, embeddings)
-        logger.info(f"Created {len(chunk_objects)} chunk objects")
+        logger.debug(f"Created {len(chunk_objects)} chunk objects")

         chunk_objects_multivector = []
-        logger.info(f"use_colpali: {use_colpali}")
+        logger.debug(f"use_colpali: {use_colpali}")
         if use_colpali and self.colpali_embedding_model:
             chunks_multivector = self._create_chunks_multivector(
                 file_type, file_content_base64, file_content, chunks
             )
-            logger.info(f"Created {len(chunks_multivector)} chunks for multivector embedding")
+            logger.debug(f"Created {len(chunks_multivector)} chunks for multivector embedding")
             colpali_embeddings = await self.colpali_embedding_model.embed_for_ingestion(
                 chunks_multivector
             )
-            logger.info(f"Generated {len(colpali_embeddings)} embeddings for multivector embedding")
+            logger.debug(f"Generated {len(colpali_embeddings)} embeddings for multivector embedding")
             chunk_objects_multivector = self._create_chunk_objects(
                 doc.external_id, chunks_multivector, colpali_embeddings
             )
@@ -522,7 +522,7 @@ class DocumentService:
         doc.chunk_ids = await self._store_chunks_and_doc(
             chunk_objects, doc, use_colpali, chunk_objects_multivector
         )
-        logger.info(f"Successfully stored file document {doc.external_id}")
+        logger.debug(f"Successfully stored file document {doc.external_id}")

         return doc

@@ -780,7 +780,7 @@ class DocumentService:
         ):
             doc_chunks[chunk.document_id] = chunk
         logger.info(f"Grouped chunks into {len(doc_chunks)} documents")
-        logger.info(f"Document chunks: {doc_chunks}")
+        logger.debug(f"Document chunks: {doc_chunks}")
         results = {}
         for doc_id, chunk in doc_chunks.items():
             # Get document metadata

@@ -207,7 +207,7 @@ class MultiVectorStore(BaseVectorStore):

             stored_ids.append(f"{chunk.document_id}-{chunk.chunk_number}")

-        logger.info(f"{len(stored_ids)} vector embeddings added successfully!")
+        logger.debug(f"{len(stored_ids)} vector embeddings added successfully!")
         return len(stored_ids) > 0, stored_ids

     # except Exception as e:
@@ -303,7 +303,7 @@ class MultiVectorStore(BaseVectorStore):
             WHERE {where_clause}
         """

-        logger.info(f"Batch retrieving {len(chunk_identifiers)} chunks from multi-vector store")
+        logger.debug(f"Batch retrieving {len(chunk_identifiers)} chunks from multi-vector store")

         result = self.conn.execute(query).fetchall()

@@ -325,7 +325,7 @@ class MultiVectorStore(BaseVectorStore):
             )
             chunks.append(chunk)

-        logger.info(f"Found {len(chunks)} chunks in batch retrieval from multi-vector store")
+        logger.debug(f"Found {len(chunks)} chunks in batch retrieval from multi-vector store")
         return chunks

     def close(self):

@@ -210,7 +210,7 @@ class PGVectorStore(BaseVectorStore):
                 # Build query to find all matching chunks in a single query
                 query = select(VectorEmbedding).where(or_condition)

-                logger.info(f"Batch retrieving {len(chunk_identifiers)} chunks with a single query")
+                logger.debug(f"Batch retrieving {len(chunk_identifiers)} chunks with a single query")

                 # Execute query
                 result = await session.execute(query)
@@ -235,7 +235,7 @@ class PGVectorStore(BaseVectorStore):
                     )
                     chunks.append(chunk)

-                logger.info(f"Found {len(chunks)} chunks in batch retrieval")
+                logger.debug(f"Found {len(chunks)} chunks in batch retrieval")
                 return chunks

         except Exception as e:
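
A side note on the demoted calls (standard logging behaviour, not something this commit changes): most of these messages use f-strings, which are evaluated even when DEBUG is disabled, while the generate_cloud_uri hunk uses %-style deferred formatting, which skips the interpolation when the level is off. A small runnable sketch with hypothetical stand-in data:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

doc_chunks = {"doc-1": "chunk-0", "doc-2": "chunk-3"}  # hypothetical stand-in data

# f-string: the message is built eagerly, even though DEBUG is disabled here.
logger.debug(f"Document chunks: {doc_chunks}")

# %-style deferred formatting (as in the generate_cloud_uri hunk above):
# arguments are interpolated only if the DEBUG level is actually enabled.
logger.debug("Document chunks: %s", doc_chunks)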