Reduce extra logging, change info logs to debug

Adityavardhan Agrawal 2025-03-27 20:05:27 -07:00
parent 7eb5887d2f
commit 6ef3ec207e
4 changed files with 29 additions and 29 deletions
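
Note: after this change the messages below are emitted at DEBUG level, so they are hidden under a typical INFO configuration. A minimal sketch of how to surface them again while troubleshooting, assuming the standard Python logging module; the logger name used here is a hypothetical example, not a name confirmed by this repository:

import logging

# Sketch only: allow DEBUG records through the root handler, then raise the
# level on the logger of interest. Replace "document_service" with whatever
# name the service modules actually pass to logging.getLogger().
logging.basicConfig(level=logging.INFO)
logging.getLogger("document_service").setLevel(logging.DEBUG)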

View File

@@ -355,7 +355,7 @@ async def ingest_file(
"use_colpali": use_colpali,
},
):
logger.info(f"API: Ingesting file with use_colpali: {use_colpali}")
logger.debug(f"API: Ingesting file with use_colpali: {use_colpali}")
return await document_service.ingest_file(
file=file,
metadata=metadata_dict,
@@ -631,7 +631,7 @@ async def get_document(document_id: str, auth: AuthContext = Depends(verify_toke
"""Get document by ID."""
try:
doc = await document_service.db.get_document(document_id, auth)
logger.info(f"Found document: {doc}")
logger.debug(f"Found document: {doc}")
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
return doc
@@ -645,7 +645,7 @@ async def get_document_by_filename(filename: str, auth: AuthContext = Depends(ve
"""Get document by filename."""
try:
doc = await document_service.db.get_document_by_filename(filename, auth)
logger.info(f"Found document by filename: {doc}")
logger.debug(f"Found document by filename: {doc}")
if not doc:
raise HTTPException(status_code=404, detail=f"Document with filename '{filename}' not found")
return doc
@@ -1152,7 +1152,7 @@ async def generate_cloud_uri(
user_id = request.user_id
expiry_days = request.expiry_days
logger.info(f"Generating cloud URI for app_id={app_id}, name={name}, user_id={user_id}")
logger.debug(f"Generating cloud URI for app_id={app_id}, name={name}, user_id={user_id}")
# Verify authorization header before proceeding
if not authorization:
@@ -1199,7 +1199,7 @@ async def generate_cloud_uri(
uri = await user_service.generate_cloud_uri(user_id, app_id, name, expiry_days)
if not uri:
logger.info("Application limit reached for this account tier with user_id: %s", user_id)
logger.debug("Application limit reached for this account tier with user_id: %s", user_id)
raise HTTPException(
status_code=403,
detail="Application limit reached for this account tier"

View File

@@ -121,9 +121,9 @@ class DocumentService:
) if search_multi else []
)
logger.info(f"Found {len(chunks)} similar chunks via regular embedding")
logger.debug(f"Found {len(chunks)} similar chunks via regular embedding")
if use_colpali:
-logger.info(
+logger.debug(
f"Found {len(chunks_multivector)} similar chunks via multivector embedding since we are also using colpali"
)
@@ -132,7 +132,7 @@ class DocumentService:
chunks = await self.reranker.rerank(query, chunks)
chunks.sort(key=lambda x: x.score, reverse=True)
chunks = chunks[:k]
logger.info(f"Reranked {k*10} chunks and selected the top {k}")
logger.debug(f"Reranked {k*10} chunks and selected the top {k}")
chunks = chunks_multivector + chunks
@@ -350,7 +350,7 @@ class DocumentService:
"user_id": [auth.user_id] if auth.user_id else [], # Add user_id to access control for filtering (as a list)
},
)
logger.info(f"Created text document record with ID {doc.external_id}")
logger.debug(f"Created text document record with ID {doc.external_id}")
# Apply rules if provided
if rules:
@@ -370,13 +370,13 @@ class DocumentService:
chunks = await self.parser.split_text(content)
if not chunks:
raise ValueError("No content chunks extracted")
logger.info(f"Split processed text into {len(chunks)} chunks")
logger.debug(f"Split processed text into {len(chunks)} chunks")
# Generate embeddings for chunks
embeddings = await self.embedding_model.embed_for_ingestion(chunks)
logger.info(f"Generated {len(embeddings)} embeddings")
logger.debug(f"Generated {len(embeddings)} embeddings")
chunk_objects = self._create_chunk_objects(doc.external_id, chunks, embeddings)
logger.info(f"Created {len(chunk_objects)} chunk objects")
logger.debug(f"Created {len(chunk_objects)} chunk objects")
chunk_objects_multivector = []
@@ -396,7 +396,7 @@ class DocumentService:
# Store everything
await self._store_chunks_and_doc(chunk_objects, doc, use_colpali, chunk_objects_multivector)
logger.info(f"Successfully stored text document {doc.external_id}")
logger.debug(f"Successfully stored text document {doc.external_id}")
return doc
@@ -452,7 +452,7 @@ class DocumentService:
additional_metadata, text = await self.parser.parse_file_to_text(
file_content, file.filename
)
logger.info(f"Parsed file into text of length {len(text)}")
logger.debug(f"Parsed file into text of length {len(text)}")
# Apply rules if provided
if rules:
@@ -479,7 +479,7 @@ class DocumentService:
# Store full content
doc.system_metadata["content"] = text
logger.info(f"Created file document record with ID {doc.external_id}")
logger.debug(f"Created file document record with ID {doc.external_id}")
file_content_base64 = base64.b64encode(file_content).decode()
# Store the original file
@@ -487,33 +487,33 @@ class DocumentService:
file_content_base64, doc.external_id, file.content_type
)
doc.storage_info = {"bucket": storage_info[0], "key": storage_info[1]}
logger.info(f"Stored file in bucket `{storage_info[0]}` with key `{storage_info[1]}`")
logger.debug(f"Stored file in bucket `{storage_info[0]}` with key `{storage_info[1]}`")
# Split into chunks after all processing is done
chunks = await self.parser.split_text(text)
if not chunks:
raise ValueError("No content chunks extracted")
logger.info(f"Split processed text into {len(chunks)} chunks")
logger.debug(f"Split processed text into {len(chunks)} chunks")
# Generate embeddings for chunks
embeddings = await self.embedding_model.embed_for_ingestion(chunks)
logger.info(f"Generated {len(embeddings)} embeddings")
logger.debug(f"Generated {len(embeddings)} embeddings")
# Create and store chunk objects
chunk_objects = self._create_chunk_objects(doc.external_id, chunks, embeddings)
logger.info(f"Created {len(chunk_objects)} chunk objects")
logger.debug(f"Created {len(chunk_objects)} chunk objects")
chunk_objects_multivector = []
logger.info(f"use_colpali: {use_colpali}")
logger.debug(f"use_colpali: {use_colpali}")
if use_colpali and self.colpali_embedding_model:
chunks_multivector = self._create_chunks_multivector(
file_type, file_content_base64, file_content, chunks
)
logger.info(f"Created {len(chunks_multivector)} chunks for multivector embedding")
logger.debug(f"Created {len(chunks_multivector)} chunks for multivector embedding")
colpali_embeddings = await self.colpali_embedding_model.embed_for_ingestion(
chunks_multivector
)
logger.info(f"Generated {len(colpali_embeddings)} embeddings for multivector embedding")
logger.debug(f"Generated {len(colpali_embeddings)} embeddings for multivector embedding")
chunk_objects_multivector = self._create_chunk_objects(
doc.external_id, chunks_multivector, colpali_embeddings
)
@@ -522,7 +522,7 @@ class DocumentService:
doc.chunk_ids = await self._store_chunks_and_doc(
chunk_objects, doc, use_colpali, chunk_objects_multivector
)
logger.info(f"Successfully stored file document {doc.external_id}")
logger.debug(f"Successfully stored file document {doc.external_id}")
return doc
@@ -780,7 +780,7 @@ class DocumentService:
):
doc_chunks[chunk.document_id] = chunk
logger.info(f"Grouped chunks into {len(doc_chunks)} documents")
logger.info(f"Document chunks: {doc_chunks}")
logger.debug(f"Document chunks: {doc_chunks}")
results = {}
for doc_id, chunk in doc_chunks.items():
# Get document metadata

View File

@@ -207,7 +207,7 @@ class MultiVectorStore(BaseVectorStore):
stored_ids.append(f"{chunk.document_id}-{chunk.chunk_number}")
logger.info(f"{len(stored_ids)} vector embeddings added successfully!")
logger.debug(f"{len(stored_ids)} vector embeddings added successfully!")
return len(stored_ids) > 0, stored_ids
# except Exception as e:
@@ -303,7 +303,7 @@ class MultiVectorStore(BaseVectorStore):
WHERE {where_clause}
"""
logger.info(f"Batch retrieving {len(chunk_identifiers)} chunks from multi-vector store")
logger.debug(f"Batch retrieving {len(chunk_identifiers)} chunks from multi-vector store")
result = self.conn.execute(query).fetchall()
@@ -325,7 +325,7 @@ class MultiVectorStore(BaseVectorStore):
)
chunks.append(chunk)
logger.info(f"Found {len(chunks)} chunks in batch retrieval from multi-vector store")
logger.debug(f"Found {len(chunks)} chunks in batch retrieval from multi-vector store")
return chunks
def close(self):

View File

@@ -210,7 +210,7 @@ class PGVectorStore(BaseVectorStore):
# Build query to find all matching chunks in a single query
query = select(VectorEmbedding).where(or_condition)
logger.info(f"Batch retrieving {len(chunk_identifiers)} chunks with a single query")
logger.debug(f"Batch retrieving {len(chunk_identifiers)} chunks with a single query")
# Execute query
result = await session.execute(query)
@@ -235,7 +235,7 @@ class PGVectorStore(BaseVectorStore):
)
chunks.append(chunk)
logger.info(f"Found {len(chunks)} chunks in batch retrieval")
logger.debug(f"Found {len(chunks)} chunks in batch retrieval")
return chunks
except Exception as e: