Update requirements, fix some docker connection issues

This commit is contained in:
Adityavardhan Agrawal 2025-04-22 11:39:30 -07:00
parent 3e5774b730
commit 2985493b66
4 changed files with 45 additions and 22 deletions

View File

@ -4,7 +4,6 @@ import logging
import os
import urllib.parse as up
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from arq.connections import RedisSettings
@ -59,7 +58,8 @@ async def get_document_with_retry(document_service, document_id, auth, max_retri
attempt += 1
if attempt < max_retries:
logger.warning(
f"Document {document_id} not found on attempt {attempt}/{max_retries}. Retrying in {retry_delay}s..."
f"Document {document_id} not found on attempt {attempt}/{max_retries}. "
f"Retrying in {retry_delay}s..."
)
await asyncio.sleep(retry_delay)
retry_delay *= 1.5
@ -69,7 +69,8 @@ async def get_document_with_retry(document_service, document_id, auth, max_retri
error_msg = str(e)
if attempt < max_retries:
logger.warning(
f"Error retrieving document on attempt {attempt}/{max_retries}: {error_msg}. Retrying in {retry_delay}s..."
f"Error retrieving document on attempt {attempt}/{max_retries}: {error_msg}. "
f"Retrying in {retry_delay}s..."
)
await asyncio.sleep(retry_delay)
retry_delay *= 1.5
@ -392,12 +393,7 @@ async def startup(ctx):
logger.error("ColPali vector store initialization failed")
ctx["colpali_embedding_model"] = colpali_embedding_model
ctx["colpali_vector_store"] = colpali_vector_store
# Initialize cache factory for DocumentService (may not be used for ingestion)
from core.cache.llama_cache_factory import LlamaCacheFactory
cache_factory = LlamaCacheFactory(Path(settings.STORAGE_PATH))
ctx["cache_factory"] = cache_factory
ctx["cache_factory"] = None
# Initialize rules processor
rules_processor = RulesProcessor()
@ -414,7 +410,7 @@ async def startup(ctx):
vector_store=vector_store,
embedding_model=embedding_model,
parser=parser,
cache_factory=cache_factory,
cache_factory=None,
enable_colpali=settings.ENABLE_COLPALI,
colpali_embedding_model=colpali_embedding_model,
colpali_vector_store=colpali_vector_store,
@ -463,8 +459,8 @@ def redis_settings_from_env() -> RedisSettings:
# Use ARQ's supported parameters with optimized values for stability
# For high-volume ingestion (100+ documents), these settings help prevent timeouts
return RedisSettings(
host=url.hostname or os.getenv("REDIS_HOST", "127.0.0.1"),
port=url.port or int(os.getenv("REDIS_PORT", "6379")),
host=get_settings().REDIS_HOST,
port=get_settings().REDIS_PORT,
database=int(url.path.lstrip("/") or 0),
conn_timeout=5, # Increased connection timeout (seconds)
conn_retries=15, # More retries for transient connection issues

View File

@ -139,8 +139,14 @@ check_postgres() {\n\
# Check PostgreSQL\n\
check_postgres\n\
\n\
# Start the application with standard asyncio event loop\n\
exec uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto\n\
# Check if command arguments were passed ($# is the number of arguments)\n\
if [ $# -gt 0 ]; then\n\
# If arguments exist, execute them (e.g., execute "arq core.workers...")\n\
exec "$@"\n\
else\n\
# Otherwise, execute the default command (Uvicorn for the API)\n\
exec uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto\n\
fi\n\
' > /app/docker-entrypoint.sh && chmod +x /app/docker-entrypoint.sh
# Copy application code

View File

@ -74,7 +74,7 @@ $$ LANGUAGE SQL;
-- Create graphs table for knowledge graph functionality
CREATE TABLE IF NOT EXISTS graphs (
id VARCHAR PRIMARY KEY,
name VARCHAR UNIQUE,
name VARCHAR NOT NULL,
entities JSONB DEFAULT '[]',
relationships JSONB DEFAULT '[]',
graph_metadata JSONB DEFAULT '{}',
@ -86,5 +86,9 @@ CREATE TABLE IF NOT EXISTS graphs (
access_control JSONB DEFAULT '{"readers": [], "writers": [], "admins": []}'
);
-- Create index for graph name for faster lookups
-- Create indexes on graph name and owner for faster lookups
CREATE INDEX IF NOT EXISTS idx_graph_name ON graphs(name);
CREATE INDEX IF NOT EXISTS idx_graph_owner ON graphs USING gin(owner);
-- Enforce unique graph names per owner via a unique index on (owner id, name)
CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_owner_name ON graphs((owner->>'id'), name);

View File

@ -9,6 +9,7 @@ anthropic==0.42.0
antlr4-python3-runtime==4.9.3
anyio==4.3.0
appnope==0.1.4
arq==0.25.0
asgiref==3.8.1
assemblyai==0.36.0
asttokens==2.4.1
@ -53,6 +54,7 @@ diskcache==5.6.3
distlib==0.3.9
distro==1.9.0
dnspython==2.6.1
docstring_parser==0.16
docutils==0.21.2
ecdsa==0.19.0
effdet==0.4.1
@ -67,6 +69,7 @@ filelock==3.15.4
filetype==1.2.0
fireworks-ai==0.15.12
FlagEmbedding==1.3.4
flake8==7.0.0
flatbuffers==24.3.25
fonttools==4.53.1
frozenlist==1.4.1
@ -88,6 +91,9 @@ greenlet==3.1.1
grpcio==1.65.4
grpcio-status==1.65.4
h11==0.14.0
h2==4.2.0
hiredis==3.1.0
hpack==4.1.0
html2text==2024.2.26
htmldate==1.9.3
httpcore==1.0.5
@ -98,22 +104,25 @@ httpx-sse==0.4.0
httpx-ws==0.7.1
huggingface-hub==0.27.0
humanfriendly==10.0
hyperframe==6.1.0
identify==2.6.3
idna==3.7
ijson==3.3.0
importlib_metadata==8.5.0
iniconfig==2.0.0
inscriptis==2.5.0
instructor==1.7.9
iopath==0.1.10
ipykernel==6.29.5
ipython==8.26.0
ir_datasets==0.5.9
isort==6.0.1
jaraco.classes==3.4.0
jaraco.context==6.0.1
jaraco.functools==4.1.0
jedi==0.19.1
Jinja2==3.1.4
# jiter==0.5.0
jiter==0.8.2
jmespath==1.0.1
joblib==1.4.2
jsonpatch==1.33
@ -133,6 +142,7 @@ langdetect==1.0.9
langsmith==0.3.8
lap==0.5.12
layoutparser==0.3.4
litellm==1.65.4.post1
llama_cpp_python==0.3.5
llvmlite==0.43.0
lmnr==0.4.60
@ -148,12 +158,15 @@ MarkupSafe==2.1.5
marshmallow==3.21.3
matplotlib==3.9.2
matplotlib-inline==0.1.7
mccabe==0.7.0
mdurl==0.1.2
monotonic==1.6
more-itertools==10.5.0
motor==3.4.0
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
mypy==1.15.0
mypy-boto3-s3==1.34.138
mypy-extensions==1.0.0
narwhals==1.26.0
@ -162,7 +175,6 @@ networkx==3.3
nh3==0.2.20
nltk==3.8.1
nodeenv==1.9.1
# numba==0.60.0
numpy==1.26.4
olefile==0.47
ollama==0.4.7
@ -228,19 +240,24 @@ pyarrow-hotfix==0.6
pyasn1==0.6.0
pyasn1_modules==0.4.0
pycocotools==2.0.8
pycodestyle==2.11.1
pycparser==2.22
pydantic==2.10.6
pydantic-settings==2.4.0
pydantic_core==2.27.2
pydeck==0.9.1
pyee==12.1.1
pyflakes==3.2.0
Pygments==2.18.0
PyJWT==2.9.0
pymongo==4.7.1
pypandoc==1.13
pyparsing==3.1.2
pypdf==4.3.1
pypdfium2==4.30.0
pyproject-flake8==7.0.0
pyproject_hooks==1.2.0
pyright==1.1.399
pytesseract==0.3.10
pytest==8.2.0
pytest-asyncio==0.24.0
@ -249,6 +266,7 @@ python-docx==1.1.2
python-dotenv==1.0.1
python-iso639==2024.4.27
python-jose==3.3.0
python-magic==0.4.27
python-multipart==0.0.9
python-oxmsg==0.0.1
python-pptx==0.6.23
@ -259,6 +277,7 @@ pyzmq==26.2.0
rank-bm25==0.2.2
rapidfuzz==3.9.5
readme_renderer==44.0
redis==5.2.1
referencing==0.36.2
regex==2024.7.24
requests==2.32.3
@ -267,6 +286,7 @@ rfc3986==2.0.0
rich==13.7.1
rpds-py==0.22.3
rsa==4.9
ruff==0.11.5
s3transfer==0.11.2
safetensors==0.4.4
scikit-learn==1.6.0
@ -315,7 +335,7 @@ ujson==5.9.0
ultralytics==8.3.55
ultralytics-thop==2.0.13
unlzw3==0.2.3
unstructured==0.16.0
unstructured==0.15.0
unstructured-client==0.24.1
unstructured-inference==0.7.36
unstructured.pytesseract==0.3.12
@ -338,6 +358,3 @@ yarl==1.9.4
zipp==3.21.0
zlib-state==0.1.9
zstandard==0.23.0
litellm==1.65.4.post1
instructor==1.7.9
arq==0.25.0