mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Update requirements, fix some docker connection issues
This commit is contained in:
parent
3e5774b730
commit
2985493b66
@ -4,7 +4,6 @@ import logging
|
||||
import os
|
||||
import urllib.parse as up
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from arq.connections import RedisSettings
|
||||
@ -59,7 +58,8 @@ async def get_document_with_retry(document_service, document_id, auth, max_retri
|
||||
attempt += 1
|
||||
if attempt < max_retries:
|
||||
logger.warning(
|
||||
f"Document {document_id} not found on attempt {attempt}/{max_retries}. Retrying in {retry_delay}s..."
|
||||
f"Document {document_id} not found on attempt {attempt}/{max_retries}. "
|
||||
f"Retrying in {retry_delay}s..."
|
||||
)
|
||||
await asyncio.sleep(retry_delay)
|
||||
retry_delay *= 1.5
|
||||
@ -69,7 +69,8 @@ async def get_document_with_retry(document_service, document_id, auth, max_retri
|
||||
error_msg = str(e)
|
||||
if attempt < max_retries:
|
||||
logger.warning(
|
||||
f"Error retrieving document on attempt {attempt}/{max_retries}: {error_msg}. Retrying in {retry_delay}s..."
|
||||
f"Error retrieving document on attempt {attempt}/{max_retries}: {error_msg}. "
|
||||
f"Retrying in {retry_delay}s..."
|
||||
)
|
||||
await asyncio.sleep(retry_delay)
|
||||
retry_delay *= 1.5
|
||||
@ -392,12 +393,7 @@ async def startup(ctx):
|
||||
logger.error("ColPali vector store initialization failed")
|
||||
ctx["colpali_embedding_model"] = colpali_embedding_model
|
||||
ctx["colpali_vector_store"] = colpali_vector_store
|
||||
|
||||
# Initialize cache factory for DocumentService (may not be used for ingestion)
|
||||
from core.cache.llama_cache_factory import LlamaCacheFactory
|
||||
|
||||
cache_factory = LlamaCacheFactory(Path(settings.STORAGE_PATH))
|
||||
ctx["cache_factory"] = cache_factory
|
||||
ctx["cache_factory"] = None
|
||||
|
||||
# Initialize rules processor
|
||||
rules_processor = RulesProcessor()
|
||||
@ -414,7 +410,7 @@ async def startup(ctx):
|
||||
vector_store=vector_store,
|
||||
embedding_model=embedding_model,
|
||||
parser=parser,
|
||||
cache_factory=cache_factory,
|
||||
cache_factory=None,
|
||||
enable_colpali=settings.ENABLE_COLPALI,
|
||||
colpali_embedding_model=colpali_embedding_model,
|
||||
colpali_vector_store=colpali_vector_store,
|
||||
@ -463,8 +459,8 @@ def redis_settings_from_env() -> RedisSettings:
|
||||
# Use ARQ's supported parameters with optimized values for stability
|
||||
# For high-volume ingestion (100+ documents), these settings help prevent timeouts
|
||||
return RedisSettings(
|
||||
host=url.hostname or os.getenv("REDIS_HOST", "127.0.0.1"),
|
||||
port=url.port or int(os.getenv("REDIS_PORT", "6379")),
|
||||
host=get_settings().REDIS_HOST,
|
||||
port=get_settings().REDIS_PORT,
|
||||
database=int(url.path.lstrip("/") or 0),
|
||||
conn_timeout=5, # Increased connection timeout (seconds)
|
||||
conn_retries=15, # More retries for transient connection issues
|
||||
|
10
dockerfile
10
dockerfile
@ -139,8 +139,14 @@ check_postgres() {\n\
|
||||
# Check PostgreSQL\n\
|
||||
check_postgres\n\
|
||||
\n\
|
||||
# Start the application with standard asyncio event loop\n\
|
||||
exec uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto\n\
|
||||
# Check if command arguments were passed ($# is the number of arguments)\n\
|
||||
if [ $# -gt 0 ]; then\n\
|
||||
# If arguments exist, execute them (e.g., execute "arq core.workers...")\n\
|
||||
exec "$@"\n\
|
||||
else\n\
|
||||
# Otherwise, execute the default command (Uvicorn for the API)\n\
|
||||
exec uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto\n\
|
||||
fi\n\
|
||||
' > /app/docker-entrypoint.sh && chmod +x /app/docker-entrypoint.sh
|
||||
|
||||
# Copy application code
|
||||
|
8
init.sql
8
init.sql
@ -74,7 +74,7 @@ $$ LANGUAGE SQL;
|
||||
-- Create graphs table for knowledge graph functionality
|
||||
CREATE TABLE IF NOT EXISTS graphs (
|
||||
id VARCHAR PRIMARY KEY,
|
||||
name VARCHAR UNIQUE,
|
||||
name VARCHAR NOT NULL,
|
||||
entities JSONB DEFAULT '[]',
|
||||
relationships JSONB DEFAULT '[]',
|
||||
graph_metadata JSONB DEFAULT '{}',
|
||||
@ -86,5 +86,9 @@ CREATE TABLE IF NOT EXISTS graphs (
|
||||
access_control JSONB DEFAULT '{"readers": [], "writers": [], "admins": []}'
|
||||
);
|
||||
|
||||
-- Create index for graph name for faster lookups
|
||||
-- Create index for graph name and owner for faster lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_graph_name ON graphs(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_graph_owner ON graphs USING gin(owner);
|
||||
|
||||
-- Create unique constraint on name scoped by owner
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_owner_name ON graphs((owner->>'id'), name);
|
||||
|
@ -9,6 +9,7 @@ anthropic==0.42.0
|
||||
antlr4-python3-runtime==4.9.3
|
||||
anyio==4.3.0
|
||||
appnope==0.1.4
|
||||
arq==0.25.0
|
||||
asgiref==3.8.1
|
||||
assemblyai==0.36.0
|
||||
asttokens==2.4.1
|
||||
@ -53,6 +54,7 @@ diskcache==5.6.3
|
||||
distlib==0.3.9
|
||||
distro==1.9.0
|
||||
dnspython==2.6.1
|
||||
docstring_parser==0.16
|
||||
docutils==0.21.2
|
||||
ecdsa==0.19.0
|
||||
effdet==0.4.1
|
||||
@ -67,6 +69,7 @@ filelock==3.15.4
|
||||
filetype==1.2.0
|
||||
fireworks-ai==0.15.12
|
||||
FlagEmbedding==1.3.4
|
||||
flake8==7.0.0
|
||||
flatbuffers==24.3.25
|
||||
fonttools==4.53.1
|
||||
frozenlist==1.4.1
|
||||
@ -88,6 +91,9 @@ greenlet==3.1.1
|
||||
grpcio==1.65.4
|
||||
grpcio-status==1.65.4
|
||||
h11==0.14.0
|
||||
h2==4.2.0
|
||||
hiredis==3.1.0
|
||||
hpack==4.1.0
|
||||
html2text==2024.2.26
|
||||
htmldate==1.9.3
|
||||
httpcore==1.0.5
|
||||
@ -98,22 +104,25 @@ httpx-sse==0.4.0
|
||||
httpx-ws==0.7.1
|
||||
huggingface-hub==0.27.0
|
||||
humanfriendly==10.0
|
||||
hyperframe==6.1.0
|
||||
identify==2.6.3
|
||||
idna==3.7
|
||||
ijson==3.3.0
|
||||
importlib_metadata==8.5.0
|
||||
iniconfig==2.0.0
|
||||
inscriptis==2.5.0
|
||||
instructor==1.7.9
|
||||
iopath==0.1.10
|
||||
ipykernel==6.29.5
|
||||
ipython==8.26.0
|
||||
ir_datasets==0.5.9
|
||||
isort==6.0.1
|
||||
jaraco.classes==3.4.0
|
||||
jaraco.context==6.0.1
|
||||
jaraco.functools==4.1.0
|
||||
jedi==0.19.1
|
||||
Jinja2==3.1.4
|
||||
# jiter==0.5.0
|
||||
jiter==0.8.2
|
||||
jmespath==1.0.1
|
||||
joblib==1.4.2
|
||||
jsonpatch==1.33
|
||||
@ -133,6 +142,7 @@ langdetect==1.0.9
|
||||
langsmith==0.3.8
|
||||
lap==0.5.12
|
||||
layoutparser==0.3.4
|
||||
litellm==1.65.4.post1
|
||||
llama_cpp_python==0.3.5
|
||||
llvmlite==0.43.0
|
||||
lmnr==0.4.60
|
||||
@ -148,12 +158,15 @@ MarkupSafe==2.1.5
|
||||
marshmallow==3.21.3
|
||||
matplotlib==3.9.2
|
||||
matplotlib-inline==0.1.7
|
||||
mccabe==0.7.0
|
||||
mdurl==0.1.2
|
||||
monotonic==1.6
|
||||
more-itertools==10.5.0
|
||||
motor==3.4.0
|
||||
mpmath==1.3.0
|
||||
multidict==6.0.5
|
||||
multiprocess==0.70.16
|
||||
mypy==1.15.0
|
||||
mypy-boto3-s3==1.34.138
|
||||
mypy-extensions==1.0.0
|
||||
narwhals==1.26.0
|
||||
@ -162,7 +175,6 @@ networkx==3.3
|
||||
nh3==0.2.20
|
||||
nltk==3.8.1
|
||||
nodeenv==1.9.1
|
||||
# numba==0.60.0
|
||||
numpy==1.26.4
|
||||
olefile==0.47
|
||||
ollama==0.4.7
|
||||
@ -228,19 +240,24 @@ pyarrow-hotfix==0.6
|
||||
pyasn1==0.6.0
|
||||
pyasn1_modules==0.4.0
|
||||
pycocotools==2.0.8
|
||||
pycodestyle==2.11.1
|
||||
pycparser==2.22
|
||||
pydantic==2.10.6
|
||||
pydantic-settings==2.4.0
|
||||
pydantic_core==2.27.2
|
||||
pydeck==0.9.1
|
||||
pyee==12.1.1
|
||||
pyflakes==3.2.0
|
||||
Pygments==2.18.0
|
||||
PyJWT==2.9.0
|
||||
pymongo==4.7.1
|
||||
pypandoc==1.13
|
||||
pyparsing==3.1.2
|
||||
pypdf==4.3.1
|
||||
pypdfium2==4.30.0
|
||||
pyproject-flake8==7.0.0
|
||||
pyproject_hooks==1.2.0
|
||||
pyright==1.1.399
|
||||
pytesseract==0.3.10
|
||||
pytest==8.2.0
|
||||
pytest-asyncio==0.24.0
|
||||
@ -249,6 +266,7 @@ python-docx==1.1.2
|
||||
python-dotenv==1.0.1
|
||||
python-iso639==2024.4.27
|
||||
python-jose==3.3.0
|
||||
python-magic==0.4.27
|
||||
python-multipart==0.0.9
|
||||
python-oxmsg==0.0.1
|
||||
python-pptx==0.6.23
|
||||
@ -259,6 +277,7 @@ pyzmq==26.2.0
|
||||
rank-bm25==0.2.2
|
||||
rapidfuzz==3.9.5
|
||||
readme_renderer==44.0
|
||||
redis==5.2.1
|
||||
referencing==0.36.2
|
||||
regex==2024.7.24
|
||||
requests==2.32.3
|
||||
@ -267,6 +286,7 @@ rfc3986==2.0.0
|
||||
rich==13.7.1
|
||||
rpds-py==0.22.3
|
||||
rsa==4.9
|
||||
ruff==0.11.5
|
||||
s3transfer==0.11.2
|
||||
safetensors==0.4.4
|
||||
scikit-learn==1.6.0
|
||||
@ -315,7 +335,7 @@ ujson==5.9.0
|
||||
ultralytics==8.3.55
|
||||
ultralytics-thop==2.0.13
|
||||
unlzw3==0.2.3
|
||||
unstructured==0.16.0
|
||||
unstructured==0.15.0
|
||||
unstructured-client==0.24.1
|
||||
unstructured-inference==0.7.36
|
||||
unstructured.pytesseract==0.3.12
|
||||
@ -338,6 +358,3 @@ yarl==1.9.4
|
||||
zipp==3.21.0
|
||||
zlib-state==0.1.9
|
||||
zstandard==0.23.0
|
||||
litellm==1.65.4.post1
|
||||
instructor==1.7.9
|
||||
arq==0.25.0
|
||||
|
Loading…
x
Reference in New Issue
Block a user