mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Update requirements, fix some docker connection issues
This commit is contained in:
parent
3e5774b730
commit
2985493b66
@ -4,7 +4,6 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import urllib.parse as up
|
import urllib.parse as up
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from arq.connections import RedisSettings
|
from arq.connections import RedisSettings
|
||||||
@ -59,7 +58,8 @@ async def get_document_with_retry(document_service, document_id, auth, max_retri
|
|||||||
attempt += 1
|
attempt += 1
|
||||||
if attempt < max_retries:
|
if attempt < max_retries:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Document {document_id} not found on attempt {attempt}/{max_retries}. Retrying in {retry_delay}s..."
|
f"Document {document_id} not found on attempt {attempt}/{max_retries}. "
|
||||||
|
f"Retrying in {retry_delay}s..."
|
||||||
)
|
)
|
||||||
await asyncio.sleep(retry_delay)
|
await asyncio.sleep(retry_delay)
|
||||||
retry_delay *= 1.5
|
retry_delay *= 1.5
|
||||||
@ -69,7 +69,8 @@ async def get_document_with_retry(document_service, document_id, auth, max_retri
|
|||||||
error_msg = str(e)
|
error_msg = str(e)
|
||||||
if attempt < max_retries:
|
if attempt < max_retries:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Error retrieving document on attempt {attempt}/{max_retries}: {error_msg}. Retrying in {retry_delay}s..."
|
f"Error retrieving document on attempt {attempt}/{max_retries}: {error_msg}. "
|
||||||
|
f"Retrying in {retry_delay}s..."
|
||||||
)
|
)
|
||||||
await asyncio.sleep(retry_delay)
|
await asyncio.sleep(retry_delay)
|
||||||
retry_delay *= 1.5
|
retry_delay *= 1.5
|
||||||
@ -392,12 +393,7 @@ async def startup(ctx):
|
|||||||
logger.error("ColPali vector store initialization failed")
|
logger.error("ColPali vector store initialization failed")
|
||||||
ctx["colpali_embedding_model"] = colpali_embedding_model
|
ctx["colpali_embedding_model"] = colpali_embedding_model
|
||||||
ctx["colpali_vector_store"] = colpali_vector_store
|
ctx["colpali_vector_store"] = colpali_vector_store
|
||||||
|
ctx["cache_factory"] = None
|
||||||
# Initialize cache factory for DocumentService (may not be used for ingestion)
|
|
||||||
from core.cache.llama_cache_factory import LlamaCacheFactory
|
|
||||||
|
|
||||||
cache_factory = LlamaCacheFactory(Path(settings.STORAGE_PATH))
|
|
||||||
ctx["cache_factory"] = cache_factory
|
|
||||||
|
|
||||||
# Initialize rules processor
|
# Initialize rules processor
|
||||||
rules_processor = RulesProcessor()
|
rules_processor = RulesProcessor()
|
||||||
@ -414,7 +410,7 @@ async def startup(ctx):
|
|||||||
vector_store=vector_store,
|
vector_store=vector_store,
|
||||||
embedding_model=embedding_model,
|
embedding_model=embedding_model,
|
||||||
parser=parser,
|
parser=parser,
|
||||||
cache_factory=cache_factory,
|
cache_factory=None,
|
||||||
enable_colpali=settings.ENABLE_COLPALI,
|
enable_colpali=settings.ENABLE_COLPALI,
|
||||||
colpali_embedding_model=colpali_embedding_model,
|
colpali_embedding_model=colpali_embedding_model,
|
||||||
colpali_vector_store=colpali_vector_store,
|
colpali_vector_store=colpali_vector_store,
|
||||||
@ -463,8 +459,8 @@ def redis_settings_from_env() -> RedisSettings:
|
|||||||
# Use ARQ's supported parameters with optimized values for stability
|
# Use ARQ's supported parameters with optimized values for stability
|
||||||
# For high-volume ingestion (100+ documents), these settings help prevent timeouts
|
# For high-volume ingestion (100+ documents), these settings help prevent timeouts
|
||||||
return RedisSettings(
|
return RedisSettings(
|
||||||
host=url.hostname or os.getenv("REDIS_HOST", "127.0.0.1"),
|
host=get_settings().REDIS_HOST,
|
||||||
port=url.port or int(os.getenv("REDIS_PORT", "6379")),
|
port=get_settings().REDIS_PORT,
|
||||||
database=int(url.path.lstrip("/") or 0),
|
database=int(url.path.lstrip("/") or 0),
|
||||||
conn_timeout=5, # Increased connection timeout (seconds)
|
conn_timeout=5, # Increased connection timeout (seconds)
|
||||||
conn_retries=15, # More retries for transient connection issues
|
conn_retries=15, # More retries for transient connection issues
|
||||||
|
10
dockerfile
10
dockerfile
@ -139,8 +139,14 @@ check_postgres() {\n\
|
|||||||
# Check PostgreSQL\n\
|
# Check PostgreSQL\n\
|
||||||
check_postgres\n\
|
check_postgres\n\
|
||||||
\n\
|
\n\
|
||||||
# Start the application with standard asyncio event loop\n\
|
# Check if command arguments were passed ($# is the number of arguments)\n\
|
||||||
exec uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto\n\
|
if [ $# -gt 0 ]; then\n\
|
||||||
|
# If arguments exist, execute them (e.g., execute "arq core.workers...")\n\
|
||||||
|
exec "$@"\n\
|
||||||
|
else\n\
|
||||||
|
# Otherwise, execute the default command (Uvicorn for the API)\n\
|
||||||
|
exec uvicorn core.api:app --host $HOST --port $PORT --loop asyncio --http auto --ws auto --lifespan auto\n\
|
||||||
|
fi\n\
|
||||||
' > /app/docker-entrypoint.sh && chmod +x /app/docker-entrypoint.sh
|
' > /app/docker-entrypoint.sh && chmod +x /app/docker-entrypoint.sh
|
||||||
|
|
||||||
# Copy application code
|
# Copy application code
|
||||||
|
8
init.sql
8
init.sql
@ -74,7 +74,7 @@ $$ LANGUAGE SQL;
|
|||||||
-- Create graphs table for knowledge graph functionality
|
-- Create graphs table for knowledge graph functionality
|
||||||
CREATE TABLE IF NOT EXISTS graphs (
|
CREATE TABLE IF NOT EXISTS graphs (
|
||||||
id VARCHAR PRIMARY KEY,
|
id VARCHAR PRIMARY KEY,
|
||||||
name VARCHAR UNIQUE,
|
name VARCHAR NOT NULL,
|
||||||
entities JSONB DEFAULT '[]',
|
entities JSONB DEFAULT '[]',
|
||||||
relationships JSONB DEFAULT '[]',
|
relationships JSONB DEFAULT '[]',
|
||||||
graph_metadata JSONB DEFAULT '{}',
|
graph_metadata JSONB DEFAULT '{}',
|
||||||
@ -86,5 +86,9 @@ CREATE TABLE IF NOT EXISTS graphs (
|
|||||||
access_control JSONB DEFAULT '{"readers": [], "writers": [], "admins": []}'
|
access_control JSONB DEFAULT '{"readers": [], "writers": [], "admins": []}'
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Create index for graph name for faster lookups
|
-- Create index for graph name and owner for faster lookups
|
||||||
CREATE INDEX IF NOT EXISTS idx_graph_name ON graphs(name);
|
CREATE INDEX IF NOT EXISTS idx_graph_name ON graphs(name);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_graph_owner ON graphs USING gin(owner);
|
||||||
|
|
||||||
|
-- Create unique constraint on name scoped by owner
|
||||||
|
CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_owner_name ON graphs((owner->>'id'), name);
|
||||||
|
@ -9,6 +9,7 @@ anthropic==0.42.0
|
|||||||
antlr4-python3-runtime==4.9.3
|
antlr4-python3-runtime==4.9.3
|
||||||
anyio==4.3.0
|
anyio==4.3.0
|
||||||
appnope==0.1.4
|
appnope==0.1.4
|
||||||
|
arq==0.25.0
|
||||||
asgiref==3.8.1
|
asgiref==3.8.1
|
||||||
assemblyai==0.36.0
|
assemblyai==0.36.0
|
||||||
asttokens==2.4.1
|
asttokens==2.4.1
|
||||||
@ -53,6 +54,7 @@ diskcache==5.6.3
|
|||||||
distlib==0.3.9
|
distlib==0.3.9
|
||||||
distro==1.9.0
|
distro==1.9.0
|
||||||
dnspython==2.6.1
|
dnspython==2.6.1
|
||||||
|
docstring_parser==0.16
|
||||||
docutils==0.21.2
|
docutils==0.21.2
|
||||||
ecdsa==0.19.0
|
ecdsa==0.19.0
|
||||||
effdet==0.4.1
|
effdet==0.4.1
|
||||||
@ -67,6 +69,7 @@ filelock==3.15.4
|
|||||||
filetype==1.2.0
|
filetype==1.2.0
|
||||||
fireworks-ai==0.15.12
|
fireworks-ai==0.15.12
|
||||||
FlagEmbedding==1.3.4
|
FlagEmbedding==1.3.4
|
||||||
|
flake8==7.0.0
|
||||||
flatbuffers==24.3.25
|
flatbuffers==24.3.25
|
||||||
fonttools==4.53.1
|
fonttools==4.53.1
|
||||||
frozenlist==1.4.1
|
frozenlist==1.4.1
|
||||||
@ -88,6 +91,9 @@ greenlet==3.1.1
|
|||||||
grpcio==1.65.4
|
grpcio==1.65.4
|
||||||
grpcio-status==1.65.4
|
grpcio-status==1.65.4
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
|
h2==4.2.0
|
||||||
|
hiredis==3.1.0
|
||||||
|
hpack==4.1.0
|
||||||
html2text==2024.2.26
|
html2text==2024.2.26
|
||||||
htmldate==1.9.3
|
htmldate==1.9.3
|
||||||
httpcore==1.0.5
|
httpcore==1.0.5
|
||||||
@ -98,22 +104,25 @@ httpx-sse==0.4.0
|
|||||||
httpx-ws==0.7.1
|
httpx-ws==0.7.1
|
||||||
huggingface-hub==0.27.0
|
huggingface-hub==0.27.0
|
||||||
humanfriendly==10.0
|
humanfriendly==10.0
|
||||||
|
hyperframe==6.1.0
|
||||||
identify==2.6.3
|
identify==2.6.3
|
||||||
idna==3.7
|
idna==3.7
|
||||||
ijson==3.3.0
|
ijson==3.3.0
|
||||||
importlib_metadata==8.5.0
|
importlib_metadata==8.5.0
|
||||||
iniconfig==2.0.0
|
iniconfig==2.0.0
|
||||||
inscriptis==2.5.0
|
inscriptis==2.5.0
|
||||||
|
instructor==1.7.9
|
||||||
iopath==0.1.10
|
iopath==0.1.10
|
||||||
ipykernel==6.29.5
|
ipykernel==6.29.5
|
||||||
ipython==8.26.0
|
ipython==8.26.0
|
||||||
ir_datasets==0.5.9
|
ir_datasets==0.5.9
|
||||||
|
isort==6.0.1
|
||||||
jaraco.classes==3.4.0
|
jaraco.classes==3.4.0
|
||||||
jaraco.context==6.0.1
|
jaraco.context==6.0.1
|
||||||
jaraco.functools==4.1.0
|
jaraco.functools==4.1.0
|
||||||
jedi==0.19.1
|
jedi==0.19.1
|
||||||
Jinja2==3.1.4
|
Jinja2==3.1.4
|
||||||
# jiter==0.5.0
|
jiter==0.8.2
|
||||||
jmespath==1.0.1
|
jmespath==1.0.1
|
||||||
joblib==1.4.2
|
joblib==1.4.2
|
||||||
jsonpatch==1.33
|
jsonpatch==1.33
|
||||||
@ -133,6 +142,7 @@ langdetect==1.0.9
|
|||||||
langsmith==0.3.8
|
langsmith==0.3.8
|
||||||
lap==0.5.12
|
lap==0.5.12
|
||||||
layoutparser==0.3.4
|
layoutparser==0.3.4
|
||||||
|
litellm==1.65.4.post1
|
||||||
llama_cpp_python==0.3.5
|
llama_cpp_python==0.3.5
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
||||||
lmnr==0.4.60
|
lmnr==0.4.60
|
||||||
@ -148,12 +158,15 @@ MarkupSafe==2.1.5
|
|||||||
marshmallow==3.21.3
|
marshmallow==3.21.3
|
||||||
matplotlib==3.9.2
|
matplotlib==3.9.2
|
||||||
matplotlib-inline==0.1.7
|
matplotlib-inline==0.1.7
|
||||||
|
mccabe==0.7.0
|
||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
monotonic==1.6
|
monotonic==1.6
|
||||||
more-itertools==10.5.0
|
more-itertools==10.5.0
|
||||||
|
motor==3.4.0
|
||||||
mpmath==1.3.0
|
mpmath==1.3.0
|
||||||
multidict==6.0.5
|
multidict==6.0.5
|
||||||
multiprocess==0.70.16
|
multiprocess==0.70.16
|
||||||
|
mypy==1.15.0
|
||||||
mypy-boto3-s3==1.34.138
|
mypy-boto3-s3==1.34.138
|
||||||
mypy-extensions==1.0.0
|
mypy-extensions==1.0.0
|
||||||
narwhals==1.26.0
|
narwhals==1.26.0
|
||||||
@ -162,7 +175,6 @@ networkx==3.3
|
|||||||
nh3==0.2.20
|
nh3==0.2.20
|
||||||
nltk==3.8.1
|
nltk==3.8.1
|
||||||
nodeenv==1.9.1
|
nodeenv==1.9.1
|
||||||
# numba==0.60.0
|
|
||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
olefile==0.47
|
olefile==0.47
|
||||||
ollama==0.4.7
|
ollama==0.4.7
|
||||||
@ -228,19 +240,24 @@ pyarrow-hotfix==0.6
|
|||||||
pyasn1==0.6.0
|
pyasn1==0.6.0
|
||||||
pyasn1_modules==0.4.0
|
pyasn1_modules==0.4.0
|
||||||
pycocotools==2.0.8
|
pycocotools==2.0.8
|
||||||
|
pycodestyle==2.11.1
|
||||||
pycparser==2.22
|
pycparser==2.22
|
||||||
pydantic==2.10.6
|
pydantic==2.10.6
|
||||||
pydantic-settings==2.4.0
|
pydantic-settings==2.4.0
|
||||||
pydantic_core==2.27.2
|
pydantic_core==2.27.2
|
||||||
pydeck==0.9.1
|
pydeck==0.9.1
|
||||||
pyee==12.1.1
|
pyee==12.1.1
|
||||||
|
pyflakes==3.2.0
|
||||||
Pygments==2.18.0
|
Pygments==2.18.0
|
||||||
PyJWT==2.9.0
|
PyJWT==2.9.0
|
||||||
|
pymongo==4.7.1
|
||||||
pypandoc==1.13
|
pypandoc==1.13
|
||||||
pyparsing==3.1.2
|
pyparsing==3.1.2
|
||||||
pypdf==4.3.1
|
pypdf==4.3.1
|
||||||
pypdfium2==4.30.0
|
pypdfium2==4.30.0
|
||||||
|
pyproject-flake8==7.0.0
|
||||||
pyproject_hooks==1.2.0
|
pyproject_hooks==1.2.0
|
||||||
|
pyright==1.1.399
|
||||||
pytesseract==0.3.10
|
pytesseract==0.3.10
|
||||||
pytest==8.2.0
|
pytest==8.2.0
|
||||||
pytest-asyncio==0.24.0
|
pytest-asyncio==0.24.0
|
||||||
@ -249,6 +266,7 @@ python-docx==1.1.2
|
|||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
python-iso639==2024.4.27
|
python-iso639==2024.4.27
|
||||||
python-jose==3.3.0
|
python-jose==3.3.0
|
||||||
|
python-magic==0.4.27
|
||||||
python-multipart==0.0.9
|
python-multipart==0.0.9
|
||||||
python-oxmsg==0.0.1
|
python-oxmsg==0.0.1
|
||||||
python-pptx==0.6.23
|
python-pptx==0.6.23
|
||||||
@ -259,6 +277,7 @@ pyzmq==26.2.0
|
|||||||
rank-bm25==0.2.2
|
rank-bm25==0.2.2
|
||||||
rapidfuzz==3.9.5
|
rapidfuzz==3.9.5
|
||||||
readme_renderer==44.0
|
readme_renderer==44.0
|
||||||
|
redis==5.2.1
|
||||||
referencing==0.36.2
|
referencing==0.36.2
|
||||||
regex==2024.7.24
|
regex==2024.7.24
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
@ -267,6 +286,7 @@ rfc3986==2.0.0
|
|||||||
rich==13.7.1
|
rich==13.7.1
|
||||||
rpds-py==0.22.3
|
rpds-py==0.22.3
|
||||||
rsa==4.9
|
rsa==4.9
|
||||||
|
ruff==0.11.5
|
||||||
s3transfer==0.11.2
|
s3transfer==0.11.2
|
||||||
safetensors==0.4.4
|
safetensors==0.4.4
|
||||||
scikit-learn==1.6.0
|
scikit-learn==1.6.0
|
||||||
@ -315,7 +335,7 @@ ujson==5.9.0
|
|||||||
ultralytics==8.3.55
|
ultralytics==8.3.55
|
||||||
ultralytics-thop==2.0.13
|
ultralytics-thop==2.0.13
|
||||||
unlzw3==0.2.3
|
unlzw3==0.2.3
|
||||||
unstructured==0.16.0
|
unstructured==0.15.0
|
||||||
unstructured-client==0.24.1
|
unstructured-client==0.24.1
|
||||||
unstructured-inference==0.7.36
|
unstructured-inference==0.7.36
|
||||||
unstructured.pytesseract==0.3.12
|
unstructured.pytesseract==0.3.12
|
||||||
@ -338,6 +358,3 @@ yarl==1.9.4
|
|||||||
zipp==3.21.0
|
zipp==3.21.0
|
||||||
zlib-state==0.1.9
|
zlib-state==0.1.9
|
||||||
zstandard==0.23.0
|
zstandard==0.23.0
|
||||||
litellm==1.65.4.post1
|
|
||||||
instructor==1.7.9
|
|
||||||
arq==0.25.0
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user