morphik-core/dockerfile
2025-05-01 12:05:14 -07:00

174 lines
4.5 KiB
Plaintext

# syntax=docker/dockerfile:1
# Build stage: installs the compilers and Rust toolchain needed to build
# Python dependencies. This stage is discarded; only the paths the production
# stage explicitly copies out of it end up in the final image.
FROM python:3.12.5-slim AS builder

# Make piped RUN commands fail when ANY command in the pipeline fails.
# With the default /bin/sh only the last command's exit status is checked, so
# a failed download piped into a shell would be reported as success.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Set working directory
WORKDIR /app

# Install build dependencies.
# curl and ca-certificates are needed by the rustup download below; the slim
# Python base image does not ship curl.
RUN apt-get update && apt-get install -y \
    ca-certificates \
    cmake \
    curl \
    g++ \
    gcc \
    git \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Rust via rustup (non-interactive, default toolchain).
# --proto/--tlsv1.2 refuse any downgrade from HTTPS for the installer download.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y

# Put cargo/rustc on PATH for the following RUN steps. Writing
# `source $HOME/.cargo/env` into ~/.bashrc has no effect here because RUN
# executes a non-interactive shell that never reads ~/.bashrc.
ENV PATH="/root/.cargo/bin:${PATH}"
# Python dependencies are installed into root's user site (/root/.local) so
# the production stage can copy that tree out of this stage.
COPY requirements.txt ./
RUN pip install \
    --no-cache-dir \
    --user \
    --requirement requirements.txt

# Fetch the NLTK resources into a fixed directory that the production stage
# copies verbatim.
RUN python -m nltk.downloader \
    -d /usr/local/share/nltk_data \
    punkt \
    averaged_perceptron_tagger
# Production stage: the image that actually runs the service.
FROM python:3.12.5-slim

# All relative paths below resolve against /app.
WORKDIR /app

# Runtime OS packages, one per line in alphabetical order so additions and
# removals diff cleanly. The apt lists are removed in the same layer.
# NOTE(review): cmake, g++, gcc, git and python3-dev repeat the builder
# stage's toolchain — confirm whether anything needs them at runtime; if not,
# dropping them would shrink the image.
RUN apt-get update \
    && apt-get install -y \
        cmake \
        ffmpeg \
        g++ \
        gcc \
        git \
        libmagic1 \
        libsm6 \
        libxext6 \
        poppler-utils \
        postgresql-client \
        python3-dev \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Bring over what the builder stage produced: the NLTK data directory plus
# the console scripts and libraries from pip's --user install tree.
COPY --from=builder /usr/local/share/nltk_data /usr/local/share/nltk_data
COPY --from=builder /root/.local/bin /usr/local/bin
COPY --from=builder /root/.local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages

# Working directories under /app (the stage's WORKDIR).
RUN mkdir -p /app/storage /app/logs

# Runtime environment. HOST and PORT are read by the entrypoint script when
# it launches uvicorn.
ENV PYTHONUNBUFFERED=1 \
    HOST=0.0.0.0 \
    PORT=8000 \
    PATH="/usr/local/bin:$PATH"
# Create the default configuration.
# Written with a here-document instead of `echo '...\n\'`: the echo form only
# produces real newlines when the build shell's echo interprets backslash
# escapes (dash does, bash does not), so it silently breaks if the stage's
# SHELL ever changes. The quoted delimiter disables variable expansion, so
# the content below is copied literally.
# The entrypoint script copies this file to /app/morphik.toml when no
# configuration is present at startup.
COPY <<'EOF' /app/morphik.toml.default
[api]
host = "0.0.0.0"
port = 8000
reload = false

[auth]
jwt_algorithm = "HS256"
dev_mode = true
dev_entity_id = "dev_user"
dev_entity_type = "developer"
dev_permissions = ["read", "write", "admin"]

[completion]
provider = "ollama"
model_name = "llama2"
base_url = "http://localhost:11434"

[database]
provider = "postgres"

[embedding]
provider = "ollama"
model_name = "nomic-embed-text"
dimensions = 768
similarity_metric = "cosine"
base_url = "http://localhost:11434"

[parser]
chunk_size = 1000
chunk_overlap = 200
use_unstructured_api = false

[reranker]
use_reranker = false

[storage]
provider = "local"
storage_path = "/app/storage"

[vector_store]
provider = "pgvector"
EOF
# Create the startup script.
# Written with a here-document instead of `echo '...\n\'` for two reasons:
#   * the echo form depends on the build shell's echo interpreting `\n`;
#   * inside a continued RUN, the Dockerfile parser drops every line that
#     starts with `#`, so the script's own comments never reached the file.
# The quoted delimiter keeps $VARS literal so they are expanded at container
# start, not at build time. --chmod=755 replaces the separate `chmod +x`.
COPY --chmod=755 <<'EOF' /app/docker-entrypoint.sh
#!/bin/bash
set -e

# Copy default config if none exists
if [ ! -f /app/morphik.toml ]; then
    cp /app/morphik.toml.default /app/morphik.toml
fi

# Wait for PostgreSQL and verify a connection, but only when POSTGRES_URI is set.
# Host, user and database are fixed to postgres / morphik / morphik.
check_postgres() {
    if [ -n "$POSTGRES_URI" ]; then
        echo "Waiting for PostgreSQL..."
        max_retries=30
        retries=0
        until PGPASSWORD="$PGPASSWORD" pg_isready -h postgres -U morphik -d morphik; do
            retries=$((retries + 1))
            if [ "$retries" -eq "$max_retries" ]; then
                echo "Error: PostgreSQL did not become ready in time"
                exit 1
            fi
            echo "Waiting for PostgreSQL... (Attempt $retries/$max_retries)"
            sleep 2
        done
        echo "PostgreSQL is ready!"

        # Verify database connection
        if ! PGPASSWORD="$PGPASSWORD" psql -h postgres -U morphik -d morphik -c "SELECT 1" > /dev/null 2>&1; then
            echo "Error: Could not connect to PostgreSQL database"
            exit 1
        fi
        echo "PostgreSQL connection verified!"
    fi
}

# Check PostgreSQL
check_postgres

# Check if command arguments were passed ($# is the number of arguments)
if [ "$#" -gt 0 ]; then
    # If arguments exist, execute them (e.g., execute "arq core.workers...")
    exec "$@"
else
    # Otherwise, execute the default command (Uvicorn for the API).
    # Quoted, with fallbacks equal to the image's ENV defaults, so an empty
    # HOST/PORT cannot shift uvicorn's argument list.
    exec uvicorn core.api:app --host "${HOST:-0.0.0.0}" --port "${PORT:-8000}" --loop asyncio --http auto --ws auto --lifespan auto
fi
EOF
# Application source and top-level project documents. These are copied after
# the dependency and script layers.
COPY core /app/core
COPY README.md LICENSE /app/

# OCI image metadata.
# NOTE(review): the source URL looks like a template placeholder
# ("yourusername") — confirm the real repository URL.
LABEL org.opencontainers.image.title="Morphik Core" \
      org.opencontainers.image.description="Morphik Core - A powerful document processing and retrieval system" \
      org.opencontainers.image.source="https://github.com/yourusername/morphik" \
      org.opencontainers.image.version="1.0.0" \
      org.opencontainers.image.licenses="MIT"

# Port the API listens on by default (documentation only; does not publish).
EXPOSE 8000

# Exec-form entrypoint: arguments given to `docker run` are passed to the
# script, which runs them instead of the default uvicorn command.
ENTRYPOINT ["/app/docker-entrypoint.sh"]