# morphik-core/morphik.toml
[api]
host = "0.0.0.0"
port = 8000
reload = true

[auth]
jwt_algorithm = "HS256"
dev_mode = true # Enabled by default for easier local development
dev_entity_id = "dev_user" # Default dev user ID
dev_entity_type = "developer" # Default dev entity type
dev_permissions = ["read", "write", "admin"] # Default dev permissions

#### Registered models ####
[registered_models]
# OpenAI models
openai_gpt4 = { model_name = "gpt-4" }
openai_gpt4_1 = { model_name = "gpt-4.1" }
openai_gpt4o = { model_name = "gpt-4o" }
openai_gpt4o_mini = { model_name = "gpt-4o-mini" }
# Azure OpenAI models
azure_gpt4 = { model_name = "gpt-4", api_base = "YOUR_AZURE_URL_HERE", api_version = "2023-05-15", deployment_id = "gpt-4-deployment" }
azure_gpt35 = { model_name = "gpt-3.5-turbo", api_base = "YOUR_AZURE_URL_HERE", api_version = "2023-05-15", deployment_id = "gpt-35-turbo-deployment" }
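# A filled-in Azure entry might look like the following (the resource name and
# deployment ID here are hypothetical placeholders, not values from this repo):
# azure_gpt4o = { model_name = "gpt-4o", api_base = "https://my-resource.openai.azure.com", api_version = "2024-02-01", deployment_id = "my-gpt-4o-deployment" }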
# Anthropic models
claude_opus = { model_name = "claude-3-opus-20240229" }
claude_sonnet = { model_name = "claude-3-7-sonnet-latest" }
# Ollama models
ollama_llama = { model_name = "ollama_chat/llama3.2", api_base = "http://localhost:11434" }
ollama_llama_vision = { model_name = "ollama_chat/llama3.2-vision", api_base = "http://localhost:11434", vision = true }
# If you are running Morphik in Docker but Ollama locally on the host, use the following
ollama_llama_docker = { model_name = "ollama_chat/llama3.2", api_base = "http://host.docker.internal:11434" }
ollama_llama_vision_docker = { model_name = "ollama_chat/llama3.2-vision", api_base = "http://host.docker.internal:11434", vision = true }
# If Morphik and Ollama are both running in Docker, use the following
ollama_llama_docker_docker = { model_name = "ollama_chat/llama3.2", api_base = "http://ollama:11434" }
ollama_llama_vision_docker_docker = { model_name = "ollama_chat/llama3.2-vision", api_base = "http://ollama:11434", vision = true }
# Embedding models
openai_embedding = { model_name = "text-embedding-3-small" }
openai_embedding_large = { model_name = "text-embedding-3-large" }
azure_embedding = { model_name = "text-embedding-ada-002", api_base = "YOUR_AZURE_URL_HERE", api_version = "2023-05-15", deployment_id = "embedding-ada-002" }
ollama_embedding = { model_name = "ollama/nomic-embed-text", api_base = "http://localhost:11434" }
# If you are running Morphik in Docker but Ollama locally on the host, use the following
ollama_embedding_docker = { model_name = "ollama/nomic-embed-text", api_base = "http://host.docker.internal:11434" }
# If Morphik and Ollama are both running in Docker, use the following
ollama_embedding_docker_docker = { model_name = "ollama/nomic-embed-text", api_base = "http://ollama:11434" }
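# The model_name strings above use LiteLLM-style identifiers (e.g. the "ollama_chat/"
# and "ollama/" prefixes), so other LiteLLM-supported providers can likely be
# registered the same way. A hedged, untested sketch (the key name is illustrative):
# gemini_flash = { model_name = "gemini/gemini-1.5-flash" }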

#### Component configurations ####
[agent]
model = "ollama_llama" # Model for the agent logic

[completion]
model = "ollama_llama" # Reference to a key in registered_models (e.g. "openai_gpt4o")
default_max_tokens = 1000
default_temperature = 0.5

[database]
provider = "postgres"
# Connection pool settings
pool_size = 10 # Maximum number of connections in the pool
max_overflow = 15 # Maximum number of connections that can be created beyond pool_size
pool_recycle = 3600 # Time in seconds after which a connection is recycled (1 hour)
pool_timeout = 10 # Seconds to wait for a connection from the pool
pool_pre_ping = true # Check connection viability before using it from the pool
max_retries = 3 # Number of retries for database operations
retry_delay = 1.0 # Initial delay between retries in seconds
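# Taken together, these settings cap the pool at pool_size + max_overflow = 25
# concurrent connections (assuming SQLAlchemy-style pool semantics).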

[embedding]
model = "ollama_embedding" # Reference to registered model
dimensions = 768
similarity_metric = "cosine"
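# dimensions should match the registered embedding model: ollama/nomic-embed-text
# produces 768-dim vectors; use 1536 for openai_embedding (text-embedding-3-small)
# or 3072 for openai_embedding_large (text-embedding-3-large).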

[parser]
chunk_size = 6000
chunk_overlap = 300
use_unstructured_api = false
use_contextual_chunking = false
contextual_chunking_model = "ollama_llama" # Reference to a key in registered_models

[document_analysis]
model = "ollama_llama" # Reference to a key in registered_models

[parser.vision]
model = "ollama_llama" # Reference to a key in registered_models
frame_sample_rate = -1 # Set to -1 to disable frame captioning

[reranker]
use_reranker = true
provider = "flag"
model_name = "BAAI/bge-reranker-large"
query_max_length = 256
passage_max_length = 512
use_fp16 = true
device = "mps" # use "cpu" if running in Docker on a Mac, "cuda" on a CUDA-enabled device

[storage]
provider = "local"
storage_path = "./storage"

# [storage]
# provider = "aws-s3"
# region = "us-east-2"
# bucket_name = "morphik-s3-storage"
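# Note: S3 credentials are not configured in this file; they are typically picked up
# from the standard AWS environment variables (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY)
# or an attached IAM role; verify against your deployment setup.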

[vector_store]
provider = "pgvector"

[rules]
model = "ollama_llama"
batch_size = 4096

[morphik]
enable_colpali = true
mode = "self_hosted" # "cloud" or "self_hosted"
api_domain = "api.morphik.ai" # API domain for cloud URIs

[redis]
host = "localhost" # use "redis" for docker
port = 6379

[graph]
model = "ollama_llama"
enable_entity_resolution = true

[telemetry]
enabled = true
honeycomb_enabled = true
honeycomb_endpoint = "https://api.honeycomb.io"
honeycomb_proxy_endpoint = "https://otel-proxy.onrender.com"
service_name = "databridge-core"
otlp_timeout = 10
otlp_max_retries = 3
otlp_retry_delay = 1
otlp_max_export_batch_size = 512
otlp_schedule_delay_millis = 5000
otlp_max_queue_size = 2048