[api]
host = "0.0.0.0"
port = 8000
reload = true
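# Note (assumed semantics, following the usual dev-server convention):
# reload = true auto-restarts the server on code changes; set it to false in production.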

[auth]
jwt_algorithm = "HS256"
dev_mode = true # Enabled by default for easier local development
dev_entity_id = "dev_user" # Default dev user ID
dev_entity_type = "developer" # Default dev entity type
dev_permissions = ["read", "write", "admin"] # Default dev permissions
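# Sketch of the intended behavior (assumed, not the exact middleware logic):
# with dev_mode = true the server skips JWT verification and treats every
# request as coming from dev_entity_id with dev_permissions. Set
# dev_mode = false and use jwt_algorithm-signed tokens for anything beyond
# local development.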

#### Registered models
[registered_models]
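# Each key below is a local alias mapped to an inline table. As used in this
# file: model_name is required, while api_base, api_version, deployment_id,
# and vision = true are optional, provider-specific fields.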

# OpenAI models
openai_gpt4o = { model_name = "gpt-4o", vision = true }
openai_gpt4 = { model_name = "gpt-4" }

# Azure OpenAI models
azure_gpt4 = { model_name = "gpt-4", api_base = "YOUR_AZURE_URL_HERE", api_version = "2023-05-15", deployment_id = "gpt-4-deployment" }
azure_gpt35 = { model_name = "gpt-3.5-turbo", api_base = "YOUR_AZURE_URL_HERE", api_version = "2023-05-15", deployment_id = "gpt-35-turbo-deployment" }
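# Replace YOUR_AZURE_URL_HERE with your Azure OpenAI endpoint, which normally
# has the following shape (the resource name here is a hypothetical placeholder):
# api_base = "https://my-resource.openai.azure.com"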

# Anthropic models
claude_opus = { model_name = "claude-3-opus-20240229" }
claude_sonnet = { model_name = "claude-3-sonnet-20240229" }

# Ollama models
ollama_llama = { model_name = "ollama_chat/llama3.2", api_base = "http://localhost:11434" }
ollama_llama_vision = { model_name = "ollama_chat/llama3.2-vision", api_base = "http://localhost:11434", vision = true }

# If Morphik is running in Docker but Ollama is running locally on the host, use the following:
ollama_llama_docker = { model_name = "ollama_chat/llama3.2", api_base = "http://host.docker.internal:11434" }
ollama_llama_vision_docker = { model_name = "ollama_chat/llama3.2-vision", api_base = "http://host.docker.internal:11434", vision = true }

# If Morphik and Ollama are both running in Docker, use the following:
ollama_llama_docker_docker = { model_name = "ollama_chat/llama3.2", api_base = "http://ollama:11434" }
ollama_llama_vision_docker_docker = { model_name = "ollama_chat/llama3.2-vision", api_base = "http://ollama:11434", vision = true }
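# Why the variants differ: host.docker.internal lets a container reach
# services on the host machine (built into Docker Desktop; on Linux it may
# need an extra host-gateway mapping), while "ollama" assumes a container
# with that name or service alias on the same Docker network.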

# Embedding models
openai_embedding = { model_name = "text-embedding-3-small" }
openai_embedding_large = { model_name = "text-embedding-3-large" }
azure_embedding = { model_name = "text-embedding-ada-002", api_base = "YOUR_AZURE_URL_HERE", api_version = "2023-05-15", deployment_id = "embedding-ada-002" }
ollama_embedding = { model_name = "ollama/nomic-embed-text", api_base = "http://localhost:11434" }

# If Morphik is running in Docker but Ollama is running locally on the host, use the following:
ollama_embedding_docker = { model_name = "ollama/nomic-embed-text", api_base = "http://host.docker.internal:11434" }

# If Morphik and Ollama are both running in Docker, use the following:
ollama_embedding_docker_docker = { model_name = "ollama/nomic-embed-text", api_base = "http://ollama:11434" }

#### Component configurations ####

[completion]
model = "ollama_llama_vision" # Reference to a key in registered_models
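# Any alias from [registered_models] works here, e.g.:
# model = "openai_gpt4o"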
default_max_tokens = 1000
default_temperature = 0.5

[database]
provider = "postgres"

[embedding]
model = "ollama_embedding" # Reference to registered model
dimensions = 768
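# dimensions must match the embedding model's output size: nomic-embed-text
# produces 768-dim vectors; switch to 1536 for openai_embedding
# (text-embedding-3-small) or 3072 for openai_embedding_large
# (text-embedding-3-large).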
similarity_metric = "cosine"

[parser]
chunk_size = 1000
chunk_overlap = 200
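# With chunk_size = 1000 and chunk_overlap = 200, each chunk repeats the last
# 200 units of the previous one, so context is not lost at chunk boundaries.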
use_unstructured_api = false
use_contextual_chunking = false
contextual_chunking_model = "ollama_llama" # Reference to a key in registered_models

[parser.vision]
model = "ollama_llama_vision" # Reference to a key in registered_models
frame_sample_rate = -1 # Set to -1 to disable frame captioning

[reranker]
use_reranker = true
provider = "flag"
model_name = "BAAI/bge-reranker-large"
query_max_length = 256
passage_max_length = 512
use_fp16 = true
device = "mps" # use "cpu" when running in Docker on a Mac, "cuda" on a CUDA-enabled device
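# "mps" targets Apple-silicon GPUs via Metal and is unavailable inside Linux
# containers, which is why the "cpu" fallback is needed for Docker on a Mac.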

[storage]
provider = "local"
storage_path = "./storage"

# [storage]
# provider = "aws-s3"
# region = "us-east-2"
# bucket_name = "morphik-s3-storage"
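# To switch to S3, comment out the local [storage] block above and uncomment
# this one; AWS credentials are presumably picked up from the standard
# environment/credential chain rather than from this file.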

[vector_store]
provider = "pgvector"

[rules]
model = "ollama_llama"
batch_size = 4096

[morphik]
enable_colpali = true
mode = "self_hosted" # "cloud" or "self_hosted"
api_domain = "api.morphik.ai" # API domain for cloud URIs

[redis]
host = "localhost"
port = 6379

[graph]
model = "ollama_llama"
enable_entity_resolution = true
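# enable_entity_resolution presumably merges different surface forms of the
# same entity (e.g. "USA" and "United States") into a single graph node.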

[telemetry]
enabled = true
honeycomb_enabled = true
honeycomb_endpoint = "https://api.honeycomb.io"
honeycomb_proxy_endpoint = "https://otel-proxy.onrender.com"
service_name = "databridge-core"
otlp_timeout = 10
otlp_max_retries = 3
otlp_retry_delay = 1
otlp_max_export_batch_size = 512
otlp_schedule_delay_millis = 5000