# morphik-core/databridge.toml
# DataBridge server configuration. Active sections are uncommented;
# commented-out sections show alternative provider configurations.
# HTTP server settings for the DataBridge API.
[api]
host = "localhost" # Needs to be "0.0.0.0" for docker
port = 8000
reload = true # Reload server on code changes; intended for local development
# Authentication / authorization settings.
[auth]
jwt_algorithm = "HS256"
dev_mode = true # Enabled by default for easier local development
dev_entity_id = "dev_user" # Default dev user ID
dev_entity_type = "developer" # Default dev entity type
dev_permissions = ["read", "write", "admin"] # Default dev permissions
# Completion model used to generate answers.
[completion]
provider = "ollama"
model_name = "llama3.2-vision"
default_max_tokens = 1000 # Native integer (was the string "1000"); max tokens per completion
default_temperature = 0.7
# base_url = "http://ollama:11434" # Just use the service name
base_url = "http://localhost:11434"

# Alternative: OpenAI-hosted completion
# [completion]
# provider = "openai"
# model_name = "gpt-4o"
# default_max_tokens = 1000
# default_temperature = 0.7
# Metadata database backend.
[database]
provider = "postgres"
# Alternative: MongoDB backend
# [database]
# provider = "mongodb"
# database_name = "databridge"
# collection_name = "documents"
# Embedding model used to vectorize documents and queries.
[embedding]
provider = "ollama"
model_name = "nomic-embed-text"
dimensions = 768 # Must match the model's output dimensionality
similarity_metric = "cosine"
# base_url = "http://ollama:11434" # Just use the service name
base_url = "http://localhost:11434" # Local configuration

# Alternative: OpenAI-hosted embeddings
# [embedding]
# provider = "openai"
# model_name = "text-embedding-3-small"
# dimensions = 1536
# similarity_metric = "dotProduct"
# Document chunking settings.
[parser]
chunk_size = 1000 # Size of each chunk — presumably characters; confirm unit against parser code
chunk_overlap = 200 # Overlap between consecutive chunks, same unit as chunk_size
use_unstructured_api = false
use_contextual_chunking = false
# Vision model used for frame captioning during parsing.
[parser.vision]
provider = "ollama"
model_name = "llama3.2-vision"
frame_sample_rate = -1 # Set to -1 to disable frame captioning
base_url = "http://localhost:11434" # Only used for ollama
# base_url = "http://ollama:11434" # Use if using via docker
# Alternative: OpenAI vision model
# [parser.vision]
# provider = "openai"
# model_name = "gpt-4o-mini"
# frame_sample_rate = -1 # Set to -1 to disable frame captioning
# Reranking of retrieved chunks.
[reranker]
use_reranker = true
provider = "flag"
model_name = "BAAI/bge-reranker-large"
query_max_length = 256 # Max length of the query fed to the reranker
passage_max_length = 512 # Max length of each passage fed to the reranker
use_fp16 = true
device = "mps" # Apple Silicon GPU; use "cuda" or "cpu" on other hardware
# Alternative: disable reranking entirely
# [reranker]
# use_reranker = false
# Where raw document files are stored.
[storage]
provider = "local"
storage_path = "./storage"

# Alternative: S3-backed storage
# [storage]
# provider = "aws-s3"
# region = "us-east-2"
# bucket_name = "databridge-s3-storage"
# Vector store backend for embeddings.
[vector_store]
provider = "pgvector"
# Alternative: MongoDB vector store
# [vector_store]
# provider = "mongodb"
# database_name = "databridge"
# collection_name = "document_chunks"
# Model settings for rules processing.
[rules]
provider = "ollama"
model_name = "llama3.2"
batch_size = 4096 # presumably a token/character batch limit — confirm against rules engine
# Core DataBridge behavior flags.
[databridge]
enable_colpali = true
mode = "self_hosted" # "cloud" or "self_hosted"
# Model settings for graph operations.
[graph]
provider = "ollama"
model_name = "llama3.2"
# Alternative: OpenAI model
# [graph]
# provider = "openai"
# model_name = "gpt-4o-mini"
# OpenTelemetry / Honeycomb export settings.
[telemetry]
enabled = true
honeycomb_enabled = true
honeycomb_endpoint = "https://api.honeycomb.io"
honeycomb_proxy_endpoint = "https://otel-proxy.onrender.com"
service_name = "databridge-core"
otlp_timeout = 10 # presumably seconds — confirm against exporter setup
otlp_max_retries = 3
otlp_retry_delay = 1 # presumably seconds — confirm against exporter setup
otlp_max_export_batch_size = 512
otlp_schedule_delay_millis = 5000 # milliseconds (per key name)
otlp_max_queue_size = 2048