# Mirror of https://github.com/james-m-jordan/morphik-core.git
# Synced 2025-05-09 19:32:38 +00:00
# 85 lines, 2.1 KiB, TOML
# Core Service Components
[service]
host = "localhost"
port = 8000
reload = false

# Each key names the implementation to use for that component; the trailing
# comment lists the values this file mentions for it.
[service.components]
storage = "local" # "local", "aws-s3"
database = "postgres" # "postgres", "mongodb"
vector_store = "pgvector" # "pgvector", "mongodb"
embedding = "ollama" # "openai", "ollama"
completion = "ollama" # "openai", "ollama"
parser = "combined" # "combined", "unstructured", "contextual"
reranker = "bge" # "bge"

# Storage Configuration
[storage.local]
# NOTE(review): relative path; presumably resolved against the process
# working directory — confirm against the loader.
path = "./storage"

[storage.aws]
region = "us-east-2"
bucket_name = "databridge-s3-storage"

# Database Configuration
[database.postgres]
database_name = "databridge"
documents_table = "documents"
chunks_table = "document_chunks"

[database.mongodb]
database_name = "databridge"
documents_collection = "documents"
chunks_collection = "document_chunks"

# Vector Store Configuration
# `dimensions` depends on the embedding model in use (see the per-key notes);
# keep it in step with the model named under [models.embedding].
[vector_store.mongodb]
dimensions = 768 # 768 for nomic-embed-text, 1536 for text-embedding-3-small
index_name = "vector_index"
similarity_metric = "cosine"

[vector_store.pgvector]
dimensions = 768 # 768 for nomic-embed-text, 1536 for text-embedding-3-small
table_name = "vector_embeddings"
index_method = "ivfflat" # "ivfflat" or "hnsw"
index_lists = 100 # Number of lists for ivfflat index
probes = 10 # Number of probes for ivfflat search

# Model Configurations
[models.embedding]
model_name = "nomic-embed-text" # "text-embedding-3-small", "nomic-embed-text"

[models.completion]
model_name = "llama3.2" # "gpt-4o-mini", "llama3.1", etc.
default_max_tokens = 1000
default_temperature = 0.7

[models.ollama]
base_url = "http://localhost:11434"

[models.reranker]
model_name = "BAAI/bge-reranker-large" # "BAAI/bge-reranker-v2-gemma", "BAAI/bge-reranker-large"
# Optional. Other value: "cuda:0". TOML has no null value, so to run on CPU
# remove this key entirely rather than assigning null.
device = "mps"
use_fp16 = true
query_max_length = 256
passage_max_length = 512

# Document Processing
[processing.text]
chunk_size = 1000
chunk_overlap = 200
default_k = 4
use_reranking = true # Whether to use reranking by default

[processing.video]
# NOTE(review): unit is not stated in this file (presumably frames between
# samples) — confirm against the video parser.
frame_sample_rate = 120

[processing.unstructured]
use_api = false

# Authentication
[auth]
jwt_algorithm = "HS256"