morphik-core/core/config.py

118 lines
4.5 KiB
Python
Raw Normal View History

from typing import Optional
2024-11-22 18:56:22 -05:00
from pydantic import Field
from pydantic_settings import BaseSettings
from functools import lru_cache
import tomli
2024-12-26 08:52:25 -05:00
from dotenv import load_dotenv
2024-11-22 18:56:22 -05:00
class Settings(BaseSettings):
    """DataBridge configuration settings.

    Fields fall into two groups:

    * Secrets and connection strings, which are expected to come from the
      process environment.
    * Service, component, storage, database, model, processing and auth
      options, whose values below are only defaults; ``get_settings`` passes
      explicit values for them when it constructs this class.
    """

    # Required environment variables (referenced in config.toml).
    # pydantic-settings looks each field up under an environment variable
    # with the field's own name, so no per-field ``env=`` argument is needed
    # (that keyword is a deprecated, ignored extra in pydantic v2).
    JWT_SECRET_KEY: str = Field(...)
    MONGODB_URI: str = Field(...)

    # Optional credentials; each one is None when not provided.
    UNSTRUCTURED_API_KEY: Optional[str] = Field(None)
    AWS_ACCESS_KEY: Optional[str] = Field(None)
    AWS_SECRET_ACCESS_KEY: Optional[str] = Field(None)
    ASSEMBLYAI_API_KEY: Optional[str] = Field(None)
    OPENAI_API_KEY: Optional[str] = Field(None)
    ANTHROPIC_API_KEY: Optional[str] = Field(None)

    # Service settings
    HOST: str = "localhost"
    PORT: int = 8000
    RELOAD: bool = False

    # Component selection
    STORAGE_PROVIDER: str = "local"
    DATABASE_PROVIDER: str = "mongodb"
    VECTOR_STORE_PROVIDER: str = "mongodb"
    EMBEDDING_PROVIDER: str = "openai"
    COMPLETION_PROVIDER: str = "ollama"
    PARSER_PROVIDER: str = "combined"

    # Storage settings
    STORAGE_PATH: str = "./storage"
    AWS_REGION: str = "us-east-2"
    S3_BUCKET: str = "databridge-s3-storage"

    # Database settings
    DATABRIDGE_DB: str = "DataBridgeTest"
    DOCUMENTS_COLLECTION: str = "documents"
    CHUNKS_COLLECTION: str = "document_chunks"

    # Vector store settings
    VECTOR_INDEX_NAME: str = "vector_index"
    VECTOR_DIMENSIONS: int = 1536

    # Model settings
    EMBEDDING_MODEL: str = "text-embedding-3-small"
    COMPLETION_MODEL: str = "llama3.1"
    COMPLETION_MAX_TOKENS: int = 1000
    COMPLETION_TEMPERATURE: float = 0.7
    OLLAMA_BASE_URL: str = "http://localhost:11434"

    # Processing settings
    CHUNK_SIZE: int = 1000
    CHUNK_OVERLAP: int = 200
    DEFAULT_K: int = 4
    FRAME_SAMPLE_RATE: int = 120
    USE_UNSTRUCTURED_API: bool = False

    # Auth settings
    JWT_ALGORITHM: str = "HS256"
2024-11-22 18:56:22 -05:00
@lru_cache()
def get_settings() -> Settings:
    """Get cached settings instance.

    Calls ``load_dotenv(override=True)``, parses ``config.toml`` (opened
    relative to the current working directory) and passes the values found
    there to ``Settings`` as keyword arguments. Because of ``lru_cache`` the
    work is done on the first call only; later calls return the same
    instance.

    Returns:
        The populated ``Settings`` instance.

    Raises:
        FileNotFoundError: ``config.toml`` cannot be opened.
        KeyError: a table or key listed below is absent from ``config.toml``.
    """
    load_dotenv(override=True)

    # Load config.toml
    with open("config.toml", "rb") as toml_file:
        toml_data = tomli.load(toml_file)

    # Map config.toml values to settings: each entry pairs a Settings field
    # with the chain of TOML keys that leads to its value.
    toml_paths = (
        # Service settings
        ("HOST", ("service", "host")),
        ("PORT", ("service", "port")),
        ("RELOAD", ("service", "reload")),
        # Component selection
        ("STORAGE_PROVIDER", ("service", "components", "storage")),
        ("DATABASE_PROVIDER", ("service", "components", "database")),
        ("VECTOR_STORE_PROVIDER", ("service", "components", "vector_store")),
        ("EMBEDDING_PROVIDER", ("service", "components", "embedding")),
        ("COMPLETION_PROVIDER", ("service", "components", "completion")),
        ("PARSER_PROVIDER", ("service", "components", "parser")),
        # Storage settings
        ("STORAGE_PATH", ("storage", "local", "path")),
        ("AWS_REGION", ("storage", "aws", "region")),
        ("S3_BUCKET", ("storage", "aws", "bucket_name")),
        # Database settings
        ("DATABRIDGE_DB", ("database", "mongodb", "database_name")),
        ("DOCUMENTS_COLLECTION", ("database", "mongodb", "documents_collection")),
        ("CHUNKS_COLLECTION", ("database", "mongodb", "chunks_collection")),
        # Vector store settings
        ("VECTOR_INDEX_NAME", ("vector_store", "mongodb", "index_name")),
        ("VECTOR_DIMENSIONS", ("vector_store", "mongodb", "dimensions")),
        # Model settings
        ("EMBEDDING_MODEL", ("models", "embedding", "model_name")),
        ("COMPLETION_MODEL", ("models", "completion", "model_name")),
        ("COMPLETION_MAX_TOKENS", ("models", "completion", "default_max_tokens")),
        ("COMPLETION_TEMPERATURE", ("models", "completion", "default_temperature")),
        ("OLLAMA_BASE_URL", ("models", "ollama", "base_url")),
        # Processing settings
        ("CHUNK_SIZE", ("processing", "text", "chunk_size")),
        ("CHUNK_OVERLAP", ("processing", "text", "chunk_overlap")),
        ("DEFAULT_K", ("processing", "text", "default_k")),
        ("FRAME_SAMPLE_RATE", ("processing", "video", "frame_sample_rate")),
        ("USE_UNSTRUCTURED_API", ("processing", "unstructured", "use_api")),
        # Auth settings
        ("JWT_ALGORITHM", ("auth", "jwt_algorithm")),
    )

    overrides = {}
    for setting_name, key_path in toml_paths:
        # Descend one table per key; a missing key raises KeyError here.
        node = toml_data
        for key in key_path:
            node = node[key]
        overrides[setting_name] = node

    return Settings(**overrides)