morphik-core/core/config.py

105 lines
3.4 KiB
Python
Raw Normal View History

2024-12-04 20:26:14 -05:00
from typing import Dict, Any
2024-11-22 18:56:22 -05:00
from pydantic import Field
from pydantic_settings import BaseSettings
from functools import lru_cache
import tomli
from dotenv import load_dotenv, find_dotenv
2024-11-22 18:56:22 -05:00
def load_toml_config() -> Dict[Any, Any]:
    """Parse ``config.toml`` and return its contents as a dictionary.

    The path is relative, so the file is looked up in the process's current
    working directory.

    Returns:
        The parsed TOML document.
    """
    # tomli only accepts a binary file object, hence the "rb" mode.
    with open("config.toml", "rb") as toml_file:
        parsed = tomli.load(toml_file)
    return parsed
2024-11-22 18:56:22 -05:00
class Settings(BaseSettings):
    """DataBridge configuration settings.

    Values come from two places:

    * Fields declared with ``Field(...)`` have no default and must be
      supplied, e.g. through the environment or the ``.env`` file named in
      ``Config``.
    * The remaining fields are filled in by ``__init__`` from ``config.toml``
      (see the mapping built there); their declared defaults only act as
      placeholders.
    """

    # MongoDB settings
    MONGODB_URI: str = Field(..., env="MONGODB_URI")
    DATABRIDGE_DB: str = Field(None)  # populated from config.toml in __init__

    # Collection names (populated from config.toml in __init__)
    DOCUMENTS_COLLECTION: str = Field(None)
    CHUNKS_COLLECTION: str = Field(None)

    # Vector search settings (populated from config.toml in __init__)
    VECTOR_INDEX_NAME: str = Field(None)

    # API Keys
    OPENAI_API_KEY: str = Field(..., env="OPENAI_API_KEY")
    UNSTRUCTURED_API_KEY: str = Field(..., env="UNSTRUCTURED_API_KEY")

    # Optional API keys for alternative models
    ANTHROPIC_API_KEY: str | None = Field(None, env="ANTHROPIC_API_KEY")
    COHERE_API_KEY: str | None = Field(None, env="COHERE_API_KEY")
    VOYAGE_API_KEY: str | None = Field(None, env="VOYAGE_API_KEY")

    # Model settings
    EMBEDDING_MODEL: str = Field("text-embedding-3-small")

    # Document processing settings
    CHUNK_SIZE: int = Field(1000)
    CHUNK_OVERLAP: int = Field(200)
    DEFAULT_K: int = Field(4)

    # Storage settings
    AWS_ACCESS_KEY: str = Field(..., env="AWS_ACCESS_KEY")
    AWS_SECRET_ACCESS_KEY: str = Field(..., env="AWS_SECRET_ACCESS_KEY")
    AWS_REGION: str = Field(None)  # populated from config.toml in __init__
    S3_BUCKET: str = Field(None)  # populated from config.toml in __init__

    # Auth settings
    JWT_SECRET_KEY: str = Field(..., env="JWT_SECRET_KEY")
    JWT_ALGORITHM: str = Field("HS256")

    # Server settings
    HOST: str = Field("localhost")
    PORT: int = Field(8000)
    RELOAD: bool = Field(False)

    class Config:
        env_file = ".env"
        case_sensitive = True
        extra = "allow"

    def __init__(self, **kwargs):
        """Build the settings from the environment, ``config.toml`` and kwargs.

        Args:
            **kwargs: Explicit field values. These take precedence over the
                values read from ``config.toml``.

        Raises:
            KeyError: If ``config.toml`` lacks one of the sections or keys
                read below.

        Any exception raised by ``load_toml_config`` (for example when the
        file cannot be opened) propagates unchanged.
        """
        # Force reload of environment variables
        load_dotenv(find_dotenv(), override=True)
        config = load_toml_config()

        # Values sourced from config.toml.
        toml_values = {
            # MongoDB settings
            "DATABRIDGE_DB": config["mongodb"]["database_name"],
            "DOCUMENTS_COLLECTION": config["mongodb"]["documents_collection"],
            "CHUNKS_COLLECTION": config["mongodb"]["chunks_collection"],
            "VECTOR_INDEX_NAME": config["mongodb"]["vector"]["index_name"],
            # AWS settings
            "AWS_REGION": config["aws"]["default_region"],
            "S3_BUCKET": config["aws"]["default_bucket_name"],
            # Model settings
            "EMBEDDING_MODEL": config["model"]["embedding_model"],
            # Document processing settings
            "CHUNK_SIZE": config["document_processing"]["chunk_size"],
            "CHUNK_OVERLAP": config["document_processing"]["chunk_overlap"],
            "DEFAULT_K": config["document_processing"]["default_k"],
            # Server settings
            "HOST": config["server"]["host"],
            "PORT": config["server"]["port"],
            "RELOAD": config["server"]["reload"],
            # Auth settings
            "JWT_ALGORITHM": config["auth"]["jwt_algorithm"],
        }

        # Merge so that caller-supplied kwargs win. Previously the TOML values
        # were written over kwargs, which silently discarded explicit
        # overrides such as ``Settings(PORT=9000)``.
        super().__init__(**{**toml_values, **kwargs})
2024-11-22 18:56:22 -05:00
@lru_cache()
def get_settings() -> Settings:
    """Return the shared ``Settings`` object, building it on the first call.

    ``lru_cache`` memoises the result of this zero-argument function, so later
    calls return the same instance instead of constructing a new one.
    """
    settings = Settings()
    return settings