Streamline dev experience with optional auth and simplified config (#27)

This commit is contained in:
Adityavardhan Agrawal 2025-01-11 21:54:00 +05:30 committed by GitHub
parent 13947d41bd
commit f4c14fc71b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 93 additions and 69 deletions

View File

@ -1,4 +1,4 @@
JWT_SECRET_KEY="..." JWT_SECRET_KEY="..." # Required in production, optional in dev mode (dev_mode=true in databridge.toml)
POSTGRES_URI="postgresql+asyncpg://postgres:postgres@localhost:5432/databridge" # Required for PostgreSQL database POSTGRES_URI="postgresql+asyncpg://postgres:postgres@localhost:5432/databridge" # Required for PostgreSQL database
MONGODB_URI="..." # Optional: Only needed if using MongoDB MONGODB_URI="..." # Optional: Only needed if using MongoDB

View File

@ -227,7 +227,16 @@ document_service = DocumentService(
async def verify_token(authorization: str = Header(None)) -> AuthContext: async def verify_token(authorization: str = Header(None)) -> AuthContext:
"""Verify JWT Bearer token.""" """Verify JWT Bearer token or return dev context if dev_mode is enabled."""
# Check if dev mode is enabled
if settings.dev_mode:
return AuthContext(
entity_type=EntityType(settings.dev_entity_type),
entity_id=settings.dev_entity_id,
permissions=set(settings.dev_permissions),
)
# Normal token verification flow
if not authorization: if not authorization:
raise HTTPException( raise HTTPException(
status_code=401, status_code=401,

View File

@ -9,7 +9,7 @@ from dotenv import load_dotenv
class Settings(BaseSettings): class Settings(BaseSettings):
"""DataBridge configuration settings.""" """DataBridge configuration settings."""
# environment variables: # Environment variables
JWT_SECRET_KEY: str JWT_SECRET_KEY: str
POSTGRES_URI: Optional[str] = None POSTGRES_URI: Optional[str] = None
MONGODB_URI: Optional[str] = None MONGODB_URI: Optional[str] = None
@ -19,42 +19,45 @@ class Settings(BaseSettings):
OPENAI_API_KEY: Optional[str] = None OPENAI_API_KEY: Optional[str] = None
ANTHROPIC_API_KEY: Optional[str] = None ANTHROPIC_API_KEY: Optional[str] = None
# configuration variables: # API configuration
## api:
HOST: str HOST: str
PORT: int PORT: int
RELOAD: bool RELOAD: bool
## auth: # Auth configuration
JWT_ALGORITHM: str JWT_ALGORITHM: str
dev_mode: bool = False
dev_entity_type: str = "developer"
dev_entity_id: str = "dev_user"
dev_permissions: list = ["read", "write", "admin"]
## completion: # Completion configuration
COMPLETION_PROVIDER: Literal["ollama", "openai"] COMPLETION_PROVIDER: Literal["ollama", "openai"]
COMPLETION_MODEL: str COMPLETION_MODEL: str
COMPLETION_MAX_TOKENS: Optional[str] = None COMPLETION_MAX_TOKENS: Optional[str] = None
COMPLETION_TEMPERATURE: Optional[float] = None COMPLETION_TEMPERATURE: Optional[float] = None
COMPLETION_OLLAMA_BASE_URL: Optional[str] = None COMPLETION_OLLAMA_BASE_URL: Optional[str] = None
## database # Database configuration
DATABASE_PROVIDER: Literal["postgres", "mongodb"] DATABASE_PROVIDER: Literal["postgres", "mongodb"]
DATABASE_NAME: Optional[str] = None DATABASE_NAME: Optional[str] = None
DOCUMENTS_COLLECTION: Optional[str] = None DOCUMENTS_COLLECTION: Optional[str] = None
## embedding # Embedding configuration
EMBEDDING_PROVIDER: Literal["ollama", "openai"] EMBEDDING_PROVIDER: Literal["ollama", "openai"]
EMBEDDING_MODEL: str EMBEDDING_MODEL: str
VECTOR_DIMENSIONS: int VECTOR_DIMENSIONS: int
EMBEDDING_SIMILARITY_METRIC: Literal["cosine", "dotProduct"] EMBEDDING_SIMILARITY_METRIC: Literal["cosine", "dotProduct"]
EMBEDDING_OLLAMA_BASE_URL: Optional[str] = None EMBEDDING_OLLAMA_BASE_URL: Optional[str] = None
## parser # Parser configuration
PARSER_PROVIDER: Literal["unstructured", "combined", "contextual"] PARSER_PROVIDER: Literal["unstructured", "combined", "contextual"]
CHUNK_SIZE: int CHUNK_SIZE: int
CHUNK_OVERLAP: int CHUNK_OVERLAP: int
USE_UNSTRUCTURED_API: bool USE_UNSTRUCTURED_API: bool
FRAME_SAMPLE_RATE: Optional[int] = None FRAME_SAMPLE_RATE: Optional[int] = None
## reranker # Reranker configuration
USE_RERANKING: bool USE_RERANKING: bool
RERANKER_PROVIDER: Optional[Literal["flag"]] = None RERANKER_PROVIDER: Optional[Literal["flag"]] = None
RERANKER_MODEL: Optional[str] = None RERANKER_MODEL: Optional[str] = None
@ -63,13 +66,13 @@ class Settings(BaseSettings):
RERANKER_USE_FP16: Optional[bool] = None RERANKER_USE_FP16: Optional[bool] = None
RERANKER_DEVICE: Optional[str] = None RERANKER_DEVICE: Optional[str] = None
## storage # Storage configuration
STORAGE_PROVIDER: Literal["local", "aws-s3"] STORAGE_PROVIDER: Literal["local", "aws-s3"]
STORAGE_PATH: Optional[str] = None STORAGE_PATH: Optional[str] = None
AWS_REGION: Optional[str] = None AWS_REGION: Optional[str] = None
S3_BUCKET: Optional[str] = None S3_BUCKET: Optional[str] = None
## vector store # Vector store configuration
VECTOR_STORE_PROVIDER: Literal["pgvector", "mongodb"] VECTOR_STORE_PROVIDER: Literal["pgvector", "mongodb"]
VECTOR_STORE_DATABASE_NAME: Optional[str] = None VECTOR_STORE_DATABASE_NAME: Optional[str] = None
VECTOR_STORE_COLLECTION_NAME: Optional[str] = None VECTOR_STORE_COLLECTION_NAME: Optional[str] = None
@ -95,9 +98,19 @@ def get_settings() -> Settings:
# load auth config # load auth config
auth_config = { auth_config = {
"JWT_ALGORITHM": config["auth"]["jwt_algorithm"], "JWT_ALGORITHM": config["auth"]["jwt_algorithm"],
"JWT_SECRET_KEY": os.environ["JWT_SECRET_KEY"], "JWT_SECRET_KEY": os.environ.get(
"JWT_SECRET_KEY", "dev-secret-key"
), # Default for dev mode
"dev_mode": config["auth"].get("dev_mode", False),
"dev_entity_type": config["auth"].get("dev_entity_type", "developer"),
"dev_entity_id": config["auth"].get("dev_entity_id", "dev_user"),
"dev_permissions": config["auth"].get("dev_permissions", ["read", "write", "admin"]),
} }
# Only require JWT_SECRET_KEY in non-dev mode
if not auth_config["dev_mode"] and "JWT_SECRET_KEY" not in os.environ:
raise ValueError("JWT_SECRET_KEY is required when dev_mode is disabled")
# load completion config # load completion config
completion_config = { completion_config = {
"COMPLETION_PROVIDER": config["completion"]["provider"], "COMPLETION_PROVIDER": config["completion"]["provider"],

View File

@ -5,6 +5,10 @@ reload = true
[auth] [auth]
jwt_algorithm = "HS256" jwt_algorithm = "HS256"
dev_mode = true # Enabled by default for easier local development
dev_entity_id = "dev_user" # Default dev user ID
dev_entity_type = "developer" # Default dev entity type
dev_permissions = ["read", "write", "admin"] # Default dev permissions
[completion] [completion]
provider = "ollama" provider = "ollama"

View File

@ -21,25 +21,24 @@ class AsyncDataBridge:
DataBridge client for document operations. DataBridge client for document operations.
Args: Args:
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>" uri (str, optional): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>".
If not provided, connects to http://localhost:8000 without authentication.
timeout (int, optional): Request timeout in seconds. Defaults to 30. timeout (int, optional): Request timeout in seconds. Defaults to 30.
is_local (bool, optional): Whether to connect to a local server. Defaults to False. is_local (bool, optional): Whether to connect to a local server. Defaults to False.
Examples: Examples:
```python ```python
async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db: # Without authentication
# Ingest text async with AsyncDataBridge() as db:
doc = await db.ingest_text( doc = await db.ingest_text("Sample content")
"Sample content",
metadata={"category": "sample"}
)
# Query documents # With authentication
results = await db.query("search query") async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db:
doc = await db.ingest_text("Sample content")
``` ```
""" """
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False): def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
self._timeout = timeout self._timeout = timeout
self._client = ( self._client = (
httpx.AsyncClient(timeout=timeout) httpx.AsyncClient(timeout=timeout)
@ -51,7 +50,12 @@ class AsyncDataBridge:
) )
) )
self._is_local = is_local self._is_local = is_local
self._setup_auth(uri)
if uri:
self._setup_auth(uri)
else:
self._base_url = "http://localhost:8000"
self._auth_token = None
def _setup_auth(self, uri: str) -> None: def _setup_auth(self, uri: str) -> None:
"""Setup authentication from URI""" """Setup authentication from URI"""
@ -61,7 +65,7 @@ class AsyncDataBridge:
# Split host and auth parts # Split host and auth parts
auth, host = parsed.netloc.split("@") auth, host = parsed.netloc.split("@")
self._owner_id, self._auth_token = auth.split(":") _, self._auth_token = auth.split(":")
# Set base URL # Set base URL
self._base_url = f"{'http' if self._is_local else 'https'}://{host}" self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
@ -76,8 +80,10 @@ class AsyncDataBridge:
data: Optional[Dict[str, Any]] = None, data: Optional[Dict[str, Any]] = None,
files: Optional[Dict[str, Any]] = None, files: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Make authenticated HTTP request""" """Make HTTP request"""
headers = {"Authorization": f"Bearer {self._auth_token}"} headers = {}
if self._auth_token: # Only add auth header if we have a token
headers["Authorization"] = f"Bearer {self._auth_token}"
if not files: if not files:
headers["Content-Type"] = "application/json" headers["Content-Type"] = "application/json"

View File

@ -21,31 +21,33 @@ class DataBridge:
DataBridge client for document operations. DataBridge client for document operations.
Args: Args:
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>" uri (str, optional): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>".
If not provided, connects to http://localhost:8000 without authentication.
timeout (int, optional): Request timeout in seconds. Defaults to 30. timeout (int, optional): Request timeout in seconds. Defaults to 30.
is_local (bool, optional): Whether connecting to local development server. Defaults to False. is_local (bool, optional): Whether connecting to local development server. Defaults to False.
Examples: Examples:
```python ```python
with DataBridge("databridge://owner_id:token@api.databridge.ai") as db: # Without authentication
# Ingest text db = DataBridge()
doc = db.ingest_text(
"Sample content",
metadata={"category": "sample"}
)
# Query documents # With authentication
results = db.query("search query") db = DataBridge("databridge://owner_id:token@api.databridge.ai")
``` ```
""" """
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False): def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
self._timeout = timeout self._timeout = timeout
self._session = requests.Session() self._session = requests.Session()
if is_local: if is_local:
self._session.verify = False # Disable SSL for localhost self._session.verify = False # Disable SSL for localhost
self._is_local = is_local self._is_local = is_local
self._setup_auth(uri)
if uri:
self._setup_auth(uri)
else:
self._base_url = "http://localhost:8000"
self._auth_token = None
def _setup_auth(self, uri: str) -> None: def _setup_auth(self, uri: str) -> None:
"""Setup authentication from URI""" """Setup authentication from URI"""
@ -55,7 +57,7 @@ class DataBridge:
# Split host and auth parts # Split host and auth parts
auth, host = parsed.netloc.split("@") auth, host = parsed.netloc.split("@")
self._owner_id, self._auth_token = auth.split(":") _, self._auth_token = auth.split(":")
# Set base URL # Set base URL
self._base_url = f"{'http' if self._is_local else 'https'}://{host}" self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
@ -70,8 +72,10 @@ class DataBridge:
data: Optional[Dict[str, Any]] = None, data: Optional[Dict[str, Any]] = None,
files: Optional[Dict[str, Any]] = None, files: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Make authenticated HTTP request""" """Make HTTP request"""
headers = {"Authorization": f"Bearer {self._auth_token}"} headers = {}
if self._auth_token: # Only add auth header if we have a token
headers["Authorization"] = f"Bearer {self._auth_token}"
if not files: if not files:
headers["Content-Type"] = "application/json" headers["Content-Type"] = "application/json"

View File

@ -4,8 +4,12 @@ DataBridge interactive CLI.
Assumes a DataBridge server is running. Assumes a DataBridge server is running.
Usage: Usage:
python shell.py <uri> Without authentication (connects to localhost):
Example: python shell.py "http://test_user:token@localhost:8000" python shell.py
With authentication:
python shell.py <uri>
Example: python shell.py "databridge://user:token@localhost:8000"
This provides the exact same interface as the Python SDK: This provides the exact same interface as the Python SDK:
db.ingest_text("content", metadata={...}) db.ingest_text("content", metadata={...})
@ -18,7 +22,6 @@ import sys
from pathlib import Path from pathlib import Path
import time import time
import requests import requests
from urllib.parse import urlparse
# Add local SDK to path before other imports # Add local SDK to path before other imports
_SDK_PATH = str(Path(__file__).parent / "sdks" / "python") _SDK_PATH = str(Path(__file__).parent / "sdks" / "python")
@ -29,20 +32,10 @@ from databridge import DataBridge # noqa: E402
class DB: class DB:
def __init__(self, uri: str): def __init__(self, uri: str = None):
"""Initialize DataBridge with URI""" """Initialize DataBridge with optional URI"""
# Convert databridge:// to http:// for localhost self._client = DataBridge(uri, is_local=True, timeout=1000)
if "localhost" in uri or "127.0.0.1" in uri: self.base_url = "http://localhost:8000" # For health check only; always localhost, independent of the uri argument
uri = uri.replace("databridge://", "http://")
self.uri = uri
self.base_url = self._get_base_url(uri)
is_local = "localhost" in uri or "127.0.0.1" in uri
self._client = DataBridge(self.uri, is_local=is_local, timeout=1000)
def _get_base_url(self, uri: str) -> str:
"""Extract base URL from URI (removing auth if present)"""
parsed = urlparse(uri)
return f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
def check_health(self, max_retries=30, retry_interval=1) -> bool: def check_health(self, max_retries=30, retry_interval=1) -> bool:
"""Check if DataBridge server is healthy with retries""" """Check if DataBridge server is healthy with retries"""
@ -131,20 +124,15 @@ class DB:
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) != 2: uri = sys.argv[1] if len(sys.argv) > 1 else None
print("Error: URI argument required") db = DB(uri)
print(__doc__)
sys.exit(1)
# Create DB instance with provided URI # Check server health
db = DB(sys.argv[1])
# Wait for server to be healthy
if not db.check_health(): if not db.check_health():
print("Error: Could not connect to DataBridge server after multiple attempts") print("Error: Could not connect to DataBridge server")
sys.exit(1) sys.exit(1)
print("\nSuccessfully connected to DataBridge server!") print("\nConnected to DataBridge")
# Start an interactive Python shell with 'db' already imported # Start an interactive Python shell with 'db' already imported
import code import code