Streamline dev experience with optional auth and simplified config (#27)

This commit is contained in:
Adityavardhan Agrawal 2025-01-11 21:54:00 +05:30 committed by GitHub
parent 13947d41bd
commit f4c14fc71b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 93 additions and 69 deletions

View File

@ -1,4 +1,4 @@
JWT_SECRET_KEY="..."
JWT_SECRET_KEY="..." # Required in production, optional in dev mode (dev_mode=true in databridge.toml)
POSTGRES_URI="postgresql+asyncpg://postgres:postgres@localhost:5432/databridge" # Required for PostgreSQL database
MONGODB_URI="..." # Optional: Only needed if using MongoDB

View File

@ -227,7 +227,16 @@ document_service = DocumentService(
async def verify_token(authorization: str = Header(None)) -> AuthContext:
"""Verify JWT Bearer token."""
"""Verify JWT Bearer token or return dev context if dev_mode is enabled."""
# Check if dev mode is enabled
if settings.dev_mode:
return AuthContext(
entity_type=EntityType(settings.dev_entity_type),
entity_id=settings.dev_entity_id,
permissions=set(settings.dev_permissions),
)
# Normal token verification flow
if not authorization:
raise HTTPException(
status_code=401,

View File

@ -9,7 +9,7 @@ from dotenv import load_dotenv
class Settings(BaseSettings):
"""DataBridge configuration settings."""
# environment variables:
# Environment variables
JWT_SECRET_KEY: str
POSTGRES_URI: Optional[str] = None
MONGODB_URI: Optional[str] = None
@ -19,42 +19,45 @@ class Settings(BaseSettings):
OPENAI_API_KEY: Optional[str] = None
ANTHROPIC_API_KEY: Optional[str] = None
# configuration variables:
## api:
# API configuration
HOST: str
PORT: int
RELOAD: bool
## auth:
# Auth configuration
JWT_ALGORITHM: str
dev_mode: bool = False
dev_entity_type: str = "developer"
dev_entity_id: str = "dev_user"
dev_permissions: list = ["read", "write", "admin"]
## completion:
# Completion configuration
COMPLETION_PROVIDER: Literal["ollama", "openai"]
COMPLETION_MODEL: str
COMPLETION_MAX_TOKENS: Optional[str] = None
COMPLETION_TEMPERATURE: Optional[float] = None
COMPLETION_OLLAMA_BASE_URL: Optional[str] = None
## database
# Database configuration
DATABASE_PROVIDER: Literal["postgres", "mongodb"]
DATABASE_NAME: Optional[str] = None
DOCUMENTS_COLLECTION: Optional[str] = None
## embedding
# Embedding configuration
EMBEDDING_PROVIDER: Literal["ollama", "openai"]
EMBEDDING_MODEL: str
VECTOR_DIMENSIONS: int
EMBEDDING_SIMILARITY_METRIC: Literal["cosine", "dotProduct"]
EMBEDDING_OLLAMA_BASE_URL: Optional[str] = None
## parser
# Parser configuration
PARSER_PROVIDER: Literal["unstructured", "combined", "contextual"]
CHUNK_SIZE: int
CHUNK_OVERLAP: int
USE_UNSTRUCTURED_API: bool
FRAME_SAMPLE_RATE: Optional[int] = None
## reranker
# Reranker configuration
USE_RERANKING: bool
RERANKER_PROVIDER: Optional[Literal["flag"]] = None
RERANKER_MODEL: Optional[str] = None
@ -63,13 +66,13 @@ class Settings(BaseSettings):
RERANKER_USE_FP16: Optional[bool] = None
RERANKER_DEVICE: Optional[str] = None
## storage
# Storage configuration
STORAGE_PROVIDER: Literal["local", "aws-s3"]
STORAGE_PATH: Optional[str] = None
AWS_REGION: Optional[str] = None
S3_BUCKET: Optional[str] = None
## vector store
# Vector store configuration
VECTOR_STORE_PROVIDER: Literal["pgvector", "mongodb"]
VECTOR_STORE_DATABASE_NAME: Optional[str] = None
VECTOR_STORE_COLLECTION_NAME: Optional[str] = None
@ -95,9 +98,19 @@ def get_settings() -> Settings:
# load auth config
auth_config = {
"JWT_ALGORITHM": config["auth"]["jwt_algorithm"],
"JWT_SECRET_KEY": os.environ["JWT_SECRET_KEY"],
"JWT_SECRET_KEY": os.environ.get(
"JWT_SECRET_KEY", "dev-secret-key"
), # Default for dev mode
"dev_mode": config["auth"].get("dev_mode", False),
"dev_entity_type": config["auth"].get("dev_entity_type", "developer"),
"dev_entity_id": config["auth"].get("dev_entity_id", "dev_user"),
"dev_permissions": config["auth"].get("dev_permissions", ["read", "write", "admin"]),
}
# Only require JWT_SECRET_KEY in non-dev mode
if not auth_config["dev_mode"] and "JWT_SECRET_KEY" not in os.environ:
raise ValueError("JWT_SECRET_KEY is required when dev_mode is disabled")
# load completion config
completion_config = {
"COMPLETION_PROVIDER": config["completion"]["provider"],

View File

@ -5,6 +5,10 @@ reload = true
[auth]
jwt_algorithm = "HS256"
dev_mode = true # Enabled by default for easier local development; bypasses JWT verification entirely, so set to false in production
dev_entity_id = "dev_user" # Default dev user ID
dev_entity_type = "developer" # Default dev entity type
dev_permissions = ["read", "write", "admin"] # Default dev permissions
[completion]
provider = "ollama"

View File

@ -21,25 +21,24 @@ class AsyncDataBridge:
DataBridge client for document operations.
Args:
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>"
uri (str, optional): DataBridge URI in format "databridge://<owner_id>:<token>@<host>".
If not provided, connects to http://localhost:8000 without authentication.
timeout (int, optional): Request timeout in seconds. Defaults to 30.
is_local (bool, optional): Whether to connect to a local server. Defaults to False.
Examples:
```python
async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db:
# Ingest text
doc = await db.ingest_text(
"Sample content",
metadata={"category": "sample"}
)
# Without authentication
async with AsyncDataBridge() as db:
doc = await db.ingest_text("Sample content")
# Query documents
results = await db.query("search query")
# With authentication
async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db:
doc = await db.ingest_text("Sample content")
```
"""
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False):
def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
self._timeout = timeout
self._client = (
httpx.AsyncClient(timeout=timeout)
@ -51,7 +50,12 @@ class AsyncDataBridge:
)
)
self._is_local = is_local
self._setup_auth(uri)
if uri:
self._setup_auth(uri)
else:
self._base_url = "http://localhost:8000"
self._auth_token = None
def _setup_auth(self, uri: str) -> None:
"""Setup authentication from URI"""
@ -61,7 +65,7 @@ class AsyncDataBridge:
# Split host and auth parts
auth, host = parsed.netloc.split("@")
self._owner_id, self._auth_token = auth.split(":")
_, self._auth_token = auth.split(":")
# Set base URL
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
@ -76,8 +80,10 @@ class AsyncDataBridge:
data: Optional[Dict[str, Any]] = None,
files: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Make authenticated HTTP request"""
headers = {"Authorization": f"Bearer {self._auth_token}"}
"""Make HTTP request"""
headers = {}
if self._auth_token: # Only add auth header if we have a token
headers["Authorization"] = f"Bearer {self._auth_token}"
if not files:
headers["Content-Type"] = "application/json"

View File

@ -21,31 +21,33 @@ class DataBridge:
DataBridge client for document operations.
Args:
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>"
uri (str, optional): DataBridge URI in format "databridge://<owner_id>:<token>@<host>".
If not provided, connects to http://localhost:8000 without authentication.
timeout (int, optional): Request timeout in seconds. Defaults to 30.
is_local (bool, optional): Whether to connect to a local development server. Defaults to False.
Examples:
```python
with DataBridge("databridge://owner_id:token@api.databridge.ai") as db:
# Ingest text
doc = db.ingest_text(
"Sample content",
metadata={"category": "sample"}
)
# Without authentication
db = DataBridge()
# Query documents
results = db.query("search query")
# With authentication
db = DataBridge("databridge://owner_id:token@api.databridge.ai")
```
"""
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False):
def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
self._timeout = timeout
self._session = requests.Session()
if is_local:
self._session.verify = False # Disable SSL for localhost
self._is_local = is_local
self._setup_auth(uri)
if uri:
self._setup_auth(uri)
else:
self._base_url = "http://localhost:8000"
self._auth_token = None
def _setup_auth(self, uri: str) -> None:
"""Setup authentication from URI"""
@ -55,7 +57,7 @@ class DataBridge:
# Split host and auth parts
auth, host = parsed.netloc.split("@")
self._owner_id, self._auth_token = auth.split(":")
_, self._auth_token = auth.split(":")
# Set base URL
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
@ -70,8 +72,10 @@ class DataBridge:
data: Optional[Dict[str, Any]] = None,
files: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Make authenticated HTTP request"""
headers = {"Authorization": f"Bearer {self._auth_token}"}
"""Make HTTP request"""
headers = {}
if self._auth_token: # Only add auth header if we have a token
headers["Authorization"] = f"Bearer {self._auth_token}"
if not files:
headers["Content-Type"] = "application/json"

View File

@ -4,8 +4,12 @@ DataBridge interactive CLI.
Assumes a DataBridge server is running.
Usage:
python shell.py <uri>
Example: python shell.py "http://test_user:token@localhost:8000"
Without authentication (connects to localhost):
python shell.py
With authentication:
python shell.py <uri>
Example: python shell.py "databridge://user:token@localhost:8000"
This provides the exact same interface as the Python SDK:
db.ingest_text("content", metadata={...})
@ -18,7 +22,6 @@ import sys
from pathlib import Path
import time
import requests
from urllib.parse import urlparse
# Add local SDK to path before other imports
_SDK_PATH = str(Path(__file__).parent / "sdks" / "python")
@ -29,20 +32,10 @@ from databridge import DataBridge # noqa: E402
class DB:
def __init__(self, uri: str):
"""Initialize DataBridge with URI"""
# Convert databridge:// to http:// for localhost
if "localhost" in uri or "127.0.0.1" in uri:
uri = uri.replace("databridge://", "http://")
self.uri = uri
self.base_url = self._get_base_url(uri)
is_local = "localhost" in uri or "127.0.0.1" in uri
self._client = DataBridge(self.uri, is_local=is_local, timeout=1000)
def _get_base_url(self, uri: str) -> str:
"""Extract base URL from URI (removing auth if present)"""
parsed = urlparse(uri)
return f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
def __init__(self, uri: str = None):
"""Initialize DataBridge with optional URI"""
self._client = DataBridge(uri, is_local=True, timeout=1000)
self.base_url = "http://localhost:8000" # For health check only
def check_health(self, max_retries=30, retry_interval=1) -> bool:
"""Check if DataBridge server is healthy with retries"""
@ -131,20 +124,15 @@ class DB:
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Error: URI argument required")
print(__doc__)
sys.exit(1)
uri = sys.argv[1] if len(sys.argv) > 1 else None
db = DB(uri)
# Create DB instance with provided URI
db = DB(sys.argv[1])
# Wait for server to be healthy
# Check server health
if not db.check_health():
print("Error: Could not connect to DataBridge server after multiple attempts")
print("Error: Could not connect to DataBridge server")
sys.exit(1)
print("\nSuccessfully connected to DataBridge server!")
print("\nConnected to DataBridge")
# Start an interactive Python shell with 'db' already imported
import code