mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Streamline dev experience with optional auth and simplified config (#27)
This commit is contained in:
parent
13947d41bd
commit
f4c14fc71b
@ -1,4 +1,4 @@
|
||||
JWT_SECRET_KEY="..."
|
||||
JWT_SECRET_KEY="..." # Required in production, optional in dev mode (dev_mode=true in databridge.toml)
|
||||
POSTGRES_URI="postgresql+asyncpg://postgres:postgres@localhost:5432/databridge" # Required for PostgreSQL database
|
||||
MONGODB_URI="..." # Optional: Only needed if using MongoDB
|
||||
|
||||
|
11
core/api.py
11
core/api.py
@ -227,7 +227,16 @@ document_service = DocumentService(
|
||||
|
||||
|
||||
async def verify_token(authorization: str = Header(None)) -> AuthContext:
|
||||
"""Verify JWT Bearer token."""
|
||||
"""Verify JWT Bearer token or return dev context if dev_mode is enabled."""
|
||||
# Check if dev mode is enabled
|
||||
if settings.dev_mode:
|
||||
return AuthContext(
|
||||
entity_type=EntityType(settings.dev_entity_type),
|
||||
entity_id=settings.dev_entity_id,
|
||||
permissions=set(settings.dev_permissions),
|
||||
)
|
||||
|
||||
# Normal token verification flow
|
||||
if not authorization:
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
|
@ -9,7 +9,7 @@ from dotenv import load_dotenv
|
||||
class Settings(BaseSettings):
|
||||
"""DataBridge configuration settings."""
|
||||
|
||||
# environment variables:
|
||||
# Environment variables
|
||||
JWT_SECRET_KEY: str
|
||||
POSTGRES_URI: Optional[str] = None
|
||||
MONGODB_URI: Optional[str] = None
|
||||
@ -19,42 +19,45 @@ class Settings(BaseSettings):
|
||||
OPENAI_API_KEY: Optional[str] = None
|
||||
ANTHROPIC_API_KEY: Optional[str] = None
|
||||
|
||||
# configuration variables:
|
||||
## api:
|
||||
# API configuration
|
||||
HOST: str
|
||||
PORT: int
|
||||
RELOAD: bool
|
||||
|
||||
## auth:
|
||||
# Auth configuration
|
||||
JWT_ALGORITHM: str
|
||||
dev_mode: bool = False
|
||||
dev_entity_type: str = "developer"
|
||||
dev_entity_id: str = "dev_user"
|
||||
dev_permissions: list = ["read", "write", "admin"]
|
||||
|
||||
## completion:
|
||||
# Completion configuration
|
||||
COMPLETION_PROVIDER: Literal["ollama", "openai"]
|
||||
COMPLETION_MODEL: str
|
||||
COMPLETION_MAX_TOKENS: Optional[str] = None
|
||||
COMPLETION_TEMPERATURE: Optional[float] = None
|
||||
COMPLETION_OLLAMA_BASE_URL: Optional[str] = None
|
||||
|
||||
## database
|
||||
# Database configuration
|
||||
DATABASE_PROVIDER: Literal["postgres", "mongodb"]
|
||||
DATABASE_NAME: Optional[str] = None
|
||||
DOCUMENTS_COLLECTION: Optional[str] = None
|
||||
|
||||
## embedding
|
||||
# Embedding configuration
|
||||
EMBEDDING_PROVIDER: Literal["ollama", "openai"]
|
||||
EMBEDDING_MODEL: str
|
||||
VECTOR_DIMENSIONS: int
|
||||
EMBEDDING_SIMILARITY_METRIC: Literal["cosine", "dotProduct"]
|
||||
EMBEDDING_OLLAMA_BASE_URL: Optional[str] = None
|
||||
|
||||
## parser
|
||||
# Parser configuration
|
||||
PARSER_PROVIDER: Literal["unstructured", "combined", "contextual"]
|
||||
CHUNK_SIZE: int
|
||||
CHUNK_OVERLAP: int
|
||||
USE_UNSTRUCTURED_API: bool
|
||||
FRAME_SAMPLE_RATE: Optional[int] = None
|
||||
|
||||
## reranker
|
||||
# Reranker configuration
|
||||
USE_RERANKING: bool
|
||||
RERANKER_PROVIDER: Optional[Literal["flag"]] = None
|
||||
RERANKER_MODEL: Optional[str] = None
|
||||
@ -63,13 +66,13 @@ class Settings(BaseSettings):
|
||||
RERANKER_USE_FP16: Optional[bool] = None
|
||||
RERANKER_DEVICE: Optional[str] = None
|
||||
|
||||
## storage
|
||||
# Storage configuration
|
||||
STORAGE_PROVIDER: Literal["local", "aws-s3"]
|
||||
STORAGE_PATH: Optional[str] = None
|
||||
AWS_REGION: Optional[str] = None
|
||||
S3_BUCKET: Optional[str] = None
|
||||
|
||||
## vector store
|
||||
# Vector store configuration
|
||||
VECTOR_STORE_PROVIDER: Literal["pgvector", "mongodb"]
|
||||
VECTOR_STORE_DATABASE_NAME: Optional[str] = None
|
||||
VECTOR_STORE_COLLECTION_NAME: Optional[str] = None
|
||||
@ -95,9 +98,19 @@ def get_settings() -> Settings:
|
||||
# load auth config
|
||||
auth_config = {
|
||||
"JWT_ALGORITHM": config["auth"]["jwt_algorithm"],
|
||||
"JWT_SECRET_KEY": os.environ["JWT_SECRET_KEY"],
|
||||
"JWT_SECRET_KEY": os.environ.get(
|
||||
"JWT_SECRET_KEY", "dev-secret-key"
|
||||
), # Default for dev mode
|
||||
"dev_mode": config["auth"].get("dev_mode", False),
|
||||
"dev_entity_type": config["auth"].get("dev_entity_type", "developer"),
|
||||
"dev_entity_id": config["auth"].get("dev_entity_id", "dev_user"),
|
||||
"dev_permissions": config["auth"].get("dev_permissions", ["read", "write", "admin"]),
|
||||
}
|
||||
|
||||
# Only require JWT_SECRET_KEY in non-dev mode
|
||||
if not auth_config["dev_mode"] and "JWT_SECRET_KEY" not in os.environ:
|
||||
raise ValueError("JWT_SECRET_KEY is required when dev_mode is disabled")
|
||||
|
||||
# load completion config
|
||||
completion_config = {
|
||||
"COMPLETION_PROVIDER": config["completion"]["provider"],
|
||||
|
@ -5,6 +5,10 @@ reload = true
|
||||
|
||||
[auth]
|
||||
jwt_algorithm = "HS256"
|
||||
dev_mode = true # Enabled by default for easier local development; bypasses token verification, so set to false in production
|
||||
dev_entity_id = "dev_user" # Default dev user ID
|
||||
dev_entity_type = "developer" # Default dev entity type
|
||||
dev_permissions = ["read", "write", "admin"] # Default dev permissions
|
||||
|
||||
[completion]
|
||||
provider = "ollama"
|
||||
|
@ -21,25 +21,24 @@ class AsyncDataBridge:
|
||||
DataBridge client for document operations.
|
||||
|
||||
Args:
|
||||
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>"
|
||||
uri (str, optional): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>".
|
||||
If not provided, connects to http://localhost:8000 without authentication.
|
||||
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
||||
is_local (bool, optional): Whether to connect to a local server. Defaults to False.
|
||||
|
||||
Examples:
|
||||
```python
|
||||
async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db:
|
||||
# Ingest text
|
||||
doc = await db.ingest_text(
|
||||
"Sample content",
|
||||
metadata={"category": "sample"}
|
||||
)
|
||||
# Without authentication
|
||||
async with AsyncDataBridge() as db:
|
||||
doc = await db.ingest_text("Sample content")
|
||||
|
||||
# Query documents
|
||||
results = await db.query("search query")
|
||||
# With authentication
|
||||
async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db:
|
||||
doc = await db.ingest_text("Sample content")
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False):
|
||||
def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
|
||||
self._timeout = timeout
|
||||
self._client = (
|
||||
httpx.AsyncClient(timeout=timeout)
|
||||
@ -51,7 +50,12 @@ class AsyncDataBridge:
|
||||
)
|
||||
)
|
||||
self._is_local = is_local
|
||||
self._setup_auth(uri)
|
||||
|
||||
if uri:
|
||||
self._setup_auth(uri)
|
||||
else:
|
||||
self._base_url = "http://localhost:8000"
|
||||
self._auth_token = None
|
||||
|
||||
def _setup_auth(self, uri: str) -> None:
|
||||
"""Setup authentication from URI"""
|
||||
@ -61,7 +65,7 @@ class AsyncDataBridge:
|
||||
|
||||
# Split host and auth parts
|
||||
auth, host = parsed.netloc.split("@")
|
||||
self._owner_id, self._auth_token = auth.split(":")
|
||||
_, self._auth_token = auth.split(":")
|
||||
|
||||
# Set base URL
|
||||
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
|
||||
@ -76,8 +80,10 @@ class AsyncDataBridge:
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
files: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Make authenticated HTTP request"""
|
||||
headers = {"Authorization": f"Bearer {self._auth_token}"}
|
||||
"""Make HTTP request"""
|
||||
headers = {}
|
||||
if self._auth_token: # Only add auth header if we have a token
|
||||
headers["Authorization"] = f"Bearer {self._auth_token}"
|
||||
|
||||
if not files:
|
||||
headers["Content-Type"] = "application/json"
|
||||
|
@ -21,31 +21,33 @@ class DataBridge:
|
||||
DataBridge client for document operations.
|
||||
|
||||
Args:
|
||||
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>"
|
||||
uri (str, optional): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>".
|
||||
If not provided, connects to http://localhost:8000 without authentication.
|
||||
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
||||
is_local (bool, optional): Whether to connect to a local development server. Defaults to False.
|
||||
|
||||
Examples:
|
||||
```python
|
||||
with DataBridge("databridge://owner_id:token@api.databridge.ai") as db:
|
||||
# Ingest text
|
||||
doc = db.ingest_text(
|
||||
"Sample content",
|
||||
metadata={"category": "sample"}
|
||||
)
|
||||
# Without authentication
|
||||
db = DataBridge()
|
||||
|
||||
# Query documents
|
||||
results = db.query("search query")
|
||||
# With authentication
|
||||
db = DataBridge("databridge://owner_id:token@api.databridge.ai")
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False):
|
||||
def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
|
||||
self._timeout = timeout
|
||||
self._session = requests.Session()
|
||||
if is_local:
|
||||
self._session.verify = False # Disable SSL for localhost
|
||||
self._is_local = is_local
|
||||
self._setup_auth(uri)
|
||||
|
||||
if uri:
|
||||
self._setup_auth(uri)
|
||||
else:
|
||||
self._base_url = "http://localhost:8000"
|
||||
self._auth_token = None
|
||||
|
||||
def _setup_auth(self, uri: str) -> None:
|
||||
"""Setup authentication from URI"""
|
||||
@ -55,7 +57,7 @@ class DataBridge:
|
||||
|
||||
# Split host and auth parts
|
||||
auth, host = parsed.netloc.split("@")
|
||||
self._owner_id, self._auth_token = auth.split(":")
|
||||
_, self._auth_token = auth.split(":")
|
||||
|
||||
# Set base URL
|
||||
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
|
||||
@ -70,8 +72,10 @@ class DataBridge:
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
files: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Make authenticated HTTP request"""
|
||||
headers = {"Authorization": f"Bearer {self._auth_token}"}
|
||||
"""Make HTTP request"""
|
||||
headers = {}
|
||||
if self._auth_token: # Only add auth header if we have a token
|
||||
headers["Authorization"] = f"Bearer {self._auth_token}"
|
||||
|
||||
if not files:
|
||||
headers["Content-Type"] = "application/json"
|
||||
|
42
shell.py
42
shell.py
@ -4,8 +4,12 @@ DataBridge interactive CLI.
|
||||
Assumes a DataBridge server is running.
|
||||
|
||||
Usage:
|
||||
python shell.py <uri>
|
||||
Example: python shell.py "http://test_user:token@localhost:8000"
|
||||
Without authentication (connects to localhost):
|
||||
python shell.py
|
||||
|
||||
With authentication:
|
||||
python shell.py <uri>
|
||||
Example: python shell.py "databridge://user:token@localhost:8000"
|
||||
|
||||
This provides the exact same interface as the Python SDK:
|
||||
db.ingest_text("content", metadata={...})
|
||||
@ -18,7 +22,6 @@ import sys
|
||||
from pathlib import Path
|
||||
import time
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Add local SDK to path before other imports
|
||||
_SDK_PATH = str(Path(__file__).parent / "sdks" / "python")
|
||||
@ -29,20 +32,10 @@ from databridge import DataBridge # noqa: E402
|
||||
|
||||
|
||||
class DB:
|
||||
def __init__(self, uri: str):
|
||||
"""Initialize DataBridge with URI"""
|
||||
# Convert databridge:// to http:// for localhost
|
||||
if "localhost" in uri or "127.0.0.1" in uri:
|
||||
uri = uri.replace("databridge://", "http://")
|
||||
self.uri = uri
|
||||
self.base_url = self._get_base_url(uri)
|
||||
is_local = "localhost" in uri or "127.0.0.1" in uri
|
||||
self._client = DataBridge(self.uri, is_local=is_local, timeout=1000)
|
||||
|
||||
def _get_base_url(self, uri: str) -> str:
|
||||
"""Extract base URL from URI (removing auth if present)"""
|
||||
parsed = urlparse(uri)
|
||||
return f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
|
||||
def __init__(self, uri: str = None):
|
||||
"""Initialize DataBridge with optional URI"""
|
||||
self._client = DataBridge(uri, is_local=True, timeout=1000)
|
||||
self.base_url = "http://localhost:8000" # For health check only
|
||||
|
||||
def check_health(self, max_retries=30, retry_interval=1) -> bool:
|
||||
"""Check if DataBridge server is healthy with retries"""
|
||||
@ -131,20 +124,15 @@ class DB:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 2:
|
||||
print("Error: URI argument required")
|
||||
print(__doc__)
|
||||
sys.exit(1)
|
||||
uri = sys.argv[1] if len(sys.argv) > 1 else None
|
||||
db = DB(uri)
|
||||
|
||||
# Create DB instance with provided URI
|
||||
db = DB(sys.argv[1])
|
||||
|
||||
# Wait for server to be healthy
|
||||
# Check server health
|
||||
if not db.check_health():
|
||||
print("Error: Could not connect to DataBridge server after multiple attempts")
|
||||
print("Error: Could not connect to DataBridge server")
|
||||
sys.exit(1)
|
||||
|
||||
print("\nSuccessfully connected to DataBridge server!")
|
||||
print("\nConnected to DataBridge")
|
||||
|
||||
# Start an interactive Python shell with 'db' already imported
|
||||
import code
|
||||
|
Loading…
x
Reference in New Issue
Block a user