mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Streamline dev experience with optional auth and simplified config (#27)
This commit is contained in:
parent
13947d41bd
commit
f4c14fc71b
@ -1,4 +1,4 @@
|
|||||||
JWT_SECRET_KEY="..."
|
JWT_SECRET_KEY="..." # Required in production, optional in dev mode (dev_mode=true in databridge.toml)
|
||||||
POSTGRES_URI="postgresql+asyncpg://postgres:postgres@localhost:5432/databridge" # Required for PostgreSQL database
|
POSTGRES_URI="postgresql+asyncpg://postgres:postgres@localhost:5432/databridge" # Required for PostgreSQL database
|
||||||
MONGODB_URI="..." # Optional: Only needed if using MongoDB
|
MONGODB_URI="..." # Optional: Only needed if using MongoDB
|
||||||
|
|
||||||
|
11
core/api.py
11
core/api.py
@ -227,7 +227,16 @@ document_service = DocumentService(
|
|||||||
|
|
||||||
|
|
||||||
async def verify_token(authorization: str = Header(None)) -> AuthContext:
|
async def verify_token(authorization: str = Header(None)) -> AuthContext:
|
||||||
"""Verify JWT Bearer token."""
|
"""Verify JWT Bearer token or return dev context if dev_mode is enabled."""
|
||||||
|
# Check if dev mode is enabled
|
||||||
|
if settings.dev_mode:
|
||||||
|
return AuthContext(
|
||||||
|
entity_type=EntityType(settings.dev_entity_type),
|
||||||
|
entity_id=settings.dev_entity_id,
|
||||||
|
permissions=set(settings.dev_permissions),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Normal token verification flow
|
||||||
if not authorization:
|
if not authorization:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=401,
|
status_code=401,
|
||||||
|
@ -9,7 +9,7 @@ from dotenv import load_dotenv
|
|||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
"""DataBridge configuration settings."""
|
"""DataBridge configuration settings."""
|
||||||
|
|
||||||
# environment variables:
|
# Environment variables
|
||||||
JWT_SECRET_KEY: str
|
JWT_SECRET_KEY: str
|
||||||
POSTGRES_URI: Optional[str] = None
|
POSTGRES_URI: Optional[str] = None
|
||||||
MONGODB_URI: Optional[str] = None
|
MONGODB_URI: Optional[str] = None
|
||||||
@ -19,42 +19,45 @@ class Settings(BaseSettings):
|
|||||||
OPENAI_API_KEY: Optional[str] = None
|
OPENAI_API_KEY: Optional[str] = None
|
||||||
ANTHROPIC_API_KEY: Optional[str] = None
|
ANTHROPIC_API_KEY: Optional[str] = None
|
||||||
|
|
||||||
# configuration variables:
|
# API configuration
|
||||||
## api:
|
|
||||||
HOST: str
|
HOST: str
|
||||||
PORT: int
|
PORT: int
|
||||||
RELOAD: bool
|
RELOAD: bool
|
||||||
|
|
||||||
## auth:
|
# Auth configuration
|
||||||
JWT_ALGORITHM: str
|
JWT_ALGORITHM: str
|
||||||
|
dev_mode: bool = False
|
||||||
|
dev_entity_type: str = "developer"
|
||||||
|
dev_entity_id: str = "dev_user"
|
||||||
|
dev_permissions: list = ["read", "write", "admin"]
|
||||||
|
|
||||||
## completion:
|
# Completion configuration
|
||||||
COMPLETION_PROVIDER: Literal["ollama", "openai"]
|
COMPLETION_PROVIDER: Literal["ollama", "openai"]
|
||||||
COMPLETION_MODEL: str
|
COMPLETION_MODEL: str
|
||||||
COMPLETION_MAX_TOKENS: Optional[str] = None
|
COMPLETION_MAX_TOKENS: Optional[str] = None
|
||||||
COMPLETION_TEMPERATURE: Optional[float] = None
|
COMPLETION_TEMPERATURE: Optional[float] = None
|
||||||
COMPLETION_OLLAMA_BASE_URL: Optional[str] = None
|
COMPLETION_OLLAMA_BASE_URL: Optional[str] = None
|
||||||
|
|
||||||
## database
|
# Database configuration
|
||||||
DATABASE_PROVIDER: Literal["postgres", "mongodb"]
|
DATABASE_PROVIDER: Literal["postgres", "mongodb"]
|
||||||
DATABASE_NAME: Optional[str] = None
|
DATABASE_NAME: Optional[str] = None
|
||||||
DOCUMENTS_COLLECTION: Optional[str] = None
|
DOCUMENTS_COLLECTION: Optional[str] = None
|
||||||
|
|
||||||
## embedding
|
# Embedding configuration
|
||||||
EMBEDDING_PROVIDER: Literal["ollama", "openai"]
|
EMBEDDING_PROVIDER: Literal["ollama", "openai"]
|
||||||
EMBEDDING_MODEL: str
|
EMBEDDING_MODEL: str
|
||||||
VECTOR_DIMENSIONS: int
|
VECTOR_DIMENSIONS: int
|
||||||
EMBEDDING_SIMILARITY_METRIC: Literal["cosine", "dotProduct"]
|
EMBEDDING_SIMILARITY_METRIC: Literal["cosine", "dotProduct"]
|
||||||
EMBEDDING_OLLAMA_BASE_URL: Optional[str] = None
|
EMBEDDING_OLLAMA_BASE_URL: Optional[str] = None
|
||||||
|
|
||||||
## parser
|
# Parser configuration
|
||||||
PARSER_PROVIDER: Literal["unstructured", "combined", "contextual"]
|
PARSER_PROVIDER: Literal["unstructured", "combined", "contextual"]
|
||||||
CHUNK_SIZE: int
|
CHUNK_SIZE: int
|
||||||
CHUNK_OVERLAP: int
|
CHUNK_OVERLAP: int
|
||||||
USE_UNSTRUCTURED_API: bool
|
USE_UNSTRUCTURED_API: bool
|
||||||
FRAME_SAMPLE_RATE: Optional[int] = None
|
FRAME_SAMPLE_RATE: Optional[int] = None
|
||||||
|
|
||||||
## reranker
|
# Reranker configuration
|
||||||
USE_RERANKING: bool
|
USE_RERANKING: bool
|
||||||
RERANKER_PROVIDER: Optional[Literal["flag"]] = None
|
RERANKER_PROVIDER: Optional[Literal["flag"]] = None
|
||||||
RERANKER_MODEL: Optional[str] = None
|
RERANKER_MODEL: Optional[str] = None
|
||||||
@ -63,13 +66,13 @@ class Settings(BaseSettings):
|
|||||||
RERANKER_USE_FP16: Optional[bool] = None
|
RERANKER_USE_FP16: Optional[bool] = None
|
||||||
RERANKER_DEVICE: Optional[str] = None
|
RERANKER_DEVICE: Optional[str] = None
|
||||||
|
|
||||||
## storage
|
# Storage configuration
|
||||||
STORAGE_PROVIDER: Literal["local", "aws-s3"]
|
STORAGE_PROVIDER: Literal["local", "aws-s3"]
|
||||||
STORAGE_PATH: Optional[str] = None
|
STORAGE_PATH: Optional[str] = None
|
||||||
AWS_REGION: Optional[str] = None
|
AWS_REGION: Optional[str] = None
|
||||||
S3_BUCKET: Optional[str] = None
|
S3_BUCKET: Optional[str] = None
|
||||||
|
|
||||||
## vector store
|
# Vector store configuration
|
||||||
VECTOR_STORE_PROVIDER: Literal["pgvector", "mongodb"]
|
VECTOR_STORE_PROVIDER: Literal["pgvector", "mongodb"]
|
||||||
VECTOR_STORE_DATABASE_NAME: Optional[str] = None
|
VECTOR_STORE_DATABASE_NAME: Optional[str] = None
|
||||||
VECTOR_STORE_COLLECTION_NAME: Optional[str] = None
|
VECTOR_STORE_COLLECTION_NAME: Optional[str] = None
|
||||||
@ -95,9 +98,19 @@ def get_settings() -> Settings:
|
|||||||
# load auth config
|
# load auth config
|
||||||
auth_config = {
|
auth_config = {
|
||||||
"JWT_ALGORITHM": config["auth"]["jwt_algorithm"],
|
"JWT_ALGORITHM": config["auth"]["jwt_algorithm"],
|
||||||
"JWT_SECRET_KEY": os.environ["JWT_SECRET_KEY"],
|
"JWT_SECRET_KEY": os.environ.get(
|
||||||
|
"JWT_SECRET_KEY", "dev-secret-key"
|
||||||
|
), # Default for dev mode
|
||||||
|
"dev_mode": config["auth"].get("dev_mode", False),
|
||||||
|
"dev_entity_type": config["auth"].get("dev_entity_type", "developer"),
|
||||||
|
"dev_entity_id": config["auth"].get("dev_entity_id", "dev_user"),
|
||||||
|
"dev_permissions": config["auth"].get("dev_permissions", ["read", "write", "admin"]),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Only require JWT_SECRET_KEY in non-dev mode
|
||||||
|
if not auth_config["dev_mode"] and "JWT_SECRET_KEY" not in os.environ:
|
||||||
|
raise ValueError("JWT_SECRET_KEY is required when dev_mode is disabled")
|
||||||
|
|
||||||
# load completion config
|
# load completion config
|
||||||
completion_config = {
|
completion_config = {
|
||||||
"COMPLETION_PROVIDER": config["completion"]["provider"],
|
"COMPLETION_PROVIDER": config["completion"]["provider"],
|
||||||
|
@ -5,6 +5,10 @@ reload = true
|
|||||||
|
|
||||||
[auth]
|
[auth]
|
||||||
jwt_algorithm = "HS256"
|
jwt_algorithm = "HS256"
|
||||||
|
dev_mode = true # Enabled by default for easier local development; set to false in production, because dev mode skips JWT verification
|
||||||
|
dev_entity_id = "dev_user" # Default dev user ID
|
||||||
|
dev_entity_type = "developer" # Default dev entity type
|
||||||
|
dev_permissions = ["read", "write", "admin"] # Default dev permissions
|
||||||
|
|
||||||
[completion]
|
[completion]
|
||||||
provider = "ollama"
|
provider = "ollama"
|
||||||
|
@ -21,25 +21,24 @@ class AsyncDataBridge:
|
|||||||
DataBridge client for document operations.
|
DataBridge client for document operations.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>"
|
uri (str, optional): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>".
|
||||||
|
If not provided, connects to http://localhost:8000 without authentication.
|
||||||
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
||||||
is_local (bool, optional): Whether to connect to a local server. Defaults to False.
|
is_local (bool, optional): Whether to connect to a local server. Defaults to False.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
```python
|
```python
|
||||||
async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db:
|
# Without authentication
|
||||||
# Ingest text
|
async with AsyncDataBridge() as db:
|
||||||
doc = await db.ingest_text(
|
doc = await db.ingest_text("Sample content")
|
||||||
"Sample content",
|
|
||||||
metadata={"category": "sample"}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Query documents
|
# With authentication
|
||||||
results = await db.query("search query")
|
async with AsyncDataBridge("databridge://owner_id:token@api.databridge.ai") as db:
|
||||||
|
doc = await db.ingest_text("Sample content")
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False):
|
def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
|
||||||
self._timeout = timeout
|
self._timeout = timeout
|
||||||
self._client = (
|
self._client = (
|
||||||
httpx.AsyncClient(timeout=timeout)
|
httpx.AsyncClient(timeout=timeout)
|
||||||
@ -51,7 +50,12 @@ class AsyncDataBridge:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
self._is_local = is_local
|
self._is_local = is_local
|
||||||
self._setup_auth(uri)
|
|
||||||
|
if uri:
|
||||||
|
self._setup_auth(uri)
|
||||||
|
else:
|
||||||
|
self._base_url = "http://localhost:8000"
|
||||||
|
self._auth_token = None
|
||||||
|
|
||||||
def _setup_auth(self, uri: str) -> None:
|
def _setup_auth(self, uri: str) -> None:
|
||||||
"""Setup authentication from URI"""
|
"""Setup authentication from URI"""
|
||||||
@ -61,7 +65,7 @@ class AsyncDataBridge:
|
|||||||
|
|
||||||
# Split host and auth parts
|
# Split host and auth parts
|
||||||
auth, host = parsed.netloc.split("@")
|
auth, host = parsed.netloc.split("@")
|
||||||
self._owner_id, self._auth_token = auth.split(":")
|
_, self._auth_token = auth.split(":")
|
||||||
|
|
||||||
# Set base URL
|
# Set base URL
|
||||||
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
|
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
|
||||||
@ -76,8 +80,10 @@ class AsyncDataBridge:
|
|||||||
data: Optional[Dict[str, Any]] = None,
|
data: Optional[Dict[str, Any]] = None,
|
||||||
files: Optional[Dict[str, Any]] = None,
|
files: Optional[Dict[str, Any]] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Make authenticated HTTP request"""
|
"""Make HTTP request"""
|
||||||
headers = {"Authorization": f"Bearer {self._auth_token}"}
|
headers = {}
|
||||||
|
if self._auth_token: # Only add auth header if we have a token
|
||||||
|
headers["Authorization"] = f"Bearer {self._auth_token}"
|
||||||
|
|
||||||
if not files:
|
if not files:
|
||||||
headers["Content-Type"] = "application/json"
|
headers["Content-Type"] = "application/json"
|
||||||
|
@ -21,31 +21,33 @@ class DataBridge:
|
|||||||
DataBridge client for document operations.
|
DataBridge client for document operations.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
uri (str): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>"
|
uri (str, optional): DataBridge URI in the format "databridge://<owner_id>:<token>@<host>".
|
||||||
|
If not provided, connects to http://localhost:8000 without authentication.
|
||||||
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
||||||
is_local (bool, optional): Whether connecting to local development server. Defaults to False.
|
is_local (bool, optional): Whether connecting to local development server. Defaults to False.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
```python
|
```python
|
||||||
with DataBridge("databridge://owner_id:token@api.databridge.ai") as db:
|
# Without authentication
|
||||||
# Ingest text
|
db = DataBridge()
|
||||||
doc = db.ingest_text(
|
|
||||||
"Sample content",
|
|
||||||
metadata={"category": "sample"}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Query documents
|
# With authentication
|
||||||
results = db.query("search query")
|
db = DataBridge("databridge://owner_id:token@api.databridge.ai")
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, uri: str, timeout: int = 30, is_local: bool = False):
|
def __init__(self, uri: Optional[str] = None, timeout: int = 30, is_local: bool = False):
|
||||||
self._timeout = timeout
|
self._timeout = timeout
|
||||||
self._session = requests.Session()
|
self._session = requests.Session()
|
||||||
if is_local:
|
if is_local:
|
||||||
self._session.verify = False # Disable SSL for localhost
|
self._session.verify = False # Disable SSL for localhost
|
||||||
self._is_local = is_local
|
self._is_local = is_local
|
||||||
self._setup_auth(uri)
|
|
||||||
|
if uri:
|
||||||
|
self._setup_auth(uri)
|
||||||
|
else:
|
||||||
|
self._base_url = "http://localhost:8000"
|
||||||
|
self._auth_token = None
|
||||||
|
|
||||||
def _setup_auth(self, uri: str) -> None:
|
def _setup_auth(self, uri: str) -> None:
|
||||||
"""Setup authentication from URI"""
|
"""Setup authentication from URI"""
|
||||||
@ -55,7 +57,7 @@ class DataBridge:
|
|||||||
|
|
||||||
# Split host and auth parts
|
# Split host and auth parts
|
||||||
auth, host = parsed.netloc.split("@")
|
auth, host = parsed.netloc.split("@")
|
||||||
self._owner_id, self._auth_token = auth.split(":")
|
_, self._auth_token = auth.split(":")
|
||||||
|
|
||||||
# Set base URL
|
# Set base URL
|
||||||
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
|
self._base_url = f"{'http' if self._is_local else 'https'}://{host}"
|
||||||
@ -70,8 +72,10 @@ class DataBridge:
|
|||||||
data: Optional[Dict[str, Any]] = None,
|
data: Optional[Dict[str, Any]] = None,
|
||||||
files: Optional[Dict[str, Any]] = None,
|
files: Optional[Dict[str, Any]] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Make authenticated HTTP request"""
|
"""Make HTTP request"""
|
||||||
headers = {"Authorization": f"Bearer {self._auth_token}"}
|
headers = {}
|
||||||
|
if self._auth_token: # Only add auth header if we have a token
|
||||||
|
headers["Authorization"] = f"Bearer {self._auth_token}"
|
||||||
|
|
||||||
if not files:
|
if not files:
|
||||||
headers["Content-Type"] = "application/json"
|
headers["Content-Type"] = "application/json"
|
||||||
|
42
shell.py
42
shell.py
@ -4,8 +4,12 @@ DataBridge interactive CLI.
|
|||||||
Assumes a DataBridge server is running.
|
Assumes a DataBridge server is running.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python shell.py <uri>
|
Without authentication (connects to localhost):
|
||||||
Example: python shell.py "http://test_user:token@localhost:8000"
|
python shell.py
|
||||||
|
|
||||||
|
With authentication:
|
||||||
|
python shell.py <uri>
|
||||||
|
Example: python shell.py "databridge://user:token@localhost:8000"
|
||||||
|
|
||||||
This provides the exact same interface as the Python SDK:
|
This provides the exact same interface as the Python SDK:
|
||||||
db.ingest_text("content", metadata={...})
|
db.ingest_text("content", metadata={...})
|
||||||
@ -18,7 +22,6 @@ import sys
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import time
|
import time
|
||||||
import requests
|
import requests
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
# Add local SDK to path before other imports
|
# Add local SDK to path before other imports
|
||||||
_SDK_PATH = str(Path(__file__).parent / "sdks" / "python")
|
_SDK_PATH = str(Path(__file__).parent / "sdks" / "python")
|
||||||
@ -29,20 +32,10 @@ from databridge import DataBridge # noqa: E402
|
|||||||
|
|
||||||
|
|
||||||
class DB:
|
class DB:
|
||||||
def __init__(self, uri: str):
|
def __init__(self, uri: str = None):
|
||||||
"""Initialize DataBridge with URI"""
|
"""Initialize DataBridge with optional URI"""
|
||||||
# Convert databridge:// to http:// for localhost
|
self._client = DataBridge(uri, is_local=True, timeout=1000)
|
||||||
if "localhost" in uri or "127.0.0.1" in uri:
|
self.base_url = "http://localhost:8000" # For health check only
|
||||||
uri = uri.replace("databridge://", "http://")
|
|
||||||
self.uri = uri
|
|
||||||
self.base_url = self._get_base_url(uri)
|
|
||||||
is_local = "localhost" in uri or "127.0.0.1" in uri
|
|
||||||
self._client = DataBridge(self.uri, is_local=is_local, timeout=1000)
|
|
||||||
|
|
||||||
def _get_base_url(self, uri: str) -> str:
|
|
||||||
"""Extract base URL from URI (removing auth if present)"""
|
|
||||||
parsed = urlparse(uri)
|
|
||||||
return f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
|
|
||||||
|
|
||||||
def check_health(self, max_retries=30, retry_interval=1) -> bool:
|
def check_health(self, max_retries=30, retry_interval=1) -> bool:
|
||||||
"""Check if DataBridge server is healthy with retries"""
|
"""Check if DataBridge server is healthy with retries"""
|
||||||
@ -131,20 +124,15 @@ class DB:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) != 2:
|
uri = sys.argv[1] if len(sys.argv) > 1 else None
|
||||||
print("Error: URI argument required")
|
db = DB(uri)
|
||||||
print(__doc__)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Create DB instance with provided URI
|
# Check server health
|
||||||
db = DB(sys.argv[1])
|
|
||||||
|
|
||||||
# Wait for server to be healthy
|
|
||||||
if not db.check_health():
|
if not db.check_health():
|
||||||
print("Error: Could not connect to DataBridge server after multiple attempts")
|
print("Error: Could not connect to DataBridge server")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print("\nSuccessfully connected to DataBridge server!")
|
print("\nConnected to DataBridge")
|
||||||
|
|
||||||
# Start an interactive Python shell with 'db' already imported
|
# Start an interactive Python shell with 'db' already imported
|
||||||
import code
|
import code
|
||||||
|
Loading…
x
Reference in New Issue
Block a user