# morphik-core/simple_example.py
#
# 75 lines
# 2.0 KiB
# Python

from datetime import datetime, timedelta, UTC # Note: using UTC for timezone awareness
import base64
from databridge import DataBridge
import jwt
import os
from dotenv import load_dotenv
def create_databridge_uri() -> str:
    """Build the DataBridge connection URI from environment variables.

    Loads a ``.env`` file through ``load_dotenv`` and then reads:

    * ``MONGODB_URI``, ``OPENAI_API_KEY``, ``UNSTRUCTURED_API_KEY`` - required.
    * ``DATABRIDGE_OWNER`` - optional owner id, defaults to ``"admin"``.
    * ``JWT_SECRET_KEY`` - optional token signing secret; when unset or
      empty the placeholder ``"your-secret-key"`` is used (not suitable for
      production).

    Returns:
        A ``databridge://<owner>:<token>@databridge.local?...`` URI whose
        userinfo and query components are percent-encoded.

    Raises:
        ValueError: If any required environment variable is missing or empty.
    """
    # Imported locally so the function does not depend on extra file-level imports.
    from urllib.parse import quote, urlencode

    load_dotenv()

    # Get credentials from environment.
    # NOTE(review): MONGODB_URI is only validated here and is not embedded in
    # the URI - presumably DataBridge reads it from the environment itself;
    # confirm against the library.
    required = {
        "MONGODB_URI": os.getenv("MONGODB_URI"),
        "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
        "UNSTRUCTURED_API_KEY": os.getenv("UNSTRUCTURED_API_KEY"),
    }
    owner_id = os.getenv("DATABRIDGE_OWNER", "admin")

    # Validate required credentials and report exactly which ones are absent.
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise ValueError(
            f"Missing required environment variables: {', '.join(missing)}"
        )

    # Generate auth token. The secret can be overridden through the
    # environment; the literal fallback keeps the example runnable.
    secret_key = os.getenv("JWT_SECRET_KEY") or "your-secret-key"
    auth_token = jwt.encode(
        {
            "owner_id": owner_id,
            "exp": datetime.now(UTC) + timedelta(days=30),
        },
        secret_key,
        algorithm="HS256",
    )
    # PyJWT 1.x returns bytes; normalise so the URI never contains "b'...'".
    if isinstance(auth_token, bytes):
        auth_token = auth_token.decode("ascii")

    # Percent-encode every user-controlled component so characters such as
    # '&', '#', '@', ':' or '/' cannot break the URI structure.
    userinfo = f"{quote(owner_id, safe='')}:{quote(auth_token, safe='')}"
    query = urlencode(
        {
            "openai_key": required["OPENAI_API_KEY"],
            "unstructured_key": required["UNSTRUCTURED_API_KEY"],
            "db": "brandsyncaidb",
            "collection": "kb_chunked_embeddings",
        }
    )

    # For DataBridge URI, use any host identifier (it won't affect MongoDB connection)
    return f"databridge://{userinfo}@databridge.local?{query}"
async def main():
    """Run the demo: ingest ``sample.pdf``, then query and print the results.

    The PDF is read from the current working directory, base64-encoded and
    passed to ``ingest_document``. A fixed query is then issued and, for each
    returned result, the first 200 characters of its ``content`` and its
    ``score`` are printed.
    """
    # Connect using the URI assembled from the environment.
    client = DataBridge(create_databridge_uri())

    # Ingest step: the raw PDF bytes are sent as base64 text.
    with open("sample.pdf", "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    encoded_pdf = base64.b64encode(raw_bytes).decode()

    pdf_metadata = {
        "content_type": "application/pdf",
        "is_base64": True,
        "title": "Sample PDF",
    }
    await client.ingest_document(content=encoded_pdf, metadata=pdf_metadata)

    # Query step: ask a sample question and show what comes back.
    hits = await client.query(query="What is machine learning?", k=4)
    for hit in hits:
        preview = hit['content'][:200]
        print(f"Content: {preview}...")
        print(f"Score: {hit['score']}\n")
if __name__ == "__main__":
    # asyncio is only needed when the file is executed as a script, so it is
    # imported here rather than at module load.
    from asyncio import run as run_event_loop

    run_event_loop(main())