mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
75 lines
2.0 KiB
Python
75 lines
2.0 KiB
Python
from datetime import datetime, timedelta, UTC # Note: using UTC for timezone awareness
|
|
import base64
|
|
from databridge import DataBridge
|
|
import jwt
|
|
import os
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
def create_databridge_uri() -> str:
    """Build a DataBridge connection URI from environment variables.

    Loads a ``.env`` file via ``load_dotenv()`` and then reads:

    * ``MONGODB_URI``, ``OPENAI_API_KEY``, ``UNSTRUCTURED_API_KEY`` (required)
    * ``DATABRIDGE_OWNER`` (optional, defaults to ``"admin"``)
    * ``JWT_SECRET_KEY`` (optional signing secret; when unset or empty the
      placeholder ``"your-secret-key"`` is used, which is only suitable for
      local experiments)

    Returns:
        A ``databridge://`` URI whose user-info part holds the owner id and a
        30-day HS256 JWT, and whose query string holds the API keys plus the
        fixed database and collection names. Every interpolated value is
        percent-encoded.

    Raises:
        ValueError: If any required environment variable is unset or empty.
    """
    # Imported locally so the block is self-contained.
    from urllib.parse import quote, urlencode

    load_dotenv()

    # Get credentials from environment.
    # MONGODB_URI is only validated here; it is not embedded in the URI.
    required = {
        "MONGODB_URI": os.getenv("MONGODB_URI"),
        "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
        "UNSTRUCTURED_API_KEY": os.getenv("UNSTRUCTURED_API_KEY"),
    }
    owner_id = os.getenv("DATABRIDGE_OWNER", "admin")

    # Validate required credentials and report exactly which ones are absent.
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise ValueError(
            "Missing required environment variables: " + ", ".join(missing)
        )

    # Prefer a secret supplied through the environment; the literal fallback
    # keeps the previous behaviour for setups that have not configured one.
    secret = os.getenv("JWT_SECRET_KEY") or "your-secret-key"

    # Generate auth token
    auth_token = jwt.encode(
        {
            "owner_id": owner_id,
            "exp": datetime.now(UTC) + timedelta(days=30),
        },
        secret,
        algorithm="HS256",
    )
    # PyJWT < 2.0 returns bytes; interpolating that would yield "b'...'".
    if isinstance(auth_token, bytes):
        auth_token = auth_token.decode("ascii")

    # Percent-encode every value so characters such as '&', '#', '@', ':' or
    # '/' inside a key or owner id cannot corrupt the URI structure.
    # Typical API keys and JWTs contain only unreserved characters and are
    # therefore emitted unchanged.
    query = urlencode(
        {
            "openai_key": required["OPENAI_API_KEY"],
            "unstructured_key": required["UNSTRUCTURED_API_KEY"],
            "db": "brandsyncaidb",
            "collection": "kb_chunked_embeddings",
        },
        quote_via=quote,
    )

    # For DataBridge URI, use any host identifier (it won't affect MongoDB connection)
    return (
        f"databridge://{quote(owner_id, safe='')}:{quote(auth_token, safe='')}"
        f"@databridge.local?{query}"
    )
|
|
|
|
|
|
async def main():
    """Run the example: ingest ``sample.pdf``, then query and print results.

    The PDF is read from the current working directory, base64-encoded and
    passed to ``bridge.ingest_document``. Afterwards ``bridge.query`` is
    called with ``k=4`` and, for each returned item, the first 200 characters
    of its ``'content'`` and its ``'score'`` are printed.
    """
    # Set up the client from the environment-derived URI.
    connection_uri = create_databridge_uri()
    bridge = DataBridge(connection_uri)

    # Ingestion example: read the raw PDF bytes, then base64-encode them.
    with open("sample.pdf", "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    encoded_pdf = base64.b64encode(raw_bytes).decode()

    pdf_metadata = {
        "content_type": "application/pdf",
        "is_base64": True,
        "title": "Sample PDF",
    }
    await bridge.ingest_document(content=encoded_pdf, metadata=pdf_metadata)

    # Query example: ask a question and request the top four matches.
    matches = await bridge.query(query="What is machine learning?", k=4)

    for match in matches:
        preview = match['content'][:200]
        print(f"Content: {preview}...")
        print(f"Score: {match['score']}\n")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the async example to completion.
    from asyncio import run as run_async

    run_async(main())
|