mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
219 lines
7.8 KiB
Python
219 lines
7.8 KiB
Python
from datetime import UTC, datetime
|
|
import logging
|
|
from typing import Dict, List, Optional, Any
|
|
|
|
from motor.motor_asyncio import AsyncIOMotorClient
|
|
from pymongo import ReturnDocument
|
|
from pymongo.errors import PyMongoError
|
|
|
|
from .base_database import BaseDatabase
|
|
from ..models.documents import Document
|
|
from ..models.auth import AuthContext, EntityType
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MongoDatabase(BaseDatabase):
|
|
"""MongoDB implementation for document metadata storage."""
|
|
|
|
def __init__(
|
|
self,
|
|
uri: str,
|
|
db_name: str,
|
|
collection_name: str,
|
|
):
|
|
"""Initialize MongoDB connection for document storage."""
|
|
self.client = AsyncIOMotorClient(uri)
|
|
self.db = self.client[db_name]
|
|
self.collection = self.db[collection_name]
|
|
|
|
async def initialize(self):
|
|
"""Initialize database indexes."""
|
|
try:
|
|
# Create indexes for common queries
|
|
await self.collection.create_index("external_id", unique=True)
|
|
await self.collection.create_index("owner.id")
|
|
await self.collection.create_index("access_control.readers")
|
|
await self.collection.create_index("access_control.writers")
|
|
await self.collection.create_index("access_control.admins")
|
|
await self.collection.create_index("system_metadata.created_at")
|
|
|
|
logger.info("MongoDB indexes created successfully")
|
|
return True
|
|
except PyMongoError as e:
|
|
logger.error(f"Error creating MongoDB indexes: {str(e)}")
|
|
return False
|
|
|
|
async def store_document(self, document: Document) -> bool:
|
|
"""Store document metadata."""
|
|
try:
|
|
doc_dict = document.model_dump()
|
|
|
|
# Ensure system metadata
|
|
doc_dict["system_metadata"]["created_at"] = datetime.now(UTC)
|
|
doc_dict["system_metadata"]["updated_at"] = datetime.now(UTC)
|
|
|
|
result = await self.collection.insert_one(doc_dict)
|
|
return bool(result.inserted_id)
|
|
|
|
except PyMongoError as e:
|
|
logger.error(f"Error storing document metadata: {str(e)}")
|
|
return False
|
|
|
|
async def get_document(self, document_id: str, auth: AuthContext) -> Optional[Document]:
|
|
"""Retrieve document metadata by ID if user has access."""
|
|
try:
|
|
# Build access filter
|
|
access_filter = self._build_access_filter(auth)
|
|
|
|
# Query document
|
|
query = {"$and": [{"external_id": document_id}, access_filter]}
|
|
logger.debug(f"Querying document with query: {query}")
|
|
|
|
doc_dict = await self.collection.find_one(query)
|
|
logger.debug(f"Found document: {doc_dict}")
|
|
return Document(**doc_dict) if doc_dict else None
|
|
|
|
except PyMongoError as e:
|
|
logger.error(f"Error retrieving document metadata: {str(e)}")
|
|
raise e
|
|
|
|
async def get_documents(
|
|
self,
|
|
auth: AuthContext,
|
|
skip: int = 0,
|
|
limit: int = 100,
|
|
filters: Optional[Dict[str, Any]] = None,
|
|
) -> List[Document]:
|
|
"""List accessible documents with pagination and filtering."""
|
|
try:
|
|
# Build query
|
|
auth_filter = self._build_access_filter(auth)
|
|
metadata_filter = self._build_metadata_filter(filters)
|
|
query = {"$and": [auth_filter, metadata_filter]} if metadata_filter else auth_filter
|
|
|
|
# Execute paginated query
|
|
cursor = self.collection.find(query).skip(skip).limit(limit)
|
|
|
|
documents = []
|
|
async for doc_dict in cursor:
|
|
documents.append(Document(**doc_dict))
|
|
|
|
return documents
|
|
|
|
except PyMongoError as e:
|
|
logger.error(f"Error listing documents: {str(e)}")
|
|
return []
|
|
|
|
async def update_document(
|
|
self, document_id: str, updates: Dict[str, Any], auth: AuthContext
|
|
) -> bool:
|
|
"""Update document metadata if user has write access."""
|
|
try:
|
|
# Verify write access
|
|
if not await self.check_access(document_id, auth, "write"):
|
|
return False
|
|
|
|
# Update system metadata
|
|
updates.setdefault("system_metadata", {})
|
|
updates["system_metadata"]["updated_at"] = datetime.utcnow()
|
|
|
|
result = await self.collection.find_one_and_update(
|
|
{"external_id": document_id},
|
|
{"$set": updates},
|
|
return_document=ReturnDocument.AFTER,
|
|
)
|
|
|
|
return bool(result)
|
|
|
|
except PyMongoError as e:
|
|
logger.error(f"Error updating document metadata: {str(e)}")
|
|
return False
|
|
|
|
async def delete_document(self, document_id: str, auth: AuthContext) -> bool:
|
|
"""Delete document if user has admin access."""
|
|
try:
|
|
# Verify admin access
|
|
if not await self.check_access(document_id, auth, "admin"):
|
|
return False
|
|
|
|
result = await self.collection.delete_one({"external_id": document_id})
|
|
return bool(result.deleted_count)
|
|
|
|
except PyMongoError as e:
|
|
logger.error(f"Error deleting document: {str(e)}")
|
|
return False
|
|
|
|
async def find_authorized_and_filtered_documents(
|
|
self, auth: AuthContext, filters: Optional[Dict[str, Any]] = None
|
|
) -> List[str]:
|
|
"""Find document IDs matching filters and access permissions."""
|
|
# Build query
|
|
auth_filter = self._build_access_filter(auth)
|
|
metadata_filter = self._build_metadata_filter(filters)
|
|
query = {"$and": [auth_filter, metadata_filter]} if metadata_filter else auth_filter
|
|
|
|
# Get matching document IDs
|
|
cursor = self.collection.find(query, {"external_id": 1})
|
|
|
|
document_ids = []
|
|
async for doc in cursor:
|
|
document_ids.append(doc["external_id"])
|
|
|
|
return document_ids
|
|
|
|
async def check_access(
|
|
self, document_id: str, auth: AuthContext, required_permission: str = "read"
|
|
) -> bool:
|
|
"""Check if user has required permission for document."""
|
|
try:
|
|
doc = await self.collection.find_one({"external_id": document_id})
|
|
if not doc:
|
|
return False
|
|
|
|
access_control = doc.get("access_control", {})
|
|
|
|
# Check owner access
|
|
owner = doc.get("owner", {})
|
|
if owner.get("type") == auth.entity_type and owner.get("id") == auth.entity_id:
|
|
return True
|
|
|
|
# Check permission-specific access
|
|
permission_map = {"read": "readers", "write": "writers", "admin": "admins"}
|
|
|
|
permission_set = permission_map.get(required_permission)
|
|
if not permission_set:
|
|
return False
|
|
|
|
return auth.entity_id in access_control.get(permission_set, set())
|
|
|
|
except PyMongoError as e:
|
|
logger.error(f"Error checking document access: {str(e)}")
|
|
return False
|
|
|
|
def _build_access_filter(self, auth: AuthContext) -> Dict[str, Any]:
|
|
"""Build MongoDB filter for access control."""
|
|
base_filter = {
|
|
"$or": [
|
|
{"owner.id": auth.entity_id},
|
|
{"access_control.readers": auth.entity_id},
|
|
{"access_control.writers": auth.entity_id},
|
|
{"access_control.admins": auth.entity_id},
|
|
]
|
|
}
|
|
|
|
if auth.entity_type == EntityType.DEVELOPER:
|
|
# Add app-specific access for developers
|
|
base_filter["$or"].append({"access_control.app_access": auth.app_id})
|
|
|
|
return base_filter
|
|
|
|
def _build_metadata_filter(self, filters: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Build MongoDB filter for metadata."""
|
|
if not filters:
|
|
return {}
|
|
filter_dict = {}
|
|
for key, value in filters.items():
|
|
filter_dict[f"metadata.{key}"] = value
|
|
return filter_dict
|