Update .gitignore, morphik.toml, and database schema; enhance graph service access control

2025-05-09 19:32:38 +00:00 · 2025-04-22 02:19:34 -07:00 · 2025-04-22 02:19:34 -07:00 · ea95b2647c
commit ea95b2647c
parent 6bd7077eec
5 changed files with 48 additions and 22 deletions
--- a/.gitignore
+++ b/.gitignore
@ -33,7 +33,9 @@ offload/*
 test.pdf
 experiments/*
-ui-component/package-lock.json
+ee/ui-component/package-lock.json/*
 ee/ui-component/node-modules/*
 ee/ui-component/.next
 ui-component/notebook-storage/notebooks.json
--- a/core/database/postgres_database.py
+++ b/core/database/postgres_database.py
@ -49,7 +49,7 @@ class GraphModel(Base):
    __tablename__ = "graphs"
    id = Column(String, primary_key=True)
-    name = Column(String, unique=True, index=True)
+    name = Column(String, index=True)  # Not unique globally anymore
    entities = Column(JSONB, default=list)
    relationships = Column(JSONB, default=list)
    graph_metadata = Column(JSONB, default=dict)  # Renamed from 'metadata' to avoid conflict
@ -67,6 +67,8 @@ class GraphModel(Base):
        Index("idx_graph_owner", "owner", postgresql_using="gin"),
        Index("idx_graph_access_control", "access_control", postgresql_using="gin"),
        Index("idx_graph_system_metadata", "system_metadata", postgresql_using="gin"),
        # Enforce graph-name uniqueness per owner with a unique index on (name, owner->>'id')
        Index("idx_graph_owner_name", "name", text("(owner->>'id')"), unique=True),
    )
--- a/core/services/graph_service.py
+++ b/core/services/graph_service.py
@ -79,7 +79,8 @@ class GraphService:
            additional_filters: Optional additional metadata filters to determine which new documents to include
            additional_documents: Optional list of specific additional document IDs to include
            prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
-            system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
+            system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
            to determine which documents to include
        Returns:
            Graph: The updated graph
@ -91,8 +92,8 @@ class GraphService:
        if "write" not in auth.permissions:
            raise PermissionError("User does not have write permission")
-        # Get the existing graph
+        # Get the existing graph with system filters for proper user_id scoping
-        existing_graph = await self.db.get_graph(name, auth)
+        existing_graph = await self.db.get_graph(name, auth, system_filters=system_filters)
        if not existing_graph:
            raise ValueError(f"Graph '{name}' not found")
@ -403,7 +404,8 @@ class GraphService:
            filters: Optional metadata filters to determine which documents to include
            documents: Optional list of specific document IDs to include
            prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
-            system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
+            system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
            to determine which documents to include
        Returns:
            Graph: The created graph
@ -444,16 +446,26 @@ class GraphService:
        # Validation is now handled by type annotations
        # Create a new graph with authorization info
        access_control = {
            "readers": [auth.entity_id],
            "writers": [auth.entity_id],
            "admins": [auth.entity_id],
        }
        # Add user_id to access_control if present (for proper user_id scoping)
        if auth.user_id:
            # User ID must be provided as a list to match the Graph model's type constraints
            access_control["user_id"] = [auth.user_id]
        # Ensure entity_type is a string value for storage
        entity_type = auth.entity_type.value if hasattr(auth.entity_type, "value") else auth.entity_type
        graph = Graph(
            name=name,
            document_ids=[doc.external_id for doc in document_objects],
            filters=filters,
-            owner={"type": auth.entity_type, "id": auth.entity_id},
+            owner={"type": entity_type, "id": auth.entity_id},
-            access_control={
+            access_control=access_control,
                "readers": [auth.entity_id],
                "writers": [auth.entity_id],
                "admins": [auth.entity_id],
            },
        )
        # Add folder_name and end_user_id to system_metadata if provided
@ -727,17 +739,26 @@ class GraphService:
                serialized_examples = custom_examples
            examples_json = {"entities": serialized_examples}
-            examples_str = f"\nHere are some examples of the kind of entities to extract:\n```json\n{json.dumps(examples_json, indent=2)}\n```\n"
+            examples_str = (
                f"\nHere are some examples of the kind of entities to extract:\n```json\n"
                f"{json.dumps(examples_json, indent=2)}\n```\n"
            )
        # Modify the system message to handle properties as a string that will be parsed later
        system_message = {
            "role": "system",
            "content": (
-                "You are an entity extraction and relationship extraction assistant. Extract entities and their relationships from text precisely and thoroughly, extract as many entities and relationships as possible. "
+                "You are an entity extraction and relationship extraction assistant. Extract entities and "
-                "For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
+                "their relationships from text precisely and thoroughly, extract as many entities and "
-                "For relationships, use a simple format with source, target, and relationship fields. Be very through, there are many relationships that are not obvious"
+                "relationships as possible. "
-                "IMPORTANT: The source and target fields must be simple strings representing entity labels. For example: "
+                "For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, "
-                "if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', target: 'Entity B', relationship: 'relates to'. "
+                "CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
                "For relationships, use a simple format with source, target, and relationship fields. "
                "Be very through, there are many relationships that are not obvious"
                "IMPORTANT: The source and target fields must be simple strings representing "
                "entity labels. For example: "
                "if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', "
                "target: 'Entity B', relationship: 'relates to'. "
                "Respond directly in json format, without any additional text or explanations. "
            ),
        }
@ -757,7 +778,8 @@ class GraphService:
                    "For relationships, specify the source entity, target entity, and the relationship between them. "
                    "The source and target must be simple strings matching the entity labels, not objects. "
                    f"{examples_str}"
-                    'Sample relationship format: {"source": "Entity A", "target": "Entity B", "relationship": "works for"}\n\n'
+                    'Sample relationship format: {"source": "Entity A", "target": "Entity B", '
                    '"relationship": "works for"}\n\n'
                    "Return your response as valid JSON:\n\n" + content_limited
                ),
            }
@ -819,7 +841,8 @@ class GraphService:
        # Process extraction results
        entities, relationships = self._process_extraction_results(extraction_result, doc_id, chunk_number)
        logger.info(
-            f"Extracted {len(entities)} entities and {len(relationships)} relationships from document {doc_id}, chunk {chunk_number}"
+            f"Extracted {len(entities)} entities and {len(relationships)} relationships from document "
            f"{doc_id}, chunk {chunk_number}"
        )
        return entities, relationships
--- a/core/services/telemetry.py
+++ b/core/services/telemetry.py
@ -528,7 +528,6 @@ class TelemetryService:
                        export_timeout_millis=OTLP_TIMEOUT * 1000,
                    )
                )
                print(f"Successfully configured Honeycomb metrics exporter to {OTLP_METRICS_ENDPOINT}")
            except Exception as e:
                print(f"Failed to configure Honeycomb metrics exporter: {str(e)}")
--- a/morphik.toml
+++ b/morphik.toml
@ -111,7 +111,7 @@ mode = "self_hosted"  # "cloud" or "self_hosted"
 api_domain = "api.morphik.ai"  # API domain for cloud URIs
 [redis]
-host = "localhost"
+host = "localhost"  # use "redis" for docker
 port = 6379
 [graph]