Update .gitignore, morphik.toml, and database schema; enhance graph service access control

This commit is contained in:
Adityavardhan Agrawal 2025-04-22 02:19:34 -07:00
parent 6bd7077eec
commit ea95b2647c
5 changed files with 48 additions and 22 deletions

4
.gitignore vendored
View File

@ -33,7 +33,9 @@ offload/*
test.pdf
experiments/*
ui-component/package-lock.json
ee/ui-component/package-lock.json
ee/ui-component/node_modules/*
ee/ui-component/.next
ui-component/notebook-storage/notebooks.json

View File

@ -49,7 +49,7 @@ class GraphModel(Base):
__tablename__ = "graphs"
id = Column(String, primary_key=True)
name = Column(String, unique=True, index=True)
name = Column(String, index=True) # Not unique globally anymore
entities = Column(JSONB, default=list)
relationships = Column(JSONB, default=list)
graph_metadata = Column(JSONB, default=dict) # Renamed from 'metadata' to avoid conflict
@ -67,6 +67,8 @@ class GraphModel(Base):
Index("idx_graph_owner", "owner", postgresql_using="gin"),
Index("idx_graph_access_control", "access_control", postgresql_using="gin"),
Index("idx_graph_system_metadata", "system_metadata", postgresql_using="gin"),
# Unique index on (name, owner id): graph names must be unique per owner, not globally
Index("idx_graph_owner_name", "name", text("(owner->>'id')"), unique=True),
)

View File

@ -79,7 +79,8 @@ class GraphService:
additional_filters: Optional additional metadata filters to determine which new documents to include
additional_documents: Optional list of specific additional document IDs to include
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
to determine which documents to include
Returns:
Graph: The updated graph
@ -91,8 +92,8 @@ class GraphService:
if "write" not in auth.permissions:
raise PermissionError("User does not have write permission")
# Get the existing graph
existing_graph = await self.db.get_graph(name, auth)
# Get the existing graph with system filters for proper user_id scoping
existing_graph = await self.db.get_graph(name, auth, system_filters=system_filters)
if not existing_graph:
raise ValueError(f"Graph '{name}' not found")
@ -403,7 +404,8 @@ class GraphService:
filters: Optional metadata filters to determine which documents to include
documents: Optional list of specific document IDs to include
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
to determine which documents to include
Returns:
Graph: The created graph
@ -444,16 +446,26 @@ class GraphService:
# Validation is now handled by type annotations
# Create a new graph with authorization info
access_control = {
"readers": [auth.entity_id],
"writers": [auth.entity_id],
"admins": [auth.entity_id],
}
# Add user_id to access_control if present (for proper user_id scoping)
if auth.user_id:
# User ID must be provided as a list to match the Graph model's type constraints
access_control["user_id"] = [auth.user_id]
# Ensure entity_type is a string value for storage
entity_type = auth.entity_type.value if hasattr(auth.entity_type, "value") else auth.entity_type
graph = Graph(
name=name,
document_ids=[doc.external_id for doc in document_objects],
filters=filters,
owner={"type": auth.entity_type, "id": auth.entity_id},
access_control={
"readers": [auth.entity_id],
"writers": [auth.entity_id],
"admins": [auth.entity_id],
},
owner={"type": entity_type, "id": auth.entity_id},
access_control=access_control,
)
# Add folder_name and end_user_id to system_metadata if provided
@ -727,17 +739,26 @@ class GraphService:
serialized_examples = custom_examples
examples_json = {"entities": serialized_examples}
examples_str = f"\nHere are some examples of the kind of entities to extract:\n```json\n{json.dumps(examples_json, indent=2)}\n```\n"
examples_str = (
f"\nHere are some examples of the kind of entities to extract:\n```json\n"
f"{json.dumps(examples_json, indent=2)}\n```\n"
)
# Modify the system message to handle properties as a string that will be parsed later
system_message = {
"role": "system",
"content": (
"You are an entity extraction and relationship extraction assistant. Extract entities and their relationships from text precisely and thoroughly, extract as many entities and relationships as possible. "
"For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
"For relationships, use a simple format with source, target, and relationship fields. Be very through, there are many relationships that are not obvious"
"IMPORTANT: The source and target fields must be simple strings representing entity labels. For example: "
"if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', target: 'Entity B', relationship: 'relates to'. "
"You are an entity extraction and relationship extraction assistant. Extract entities and "
"their relationships from text precisely and thoroughly, extract as many entities and "
"relationships as possible. "
"For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, "
"CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
"For relationships, use a simple format with source, target, and relationship fields. "
"Be very through, there are many relationships that are not obvious"
"IMPORTANT: The source and target fields must be simple strings representing "
"entity labels. For example: "
"if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', "
"target: 'Entity B', relationship: 'relates to'. "
"Respond directly in json format, without any additional text or explanations. "
),
}
@ -757,7 +778,8 @@ class GraphService:
"For relationships, specify the source entity, target entity, and the relationship between them. "
"The source and target must be simple strings matching the entity labels, not objects. "
f"{examples_str}"
'Sample relationship format: {"source": "Entity A", "target": "Entity B", "relationship": "works for"}\n\n'
'Sample relationship format: {"source": "Entity A", "target": "Entity B", '
'"relationship": "works for"}\n\n'
"Return your response as valid JSON:\n\n" + content_limited
),
}
@ -819,7 +841,8 @@ class GraphService:
# Process extraction results
entities, relationships = self._process_extraction_results(extraction_result, doc_id, chunk_number)
logger.info(
f"Extracted {len(entities)} entities and {len(relationships)} relationships from document {doc_id}, chunk {chunk_number}"
f"Extracted {len(entities)} entities and {len(relationships)} relationships from document "
f"{doc_id}, chunk {chunk_number}"
)
return entities, relationships

View File

@ -528,7 +528,6 @@ class TelemetryService:
export_timeout_millis=OTLP_TIMEOUT * 1000,
)
)
print(f"Successfully configured Honeycomb metrics exporter to {OTLP_METRICS_ENDPOINT}")
except Exception as e:
print(f"Failed to configure Honeycomb metrics exporter: {str(e)}")

View File

@ -111,7 +111,7 @@ mode = "self_hosted" # "cloud" or "self_hosted"
api_domain = "api.morphik.ai" # API domain for cloud URIs
[redis]
host = "localhost"
host = "localhost" # use "redis" for docker
port = 6379
[graph]