mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Update .gitignore, morphik.toml, and database schema; enhance graph service access control
This commit is contained in:
parent
6bd7077eec
commit
ea95b2647c
4
.gitignore
vendored
4
.gitignore
vendored
@ -33,7 +33,9 @@ offload/*
|
||||
test.pdf
|
||||
|
||||
experiments/*
|
||||
ui-component/package-lock.json
|
||||
ee/ui-component/package-lock.json/*
|
||||
ee/ui-component/node-modules/*
|
||||
ee/ui-component/.next
|
||||
|
||||
|
||||
ui-component/notebook-storage/notebooks.json
|
||||
|
@ -49,7 +49,7 @@ class GraphModel(Base):
|
||||
__tablename__ = "graphs"
|
||||
|
||||
id = Column(String, primary_key=True)
|
||||
name = Column(String, unique=True, index=True)
|
||||
name = Column(String, index=True) # Not unique globally anymore
|
||||
entities = Column(JSONB, default=list)
|
||||
relationships = Column(JSONB, default=list)
|
||||
graph_metadata = Column(JSONB, default=dict) # Renamed from 'metadata' to avoid conflict
|
||||
@ -67,6 +67,8 @@ class GraphModel(Base):
|
||||
Index("idx_graph_owner", "owner", postgresql_using="gin"),
|
||||
Index("idx_graph_access_control", "access_control", postgresql_using="gin"),
|
||||
Index("idx_graph_system_metadata", "system_metadata", postgresql_using="gin"),
|
||||
# Unique index on (name, owner->>'id'): graph names must be unique per owner, not globally
|
||||
Index("idx_graph_owner_name", "name", text("(owner->>'id')"), unique=True),
|
||||
)
|
||||
|
||||
|
||||
|
@ -79,7 +79,8 @@ class GraphService:
|
||||
additional_filters: Optional additional metadata filters to determine which new documents to include
|
||||
additional_documents: Optional list of specific additional document IDs to include
|
||||
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
|
||||
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
|
||||
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
|
||||
to determine which documents to include
|
||||
|
||||
Returns:
|
||||
Graph: The updated graph
|
||||
@ -91,8 +92,8 @@ class GraphService:
|
||||
if "write" not in auth.permissions:
|
||||
raise PermissionError("User does not have write permission")
|
||||
|
||||
# Get the existing graph
|
||||
existing_graph = await self.db.get_graph(name, auth)
|
||||
# Get the existing graph with system filters for proper user_id scoping
|
||||
existing_graph = await self.db.get_graph(name, auth, system_filters=system_filters)
|
||||
if not existing_graph:
|
||||
raise ValueError(f"Graph '{name}' not found")
|
||||
|
||||
@ -403,7 +404,8 @@ class GraphService:
|
||||
filters: Optional metadata filters to determine which documents to include
|
||||
documents: Optional list of specific document IDs to include
|
||||
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
|
||||
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
|
||||
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
|
||||
to determine which documents to include
|
||||
|
||||
Returns:
|
||||
Graph: The created graph
|
||||
@ -444,16 +446,26 @@ class GraphService:
|
||||
# Validation is now handled by type annotations
|
||||
|
||||
# Create a new graph with authorization info
|
||||
access_control = {
|
||||
"readers": [auth.entity_id],
|
||||
"writers": [auth.entity_id],
|
||||
"admins": [auth.entity_id],
|
||||
}
|
||||
|
||||
# Add user_id to access_control if present (for proper user_id scoping)
|
||||
if auth.user_id:
|
||||
# User ID must be provided as a list to match the Graph model's type constraints
|
||||
access_control["user_id"] = [auth.user_id]
|
||||
|
||||
# Ensure entity_type is a string value for storage
|
||||
entity_type = auth.entity_type.value if hasattr(auth.entity_type, "value") else auth.entity_type
|
||||
|
||||
graph = Graph(
|
||||
name=name,
|
||||
document_ids=[doc.external_id for doc in document_objects],
|
||||
filters=filters,
|
||||
owner={"type": auth.entity_type, "id": auth.entity_id},
|
||||
access_control={
|
||||
"readers": [auth.entity_id],
|
||||
"writers": [auth.entity_id],
|
||||
"admins": [auth.entity_id],
|
||||
},
|
||||
owner={"type": entity_type, "id": auth.entity_id},
|
||||
access_control=access_control,
|
||||
)
|
||||
|
||||
# Add folder_name and end_user_id to system_metadata if provided
|
||||
@ -727,17 +739,26 @@ class GraphService:
|
||||
serialized_examples = custom_examples
|
||||
|
||||
examples_json = {"entities": serialized_examples}
|
||||
examples_str = f"\nHere are some examples of the kind of entities to extract:\n```json\n{json.dumps(examples_json, indent=2)}\n```\n"
|
||||
examples_str = (
|
||||
f"\nHere are some examples of the kind of entities to extract:\n```json\n"
|
||||
f"{json.dumps(examples_json, indent=2)}\n```\n"
|
||||
)
|
||||
|
||||
# Modify the system message to handle properties as a string that will be parsed later
|
||||
system_message = {
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are an entity extraction and relationship extraction assistant. Extract entities and their relationships from text precisely and thoroughly, extract as many entities and relationships as possible. "
|
||||
"For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
|
||||
"For relationships, use a simple format with source, target, and relationship fields. Be very through, there are many relationships that are not obvious"
|
||||
"IMPORTANT: The source and target fields must be simple strings representing entity labels. For example: "
|
||||
"if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', target: 'Entity B', relationship: 'relates to'. "
|
||||
"You are an entity extraction and relationship extraction assistant. Extract entities and "
|
||||
"their relationships from text precisely and thoroughly, extract as many entities and "
|
||||
"relationships as possible. "
|
||||
"For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, "
|
||||
"CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
|
||||
"For relationships, use a simple format with source, target, and relationship fields. "
|
||||
"Be very through, there are many relationships that are not obvious"
|
||||
"IMPORTANT: The source and target fields must be simple strings representing "
|
||||
"entity labels. For example: "
|
||||
"if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', "
|
||||
"target: 'Entity B', relationship: 'relates to'. "
|
||||
"Respond directly in json format, without any additional text or explanations. "
|
||||
),
|
||||
}
|
||||
@ -757,7 +778,8 @@ class GraphService:
|
||||
"For relationships, specify the source entity, target entity, and the relationship between them. "
|
||||
"The source and target must be simple strings matching the entity labels, not objects. "
|
||||
f"{examples_str}"
|
||||
'Sample relationship format: {"source": "Entity A", "target": "Entity B", "relationship": "works for"}\n\n'
|
||||
'Sample relationship format: {"source": "Entity A", "target": "Entity B", '
|
||||
'"relationship": "works for"}\n\n'
|
||||
"Return your response as valid JSON:\n\n" + content_limited
|
||||
),
|
||||
}
|
||||
@ -819,7 +841,8 @@ class GraphService:
|
||||
# Process extraction results
|
||||
entities, relationships = self._process_extraction_results(extraction_result, doc_id, chunk_number)
|
||||
logger.info(
|
||||
f"Extracted {len(entities)} entities and {len(relationships)} relationships from document {doc_id}, chunk {chunk_number}"
|
||||
f"Extracted {len(entities)} entities and {len(relationships)} relationships from document "
|
||||
f"{doc_id}, chunk {chunk_number}"
|
||||
)
|
||||
return entities, relationships
|
||||
|
||||
|
@ -528,7 +528,6 @@ class TelemetryService:
|
||||
export_timeout_millis=OTLP_TIMEOUT * 1000,
|
||||
)
|
||||
)
|
||||
print(f"Successfully configured Honeycomb metrics exporter to {OTLP_METRICS_ENDPOINT}")
|
||||
except Exception as e:
|
||||
print(f"Failed to configure Honeycomb metrics exporter: {str(e)}")
|
||||
|
||||
|
@ -111,7 +111,7 @@ mode = "self_hosted" # "cloud" or "self_hosted"
|
||||
api_domain = "api.morphik.ai" # API domain for cloud URIs
|
||||
|
||||
[redis]
|
||||
host = "localhost"
|
||||
host = "localhost" # use "redis" for docker
|
||||
port = 6379
|
||||
|
||||
[graph]
|
||||
|
Loading…
x
Reference in New Issue
Block a user