mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Update .gitignore, morphik.toml, and database schema; enhance graph service access control
This commit is contained in:
parent
6bd7077eec
commit
ea95b2647c
4
.gitignore
vendored
4
.gitignore
vendored
@ -33,7 +33,9 @@ offload/*
|
|||||||
test.pdf
|
test.pdf
|
||||||
|
|
||||||
experiments/*
|
experiments/*
|
||||||
ui-component/package-lock.json
|
ee/ui-component/package-lock.json/*
|
||||||
|
ee/ui-component/node-modules/*
|
||||||
|
ee/ui-component/.next
|
||||||
|
|
||||||
|
|
||||||
ui-component/notebook-storage/notebooks.json
|
ui-component/notebook-storage/notebooks.json
|
||||||
|
@ -49,7 +49,7 @@ class GraphModel(Base):
|
|||||||
__tablename__ = "graphs"
|
__tablename__ = "graphs"
|
||||||
|
|
||||||
id = Column(String, primary_key=True)
|
id = Column(String, primary_key=True)
|
||||||
name = Column(String, unique=True, index=True)
|
name = Column(String, index=True) # No longer globally unique; uniqueness is enforced per owner by idx_graph_owner_name
|
||||||
entities = Column(JSONB, default=list)
|
entities = Column(JSONB, default=list)
|
||||||
relationships = Column(JSONB, default=list)
|
relationships = Column(JSONB, default=list)
|
||||||
graph_metadata = Column(JSONB, default=dict) # Renamed from 'metadata' to avoid conflict
|
graph_metadata = Column(JSONB, default=dict) # Renamed from 'metadata' to avoid conflict
|
||||||
@ -67,6 +67,8 @@ class GraphModel(Base):
|
|||||||
Index("idx_graph_owner", "owner", postgresql_using="gin"),
|
Index("idx_graph_owner", "owner", postgresql_using="gin"),
|
||||||
Index("idx_graph_access_control", "access_control", postgresql_using="gin"),
|
Index("idx_graph_access_control", "access_control", postgresql_using="gin"),
|
||||||
Index("idx_graph_system_metadata", "system_metadata", postgresql_using="gin"),
|
Index("idx_graph_system_metadata", "system_metadata", postgresql_using="gin"),
|
||||||
|
# Unique index on (name, owner->>'id'): graph names must be unique per owner, not globally
|
||||||
|
Index("idx_graph_owner_name", "name", text("(owner->>'id')"), unique=True),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,7 +79,8 @@ class GraphService:
|
|||||||
additional_filters: Optional additional metadata filters to determine which new documents to include
|
additional_filters: Optional additional metadata filters to determine which new documents to include
|
||||||
additional_documents: Optional list of specific additional document IDs to include
|
additional_documents: Optional list of specific additional document IDs to include
|
||||||
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
|
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
|
||||||
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
|
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
|
||||||
|
to determine which documents to include
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Graph: The updated graph
|
Graph: The updated graph
|
||||||
@ -91,8 +92,8 @@ class GraphService:
|
|||||||
if "write" not in auth.permissions:
|
if "write" not in auth.permissions:
|
||||||
raise PermissionError("User does not have write permission")
|
raise PermissionError("User does not have write permission")
|
||||||
|
|
||||||
# Get the existing graph
|
# Get the existing graph with system filters for proper user_id scoping
|
||||||
existing_graph = await self.db.get_graph(name, auth)
|
existing_graph = await self.db.get_graph(name, auth, system_filters=system_filters)
|
||||||
if not existing_graph:
|
if not existing_graph:
|
||||||
raise ValueError(f"Graph '{name}' not found")
|
raise ValueError(f"Graph '{name}' not found")
|
||||||
|
|
||||||
@ -403,7 +404,8 @@ class GraphService:
|
|||||||
filters: Optional metadata filters to determine which documents to include
|
filters: Optional metadata filters to determine which documents to include
|
||||||
documents: Optional list of specific document IDs to include
|
documents: Optional list of specific document IDs to include
|
||||||
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
|
prompt_overrides: Optional GraphPromptOverrides with customizations for prompts
|
||||||
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id) to determine which documents to include
|
system_filters: Optional system metadata filters (e.g. folder_name, end_user_id)
|
||||||
|
to determine which documents to include
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Graph: The created graph
|
Graph: The created graph
|
||||||
@ -444,16 +446,26 @@ class GraphService:
|
|||||||
# Validation is now handled by type annotations
|
# Validation is now handled by type annotations
|
||||||
|
|
||||||
# Create a new graph with authorization info
|
# Create a new graph with authorization info
|
||||||
|
access_control = {
|
||||||
|
"readers": [auth.entity_id],
|
||||||
|
"writers": [auth.entity_id],
|
||||||
|
"admins": [auth.entity_id],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add user_id to access_control if present (for proper user_id scoping)
|
||||||
|
if auth.user_id:
|
||||||
|
# User ID must be provided as a list to match the Graph model's type constraints
|
||||||
|
access_control["user_id"] = [auth.user_id]
|
||||||
|
|
||||||
|
# Ensure entity_type is a string value for storage
|
||||||
|
entity_type = auth.entity_type.value if hasattr(auth.entity_type, "value") else auth.entity_type
|
||||||
|
|
||||||
graph = Graph(
|
graph = Graph(
|
||||||
name=name,
|
name=name,
|
||||||
document_ids=[doc.external_id for doc in document_objects],
|
document_ids=[doc.external_id for doc in document_objects],
|
||||||
filters=filters,
|
filters=filters,
|
||||||
owner={"type": auth.entity_type, "id": auth.entity_id},
|
owner={"type": entity_type, "id": auth.entity_id},
|
||||||
access_control={
|
access_control=access_control,
|
||||||
"readers": [auth.entity_id],
|
|
||||||
"writers": [auth.entity_id],
|
|
||||||
"admins": [auth.entity_id],
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add folder_name and end_user_id to system_metadata if provided
|
# Add folder_name and end_user_id to system_metadata if provided
|
||||||
@ -727,17 +739,26 @@ class GraphService:
|
|||||||
serialized_examples = custom_examples
|
serialized_examples = custom_examples
|
||||||
|
|
||||||
examples_json = {"entities": serialized_examples}
|
examples_json = {"entities": serialized_examples}
|
||||||
examples_str = f"\nHere are some examples of the kind of entities to extract:\n```json\n{json.dumps(examples_json, indent=2)}\n```\n"
|
examples_str = (
|
||||||
|
f"\nHere are some examples of the kind of entities to extract:\n```json\n"
|
||||||
|
f"{json.dumps(examples_json, indent=2)}\n```\n"
|
||||||
|
)
|
||||||
|
|
||||||
# Modify the system message to handle properties as a string that will be parsed later
|
# Modify the system message to handle properties as a string that will be parsed later
|
||||||
system_message = {
|
system_message = {
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": (
|
"content": (
|
||||||
"You are an entity extraction and relationship extraction assistant. Extract entities and their relationships from text precisely and thoroughly, extract as many entities and relationships as possible. "
|
"You are an entity extraction and relationship extraction assistant. Extract entities and "
|
||||||
"For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
|
"their relationships from text precisely and thoroughly, extract as many entities and "
|
||||||
"For relationships, use a simple format with source, target, and relationship fields. Be very through, there are many relationships that are not obvious"
|
"relationships as possible. "
|
||||||
"IMPORTANT: The source and target fields must be simple strings representing entity labels. For example: "
|
"For entities, include entity label and type (some examples: PERSON, ORGANIZATION, LOCATION, "
|
||||||
"if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', target: 'Entity B', relationship: 'relates to'. "
|
"CONCEPT, etc.). If the user has given examples, use those, these are just suggestions"
|
||||||
|
"For relationships, use a simple format with source, target, and relationship fields. "
|
||||||
|
"Be very through, there are many relationships that are not obvious"
|
||||||
|
"IMPORTANT: The source and target fields must be simple strings representing "
|
||||||
|
"entity labels. For example: "
|
||||||
|
"if you extract entities 'Entity A' and 'Entity B', a relationship would have source: 'Entity A', "
|
||||||
|
"target: 'Entity B', relationship: 'relates to'. "
|
||||||
"Respond directly in json format, without any additional text or explanations. "
|
"Respond directly in json format, without any additional text or explanations. "
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@ -757,7 +778,8 @@ class GraphService:
|
|||||||
"For relationships, specify the source entity, target entity, and the relationship between them. "
|
"For relationships, specify the source entity, target entity, and the relationship between them. "
|
||||||
"The source and target must be simple strings matching the entity labels, not objects. "
|
"The source and target must be simple strings matching the entity labels, not objects. "
|
||||||
f"{examples_str}"
|
f"{examples_str}"
|
||||||
'Sample relationship format: {"source": "Entity A", "target": "Entity B", "relationship": "works for"}\n\n'
|
'Sample relationship format: {"source": "Entity A", "target": "Entity B", '
|
||||||
|
'"relationship": "works for"}\n\n'
|
||||||
"Return your response as valid JSON:\n\n" + content_limited
|
"Return your response as valid JSON:\n\n" + content_limited
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@ -819,7 +841,8 @@ class GraphService:
|
|||||||
# Process extraction results
|
# Process extraction results
|
||||||
entities, relationships = self._process_extraction_results(extraction_result, doc_id, chunk_number)
|
entities, relationships = self._process_extraction_results(extraction_result, doc_id, chunk_number)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Extracted {len(entities)} entities and {len(relationships)} relationships from document {doc_id}, chunk {chunk_number}"
|
f"Extracted {len(entities)} entities and {len(relationships)} relationships from document "
|
||||||
|
f"{doc_id}, chunk {chunk_number}"
|
||||||
)
|
)
|
||||||
return entities, relationships
|
return entities, relationships
|
||||||
|
|
||||||
|
@ -528,7 +528,6 @@ class TelemetryService:
|
|||||||
export_timeout_millis=OTLP_TIMEOUT * 1000,
|
export_timeout_millis=OTLP_TIMEOUT * 1000,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
print(f"Successfully configured Honeycomb metrics exporter to {OTLP_METRICS_ENDPOINT}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Failed to configure Honeycomb metrics exporter: {str(e)}")
|
print(f"Failed to configure Honeycomb metrics exporter: {str(e)}")
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ mode = "self_hosted" # "cloud" or "self_hosted"
|
|||||||
api_domain = "api.morphik.ai" # API domain for cloud URIs
|
api_domain = "api.morphik.ai" # API domain for cloud URIs
|
||||||
|
|
||||||
[redis]
|
[redis]
|
||||||
host = "localhost"
|
host = "localhost" # use "redis" for docker
|
||||||
port = 6379
|
port = 6379
|
||||||
|
|
||||||
[graph]
|
[graph]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user