mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
fix quick setup for pgvector
This commit is contained in:
parent
3825ddcc2c
commit
9660a81120
@ -1,5 +1,5 @@
|
|||||||
JWT_SECRET_KEY="..."
|
JWT_SECRET_KEY="..."
|
||||||
POSTGRES_URI="postgresql+asyncpg://user:password@localhost:5432/databridge" # Required for PostgreSQL database
|
POSTGRES_URI="postgresql+asyncpg://postgres:postgres@localhost:5432/databridge" # Required for PostgreSQL database
|
||||||
MONGODB_URI="..." # Optional: Only needed if using MongoDB
|
MONGODB_URI="..." # Optional: Only needed if using MongoDB
|
||||||
|
|
||||||
UNSTRUCTURED_API_KEY="..." # Optional: Needed for parsing via unstructured API
|
UNSTRUCTURED_API_KEY="..." # Optional: Needed for parsing via unstructured API
|
||||||
|
@ -243,31 +243,48 @@ def setup_postgres():
|
|||||||
from core.database.postgres_database import Base
|
from core.database.postgres_database import Base
|
||||||
from core.vector_store.pgvector_store import Base as VectorBase
|
from core.vector_store.pgvector_store import Base as VectorBase
|
||||||
|
|
||||||
|
# Create regular tables first
|
||||||
await conn.run_sync(Base.metadata.create_all)
|
await conn.run_sync(Base.metadata.create_all)
|
||||||
await conn.run_sync(VectorBase.metadata.create_all)
|
LOGGER.info("Created base PostgreSQL tables")
|
||||||
LOGGER.info("Created all PostgreSQL tables and indexes")
|
|
||||||
|
|
||||||
# Create vector index with configuration from settings
|
# Get vector dimensions from config
|
||||||
table_name = "document_chunks" # Default table name for pgvector
|
|
||||||
dimensions = CONFIG["embedding"]["dimensions"]
|
dimensions = CONFIG["embedding"]["dimensions"]
|
||||||
|
|
||||||
# First, alter the embedding column to be a vector
|
# Drop existing vector index if it exists
|
||||||
alter_sql = f"""
|
drop_index_sql = """
|
||||||
ALTER TABLE {table_name}
|
DROP INDEX IF EXISTS vector_idx;
|
||||||
ALTER COLUMN embedding TYPE vector({dimensions})
|
|
||||||
USING embedding::vector({dimensions});
|
|
||||||
"""
|
"""
|
||||||
await conn.execute(text(alter_sql))
|
await conn.execute(text(drop_index_sql))
|
||||||
LOGGER.info(f"Altered embedding column to be vector({dimensions})")
|
|
||||||
|
# Drop existing vector embeddings table if it exists
|
||||||
|
drop_table_sql = """
|
||||||
|
DROP TABLE IF EXISTS vector_embeddings;
|
||||||
|
"""
|
||||||
|
await conn.execute(text(drop_table_sql))
|
||||||
|
|
||||||
|
# Create vector embeddings table with proper vector column
|
||||||
|
create_table_sql = f"""
|
||||||
|
CREATE TABLE vector_embeddings (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
document_id VARCHAR(255),
|
||||||
|
chunk_number INTEGER,
|
||||||
|
content TEXT,
|
||||||
|
chunk_metadata TEXT,
|
||||||
|
embedding vector({dimensions}),
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
await conn.execute(text(create_table_sql))
|
||||||
|
LOGGER.info("Created vector_embeddings table with vector column")
|
||||||
|
|
||||||
# Create the vector index
|
# Create the vector index
|
||||||
index_sql = f"""
|
index_sql = f"""
|
||||||
CREATE INDEX IF NOT EXISTS vector_idx
|
CREATE INDEX vector_idx
|
||||||
ON {table_name} USING ivfflat (embedding vector_l2_ops)
|
ON vector_embeddings USING ivfflat (embedding vector_l2_ops)
|
||||||
WITH (lists = 100);
|
WITH (lists = 100);
|
||||||
"""
|
"""
|
||||||
await conn.execute(text(index_sql))
|
await conn.execute(text(index_sql))
|
||||||
LOGGER.info(f"Created IVFFlat index on {table_name}")
|
LOGGER.info("Created IVFFlat index on vector_embeddings")
|
||||||
|
|
||||||
await engine.dispose()
|
await engine.dispose()
|
||||||
LOGGER.info("PostgreSQL setup completed successfully")
|
LOGGER.info("PostgreSQL setup completed successfully")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user