diff --git a/init.sql b/init.sql deleted file mode 100644 index 5d4f299..0000000 --- a/init.sql +++ /dev/null @@ -1,94 +0,0 @@ --- Enable pgvector extension -CREATE EXTENSION IF NOT EXISTS vector; - --- Create documents table -CREATE TABLE IF NOT EXISTS documents ( - external_id VARCHAR PRIMARY KEY, - owner JSONB, - content_type VARCHAR, - filename VARCHAR, - doc_metadata JSONB DEFAULT '{}', - storage_info JSONB DEFAULT '{}', - system_metadata JSONB DEFAULT '{}', - additional_metadata JSONB DEFAULT '{}', - access_control JSONB DEFAULT '{}', - chunk_ids JSONB DEFAULT '[]' -); - --- Create indexes for documents table -CREATE INDEX IF NOT EXISTS idx_owner_id ON documents USING gin(owner); -CREATE INDEX IF NOT EXISTS idx_access_control ON documents USING gin(access_control); -CREATE INDEX IF NOT EXISTS idx_system_metadata ON documents USING gin(system_metadata); - --- Create vector_embeddings table -CREATE TABLE IF NOT EXISTS vector_embeddings ( - id SERIAL PRIMARY KEY, - document_id VARCHAR(255), - chunk_number INTEGER, - content TEXT, - chunk_metadata TEXT, - embedding vector(768), - created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP -); - --- Create caches table -CREATE TABLE IF NOT EXISTS caches ( - name TEXT PRIMARY KEY, - metadata JSON NOT NULL, - created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP -); - --- Create vector index -CREATE INDEX IF NOT EXISTS vector_idx -ON vector_embeddings USING ivfflat (embedding vector_l2_ops) -WITH (lists = 100); - --- Initialize multi-vector embeddings table for new multi-vector functionality. -CREATE TABLE IF NOT EXISTS multi_vector_embeddings ( - id BIGSERIAL PRIMARY KEY, - embeddings BIT(128)[] -); - --- Create function for multi-vector similarity computation -CREATE OR REPLACE FUNCTION max_sim(document bit[], query bit[]) RETURNS double precision AS $$ - WITH queries AS ( - SELECT row_number() OVER () AS query_number, * - FROM (SELECT unnest(query) AS query) AS foo - ), - documents AS ( - SELECT unnest(document) AS document - ), - similarities AS ( - SELECT - query_number, - 1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity - FROM queries CROSS JOIN documents - ), - max_similarities AS ( - SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number - ) - SELECT SUM(max_similarity) FROM max_similarities; -$$ LANGUAGE SQL; - --- Create graphs table for knowledge graph functionality -CREATE TABLE IF NOT EXISTS graphs ( - id VARCHAR PRIMARY KEY, - name VARCHAR NOT NULL, - entities JSONB DEFAULT '[]', - relationships JSONB DEFAULT '[]', - graph_metadata JSONB DEFAULT '{}', - document_ids JSONB DEFAULT '[]', - filters JSONB DEFAULT NULL, - created_at VARCHAR, - updated_at VARCHAR, - owner JSONB DEFAULT '{}', - access_control JSONB DEFAULT '{"readers": [], "writers": [], "admins": []}' -); - --- Create index for graph name and owner for faster lookups -CREATE INDEX IF NOT EXISTS idx_graph_name ON graphs(name); -CREATE INDEX IF NOT EXISTS idx_graph_owner ON graphs USING gin(owner); - --- Create unique constraint on name scoped by owner -CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_owner_name ON graphs((owner->>'id'), name); diff --git a/postgres.dockerfile b/postgres.dockerfile index 5f69dca..d21099c 100644 --- a/postgres.dockerfile +++ b/postgres.dockerfile @@ -18,8 +18,7 @@ RUN git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git \ RUN apk del git build-base clang llvm postgresql-dev \ && rm -rf /pgvector -# Copy initialization scripts -COPY init.sql /docker-entrypoint-initdb.d/ - # Copy data dump COPY dump.sql /tmp/dump.sql + +# (No database init script necessary — schema is created by the application at runtime)