mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Fix docker pg hardcoded dimensions
This commit is contained in:
parent
df3a40583a
commit
7ef3bd2baa
94
init.sql
94
init.sql
@ -1,94 +0,0 @@
|
|||||||
-- Enable pgvector extension
|
|
||||||
CREATE EXTENSION IF NOT EXISTS vector;
|
|
||||||
|
|
||||||
-- Create documents table
|
|
||||||
CREATE TABLE IF NOT EXISTS documents (
|
|
||||||
external_id VARCHAR PRIMARY KEY,
|
|
||||||
owner JSONB,
|
|
||||||
content_type VARCHAR,
|
|
||||||
filename VARCHAR,
|
|
||||||
doc_metadata JSONB DEFAULT '{}',
|
|
||||||
storage_info JSONB DEFAULT '{}',
|
|
||||||
system_metadata JSONB DEFAULT '{}',
|
|
||||||
additional_metadata JSONB DEFAULT '{}',
|
|
||||||
access_control JSONB DEFAULT '{}',
|
|
||||||
chunk_ids JSONB DEFAULT '[]'
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Create indexes for documents table
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_owner_id ON documents USING gin(owner);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_access_control ON documents USING gin(access_control);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_system_metadata ON documents USING gin(system_metadata);
|
|
||||||
|
|
||||||
-- Create vector_embeddings table
|
|
||||||
CREATE TABLE IF NOT EXISTS vector_embeddings (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
document_id VARCHAR(255),
|
|
||||||
chunk_number INTEGER,
|
|
||||||
content TEXT,
|
|
||||||
chunk_metadata TEXT,
|
|
||||||
embedding vector(768),
|
|
||||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Create caches table
|
|
||||||
CREATE TABLE IF NOT EXISTS caches (
|
|
||||||
name TEXT PRIMARY KEY,
|
|
||||||
metadata JSON NOT NULL,
|
|
||||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
||||||
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Create vector index
|
|
||||||
CREATE INDEX IF NOT EXISTS vector_idx
|
|
||||||
ON vector_embeddings USING ivfflat (embedding vector_l2_ops)
|
|
||||||
WITH (lists = 100);
|
|
||||||
|
|
||||||
-- Initialize multi-vector embeddings table for new multi-vector functionality.
|
|
||||||
CREATE TABLE IF NOT EXISTS multi_vector_embeddings (
|
|
||||||
id BIGSERIAL PRIMARY KEY,
|
|
||||||
embeddings BIT(128)[]
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Create function for multi-vector similarity computation
|
|
||||||
CREATE OR REPLACE FUNCTION max_sim(document bit[], query bit[]) RETURNS double precision AS $$
|
|
||||||
WITH queries AS (
|
|
||||||
SELECT row_number() OVER () AS query_number, *
|
|
||||||
FROM (SELECT unnest(query) AS query) AS foo
|
|
||||||
),
|
|
||||||
documents AS (
|
|
||||||
SELECT unnest(document) AS document
|
|
||||||
),
|
|
||||||
similarities AS (
|
|
||||||
SELECT
|
|
||||||
query_number,
|
|
||||||
1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity
|
|
||||||
FROM queries CROSS JOIN documents
|
|
||||||
),
|
|
||||||
max_similarities AS (
|
|
||||||
SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number
|
|
||||||
)
|
|
||||||
SELECT SUM(max_similarity) FROM max_similarities;
|
|
||||||
$$ LANGUAGE SQL;
|
|
||||||
|
|
||||||
-- Create graphs table for knowledge graph functionality
|
|
||||||
CREATE TABLE IF NOT EXISTS graphs (
|
|
||||||
id VARCHAR PRIMARY KEY,
|
|
||||||
name VARCHAR NOT NULL,
|
|
||||||
entities JSONB DEFAULT '[]',
|
|
||||||
relationships JSONB DEFAULT '[]',
|
|
||||||
graph_metadata JSONB DEFAULT '{}',
|
|
||||||
document_ids JSONB DEFAULT '[]',
|
|
||||||
filters JSONB DEFAULT NULL,
|
|
||||||
created_at VARCHAR,
|
|
||||||
updated_at VARCHAR,
|
|
||||||
owner JSONB DEFAULT '{}',
|
|
||||||
access_control JSONB DEFAULT '{"readers": [], "writers": [], "admins": []}'
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Create index for graph name and owner for faster lookups
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_graph_name ON graphs(name);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_graph_owner ON graphs USING gin(owner);
|
|
||||||
|
|
||||||
-- Create unique constraint on name scoped by owner
|
|
||||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_owner_name ON graphs((owner->>'id'), name);
|
|
@ -18,8 +18,7 @@ RUN git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git \
|
|||||||
RUN apk del git build-base clang llvm postgresql-dev \
|
RUN apk del git build-base clang llvm postgresql-dev \
|
||||||
&& rm -rf /pgvector
|
&& rm -rf /pgvector
|
||||||
|
|
||||||
# Copy initialization scripts
|
|
||||||
COPY init.sql /docker-entrypoint-initdb.d/
|
|
||||||
|
|
||||||
# Copy data dump
|
# Copy data dump
|
||||||
COPY dump.sql /tmp/dump.sql
|
COPY dump.sql /tmp/dump.sql
|
||||||
|
|
||||||
|
# (No database init script necessary — schema is created by the application at runtime)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user