mirror of
https://github.com/james-m-jordan/morphik-core.git
synced 2025-05-09 19:32:38 +00:00
Change in max_sim function for newer postgresql versions (#54)
* Add filename option for text documents
* Fix: duplicated env variables
* Include chunk score in query response
* Don't initialize ColPali store if one is not provided
* Add score to ChunkSource
* Add Hugging Face cache volume to avoid re-downloads
* Replace <~> in maximum_similarity calculation
* Add dev_mode setup to default configuration
* Dump with databridge documents (no PDFs)
* Add dev_mode setup to default configuration
* Comment Docker instructions for API host
* Use empty dump file
This commit is contained in:
parent
9df9196d51
commit
5d76521059
@ -132,7 +132,10 @@ class MultiVectorStore(BaseVectorStore):
|
||||
SELECT unnest(document) AS document
|
||||
),
|
||||
similarities AS (
|
||||
SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity FROM queries CROSS JOIN documents
|
||||
SELECT
|
||||
query_number,
|
||||
1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity
|
||||
FROM queries CROSS JOIN documents
|
||||
),
|
||||
max_similarities AS (
|
||||
SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number
|
||||
|
@ -1,5 +1,5 @@
|
||||
[api]
|
||||
host = "localhost"
|
||||
host = "localhost" # Needs to be "0.0.0.0" for docker
|
||||
port = 8000
|
||||
reload = true
|
||||
|
||||
|
@ -1,6 +1,20 @@
|
||||
version: '3.8'
|
||||
|
||||
x-ollama-check: &ollama-check
|
||||
# This command reads the toml file and checks if any provider is set to "ollama"
|
||||
command: >
|
||||
/bin/sh -c '
|
||||
grep -q "provider *= *\"ollama\"" databridge.toml &&
|
||||
echo "true" > /tmp/needs_ollama ||
|
||||
echo "false" > /tmp/needs_ollama'
|
||||
|
||||
services:
|
||||
config-check:
|
||||
image: alpine
|
||||
volumes:
|
||||
- ./databridge.toml:/databridge.toml
|
||||
<<: *ollama-check
|
||||
|
||||
databridge:
|
||||
build: .
|
||||
ports:
|
||||
@ -16,11 +30,15 @@ services:
|
||||
- ./storage:/app/storage
|
||||
- ./logs:/app/logs
|
||||
- ./databridge.toml:/app/databridge.toml
|
||||
- huggingface_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
config-check:
|
||||
condition: service_completed_successfully
|
||||
ollama:
|
||||
condition: service_started
|
||||
required: false
|
||||
networks:
|
||||
- databridge-network
|
||||
env_file:
|
||||
@ -51,6 +69,8 @@ services:
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
profiles:
|
||||
- ollama
|
||||
ports:
|
||||
- "11434:11434"
|
||||
volumes:
|
||||
@ -66,4 +86,5 @@ networks:
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
ollama_data:
|
||||
ollama_data:
|
||||
huggingface_cache:
|
||||
|
@ -65,6 +65,10 @@ reload = false\n\
|
||||
\n\
|
||||
[auth]\n\
|
||||
jwt_algorithm = "HS256"\n\
|
||||
dev_mode = true\n\
|
||||
dev_entity_id = "dev_user"\n\
|
||||
dev_entity_type = "developer"\n\
|
||||
dev_permissions = ["read", "write", "admin"]\n\
|
||||
\n\
|
||||
[completion]\n\
|
||||
provider = "ollama"\n\
|
||||
|
4
init.sql
4
init.sql
@ -60,7 +60,9 @@ CREATE OR REPLACE FUNCTION max_sim(document bit[], query bit[]) RETURNS double p
|
||||
SELECT unnest(document) AS document
|
||||
),
|
||||
similarities AS (
|
||||
SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity
|
||||
SELECT
|
||||
query_number,
|
||||
1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity
|
||||
FROM queries CROSS JOIN documents
|
||||
),
|
||||
max_similarities AS (
|
||||
|
@ -20,3 +20,6 @@ RUN apk del git build-base clang llvm postgresql-dev \
|
||||
|
||||
# Copy initialization scripts
|
||||
COPY init.sql /docker-entrypoint-initdb.d/
|
||||
|
||||
# Copy data dump
|
||||
COPY dump.sql /tmp/dump.sql
|
@ -328,7 +328,10 @@ def setup_postgres():
|
||||
SELECT unnest(document_bits) AS document
|
||||
),
|
||||
similarities AS (
|
||||
SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity FROM queries CROSS JOIN documents
|
||||
SELECT
|
||||
query_number,
|
||||
1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity
|
||||
FROM queries CROSS JOIN documents
|
||||
),
|
||||
max_similarities AS (
|
||||
SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number
|
||||
|
Loading…
x
Reference in New Issue
Block a user