diff --git a/core/vector_store/multi_vector_store.py b/core/vector_store/multi_vector_store.py index 692830c..9eecf8d 100644 --- a/core/vector_store/multi_vector_store.py +++ b/core/vector_store/multi_vector_store.py @@ -132,7 +132,10 @@ class MultiVectorStore(BaseVectorStore): SELECT unnest(document) AS document ), similarities AS ( - SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity FROM queries CROSS JOIN documents + SELECT + query_number, + 1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity + FROM queries CROSS JOIN documents ), max_similarities AS ( SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number diff --git a/databridge.toml b/databridge.toml index 1e91590..6e14c74 100644 --- a/databridge.toml +++ b/databridge.toml @@ -1,5 +1,5 @@ [api] -host = "localhost" +host = "localhost" # Needs to be "0.0.0.0" for docker port = 8000 reload = true diff --git a/docker-compose.yml b/docker-compose.yml index a95934e..89678c1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,20 @@ version: '3.8' +x-ollama-check: &ollama-check + # This command reads the toml file and checks if any provider is set to "ollama" + command: > + /bin/sh -c ' + grep -q "provider *= *\"ollama\"" databridge.toml && + echo "true" > /tmp/needs_ollama || + echo "false" > /tmp/needs_ollama' + services: + config-check: + image: alpine + volumes: + - ./databridge.toml:/databridge.toml + <<: *ollama-check + databridge: build: . ports: @@ -16,11 +30,15 @@ services: - ./storage:/app/storage - ./logs:/app/logs - ./databridge.toml:/app/databridge.toml + - huggingface_cache:/root/.cache/huggingface depends_on: postgres: condition: service_healthy + config-check: + condition: service_completed_successfully ollama: condition: service_started + required: false networks: - databridge-network env_file: @@ -51,6 +69,8 @@ services: ollama: image: ollama/ollama:latest + profiles: + - ollama ports: - "11434:11434" volumes: @@ -66,4 +86,5 @@ networks: volumes: postgres_data: - ollama_data: + ollama_data: + huggingface_cache: diff --git a/dockerfile b/dockerfile index 5856a70..d475474 100644 --- a/dockerfile +++ b/dockerfile @@ -65,6 +65,10 @@ reload = false\n\ \n\ [auth]\n\ jwt_algorithm = "HS256"\n\ +dev_mode = true\n\ +dev_entity_id = "dev_user"\n\ +dev_entity_type = "developer"\n\ +dev_permissions = ["read", "write", "admin"]\n\ \n\ [completion]\n\ provider = "ollama"\n\ diff --git a/dump.sql b/dump.sql new file mode 100644 index 0000000..e69de29 diff --git a/init.sql b/init.sql index 698a840..7792c76 100644 --- a/init.sql +++ b/init.sql @@ -60,7 +60,9 @@ CREATE OR REPLACE FUNCTION max_sim(document bit[], query bit[]) RETURNS double p SELECT unnest(document) AS document ), similarities AS ( - SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity + SELECT + query_number, + 1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity FROM queries CROSS JOIN documents ), max_similarities AS ( diff --git a/postgres.dockerfile b/postgres.dockerfile index ca92690..4b72bf3 100644 --- a/postgres.dockerfile +++ b/postgres.dockerfile @@ -20,3 +20,6 @@ RUN apk del git build-base clang llvm postgresql-dev \ # Copy initialization scripts COPY init.sql /docker-entrypoint-initdb.d/ + +# Copy data dump +COPY dump.sql /tmp/dump.sql \ No newline at end of file diff --git a/quick_setup.py b/quick_setup.py index 498757d..081025a 100644 --- a/quick_setup.py +++ b/quick_setup.py @@ -328,7 +328,10 @@ def setup_postgres(): SELECT unnest(document_bits) AS document ), similarities AS ( - SELECT query_number, 1 - ((document <~> query) / bit_length(query)) AS similarity FROM queries CROSS JOIN documents + SELECT + query_number, + 1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity + FROM queries CROSS JOIN documents ), max_similarities AS ( SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number