Msingh qdrant embeddings search (#1220)

This commit is contained in:
msingh-openai 2024-05-30 08:47:44 -07:00 committed by GitHub
parent a5197083bd
commit f0bbde1e8d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -44,67 +44,32 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"id": "8d8810f9", "id": "8d8810f9",
"metadata": { "metadata": {},
"ExecuteTime": { "outputs": [],
"end_time": "2023-06-29T12:59:21.344233180Z",
"start_time": "2023-06-29T12:59:00.815088712Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting qdrant-client\r\n",
" ...\r\n",
"Successfully installed certifi-2023.5.7 grpcio-1.56.0 grpcio-tools-1.56.0 h11-0.14.0 h2-4.1.0 hpack-4.0.0 httpcore-0.17.2 httpx-0.24.1 hyperframe-6.0.1 numpy-1.25.0 portalocker-2.7.0 protobuf-4.23.3 pydantic-1.10.9 qdrant-client-1.3.1 typing-extensions-4.5.0 urllib3-1.26.16\r\n",
"Collecting wget\r\n",
" Using cached wget-3.2.zip (10 kB)\r\n",
" Preparing metadata (setup.py) ... \u001B[?25ldone\r\n",
"\u001B[?25hBuilding wheels for collected packages: wget\r\n",
" Building wheel for wget (setup.py) ... \u001B[?25ldone\r\n",
"\u001B[?25h Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9657 sha256=eb5f15f12150fc304e7b14973424f696fa8d95225772bc0cbc0b318bf92e04b9\r\n",
" Stored in directory: /home/user/.cache/pip/wheels/04/5f/3e/46cc37c5d698415694d83f607f833f83f0149e49b3af9d0f38\r\n",
"Successfully built wget\r\n",
"Installing collected packages: wget\r\n",
"Successfully installed wget-3.2\r\n"
]
}
],
"source": [ "source": [
"# We'll need to install Qdrant client\n", "# Install the packages this notebook imports; %pip (unlike !pip) targets the running kernel's environment\n",
"!pip install qdrant-client\n", "%pip install qdrant-client \"openai>=1.0\" pandas requests tqdm"
"\n",
"#Install wget to pull zip file\n",
"!pip install wget"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 2,
"id": "5be94df6", "id": "5be94df6",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-06-29T13:00:32.715638041Z", "end_time": "2024-05-21T23:49:06.926613Z",
"start_time": "2023-06-29T13:00:31.654032435Z" "start_time": "2024-05-21T23:49:06.923221Z"
} }
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import openai\n", "import openai\n",
"\n",
"from typing import List, Iterator\n",
"import pandas as pd\n", "import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import wget\n",
"from ast import literal_eval\n", "from ast import literal_eval\n",
"import qdrant_client # Qdrant's client library for Python\n",
"\n", "\n",
"# Qdrant's client library for Python\n", "# This can be changed to the embedding model of your choice. Make sure it's the same model that is used for generating embeddings\n",
"import qdrant_client\n", "EMBEDDING_MODEL = \"text-embedding-ada-002\"\n",
"\n",
"# I've set this to our new embeddings model, this can be changed to the embedding model of your choice\n",
"EMBEDDING_MODEL = \"text-embedding-3-small\"\n",
"\n", "\n",
"# Ignore unclosed SSL socket warnings - optional in case you get these errors\n", "# Ignore unclosed SSL socket warnings - optional in case you get these errors\n",
"import warnings\n", "import warnings\n",
@ -125,41 +90,34 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 3,
"id": "5dff8b55", "id": "5dff8b55",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-06-29T13:02:47.656128622Z", "end_time": "2024-05-21T23:49:54.889503Z",
"start_time": "2023-06-29T13:00:39.079229873Z" "start_time": "2024-05-21T23:49:41.132888Z"
} }
}, },
"outputs": [ "outputs": [],
{
"data": {
"text/plain": [
"'vector_database_wikipedia_articles_embedded.zip'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import requests\n",
"\n",
"embeddings_url = 'https://cdn.openai.com/API/examples/data/vector_database_wikipedia_articles_embedded.zip'\n", "embeddings_url = 'https://cdn.openai.com/API/examples/data/vector_database_wikipedia_articles_embedded.zip'\n",
"\n", "\n",
"# The file is ~700 MB so this will take some time\n", "# The file is ~700 MB so this will take some time\n",
"wget.download(embeddings_url)" "# Stream the response to disk in chunks so the whole archive is never held in memory at once.\n",
"# verify=True (the default) keeps TLS certificate validation enabled.\n",
"with requests.get(embeddings_url, stream=True, timeout=60, verify=True) as response:\n",
"    response.raise_for_status()  # fail loudly instead of saving an HTTP error page as the zip\n",
"    with open('vector_database_wikipedia_articles_embedded.zip', 'wb') as file:\n",
"        for chunk in response.iter_content(chunk_size=1024 * 1024):\n",
"            file.write(chunk)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 4,
"id": "21097972", "id": "21097972",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-06-29T13:03:08.268413005Z", "end_time": "2024-05-21T23:50:56.268540Z",
"start_time": "2023-06-29T13:02:47.626254476Z" "start_time": "2024-05-21T23:50:53.171125Z"
} }
}, },
"outputs": [], "outputs": [],
@ -171,12 +129,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 5,
"id": "70bbd8ba", "id": "70bbd8ba",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-06-29T13:03:28.291797292Z", "end_time": "2024-05-21T23:51:08.388674Z",
"start_time": "2023-06-29T13:03:08.269033964Z" "start_time": "2024-05-21T23:50:57.592940Z"
} }
}, },
"outputs": [], "outputs": [],
@ -188,7 +146,12 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 6,
"id": "1721e45d", "id": "1721e45d",
"metadata": {}, "metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T23:51:13.706819Z",
"start_time": "2024-05-21T23:51:13.700231Z"
}
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
@ -305,7 +268,7 @@
"4 [0.021524671465158463, 0.018522677943110466, -... 4 " "4 [0.021524671465158463, 0.018522677943110466, -... 4 "
] ]
}, },
"execution_count": 6, "execution_count": 92,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -318,7 +281,12 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 7,
"id": "960b82af", "id": "960b82af",
"metadata": {}, "metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T23:55:20.588010Z",
"start_time": "2024-05-21T23:51:16.274336Z"
}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Read vectors from strings back into a list\n", "# Read vectors from strings back into a list\n",
@ -333,7 +301,12 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 8,
"id": "a334ab8b", "id": "a334ab8b",
"metadata": {}, "metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T23:55:36.075327Z",
"start_time": "2024-05-21T23:55:36.038710Z"
}
},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
@ -395,13 +368,13 @@
"id": "76d697e9", "id": "76d697e9",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:28:38.928205Z", "end_time": "2024-05-21T23:55:56.550765Z",
"start_time": "2023-01-18T09:28:38.913987Z" "start_time": "2024-05-21T23:55:56.517724Z"
} }
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"qdrant = qdrant_client.QdrantClient(host='localhost', prefer_grpc=True)" "qdrant = qdrant_client.QdrantClient(host=\"localhost\", port=6333)"
] ]
}, },
{ {
@ -410,18 +383,18 @@
"id": "1deeb539", "id": "1deeb539",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:29:19.806639Z", "end_time": "2024-05-21T23:55:57.340006Z",
"start_time": "2023-01-18T09:29:19.727897Z" "start_time": "2024-05-21T23:55:57.312830Z"
} }
}, },
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"CollectionsResponse(collections=[CollectionDescription(name='Routines')])" "CollectionsResponse(collections=[])"
] ]
}, },
"execution_count": 10, "execution_count": 96,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -448,8 +421,8 @@
"id": "1a84ee1d", "id": "1a84ee1d",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:29:22.530121Z", "end_time": "2024-05-21T23:56:04.066640Z",
"start_time": "2023-01-18T09:29:22.524604Z" "start_time": "2024-05-21T23:56:04.064878Z"
} }
}, },
"outputs": [], "outputs": [],
@ -463,8 +436,8 @@
"id": "00876f92", "id": "00876f92",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:31:14.413334Z", "end_time": "2024-05-21T23:56:05.462165Z",
"start_time": "2023-01-18T09:31:13.619079Z" "start_time": "2024-05-21T23:56:05.247948Z"
} }
}, },
"outputs": [ "outputs": [
@ -474,7 +447,49 @@
"True" "True"
] ]
}, },
"execution_count": 12, "execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get the vector size from the first row to set up the collection.\n",
"# .iloc[0] is positional, so this also works if the DataFrame index does not start at 0.\n",
"vector_size = len(article_df['content_vector'].iloc[0])\n",
"\n",
"# Set up the collection with one named vector per embedding column ('title' and 'content').\n",
"# Each named vector declares its size and distance metric; the distance metric lets the\n",
"# vector database index and search the vectors efficiently.\n",
"# NOTE(review): recent qdrant-client releases appear to deprecate recreate_collection in favour of\n",
"# collection_exists / delete_collection / create_collection - confirm against the installed version.\n",
"qdrant.recreate_collection(\n",
"    collection_name='Articles',\n",
"    vectors_config={\n",
"        'title': rest.VectorParams(\n",
"            distance=rest.Distance.COSINE,\n",
"            size=vector_size,\n",
"        ),\n",
"        'content': rest.VectorParams(\n",
"            distance=rest.Distance.COSINE,\n",
"            size=vector_size,\n",
"        ),\n",
"    }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "9f39a8c395554ca3",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T23:56:21.577594Z",
"start_time": "2024-05-21T23:56:21.460740Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 99,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -498,52 +513,67 @@
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": 13, "id": "e95be6e0c9af4c21",
"id": "f24e76ab", "metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2023-01-18T09:36:28.597535Z",
"start_time": "2023-01-18T09:36:24.108867Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"qdrant.upsert(\n", "In addition to the vector configuration defined under `vector`, we can also define the `payload` configuration. Payload is an optional field that allows you to store additional metadata alongside the vectors. In our case, we'll store the `id`, `title`, and `url` of the articles. Because the search results return the titles of the nearest articles from the payload, we can also provide the user with the URL of each article (which is part of the metadata)."
" collection_name='Articles',\n",
" points=[\n",
" rest.PointStruct(\n",
" id=k,\n",
" vector={\n",
" 'title': v['title_vector'],\n",
" 'content': v['content_vector'],\n",
" },\n",
" payload=v.to_dict(),\n",
" )\n",
" for k, v in article_df.iterrows()\n",
" ],\n",
")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 14,
"id": "f24e76ab",
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-21T23:58:25.183855Z",
"start_time": "2024-05-21T23:56:50.664145Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Upserting articles: 100%|██████████| 25000/25000 [01:34<00:00, 264.52it/s]\n"
]
}
],
"source": [
"from qdrant_client.models import PointStruct  # Import the PointStruct to store the vector and payload\n",
"from tqdm import tqdm  # Library to show the progress bar\n",
"\n",
"# Populate collection with vectors using tqdm to show progress.\n",
"# Each row becomes one point: two named vectors ('title', 'content') plus a small payload.\n",
"failed_rows = []  # index labels of rows whose upsert raised, reported after the loop\n",
"for k, v in tqdm(article_df.iterrows(), desc=\"Upserting articles\", total=len(article_df)):\n",
"    try:\n",
"        qdrant.upsert(\n",
"            collection_name='Articles',\n",
"            points=[\n",
"                PointStruct(\n",
"                    id=k,\n",
"                    vector={'title': v['title_vector'],\n",
"                            'content': v['content_vector']},\n",
"                    payload={\n",
"                        'id': v['id'],\n",
"                        'title': v['title'],\n",
"                        'url': v['url']\n",
"                    }\n",
"                )\n",
"            ]\n",
"        )\n",
"    except Exception as e:\n",
"        # Best effort: keep going, but report only the row index and title.\n",
"        # Printing the whole row would dump both embedding vectors and the article text.\n",
"        failed_rows.append(k)\n",
"        print(f\"Failed to upsert row {k} (title: {v.get('title')!r})\")\n",
"        print(f\"Exception: {e}\")\n",
"\n",
"# Surface partial failures so an incomplete collection does not go unnoticed\n",
"if failed_rows:\n",
"    print(f\"{len(failed_rows)} of {len(article_df)} rows were not upserted; first failures: {failed_rows[:20]}\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d1188a12", "id": "d1188a12",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:58:13.825886Z", "end_time": "2024-05-21T23:58:27.558407Z",
"start_time": "2023-01-18T09:58:13.816248Z" "start_time": "2024-05-21T23:58:27.549740Z"
} }
}, },
"outputs": [ "outputs": [
@ -553,7 +583,7 @@
"CountResult(count=25000)" "CountResult(count=25000)"
] ]
}, },
"execution_count": 14, "execution_count": 101,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -575,12 +605,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 16,
"id": "f1bac4ef", "id": "f1bac4ef",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:50:35.265647Z", "end_time": "2024-05-21T23:58:35.492725Z",
"start_time": "2023-01-18T09:50:35.256065Z" "start_time": "2024-05-21T23:58:35.488963Z"
} }
}, },
"outputs": [], "outputs": [],
@ -588,17 +618,18 @@
"def query_qdrant(query, collection_name, vector_name='title', top_k=20):\n", "def query_qdrant(query, collection_name, vector_name='title', top_k=20):\n",
"\n", "\n",
" # Creates embedding vector from user query\n", " # Creates embedding vector from user query\n",
" embedded_query = openai.Embedding.create(\n", " embedded_query = openai.embeddings.create(\n",
" input=query,\n", " input=query,\n",
" model=EMBEDDING_MODEL,\n", " model=EMBEDDING_MODEL,\n",
" )['data'][0]['embedding']\n", " ).data[0].embedding # We take the first embedding from the list\n",
" \n", " \n",
" query_results = qdrant.search(\n", " query_results = qdrant.search(\n",
" collection_name=collection_name,\n", " collection_name=collection_name,\n",
" query_vector=(\n", " query_vector=(\n",
" vector_name, embedded_query\n", " vector_name, embedded_query\n",
" ),\n", " ),\n",
" limit=top_k,\n", " limit=top_k, \n",
" query_filter=None\n",
" )\n", " )\n",
" \n", " \n",
" return query_results" " return query_results"
@ -606,12 +637,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 17,
"id": "aa92f3d3", "id": "aa92f3d3",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:50:46.545145Z", "end_time": "2024-05-21T23:58:37.183718Z",
"start_time": "2023-01-18T09:50:35.711020Z" "start_time": "2024-05-21T23:58:36.949491Z"
} }
}, },
"outputs": [ "outputs": [
@ -619,43 +650,43 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"1. Museum of Modern Art (Score: 0.875)\n", "1. Museum of Modern Art, URL: https://simple.wikipedia.org/wiki/Museum%20of%20Modern%20Art (Score: 0.875)\n",
"2. Western Europe (Score: 0.867)\n", "2. Western Europe, URL: https://simple.wikipedia.org/wiki/Western%20Europe (Score: 0.867)\n",
"3. Renaissance art (Score: 0.864)\n", "3. Renaissance art, URL: https://simple.wikipedia.org/wiki/Renaissance%20art (Score: 0.864)\n",
"4. Pop art (Score: 0.86)\n", "4. Pop art, URL: https://simple.wikipedia.org/wiki/Pop%20art (Score: 0.86)\n",
"5. Northern Europe (Score: 0.855)\n", "5. Northern Europe, URL: https://simple.wikipedia.org/wiki/Northern%20Europe (Score: 0.855)\n",
"6. Hellenistic art (Score: 0.853)\n", "6. Hellenistic art, URL: https://simple.wikipedia.org/wiki/Hellenistic%20art (Score: 0.853)\n",
"7. Modernist literature (Score: 0.847)\n", "7. Modernist literature, URL: https://simple.wikipedia.org/wiki/Modernist%20literature (Score: 0.847)\n",
"8. Art film (Score: 0.843)\n", "8. Art film, URL: https://simple.wikipedia.org/wiki/Art%20film (Score: 0.843)\n",
"9. Central Europe (Score: 0.843)\n", "9. Central Europe, URL: https://simple.wikipedia.org/wiki/Central%20Europe (Score: 0.842)\n",
"10. European (Score: 0.841)\n", "10. European, URL: https://simple.wikipedia.org/wiki/European (Score: 0.841)\n",
"11. Art (Score: 0.841)\n", "11. Art, URL: https://simple.wikipedia.org/wiki/Art (Score: 0.841)\n",
"12. Byzantine art (Score: 0.841)\n", "12. Byzantine art, URL: https://simple.wikipedia.org/wiki/Byzantine%20art (Score: 0.841)\n",
"13. Postmodernism (Score: 0.84)\n", "13. Postmodernism, URL: https://simple.wikipedia.org/wiki/Postmodernism (Score: 0.84)\n",
"14. Eastern Europe (Score: 0.839)\n", "14. Eastern Europe, URL: https://simple.wikipedia.org/wiki/Eastern%20Europe (Score: 0.839)\n",
"15. Europe (Score: 0.839)\n", "15. Cubism, URL: https://simple.wikipedia.org/wiki/Cubism (Score: 0.839)\n",
"16. Cubism (Score: 0.839)\n", "16. Europe, URL: https://simple.wikipedia.org/wiki/Europe (Score: 0.839)\n",
"17. Impressionism (Score: 0.838)\n", "17. Impressionism, URL: https://simple.wikipedia.org/wiki/Impressionism (Score: 0.838)\n",
"18. Bauhaus (Score: 0.838)\n", "18. Bauhaus, URL: https://simple.wikipedia.org/wiki/Bauhaus (Score: 0.838)\n",
"19. Expressionism (Score: 0.837)\n", "19. Surrealism, URL: https://simple.wikipedia.org/wiki/Surrealism (Score: 0.837)\n",
"20. Surrealism (Score: 0.837)\n" "20. Expressionism, URL: https://simple.wikipedia.org/wiki/Expressionism (Score: 0.837)\n"
] ]
} }
], ],
"source": [ "source": [
"query_results = query_qdrant('modern art in Europe', 'Articles')\n", "query_results = query_qdrant('modern art in Europe', 'Articles', 'title')\n",
"for i, article in enumerate(query_results):\n", "for i, article in enumerate(query_results):\n",
" print(f'{i + 1}. {article.payload[\"title\"]} (Score: {round(article.score, 3)})')" "    print(f'{i + 1}. {article.payload[\"title\"]}, URL: {article.payload[\"url\"]} (Score: {round(article.score, 3)})')"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 18,
"id": "7ed116b8", "id": "7ed116b8",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-01-18T09:53:11.038910Z", "end_time": "2024-05-21T23:58:53.144123Z",
"start_time": "2023-01-18T09:52:55.248029Z" "start_time": "2024-05-21T23:58:52.924091Z"
} }
}, },
"outputs": [ "outputs": [
@ -663,26 +694,26 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"1. Battle of Bannockburn (Score: 0.869)\n", "1. Battle of Bannockburn, URL: https://simple.wikipedia.org/wiki/Battle%20of%20Bannockburn (Score: 0.869)\n",
"2. Wars of Scottish Independence (Score: 0.861)\n", "2. Wars of Scottish Independence, URL: https://simple.wikipedia.org/wiki/Wars%20of%20Scottish%20Independence (Score: 0.861)\n",
"3. 1651 (Score: 0.853)\n", "3. 1651, URL: https://simple.wikipedia.org/wiki/1651 (Score: 0.852)\n",
"4. First War of Scottish Independence (Score: 0.85)\n", "4. First War of Scottish Independence, URL: https://simple.wikipedia.org/wiki/First%20War%20of%20Scottish%20Independence (Score: 0.85)\n",
"5. Robert I of Scotland (Score: 0.846)\n", "5. Robert I of Scotland, URL: https://simple.wikipedia.org/wiki/Robert%20I%20of%20Scotland (Score: 0.846)\n",
"6. 841 (Score: 0.844)\n", "6. 841, URL: https://simple.wikipedia.org/wiki/841 (Score: 0.844)\n",
"7. 1716 (Score: 0.844)\n", "7. 1716, URL: https://simple.wikipedia.org/wiki/1716 (Score: 0.844)\n",
"8. 1314 (Score: 0.837)\n", "8. 1314, URL: https://simple.wikipedia.org/wiki/1314 (Score: 0.837)\n",
"9. 1263 (Score: 0.836)\n", "9. 1263, URL: https://simple.wikipedia.org/wiki/1263 (Score: 0.836)\n",
"10. William Wallace (Score: 0.835)\n", "10. William Wallace, URL: https://simple.wikipedia.org/wiki/William%20Wallace (Score: 0.835)\n",
"11. Stirling (Score: 0.831)\n", "11. Stirling, URL: https://simple.wikipedia.org/wiki/Stirling (Score: 0.831)\n",
"12. 1306 (Score: 0.831)\n", "12. 1306, URL: https://simple.wikipedia.org/wiki/1306 (Score: 0.831)\n",
"13. 1746 (Score: 0.831)\n", "13. 1746, URL: https://simple.wikipedia.org/wiki/1746 (Score: 0.83)\n",
"14. 1040s (Score: 0.828)\n", "14. 1040s, URL: https://simple.wikipedia.org/wiki/1040s (Score: 0.828)\n",
"15. 1106 (Score: 0.827)\n", "15. 1106, URL: https://simple.wikipedia.org/wiki/1106 (Score: 0.827)\n",
"16. 1304 (Score: 0.827)\n", "16. 1304, URL: https://simple.wikipedia.org/wiki/1304 (Score: 0.826)\n",
"17. David II of Scotland (Score: 0.825)\n", "17. David II of Scotland, URL: https://simple.wikipedia.org/wiki/David%20II%20of%20Scotland (Score: 0.825)\n",
"18. Braveheart (Score: 0.824)\n", "18. Braveheart, URL: https://simple.wikipedia.org/wiki/Braveheart (Score: 0.824)\n",
"19. 1124 (Score: 0.824)\n", "19. 1124, URL: https://simple.wikipedia.org/wiki/1124 (Score: 0.824)\n",
"20. July 27 (Score: 0.823)\n" "20. July 27, URL: https://simple.wikipedia.org/wiki/July%2027 (Score: 0.823)\n"
] ]
} }
], ],
@ -690,13 +721,13 @@
"# This time we'll query using content vector\n", "# This time we'll query using content vector\n",
"query_results = query_qdrant('Famous battles in Scottish history', 'Articles', 'content')\n", "query_results = query_qdrant('Famous battles in Scottish history', 'Articles', 'content')\n",
"for i, article in enumerate(query_results):\n", "for i, article in enumerate(query_results):\n",
" print(f'{i + 1}. {article.payload[\"title\"]} (Score: {round(article.score, 3)})')" "    print(f'{i + 1}. {article.payload[\"title\"]}, URL: {article.payload[\"url\"]} (Score: {round(article.score, 3)})')"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 19,
"id": "0119d87a", "id": "cd4f750dc6daa2e8",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []