updates Q&A example with latest embedding and completion models

This commit is contained in:
Ted Sanders 2023-01-11 08:45:56 -08:00
parent 49fca3f32e
commit df2758ac2e

View File

@ -19,13 +19,14 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import pandas as pd\n",
"import openai\n",
"import numpy as np\n", "import numpy as np\n",
"import openai\n",
"import pandas as pd\n",
"import pickle\n", "import pickle\n",
"from transformers import GPT2TokenizerFast\n", "import tiktoken\n",
"\n", "\n",
"COMPLETIONS_MODEL = \"text-davinci-002\"" "COMPLETIONS_MODEL = \"text-davinci-003\"\n",
"EMBEDDING_MODEL = \"text-embedding-ada-002\""
] ]
}, },
{ {
@ -45,7 +46,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"\"The 2020 Summer Olympics men's high jump was won by Mariusz Przybylski of Poland.\"" "\"Marcelo Chierighini of Brazil won the gold medal in the men's high jump at the 2020 Summer Olympics.\""
] ]
}, },
"execution_count": 2, "execution_count": 2,
@ -60,19 +61,17 @@
" prompt=prompt,\n", " prompt=prompt,\n",
" temperature=0,\n", " temperature=0,\n",
" max_tokens=300,\n", " max_tokens=300,\n",
" top_p=1,\n",
" frequency_penalty=0,\n",
" presence_penalty=0,\n",
" model=COMPLETIONS_MODEL\n", " model=COMPLETIONS_MODEL\n",
")[\"choices\"][0][\"text\"].strip(\" \\n\")" ")[\"choices\"][0][\"text\"].strip(\" \\n\")"
] ]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"id": "47204cce-a7d5-4c81-ab6e-53323026e08c", "id": "47204cce-a7d5-4c81-ab6e-53323026e08c",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Mariusz Przybylski is a professional footballer from Poland, and not much of a high jumper! Evidently GPT-3 needs some assistance here. \n", "Marcelo is a gold medalist swimmer, and, we assume, not much of a high jumper! Evidently GPT-3 needs some assistance here. \n",
"\n", "\n",
"The first issue to tackle is that the model is hallucinating an answer rather than telling us \"I don't know\". This is bad because it makes it hard to trust the answer that the model gives us! \n", "The first issue to tackle is that the model is hallucinating an answer rather than telling us \"I don't know\". This is bad because it makes it hard to trust the answer that the model gives us! \n",
"\n", "\n",
@ -108,9 +107,6 @@
" prompt=prompt,\n", " prompt=prompt,\n",
" temperature=0,\n", " temperature=0,\n",
" max_tokens=300,\n", " max_tokens=300,\n",
" top_p=1,\n",
" frequency_penalty=0,\n",
" presence_penalty=0,\n",
" model=COMPLETIONS_MODEL\n", " model=COMPLETIONS_MODEL\n",
")[\"choices\"][0][\"text\"].strip(\" \\n\")" ")[\"choices\"][0][\"text\"].strip(\" \\n\")"
] ]
@ -132,7 +128,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"\"Gianmarco Tamberi and Mutaz Essa Barshim won the 2020 Summer Olympics men's high jump.\"" "'Gianmarco Tamberi and Mutaz Essa Barshim emerged as joint winners of the event.'"
] ]
}, },
"execution_count": 4, "execution_count": 4,
@ -245,55 +241,55 @@
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>Nordic combined at the 2016 Winter Youth Olympics</th>\n", " <th>Jamaica at the 2020 Summer Olympics</th>\n",
" <th>Summary</th>\n", " <th>Swimming</th>\n",
" <td>Nordic combined at the 2016 Winter Youth Olymp...</td>\n", " <td>Jamaican swimmers further achieved qualifying ...</td>\n",
" <td>56</td>\n", " <td>51</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>Morocco at the 2020 Summer Olympics</th>\n", " <th>Archery at the 2020 Summer Olympics Women's individual</th>\n",
" <th>Judo</th>\n", " <th>Background</th>\n",
" <td>Morocco qualified two female judoka for each o...</td>\n", " <td>This is the 13th consecutive appearance of the...</td>\n",
" <td>106</td>\n", " <td>136</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>Guinea-Bissau at the 2020 Summer Olympics</th>\n", " <th>Germany at the 2020 Summer Olympics</th>\n",
" <th>Wrestling</th>\n", " <th>Sport climbing</th>\n",
" <td>Guinea-Bissau qualified two wrestlers for each...</td>\n", " <td>Germany entered two sport climbers into the Ol...</td>\n",
" <td>69</td>\n", " <td>98</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>Rome bid for the 2020 Summer Olympics</th>\n", " <th>Cycling at the 2020 Summer Olympics Women's BMX racing</th>\n",
" <th>History</th>\n", " <th>Competition format</th>\n",
" <td>The Italian National Olympic Committee (CONI) ...</td>\n", " <td>The competition was a three-round tournament, ...</td>\n",
" <td>738</td>\n", " <td>215</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>Italy at the 2020 Summer Olympics</th>\n", " <th>Volleyball at the 2020 Summer Olympics Men's tournament</th>\n",
" <th>Slalom</th>\n", " <th>Format</th>\n",
" <td>Italian canoeists qualified one boat for each ...</td>\n", " <td>The preliminary round was a competition betwee...</td>\n",
" <td>76</td>\n", " <td>104</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" content \\\n", " content \\\n",
"title heading \n", "title heading \n",
"Nordic combined at the 2016 Winter Youth Olympics Summary Nordic combined at the 2016 Winter Youth Olymp... \n", "Jamaica at the 2020 Summer Olympics Swimming Jamaican swimmers further achieved qualifying ... \n",
"Morocco at the 2020 Summer Olympics Judo Morocco qualified two female judoka for each o... \n", "Archery at the 2020 Summer Olympics Women's i... Background This is the 13th consecutive appearance of the... \n",
"Guinea-Bissau at the 2020 Summer Olympics Wrestling Guinea-Bissau qualified two wrestlers for each... \n", "Germany at the 2020 Summer Olympics Sport climbing Germany entered two sport climbers into the Ol... \n",
"Rome bid for the 2020 Summer Olympics History The Italian National Olympic Committee (CONI) ... \n", "Cycling at the 2020 Summer Olympics Women's B... Competition format The competition was a three-round tournament, ... \n",
"Italy at the 2020 Summer Olympics Slalom Italian canoeists qualified one boat for each ... \n", "Volleyball at the 2020 Summer Olympics Men's ... Format The preliminary round was a competition betwee... \n",
"\n", "\n",
" tokens \n", " tokens \n",
"title heading \n", "title heading \n",
"Nordic combined at the 2016 Winter Youth Olympics Summary 56 \n", "Jamaica at the 2020 Summer Olympics Swimming 51 \n",
"Morocco at the 2020 Summer Olympics Judo 106 \n", "Archery at the 2020 Summer Olympics Women's i... Background 136 \n",
"Guinea-Bissau at the 2020 Summer Olympics Wrestling 69 \n", "Germany at the 2020 Summer Olympics Sport climbing 98 \n",
"Rome bid for the 2020 Summer Olympics History 738 \n", "Cycling at the 2020 Summer Olympics Women's B... Competition format 215 \n",
"Italy at the 2020 Summer Olympics Slalom 76 " "Volleyball at the 2020 Summer Olympics Men's ... Format 104 "
] ]
}, },
"execution_count": 5, "execution_count": 5,
@ -326,36 +322,17 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 6,
"id": "4b874907-5109-4eef-ad9a-add4367925a3",
"metadata": {},
"outputs": [],
"source": [
"MODEL_NAME = \"curie\"\n",
"\n",
"DOC_EMBEDDINGS_MODEL = f\"text-search-{MODEL_NAME}-doc-001\"\n",
"QUERY_EMBEDDINGS_MODEL = f\"text-search-{MODEL_NAME}-query-001\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ba475f30-ef7f-431c-b60d-d5970b62ad09", "id": "ba475f30-ef7f-431c-b60d-d5970b62ad09",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def get_embedding(text: str, model: str) -> list[float]:\n", "def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:\n",
" result = openai.Embedding.create(\n", " result = openai.Embedding.create(\n",
" model=model,\n", " model=model,\n",
" input=text\n", " input=text\n",
" )\n", " )\n",
" return result[\"data\"][0][\"embedding\"]\n", " return result[\"data\"][0][\"embedding\"]\n",
"\n", "\n",
"def get_doc_embedding(text: str) -> list[float]:\n",
" return get_embedding(text, DOC_EMBEDDINGS_MODEL)\n",
"\n",
"def get_query_embedding(text: str) -> list[float]:\n",
" return get_embedding(text, QUERY_EMBEDDINGS_MODEL)\n",
"\n",
"def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:\n", "def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:\n",
" \"\"\"\n", " \"\"\"\n",
" Create an embedding for each row in the dataframe using the OpenAI Embeddings API.\n", " Create an embedding for each row in the dataframe using the OpenAI Embeddings API.\n",
@ -363,13 +340,13 @@
" Return a dictionary that maps the index of each row to the embedding vector that corresponds to it.\n", " Return a dictionary that maps the index of each row to the embedding vector that corresponds to it.\n",
" \"\"\"\n", " \"\"\"\n",
" return {\n", " return {\n",
" idx: get_doc_embedding(r.content.replace(\"\\n\", \" \")) for idx, r in df.iterrows()\n", " idx: get_embedding(r.content) for idx, r in df.iterrows()\n",
" }" " }"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 7,
"id": "737266aa-cbe7-4691-87c1-fce8a31632f1", "id": "737266aa-cbe7-4691-87c1-fce8a31632f1",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -399,7 +376,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 8,
"id": "ab50bfca-cb02-41c6-b338-4400abe1d86e", "id": "ab50bfca-cb02-41c6-b338-4400abe1d86e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -408,12 +385,12 @@
"\n", "\n",
"# ===== OR, uncomment the below line to recalculate the embeddings from scratch. ========\n", "# ===== OR, uncomment the below line to recalculate the embeddings from scratch. ========\n",
"\n", "\n",
"# context_embeddings = compute_doc_embeddings(df)" "# document_embeddings = compute_doc_embeddings(df)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 9,
"id": "b9a8c713-c8a9-47dc-85a4-871ee1395566", "id": "b9a8c713-c8a9-47dc-85a4-871ee1395566",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -421,7 +398,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"('2020 Summer Olympics', 'Summary') : [-0.00089670566, 0.0027141054, -0.00030984893, 0.0066024954, -0.009860336]... (4096 entries)\n" "('2020 Summer Olympics', 'Summary') : [0.0037565305829048, -0.0061981128528714, -0.0087078781798481, -0.0071364338509738, -0.0025227521546185]... (1536 entries)\n"
] ]
} }
], ],
@ -447,15 +424,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 10,
"id": "dcd680e9-f194-4180-b14f-fc357498eb92", "id": "dcd680e9-f194-4180-b14f-fc357498eb92",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def vector_similarity(x: list[float], y: list[float]) -> float:\n", "def vector_similarity(x: list[float], y: list[float]) -> float:\n",
" \"\"\"\n", " \"\"\"\n",
" We could use cosine similarity or dot product to calculate the similarity between vectors.\n", " Returns the similarity between two vectors.\n",
" In practice, we have found it makes little difference. \n", " \n",
" Because OpenAI Embeddings are normalized to length 1, the cosine similarity is the same as the dot product.\n",
" \"\"\"\n", " \"\"\"\n",
" return np.dot(np.array(x), np.array(y))\n", " return np.dot(np.array(x), np.array(y))\n",
"\n", "\n",
@ -466,7 +444,7 @@
" \n", " \n",
" Return the list of document sections, sorted by relevance in descending order.\n", " Return the list of document sections, sorted by relevance in descending order.\n",
" \"\"\"\n", " \"\"\"\n",
" query_embedding = get_query_embedding(query)\n", " query_embedding = get_embedding(query)\n",
" \n", " \n",
" document_similarities = sorted([\n", " document_similarities = sorted([\n",
" (vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items()\n", " (vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items()\n",
@ -477,26 +455,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 11,
"id": "e3a27d73-f47f-480d-b336-079414f749cb", "id": "e3a27d73-f47f-480d-b336-079414f749cb",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[(0.42962625596241333,\n", "[(0.884864308450606,\n",
" (\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')),\n", " (\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')),\n",
" (0.40670511466655435,\n", " (0.8633938355935518,\n",
" (\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')),\n", " (\"Athletics at the 2020 Summer Olympics Men's pole vault\", 'Summary')),\n",
" (0.40469276614514266,\n", " (0.861639730583851,\n",
" (\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Background')),\n", " (\"Athletics at the 2020 Summer Olympics Men's long jump\", 'Summary')),\n",
" (0.4042442976710604,\n", " (0.8560523857031264,\n",
" (\"Athletics at the 2020 Summer Olympics Men's triple jump\", 'Summary')),\n", " (\"Athletics at the 2020 Summer Olympics Men's triple jump\", 'Summary')),\n",
" (0.4021923631988294,\n", " (0.8469039130441247,\n",
" (\"Athletics at the 2020 Summer Olympics Women's long jump\", 'Summary'))]" " (\"Athletics at the 2020 Summer Olympics Men's 110 metres hurdles\",\n",
" 'Summary'))]"
] ]
}, },
"execution_count": 12, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -507,26 +486,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 12,
"id": "729c2ce7-8540-4ab2-bb3a-76c4dfcb689c", "id": "729c2ce7-8540-4ab2-bb3a-76c4dfcb689c",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[(0.42879291463492475,\n", "[(0.8726165220223294,\n",
" (\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')),\n",
" (0.4194122846175017,\n",
" (\"Athletics at the 2020 Summer Olympics Women's long jump\", 'Summary')),\n", " (\"Athletics at the 2020 Summer Olympics Women's long jump\", 'Summary')),\n",
" (0.41152657076657995,\n", " (0.8682196158313358,\n",
" (\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')),\n",
" (0.863191526370672,\n",
" (\"Athletics at the 2020 Summer Olympics Women's pole vault\", 'Summary')),\n",
" (0.8609374262115406,\n",
" (\"Athletics at the 2020 Summer Olympics Women's triple jump\", 'Summary')),\n", " (\"Athletics at the 2020 Summer Olympics Women's triple jump\", 'Summary')),\n",
" (0.4096367709206329,\n", " (0.8581515607285688,\n",
" (\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')),\n", " (\"Athletics at the 2020 Summer Olympics Women's 100 metres hurdles\",\n",
" (0.4059521236876147,\n", " 'Summary'))]"
" (\"Athletics at the 2020 Summer Olympics Women's pole vault\", 'Summary'))]"
] ]
}, },
"execution_count": 13, "execution_count": 12,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -536,11 +516,12 @@
] ]
}, },
{ {
"attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
"id": "3cf71fae-abb1-46b2-a483-c1b2f1a915c2", "id": "3cf71fae-abb1-46b2-a483-c1b2f1a915c2",
"metadata": {}, "metadata": {},
"source": [ "source": [
"We can see that the most relevant document sections for each question are the summaries for the Men's and Women's high jump competitions - which is exactly what we would expect." "We can see that the most relevant document sections for each question include the summaries for the Men's and Women's high jump competitions - which is exactly what we would expect."
] ]
}, },
{ {
@ -555,7 +536,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 13,
"id": "b763ace2-1946-48e0-8ff1-91ba335d47a0", "id": "b763ace2-1946-48e0-8ff1-91ba335d47a0",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -565,7 +546,7 @@
"'Context separator contains 3 tokens'" "'Context separator contains 3 tokens'"
] ]
}, },
"execution_count": 14, "execution_count": 13,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -573,16 +554,17 @@
"source": [ "source": [
"MAX_SECTION_LEN = 500\n", "MAX_SECTION_LEN = 500\n",
"SEPARATOR = \"\\n* \"\n", "SEPARATOR = \"\\n* \"\n",
"ENCODING = \"cl100k_base\" # encoding for text-embedding-ada-002\n",
"\n", "\n",
"tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")\n", "encoding = tiktoken.get_encoding(ENCODING)\n",
"separator_len = len(tokenizer.tokenize(SEPARATOR))\n", "separator_len = len(encoding.encode(SEPARATOR))\n",
"\n", "\n",
"f\"Context separator contains {separator_len} tokens\"" "f\"Context separator contains {separator_len} tokens\""
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 14,
"id": "0c5c0509-eeb9-4552-a5d4-6ace04ef73dd", "id": "0c5c0509-eeb9-4552-a5d4-6ace04ef73dd",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -619,7 +601,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 15,
"id": "f614045a-3917-4b28-9643-7e0c299ec1a7", "id": "f614045a-3917-4b28-9643-7e0c299ec1a7",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -627,18 +609,16 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 3 document sections:\n", "Selected 2 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')\n", "(\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's triple jump\", 'Summary')\n", "(\"Athletics at the 2020 Summer Olympics Men's long jump\", 'Summary')\n",
"===\n", "===\n",
" Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\"\n", " Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\"\n",
"\n", "\n",
"Context:\n", "Context:\n",
"\n", "\n",
"* The women's high jump event at the 2020 Summer Olympics took place on 5 and 7 August 2021 at the Japan National Stadium. Even though 32 athletes qualified through the qualification system for the Games, only 31 took part in the competition. This was the 22nd appearance of the event, having appeared at every Olympics since women's athletics was introduced in 1928.\n",
"* The men's high jump event at the 2020 Summer Olympics took place between 30 July and 1 August 2021 at the Olympic Stadium. 33 athletes from 24 nations competed; the total possible number depended on how many nations would use universality places to enter athletes in addition to the 32 qualifying through mark or ranking (no universality places were used in 2021). Italian athlete Gianmarco Tamberi along with Qatari athlete Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m. Both Tamberi and Barshim agreed to share the gold medal in a rare instance where the athletes of different nations had agreed to share the same medal in the history of Olympics. Barshim in particular was heard to ask a competition official \"Can we have two golds?\" in response to being offered a 'jump off'. Maksim Nedasekau of Belarus took bronze. The medals were the first ever in the men's high jump for Italy and Belarus, the first gold in the men's high jump for Italy and Qatar, and the third consecutive medal in the men's high jump for Qatar (all by Barshim). Barshim became only the second man to earn three medals in high jump, joining Patrik Sjöberg of Sweden (1984 to 1992).\n", "* The men's high jump event at the 2020 Summer Olympics took place between 30 July and 1 August 2021 at the Olympic Stadium. 33 athletes from 24 nations competed; the total possible number depended on how many nations would use universality places to enter athletes in addition to the 32 qualifying through mark or ranking (no universality places were used in 2021). Italian athlete Gianmarco Tamberi along with Qatari athlete Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m. Both Tamberi and Barshim agreed to share the gold medal in a rare instance where the athletes of different nations had agreed to share the same medal in the history of Olympics. 
Barshim in particular was heard to ask a competition official \"Can we have two golds?\" in response to being offered a 'jump off'. Maksim Nedasekau of Belarus took bronze. The medals were the first ever in the men's high jump for Italy and Belarus, the first gold in the men's high jump for Italy and Qatar, and the third consecutive medal in the men's high jump for Qatar (all by Barshim). Barshim became only the second man to earn three medals in high jump, joining Patrik Sjöberg of Sweden (1984 to 1992).\n",
"* The men's triple jump event at the 2020 Summer Olympics took place between 3 and 5 August 2021 at the Japan National Stadium. Approximately 35 athletes were expected to compete; the exact number was dependent on how many nations use universality places to enter athletes in addition to the 32 qualifying through time or ranking (2 universality places were used in 2016). 32 athletes from 19 nations competed. Pedro Pichardo of Portugal won the gold medal, the nation's second victory in the men's triple jump (after Nelson Évora in 2008). China's Zhu Yaming took silver, while Hugues Fabrice Zango earned Burkina Faso's first Olympic medal in any event.\n", "* The men's long jump event at the 2020 Summer Olympics took place between 31 July and 2 August 2021 at the Japan National Stadium. Approximately 35 athletes were expected to compete; the exact number was dependent on how many nations use universality places to enter athletes in addition to the 32 qualifying through time or ranking (1 universality place was used in 2016). 31 athletes from 20 nations competed. Miltiadis Tentoglou won the gold medal, Greece's first medal in the men's long jump. Cuban athletes Juan Miguel Echevarría and Maykel Massó earned silver and bronze, respectively, the nation's first medals in the event since 2008.\n",
"\n", "\n",
" Q: Who won the 2020 Summer Olympics men's high jump?\n", " Q: Who won the 2020 Summer Olympics men's high jump?\n",
" A:\n" " A:\n"
@ -671,7 +651,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 16,
"id": "b0edfec7-9243-4573-92e0-253d31c771ad", "id": "b0edfec7-9243-4573-92e0-253d31c771ad",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -686,7 +666,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 17,
"id": "9c1c9a69-848e-4099-a90d-c8da36c153d5", "id": "9c1c9a69-848e-4099-a90d-c8da36c153d5",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -716,7 +696,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 18,
"id": "c233e449-bf33-4c9e-b095-6a4dd278c8fd", "id": "c233e449-bf33-4c9e-b095-6a4dd278c8fd",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -724,19 +704,18 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 3 document sections:\n", "Selected 2 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')\n", "(\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's triple jump\", 'Summary')\n" "(\"Athletics at the 2020 Summer Olympics Men's long jump\", 'Summary')\n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"'Gianmarco Tamberi and Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m.'" "'Gianmarco Tamberi and Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m. Both Tamberi and Barshim agreed to share the gold medal.'"
] ]
}, },
"execution_count": 19, "execution_count": 18,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -761,7 +740,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 19,
"id": "1127867b-2884-44bb-9439-0e8ae171c835", "id": "1127867b-2884-44bb-9439-0e8ae171c835",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -770,7 +749,7 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 1 document sections:\n", "Selected 1 document sections:\n",
"('2020 Summer Olympics', 'Postponement to 2021')\n", "('Concerns and controversies at the 2020 Summer Olympics', 'Summary')\n",
"\n", "\n",
"Q: Why was the 2020 Summer Olympics originally postponed?\n", "Q: Why was the 2020 Summer Olympics originally postponed?\n",
"A: The 2020 Summer Olympics were originally postponed due to the COVID-19 pandemic.\n" "A: The 2020 Summer Olympics were originally postponed due to the COVID-19 pandemic.\n"
@ -786,7 +765,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 20,
"id": "720d9e0b-b189-4101-91ee-babf736199e6", "id": "720d9e0b-b189-4101-91ee-babf736199e6",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -794,8 +773,9 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 1 document sections:\n", "Selected 2 document sections:\n",
"('2020 Summer Olympics medal table', 'Summary')\n", "('2020 Summer Olympics medal table', 'Summary')\n",
"('List of 2020 Summer Olympics medal winners', 'Summary')\n",
"\n", "\n",
"Q: In the 2020 Summer Olympics, how many gold medals did the country which won the most medals win?\n", "Q: In the 2020 Summer Olympics, how many gold medals did the country which won the most medals win?\n",
"A: The United States won the most medals overall, with 113, and the most gold medals, with 39.\n" "A: The United States won the most medals overall, with 113, and the most gold medals, with 39.\n"
@ -811,7 +791,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 21,
"id": "4e8e51cc-e4eb-4557-9e09-2929d4df5b7f", "id": "4e8e51cc-e4eb-4557-9e09-2929d4df5b7f",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -819,13 +799,12 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 3 document sections:\n", "Selected 2 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Men's shot put\", 'Summary')\n", "(\"Athletics at the 2020 Summer Olympics Men's shot put\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's shot put\", 'Background')\n", "(\"Athletics at the 2020 Summer Olympics Men's discus throw\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's hammer throw\", 'Competition format')\n",
"\n", "\n",
"Q: What was unusual about the mens shotput competition?\n", "Q: What was unusual about the mens shotput competition?\n",
"A: The same three competitors received the same medals in back-to-back editions of an the same individual event.\n" "A: The same three competitors received the same medals in back-to-back editions of the same individual event.\n"
] ]
} }
], ],
@ -838,7 +817,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 22,
"id": "37c83519-e3c6-4c44-8b4a-98cbb3a5f5ba", "id": "37c83519-e3c6-4c44-8b4a-98cbb3a5f5ba",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -846,11 +825,12 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 1 document sections:\n", "Selected 2 document sections:\n",
"('Italy at the 2020 Summer Olympics', 'Summary')\n", "('Italy at the 2020 Summer Olympics', 'Summary')\n",
"('San Marino at the 2020 Summer Olympics', 'Summary')\n",
"\n", "\n",
"Q: In the 2020 Summer Olympics, how many silver medals did Italy win?\n", "Q: In the 2020 Summer Olympics, how many silver medals did Italy win?\n",
"A: 10\n" "A: 10 silver medals.\n"
] ]
} }
], ],
@ -871,7 +851,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 23,
"id": "26a1a9ef-e1ee-4f80-a1b1-6164ccfa5bac", "id": "26a1a9ef-e1ee-4f80-a1b1-6164ccfa5bac",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -879,10 +859,11 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 3 document sections:\n", "Selected 4 document sections:\n",
"('France at the 2020 Summer Olympics', 'Taekwondo')\n", "('France at the 2020 Summer Olympics', 'Taekwondo')\n",
"('2020 Summer Olympics medal table', 'Medal count')\n",
"('Taekwondo at the 2020 Summer Olympics Qualification', 'Qualification summary')\n", "('Taekwondo at the 2020 Summer Olympics Qualification', 'Qualification summary')\n",
"('2020 Summer Olympics medal table', 'Medal count')\n",
"(\"Taekwondo at the 2020 Summer Olympics Men's 80 kg\", 'Competition format')\n",
"\n", "\n",
"Q: What is the total number of medals won by France, multiplied by the number of Taekwondo medals given out to all countries?\n", "Q: What is the total number of medals won by France, multiplied by the number of Taekwondo medals given out to all countries?\n",
"A: I don't know.\n" "A: I don't know.\n"
@ -898,7 +879,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 24,
"id": "9fba8a63-eb81-4661-ae17-59bb5e2933d6", "id": "9fba8a63-eb81-4661-ae17-59bb5e2933d6",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -906,14 +887,10 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 7 document sections:\n", "Selected 3 document sections:\n",
"('Chile at the 2020 Summer Olympics', 'Mountain biking')\n", "(\"Sport climbing at the 2020 Summer Olympics Men's combined\", 'Route-setting')\n",
"('South Korea at the 2020 Summer Olympics', 'Sport climbing')\n",
"(\"Cycling at the 2020 Summer Olympics Men's cross-country\", 'Competition format')\n",
"(\"Ski mountaineering at the 2020 Winter Youth Olympics Boys' individual\", 'Summary')\n", "(\"Ski mountaineering at the 2020 Winter Youth Olympics Boys' individual\", 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's cross-country\", 'Competition format')\n", "(\"Ski mountaineering at the 2020 Winter Youth Olympics Girls' individual\", 'Summary')\n",
"('Portugal at the 2020 Summer Olympics', 'Mountain biking')\n",
"('Slovenia at the 2020 Summer Olympics', 'Mountain biking')\n",
"\n", "\n",
"Q: What is the tallest mountain in the world?\n", "Q: What is the tallest mountain in the world?\n",
"A: I don't know.\n" "A: I don't know.\n"
@ -929,7 +906,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 25,
"id": "2d4c693b-cdb9-4f4c-bd1b-f77b29097a1f", "id": "2d4c693b-cdb9-4f4c-bd1b-f77b29097a1f",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -937,15 +914,9 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Selected 8 document sections:\n", "Selected 2 document sections:\n",
"(\"Gymnastics at the 2020 Summer Olympics Women's trampoline\", 'Summary')\n", "(\"Gymnastics at the 2020 Summer Olympics Women's trampoline\", 'Summary')\n",
"(\"Rowing at the 2020 Summer Olympics Women's quadruple sculls\", 'Summary')\n", "('Equestrian at the 2020 Summer Olympics Team jumping', 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's sprint\", 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's team sprint\", 'Summary')\n",
"(\"Wrestling at the 2020 Summer Olympics Women's freestyle 62 kg\", 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's BMX freestyle\", 'Summary')\n",
"(\"Rowing at the 2020 Summer Olympics Women's lightweight double sculls\", 'Summary')\n",
"(\"Wrestling at the 2020 Summer Olympics Women's freestyle 68 kg\", 'Summary')\n",
"\n", "\n",
"Q: Who won the grimblesplatch competition at the 2020 Summer Olympic games?\n", "Q: Who won the grimblesplatch competition at the 2020 Summer Olympic games?\n",
"A: I don't know.\n" "A: I don't know.\n"