Fix styling for using_logprobs cookbook (#947)

This commit is contained in:
Shyamal H Anadkat 2023-12-20 17:08:59 -08:00 committed by GitHub
parent 1abc529895
commit 228cde1c46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -41,22 +41,22 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 264,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from openai import OpenAI\n", "from openai import OpenAI\n",
"from math import exp\n", "from math import exp\n",
"import numpy as np\n", "import numpy as np\n",
"from colorama import init, Fore\n", "from IPython.display import display, HTML\n",
"\n", "\n",
"\n", "\n",
"client = OpenAI()\n" "client = OpenAI()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 265,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -85,7 +85,7 @@
" params[\"tools\"] = tools\n", " params[\"tools\"] = tools\n",
"\n", "\n",
" completion = client.chat.completions.create(**params)\n", " completion = client.chat.completions.create(**params)\n",
" return completion\n" " return completion"
] ]
}, },
{ {
@ -113,7 +113,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 266,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -133,7 +133,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 267,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -146,7 +146,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 268,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -175,7 +175,7 @@
" [{\"role\": \"user\", \"content\": CLASSIFICATION_PROMPT.format(headline=headline)}],\n", " [{\"role\": \"user\", \"content\": CLASSIFICATION_PROMPT.format(headline=headline)}],\n",
" model=\"gpt-4\",\n", " model=\"gpt-4\",\n",
" )\n", " )\n",
" print(f\"Category: {API_RESPONSE.choices[0].message.content}\\n\")\n" " print(f\"Category: {API_RESPONSE.choices[0].message.content}\\n\")"
] ]
}, },
{ {
@ -187,7 +187,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 269,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -195,21 +195,69 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\n", "\n",
"Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.\n", "Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.\n"
"\u001b[36mOutput token 1:\u001b[39m Technology, \u001b[33mlogprobs:\u001b[39m -1.9816675e-06, \u001b[35mlinear probability:\u001b[39m 100.0%\n", ]
"\u001b[36mOutput token 2:\u001b[39m Techn, \u001b[33mlogprobs:\u001b[39m -14.062502, \u001b[35mlinear probability:\u001b[39m 0.0%\n", },
{
"data": {
"text/html": [
"<span style='color: cyan'>Output token 1:</span> Technology, <span style='color: darkorange'>logprobs:</span> -2.4584822e-06, <span style='color: magenta'>linear probability:</span> 100.0%<br><span style='color: cyan'>Output token 2:</span> Techn, <span style='color: darkorange'>logprobs:</span> -13.781253, <span style='color: magenta'>linear probability:</span> 0.0%<br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n", "\n",
"\n", "\n",
"\n", "\n",
"Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.\n", "Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.\n"
"\u001b[36mOutput token 1:\u001b[39m Politics, \u001b[33mlogprobs:\u001b[39m -3.650519e-06, \u001b[35mlinear probability:\u001b[39m 100.0%\n", ]
"\u001b[36mOutput token 2:\u001b[39m Technology, \u001b[33mlogprobs:\u001b[39m -13.015629, \u001b[35mlinear probability:\u001b[39m 0.0%\n", },
{
"data": {
"text/html": [
"<span style='color: cyan'>Output token 1:</span> Politics, <span style='color: darkorange'>logprobs:</span> -2.4584822e-06, <span style='color: magenta'>linear probability:</span> 100.0%<br><span style='color: cyan'>Output token 2:</span> Technology, <span style='color: darkorange'>logprobs:</span> -13.937503, <span style='color: magenta'>linear probability:</span> 0.0%<br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n", "\n",
"\n", "\n",
"\n", "\n",
"Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut\n", "Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut\n"
"\u001b[36mOutput token 1:\u001b[39m Art, \u001b[33mlogprobs:\u001b[39m -0.19579042, \u001b[35mlinear probability:\u001b[39m 82.22%\n", ]
"\u001b[36mOutput token 2:\u001b[39m Sports, \u001b[33mlogprobs:\u001b[39m -1.7270404, \u001b[35mlinear probability:\u001b[39m 17.78%\n", },
{
"data": {
"text/html": [
"<span style='color: cyan'>Output token 1:</span> Art, <span style='color: darkorange'>logprobs:</span> -0.009169078, <span style='color: magenta'>linear probability:</span> 99.09%<br><span style='color: cyan'>Output token 2:</span> Sports, <span style='color: darkorange'>logprobs:</span> -4.696669, <span style='color: magenta'>linear probability:</span> 0.91%<br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n", "\n",
"\n" "\n"
] ]
@ -225,13 +273,15 @@
" top_logprobs=2,\n", " top_logprobs=2,\n",
" )\n", " )\n",
" top_two_logprobs = API_RESPONSE.choices[0].logprobs.content[0].top_logprobs\n", " top_two_logprobs = API_RESPONSE.choices[0].logprobs.content[0].top_logprobs\n",
" html_content = \"\"\n",
" for i, logprob in enumerate(top_two_logprobs, start=1):\n", " for i, logprob in enumerate(top_two_logprobs, start=1):\n",
" print(\n", " html_content += (\n",
" f\"{Fore.CYAN}Output token {i}:{Fore.RESET} {logprob.token}, \"\n", " f\"<span style='color: cyan'>Output token {i}:</span> {logprob.token}, \"\n",
" f\"{Fore.YELLOW}logprobs:{Fore.RESET} {logprob.logprob}, \"\n", " f\"<span style='color: darkorange'>logprobs:</span> {logprob.logprob}, \"\n",
" f\"{Fore.MAGENTA}linear probability:{Fore.RESET} {np.round(np.exp(logprob.logprob)*100,2)}%\"\n", " f\"<span style='color: magenta'>linear probability:</span> {np.round(np.exp(logprob.logprob)*100,2)}%<br>\"\n",
" )\n", " )\n",
" print(\"\\n\")\n" " display(HTML(html_content))\n",
" print(\"\\n\")"
] ]
}, },
{ {
@ -266,7 +316,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 270,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -289,7 +339,7 @@
"medium_questions = [\n", "medium_questions = [\n",
" \"Did Lovelace collaborate with Charles Dickens\",\n", " \"Did Lovelace collaborate with Charles Dickens\",\n",
" \"What concepts did Lovelace build with Charles Babbage\",\n", " \"What concepts did Lovelace build with Charles Babbage\",\n",
"]\n" "]"
] ]
}, },
{ {
@ -301,7 +351,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 271,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -314,36 +364,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 272,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "data": {
"output_type": "stream", "text/html": [
"text": [ "Questions clearly answered in article<p style=\"color:green\">Question: What nationality was Ada Lovelace?</p><p style=\"color:cyan\">has_sufficient_context_for_answer: True, <span style=\"color:darkorange\">logprobs: -3.1281633e-07, <span style=\"color:magenta\">linear probability: 100.0%</span></p><p style=\"color:green\">Question: What was an important finding from Lovelace's seventh note?</p><p style=\"color:cyan\">has_sufficient_context_for_answer: True, <span style=\"color:darkorange\">logprobs: -7.89631e-07, <span style=\"color:magenta\">linear probability: 100.0%</span></p>Questions only partially covered in the article<p style=\"color:green\">Question: Did Lovelace collaborate with Charles Dickens</p><p style=\"color:cyan\">has_sufficient_context_for_answer: True, <span style=\"color:darkorange\">logprobs: -0.06993677, <span style=\"color:magenta\">linear probability: 93.25%</span></p><p style=\"color:green\">Question: What concepts did Lovelace build with Charles Babbage</p><p style=\"color:cyan\">has_sufficient_context_for_answer: False, <span style=\"color:darkorange\">logprobs: -0.61807257, <span style=\"color:magenta\">linear probability: 53.9%</span></p>"
"Questions clearly answered in article\n", ],
"\n", "text/plain": [
"\u001b[32mQuestion:\u001b[39m What nationality was Ada Lovelace?\n", "<IPython.core.display.HTML object>"
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -3.1281633e-07, \u001b[35mlinear probability:\u001b[39m 100.0% \n", ]
"\n", },
"\u001b[32mQuestion:\u001b[39m What was an important finding from Lovelace's seventh note?\n", "metadata": {},
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -6.704273e-07, \u001b[35mlinear probability:\u001b[39m 100.0% \n", "output_type": "display_data"
"\n",
"\n",
"\n",
"Questions only partially covered in the article\n",
"\n",
"\u001b[32mQuestion:\u001b[39m Did Lovelace collaborate with Charles Dickens\n",
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -0.07655343, \u001b[35mlinear probability:\u001b[39m 92.63% \n",
"\n",
"\u001b[32mQuestion:\u001b[39m What concepts did Lovelace build with Charles Babbage\n",
"\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -0.0699371, \u001b[35mlinear probability:\u001b[39m 93.25% \n",
"\n"
]
} }
], ],
"source": [ "source": [
"print(\"Questions clearly answered in article\" + \"\\n\")\n", "html_output = \"\"\n",
"html_output += \"Questions clearly answered in article\"\n",
"\n", "\n",
"for question in easy_questions:\n", "for question in easy_questions:\n",
" API_RESPONSE = get_completion(\n", " API_RESPONSE = get_completion(\n",
@ -358,14 +397,11 @@
" model=\"gpt-4\",\n", " model=\"gpt-4\",\n",
" logprobs=True,\n", " logprobs=True,\n",
" )\n", " )\n",
" print(Fore.GREEN + \"Question:\" + Fore.RESET, question)\n", " html_output += f'<p style=\"color:green\">Question: {question}</p>'\n",
" for logprob in API_RESPONSE.choices[0].logprobs.content:\n", " for logprob in API_RESPONSE.choices[0].logprobs.content:\n",
" print(\n", " html_output += f'<p style=\"color:cyan\">has_sufficient_context_for_answer: {logprob.token}, <span style=\"color:darkorange\">logprobs: {logprob.logprob}, <span style=\"color:magenta\">linear probability: {np.round(np.exp(logprob.logprob)*100,2)}%</span></span></p>'\n",
" Fore.CYAN + \"has_sufficient_context_for_answer:\" + Fore.RESET + f\" {logprob.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {logprob.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(logprob.logprob)*100,2)}%\",\n",
" \"\\n\",\n",
" )\n",
"\n", "\n",
"print(\"\\n\\n\" + \"Questions only partially covered in the article\" + \"\\n\")\n", "html_output += \"Questions only partially covered in the article\"\n",
"\n", "\n",
"for question in medium_questions:\n", "for question in medium_questions:\n",
" API_RESPONSE = get_completion(\n", " API_RESPONSE = get_completion(\n",
@ -381,12 +417,11 @@
" logprobs=True,\n", " logprobs=True,\n",
" top_logprobs=3,\n", " top_logprobs=3,\n",
" )\n", " )\n",
" print(Fore.GREEN + \"Question:\" + Fore.RESET, question)\n", " html_output += f'<p style=\"color:green\">Question: {question}</p>'\n",
" for logprob in API_RESPONSE.choices[0].logprobs.content:\n", " for logprob in API_RESPONSE.choices[0].logprobs.content:\n",
" print(\n", " html_output += f'<p style=\"color:cyan\">has_sufficient_context_for_answer: {logprob.token}, <span style=\"color:darkorange\">logprobs: {logprob.logprob}, <span style=\"color:magenta\">linear probability: {np.round(np.exp(logprob.logprob)*100,2)}%</span></span></p>'\n",
" Fore.CYAN + \"has_sufficient_context_for_answer:\" + Fore.RESET + f\" {logprob.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {logprob.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(logprob.logprob)*100,2)}%\",\n", "\n",
" \"\\n\",\n", "display(HTML(html_output))"
" )\n"
] ]
}, },
{ {
@ -421,7 +456,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 273,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -433,7 +468,7 @@
" \"My least favorite TV show\",\n", " \"My least favorite TV show\",\n",
" \"My least favorite TV show is\",\n", " \"My least favorite TV show is\",\n",
" \"My least favorite TV show is Breaking Bad\",\n", " \"My least favorite TV show is Breaking Bad\",\n",
"]\n" "]"
] ]
}, },
{ {
@ -445,61 +480,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 274,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "data": {
"output_type": "stream", "text/html": [
"text": [ "<p>Sentence: My</p><p style=\"color:cyan\">Predicted next token: favorite, <span style=\"color:darkorange\">logprobs: -0.18245785, <span style=\"color:magenta\">linear probability: 83.32%</span></p><p style=\"color:cyan\">Predicted next token: dog, <span style=\"color:darkorange\">logprobs: -2.397172, <span style=\"color:magenta\">linear probability: 9.1%</span></p><p style=\"color:cyan\">Predicted next token: ap, <span style=\"color:darkorange\">logprobs: -3.8732424, <span style=\"color:magenta\">linear probability: 2.08%</span></p><br><p>Sentence: My least</p><p style=\"color:cyan\">Predicted next token: favorite, <span style=\"color:darkorange\">logprobs: -0.0146376295, <span style=\"color:magenta\">linear probability: 98.55%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -4.2417912, <span style=\"color:magenta\">linear probability: 1.44%</span></p><p style=\"color:cyan\">Predicted next token: favorite, <span style=\"color:darkorange\">logprobs: -9.748788, <span style=\"color:magenta\">linear probability: 0.01%</span></p><br><p>Sentence: My least favorite</p><p style=\"color:cyan\">Predicted next token: food, <span style=\"color:darkorange\">logprobs: -0.9481721, <span style=\"color:magenta\">linear probability: 38.74%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -1.3447137, <span style=\"color:magenta\">linear probability: 26.06%</span></p><p style=\"color:cyan\">Predicted next token: color, <span style=\"color:darkorange\">logprobs: -1.3887696, <span style=\"color:magenta\">linear probability: 24.94%</span></p><br><p>Sentence: My least favorite TV</p><p style=\"color:cyan\">Predicted next token: show, <span style=\"color:darkorange\">logprobs: -0.0007898556, <span style=\"color:magenta\">linear probability: 99.92%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -7.711523, <span 
style=\"color:magenta\">linear probability: 0.04%</span></p><p style=\"color:cyan\">Predicted next token: series, <span style=\"color:darkorange\">logprobs: -9.348547, <span style=\"color:magenta\">linear probability: 0.01%</span></p><br><p>Sentence: My least favorite TV show</p><p style=\"color:cyan\">Predicted next token: is, <span style=\"color:darkorange\">logprobs: -0.2851253, <span style=\"color:magenta\">linear probability: 75.19%</span></p><p style=\"color:cyan\">Predicted next token: of, <span style=\"color:darkorange\">logprobs: -1.55335, <span style=\"color:magenta\">linear probability: 21.15%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -3.4928775, <span style=\"color:magenta\">linear probability: 3.04%</span></p><br><p>Sentence: My least favorite TV show is</p><p style=\"color:cyan\">Predicted next token: \"My, <span style=\"color:darkorange\">logprobs: -0.69349754, <span style=\"color:magenta\">linear probability: 49.98%</span></p><p style=\"color:cyan\">Predicted next token: \"The, <span style=\"color:darkorange\">logprobs: -1.2899293, <span style=\"color:magenta\">linear probability: 27.53%</span></p><p style=\"color:cyan\">Predicted next token: My, <span style=\"color:darkorange\">logprobs: -2.4170141, <span style=\"color:magenta\">linear probability: 8.92%</span></p><br><p>Sentence: My least favorite TV show is Breaking Bad</p><p style=\"color:cyan\">Predicted next token: because, <span style=\"color:darkorange\">logprobs: -0.17786823, <span style=\"color:magenta\">linear probability: 83.71%</span></p><p style=\"color:cyan\">Predicted next token: ,, <span style=\"color:darkorange\">logprobs: -2.3946173, <span style=\"color:magenta\">linear probability: 9.12%</span></p><p style=\"color:cyan\">Predicted next token: ., <span style=\"color:darkorange\">logprobs: -3.1861975, <span style=\"color:magenta\">linear probability: 4.13%</span></p><br>"
"\u001b[36mSentence:\u001b[39m My\n", ],
"\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -0.18245785, \u001b[35mlinear probability:\u001b[39m 83.32%\n", "text/plain": [
"\u001b[36mPredicted next token:\u001b[39m dog, \u001b[33mlogprobs:\u001b[39m -2.397172, \u001b[35mlinear probability:\u001b[39m 9.1%\n", "<IPython.core.display.HTML object>"
"\u001b[36mPredicted next token:\u001b[39m ap, \u001b[33mlogprobs:\u001b[39m -3.8732424, \u001b[35mlinear probability:\u001b[39m 2.08%\n", ]
"\n", },
"\n", "metadata": {},
"\u001b[36mSentence:\u001b[39m My least\n", "output_type": "display_data"
"\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -0.01722952, \u001b[35mlinear probability:\u001b[39m 98.29%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -4.079079, \u001b[35mlinear probability:\u001b[39m 1.69%\n",
"\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -9.6813755, \u001b[35mlinear probability:\u001b[39m 0.01%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite\n",
"\u001b[36mPredicted next token:\u001b[39m food, \u001b[33mlogprobs:\u001b[39m -0.9481721, \u001b[35mlinear probability:\u001b[39m 38.74%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -1.3447137, \u001b[35mlinear probability:\u001b[39m 26.06%\n",
"\u001b[36mPredicted next token:\u001b[39m color, \u001b[33mlogprobs:\u001b[39m -1.3887696, \u001b[35mlinear probability:\u001b[39m 24.94%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV\n",
"\u001b[36mPredicted next token:\u001b[39m show, \u001b[33mlogprobs:\u001b[39m -0.0007898556, \u001b[35mlinear probability:\u001b[39m 99.92%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -7.711523, \u001b[35mlinear probability:\u001b[39m 0.04%\n",
"\u001b[36mPredicted next token:\u001b[39m series, \u001b[33mlogprobs:\u001b[39m -9.348547, \u001b[35mlinear probability:\u001b[39m 0.01%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV show\n",
"\u001b[36mPredicted next token:\u001b[39m is, \u001b[33mlogprobs:\u001b[39m -0.18602066, \u001b[35mlinear probability:\u001b[39m 83.03%\n",
"\u001b[36mPredicted next token:\u001b[39m of, \u001b[33mlogprobs:\u001b[39m -2.0780265, \u001b[35mlinear probability:\u001b[39m 12.52%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -3.271426, \u001b[35mlinear probability:\u001b[39m 3.8%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV show is\n",
"\u001b[36mPredicted next token:\u001b[39m \"My, \u001b[33mlogprobs:\u001b[39m -0.77423567, \u001b[35mlinear probability:\u001b[39m 46.11%\n",
"\u001b[36mPredicted next token:\u001b[39m \"The, \u001b[33mlogprobs:\u001b[39m -1.2854586, \u001b[35mlinear probability:\u001b[39m 27.65%\n",
"\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -2.2629042, \u001b[35mlinear probability:\u001b[39m 10.4%\n",
"\n",
"\n",
"\u001b[36mSentence:\u001b[39m My least favorite TV show is Breaking Bad\n",
"\u001b[36mPredicted next token:\u001b[39m because, \u001b[33mlogprobs:\u001b[39m -0.16519119, \u001b[35mlinear probability:\u001b[39m 84.77%\n",
"\u001b[36mPredicted next token:\u001b[39m ,, \u001b[33mlogprobs:\u001b[39m -2.430881, \u001b[35mlinear probability:\u001b[39m 8.8%\n",
"\u001b[36mPredicted next token:\u001b[39m ., \u001b[33mlogprobs:\u001b[39m -3.2097907, \u001b[35mlinear probability:\u001b[39m 4.04%\n",
"\n",
"\n"
]
} }
], ],
"source": [ "source": [
"high_prob_completions = {}\n", "high_prob_completions = {}\n",
"low_prob_completions = {}\n", "low_prob_completions = {}\n",
"html_output = \"\"\n",
"\n", "\n",
"for sentence in sentence_list:\n", "for sentence in sentence_list:\n",
" PROMPT = \"\"\"Complete this sentence. You are acting as auto-complete. Simply complete the sentence to the best of your ability, make sure it is just ONE sentence: {sentence}\"\"\"\n", " PROMPT = \"\"\"Complete this sentence. You are acting as auto-complete. Simply complete the sentence to the best of your ability, make sure it is just ONE sentence: {sentence}\"\"\"\n",
@ -509,19 +509,19 @@
" logprobs=True,\n", " logprobs=True,\n",
" top_logprobs=3,\n", " top_logprobs=3,\n",
" )\n", " )\n",
" print(Fore.CYAN + \"Sentence:\" + Fore.RESET, sentence)\n", " html_output += f'<p>Sentence: {sentence}</p>'\n",
" first_token = True\n", " first_token = True\n",
" for token in API_RESPONSE.choices[0].logprobs.content[0].top_logprobs:\n", " for token in API_RESPONSE.choices[0].logprobs.content[0].top_logprobs:\n",
" print(\n", " html_output += f'<p style=\"color:cyan\">Predicted next token: {token.token}, <span style=\"color:darkorange\">logprobs: {token.logprob}, <span style=\"color:magenta\">linear probability: {np.round(np.exp(token.logprob)*100,2)}%</span></span></p>'\n",
" Fore.CYAN + \"Predicted next token:\" + Fore.RESET + f\" {token.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {token.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(token.logprob)*100,2)}%\"\n",
" )\n",
" if first_token:\n", " if first_token:\n",
" if np.exp(token.logprob) > 0.95:\n", " if np.exp(token.logprob) > 0.95:\n",
" high_prob_completions[sentence] = token.token\n", " high_prob_completions[sentence] = token.token\n",
" if np.exp(token.logprob) < 0.60:\n", " if np.exp(token.logprob) < 0.60:\n",
" low_prob_completions[sentence] = token.token\n", " low_prob_completions[sentence] = token.token\n",
" first_token = False\n", " first_token = False\n",
" print(\"\\n\")\n" " html_output += \"<br>\"\n",
"\n",
"display(HTML(html_output))"
] ]
}, },
{ {
@ -533,7 +533,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 275,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -542,7 +542,7 @@
"{'My least': 'favorite', 'My least favorite TV': 'show'}" "{'My least': 'favorite', 'My least favorite TV': 'show'}"
] ]
}, },
"execution_count": 18, "execution_count": 275,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -560,7 +560,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 276,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -569,7 +569,7 @@
"{'My least favorite': 'food', 'My least favorite TV show is': '\"My'}" "{'My least favorite': 'food', 'My least favorite TV show is': '\"My'}"
] ]
}, },
"execution_count": 19, "execution_count": 276,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -602,7 +602,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 277,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -612,39 +612,52 @@
" [{\"role\": \"user\", \"content\": PROMPT}], model=\"gpt-4\", logprobs=True, top_logprobs=5\n", " [{\"role\": \"user\", \"content\": PROMPT}], model=\"gpt-4\", logprobs=True, top_logprobs=5\n",
")\n", ")\n",
"\n", "\n",
"\n",
"def highlight_text(api_response):\n", "def highlight_text(api_response):\n",
" colors = [\n", " colors = [\n",
" Fore.MAGENTA,\n", " \"#FF00FF\", # Magenta\n",
" Fore.GREEN,\n", " \"#008000\", # Green\n",
" Fore.YELLOW,\n", " \"#FF8C00\", # Dark Orange\n",
" Fore.RED,\n", " \"#FF0000\", # Red\n",
" Fore.BLUE,\n", " \"#0000FF\", # Blue\n",
" ]\n", " ]\n",
" reset_color = Fore.RESET\n",
" tokens = api_response.choices[0].logprobs.content\n", " tokens = api_response.choices[0].logprobs.content\n",
"\n", "\n",
" color_idx = 0 # Initialize color index\n", " color_idx = 0 # Initialize color index\n",
" html_output = \"\" # Initialize HTML output\n",
" for t in tokens:\n", " for t in tokens:\n",
" token_str = bytes(t.bytes).decode(\"utf-8\") # Decode bytes to string\n", " token_str = bytes(t.bytes).decode(\"utf-8\") # Decode bytes to string\n",
"\n", "\n",
" print(f\"{colors[color_idx]}{token_str}{reset_color}\", end=\"\")\n", " # Add colored token to HTML output\n",
" html_output += f\"<span style='color: {colors[color_idx]}'>{token_str}</span>\"\n",
"\n", "\n",
" # Move to the next color\n", " # Move to the next color\n",
" color_idx = (color_idx + 1) % len(colors)\n", " color_idx = (color_idx + 1) % len(colors)\n",
" print()\n", " display(HTML(html_output)) # Display HTML output\n",
" print(f\"Total number of tokens: {len(tokens)}\")\n" " print(f\"Total number of tokens: {len(tokens)}\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 278,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{
"data": {
"text/html": [
"<span style='color: #FF00FF'>The</span><span style='color: #008000'> longest</span><span style='color: #FF8C00'> word</span><span style='color: #FF0000'> in</span><span style='color: #0000FF'> the</span><span style='color: #FF00FF'> English</span><span style='color: #008000'> language</span><span style='color: #FF8C00'>,</span><span style='color: #FF0000'> according</span><span style='color: #0000FF'> to</span><span style='color: #FF00FF'> the</span><span style='color: #008000'> Guinness</span><span style='color: #FF8C00'> World</span><span style='color: #FF0000'> Records</span><span style='color: #0000FF'>,</span><span style='color: #FF00FF'> is</span><span style='color: #008000'> '</span><span style='color: #FF8C00'>p</span><span style='color: #FF0000'>ne</span><span style='color: #0000FF'>um</span><span style='color: #FF00FF'>on</span><span style='color: #008000'>oul</span><span style='color: #FF8C00'>tram</span><span style='color: #FF0000'>icro</span><span style='color: #0000FF'>sc</span><span style='color: #FF00FF'>op</span><span style='color: #008000'>ics</span><span style='color: #FF8C00'>il</span><span style='color: #FF0000'>ic</span><span style='color: #0000FF'>ov</span><span style='color: #FF00FF'>ol</span><span style='color: #008000'>cano</span><span style='color: #FF8C00'>con</span><span style='color: #FF0000'>iosis</span><span style='color: #0000FF'>'.</span><span style='color: #FF00FF'> It</span><span style='color: #008000'> is</span><span style='color: #FF8C00'> a</span><span style='color: #FF0000'> type</span><span style='color: #0000FF'> of</span><span style='color: #FF00FF'> lung</span><span style='color: #008000'> disease</span><span style='color: #FF8C00'> caused</span><span style='color: #FF0000'> by</span><span style='color: #0000FF'> inh</span><span style='color: #FF00FF'>aling</span><span style='color: #008000'> ash</span><span style='color: #FF8C00'> and</span><span style='color: #FF0000'> sand</span><span style='color: #0000FF'> 
dust</span><span style='color: #FF00FF'>.</span>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\u001b[35mThe\u001b[39m\u001b[32m longest\u001b[39m\u001b[33m word\u001b[39m\u001b[31m in\u001b[39m\u001b[34m the\u001b[39m\u001b[35m English\u001b[39m\u001b[32m language\u001b[39m\u001b[33m,\u001b[39m\u001b[31m according\u001b[39m\u001b[34m to\u001b[39m\u001b[35m the\u001b[39m\u001b[32m Guinness\u001b[39m\u001b[33m World\u001b[39m\u001b[31m Records\u001b[39m\u001b[34m,\u001b[39m\u001b[35m is\u001b[39m\u001b[32m '\u001b[39m\u001b[33mp\u001b[39m\u001b[31mne\u001b[39m\u001b[34mum\u001b[39m\u001b[35mon\u001b[39m\u001b[32moul\u001b[39m\u001b[33mtram\u001b[39m\u001b[31micro\u001b[39m\u001b[34msc\u001b[39m\u001b[35mop\u001b[39m\u001b[32mics\u001b[39m\u001b[33mil\u001b[39m\u001b[31mic\u001b[39m\u001b[34mov\u001b[39m\u001b[35mol\u001b[39m\u001b[32mcano\u001b[39m\u001b[33mcon\u001b[39m\u001b[31miosis\u001b[39m\u001b[34m'.\u001b[39m\u001b[35m It\u001b[39m\u001b[32m is\u001b[39m\u001b[33m a\u001b[39m\u001b[31m type\u001b[39m\u001b[34m of\u001b[39m\u001b[35m lung\u001b[39m\u001b[32m disease\u001b[39m\u001b[33m caused\u001b[39m\u001b[31m by\u001b[39m\u001b[34m inh\u001b[39m\u001b[35maling\u001b[39m\u001b[32m ash\u001b[39m\u001b[33m and\u001b[39m\u001b[31m sand\u001b[39m\u001b[34m dust\u001b[39m\u001b[35m.\u001b[39m\n",
"Total number of tokens: 51\n" "Total number of tokens: 51\n"
] ]
} }
@ -662,7 +675,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 279,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -680,23 +693,23 @@
"Bytes: [153] \n", "Bytes: [153] \n",
"\n", "\n",
"Token: -\n", "Token: -\n",
"Log prob: -0.011257432\n", "Log prob: -0.0096905725\n",
"Linear prob: 98.88 %\n", "Linear prob: 99.04 %\n",
"Bytes: [32, 45] \n", "Bytes: [32, 45] \n",
"\n", "\n",
"Token: Blue\n", "Token: Blue\n",
"Log prob: -0.0004397287\n", "Log prob: -0.00042042506\n",
"Linear prob: 99.96 %\n", "Linear prob: 99.96 %\n",
"Bytes: [32, 66, 108, 117, 101] \n", "Bytes: [32, 66, 108, 117, 101] \n",
"\n", "\n",
"Token: Heart\n", "Token: Heart\n",
"Log prob: -7.1954215e-05\n", "Log prob: -7.302705e-05\n",
"Linear prob: 99.99 %\n", "Linear prob: 99.99 %\n",
"Bytes: [32, 72, 101, 97, 114, 116] \n", "Bytes: [32, 72, 101, 97, 114, 116] \n",
"\n", "\n",
"Bytes array: [240, 159, 146, 153, 32, 45, 32, 66, 108, 117, 101, 32, 72, 101, 97, 114, 116]\n", "Bytes array: [240, 159, 146, 153, 32, 45, 32, 66, 108, 117, 101, 32, 72, 101, 97, 114, 116]\n",
"Decoded bytes: 💙 - Blue Heart\n", "Decoded bytes: 💙 - Blue Heart\n",
"Joint prob: 98.8 %\n" "Joint prob: 98.96 %\n"
] ]
} }
], ],
@ -727,7 +740,7 @@
"# Print the results\n", "# Print the results\n",
"print(\"Bytes array:\", aggregated_bytes)\n", "print(\"Bytes array:\", aggregated_bytes)\n",
"print(f\"Decoded bytes: {aggregated_text}\")\n", "print(f\"Decoded bytes: {aggregated_text}\")\n",
"print(\"Joint prob:\", np.round(exp(joint_logprob) * 100, 2), \"%\")\n" "print(\"Joint prob:\", np.round(exp(joint_logprob) * 100, 2), \"%\")"
] ]
}, },
{ {