From 228cde1c46da74cb176715b1296027905f597c7d Mon Sep 17 00:00:00 2001
From: Shyamal H Anadkat
Date: Wed, 20 Dec 2023 17:08:59 -0800
Subject: [PATCH] Fix styling for using_logprobs cookbook (#947)

---
 examples/Using_logprobs.ipynb | 291 ++++++++++++++++++----------------
 1 file changed, 152 insertions(+), 139 deletions(-)

diff --git a/examples/Using_logprobs.ipynb b/examples/Using_logprobs.ipynb
index e1144f2..f33c7af 100644
--- a/examples/Using_logprobs.ipynb
+++ b/examples/Using_logprobs.ipynb
@@ -41,22 +41,22 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 1,
+    "execution_count": 264,
     "metadata": {},
     "outputs": [],
     "source": [
      "from openai import OpenAI\n",
      "from math import exp\n",
      "import numpy as np\n",
-     "from colorama import init, Fore\n",
+     "from IPython.display import display, HTML\n",
      "\n",
      "\n",
-     "client = OpenAI()\n"
+     "client = OpenAI()"
     ]
    },
    {
     "cell_type": "code",
-    "execution_count": 2,
+    "execution_count": 265,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -85,7 +85,7 @@
      "        params[\"tools\"] = tools\n",
      "\n",
      "    completion = client.chat.completions.create(**params)\n",
-     "    return completion\n"
+     "    return completion"
     ]
    },
    {
@@ -113,7 +113,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 3,
+    "execution_count": 266,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -133,7 +133,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 4,
+    "execution_count": 267,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -146,7 +146,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 5,
+    "execution_count": 268,
     "metadata": {},
     "outputs": [
      {
@@ -175,7 +175,7 @@
      "        [{\"role\": \"user\", \"content\": CLASSIFICATION_PROMPT.format(headline=headline)}],\n",
      "        model=\"gpt-4\",\n",
      "    )\n",
-     "    print(f\"Category: {API_RESPONSE.choices[0].message.content}\\n\")\n"
+     "    print(f\"Category: {API_RESPONSE.choices[0].message.content}\\n\")"
     ]
    },
    {
@@ -187,7 +187,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 6,
+    "execution_count": 269,
     "metadata": {},
     "outputs": [
      {
@@ -195,21 +195,69 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "\n",
-      "Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.\n",
-      "\u001b[36mOutput token 1:\u001b[39m Technology, \u001b[33mlogprobs:\u001b[39m -1.9816675e-06, \u001b[35mlinear probability:\u001b[39m 100.0%\n",
-      "\u001b[36mOutput token 2:\u001b[39m Techn, \u001b[33mlogprobs:\u001b[39m -14.062502, \u001b[35mlinear probability:\u001b[39m 0.0%\n",
+      "Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Output token 1: Technology, logprobs: -2.4584822e-06, linear probability: 100.0%<br>Output token 2: Techn, logprobs: -13.781253, linear probability: 0.0%<br>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "\n",
       "\n",
       "\n",
-      "Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.\n",
-      "\u001b[36mOutput token 1:\u001b[39m Politics, \u001b[33mlogprobs:\u001b[39m -3.650519e-06, \u001b[35mlinear probability:\u001b[39m 100.0%\n",
-      "\u001b[36mOutput token 2:\u001b[39m Technology, \u001b[33mlogprobs:\u001b[39m -13.015629, \u001b[35mlinear probability:\u001b[39m 0.0%\n",
+      "Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Output token 1: Politics, logprobs: -2.4584822e-06, linear probability: 100.0%<br>Output token 2: Technology, logprobs: -13.937503, linear probability: 0.0%<br>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "\n",
       "\n",
       "\n",
-      "Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut\n",
-      "\u001b[36mOutput token 1:\u001b[39m Art, \u001b[33mlogprobs:\u001b[39m -0.19579042, \u001b[35mlinear probability:\u001b[39m 82.22%\n",
-      "\u001b[36mOutput token 2:\u001b[39m Sports, \u001b[33mlogprobs:\u001b[39m -1.7270404, \u001b[35mlinear probability:\u001b[39m 17.78%\n",
+      "Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Output token 1: Art, logprobs: -0.009169078, linear probability: 99.09%<br>Output token 2: Sports, logprobs: -4.696669, linear probability: 0.91%<br>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "\n",
       "\n"
      ]
@@ -225,13 +273,15 @@
      "        top_logprobs=2,\n",
      "    )\n",
      "    top_two_logprobs = API_RESPONSE.choices[0].logprobs.content[0].top_logprobs\n",
+     "    html_content = \"\"\n",
      "    for i, logprob in enumerate(top_two_logprobs, start=1):\n",
-     "        print(\n",
-     "            f\"{Fore.CYAN}Output token {i}:{Fore.RESET} {logprob.token}, \"\n",
-     "            f\"{Fore.YELLOW}logprobs:{Fore.RESET} {logprob.logprob}, \"\n",
-     "            f\"{Fore.MAGENTA}linear probability:{Fore.RESET} {np.round(np.exp(logprob.logprob)*100,2)}%\"\n",
+     "        html_content += (\n",
+     "            f\"Output token {i}: {logprob.token}, \"\n",
+     "            f\"logprobs: {logprob.logprob}, \"\n",
+     "            f\"linear probability: {np.round(np.exp(logprob.logprob)*100,2)}%<br>\"\n",
      "        )\n",
-     "    print(\"\\n\")\n"
+     "    display(HTML(html_content))\n",
+     "    print(\"\\n\")"
     ]
    },
    {
@@ -266,7 +316,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 13,
+    "execution_count": 270,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -289,7 +339,7 @@
      "medium_questions = [\n",
      "    \"Did Lovelace collaborate with Charles Dickens\",\n",
      "    \"What concepts did Lovelace build with Charles Babbage\",\n",
-     "]\n"
+     "]"
     ]
    },
    {
@@ -301,7 +351,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 14,
+    "execution_count": 271,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -314,36 +364,25 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 15,
+    "execution_count": 272,
     "metadata": {},
     "outputs": [
      {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Questions clearly answered in article\n",
-       "\n",
-       "\u001b[32mQuestion:\u001b[39m What nationality was Ada Lovelace?\n",
-       "\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -3.1281633e-07, \u001b[35mlinear probability:\u001b[39m 100.0% \n",
-       "\n",
-       "\u001b[32mQuestion:\u001b[39m What was an important finding from Lovelace's seventh note?\n",
-       "\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -6.704273e-07, \u001b[35mlinear probability:\u001b[39m 100.0% \n",
-       "\n",
-       "\n",
-       "\n",
-       "Questions only partially covered in the article\n",
-       "\n",
-       "\u001b[32mQuestion:\u001b[39m Did Lovelace collaborate with Charles Dickens\n",
-       "\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -0.07655343, \u001b[35mlinear probability:\u001b[39m 92.63% \n",
-       "\n",
-       "\u001b[32mQuestion:\u001b[39m What concepts did Lovelace build with Charles Babbage\n",
-       "\u001b[36mhas_sufficient_context_for_answer:\u001b[39m True, \u001b[33mlogprobs:\u001b[39m -0.0699371, \u001b[35mlinear probability:\u001b[39m 93.25% \n",
-       "\n"
-      ]
+      "data": {
+       "text/html": [
+        "Questions clearly answered in article<p>Question: What nationality was Ada Lovelace?</p><p>has_sufficient_context_for_answer: True, logprobs: -3.1281633e-07, linear probability: 100.0%</p><p>Question: What was an important finding from Lovelace's seventh note?</p><p>has_sufficient_context_for_answer: True, logprobs: -7.89631e-07, linear probability: 100.0%</p>Questions only partially covered in the article<p>Question: Did Lovelace collaborate with Charles Dickens</p><p>has_sufficient_context_for_answer: True, logprobs: -0.06993677, linear probability: 93.25%</p><p>Question: What concepts did Lovelace build with Charles Babbage</p><p>has_sufficient_context_for_answer: False, logprobs: -0.61807257, linear probability: 53.9%</p>"
+       ],
+       "text/plain": [
+        "<IPython.core.display.HTML object>"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
      }
     ],
     "source": [
-     "print(\"Questions clearly answered in article\" + \"\\n\")\n",
+     "html_output = \"\"\n",
+     "html_output += \"Questions clearly answered in article\"\n",
      "\n",
      "for question in easy_questions:\n",
      "    API_RESPONSE = get_completion(\n",
@@ -358,14 +397,11 @@
      "        model=\"gpt-4\",\n",
      "        logprobs=True,\n",
      "    )\n",
-     "    print(Fore.GREEN + \"Question:\" + Fore.RESET, question)\n",
+     "    html_output += f'<p>Question: {question}</p>'\n",
      "    for logprob in API_RESPONSE.choices[0].logprobs.content:\n",
-     "        print(\n",
-     "            Fore.CYAN + \"has_sufficient_context_for_answer:\" + Fore.RESET + f\" {logprob.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {logprob.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(logprob.logprob)*100,2)}%\",\n",
-     "            \"\\n\",\n",
-     "        )\n",
+     "        html_output += f'<p>has_sufficient_context_for_answer: {logprob.token}, logprobs: {logprob.logprob}, linear probability: {np.round(np.exp(logprob.logprob)*100,2)}%</p>'\n",
      "\n",
-     "print(\"\\n\\n\" + \"Questions only partially covered in the article\" + \"\\n\")\n",
+     "html_output += \"Questions only partially covered in the article\"\n",
      "\n",
      "for question in medium_questions:\n",
      "    API_RESPONSE = get_completion(\n",
@@ -381,12 +417,11 @@
      "        logprobs=True,\n",
      "        top_logprobs=3,\n",
      "    )\n",
-     "    print(Fore.GREEN + \"Question:\" + Fore.RESET, question)\n",
+     "    html_output += f'<p>Question: {question}</p>'\n",
      "    for logprob in API_RESPONSE.choices[0].logprobs.content:\n",
-     "        print(\n",
-     "            Fore.CYAN + \"has_sufficient_context_for_answer:\" + Fore.RESET + f\" {logprob.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {logprob.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(logprob.logprob)*100,2)}%\",\n",
-     "            \"\\n\",\n",
-     "        )\n"
+     "        html_output += f'<p>has_sufficient_context_for_answer: {logprob.token}, logprobs: {logprob.logprob}, linear probability: {np.round(np.exp(logprob.logprob)*100,2)}%</p>'\n",
+     "\n",
+     "display(HTML(html_output))"
     ]
    },
    {
@@ -421,7 +456,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 16,
+    "execution_count": 273,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -433,7 +468,7 @@
      "    \"My least favorite TV show\",\n",
      "    \"My least favorite TV show is\",\n",
      "    \"My least favorite TV show is Breaking Bad\",\n",
-     "]\n"
+     "]"
     ]
    },
    {
@@ -445,61 +480,26 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 17,
+    "execution_count": 274,
     "metadata": {},
     "outputs": [
      {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "\u001b[36mSentence:\u001b[39m My\n",
-       "\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -0.18245785, \u001b[35mlinear probability:\u001b[39m 83.32%\n",
-       "\u001b[36mPredicted next token:\u001b[39m dog, \u001b[33mlogprobs:\u001b[39m -2.397172, \u001b[35mlinear probability:\u001b[39m 9.1%\n",
-       "\u001b[36mPredicted next token:\u001b[39m ap, \u001b[33mlogprobs:\u001b[39m -3.8732424, \u001b[35mlinear probability:\u001b[39m 2.08%\n",
-       "\n",
-       "\n",
-       "\u001b[36mSentence:\u001b[39m My least\n",
-       "\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -0.01722952, \u001b[35mlinear probability:\u001b[39m 98.29%\n",
-       "\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -4.079079, \u001b[35mlinear probability:\u001b[39m 1.69%\n",
-       "\u001b[36mPredicted next token:\u001b[39m favorite, \u001b[33mlogprobs:\u001b[39m -9.6813755, \u001b[35mlinear probability:\u001b[39m 0.01%\n",
-       "\n",
-       "\n",
-       "\u001b[36mSentence:\u001b[39m My least favorite\n",
-       "\u001b[36mPredicted next token:\u001b[39m food, \u001b[33mlogprobs:\u001b[39m -0.9481721, \u001b[35mlinear probability:\u001b[39m 38.74%\n",
-       "\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -1.3447137, \u001b[35mlinear probability:\u001b[39m 26.06%\n",
-       "\u001b[36mPredicted next token:\u001b[39m color, \u001b[33mlogprobs:\u001b[39m -1.3887696, \u001b[35mlinear probability:\u001b[39m 24.94%\n",
-       "\n",
-       "\n",
-       "\u001b[36mSentence:\u001b[39m My least favorite TV\n",
-       "\u001b[36mPredicted next token:\u001b[39m show, \u001b[33mlogprobs:\u001b[39m -0.0007898556, \u001b[35mlinear probability:\u001b[39m 99.92%\n",
-       "\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -7.711523, \u001b[35mlinear probability:\u001b[39m 0.04%\n",
-       "\u001b[36mPredicted next token:\u001b[39m series, \u001b[33mlogprobs:\u001b[39m -9.348547, \u001b[35mlinear probability:\u001b[39m 0.01%\n",
-       "\n",
-       "\n",
-       "\u001b[36mSentence:\u001b[39m My least favorite TV show\n",
-       "\u001b[36mPredicted next token:\u001b[39m is, \u001b[33mlogprobs:\u001b[39m -0.18602066, \u001b[35mlinear probability:\u001b[39m 83.03%\n",
-       "\u001b[36mPredicted next token:\u001b[39m of, \u001b[33mlogprobs:\u001b[39m -2.0780265, \u001b[35mlinear probability:\u001b[39m 12.52%\n",
-       "\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -3.271426, \u001b[35mlinear probability:\u001b[39m 3.8%\n",
-       "\n",
-       "\n",
-       "\u001b[36mSentence:\u001b[39m My least favorite TV show is\n",
-       "\u001b[36mPredicted next token:\u001b[39m \"My, \u001b[33mlogprobs:\u001b[39m -0.77423567, \u001b[35mlinear probability:\u001b[39m 46.11%\n",
-       "\u001b[36mPredicted next token:\u001b[39m \"The, \u001b[33mlogprobs:\u001b[39m -1.2854586, \u001b[35mlinear probability:\u001b[39m 27.65%\n",
-       "\u001b[36mPredicted next token:\u001b[39m My, \u001b[33mlogprobs:\u001b[39m -2.2629042, \u001b[35mlinear probability:\u001b[39m 10.4%\n",
-       "\n",
-       "\n",
-       "\u001b[36mSentence:\u001b[39m My least favorite TV show is Breaking Bad\n",
-       "\u001b[36mPredicted next token:\u001b[39m because, \u001b[33mlogprobs:\u001b[39m -0.16519119, \u001b[35mlinear probability:\u001b[39m 84.77%\n",
-       "\u001b[36mPredicted next token:\u001b[39m ,, \u001b[33mlogprobs:\u001b[39m -2.430881, \u001b[35mlinear probability:\u001b[39m 8.8%\n",
-       "\u001b[36mPredicted next token:\u001b[39m ., \u001b[33mlogprobs:\u001b[39m -3.2097907, \u001b[35mlinear probability:\u001b[39m 4.04%\n",
-       "\n",
-       "\n"
-      ]
+      "data": {
+       "text/html": [
+        "<p>Sentence: My</p><p>Predicted next token: favorite, logprobs: -0.18245785, linear probability: 83.32%</p><p>Predicted next token: dog, logprobs: -2.397172, linear probability: 9.1%</p><p>Predicted next token: ap, logprobs: -3.8732424, linear probability: 2.08%</p><br><p>Sentence: My least</p><p>Predicted next token: favorite, logprobs: -0.0146376295, linear probability: 98.55%</p><p>Predicted next token: My, logprobs: -4.2417912, linear probability: 1.44%</p><p>Predicted next token: favorite, logprobs: -9.748788, linear probability: 0.01%</p><br><p>Sentence: My least favorite</p><p>Predicted next token: food, logprobs: -0.9481721, linear probability: 38.74%</p><p>Predicted next token: My, logprobs: -1.3447137, linear probability: 26.06%</p><p>Predicted next token: color, logprobs: -1.3887696, linear probability: 24.94%</p><br><p>Sentence: My least favorite TV</p><p>Predicted next token: show, logprobs: -0.0007898556, linear probability: 99.92%</p><p>Predicted next token: My, logprobs: -7.711523, linear probability: 0.04%</p><p>Predicted next token: series, logprobs: -9.348547, linear probability: 0.01%</p><br><p>Sentence: My least favorite TV show</p><p>Predicted next token: is, logprobs: -0.2851253, linear probability: 75.19%</p><p>Predicted next token: of, logprobs: -1.55335, linear probability: 21.15%</p><p>Predicted next token: My, logprobs: -3.4928775, linear probability: 3.04%</p><br><p>Sentence: My least favorite TV show is</p><p>Predicted next token: \"My, logprobs: -0.69349754, linear probability: 49.98%</p><p>Predicted next token: \"The, logprobs: -1.2899293, linear probability: 27.53%</p><p>Predicted next token: My, logprobs: -2.4170141, linear probability: 8.92%</p><br><p>Sentence: My least favorite TV show is Breaking Bad</p><p>Predicted next token: because, logprobs: -0.17786823, linear probability: 83.71%</p><p>Predicted next token: ,, logprobs: -2.3946173, linear probability: 9.12%</p><p>Predicted next token: ., logprobs: -3.1861975, linear probability: 4.13%</p><br>"
+       ],
+       "text/plain": [
+        "<IPython.core.display.HTML object>"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
      }
     ],
     "source": [
      "high_prob_completions = {}\n",
      "low_prob_completions = {}\n",
+     "html_output = \"\"\n",
      "\n",
      "for sentence in sentence_list:\n",
      "    PROMPT = \"\"\"Complete this sentence. You are acting as auto-complete. Simply complete the sentence to the best of your ability, make sure it is just ONE sentence: {sentence}\"\"\"\n",
@@ -509,19 +509,19 @@
      "        logprobs=True,\n",
      "        top_logprobs=3,\n",
      "    )\n",
-     "    print(Fore.CYAN + \"Sentence:\" + Fore.RESET, sentence)\n",
+     "    html_output += f'<p>Sentence: {sentence}</p>'\n",
      "    first_token = True\n",
      "    for token in API_RESPONSE.choices[0].logprobs.content[0].top_logprobs:\n",
-     "        print(\n",
-     "            Fore.CYAN + \"Predicted next token:\" + Fore.RESET + f\" {token.token}, \" + Fore.YELLOW + \"logprobs:\" + Fore.RESET + f\" {token.logprob}, \" + Fore.MAGENTA + \"linear probability:\" + Fore.RESET + f\" {np.round(np.exp(token.logprob)*100,2)}%\"\n",
-     "        )\n",
+     "        html_output += f'<p>Predicted next token: {token.token}, logprobs: {token.logprob}, linear probability: {np.round(np.exp(token.logprob)*100,2)}%</p>'\n",
      "        if first_token:\n",
      "            if np.exp(token.logprob) > 0.95:\n",
      "                high_prob_completions[sentence] = token.token\n",
      "            if np.exp(token.logprob) < 0.60:\n",
      "                low_prob_completions[sentence] = token.token\n",
      "        first_token = False\n",
-     "    print(\"\\n\")\n"
+     "    html_output += \"<br>\"\n",
+     "\n",
+     "display(HTML(html_output))"
     ]
    },
    {
@@ -533,7 +533,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 18,
+    "execution_count": 275,
     "metadata": {},
     "outputs": [
      {
@@ -542,7 +542,7 @@
        "{'My least': 'favorite', 'My least favorite TV': 'show'}"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 275,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -560,7 +560,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 19,
+    "execution_count": 276,
     "metadata": {},
     "outputs": [
      {
@@ -569,7 +569,7 @@
        "{'My least favorite': 'food', 'My least favorite TV show is': '\"My'}"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 276,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -602,7 +602,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 20,
+    "execution_count": 277,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -612,39 +612,52 @@
      "PROMPT = \"\"\"What's the longest word in the English language?\"\"\"\n",
      "\n",
      "API_RESPONSE = get_completion(\n",
      "    [{\"role\": \"user\", \"content\": PROMPT}], model=\"gpt-4\", logprobs=True, top_logprobs=5\n",
      ")\n",
      "\n",
+     "\n",
      "def highlight_text(api_response):\n",
      "    colors = [\n",
-     "        Fore.MAGENTA,\n",
-     "        Fore.GREEN,\n",
-     "        Fore.YELLOW,\n",
-     "        Fore.RED,\n",
-     "        Fore.BLUE,\n",
+     "        \"#FF00FF\",  # Magenta\n",
+     "        \"#008000\",  # Green\n",
+     "        \"#FF8C00\",  # Dark Orange\n",
+     "        \"#FF0000\",  # Red\n",
+     "        \"#0000FF\",  # Blue\n",
      "    ]\n",
-     "    reset_color = Fore.RESET\n",
      "    tokens = api_response.choices[0].logprobs.content\n",
      "\n",
      "    color_idx = 0  # Initialize color index\n",
+     "    html_output = \"\"  # Initialize HTML output\n",
      "    for t in tokens:\n",
      "        token_str = bytes(t.bytes).decode(\"utf-8\")  # Decode bytes to string\n",
      "\n",
-     "        print(f\"{colors[color_idx]}{token_str}{reset_color}\", end=\"\")\n",
+     "        # Add colored token to HTML output\n",
+     "        html_output += f\"<span style='color: {colors[color_idx]}'>{token_str}</span>\"\n",
      "\n",
      "        # Move to the next color\n",
      "        color_idx = (color_idx + 1) % len(colors)\n",
-     "    print()\n",
-     "    print(f\"Total number of tokens: {len(tokens)}\")\n"
+     "    display(HTML(html_output))  # Display HTML output\n",
+     "    print(f\"Total number of tokens: {len(tokens)}\")"
     ]
    },
    {
     "cell_type": "code",
-    "execution_count": 21,
+    "execution_count": 278,
     "metadata": {},
     "outputs": [
+     {
+      "data": {
+       "text/html": [
+        "The longest word in the English language, according to the Guinness World Records, is 'pneumonoultramicroscopicsilicovolcanoconiosis'. It is a type of lung disease caused by inhaling ash and sand dust."
+       ],
+       "text/plain": [
+        "<IPython.core.display.HTML object>"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     },
      {
       "name": "stdout",
       "output_type": "stream",
       "text": [
-       "\u001b[35mThe\u001b[39m\u001b[32m longest\u001b[39m\u001b[33m word\u001b[39m\u001b[31m in\u001b[39m\u001b[34m the\u001b[39m\u001b[35m English\u001b[39m\u001b[32m language\u001b[39m\u001b[33m,\u001b[39m\u001b[31m according\u001b[39m\u001b[34m to\u001b[39m\u001b[35m the\u001b[39m\u001b[32m Guinness\u001b[39m\u001b[33m World\u001b[39m\u001b[31m Records\u001b[39m\u001b[34m,\u001b[39m\u001b[35m is\u001b[39m\u001b[32m '\u001b[39m\u001b[33mp\u001b[39m\u001b[31mne\u001b[39m\u001b[34mum\u001b[39m\u001b[35mon\u001b[39m\u001b[32moul\u001b[39m\u001b[33mtram\u001b[39m\u001b[31micro\u001b[39m\u001b[34msc\u001b[39m\u001b[35mop\u001b[39m\u001b[32mics\u001b[39m\u001b[33mil\u001b[39m\u001b[31mic\u001b[39m\u001b[34mov\u001b[39m\u001b[35mol\u001b[39m\u001b[32mcano\u001b[39m\u001b[33mcon\u001b[39m\u001b[31miosis\u001b[39m\u001b[34m'.\u001b[39m\u001b[35m It\u001b[39m\u001b[32m is\u001b[39m\u001b[33m a\u001b[39m\u001b[31m type\u001b[39m\u001b[34m of\u001b[39m\u001b[35m lung\u001b[39m\u001b[32m disease\u001b[39m\u001b[33m caused\u001b[39m\u001b[31m by\u001b[39m\u001b[34m inh\u001b[39m\u001b[35maling\u001b[39m\u001b[32m ash\u001b[39m\u001b[33m and\u001b[39m\u001b[31m sand\u001b[39m\u001b[34m dust\u001b[39m\u001b[35m.\u001b[39m\n",
        "Total number of tokens: 51\n"
       ]
      }
@@ -662,7 +675,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 22,
+    "execution_count": 279,
     "metadata": {},
     "outputs": [
      {
@@ -680,23 +693,23 @@
       "Bytes: [153] \n",
       "\n",
       "Token: -\n",
-      "Log prob: -0.011257432\n",
-      "Linear prob: 98.88 %\n",
+      "Log prob: -0.0096905725\n",
+      "Linear prob: 99.04 %\n",
       "Bytes: [32, 45] \n",
       "\n",
       "Token: Blue\n",
-      "Log prob: -0.0004397287\n",
+      "Log prob: -0.00042042506\n",
       "Linear prob: 99.96 %\n",
       "Bytes: [32, 66, 108, 117, 101] \n",
       "\n",
       "Token: Heart\n",
-      "Log prob: -7.1954215e-05\n",
+      "Log prob: -7.302705e-05\n",
       "Linear prob: 99.99 %\n",
       "Bytes: [32, 72, 101, 97, 114, 116] \n",
       "\n",
       "Bytes array: [240, 159, 146, 153, 32, 45, 32, 66, 108, 117, 101, 32, 72, 101, 97, 114, 116]\n",
       "Decoded bytes: 💙 - Blue Heart\n",
-      "Joint prob: 98.8 %\n"
+      "Joint prob: 98.96 %\n"
      ]
     }
@@ -727,7 +740,7 @@
      "# Print the results\n",
      "print(\"Bytes array:\", aggregated_bytes)\n",
      "print(f\"Decoded bytes: {aggregated_text}\")\n",
-     "print(\"Joint prob:\", np.round(exp(joint_logprob) * 100, 2), \"%\")\n"
+     "print(\"Joint prob:\", np.round(exp(joint_logprob) * 100, 2), \"%\")"
     ]
    },
    {