update for 4o-mini (#1421)
parent 8127d0c929
commit db3144982a
@@ -22,20 +22,21 @@
 "\n",
 "| Encoding name | OpenAI models |\n",
 "|-------------------------|-----------------------------------------------------|\n",
-"| `cl100k_base` | `gpt-4`, `gpt-3.5-turbo`, `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` |\n",
+"| `o200k_base` | `gpt-4o`, `gpt-4o-mini` |\n",
+"| `cl100k_base` | `gpt-4-turbo`, `gpt-4`, `gpt-3.5-turbo`, `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` |\n",
 "| `p50k_base` | Codex models, `text-davinci-002`, `text-davinci-003`|\n",
 "| `r50k_base` (or `gpt2`) | GPT-3 models like `davinci` |\n",
 "\n",
 "You can retrieve the encoding for a model using `tiktoken.encoding_for_model()` as follows:\n",
 "```python\n",
-"encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')\n",
+"encoding = tiktoken.encoding_for_model('gpt-4o-mini')\n",
 "```\n",
 "\n",
 "Note that `p50k_base` overlaps substantially with `r50k_base`, and for non-code applications, they will usually give the same tokens.\n",
 "\n",
 "## Tokenizer libraries by language\n",
 "\n",
-"For `cl100k_base` and `p50k_base` encodings:\n",
+"For `o200k_base`, `cl100k_base` and `p50k_base` encodings:\n",
 "- Python: [tiktoken](https://github.com/openai/tiktoken/blob/main/README.md)\n",
 "- .NET / C#: [SharpToken](https://github.com/dmitry-brazhenko/SharpToken), [TiktokenSharp](https://github.com/aiqinxuancai/TiktokenSharp)\n",
 "- Java: [jtokkit](https://github.com/knuddelsgmbh/jtokkit)\n",
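The hunk above adds `o200k_base` to the encoding table and switches the lookup example to `gpt-4o-mini`. A minimal sketch of that lookup, assuming a `tiktoken` version recent enough to ship `o200k_base` (0.7.0 or later):

```python
import tiktoken

# Look up the encoding that gpt-4o-mini uses (o200k_base, per the table above).
encoding = tiktoken.encoding_for_model("gpt-4o-mini")
print(encoding.name)  # expected: o200k_base

# The same encoding can also be requested by name.
assert tiktoken.get_encoding("o200k_base").name == encoding.name
```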
@@ -71,12 +72,27 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 1,
 "metadata": {},
-"outputs": [],
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"\n",
+"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
+"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+"Note: you may need to restart the kernel to use updated packages.\n",
+"\n",
+"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
+"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+"Note: you may need to restart the kernel to use updated packages.\n"
+]
+}
+],
 "source": [
-"%pip install --upgrade tiktoken\n",
-"%pip install --upgrade openai"
+"%pip install --upgrade tiktoken -q\n",
+"%pip install --upgrade openai -q"
 ]
 },
 {
@@ -89,7 +105,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 2,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -131,7 +147,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"encoding = tiktoken.encoding_for_model(\"gpt-3.5-turbo\")"
+"encoding = tiktoken.encoding_for_model(\"gpt-4o-mini\")"
 ]
 },
 {
@@ -159,7 +175,7 @@
 {
 "data": {
 "text/plain": [
-"[83, 1609, 5963, 374, 2294, 0]"
+"[83, 8251, 2488, 382, 2212, 0]"
 ]
 },
 "execution_count": 5,
@@ -168,7 +184,7 @@
 }
 ],
 "source": [
-"encoding.encode(\"tiktoken is great!\")\n"
+"encoding.encode(\"tiktoken is great!\")"
 ]
 },
 {
@@ -189,7 +205,7 @@
 "    \"\"\"Returns the number of tokens in a text string.\"\"\"\n",
 "    encoding = tiktoken.get_encoding(encoding_name)\n",
 "    num_tokens = len(encoding.encode(string))\n",
-"    return num_tokens\n"
+"    return num_tokens"
 ]
 },
 {
@@ -209,7 +225,7 @@
 }
 ],
 "source": [
-"num_tokens_from_string(\"tiktoken is great!\", \"cl100k_base\")\n"
+"num_tokens_from_string(\"tiktoken is great!\", \"o200k_base\")"
 ]
 },
 {
@@ -245,7 +261,7 @@
 }
 ],
 "source": [
-"encoding.decode([83, 1609, 5963, 374, 2294, 0])\n"
+"encoding.decode([83, 8251, 2488, 382, 2212, 0])"
 ]
 },
 {
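The token IDs in these cells change because `o200k_base` assigns different IDs than `cl100k_base`. A quick round-trip sketch consistent with the updated outputs, assuming `tiktoken` with `o200k_base` support:

```python
import tiktoken

encoding = tiktoken.get_encoding("o200k_base")
tokens = encoding.encode("tiktoken is great!")
print(tokens)  # [83, 8251, 2488, 382, 2212, 0] per the updated output

# decode() inverts encode(), so the round trip recovers the original string.
assert encoding.decode(tokens) == "tiktoken is great!"
```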
@@ -272,7 +288,7 @@
 {
 "data": {
 "text/plain": [
-"[b't', b'ik', b'token', b' is', b' great', b'!']"
+"[b't', b'ikt', b'oken', b' is', b' great', b'!']"
 ]
 },
 "execution_count": 9,
@@ -281,7 +297,7 @@
 }
 ],
 "source": [
-"[encoding.decode_single_token_bytes(token) for token in [83, 1609, 5963, 374, 2294, 0]]\n"
+"[encoding.decode_single_token_bytes(token) for token in [83, 8251, 2488, 382, 2212, 0]]\n"
 ]
 },
 {
@@ -313,7 +329,7 @@
 "    # print the example string\n",
 "    print(f'\\nExample string: \"{example_string}\"')\n",
 "    # for each encoding, print the # of tokens, the token integers, and the token bytes\n",
-"    for encoding_name in [\"r50k_base\", \"p50k_base\", \"cl100k_base\"]:\n",
+"    for encoding_name in [\"r50k_base\", \"p50k_base\", \"cl100k_base\", \"o200k_base\"]:\n",
 "        encoding = tiktoken.get_encoding(encoding_name)\n",
 "        token_integers = encoding.encode(example_string)\n",
 "        num_tokens = len(token_integers)\n",
@@ -321,8 +337,7 @@
 "        print()\n",
 "        print(f\"{encoding_name}: {num_tokens} tokens\")\n",
 "        print(f\"token integers: {token_integers}\")\n",
-"        print(f\"token bytes: {token_bytes}\")\n",
-"    "
+"        print(f\"token bytes: {token_bytes}\")"
 ]
 },
 {
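These two hunks only touch fragments of `compare_encodings()`. A self-contained sketch of the updated function, reconstructed from the visible lines (the `token_bytes` assignment is an assumption inferred from the surrounding prints):

```python
import tiktoken

def compare_encodings(example_string: str) -> None:
    """Prints a comparison of four string encodings."""
    # print the example string
    print(f'\nExample string: "{example_string}"')
    # for each encoding, print the # of tokens, the token integers, and the token bytes
    for encoding_name in ["r50k_base", "p50k_base", "cl100k_base", "o200k_base"]:
        encoding = tiktoken.get_encoding(encoding_name)
        token_integers = encoding.encode(example_string)
        num_tokens = len(token_integers)
        # assumed from the prints below: the per-token byte strings
        token_bytes = [encoding.decode_single_token_bytes(t) for t in token_integers]
        print()
        print(f"{encoding_name}: {num_tokens} tokens")
        print(f"token integers: {token_integers}")
        print(f"token bytes: {token_bytes}")

compare_encodings("antidisestablishmentarianism")
```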
@@ -347,12 +362,16 @@
 "\n",
 "cl100k_base: 6 tokens\n",
 "token integers: [519, 85342, 34500, 479, 8997, 2191]\n",
-"token bytes: [b'ant', b'idis', b'establish', b'ment', b'arian', b'ism']\n"
+"token bytes: [b'ant', b'idis', b'establish', b'ment', b'arian', b'ism']\n",
+"\n",
+"o200k_base: 6 tokens\n",
+"token integers: [493, 129901, 376, 160388, 21203, 2367]\n",
+"token bytes: [b'ant', b'idis', b'est', b'ablishment', b'arian', b'ism']\n"
 ]
 }
 ],
 "source": [
-"compare_encodings(\"antidisestablishmentarianism\")\n"
+"compare_encodings(\"antidisestablishmentarianism\")"
 ]
 },
 {
@@ -377,12 +396,16 @@
 "\n",
 "cl100k_base: 7 tokens\n",
 "token integers: [17, 489, 220, 17, 284, 220, 19]\n",
+"token bytes: [b'2', b' +', b' ', b'2', b' =', b' ', b'4']\n",
+"\n",
+"o200k_base: 7 tokens\n",
+"token integers: [17, 659, 220, 17, 314, 220, 19]\n",
 "token bytes: [b'2', b' +', b' ', b'2', b' =', b' ', b'4']\n"
 ]
 }
 ],
 "source": [
-"compare_encodings(\"2 + 2 = 4\")\n"
+"compare_encodings(\"2 + 2 = 4\")"
 ]
 },
 {
@@ -407,12 +430,16 @@
 "\n",
 "cl100k_base: 9 tokens\n",
 "token integers: [33334, 45918, 243, 21990, 9080, 33334, 62004, 16556, 78699]\n",
-"token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f', b'\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n"
+"token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f', b'\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n",
+"\n",
+"o200k_base: 8 tokens\n",
+"token integers: [8930, 9697, 243, 128225, 8930, 17693, 4344, 48669]\n",
+"token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n"
 ]
 }
 ],
 "source": [
-"compare_encodings(\"お誕生日おめでとう\")\n"
+"compare_encodings(\"お誕生日おめでとう\")"
 ]
 },
 {
@@ -422,9 +449,9 @@
 "source": [
 "## 6. Counting tokens for chat completions API calls\n",
 "\n",
-"ChatGPT models like `gpt-3.5-turbo` and `gpt-4` use tokens in the same way as older completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n",
+"ChatGPT models like `gpt-4o-mini` and `gpt-4` use tokens in the same way as older completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n",
 "\n",
-"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo` or `gpt-4`.\n",
+"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo`, `gpt-4`, `gpt-4o` and `gpt-4o-mini`.\n",
 "\n",
 "Note that the exact way that tokens are counted from messages may change from model to model. Consider the counts from the function below an estimate, not a timeless guarantee.\n",
 "\n",
@@ -433,33 +460,37 @@
 },
 {
 "cell_type": "code",
-"execution_count": 2,
+"execution_count": 14,
 "metadata": {},
 "outputs": [],
 "source": [
-"def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\"):\n",
+"def num_tokens_from_messages(messages, model=\"gpt-4o-mini-2024-07-18\"):\n",
 "    \"\"\"Return the number of tokens used by a list of messages.\"\"\"\n",
 "    try:\n",
 "        encoding = tiktoken.encoding_for_model(model)\n",
 "    except KeyError:\n",
-"        print(\"Warning: model not found. Using cl100k_base encoding.\")\n",
-"        encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
+"        print(\"Warning: model not found. Using o200k_base encoding.\")\n",
+"        encoding = tiktoken.get_encoding(\"o200k_base\")\n",
 "    if model in {\n",
-"        \"gpt-3.5-turbo-0613\",\n",
-"        \"gpt-3.5-turbo-16k-0613\",\n",
+"        \"gpt-3.5-turbo-0125\",\n",
 "        \"gpt-4-0314\",\n",
 "        \"gpt-4-32k-0314\",\n",
 "        \"gpt-4-0613\",\n",
 "        \"gpt-4-32k-0613\",\n",
+"        \"gpt-4o-mini-2024-07-18\",\n",
+"        \"gpt-4o-2024-08-06\"\n",
 "        }:\n",
 "        tokens_per_message = 3\n",
 "        tokens_per_name = 1\n",
-"    elif model == \"gpt-3.5-turbo-0301\":\n",
-"        tokens_per_message = 4  # every message follows <|start|>{role/name}\\n{content}<|end|>\\n\n",
-"        tokens_per_name = -1  # if there's a name, the role is omitted\n",
 "    elif \"gpt-3.5-turbo\" in model:\n",
-"        print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\")\n",
-"        return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\")\n",
+"        print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\")\n",
+"        return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0125\")\n",
+"    elif \"gpt-4o-mini\" in model:\n",
+"        print(\"Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\")\n",
+"        return num_tokens_from_messages(messages, model=\"gpt-4o-mini-2024-07-18\")\n",
+"    elif \"gpt-4o\" in model:\n",
+"        print(\"Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\")\n",
+"        return num_tokens_from_messages(messages, model=\"gpt-4o-2024-08-06\")\n",
 "    elif \"gpt-4\" in model:\n",
 "        print(\"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\")\n",
 "        return num_tokens_from_messages(messages, model=\"gpt-4-0613\")\n",
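A minimal offline usage sketch for the updated `num_tokens_from_messages()` (hypothetical message list; no API call is needed, since the count is computed locally with tiktoken):

```python
# Assumes num_tokens_from_messages() from the cell above is already defined.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "How many tokens will this conversation use?"},
]

# gpt-4o-mini resolves to o200k_base; per the diff, each message carries a
# fixed overhead (tokens_per_message = 3) on top of its content tokens.
print(num_tokens_from_messages(messages, model="gpt-4o-mini-2024-07-18"))
```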
@@ -480,27 +511,15 @@
 },
 {
 "cell_type": "code",
-"execution_count": 4,
+"execution_count": 15,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"gpt-3.5-turbo-0301\n",
-"127 prompt tokens counted by num_tokens_from_messages().\n",
-"127 prompt tokens counted by the OpenAI API.\n",
-"\n",
-"gpt-3.5-turbo-0613\n",
-"129 prompt tokens counted by num_tokens_from_messages().\n",
-"129 prompt tokens counted by the OpenAI API.\n",
-"\n",
 "gpt-3.5-turbo\n",
-"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\n",
-"129 prompt tokens counted by num_tokens_from_messages().\n",
-"129 prompt tokens counted by the OpenAI API.\n",
-"\n",
-"gpt-4-0314\n",
+"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\n",
 "129 prompt tokens counted by num_tokens_from_messages().\n",
 "129 prompt tokens counted by the OpenAI API.\n",
 "\n",
@@ -512,6 +531,16 @@
 "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n",
 "129 prompt tokens counted by num_tokens_from_messages().\n",
 "129 prompt tokens counted by the OpenAI API.\n",
+"\n",
+"gpt-4o\n",
+"Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\n",
+"124 prompt tokens counted by num_tokens_from_messages().\n",
+"124 prompt tokens counted by the OpenAI API.\n",
+"\n",
+"gpt-4o-mini\n",
+"Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\n",
+"124 prompt tokens counted by num_tokens_from_messages().\n",
+"124 prompt tokens counted by the OpenAI API.\n",
 "\n"
 ]
 }
@@ -556,12 +585,11 @@
 "]\n",
 "\n",
 "for model in [\n",
-"    \"gpt-3.5-turbo-0301\",\n",
-"    \"gpt-3.5-turbo-0613\",\n",
 "    \"gpt-3.5-turbo\",\n",
-"    \"gpt-4-0314\",\n",
 "    \"gpt-4-0613\",\n",
 "    \"gpt-4\",\n",
+"    \"gpt-4o\",\n",
+"    \"gpt-4o-mini\"\n",
 "    ]:\n",
 "    print(model)\n",
 "    # example token count from the function defined above\n",
@@ -575,12 +603,186 @@
 "    print()\n"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## 7. Counting tokens for chat completions with tool calls\n",
+"\n",
+"Next, we will look at how to apply these calculations to messages that may contain function calls. This is not immediately trivial, due to the formatting of the tools themselves.\n",
+"\n",
+"Below is an example function for counting tokens for messages that contain tools, passed to `gpt-3.5-turbo`, `gpt-4`, `gpt-4o` and `gpt-4o-mini`."
+]
+},
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 16,
 "metadata": {},
 "outputs": [],
-"source": []
+"source": [
+"def num_tokens_for_tools(functions, messages, model):\n",
+"    \n",
+"    # Initialize function settings to 0\n",
+"    func_init = 0\n",
+"    prop_init = 0\n",
+"    prop_key = 0\n",
+"    enum_init = 0\n",
+"    enum_item = 0\n",
+"    func_end = 0\n",
+"    \n",
+"    if model in [\n",
+"        \"gpt-4o\",\n",
+"        \"gpt-4o-mini\"\n",
+"    ]:\n",
+"        \n",
+"        # Set function settings for the above models\n",
+"        func_init = 7\n",
+"        prop_init = 3\n",
+"        prop_key = 3\n",
+"        enum_init = -3\n",
+"        enum_item = 3\n",
+"        func_end = 12\n",
+"    elif model in [\n",
+"        \"gpt-3.5-turbo\",\n",
+"        \"gpt-4\"\n",
+"    ]:\n",
+"        # Set function settings for the above models\n",
+"        func_init = 10\n",
+"        prop_init = 3\n",
+"        prop_key = 3\n",
+"        enum_init = -3\n",
+"        enum_item = 3\n",
+"        func_end = 12\n",
+"    else:\n",
+"        raise NotImplementedError(\n",
+"            f\"\"\"num_tokens_for_tools() is not implemented for model {model}.\"\"\"\n",
+"        )\n",
+"    \n",
+"    try:\n",
+"        encoding = tiktoken.encoding_for_model(model)\n",
+"    except KeyError:\n",
+"        print(\"Warning: model not found. Using o200k_base encoding.\")\n",
+"        encoding = tiktoken.get_encoding(\"o200k_base\")\n",
+"    \n",
+"    func_token_count = 0\n",
+"    if len(functions) > 0:\n",
+"        for f in functions:\n",
+"            func_token_count += func_init  # Add tokens for start of each function\n",
+"            function = f[\"function\"]\n",
+"            f_name = function[\"name\"]\n",
+"            f_desc = function[\"description\"]\n",
+"            if f_desc.endswith(\".\"):\n",
+"                f_desc = f_desc[:-1]\n",
+"            line = f_name + \":\" + f_desc\n",
+"            func_token_count += len(encoding.encode(line))  # Add tokens for set name and description\n",
+"            if len(function[\"parameters\"][\"properties\"]) > 0:\n",
+"                func_token_count += prop_init  # Add tokens for start of each property\n",
+"                for key in list(function[\"parameters\"][\"properties\"].keys()):\n",
+"                    func_token_count += prop_key  # Add tokens for each set property\n",
+"                    p_name = key\n",
+"                    p_type = function[\"parameters\"][\"properties\"][key][\"type\"]\n",
+"                    p_desc = function[\"parameters\"][\"properties\"][key][\"description\"]\n",
+"                    if \"enum\" in function[\"parameters\"][\"properties\"][key].keys():\n",
+"                        func_token_count += enum_init  # Add tokens if property has enum list\n",
+"                        for item in function[\"parameters\"][\"properties\"][key][\"enum\"]:\n",
+"                            func_token_count += enum_item\n",
+"                            func_token_count += len(encoding.encode(item))\n",
+"                    if p_desc.endswith(\".\"):\n",
+"                        p_desc = p_desc[:-1]\n",
+"                    line = f\"{p_name}:{p_type}:{p_desc}\"\n",
+"                    func_token_count += len(encoding.encode(line))\n",
+"        func_token_count += func_end\n",
+"    \n",
+"    messages_token_count = num_tokens_from_messages(messages, model)\n",
+"    total_tokens = messages_token_count + func_token_count\n",
+"    \n",
+"    return total_tokens"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 17,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"gpt-3.5-turbo\n",
+"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\n",
+"105 prompt tokens counted by num_tokens_for_tools().\n",
+"105 prompt tokens counted by the OpenAI API.\n",
+"\n",
+"gpt-4\n",
+"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n",
+"105 prompt tokens counted by num_tokens_for_tools().\n",
+"105 prompt tokens counted by the OpenAI API.\n",
+"\n",
+"gpt-4o\n",
+"Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\n",
+"101 prompt tokens counted by num_tokens_for_tools().\n",
+"101 prompt tokens counted by the OpenAI API.\n",
+"\n",
+"gpt-4o-mini\n",
+"Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\n",
+"101 prompt tokens counted by num_tokens_for_tools().\n",
+"101 prompt tokens counted by the OpenAI API.\n",
+"\n"
+]
+}
+],
+"source": [
+"tools = [\n",
+"  {\n",
+"    \"type\": \"function\",\n",
+"    \"function\": {\n",
+"      \"name\": \"get_current_weather\",\n",
+"      \"description\": \"Get the current weather in a given location\",\n",
+"      \"parameters\": {\n",
+"        \"type\": \"object\",\n",
+"        \"properties\": {\n",
+"          \"location\": {\n",
+"            \"type\": \"string\",\n",
+"            \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
+"          },\n",
+"          \"unit\": {\"type\": \"string\",\n",
+"                   \"description\": \"The unit of temperature to return\",\n",
+"                   \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
+"        },\n",
+"        \"required\": [\"location\"],\n",
+"      },\n",
+"    }\n",
+"  }\n",
+"]\n",
+"\n",
+"example_messages = [\n",
+"    {\n",
+"        \"role\": \"system\",\n",
+"        \"content\": \"You are a helpful assistant that can answer to questions about the weather.\",\n",
+"    },\n",
+"    {\n",
+"        \"role\": \"user\",\n",
+"        \"content\": \"What's the weather like in San Francisco?\",\n",
+"    },\n",
+"]\n",
+"\n",
+"for model in [\n",
+"    \"gpt-3.5-turbo\",\n",
+"    \"gpt-4\",\n",
+"    \"gpt-4o\",\n",
+"    \"gpt-4o-mini\"\n",
+"    ]:\n",
+"    print(model)\n",
+"    # example token count from the function defined above\n",
+"    print(f\"{num_tokens_for_tools(tools, example_messages, model)} prompt tokens counted by num_tokens_for_tools().\")\n",
+"    # example token count from the OpenAI API\n",
+"    response = client.chat.completions.create(model=model,\n",
+"                                              messages=example_messages,\n",
+"                                              tools=tools,\n",
+"                                              temperature=0)\n",
+"    print(f'{response.usage.prompt_tokens} prompt tokens counted by the OpenAI API.')\n",
+"    print()"
+]
 }
 ],
 "metadata": {
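The new cell above checks the local estimate against `response.usage.prompt_tokens` from the API. If you only want the local estimate, a sketch that reuses `tools` and `example_messages` from that cell without calling the API:

```python
# Local-only estimate; per the recorded output this prints 101 for gpt-4o-mini.
estimate = num_tokens_for_tools(tools, example_messages, "gpt-4o-mini")
print(f"{estimate} prompt tokens counted by num_tokens_for_tools().")
```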
@@ -599,7 +801,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.5"
+"version": "3.11.7"
 },
 "vscode": {
 "interpreter": {