update for 4o-mini (#1421)

Teo Musatoiu 2024-10-08 10:22:11 +01:00 committed by GitHub
parent 8127d0c929
commit db3144982a

@@ -22,20 +22,21 @@
"\n",
"| Encoding name | OpenAI models |\n",
"|-------------------------|-----------------------------------------------------|\n",
"| `cl100k_base` | `gpt-4`, `gpt-3.5-turbo`, `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` |\n",
"| `o200k_base` | `gpt-4o`, `gpt-4o-mini` |\n",
"| `cl100k_base` | `gpt-4-turbo`, `gpt-4`, `gpt-3.5-turbo`, `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` |\n",
"| `p50k_base` | Codex models, `text-davinci-002`, `text-davinci-003`|\n",
"| `r50k_base` (or `gpt2`) | GPT-3 models like `davinci` |\n",
"\n",
"You can retrieve the encoding for a model using `tiktoken.encoding_for_model()` as follows:\n",
"```python\n",
"encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')\n",
"encoding = tiktoken.encoding_for_model('gpt-4o-mini')\n",
"```\n",
"\n",
"Note that `p50k_base` overlaps substantially with `r50k_base`, and for non-code applications, they will usually give the same tokens.\n",
"\n",
"## Tokenizer libraries by language\n",
"\n",
"For `cl100k_base` and `p50k_base` encodings:\n",
"For `o200k_base`, `cl100k_base` and `p50k_base` encodings:\n",
"- Python: [tiktoken](https://github.com/openai/tiktoken/blob/main/README.md)\n",
"- .NET / C#: [SharpToken](https://github.com/dmitry-brazhenko/SharpToken), [TiktokenSharp](https://github.com/aiqinxuancai/TiktokenSharp)\n",
"- Java: [jtokkit](https://github.com/knuddelsgmbh/jtokkit)\n",
@@ -71,12 +72,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade tiktoken\n",
"%pip install --upgrade openai"
"%pip install --upgrade tiktoken -q\n",
"%pip install --upgrade openai -q"
]
},
{
@@ -89,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -131,7 +147,7 @@
"metadata": {},
"outputs": [],
"source": [
"encoding = tiktoken.encoding_for_model(\"gpt-3.5-turbo\")"
"encoding = tiktoken.encoding_for_model(\"gpt-4o-mini\")"
]
},
{
@@ -159,7 +175,7 @@
{
"data": {
"text/plain": [
"[83, 1609, 5963, 374, 2294, 0]"
"[83, 8251, 2488, 382, 2212, 0]"
]
},
"execution_count": 5,
@@ -168,7 +184,7 @@
}
],
"source": [
"encoding.encode(\"tiktoken is great!\")\n"
"encoding.encode(\"tiktoken is great!\")"
]
},
{
@@ -189,7 +205,7 @@
" \"\"\"Returns the number of tokens in a text string.\"\"\"\n",
" encoding = tiktoken.get_encoding(encoding_name)\n",
" num_tokens = len(encoding.encode(string))\n",
" return num_tokens\n"
" return num_tokens"
]
},
{
@@ -209,7 +225,7 @@
}
],
"source": [
"num_tokens_from_string(\"tiktoken is great!\", \"cl100k_base\")\n"
"num_tokens_from_string(\"tiktoken is great!\", \"o200k_base\")"
]
},
{
@@ -245,7 +261,7 @@
}
],
"source": [
"encoding.decode([83, 1609, 5963, 374, 2294, 0])\n"
"encoding.decode([83, 8251, 2488, 382, 2212, 0])"
]
},
{
@@ -272,7 +288,7 @@
{
"data": {
"text/plain": [
"[b't', b'ik', b'token', b' is', b' great', b'!']"
"[b't', b'ikt', b'oken', b' is', b' great', b'!']"
]
},
"execution_count": 9,
@@ -281,7 +297,7 @@
}
],
"source": [
"[encoding.decode_single_token_bytes(token) for token in [83, 1609, 5963, 374, 2294, 0]]\n"
"[encoding.decode_single_token_bytes(token) for token in [83, 8251, 2488, 382, 2212, 0]]\n"
]
},
{
@@ -313,7 +329,7 @@
" # print the example string\n",
" print(f'\\nExample string: \"{example_string}\"')\n",
" # for each encoding, print the # of tokens, the token integers, and the token bytes\n",
" for encoding_name in [\"r50k_base\", \"p50k_base\", \"cl100k_base\"]:\n",
" for encoding_name in [\"r50k_base\", \"p50k_base\", \"cl100k_base\", \"o200k_base\"]:\n",
" encoding = tiktoken.get_encoding(encoding_name)\n",
" token_integers = encoding.encode(example_string)\n",
" num_tokens = len(token_integers)\n",
@@ -321,8 +337,7 @@
" print()\n",
" print(f\"{encoding_name}: {num_tokens} tokens\")\n",
" print(f\"token integers: {token_integers}\")\n",
" print(f\"token bytes: {token_bytes}\")\n",
" "
" print(f\"token bytes: {token_bytes}\")"
]
},
{
@@ -347,12 +362,16 @@
"\n",
"cl100k_base: 6 tokens\n",
"token integers: [519, 85342, 34500, 479, 8997, 2191]\n",
"token bytes: [b'ant', b'idis', b'establish', b'ment', b'arian', b'ism']\n"
"token bytes: [b'ant', b'idis', b'establish', b'ment', b'arian', b'ism']\n",
"\n",
"o200k_base: 6 tokens\n",
"token integers: [493, 129901, 376, 160388, 21203, 2367]\n",
"token bytes: [b'ant', b'idis', b'est', b'ablishment', b'arian', b'ism']\n"
]
}
],
"source": [
"compare_encodings(\"antidisestablishmentarianism\")\n"
"compare_encodings(\"antidisestablishmentarianism\")"
]
},
{
@@ -377,12 +396,16 @@
"\n",
"cl100k_base: 7 tokens\n",
"token integers: [17, 489, 220, 17, 284, 220, 19]\n",
"token bytes: [b'2', b' +', b' ', b'2', b' =', b' ', b'4']\n",
"\n",
"o200k_base: 7 tokens\n",
"token integers: [17, 659, 220, 17, 314, 220, 19]\n",
"token bytes: [b'2', b' +', b' ', b'2', b' =', b' ', b'4']\n"
]
}
],
"source": [
"compare_encodings(\"2 + 2 = 4\")\n"
"compare_encodings(\"2 + 2 = 4\")"
]
},
{
@@ -407,12 +430,16 @@
"\n",
"cl100k_base: 9 tokens\n",
"token integers: [33334, 45918, 243, 21990, 9080, 33334, 62004, 16556, 78699]\n",
"token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f', b'\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n"
"token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f', b'\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n",
"\n",
"o200k_base: 8 tokens\n",
"token integers: [8930, 9697, 243, 128225, 8930, 17693, 4344, 48669]\n",
"token bytes: [b'\\xe3\\x81\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f\\xe6\\x97\\xa5', b'\\xe3\\x81\\x8a', b'\\xe3\\x82\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8\\xe3\\x81\\x86']\n"
]
}
],
"source": [
"compare_encodings(\"お誕生日おめでとう\")\n"
"compare_encodings(\"お誕生日おめでとう\")"
]
},
{
@@ -422,9 +449,9 @@
"source": [
"## 6. Counting tokens for chat completions API calls\n",
"\n",
"ChatGPT models like `gpt-3.5-turbo` and `gpt-4` use tokens in the same way as older completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n",
"ChatGPT models like `gpt-4o-mini` and `gpt-4` use tokens in the same way as older completions models, but because of their message-based formatting, it's more difficult to count how many tokens will be used by a conversation.\n",
"\n",
"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo` or `gpt-4`.\n",
"Below is an example function for counting tokens for messages passed to `gpt-3.5-turbo`, `gpt-4`, `gpt-4o` and `gpt-4o-mini`.\n",
"\n",
"Note that the exact way that tokens are counted from messages may change from model to model. Consider the counts from the function below an estimate, not a timeless guarantee.\n",
"\n",
@@ -433,33 +460,37 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\"):\n",
"def num_tokens_from_messages(messages, model=\"gpt-4o-mini-2024-07-18\"):\n",
" \"\"\"Return the number of tokens used by a list of messages.\"\"\"\n",
" try:\n",
" encoding = tiktoken.encoding_for_model(model)\n",
" except KeyError:\n",
" print(\"Warning: model not found. Using cl100k_base encoding.\")\n",
" encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
" print(\"Warning: model not found. Using o200k_base encoding.\")\n",
" encoding = tiktoken.get_encoding(\"o200k_base\")\n",
" if model in {\n",
" \"gpt-3.5-turbo-0613\",\n",
" \"gpt-3.5-turbo-16k-0613\",\n",
" \"gpt-3.5-turbo-0125\",\n",
" \"gpt-4-0314\",\n",
" \"gpt-4-32k-0314\",\n",
" \"gpt-4-0613\",\n",
" \"gpt-4-32k-0613\",\n",
" \"gpt-4o-mini-2024-07-18\",\n",
" \"gpt-4o-2024-08-06\"\n",
" }:\n",
" tokens_per_message = 3\n",
" tokens_per_name = 1\n",
" elif model == \"gpt-3.5-turbo-0301\":\n",
" tokens_per_message = 4 # every message follows <|start|>{role/name}\\n{content}<|end|>\\n\n",
" tokens_per_name = -1 # if there's a name, the role is omitted\n",
" elif \"gpt-3.5-turbo\" in model:\n",
" print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0613\")\n",
" print(\"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0125\")\n",
" elif \"gpt-4o-mini\" in model:\n",
" print(\"Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4o-mini-2024-07-18\")\n",
" elif \"gpt-4o\" in model:\n",
" print(\"Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4o-2024-08-06\")\n",
" elif \"gpt-4\" in model:\n",
" print(\"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\")\n",
" return num_tokens_from_messages(messages, model=\"gpt-4-0613\")\n",
@@ -480,27 +511,15 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"gpt-3.5-turbo-0301\n",
"127 prompt tokens counted by num_tokens_from_messages().\n",
"127 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-3.5-turbo-0613\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-3.5-turbo\n",
"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4-0314\n",
"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
@@ -512,6 +531,16 @@
"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n",
"129 prompt tokens counted by num_tokens_from_messages().\n",
"129 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4o\n",
"Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\n",
"124 prompt tokens counted by num_tokens_from_messages().\n",
"124 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4o-mini\n",
"Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\n",
"124 prompt tokens counted by num_tokens_from_messages().\n",
"124 prompt tokens counted by the OpenAI API.\n",
"\n"
]
}
@@ -556,12 +585,11 @@
"]\n",
"\n",
"for model in [\n",
" \"gpt-3.5-turbo-0301\",\n",
" \"gpt-3.5-turbo-0613\",\n",
" \"gpt-3.5-turbo\",\n",
" \"gpt-4-0314\",\n",
" \"gpt-4-0613\",\n",
" \"gpt-4\",\n",
" \"gpt-4o\",\n",
" \"gpt-4o-mini\"\n",
" ]:\n",
" print(model)\n",
" # example token count from the function defined above\n",
@@ -575,12 +603,186 @@
" print()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7. Counting tokens for chat completions with tool calls\n",
"\n",
"Next, we will look into how to apply this calculations to messages that may contain function calls. This is not immediately trivial, due to the formatting of the tools themselves. \n",
"\n",
"Below is an example function for counting tokens for messages that contain tools, passed to `gpt-3.5-turbo`, `gpt-4`, `gpt-4o` and `gpt-4o-mini`."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": []
"source": [
"def num_tokens_for_tools(functions, messages, model):\n",
" \n",
" # Initialize function settings to 0\n",
" func_init = 0\n",
" prop_init = 0\n",
" prop_key = 0\n",
" enum_init = 0\n",
" enum_item = 0\n",
" func_end = 0\n",
" \n",
" if model in [\n",
" \"gpt-4o\",\n",
" \"gpt-4o-mini\"\n",
" ]:\n",
" \n",
" # Set function settings for the above models\n",
" func_init = 7\n",
" prop_init = 3\n",
" prop_key = 3\n",
" enum_init = -3\n",
" enum_item = 3\n",
" func_end = 12\n",
" elif model in [\n",
" \"gpt-3.5-turbo\",\n",
" \"gpt-4\"\n",
" ]:\n",
" # Set function settings for the above models\n",
" func_init = 10\n",
" prop_init = 3\n",
" prop_key = 3\n",
" enum_init = -3\n",
" enum_item = 3\n",
" func_end = 12\n",
" else:\n",
" raise NotImplementedError(\n",
" f\"\"\"num_tokens_for_tools() is not implemented for model {model}.\"\"\"\n",
" )\n",
" \n",
" try:\n",
" encoding = tiktoken.encoding_for_model(model)\n",
" except KeyError:\n",
" print(\"Warning: model not found. Using o200k_base encoding.\")\n",
" encoding = tiktoken.get_encoding(\"o200k_base\")\n",
" \n",
" func_token_count = 0\n",
" if len(functions) > 0:\n",
" for f in functions:\n",
" func_token_count += func_init # Add tokens for start of each function\n",
" function = f[\"function\"]\n",
" f_name = function[\"name\"]\n",
" f_desc = function[\"description\"]\n",
" if f_desc.endswith(\".\"):\n",
" f_desc = f_desc[:-1]\n",
" line = f_name + \":\" + f_desc\n",
" func_token_count += len(encoding.encode(line)) # Add tokens for set name and description\n",
" if len(function[\"parameters\"][\"properties\"]) > 0:\n",
" func_token_count += prop_init # Add tokens for start of each property\n",
" for key in list(function[\"parameters\"][\"properties\"].keys()):\n",
" func_token_count += prop_key # Add tokens for each set property\n",
" p_name = key\n",
" p_type = function[\"parameters\"][\"properties\"][key][\"type\"]\n",
" p_desc = function[\"parameters\"][\"properties\"][key][\"description\"]\n",
" if \"enum\" in function[\"parameters\"][\"properties\"][key].keys():\n",
" func_token_count += enum_init # Add tokens if property has enum list\n",
" for item in function[\"parameters\"][\"properties\"][key][\"enum\"]:\n",
" func_token_count += enum_item\n",
" func_token_count += len(encoding.encode(item))\n",
" if p_desc.endswith(\".\"):\n",
" p_desc = p_desc[:-1]\n",
" line = f\"{p_name}:{p_type}:{p_desc}\"\n",
" func_token_count += len(encoding.encode(line))\n",
" func_token_count += func_end\n",
" \n",
" messages_token_count = num_tokens_from_messages(messages, model)\n",
" total_tokens = messages_token_count + func_token_count\n",
" \n",
" return total_tokens"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"gpt-3.5-turbo\n",
"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\n",
"105 prompt tokens counted by num_tokens_for_tools().\n",
"105 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4\n",
"Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n",
"105 prompt tokens counted by num_tokens_for_tools().\n",
"105 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4o\n",
"Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\n",
"101 prompt tokens counted by num_tokens_for_tools().\n",
"101 prompt tokens counted by the OpenAI API.\n",
"\n",
"gpt-4o-mini\n",
"Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\n",
"101 prompt tokens counted by num_tokens_for_tools().\n",
"101 prompt tokens counted by the OpenAI API.\n",
"\n"
]
}
],
"source": [
"tools = [\n",
" {\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
" },\n",
" \"unit\": {\"type\": \"string\", \n",
" \"description\": \"The unit of temperature to return\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
" },\n",
" \"required\": [\"location\"],\n",
" },\n",
" }\n",
" }\n",
"]\n",
"\n",
"example_messages = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are a helpful assistant that can answer to questions about the weather.\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"What's the weather like in San Francisco?\",\n",
" },\n",
"]\n",
"\n",
"for model in [\n",
" \"gpt-3.5-turbo\",\n",
" \"gpt-4\",\n",
" \"gpt-4o\",\n",
" \"gpt-4o-mini\"\n",
" ]:\n",
" print(model)\n",
" # example token count from the function defined above\n",
" print(f\"{num_tokens_for_tools(tools, example_messages, model)} prompt tokens counted by num_tokens_for_tools().\")\n",
" # example token count from the OpenAI API\n",
" response = client.chat.completions.create(model=model,\n",
" messages=example_messages,\n",
" tools=tools,\n",
" temperature=0)\n",
" print(f'{response.usage.prompt_tokens} prompt tokens counted by the OpenAI API.')\n",
" print()"
]
}
],
"metadata": {
@@ -599,7 +801,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.7"
},
"vscode": {
"interpreter": {