mirror of
https://github.com/james-m-jordan/openai-cookbook.git
synced 2025-05-09 19:32:38 +00:00
[tiktoken_counting] fix tokenizer name (#741)
This commit is contained in:
parent
4631e1b74a
commit
c2959fd60b
@ -58,6 +58,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -68,14 +69,50 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: tiktoken in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (0.5.1)\n",
|
||||
"Requirement already satisfied: requests>=2.26.0 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from tiktoken) (2.31.0)\n",
|
||||
"Requirement already satisfied: regex>=2022.1.18 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from tiktoken) (2023.8.8)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2.0.5)\n",
|
||||
"\u001b[33mWARNING: You are using pip version 22.0.4; however, version 23.2.1 is available.\n",
|
||||
"You should consider upgrading via the '/Users/simon/.virtualenvs/openai/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n",
|
||||
"Requirement already satisfied: openai in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (0.28.1)\n",
|
||||
"Requirement already satisfied: aiohttp in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from openai) (3.8.5)\n",
|
||||
"Requirement already satisfied: requests>=2.20 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from openai) (2.31.0)\n",
|
||||
"Requirement already satisfied: tqdm in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from openai) (4.66.1)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.20->openai) (3.4)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.20->openai) (2023.7.22)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.20->openai) (3.2.0)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from requests>=2.20->openai) (2.0.5)\n",
|
||||
"Requirement already satisfied: frozenlist>=1.1.1 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from aiohttp->openai) (1.4.0)\n",
|
||||
"Requirement already satisfied: aiosignal>=1.1.2 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from aiohttp->openai) (1.3.1)\n",
|
||||
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from aiohttp->openai) (4.0.3)\n",
|
||||
"Requirement already satisfied: yarl<2.0,>=1.0 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from aiohttp->openai) (1.9.2)\n",
|
||||
"Requirement already satisfied: multidict<7.0,>=4.5 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from aiohttp->openai) (6.0.4)\n",
|
||||
"Requirement already satisfied: attrs>=17.3.0 in /Users/simon/.virtualenvs/openai/lib/python3.9/site-packages (from aiohttp->openai) (23.1.0)\n",
|
||||
"\u001b[33mWARNING: You are using pip version 22.0.4; however, version 23.2.1 is available.\n",
|
||||
"You should consider upgrading via the '/Users/simon/.virtualenvs/openai/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade tiktoken"
|
||||
"%pip install --upgrade tiktoken\n",
|
||||
"%pip install --upgrade openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -88,10 +125,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tiktoken\n"
|
||||
"import tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -112,6 +150,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -128,6 +167,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -136,6 +176,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -163,6 +204,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -203,6 +245,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -210,6 +253,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -237,6 +281,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -244,6 +289,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -271,6 +317,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -278,6 +325,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -297,7 +345,7 @@
|
||||
" # print the example string\n",
|
||||
" print(f'\\nExample string: \"{example_string}\"')\n",
|
||||
" # for each encoding, print the # of tokens, the token integers, and the token bytes\n",
|
||||
" for encoding_name in [\"gpt2\", \"p50k_base\", \"cl100k_base\"]:\n",
|
||||
" for encoding_name in [\"r50k_base\", \"p50k_base\", \"cl100k_base\"]:\n",
|
||||
" encoding = tiktoken.get_encoding(encoding_name)\n",
|
||||
" token_integers = encoding.encode(example_string)\n",
|
||||
" num_tokens = len(token_integers)\n",
|
||||
@ -321,7 +369,7 @@
|
||||
"\n",
|
||||
"Example string: \"antidisestablishmentarianism\"\n",
|
||||
"\n",
|
||||
"gpt2: 5 tokens\n",
|
||||
"r50k_base: 5 tokens\n",
|
||||
"token integers: [415, 29207, 44390, 3699, 1042]\n",
|
||||
"token bytes: [b'ant', b'idis', b'establishment', b'arian', b'ism']\n",
|
||||
"\n",
|
||||
@ -351,7 +399,7 @@
|
||||
"\n",
|
||||
"Example string: \"2 + 2 = 4\"\n",
|
||||
"\n",
|
||||
"gpt2: 5 tokens\n",
|
||||
"r50k_base: 5 tokens\n",
|
||||
"token integers: [17, 1343, 362, 796, 604]\n",
|
||||
"token bytes: [b'2', b' +', b' 2', b' =', b' 4']\n",
|
||||
"\n",
|
||||
@ -381,7 +429,7 @@
|
||||
"\n",
|
||||
"Example string: \"お誕生日おめでとう\"\n",
|
||||
"\n",
|
||||
"gpt2: 14 tokens\n",
|
||||
"r50k_base: 14 tokens\n",
|
||||
"token integers: [2515, 232, 45739, 243, 37955, 33768, 98, 2515, 232, 1792, 223, 30640, 30201, 29557]\n",
|
||||
"token bytes: [b'\\xe3\\x81', b'\\x8a', b'\\xe8\\xaa', b'\\x95', b'\\xe7\\x94\\x9f', b'\\xe6\\x97', b'\\xa5', b'\\xe3\\x81', b'\\x8a', b'\\xe3\\x82', b'\\x81', b'\\xe3\\x81\\xa7', b'\\xe3\\x81\\xa8', b'\\xe3\\x81\\x86']\n",
|
||||
"\n",
|
||||
@ -482,7 +530,7 @@
|
||||
"gpt-3.5-turbo\n",
|
||||
"Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.\n",
|
||||
"129 prompt tokens counted by num_tokens_from_messages().\n",
|
||||
"127 prompt tokens counted by the OpenAI API.\n",
|
||||
"129 prompt tokens counted by the OpenAI API.\n",
|
||||
"\n",
|
||||
"gpt-4-0314\n",
|
||||
"129 prompt tokens counted by num_tokens_from_messages().\n",
|
||||
@ -575,7 +623,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
"version": "3.9.13"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user