Encode separator for completions endpoint

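`SEPARATOR` is used to construct the prompt sent to the completions endpoint, so its token length must be measured with the completion model's encoding (`gpt2`, for text-davinci-003) rather than the embedding model's encoding (`cl100k_base`, for text-embedding-ada-002).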
This commit is contained in:
Robin Richtsfeld 2023-01-29 02:13:55 +01:00 committed by GitHub
parent 420c818ba1
commit b34607ddb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -554,7 +554,7 @@
"source": [ "source": [
"MAX_SECTION_LEN = 500\n", "MAX_SECTION_LEN = 500\n",
"SEPARATOR = \"\\n* \"\n", "SEPARATOR = \"\\n* \"\n",
"ENCODING = \"cl100k_base\" # encoding for text-embedding-ada-002\n", "ENCODING = \"gpt2\" # encoding for text-davinci-003\n",
"\n", "\n",
"encoding = tiktoken.get_encoding(ENCODING)\n", "encoding = tiktoken.get_encoding(ENCODING)\n",
"separator_len = len(encoding.encode(SEPARATOR))\n", "separator_len = len(encoding.encode(SEPARATOR))\n",