Fix UTF-8 encoding in Chat_finetuning_data_prep.ipynb (#648)

This commit is contained in:
recordcrash 2023-08-29 03:12:30 +02:00 committed by GitHub
parent 63f95154b1
commit 1945bfe65c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -8,7 +8,6 @@
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import tiktoken\n",
"import numpy as np\n",
"from collections import defaultdict"
@@ -32,7 +31,7 @@
"outputs": [],
"source": [
"# Load dataset\n",
"with open(data_path) as f:\n",
"with open(data_path, 'r', encoding='utf-8') as f:\n",
" dataset = [json.loads(line) for line in f]"
]
},