From 1945bfe65c9a4acd92bf943f5b5fde2b3258fb5d Mon Sep 17 00:00:00 2001 From: recordcrash Date: Tue, 29 Aug 2023 03:12:30 +0200 Subject: [PATCH] Fix UTF-8 encoding in Chat_finetuning_data_prep.ipynb (#648) --- examples/Chat_finetuning_data_prep.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/Chat_finetuning_data_prep.ipynb b/examples/Chat_finetuning_data_prep.ipynb index 57ac317..d666aed 100644 --- a/examples/Chat_finetuning_data_prep.ipynb +++ b/examples/Chat_finetuning_data_prep.ipynb @@ -8,7 +8,6 @@ "outputs": [], "source": [ "import json\n", - "import os\n", "import tiktoken\n", "import numpy as np\n", "from collections import defaultdict" @@ -32,7 +31,7 @@ "outputs": [], "source": [ "# Load dataset\n", - "with open(data_path) as f:\n", + "with open(data_path, 'r', encoding='utf-8') as f:\n", " dataset = [json.loads(line) for line in f]" ] },