small improvements to the fine-tuning cookbook (#651)

2025-05-09 19:32:38 +00:00 · 2023-08-22 15:30:03 -07:00 · 2023-08-22 15:30:03 -07:00 · 8ed84645e8
commit 8ed84645e8
parent 64e9471909
2 changed files with 62 additions and 113 deletions
--- a/.gitignore
+++ b/.gitignore
@ -132,3 +132,4 @@ dmypy.json
 *transactions*.jsonl
 /examples/data/transactions*
 *.DS_Store
+tmp_*
--- a/examples/How_to_finetune_chat_models.ipynb
+++ b/examples/How_to_finetune_chat_models.ipynb
@ -44,7 +44,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 1,
   "id": "6e1f4403-37e1-4115-a215-12fd7daa1eb6",
   "metadata": {},
   "outputs": [],
@ -53,7 +53,6 @@
    "import openai\n",
    "import os\n",
    "import pandas as pd\n",
-    "import requests\n",
    "from pprint import pprint\n",
    "\n",
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\", \"\")"
@ -216,7 +215,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 3,
   "id": "9a8216b0-d1dc-472d-b07d-1be03acd70a5",
   "metadata": {},
   "outputs": [],
@ -245,7 +244,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 4,
   "id": "5b853efa-dfea-4770-ab88-9b7e17794421",
   "metadata": {},
   "outputs": [],
@ -277,7 +276,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 5,
   "id": "8d2eb207-2c2b-43f6-a613-64a7e92d494d",
   "metadata": {},
   "outputs": [],
@ -304,7 +303,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 6,
   "id": "8b53e7a2-1cac-4c5f-8ba4-3292ba2a0770",
   "metadata": {},
   "outputs": [],
@ -330,7 +329,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 7,
   "id": "69462d9e-e6bd-49b9-a064-9eae4ea5b7a8",
   "metadata": {},
   "outputs": [
@ -338,8 +337,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Training file id: file-XMhftmLXyyTvvmiLpMRMIAcL\n",
-      "Validation file id: file-kZz433aerpPIMADZF7xC5NKd\n"
+      "Training file ID: file-XEpJAyCL2qMbMEgXwYfS9ypT\n",
+      "Validation file ID: file-MAGY5QyvCMdOWUDuXJB79uw9\n"
     ]
    }
   ],
@ -354,8 +353,8 @@
    ")\n",
    "validation_file_id = validation_response[\"id\"]\n",
    "\n",
-    "print(\"Training file id:\", training_file_id)\n",
-    "print(\"Validation file id:\", validation_file_id)"
+    "print(\"Training file ID:\", training_file_id)\n",
+    "print(\"Validation file ID:\", validation_file_id)"
   ]
  },
  {
@ -367,12 +366,12 @@
    "\n",
    "Now we can create our fine-tuning job with the generated files and an optional suffix to identify the model. The response will contain an `id` which you can use to retrieve updates on the job.\n",
    "\n",
-    "Note: The files have to first be processed by our system, so you might get a `File is not ready` error. In that case, simply retry a few minutes later.\n"
+    "Note: The files must first be processed by our system, so you might initially get a `File not ready` error. In that case, simply retry a few minutes later.\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 10,
   "id": "05541ceb-5628-447e-962d-7e57c112439c",
   "metadata": {},
   "outputs": [
@ -380,40 +379,23 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{\n",
-      "  \"object\": \"fine_tuning.job\",\n",
-      "  \"id\": \"ftjob-ksOzx7zjpsrADZfhB5eyfB0Z\",\n",
-      "  \"model\": \"gpt-3.5-turbo-0613\",\n",
-      "  \"created_at\": 1692734343,\n",
-      "  \"finished_at\": null,\n",
-      "  \"fine_tuned_model\": null,\n",
-      "  \"organization_id\": \"org-l89177bnhkme4a44292n5r3j\",\n",
-      "  \"result_files\": [],\n",
-      "  \"status\": \"created\",\n",
-      "  \"validation_file\": \"file-kZz433aerpPIMADZF7xC5NKd\",\n",
-      "  \"training_file\": \"file-XMhftmLXyyTvvmiLpMRMIAcL\",\n",
-      "  \"hyperparameters\": {\n",
-      "    \"n_epochs\": 3\n",
-      "  },\n",
-      "  \"trained_tokens\": null\n",
-      "}\n"
+      "Job ID: ftjob-9xVzrp0Oem9rWj2UFWDcFLqT\n",
+      "Status: created\n"
     ]
    }
   ],
   "source": [
-    "suffix_name = \"recipe-ner\"\n",
-    "\n",
-    "\n",
    "response = openai.FineTuningJob.create(\n",
    "    training_file=training_file_id,\n",
    "    validation_file=validation_file_id,\n",
    "    model=\"gpt-3.5-turbo\",\n",
-    "    suffix=suffix_name,\n",
+    "    suffix=\"recipe-ner\",\n",
    ")\n",
    "\n",
    "job_id = response[\"id\"]\n",
    "\n",
-    "print(response)"
+    "print(\"Job ID:\", response[\"id\"])\n",
+    "print(\"Status:\", response[\"status\"])"
   ]
  },
  {
@ -430,7 +412,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 11,
   "id": "d7392f48",
   "metadata": {},
   "outputs": [
@ -438,31 +420,18 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{\n",
-      "  \"object\": \"fine_tuning.job\",\n",
-      "  \"id\": \"ftjob-ksOzx7zjpsrADZfhB5eyfB0Z\",\n",
-      "  \"model\": \"gpt-3.5-turbo-0613\",\n",
-      "  \"created_at\": 1692734343,\n",
-      "  \"finished_at\": 1692735182,\n",
-      "  \"fine_tuned_model\": \"ft:gpt-3.5-turbo-0613:openai:recipe-ner:7qS2GFaX\",\n",
-      "  \"organization_id\": \"org-l89177bnhkme4a44292n5r3j\",\n",
-      "  \"result_files\": [\n",
-      "    \"file-Tjt0E6BvQ846m75gYxtWiRzb\"\n",
-      "  ],\n",
-      "  \"status\": \"succeeded\",\n",
-      "  \"validation_file\": \"file-kZz433aerpPIMADZF7xC5NKd\",\n",
-      "  \"training_file\": \"file-XMhftmLXyyTvvmiLpMRMIAcL\",\n",
-      "  \"hyperparameters\": {\n",
-      "    \"n_epochs\": 3\n",
-      "  },\n",
-      "  \"trained_tokens\": 39687\n",
-      "}\n"
+      "Job ID: ftjob-9xVzrp0Oem9rWj2UFWDcFLqT\n",
+      "Status: running\n",
+      "Trained Tokens: None\n"
     ]
    }
   ],
   "source": [
    "response = openai.FineTuningJob.retrieve(job_id)\n",
-    "print(response)"
+    "\n",
+    "print(\"Job ID:\", response[\"id\"])\n",
+    "print(\"Status:\", response[\"status\"])\n",
+    "print(\"Trained Tokens:\", response[\"trained_tokens\"])\n"
   ]
  },
  {
@ -470,12 +439,12 @@
   "id": "30a57fbb",
   "metadata": {},
   "source": [
-    "We can track the progress of the fine-tune with the events endpoint. You can rerun the cell below a few times until the fine-tune is ready. \n"
+    "We can track the progress of the fine-tune with the events endpoint. You can rerun the cell below a few times until the fine-tune is ready.\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 12,
   "id": "08cace28",
   "metadata": {},
   "outputs": [
@ -483,39 +452,39 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Created fine-tune: ftjob-ksOzx7zjpsrADZfhB5eyfB0Z\n",
+      "Created fine-tune: ftjob-9xVzrp0Oem9rWj2UFWDcFLqT\n",
      "Fine tuning job started\n",
-      "Step 10: training loss=0.16\n",
-      "Step 20: training loss=0.14\n",
-      "Step 30: training loss=0.30\n",
-      "Step 40: training loss=0.17\n",
-      "Step 50: training loss=0.03\n",
-      "Step 60: training loss=0.55\n",
-      "Step 70: training loss=0.09\n",
-      "Step 80: training loss=0.00\n",
-      "Step 90: training loss=0.15\n",
-      "Step 100: training loss=0.06\n",
-      "Step 110: training loss=0.03\n",
-      "Step 120: training loss=0.04\n",
-      "Step 130: training loss=0.21\n",
+      "Step 10: training loss=2.41\n",
+      "Step 20: training loss=0.06\n",
+      "Step 30: training loss=0.38\n",
+      "Step 40: training loss=0.09\n",
+      "Step 50: training loss=0.19\n",
+      "Step 60: training loss=0.10\n",
+      "Step 70: training loss=0.00\n",
+      "Step 80: training loss=0.01\n",
+      "Step 90: training loss=0.72\n",
+      "Step 100: training loss=0.13\n",
+      "Step 110: training loss=0.15\n",
+      "Step 120: training loss=0.00\n",
+      "Step 130: training loss=0.47\n",
      "Step 140: training loss=0.00\n",
-      "Step 150: training loss=0.02\n",
-      "Step 160: training loss=0.00\n",
-      "Step 170: training loss=0.00\n",
-      "Step 180: training loss=0.19\n",
-      "Step 190: training loss=0.55\n",
+      "Step 150: training loss=0.10\n",
+      "Step 160: training loss=0.06\n",
+      "Step 170: training loss=0.03\n",
+      "Step 180: training loss=0.08\n",
+      "Step 190: training loss=0.04\n",
      "Step 200: training loss=0.01\n",
-      "Step 210: training loss=0.00\n",
+      "Step 210: training loss=0.03\n",
      "Step 220: training loss=0.00\n",
-      "Step 230: training loss=0.00\n",
+      "Step 230: training loss=0.08\n",
      "Step 240: training loss=0.00\n",
      "Step 250: training loss=0.00\n",
-      "Step 260: training loss=0.36\n",
-      "Step 270: training loss=0.16\n",
-      "Step 280: training loss=0.01\n",
-      "Step 290: training loss=0.04\n",
-      "Step 300: training loss=0.53\n",
-      "New fine-tuned model created: ft:gpt-3.5-turbo-0613:openai:recipe-ner:7qS2GFaX\n",
+      "Step 260: training loss=0.00\n",
+      "Step 270: training loss=0.00\n",
+      "Step 280: training loss=0.00\n",
+      "Step 290: training loss=0.00\n",
+      "Step 300: training loss=0.60\n",
+      "New fine-tuned model created: ft:gpt-3.5-turbo-0613:openai:recipe-ner:7qTvyJ81\n",
      "Fine-tuning job successfully completed\n"
     ]
    }
@ -535,12 +504,12 @@
   "id": "d0da4e32",
   "metadata": {},
   "source": [
-    "Now that it's done, we can get a fine-tuned model ID from the job\n"
+    "Now that it's done, we can get a fine-tuned model ID from the job:\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 14,
   "id": "40b28c26",
   "metadata": {},
   "outputs": [
@ -548,27 +517,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{\n",
-      "  \"object\": \"fine_tuning.job\",\n",
-      "  \"id\": \"ftjob-ksOzx7zjpsrADZfhB5eyfB0Z\",\n",
-      "  \"model\": \"gpt-3.5-turbo-0613\",\n",
-      "  \"created_at\": 1692734343,\n",
-      "  \"finished_at\": 1692735182,\n",
-      "  \"fine_tuned_model\": \"ft:gpt-3.5-turbo-0613:openai:recipe-ner:7qS2GFaX\",\n",
-      "  \"organization_id\": \"org-l89177bnhkme4a44292n5r3j\",\n",
-      "  \"result_files\": [\n",
-      "    \"file-Tjt0E6BvQ846m75gYxtWiRzb\"\n",
-      "  ],\n",
-      "  \"status\": \"succeeded\",\n",
-      "  \"validation_file\": \"file-kZz433aerpPIMADZF7xC5NKd\",\n",
-      "  \"training_file\": \"file-XMhftmLXyyTvvmiLpMRMIAcL\",\n",
-      "  \"hyperparameters\": {\n",
-      "    \"n_epochs\": 3\n",
-      "  },\n",
-      "  \"trained_tokens\": 39687\n",
-      "}\n",
-      "\n",
-      "Fine-tuned model id: ft:gpt-3.5-turbo-0613:openai:recipe-ner:7qS2GFaX\n"
+      "Fine-tuned model ID: ft:gpt-3.5-turbo-0613:openai:recipe-ner:7qTvyJ81\n"
     ]
    }
   ],
@ -576,8 +525,7 @@
    "response = openai.FineTuningJob.retrieve(job_id)\n",
    "fine_tuned_model_id = response[\"fine_tuned_model\"]\n",
    "\n",
-    "print(response)\n",
-    "print(\"\\nFine-tuned model id:\", fine_tuned_model_id)"
+    "print(\"Fine-tuned model ID:\", fine_tuned_model_id)"
   ]
  },
  {
@ -593,12 +541,12 @@
   "id": "0ab9ac11",
   "metadata": {},
   "source": [
-    "The last step is to use your fine-tuned model for inference. Similar to the classic `FineTuning`, you simply call `ChatCompletions` with your new fine-tuned model name filling the `model` parameter."
+    "The last step is to use your fine-tuned model for inference. As with the classic `FineTuning` flow, you simply call `ChatCompletions` and pass your new fine-tuned model name as the `model` parameter.\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 15,
   "id": "1c7de631-b68f-4eff-9ae7-051641579c2b",
   "metadata": {},
   "outputs": [
@ -632,7 +580,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 88,
+   "execution_count": 16,
   "id": "1a1d2589",
   "metadata": {},
   "outputs": [