From 5591ff376f46a2c45df5b22b0830034390f0dd04 Mon Sep 17 00:00:00 2001
From: Anish Shah <93145909+ash0ts@users.noreply.github.com>
Date: Mon, 2 Oct 2023 17:02:34 -0400
Subject: [PATCH] Add Weights and Biases OpenAI MLOps examples to
third_party_examples (#714)
---
.../GPT_finetuning_with_wandb.ipynb | 1802 +++++++++++++++++
.../Openai_monitoring_with_wandb_weave.ipynb | 360 ++++
2 files changed, 2162 insertions(+)
create mode 100644 examples/third_party_examples/GPT_finetuning_with_wandb.ipynb
create mode 100644 examples/third_party_examples/Openai_monitoring_with_wandb_weave.ipynb
diff --git a/examples/third_party_examples/GPT_finetuning_with_wandb.ipynb b/examples/third_party_examples/GPT_finetuning_with_wandb.ipynb
new file mode 100644
index 0000000..8f0e82a
--- /dev/null
+++ b/examples/third_party_examples/GPT_finetuning_with_wandb.ipynb
@@ -0,0 +1,1802 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vi2LlIiMU2dp"
+ },
+ "source": [
+ "
\n",
+ "\n",
+ "\n",
+ "# Fine-tune ChatGPT-3.5 and GPT-4 with Weights & Biases\n",
+ "\n",
+ "
\n",
+ "\n",
+ "**Note:** you will need an [OpenAI API key](https://platform.openai.com/account/api-keys) to run this colab."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OKB1NaA3U2dp"
+ },
+ "source": [
+ "If you use OpenAI's API to [fine-tune ChatGPT-3.5](https://platform.openai.com/docs/guides/fine-tuning), you can now use the W&B integration to track experiments, models, and datasets in your central dashboard.\n",
+ "\n",
+ "All it takes is one line: `openai wandb sync`\n",
+ "\n",
+ "See the [OpenAI section](https://wandb.me/openai-docs) in the Weights & Biases documentation for full details of the integration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "AebZsbWrU2dp"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install -Uq openai tiktoken datasets tenacity wandb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "L_qbldX6U2dq"
+ },
+ "outputs": [],
+ "source": [
+ "# Remove once this PR is merged: https://github.com/openai/openai-python/pull/590 and openai release is made\n",
+ "!pip uninstall -y openai -qq \\\n",
+ "&& pip install git+https://github.com/morganmcg1/openai-python.git@update_wandb_logger -qqq"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "STVFg2SMU2dq"
+ },
+ "source": [
+ "## Optional: Fine-tune ChatGPT-3.5\n",
+ "\n",
+ "It's always more fun to experiment with your own projects so if you have already used the openai API to fine-tune an OpenAI model, just skip this section.\n",
+ "\n",
+ "Otherwise let's fine-tune ChatGPT-3.5 on a legal dataset!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0G1keRuTU2dq"
+ },
+ "source": [
+ "### Imports and initial set-up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "TpRQxo1QU2dq"
+ },
+ "outputs": [],
+ "source": [
+ "import openai\n",
+ "import wandb\n",
+ "\n",
+ "import os\n",
+ "import json\n",
+ "import random\n",
+ "import tiktoken\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from pathlib import Path\n",
+ "from tqdm.auto import tqdm\n",
+ "from collections import defaultdict\n",
+ "from tenacity import retry, stop_after_attempt, wait_fixed"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "X7h1WmYRU2dr"
+ },
+ "source": [
+ "Start your Weights & Biases run. If you don't have an account you can sign up for one for free at www.wandb.ai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "p5fPfbPCU2dr"
+ },
+ "outputs": [],
+ "source": [
+ "WANDB_PROJECT = \"OpenAI-Fine-Tune\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AKc9uwv_U2dr"
+ },
+ "source": [
+ "### Set up your API key"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "JDs2cWLfU2dr"
+ },
+ "outputs": [],
+ "source": [
+ "# Enter credentials.\n",
+ "# Prefer the OPENAI_API_KEY environment variable so the key is never saved inside the notebook;\n",
+ "# the placeholder is only used when the variable is not set.\n",
+ "openai_key = os.environ.get(\"OPENAI_API_KEY\", \"YOUR_API_KEY\")\n",
+ "\n",
+ "openai.api_key = openai_key"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YQhM025QU2dr"
+ },
+ "source": [
+ "### Dataset Preparation\n",
+ "\n",
+ "We download a dataset from [LegalBench](https://hazyresearch.stanford.edu/legalbench/), a project to curate tasks for evaluating legal reasoning, specifically the [Contract NLI Explicit Identification task](https://github.com/HazyResearch/legalbench/tree/main/tasks/contract_nli_explicit_identification).\n",
+ "\n",
+ "This comprises a total of 117 examples, from which we will create our own train and test datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "THyWwbMyU2ds"
+ },
+ "outputs": [],
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "# Seed the shuffle so the train/test split (and the artifacts logged from it) is reproducible across runs\n",
+ "random.seed(42)\n",
+ "\n",
+ "# Download the data, merge the train and test splits into a single list and shuffle\n",
+ "dataset = load_dataset(\"nguha/legalbench\", \"contract_nli_explicit_identification\")\n",
+ "\n",
+ "data = list(dataset[\"train\"]) + list(dataset[\"test\"])\n",
+ "\n",
+ "random.shuffle(data)\n",
+ "\n",
+ "# Record each example's position after shuffling\n",
+ "for idx, d in enumerate(data):\n",
+ "    d[\"new_index\"] = idx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "JPDSyx9AU2ds"
+ },
+ "source": [
+ "Let's look at a few samples."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "l6NZj5SZU2ds",
+ "outputId": "527c1836-cb2b-474d-c0c6-d646e4fd087b"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(117,\n",
+ " [{'answer': 'No',\n",
+ " 'index': '94',\n",
+ " 'text': 'Recipient shall use the Confidential Information exclusively for HySafe purposes, especially to advice the Governing Board of HySafe. ',\n",
+ " 'document_name': 'NDA_V3.pdf',\n",
+ " 'new_index': 0},\n",
+ " {'answer': 'No',\n",
+ " 'index': '53',\n",
+ " 'text': '3. In consideration of each and every disclosure of CONFIDENTIAL INFORMATION, the Parties agree to: (c) make no disclosures of any CONFIDENTIAL INFORMATION to any party other than officers and employees of a Party to this IRA; (d) limit access to CONFIDENTIAL INFORMATION to those officers and employees having a reasonable need for such INFORMATION and being boUnd by a written obligation to maintain the confidentiality of such INFORMATION; and ',\n",
+ " 'document_name': '1084000_0001144204-06-046785_v056501_ex10-16.txt',\n",
+ " 'new_index': 1}])"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(data), data[0:2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VSUkLGT0U2ds"
+ },
+ "source": [
+ "### Format our Data for Chat Completion Models\n",
+ "We modify the `base_prompt` from the LegalBench task to make it a zero-shot prompt, as we are training the model instead of using few-shot prompting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "WPCxFBbEU2ds"
+ },
+ "outputs": [],
+ "source": [
+ "base_prompt_zero_shot = \"Identify if the clause provides that all Confidential Information shall be expressly identified by the Disclosing Party. Answer with only `Yes` or `No`\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jJFhwJjXU2ds"
+ },
+ "source": [
+ "We now split the data into training and test sets: let's train on 30 samples and test on the remainder\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "H-2oiwxIU2ds"
+ },
+ "outputs": [],
+ "source": [
+ "n_train = 30\n",
+ "n_test = len(data) - n_train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "rYhlg7wJU2ds",
+ "outputId": "c0121d63-7c1e-48cd-82c7-b9efeb663df5"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(30,\n",
+ " 87,\n",
+ " 87,\n",
+ " {'messages': [{'role': 'system',\n",
+ " 'content': 'Identify if the clause provides that all Confidential Information shall be expressly identified by the Disclosing Party. Answer with only `Yes` or `No`'},\n",
+ " {'role': 'user',\n",
+ " 'content': '2. The Contractor shall not, without the Stateβs prior written consent, copy, disclose, publish, release, transfer, disseminate, use, or allow access for any purpose or in any form, any Confidential Information except for the sole and exclusive purpose of performing under the Contract. '},\n",
+ " {'role': 'assistant', 'content': 'No'}]})"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_messages = []\n",
+ "test_messages = []\n",
+ "\n",
+ "# Build one system/user/assistant conversation per example and route it to its split\n",
+ "for d in data:\n",
+ "    prompts = [\n",
+ "        {\"role\": \"system\", \"content\": base_prompt_zero_shot},\n",
+ "        {\"role\": \"user\", \"content\": d[\"text\"]},\n",
+ "        {\"role\": \"assistant\", \"content\": d[\"answer\"]},\n",
+ "    ]\n",
+ "\n",
+ "    # Examples whose shuffled index is below n_train go to training, the rest to test\n",
+ "    split = train_messages if int(d[\"new_index\"]) < n_train else test_messages\n",
+ "    split.append({'messages': prompts})\n",
+ "\n",
+ "len(train_messages), len(test_messages), n_test, train_messages[5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "2Ul1lOV8U2dt"
+ },
+ "source": [
+ "### Save the data to Weights & Biases\n",
+ "\n",
+ "Save the data in a train and test file first"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "z_kbcPDtU2dt"
+ },
+ "outputs": [],
+ "source": [
+ "train_file_path = 'encoded_train_data.jsonl'\n",
+ "test_file_path = 'encoded_test_data.jsonl'\n",
+ "\n",
+ "# Write each split as JSON Lines: one JSON-encoded conversation per line\n",
+ "for out_path, records in ((train_file_path, train_messages), (test_file_path, test_messages)):\n",
+ "    with open(out_path, 'w') as file:\n",
+ "        file.writelines(json.dumps(item) + '\\n' for item in records)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "C58GZECzU2dt"
+ },
+ "source": [
+ "Next, we validate that our training data is in the correct format using a script from the [OpenAI fine-tuning documentation](https://platform.openai.com/docs/guides/fine-tuning/)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "awSfo7T4U2dt"
+ },
+ "outputs": [],
+ "source": [
+ "# Validation helper adapted from the OpenAI fine-tuning documentation\n",
+ "\n",
+ "def openai_validate_data(dataset_path):\n",
+ "    \"\"\"Validate a chat fine-tuning JSONL file and print a summary report.\n",
+ "\n",
+ "    Prints the number of examples, any message-format errors, the distribution of\n",
+ "    messages and tokens per example, and an estimate of the tokens billed for training.\n",
+ "\n",
+ "    Args:\n",
+ "        dataset_path: path to a JSONL file with one `{\"messages\": [...]}` object per line.\n",
+ "\n",
+ "    Returns:\n",
+ "        None; the report is printed.\n",
+ "    \"\"\"\n",
+ "    # Load dataset, ignoring blank lines such as a trailing newline at the end of the file\n",
+ "    with open(dataset_path) as f:\n",
+ "        dataset = [json.loads(line) for line in f if line.strip()]\n",
+ "\n",
+ "    # We can inspect the data quickly by checking the number of examples and the first item\n",
+ "\n",
+ "    # Initial dataset stats\n",
+ "    print(\"Num examples:\", len(dataset))\n",
+ "    if not dataset:\n",
+ "        # The statistics below (min/max, quantiles, epoch estimate) need at least one example\n",
+ "        print(\"Dataset is empty, nothing to validate\")\n",
+ "        return\n",
+ "\n",
+ "    print(\"First example:\")\n",
+ "    for message in dataset[0][\"messages\"]:\n",
+ "        print(message)\n",
+ "\n",
+ "    # Now that we have a sense of the data, we need to go through all the different examples and check to make sure the formatting is correct and matches the Chat completions message structure\n",
+ "\n",
+ "    # Format error checks: count each kind of problem across the whole dataset\n",
+ "    format_errors = defaultdict(int)\n",
+ "\n",
+ "    for ex in dataset:\n",
+ "        if not isinstance(ex, dict):\n",
+ "            format_errors[\"data_type\"] += 1\n",
+ "            continue\n",
+ "\n",
+ "        messages = ex.get(\"messages\", None)\n",
+ "        if not messages:\n",
+ "            format_errors[\"missing_messages_list\"] += 1\n",
+ "            continue\n",
+ "\n",
+ "        for message in messages:\n",
+ "            if \"role\" not in message or \"content\" not in message:\n",
+ "                format_errors[\"message_missing_key\"] += 1\n",
+ "\n",
+ "            if any(k not in (\"role\", \"content\", \"name\") for k in message):\n",
+ "                format_errors[\"message_unrecognized_key\"] += 1\n",
+ "\n",
+ "            if message.get(\"role\", None) not in (\"system\", \"user\", \"assistant\"):\n",
+ "                format_errors[\"unrecognized_role\"] += 1\n",
+ "\n",
+ "            content = message.get(\"content\", None)\n",
+ "            if not content or not isinstance(content, str):\n",
+ "                format_errors[\"missing_content\"] += 1\n",
+ "\n",
+ "        if not any(message.get(\"role\", None) == \"assistant\" for message in messages):\n",
+ "            format_errors[\"example_missing_assistant_message\"] += 1\n",
+ "\n",
+ "    if format_errors:\n",
+ "        print(\"Found errors:\")\n",
+ "        for k, v in format_errors.items():\n",
+ "            print(f\"{k}: {v}\")\n",
+ "    else:\n",
+ "        print(\"No errors found\")\n",
+ "\n",
+ "    # Beyond the structure of the message, we also need to ensure that the length does not exceed the 4096 token limit.\n",
+ "\n",
+ "    # Token counting functions\n",
+ "    encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
+ "\n",
+ "    # not exact!\n",
+ "    # simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb\n",
+ "    def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):\n",
+ "        \"\"\"Approximate the total token count of a conversation, including per-message overhead.\"\"\"\n",
+ "        num_tokens = 0\n",
+ "        for message in messages:\n",
+ "            num_tokens += tokens_per_message\n",
+ "            for key, value in message.items():\n",
+ "                num_tokens += len(encoding.encode(value))\n",
+ "                if key == \"name\":\n",
+ "                    num_tokens += tokens_per_name\n",
+ "        num_tokens += 3\n",
+ "        return num_tokens\n",
+ "\n",
+ "    def num_assistant_tokens_from_messages(messages):\n",
+ "        \"\"\"Count the tokens in the content of the assistant messages only.\"\"\"\n",
+ "        num_tokens = 0\n",
+ "        for message in messages:\n",
+ "            if message[\"role\"] == \"assistant\":\n",
+ "                num_tokens += len(encoding.encode(message[\"content\"]))\n",
+ "        return num_tokens\n",
+ "\n",
+ "    def print_distribution(values, name):\n",
+ "        \"\"\"Print min/max, mean/median and the 5th/95th percentiles of `values`.\"\"\"\n",
+ "        print(f\"\\n#### Distribution of {name}:\")\n",
+ "        print(f\"min / max: {min(values)}, {max(values)}\")\n",
+ "        print(f\"mean / median: {np.mean(values)}, {np.median(values)}\")\n",
+ "        # Quantiles 0.05 / 0.95 so the numbers match the p5 / p95 label\n",
+ "        print(f\"p5 / p95: {np.quantile(values, 0.05)}, {np.quantile(values, 0.95)}\")\n",
+ "\n",
+ "    # Last, we can look at the results of the different formatting operations before proceeding with creating a fine-tuning job:\n",
+ "\n",
+ "    # Warnings and tokens counts\n",
+ "    n_missing_system = 0\n",
+ "    n_missing_user = 0\n",
+ "    n_messages = []\n",
+ "    convo_lens = []\n",
+ "    assistant_message_lens = []\n",
+ "\n",
+ "    for ex in dataset:\n",
+ "        messages = ex[\"messages\"]\n",
+ "        if not any(message[\"role\"] == \"system\" for message in messages):\n",
+ "            n_missing_system += 1\n",
+ "        if not any(message[\"role\"] == \"user\" for message in messages):\n",
+ "            n_missing_user += 1\n",
+ "        n_messages.append(len(messages))\n",
+ "        convo_lens.append(num_tokens_from_messages(messages))\n",
+ "        assistant_message_lens.append(num_assistant_tokens_from_messages(messages))\n",
+ "\n",
+ "    print(\"Num examples missing system message:\", n_missing_system)\n",
+ "    print(\"Num examples missing user message:\", n_missing_user)\n",
+ "    print_distribution(n_messages, \"num_messages_per_example\")\n",
+ "    print_distribution(convo_lens, \"num_total_tokens_per_example\")\n",
+ "    print_distribution(assistant_message_lens, \"num_assistant_tokens_per_example\")\n",
+ "    n_too_long = sum(l > 4096 for l in convo_lens)\n",
+ "    print(f\"\\n{n_too_long} examples may be over the 4096 token limit, they will be truncated during fine-tuning\")\n",
+ "\n",
+ "    # Pricing and default n_epochs estimate\n",
+ "    MAX_TOKENS_PER_EXAMPLE = 4096\n",
+ "\n",
+ "    MIN_TARGET_EXAMPLES = 100\n",
+ "    MAX_TARGET_EXAMPLES = 25000\n",
+ "    TARGET_EPOCHS = 3\n",
+ "    MIN_EPOCHS = 1\n",
+ "    MAX_EPOCHS = 25\n",
+ "\n",
+ "    # Start from the target epoch count and adjust it when examples * epochs falls outside the target range\n",
+ "    n_epochs = TARGET_EPOCHS\n",
+ "    n_train_examples = len(dataset)\n",
+ "    if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:\n",
+ "        n_epochs = min(MAX_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)\n",
+ "    elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:\n",
+ "        n_epochs = max(MIN_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)\n",
+ "\n",
+ "    # Each example is counted up to MAX_TOKENS_PER_EXAMPLE tokens\n",
+ "    n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)\n",
+ "    print(f\"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training\")\n",
+ "    print(f\"By default, you'll train for {n_epochs} epochs on this dataset\")\n",
+ "    print(f\"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens\")\n",
+ "    print(\"See pricing page to estimate total costs\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "wmC69bJkU2dt"
+ },
+ "source": [
+ "Validate train data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "vej4HKR7U2dt",
+ "outputId": "170ed230-8d7c-4e35-94d4-864db843beb5"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Num examples: 30\n",
+ "First example:\n",
+ "{'role': 'system', 'content': 'Identify if the clause provides that all Confidential Information shall be expressly identified by the Disclosing Party. Answer with only `Yes` or `No`'}\n",
+ "{'role': 'user', 'content': 'Recipient shall use the Confidential Information exclusively for HySafe purposes, especially to advice the Governing Board of HySafe. '}\n",
+ "{'role': 'assistant', 'content': 'No'}\n",
+ "No errors found\n",
+ "Num examples missing system message: 0\n",
+ "Num examples missing user message: 0\n",
+ "\n",
+ "#### Distribution of num_messages_per_example:\n",
+ "min / max: 3, 3\n",
+ "mean / median: 3.0, 3.0\n",
+ "p5 / p95: 3.0, 3.0\n",
+ "\n",
+ "#### Distribution of num_total_tokens_per_example:\n",
+ "min / max: 69, 319\n",
+ "mean / median: 143.46666666666667, 122.0\n",
+ "p5 / p95: 82.10000000000001, 235.10000000000002\n",
+ "\n",
+ "#### Distribution of num_assistant_tokens_per_example:\n",
+ "min / max: 1, 1\n",
+ "mean / median: 1.0, 1.0\n",
+ "p5 / p95: 1.0, 1.0\n",
+ "\n",
+ "0 examples may be over the 4096 token limit, they will be truncated during fine-tuning\n",
+ "Dataset has ~4304 tokens that will be charged for during training\n",
+ "By default, you'll train for 3 epochs on this dataset\n",
+ "By default, you'll be charged for ~12912 tokens\n",
+ "See pricing page to estimate total costs\n"
+ ]
+ }
+ ],
+ "source": [
+ "openai_validate_data(train_file_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "C83jxUI9U2dt"
+ },
+ "source": [
+ "Log our data to Weights & Biases Artifacts for storage and versioning"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ep-aT9NKU2dt",
+ "outputId": "5d8fc8a3-8734-4878-b249-d7e9695fe584"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "Tracking run with wandb version 0.15.9"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Run data is saved locally in /Users/tcapelle/work/examples/colabs/openai/wandb/run-20230830_113853-ivu21mjl
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Syncing run mild-surf-1 to Weights & Biases (docs)
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " View project at https://wandb.ai/capecape/OpenAI-Fine-Tune"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " View run at https://wandb.ai/capecape/OpenAI-Fine-Tune/runs/ivu21mjl"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Waiting for W&B process to finish... (success)."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ " View run mild-surf-1 at: https://wandb.ai/capecape/OpenAI-Fine-Tune/runs/ivu21mjl
Synced 6 W&B file(s), 0 media file(s), 2 artifact file(s) and 1 other file(s)"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Find logs at: ./wandb/run-20230830_113853-ivu21mjl/logs
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "wandb.init(\n",
+ " project=WANDB_PROJECT,\n",
+ " # entity=\"prompt-eng\",\n",
+ " job_type=\"log-data\",\n",
+ " config = {'n_train': n_train,\n",
+ " 'n_valid': n_test})\n",
+ "\n",
+ "wandb.log_artifact(train_file_path,\n",
+ " \"legalbench-contract_nli_explicit_identification-train\",\n",
+ " type=\"train-data\")\n",
+ "\n",
+ "wandb.log_artifact(test_file_path,\n",
+ " \"legalbench-contract_nli_explicit_identification-test\",\n",
+ " type=\"test-data\")\n",
+ "\n",
+ "# keep entity (typically your wandb username) for reference of artifact later in this demo\n",
+ "entity = wandb.run.entity\n",
+ "\n",
+ "wandb.finish()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hD2g6q3GU2dt"
+ },
+ "source": [
+ "### Create a fine-tuned model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4MlVEQTaU2du"
+ },
+ "source": [
+ "We'll now use OpenAI API to fine-tune ChatGPT-3.5\n",
+ "\n",
+ "Let's first download our training & validation files and save them to a folder called `my_data`. We will retrieve the `latest` version of the artifact, but it could also be `v0`, `v1` or any alias we associated with it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "referenced_widgets": [
+ "dabd1490c65148978081fa0aedb0913b"
+ ]
+ },
+ "id": "K54fvZFTU2du",
+ "outputId": "adbcc0c2-c149-4dac-e7be-34253e66788e"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "dabd1490c65148978081fa0aedb0913b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016751802766035932, max=1.0β¦"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Tracking run with wandb version 0.15.9"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Run data is saved locally in /Users/tcapelle/work/examples/colabs/openai/wandb/run-20230830_113907-1ili9l51
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Syncing run jumping-water-2 to Weights & Biases (docs)
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " View project at https://wandb.ai/capecape/OpenAI-Fine-Tune"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " View run at https://wandb.ai/capecape/OpenAI-Fine-Tune/runs/1ili9l51"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'my_data/encoded_train_data.jsonl'"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "wandb.init(project=WANDB_PROJECT,\n",
+ " # entity=\"prompt-eng\",\n",
+ " job_type=\"finetune\")\n",
+ "\n",
+ "artifact_train = wandb.use_artifact(\n",
+ " f'{entity}/{WANDB_PROJECT}/legalbench-contract_nli_explicit_identification-train:latest',\n",
+ " type='train-data')\n",
+ "train_file = artifact_train.get_path(train_file_path).download(\"my_data\")\n",
+ "\n",
+ "train_file"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Dh86AmJuU2du"
+ },
+ "source": [
+ "Then we upload the training data to OpenAI. OpenAI has to process the data, so this will take a few minutes depending on the size of your dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "xaviUxz7U2du",
+ "outputId": "bf00b2e6-6045-4fca-be7d-9e3b0dd86421"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " JSON: {\n",
+ " \"object\": \"file\",\n",
+ " \"id\": \"file-spPASR6VWco54SqfN2yo7T8v\",\n",
+ " \"purpose\": \"fine-tune\",\n",
+ " \"filename\": \"file\",\n",
+ " \"bytes\": 24059,\n",
+ " \"created_at\": 1693388388,\n",
+ " \"status\": \"uploaded\",\n",
+ " \"status_details\": null\n",
+ "}"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Open the file in a context manager so the handle is closed once the upload call returns\n",
+ "with open(train_file, \"rb\") as train_fh:\n",
+ "    openai_train_file_info = openai.File.create(\n",
+ "        file=train_fh,\n",
+ "        purpose='fine-tune'\n",
+ "    )\n",
+ "\n",
+ "# you may need to wait a couple of minutes for OpenAI to process the file\n",
+ "openai_train_file_info"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "woiHI6KoU2du"
+ },
+ "source": [
+ "### Time to train the model!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YvP8ZGqWU2du"
+ },
+ "source": [
+ "Let's define our ChatGPT-3.5 fine-tuning hyper-parameters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "V4sV1wohU2du"
+ },
+ "outputs": [],
+ "source": [
+ "model = 'gpt-3.5-turbo'\n",
+ "n_epochs = 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "cju9ZZCZU2du",
+ "outputId": "75155c4f-f260-4099-dac0-150a98dffc16"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " JSON: {\n",
+ " \"object\": \"fine_tuning.job\",\n",
+ " \"id\": \"ftjob-x4tl83IlSGolkUF3fCFyZNGs\",\n",
+ " \"model\": \"gpt-3.5-turbo-0613\",\n",
+ " \"created_at\": 1693388447,\n",
+ " \"finished_at\": null,\n",
+ " \"fine_tuned_model\": null,\n",
+ " \"organization_id\": \"org-WnF2wEqNkV1Nj65CzDxr6iUm\",\n",
+ " \"result_files\": [],\n",
+ " \"status\": \"created\",\n",
+ " \"validation_file\": null,\n",
+ " \"training_file\": \"file-spPASR6VWco54SqfN2yo7T8v\",\n",
+ " \"hyperparameters\": {\n",
+ " \"n_epochs\": 3\n",
+ " },\n",
+ " \"trained_tokens\": null\n",
+ "}"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "openai_ft_job_info = openai.FineTuningJob.create(\n",
+ " training_file=openai_train_file_info[\"id\"],\n",
+ " model=model,\n",
+ " hyperparameters={\"n_epochs\": n_epochs}\n",
+ ")\n",
+ "\n",
+ "ft_job_id = openai_ft_job_info[\"id\"]\n",
+ "\n",
+ "openai_ft_job_info"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "dP68_bGSU2du"
+ },
+ "source": [
+ "\n",
+ "> this takes around 5 minutes to train, and you get an email from OpenAI when finished."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "f-X8R7ILU2du"
+ },
+ "source": [
+ "**That's it!**\n",
+ "\n",
+ "Now your model is training on OpenAI's machines. To get the current state of your fine-tuning job, run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0os7HN9fU2du",
+ "outputId": "f67adb43-c91d-4185-8fac-8d1a281a3666"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('succeeded',\n",
+ " 12732,\n",
+ " 1693389024,\n",
+ " 'ft:gpt-3.5-turbo-0613:weights-biases::7tC85HcX')"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "state = openai.FineTuningJob.retrieve(ft_job_id)\n",
+ "state[\"status\"], state[\"trained_tokens\"], state[\"finished_at\"], state[\"fine_tuned_model\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "59RBnpywU2du"
+ },
+ "source": [
+ "Show recent events for our fine-tuning job"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "3X0nX-5HU2du",
+ "outputId": "b3a64f02-3fff-4bc1-b166-7783208ca1a3"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " JSON: {\n",
+ " \"object\": \"list\",\n",
+ " \"data\": [\n",
+ " {\n",
+ " \"object\": \"fine_tuning.job.event\",\n",
+ " \"id\": \"ftevent-5x9Y6Payk6fIdyJyMRY5um1v\",\n",
+ " \"created_at\": 1693389024,\n",
+ " \"level\": \"info\",\n",
+ " \"message\": \"Fine-tuning job successfully completed\",\n",
+ " \"data\": null,\n",
+ " \"type\": \"message\"\n",
+ " },\n",
+ " {\n",
+ " \"object\": \"fine_tuning.job.event\",\n",
+ " \"id\": \"ftevent-i16NTGNakv9P0RkOtJ7vvvoG\",\n",
+ " \"created_at\": 1693389022,\n",
+ " \"level\": \"info\",\n",
+ " \"message\": \"New fine-tuned model created: ft:gpt-3.5-turbo-0613:weights-biases::7tC85HcX\",\n",
+ " \"data\": null,\n",
+ " \"type\": \"message\"\n",
+ " },\n",
+ " {\n",
+ " \"object\": \"fine_tuning.job.event\",\n",
+ " \"id\": \"ftevent-MkLrJQ8sDgaC67CdmFMwsIjV\",\n",
+ " \"created_at\": 1693389017,\n",
+ " \"level\": \"info\",\n",
+ " \"message\": \"Step 90/90: training loss=0.00\",\n",
+ " \"data\": {\n",
+ " \"step\": 90,\n",
+ " \"train_loss\": 2.5828578600339824e-06,\n",
+ " \"train_mean_token_accuracy\": 1.0\n",
+ " },\n",
+ " \"type\": \"metrics\"\n",
+ " },\n",
+ " {\n",
+ " \"object\": \"fine_tuning.job.event\",\n",
+ " \"id\": \"ftevent-3sRpTRSjK3TfFRZY88HEASpX\",\n",
+ " \"created_at\": 1693389015,\n",
+ " \"level\": \"info\",\n",
+ " \"message\": \"Step 89/90: training loss=0.00\",\n",
+ " \"data\": {\n",
+ " \"step\": 89,\n",
+ " \"train_loss\": 2.5828578600339824e-06,\n",
+ " \"train_mean_token_accuracy\": 1.0\n",
+ " },\n",
+ " \"type\": \"metrics\"\n",
+ " },\n",
+ " {\n",
+ " \"object\": \"fine_tuning.job.event\",\n",
+ " \"id\": \"ftevent-HtS6tJMVPOmazquZ82a1iCdV\",\n",
+ " \"created_at\": 1693389015,\n",
+ " \"level\": \"info\",\n",
+ " \"message\": \"Step 88/90: training loss=0.00\",\n",
+ " \"data\": {\n",
+ " \"step\": 88,\n",
+ " \"train_loss\": 2.5828578600339824e-06,\n",
+ " \"train_mean_token_accuracy\": 1.0\n",
+ " },\n",
+ " \"type\": \"metrics\"\n",
+ " }\n",
+ " ],\n",
+ " \"has_more\": true\n",
+ "}"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "openai.FineTuningJob.list_events(id=ft_job_id, limit=5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Q31LUKh3U2dv"
+ },
+ "source": [
+ "We can run a few different fine-tunes with different parameters or even with different datasets."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hCvmrDWMU2dv"
+ },
+ "source": [
+ "## Log OpenAI fine-tune jobs to Weights & Biases"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ehXJW997U2dv"
+ },
+ "source": [
+ "We can log our fine-tunes with a simple command."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "851RDhDBU2dv",
+ "outputId": "6512e9ea-f58d-425f-d068-df405cc3ccd7"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "usage: openai wandb sync [-h] [-i ID] [-n N_FINE_TUNES] [--project PROJECT]\n",
+ " [--entity ENTITY] [--force] [--legacy]\n",
+ "\n",
+ "options:\n",
+ " -h, --help show this help message and exit\n",
+ " -i ID, --id ID The id of the fine-tune job (optional)\n",
+ " -n N_FINE_TUNES, --n_fine_tunes N_FINE_TUNES\n",
+ " Number of most recent fine-tunes to log when an id is\n",
+ " not provided. By default, every fine-tune is synced.\n",
+ " --project PROJECT Name of the Weights & Biases project where you're\n",
+ " sending runs. By default, it is \"OpenAI-Fine-Tune\".\n",
+ " --entity ENTITY Weights & Biases username or team name where you're\n",
+ " sending runs. By default, your default entity is used,\n",
+ " which is usually your username.\n",
+ " --force Forces logging and overwrite existing wandb run of the\n",
+ " same fine-tune.\n",
+ " --legacy Log results from legacy OpenAI /v1/fine-tunes api\n"
+ ]
+ }
+ ],
+ "source": [
+ "!openai wandb sync --help"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "cfnhSQY1U2dv"
+ },
+ "source": [
+ "Calling `openai wandb sync` will log all un-synced fine-tune jobs to W&B.\n",
+ "\n",
+ "Below we are just logging 1 job, passing:\n",
+ "- our OpenAI key as an environment variable\n",
+ "- the id of the fine-tune job we'd like to log\n",
+ "- the W&B project to log it to\n",
+ "\n",
+ "See the [OpenAI section](https://wandb.me/openai-docs) in the Weights & Biases documentation for full details of the integration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "nl58wxsfU2dv",
+ "outputId": "acfe2d6d-3796-4c33-eb9e-7576ea7e69bb"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Retrieving fine-tune job...\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/Users/tcapelle/work/examples/colabs/openai/wandb/run-20230830_115915-ftjob-x4tl83IlSGolkUF3fCFyZNGs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mftjob-x4tl83IlSGolkUF3fCFyZNGs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/capecape/OpenAI-Fine-Tune\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/capecape/OpenAI-Fine-Tune/runs/ftjob-x4tl83IlSGolkUF3fCFyZNGs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[32m(success).\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_accuracy ββββββββββββββββββββββββββββββββββββββββ\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss βββββββββ
βββββββββββββββββββββββββββββββ\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: fine_tuned_model ft:gpt-3.5-turbo-061...\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: status succeeded\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_accuracy 1.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mftjob-x4tl83IlSGolkUF3fCFyZNGs\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/capecape/OpenAI-Fine-Tune/runs/ftjob-x4tl83IlSGolkUF3fCFyZNGs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230830_115915-ftjob-x4tl83IlSGolkUF3fCFyZNGs/logs\u001b[0m\n",
+ "🎉 wandb sync completed successfully\n"
+ ]
+ }
+ ],
+ "source": [
+ "!OPENAI_API_KEY={openai_key} openai wandb sync --id {ft_job_id} --project {WANDB_PROJECT}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "referenced_widgets": [
+ "b21fd890682b4bf786b2346dd18cae9a"
+ ]
+ },
+ "id": "LWHLs2oBU2dv",
+ "outputId": "cd141916-2a25-43e7-a66b-3bc9cfe19df2"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Waiting for W&B process to finish... (success)."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b21fd890682b4bf786b2346dd18cae9a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(Label(value='0.050 MB of 0.050 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, maxβ¦"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n",
+ "upload_file exception https://storage.googleapis.com/wandb-production.appspot.com/capecape/OpenAI-Fine-Tune/1ili9l51/requirements.txt?Expires=1693475972&GoogleAccessId=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com&Signature=NzF9wj2gS8rMEwRT9wlft2lNubemw67f2qrz9Zy90Bjxg5xCL9pIu%2FRbBGjRwLA2v64PuiP23Au5Dho26Tnw3UjUS1apqTkaOgjWDTlCCiDLzvMUsqHf0lhhWIgGMZcsA4gPpOi%2Bc%2ByJm4z6JE7D6RJ7r8y4fI0Jg6fX9KSWpzh8INiM6fQZiQjUChLVdtNJQZ2gfu7xRc%2BZIUEjgDuUqmS705pIUOgJXA9MS3%2Fhewkc7CxWay4ReMJixBZgaqLIRqHQnyzb38I5nPrRS3JrwrigQyX6tOsK05LDLA0o%2Bs0K11664%2F1ZxO6mSTfOaw7tXUmbUUWFOp33Qq8KXNz9Zg%3D%3D: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))\n",
+ "upload_file request headers: {'User-Agent': 'python-requests/2.28.2', 'Accept-Encoding': 'gzip, deflate, br', 'Accept': '*/*', 'Connection': 'keep-alive', 'Content-Length': '4902'}\n",
+ "upload_file response body: \n",
+ "upload_file exception https://storage.googleapis.com/wandb-production.appspot.com/capecape/OpenAI-Fine-Tune/1ili9l51/conda-environment.yaml?Expires=1693475972&GoogleAccessId=gorilla-files-url-signer-man%40wandb-production.iam.gserviceaccount.com&Signature=wKnFdg7z7CiJOMn4WSvt6GSj2hPnMr0Xc4KuwAXa8akLucmw700x%2FWF87jmWaqnp%2FK4%2BF6JTRghQAokXF9jxCcXBSYhgFhCVACrOVyN%2BSTZ4u8tDgD6Dm%2FEFwWObiH%2BALSS1N0FmG7i6kL9Evyng3yPc4noEz%2FkLNIDIascAPgUe9UkPaBCRc9j7OxzYJx07bpeL4HaGe4yaCvk2mSVr4l%2FUfsICBI6E4KKrLDvtZvFFFUB4MgqXp0Sxc0k0pOxaw9zZhiNQQELDnhnuNY4wi78EPiXN1BpU6bTgIYaHe5mkS%2B7M5HiFs83ML98JI2OeRiAjAGtIIETT4xDjTYWVpA%3D%3D: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))\n",
+ "upload_file request headers: {'User-Agent': 'python-requests/2.28.2', 'Accept-Encoding': 'gzip, deflate, br', 'Accept': '*/*', 'Connection': 'keep-alive', 'Content-Length': '8450'}\n",
+ "upload_file response body: \n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ " View run jumping-water-2 at: https://wandb.ai/capecape/OpenAI-Fine-Tune/runs/1ili9l51
Synced 7 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Find logs at: ./wandb/run-20230830_113907-1ili9l51/logs
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "wandb.finish()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eashAVIvU2dv"
+ },
+ "source": [
+ "Our fine-tunes are now successfully synced to Weights & Biases.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Anytime we have new fine-tunes, we can just call `openai wandb sync` to add them to our dashboard."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xfX_af2EU2dz"
+ },
+ "source": [
+ "## Run evaluation and log the results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9tvlkGoOU2dz"
+ },
+ "source": [
+ "The best way to evaluate a generative model is to explore sample predictions from your evaluation set.\n",
+ "\n",
+ "Let's generate a few inference samples, log them to W&B, and see how the performance compares to a baseline ChatGPT-3.5 model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "SI00xE9nU2d0",
+ "outputId": "1f01df3d-0360-49e8-c92e-40ea8df73639"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Tracking run with wandb version 0.15.9"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Run data is saved locally in /Users/tcapelle/work/examples/colabs/openai/wandb/run-20230830_115947-iepk19m2
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Syncing run ethereal-energy-4 to Weights & Biases (docs)
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " View project at https://wandb.ai/capecape/OpenAI-Fine-Tune"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " View run at https://wandb.ai/capecape/OpenAI-Fine-Tune/runs/iepk19m2"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "There are 87 test examples\n"
+ ]
+ }
+ ],
+ "source": [
+ "wandb.init(project=WANDB_PROJECT,\n",
+ " job_type='eval')\n",
+ "\n",
+ "artifact_valid = wandb.use_artifact(\n",
+ " f'{entity}/{WANDB_PROJECT}/legalbench-contract_nli_explicit_identification-test:latest',\n",
+ " type='test-data')\n",
+ "test_file = artifact_valid.get_path(test_file_path).download(\"my_data\")\n",
+ "\n",
+ "with open(test_file) as f:\n",
+ " test_dataset = [json.loads(line) for line in f]\n",
+ "\n",
+ "print(f\"There are {len(test_dataset)} test examples\")\n",
+ "wandb.config.update({\"num_test_samples\":len(test_dataset)})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ZFUUHVMFU2d0"
+ },
+ "source": [
+ "### Run evaluation on the Fine-Tuned Model\n",
+ "Set up OpenAI call with retries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "FagY6Ev_U2d0"
+ },
+ "outputs": [],
+ "source": [
+ "@retry(stop=stop_after_attempt(3), wait=wait_fixed(60))\n",
+ "def call_openai(messages=\"\", model=\"gpt-3.5-turbo\"):\n",
+ " return openai.ChatCompletion.create(model=model, messages=messages, max_tokens=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9PB5SJrJU2d0"
+ },
+ "source": [
+ "Let's get our trained model id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "O8vNiUlFU2d0",
+ "outputId": "d196a4f1-4c47-4977-b2bd-2f0e485075f7"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'ft:gpt-3.5-turbo-0613:weights-biases::7tC85HcX'"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "state = openai.FineTuningJob.retrieve(ft_job_id)\n",
+ "ft_model_id = state[\"fine_tuned_model\"]\n",
+ "ft_model_id"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0ncxuaqBU2d0"
+ },
+ "source": [
+ "Run evaluation and log results to W&B"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "referenced_widgets": [
+ "683c7db769d044f79cf9409f00af137d"
+ ]
+ },
+ "id": "6YF8ED9HU2d0",
+ "outputId": "c952c07f-d972-4201-8b62-ee1409dd8f9d"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "683c7db769d044f79cf9409f00af137d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/87 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prediction_table = wandb.Table(columns=['messages', 'completion', 'target'])\n",
+ "\n",
+ "eval_data = []  # one [prompt messages, completion, target message] entry per test example\n",
+ "\n",
+ "for row in tqdm(test_dataset):\n",
+ "    messages = row['messages'][:2]  # first two messages form the prompt\n",
+ "    target = row[\"messages\"][2]  # third message holds the expected answer\n",
+ "\n",
+ "    # Evaluate the fine-tuned model (ft_model_id), not the base model, via the retrying helper.\n",
+ "    res = call_openai(model=ft_model_id, messages=messages)\n",
+ "    completion = res.choices[0].message.content\n",
+ "\n",
+ "    eval_data.append([messages, completion, target])\n",
+ "    prediction_table.add_data(messages[1]['content'], completion, target[\"content\"])\n",
+ "\n",
+ "wandb.log({'predictions': prediction_table})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "2BKojUKMU2d0"
+ },
+ "source": [
+ "Calculate the accuracy of the fine-tuned model and log to W&B\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "n02OqDO9U2d0",
+ "outputId": "7b3a9bd1-6af6-4f27-b9e1-186d204cc55f"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy is 0.8390804597701149\n"
+ ]
+ }
+ ],
+ "source": [
+ "correct = 0\n",
+ "for e in eval_data:\n",
+ " if e[1].lower() == e[2][\"content\"].lower():\n",
+ " correct+=1\n",
+ "\n",
+ "accuracy = correct / len(eval_data)\n",
+ "\n",
+ "print(f\"Accuracy is {accuracy}\")\n",
+ "wandb.log({\"eval/accuracy\": accuracy})\n",
+ "wandb.summary[\"eval/accuracy\"] = accuracy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MtlZPVG2U2d0"
+ },
+ "source": [
+ "### Run evaluation on a Baseline model for comparison\n",
+ "Let's compare our model to the baseline model, `gpt-3.5-turbo`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "referenced_widgets": [
+ "56a6a14b0c1346149c973f6c71d31b6c"
+ ]
+ },
+ "id": "9E0IRSTfU2d0",
+ "outputId": "0d016d12-1aa7-40aa-9403-3b0864acbc62"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "56a6a14b0c1346149c973f6c71d31b6c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/87 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "baseline_prediction_table = wandb.Table(columns=['messages', 'completion', 'target'])\n",
+ "baseline_eval_data = []\n",
+ "\n",
+ "for row in tqdm(test_dataset):\n",
+ " messages = row['messages'][:2]\n",
+ " target = row[\"messages\"][2]\n",
+ "\n",
+ " res = call_openai(model=\"gpt-3.5-turbo\", messages=messages)\n",
+ " completion = res.choices[0].message.content\n",
+ "\n",
+ " baseline_eval_data.append([messages, completion, target])\n",
+ " baseline_prediction_table.add_data(messages[1]['content'], completion, target[\"content\"])\n",
+ "\n",
+ "wandb.log({'baseline_predictions': baseline_prediction_table})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "i98CsZJ1U2d1"
+ },
+ "source": [
+ "Calculate the accuracy of the baseline model and log to W&B"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1Sl4zmPwU2d1",
+ "outputId": "7fd58a23-d4ee-4f7b-d8dc-03e208e1a5fd"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Baseline Accurcy is: 0.7931034482758621\n"
+ ]
+ }
+ ],
+ "source": [
+ "baseline_correct = 0  # number of case-insensitive exact matches with the target\n",
+ "for e in baseline_eval_data:  # each entry is [messages, completion, target]\n",
+ "    if e[1].lower() == e[2][\"content\"].lower():\n",
+ "        baseline_correct+=1\n",
+ "\n",
+ "baseline_accuracy = baseline_correct / len(baseline_eval_data)\n",
+ "print(f\"Baseline Accuracy is: {baseline_accuracy}\")\n",
+ "wandb.log({\"eval/baseline_accuracy\": baseline_accuracy})\n",
+ "wandb.summary[\"eval/baseline_accuracy\"] = baseline_accuracy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "referenced_widgets": [
+ "e7fa6b38a02847758fdd424affbc74c1"
+ ]
+ },
+ "id": "EbeRF7vEU2d1",
+ "outputId": "0b679cda-7740-42d9-bbe6-feb11d69ea6f"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Waiting for W&B process to finish... (success)."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e7fa6b38a02847758fdd424affbc74c1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(Label(value='0.248 MB of 0.248 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, maxβ¦"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "wandb: WARNING Source type is set to 'repo' but some required information is missing from the environment. A job will not be created from this run. See https://docs.wandb.ai/guides/launch/create-job\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "Run history:
eval/accuracy | β |
eval/baseline_accuracy | β |
Run summary:
eval/accuracy | 0.83908 |
eval/baseline_accuracy | 0.7931 |
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " View run ethereal-energy-4 at: https://wandb.ai/capecape/OpenAI-Fine-Tune/runs/iepk19m2
Synced 7 W&B file(s), 2 media file(s), 2 artifact file(s) and 1 other file(s)"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Find logs at: ./wandb/run-20230830_115947-iepk19m2/logs
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "wandb.finish()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "A9nh4b15U2d1"
+ },
+ "source": [
+ "And that's it! In this example we have prepared our data, logged it to Weights & Biases, fine-tuned an OpenAI model using that data, logged the results to Weights & Biases and then run evaluation on the fine-tuned model.\n",
+ "\n",
+ "From here you can start to train on larger or more complex tasks, or explore other ways to modify ChatGPT-3.5, such as giving it a different tone or style of response.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "aVtXGnB_U2d1"
+ },
+ "source": [
+ "# Resources\n",
+ "\n",
+ "* [OpenAI Fine-Tuning Guide](https://platform.openai.com/docs/guides/fine-tuning)\n",
+ "* [W&B Integration with OpenAI API Documentation](https://wandb.me/openai-docs)\n",
+ "* [W&B Report: GPT-3 exploration & fine-tuning tips](http://wandb.me/openai-report)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "python3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/third_party_examples/Openai_monitoring_with_wandb_weave.ipynb b/examples/third_party_examples/Openai_monitoring_with_wandb_weave.ipynb
new file mode 100644
index 0000000..947c0cb
--- /dev/null
+++ b/examples/third_party_examples/Openai_monitoring_with_wandb_weave.ipynb
@@ -0,0 +1,360 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "30ccfdbc",
+ "metadata": {
+ "id": "30ccfdbc"
+ },
+ "source": [
+ "
\n",
+ "\n",
+ "\n",
+ "# OpenAI API Monitoring with W&B Weave\n",
+ "\n",
+ "\n",
+ "
\n",
+ "\n",
+ "\n",
+ "**Note:** you will need an [OpenAI API key](https://platform.openai.com/account/api-keys) to run this colab.\n",
+ "\n",
+ "Use the W&B OpenAI integration to monitor OpenAI API calls and understand how your projects and teams are leveraging LLMs.\n",
+ "In this example, we'll generate templated Weave Boards: LLM usage monitoring dashboards which you can explore and customize from the UI.\n",
+ "\n",
+ "* automatically track LLM usage and aggregate useful metrics like cost, latency and throughput across your projects/teams\n",
+ "* dynamically query and derive insights from the logs of all your OpenAI API calls\n",
+ "* iterate visually to slice, aggregate, and explore your data; customize panels to focus on interesting patterns; share progress more easily with your team through an interactive dashboard\n",
+ "\n",
+ "
\n",
+ "\n",
+ "[Play with a live version of this Weave Board →](http://wandb.me/llm-monitoring-board)\n",
+ "\n",
+ "#### New to Weights & Biases? [-> Sign up for an account here <-](https://wandb.ai/site)\n",
+ "\n",
+ "# Step 0: Setup\n",
+ "\n",
+ "Install dependencies, login to W&B so you can save and share your work, and authenticate with OpenAI."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8107732a-fb90-45f8-8377-6381bd28475d",
+ "metadata": {
+ "id": "8107732a-fb90-45f8-8377-6381bd28475d"
+ },
+ "outputs": [],
+ "source": [
+ "# if not already installed\n",
+ "!pip install -qqq weave openai tiktoken wandb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "16686825-ce56-4d56-9207-ef4d2357eaf0",
+ "metadata": {
+ "id": "16686825-ce56-4d56-9207-ef4d2357eaf0"
+ },
+ "outputs": [],
+ "source": [
+ "import wandb\n",
+ "wandb.login()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7273a9b0-f41e-4490-b0e9-4ab2cdeb80ec",
+ "metadata": {
+ "id": "7273a9b0-f41e-4490-b0e9-4ab2cdeb80ec"
+ },
+ "outputs": [],
+ "source": [
+ "import weave\n",
+ "import os\n",
+ "WANDB_BASE_URL = \"https://api.wandb.ai\"\n",
+ "os.environ[\"WANDB_BASE_URL\"] = WANDB_BASE_URL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "de5c26c3-393d-45a0-84a8-b277c42e4724",
+ "metadata": {
+ "id": "de5c26c3-393d-45a0-84a8-b277c42e4724"
+ },
+ "outputs": [],
+ "source": [
+ "# authenticate with OpenAI\n",
+ "from getpass import getpass\n",
+ "\n",
+ "if os.getenv(\"OPENAI_API_KEY\") is None:\n",
+ " os.environ[\"OPENAI_API_KEY\"] = getpass(\"Paste your OpenAI key from: https://platform.openai.com/account/api-keys\\n\")\n",
+ "assert os.getenv(\"OPENAI_API_KEY\", \"\").startswith(\"sk-\"), \"This doesn't look like a valid OpenAI API key\"\n",
+ "print(\"OpenAI API key configured\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32e91f9c-0640-4346-8c4a-e5f35256afe6",
+ "metadata": {
+ "id": "32e91f9c-0640-4346-8c4a-e5f35256afe6"
+ },
+ "source": [
+ "# Step 1: Configure data streaming and storage in W&B\n",
+ "\n",
+ "Set WB_ENTITY to your wandb username or team name. Log in to W&B and navigate to Home Page at [wandb.ai/home](https://wandb.ai/home) to see valid options under your \"Profile\" and \"Teams\" in the left sidebar."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "347bacb8-1351-4517-8116-d11212aef57b",
+ "metadata": {
+ "id": "347bacb8-1351-4517-8116-d11212aef57b"
+ },
+ "outputs": [],
+ "source": [
+ "WB_ENTITY = os.getenv(\"WANDB_ENTITY\") or input(\"W&B username or team name: \")  # set to your wandb username or team name\n",
+ "WB_PROJECT = \"weave\" # top-level directory for this work\n",
+ "STREAM_NAME = \"openai_logs\" # record table which stores the logs of OpenAI API calls as they stream in"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "26402897-4f73-4dd6-9314-2a1c60638e4d",
+ "metadata": {
+ "id": "26402897-4f73-4dd6-9314-2a1c60638e4d"
+ },
+ "source": [
+ "# Step 2: Call init_monitor()\n",
+ "\n",
+ "To start monitoring OpenAI API usage, call `init_monitor(<stream>)`, where `<stream>` has the form `<wandb_entity>/<wandb_project>/<stream_name>`. The stream records and stores all the OpenAI API calls.\n",
+ "\n",
+ "Running this cell will print out a link to view the current project in the Weave UI."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba2b2070",
+ "metadata": {
+ "id": "ba2b2070"
+ },
+ "outputs": [],
+ "source": [
+ "from weave.monitoring import openai, init_monitor\n",
+ "m = init_monitor(f\"{WB_ENTITY}/{WB_PROJECT}/{STREAM_NAME}\")\n",
+ "\n",
+ "# specifying a single model for simplicity\n",
+ "OPENAI_MODEL = 'gpt-3.5-turbo'\n",
+ "\n",
+ "# prefill with some sample logs\n",
+ "r = openai.ChatCompletion.create(model=OPENAI_MODEL, messages=[{\"role\": \"user\", \"content\": \"hello world!\"}])\n",
+ "r = openai.ChatCompletion.create(model=OPENAI_MODEL, messages=[{\"role\": \"user\", \"content\": \"what is 2+2?\"}])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fb51abb4",
+ "metadata": {
+ "id": "fb51abb4"
+ },
+ "source": [
+ "# Step 3: Preview monitoring dashboard\n",
+ "\n",
+ "Click on the link above to preview the data stream, then click \"OpenAI Monitor Board\" in the right sidebar to create a Weave Board for this data stream.\n",
+ "\n",
+ "
\n",
+ "\n",
+ "# Step 4: Explore & understand your LLM usage\n",
+ "\n",
+ "To save your work, rename the board by clicking on the autogenerated name at the top of the page. To share your board, click \\\"Publish\\\" in the top right.\n",
+ "\n",
+ "
\n",
+ "\n",
+ "To visualize your work in real-time as you iterate, you can:\n",
+ "* keep the Board open in a separate tab and refresh to view the latest data\n",
+ "* rename the Board for easier reference at any point and \\\"Publish\\\" that version to share a link with others\n",
+ "* find previously saved Boards by navigating to the relevant W&B entity and W&B project name from weave.wandb.ai\n",
+ "* or open a new instance of a Board template to start fresh with all the data accumulated so far\n",
+ "\n",
+ "\n",
+ "Next we'll illustrate a few ways you could track OpenAI API calls. There are many more possibilities depending on your use case, and we can't wait to see what you create from these starter templates.\n",
+ "\n",
+ "# Examples\n",
+ "\n",
+ "## Example 0: Log a prompt and its completion\n",
+ "\n",
+ "Monitor a ChatCompletion request and print the corresponding response, extracting only the text of the completion."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e711d521",
+ "metadata": {
+ "id": "e711d521"
+ },
+ "outputs": [],
+ "source": [
+ "response = openai.ChatCompletion.create(model=OPENAI_MODEL, messages=[\n",
+ "        {\"role\": \"user\", \"content\": \"What is the meaning of life, the universe, and everything?\"},\n",
+ "    ])\n",
+ "print(response['choices'][0]['message']['content'])  # show only the completion text"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4ace8f71-3e50-444b-89e0-a8a619a9e0a8",
+ "metadata": {
+ "id": "4ace8f71-3e50-444b-89e0-a8a619a9e0a8"
+ },
+ "source": [
+ "## Example 1: Track relevant parameters as attributes\n",
+ "\n",
+ "Factor out parameters of interest and track them as attributes on the logged record.\n",
+ "Here we track the \"system prompt\" separately from the \"prompt template\" and the \"equation\" parameter. This time we'll print the full structured response from the ChatCompletion call."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7930e774",
+ "metadata": {
+ "id": "7930e774"
+ },
+ "outputs": [],
+ "source": [
+ "system_prompt = \"you always write in bullet points\"\n",
+ "prompt_template = 'solve the following equation step by step: {equation}'\n",
+ "params = {'equation': '4 * (3 - 1)'}\n",
+ "openai.ChatCompletion.create(model=OPENAI_MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": prompt_template.format(**params)},\n",
+ " ],\n",
+ " # you can add additional attributes to the logged record\n",
+ " # see the monitor_api notebook for more examples\n",
+ " monitor_attributes={\n",
+ " 'system_prompt': system_prompt,\n",
+ " 'prompt_template': prompt_template,\n",
+ " 'params': params\n",
+ " })"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "450dc210-3526-4f6e-a05a-df15ebf8d398",
+ "metadata": {
+ "id": "450dc210-3526-4f6e-a05a-df15ebf8d398"
+ },
+ "source": [
+ "## Example 2: Log an ongoing stream of messages\n",
+ "\n",
+ "Monitor a stream of messages and log the result as a single record. Note: tokens are not counted in this format."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8b423df",
+ "metadata": {
+ "id": "b8b423df"
+ },
+ "outputs": [],
+ "source": [
+ "from weave.monitoring.openai import message_from_stream\n",
+ "r = openai.ChatCompletion.create(model=OPENAI_MODEL, messages=[\n",
+ " {\"role\": \"system\", \"content\": \"You are a robot and only speak in robot, like beep bloop bop.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Tell me a 50-word story.\"},\n",
+ " ], stream=True)\n",
+ "for s in message_from_stream(r):\n",
+ " print(s, end='')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "81945b3c-310e-404b-aea7-a47c6760db2e",
+ "metadata": {
+ "id": "81945b3c-310e-404b-aea7-a47c6760db2e"
+ },
+ "source": [
+ "## Example 3: Structure prompt engineering experiments\n",
+ "\n",
+ "Here we compare a few toy options for the system prompt, user question, and intended audience. Try your own experiments and see if any interesting insights emerge as you explore in the Board and group by different parameters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bfde1485-8d5f-4b25-a878-0c9de719dc6f",
+ "metadata": {
+ "id": "bfde1485-8d5f-4b25-a878-0c9de719dc6f"
+ },
+ "outputs": [],
+ "source": [
+ "def explain_math(system_prompt, prompt_template, params):\n",
+ " openai.ChatCompletion.create(model=OPENAI_MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": prompt_template.format(**params)},\n",
+ " ],\n",
+ " # you can add additional attributes to the logged record\n",
+ " # see the monitor_api notebook for more examples\n",
+ " monitor_attributes={\n",
+ " 'system_prompt': system_prompt,\n",
+ " 'prompt_template': prompt_template,\n",
+ " 'params': params\n",
+ " })"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "48d73400-835e-4e8d-b985-a812cf11b99d",
+ "metadata": {
+ "id": "48d73400-835e-4e8d-b985-a812cf11b99d"
+ },
+ "outputs": [],
+ "source": [
+ "# feel free to substitute your own prompts :)\n",
+ "system_prompts = [\"you're extremely flowery and poetic\", \"you're very direct and precise\", \"balance brevity with insight\"]\n",
+ "prompt_template = 'explain the solution of the following to a {audience}: {equation}'\n",
+ "equations = ['x^2 + 4x + 9 = 0', '15 * (2 - 6) / 4']\n",
+ "audience = [\"new student\", \"math genius\"]\n",
+ "\n",
+ "for system_prompt in system_prompts:\n",
+ " for equation in equations:\n",
+ " for person in audience:\n",
+ " params = {\"equation\" : equation, \"audience\" : person}\n",
+ " explain_math(system_prompt, prompt_template, params)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}