mirror of
https://github.com/james-m-jordan/openai-cookbook.git
synced 2025-05-09 19:32:38 +00:00
lint
This commit is contained in:
parent
8b2d68f031
commit
cfe03038bd
@ -29,12 +29,11 @@
|
|||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"datafile_path = \"https://cdn.openai.com/API/examples/data/fine_food_reviews_with_embeddings_1k.csv\" # for your convenience, we precomputed the embeddings\n",
|
"datafile_path = \"https://cdn.openai.com/API/examples/data/fine_food_reviews_with_embeddings_1k.csv\" # for your convenience, we precomputed the embeddings\n",
|
||||||
"df = pd.read_csv(datafile_path)\n",
|
"df = pd.read_csv(datafile_path)\n",
|
||||||
"df['babbage_similarity'] = df.babbage_similarity.apply(eval).apply(np.array)\n",
|
"df[\"babbage_similarity\"] = df.babbage_similarity.apply(eval).apply(np.array)\n",
|
||||||
"matrix = np.vstack(df.babbage_similarity.values)\n",
|
"matrix = np.vstack(df.babbage_similarity.values)\n",
|
||||||
"matrix.shape"
|
"matrix.shape\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -77,12 +76,12 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"n_clusters = 4\n",
|
"n_clusters = 4\n",
|
||||||
"\n",
|
"\n",
|
||||||
"kmeans = KMeans(n_clusters = n_clusters,init='k-means++',random_state=42)\n",
|
"kmeans = KMeans(n_clusters=n_clusters, init=\"k-means++\", random_state=42)\n",
|
||||||
"kmeans.fit(matrix)\n",
|
"kmeans.fit(matrix)\n",
|
||||||
"labels = kmeans.labels_\n",
|
"labels = kmeans.labels_\n",
|
||||||
"df['Cluster'] = labels\n",
|
"df[\"Cluster\"] = labels\n",
|
||||||
"\n",
|
"\n",
|
||||||
"df.groupby('Cluster').Score.mean().sort_values()"
|
"df.groupby(\"Cluster\").Score.mean().sort_values()\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -125,22 +124,24 @@
|
|||||||
"import matplotlib\n",
|
"import matplotlib\n",
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"\n",
|
"\n",
|
||||||
"tsne = TSNE(n_components=2, perplexity=15, random_state=42, init='random', learning_rate=200)\n",
|
"tsne = TSNE(\n",
|
||||||
|
" n_components=2, perplexity=15, random_state=42, init=\"random\", learning_rate=200\n",
|
||||||
|
")\n",
|
||||||
"vis_dims2 = tsne.fit_transform(matrix)\n",
|
"vis_dims2 = tsne.fit_transform(matrix)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"x = [x for x,y in vis_dims2]\n",
|
"x = [x for x, y in vis_dims2]\n",
|
||||||
"y = [y for x,y in vis_dims2]\n",
|
"y = [y for x, y in vis_dims2]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for category, color in enumerate(['purple', 'green', 'red', 'blue']):\n",
|
"for category, color in enumerate([\"purple\", \"green\", \"red\", \"blue\"]):\n",
|
||||||
" xs = np.array(x)[df.Cluster==category]\n",
|
" xs = np.array(x)[df.Cluster == category]\n",
|
||||||
" ys = np.array(y)[df.Cluster==category]\n",
|
" ys = np.array(y)[df.Cluster == category]\n",
|
||||||
" plt.scatter(xs, ys, color=color, alpha=0.3)\n",
|
" plt.scatter(xs, ys, color=color, alpha=0.3)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" avg_x = xs.mean()\n",
|
" avg_x = xs.mean()\n",
|
||||||
" avg_y = ys.mean()\n",
|
" avg_y = ys.mean()\n",
|
||||||
" \n",
|
"\n",
|
||||||
" plt.scatter(avg_x, avg_y, marker='x', color=color, s=100)\n",
|
" plt.scatter(avg_x, avg_y, marker=\"x\", color=color, s=100)\n",
|
||||||
"plt.title(\"Clusters identified visualized in language 2d using t-SNE\")"
|
"plt.title(\"Clusters identified visualized in language 2d using t-SNE\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -199,26 +200,32 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"for i in range(n_clusters):\n",
|
"for i in range(n_clusters):\n",
|
||||||
" print(f\"Cluster {i} Theme:\", end=\" \")\n",
|
" print(f\"Cluster {i} Theme:\", end=\" \")\n",
|
||||||
" \n",
|
"\n",
|
||||||
" reviews = \"\\n\".join(df[df.Cluster == i].combined.str.replace(\"Title: \", \"\").str.replace(\"\\n\\nContent: \", \": \").sample(rev_per_cluster, random_state=42).values)\n",
|
" reviews = \"\\n\".join(\n",
|
||||||
|
" df[df.Cluster == i]\n",
|
||||||
|
" .combined.str.replace(\"Title: \", \"\")\n",
|
||||||
|
" .str.replace(\"\\n\\nContent: \", \": \")\n",
|
||||||
|
" .sample(rev_per_cluster, random_state=42)\n",
|
||||||
|
" .values\n",
|
||||||
|
" )\n",
|
||||||
" response = openai.Completion.create(\n",
|
" response = openai.Completion.create(\n",
|
||||||
" engine=\"davinci-instruct-beta-v3\",\n",
|
" engine=\"davinci-instruct-beta-v3\",\n",
|
||||||
" prompt=f\"What do the following customer reviews have in common?\\n\\nCustomer reviews:\\n\\\"\\\"\\\"\\n{reviews}\\n\\\"\\\"\\\"\\n\\nTheme:\",\n",
|
" prompt=f'What do the following customer reviews have in common?\\n\\nCustomer reviews:\\n\"\"\"\\n{reviews}\\n\"\"\"\\n\\nTheme:',\n",
|
||||||
" temperature=0,\n",
|
" temperature=0,\n",
|
||||||
" max_tokens=64,\n",
|
" max_tokens=64,\n",
|
||||||
" top_p=1,\n",
|
" top_p=1,\n",
|
||||||
" frequency_penalty=0,\n",
|
" frequency_penalty=0,\n",
|
||||||
" presence_penalty=0\n",
|
" presence_penalty=0,\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" print(response[\"choices\"][0][\"text\"].replace('\\n',''))\n",
|
" print(response[\"choices\"][0][\"text\"].replace(\"\\n\", \"\"))\n",
|
||||||
"\n",
|
"\n",
|
||||||
" sample_cluster_rows = df[df.Cluster == i].sample(rev_per_cluster, random_state=42) \n",
|
" sample_cluster_rows = df[df.Cluster == i].sample(rev_per_cluster, random_state=42)\n",
|
||||||
" for j in range(rev_per_cluster):\n",
|
" for j in range(rev_per_cluster):\n",
|
||||||
" print(sample_cluster_rows.Score.values[j], end=\", \")\n",
|
" print(sample_cluster_rows.Score.values[j], end=\", \")\n",
|
||||||
" print(sample_cluster_rows.Summary.values[j], end=\": \")\n",
|
" print(sample_cluster_rows.Summary.values[j], end=\": \")\n",
|
||||||
" print(sample_cluster_rows.Text.str[:70].values[j])\n",
|
" print(sample_cluster_rows.Text.str[:70].values[j])\n",
|
||||||
" \n",
|
"\n",
|
||||||
" print(\"-\" * 100)"
|
" print(\"-\" * 100)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -237,11 +244,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"interpreter": {
|
|
||||||
"hash": "be4b5d5b73a21c599de40d6deb1129796d12dc1cc33a738f7bac13269cfcafe8"
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.7.3 64-bit ('base': conda)",
|
"display_name": "Python 3.9.9 ('openai')",
|
||||||
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
@ -256,7 +261,12 @@
|
|||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.9"
|
"version": "3.9.9"
|
||||||
},
|
},
|
||||||
"orig_nbformat": 4
|
"orig_nbformat": 4,
|
||||||
|
"vscode": {
|
||||||
|
"interpreter": {
|
||||||
|
"hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
|
Loading…
x
Reference in New Issue
Block a user