From 1c05442070e90d92ba63575b9ed059c01ba75ac1 Mon Sep 17 00:00:00 2001 From: Andriy Mulyar Date: Tue, 28 Mar 2023 19:11:05 -0400 Subject: [PATCH] Visualizing Embeddings with Atlas (#152) * Embedding visualization in Atlas * Updated Atlas Visualization Example * Atlas for Embedding Visualization: removed extra outputs * Rename Atlas_for_visualizing_embeddings.ipynb to Visualizing_embeddings_with_Atlas.ipynb --- .../Visualizing_embeddings_with_Atlas.ipynb | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 examples/Visualizing_embeddings_with_Atlas.ipynb diff --git a/examples/Visualizing_embeddings_with_Atlas.ipynb b/examples/Visualizing_embeddings_with_Atlas.ipynb new file mode 100644 index 0000000..a47590f --- /dev/null +++ b/examples/Visualizing_embeddings_with_Atlas.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualizing OpenAI Embeddings in Atlas\n", + "\n", + "In this example, we will upload food review embeddings to [Atlas](https://atlas.nomic.ai) and visualize them." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is Atlas?\n", + "\n", + "[Atlas](https://atlas.nomic.ai) is a machine learning tool used to visualize massive datasets of embeddings in your web browser. Upload millions of embeddings to Atlas and interact with them in your web browser or Jupyter notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 
Login to Atlas.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ] + } + ], + "source": [ + "# %pip (unlike !pip) installs into the environment of the running kernel\n", + "%pip install nomic" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import ast\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Load the embeddings\n", + "datafile_path = \"data/fine_food_reviews_with_embeddings_1k.csv\"\n", + "df = pd.read_csv(datafile_path)\n", + "\n", + "# Each CSV cell stores an embedding as a stringified list of floats.\n", + "# Parse it with ast.literal_eval, which only accepts Python literals,\n", + "# rather than eval, which would execute arbitrary code found in the file.\n", + "embeddings = np.array(df.embedding.apply(ast.literal_eval).to_list())\n", + "df = df.drop('embedding', axis=1)\n", + "df = df.rename(columns={'Unnamed: 0': 'id'})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ] + } + ], + "source": [ + "import os\n", + "\n", + "import nomic\n", + "from nomic import atlas\n", + "\n", + "# Use your own Atlas API key when ATLAS_API_KEY is set in the environment;\n", + "# otherwise fall back to the shared demo-account token so the example runs as-is.\n", + "nomic.login(os.environ.get('ATLAS_API_KEY', '7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6')) #demo account\n", + "\n", + "# One metadata record per embedding row; 'id' identifies each point in the map\n", + "data = df.to_dict('records')\n", + "project = atlas.map_embeddings(embeddings=embeddings, data=data,\n", + " id_field='id',\n", + " colorable_fields=['Score'])\n", + "map = project.maps[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Interact with your embeddings in Jupyter" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "text/plain": "meek-laborer: https://atlas.nomic.ai/map/fddc0e07-97c5-477c-827c-96bca44519aa/463f4614-7689-47e4-b55b-1da0cc679559", + "text/html": "\n

Project: meek-laborer

\n \n\n

Projection ID: 463f4614-7689-47e4-b55b-1da0cc679559

\n
\n
Hide embedded project
\n \n
\n \n \n\n \n \n \n \n " + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "map" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + }, + "vscode": { + "interpreter": { + "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}