diff --git a/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb b/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb index 5d17b7c..ecd0a28 100644 --- a/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb +++ b/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb @@ -96,7 +96,7 @@ }, "outputs": [], "source": [ - "!pip install cassio openai" + "!pip install \"cassio>=0.1.3\" openai" ] }, { @@ -112,73 +112,47 @@ "id": "65a8edc1-4633-491b-9ed3-11163ec24e46", "metadata": {}, "source": [ - "A couple of secrets are required to create a `Session` object (a connection to your Astra DB instance).\n", + "In order to connect to your Astra DB, you need two things:\n", + "- An Astra Token, with role \"Database Administrator\" (it looks like `AstraCS:...`)\n", + "- The database ID (it looks like `3df2a5b6-...`)\n", "\n", - "_(Note: some steps will be slightly different on Google Colab and on local Jupyter, that's why the notebook will detect the runtime type.)_" + "Make sure you have both strings. Both are obtained in the [Astra UI](https://astra.datastax.com) once you sign in. For more information, see here: [database ID](https://awesome-astra.github.io/docs/pages/astra/faq/#where-should-i-find-a-database-identifier) and [Token](https://awesome-astra.github.io/docs/pages/astra/create-token/#c-procedure).\n", + "\n", + "If you want to _connect to a Cassandra cluster_ instead (which, however, must [support](https://cassio.org/more_info/#use-a-local-vector-capable-cassandra) Vectors), replace the connection call with `cassio.init(session=..., keyspace=...)`, passing a suitable Session and keyspace name for your cluster."
] }, { "cell_type": "code", "execution_count": 2, - "id": "a7429ed4-b3fe-44b0-ad00-60883df32070", - "metadata": {}, - "outputs": [], - "source": [ - "from cassandra.cluster import Cluster\n", - "from cassandra.auth import PlainTextAuthProvider" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e4f2eec1-b784-4cea-9006-03cfe7b31e25", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "try:\n", - " from google.colab import files\n", - " IS_COLAB = True\n", - "except ModuleNotFoundError:\n", - " IS_COLAB = False" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "7615e522-574f-427e-9f7f-87fc721207a4", + "id": "ca5a2f5d-3ff2-43d6-91c0-4a52c0ecd06a", "metadata": {}, "outputs": [ { "name": "stdin", "output_type": "stream", "text": [ - "Please provide the full path to your Secure Connect Bundle zipfile: /path/to/secure-connect-DATABASE.zip\n", - "Please provide your Database Token ('AstraCS:...' string): ········\n", - "Please provide the Keyspace name for your Database: my_keyspace\n" + "Please enter your Astra token ('AstraCS:...') ········\n", + "Please enter your database id ('3df2a5b6-...') 00000000-0000-0000-0000-000000000000\n" ] } ], "source": [ - "# Your database's Secure Connect Bundle zip file is needed:\n", - "if IS_COLAB:\n", - " print('Please upload your Secure Connect Bundle zipfile: ')\n", - " uploaded = files.upload()\n", - " if uploaded:\n", - " astraBundleFileTitle = list(uploaded.keys())[0]\n", - " ASTRA_DB_SECURE_BUNDLE_PATH = os.path.join(os.getcwd(), astraBundleFileTitle)\n", - " else:\n", - " raise ValueError(\n", - " 'Cannot proceed without Secure Connect Bundle. 
Please re-run the cell.'\n", - " )\n", - "else:\n", - " # you are running a local-jupyter notebook:\n", - " ASTRA_DB_SECURE_BUNDLE_PATH = input(\"Please provide the full path to your Secure Connect Bundle zipfile: \")\n", + "from getpass import getpass\n", "\n", - "ASTRA_DB_APPLICATION_TOKEN = getpass(\"Please provide your Database Token ('AstraCS:...' string): \")\n", - "ASTRA_DB_KEYSPACE = input(\"Please provide the Keyspace name for your Database: \")" + "astra_token = getpass(\"Please enter your Astra token ('AstraCS:...')\")\n", + "database_id = input(\"Please enter your database id ('3df2a5b6-...')\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0fe028b0-3a40-4f12-b07c-8fd8bbee29b0", + "metadata": {}, + "outputs": [], + "source": [ + "import cassio\n", + "\n", + "cassio.init(token=astra_token, database_id=database_id)" ] }, { @@ -193,29 +167,6 @@ "_(Incidentally, you could also use any Cassandra cluster (as long as it provides Vector capabilities), just by [changing the parameters](https://docs.datastax.com/en/developer/python-driver/latest/getting_started/#connecting-to-cassandra) to the following `Cluster` instantiation.)_" ] }, - { - "cell_type": "code", - "execution_count": 5, - "id": "949ab020-90c8-499b-a139-f69f07af50ed", - "metadata": {}, - "outputs": [], - "source": [ - "# Don't mind the \"Closing connection\" error after \"downgrading protocol...\" messages,\n", - "# it is really just a warning: the connection will work smoothly.\n", - "cluster = Cluster(\n", - " cloud={\n", - " \"secure_connect_bundle\": ASTRA_DB_SECURE_BUNDLE_PATH,\n", - " },\n", - " auth_provider=PlainTextAuthProvider(\n", - " \"token\",\n", - " ASTRA_DB_APPLICATION_TOKEN,\n", - " ),\n", - ")\n", - "\n", - "session = cluster.connect()\n", - "keyspace = ASTRA_DB_KEYSPACE" - ] - }, { "cell_type": "markdown", "id": "60829851-bd48-4461-9243-974f76304933", @@ -228,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": 
"8db837dc-cd49-41e2-8b5d-edb17ccc470e", "metadata": {}, "outputs": [], @@ -239,17 +190,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "691f1a07-cab4-42a1-baba-f17b561ddd3f", "metadata": {}, "outputs": [], "source": [ - "v_table = MetadataVectorCassandraTable(\n", - " session,\n", - " keyspace,\n", - " \"philosophers_cassio\",\n", - " vector_dimension=1536,\n", - ")" + "v_table = MetadataVectorCassandraTable(table=\"philosophers_cassio\", vector_dimension=1536)" ] }, { @@ -270,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "37fe7653-dd64-4494-83e1-5702ec41725c", "metadata": {}, "outputs": [ @@ -288,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "8065a42a-0ece-4453-b771-1dbef6d8a620", "metadata": {}, "outputs": [], @@ -310,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "6bf89454-9a55-4202-ab6b-ea15b2048f3d", "metadata": {}, "outputs": [], @@ -328,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "id": "50a8e6f0-0aa7-4ffc-94e9-702b68566815", "metadata": {}, "outputs": [ @@ -337,7 +283,7 @@ "output_type": "stream", "text": [ "len(result.data) = 2\n", - "result.data[1].embedding = [-0.01075850147753954, 0.0013505702372640371, 0.0036223...\n", + "result.data[1].embedding = [-0.011011358350515366, 0.0033741754014045, 0.004608382...\n", "len(result.data[1].embedding) = 1536\n" ] } @@ -368,7 +314,22 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, + "id": "aa68f038-3240-4e22-b7c6-a5f214eda381", + "metadata": {}, + "outputs": [], + "source": [ + "# Don't mind this cell, just autodetecting if we're on a Colab or not\n", + "try:\n", + " from google.colab import files\n", + " IS_COLAB = True\n", + "except ModuleNotFoundError:\n", + " IS_COLAB = False" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "id": 
"94ff33fb-4b52-4c15-ab74-4af4fe973cbf", "metadata": {}, "outputs": [], @@ -395,7 +356,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "6ab84ccb-3363-4bdc-9484-0d68c25a58ff", "metadata": {}, "outputs": [ @@ -457,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "68e80e81-886b-45a4-be61-c33b8028bcfb", "metadata": {}, "outputs": [ @@ -517,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "d6fcf182-3ab7-4d28-9472-dce35cc38182", "metadata": {}, "outputs": [], @@ -563,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "6722c2c0-3e54-4738-80ce-4d1149e95414", "metadata": {}, "outputs": [ @@ -572,13 +533,13 @@ "text/plain": [ "[('Life to the great majority is only a constant struggle for mere existence, with the certainty of losing it at last.',\n", " 'schopenhauer'),\n", - " ('We give up leisure in order that we may have leisure, just as we go to war in order that we may have peace.',\n", - " 'aristotle'),\n", - " ('Perhaps the gods are kind to us, by making life more disagreeable as we grow older. 
In the end death seems less intolerable than the manifold burdens we carry',\n", - " 'freud')]" + " ('The meager satisfaction that man can extract from reality leaves him starving.',\n", + " 'freud'),\n", + " ('To live is to suffer, to survive is to find some meaning in the suffering.',\n", + " 'nietzsche')]" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -597,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "da9c705f-5c12-42b3-a038-202f89a3c6da", "metadata": {}, "outputs": [ @@ -610,7 +571,7 @@ " 'nietzsche')]" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -629,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "abcfaec9-8f42-4789-a5ed-1073fa2932c2", "metadata": {}, "outputs": [ @@ -642,7 +603,7 @@ " 'nietzsche')]" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -670,7 +631,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "b9b43721-a3b0-4ac4-b730-7a6aeec52e70", "metadata": {}, "outputs": [ @@ -679,14 +640,14 @@ "output_type": "stream", "text": [ "8 quotes within the threshold:\n", - " 0. [distance=0.855] \"The assumption that animals are without rights, and the illusion that ...\"\n", - " 1. [distance=0.843] \"Animals are in possession of themselves; their soul is in possession o...\"\n", - " 2. [distance=0.841] \"At his best, man is the noblest of all animals; separated from law and...\"\n", - " 3. [distance=0.832] \"Man is the only animal that must be encouraged to live....\"\n", - " 4. [distance=0.831] \".... we are a part of nature as a whole, whose order we follow....\"\n", - " 5. [distance=0.824] \"Every human endeavor, however singular it seems, involves the whole hu...\"\n", - " 6. 
[distance=0.820] \"Because Christian morality leaves animals out of account, they are at ...\"\n", - " 7. [distance=0.819] \"A dog has the soul of a philosopher....\"\n" + " 0. [distance=0.858] \"The assumption that animals are without rights, and the illusion that ...\"\n", + " 1. [distance=0.849] \"Animals are in possession of themselves; their soul is in possession o...\"\n", + " 2. [distance=0.846] \"At his best, man is the noblest of all animals; separated from law and...\"\n", + " 3. [distance=0.840] \"Man is the only animal that must be encouraged to live....\"\n", + " 4. [distance=0.838] \".... we are a part of nature as a whole, whose order we follow....\"\n", + " 5. [distance=0.828] \"Because Christian morality leaves animals out of account, they are at ...\"\n", + " 6. [distance=0.827] \"Every human endeavor, however singular it seems, involves the whole hu...\"\n", + " 7. [distance=0.826] \"A dog has the soul of a philosopher....\"\n" ] } ], @@ -734,7 +695,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "a6dd366d-665a-45fd-917b-b6b5312b0865", "metadata": {}, "outputs": [], @@ -762,7 +723,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "397e6ebd-b30e-413b-be63-81a62947a7b8", "metadata": {}, "outputs": [], @@ -810,7 +771,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "806ba758-8988-410e-9eeb-b9c6799e6b25", "metadata": {}, "outputs": [ @@ -820,11 +781,11 @@ "text": [ "** quotes found:\n", "** - Happiness is the reward of virtue. (aristotle)\n", - "** - It is better for a city to be governed by a good man than by good laws. (aristotle)\n", + "** - Enthusiasm is always connected with the senses, whatever be the object that excites it. The true strength of virtue is serenity of mind, combined with a deliberate and steadfast determination to execute her laws. 
That is the healthful condition of the moral life; on the other hand, enthusiasm, even when excited by representations of goodness, is a brilliant but feverish glow which leaves only exhaustion and languor behind. (kant)\n", "** end of logging\n", "\n", "A new generated quote:\n", - "Politics without virtue is like a ship without a captain - destined to be guided by turbulent currents, lacking true direction.\n" + "Politics without virtue is like a ship without a compass - destined to drift aimlessly, guided only by self-interest and corruption.\n" ] } ], @@ -844,7 +805,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "7c2e2d4e-865f-4b2d-80cd-a695271415d9", "metadata": {}, "outputs": [ @@ -858,7 +819,7 @@ "** end of logging\n", "\n", "A new generated quote:\n", - "Neglecting the moral worth of animals reflects a crude and barbaric mindset. True morality lies in universal compassion.\n" + "By disregarding the worth of animals, we reveal our own moral ignorance. 
True morality lies in extending compassion to all living beings.\n" ] } ], @@ -900,7 +861,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "49cabc31-47e3-4326-8ef5-d95690317321", "metadata": {}, "outputs": [], @@ -910,17 +871,12 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "a614c333-4143-4ad6-abdf-7b3853fbf423", "metadata": {}, "outputs": [], "source": [ - "v_table_partitioned = ClusteredMetadataVectorCassandraTable(\n", - " session,\n", - " keyspace,\n", - " \"philosophers_cassio_partitioned\",\n", - " vector_dimension=1536,\n", - ")" + "v_table_partitioned = ClusteredMetadataVectorCassandraTable(table=\"philosophers_cassio_partitioned\", vector_dimension=1536)" ] }, { @@ -939,7 +895,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "424513a6-0a9d-4164-bf30-22d5b7e3bb25", "metadata": {}, "outputs": [ @@ -978,7 +934,7 @@ " ))\n", " for future in futures:\n", " future.result()\n", - " print(f\" Done ({len(quotes)} quotes inserted).\")\n", + " print(f\"Done ({len(quotes)} quotes inserted).\")\n", "print(\"Finished inserting.\")" ] }, @@ -992,7 +948,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "a3217a90-c682-4c72-b834-7717ed13a3af", "metadata": {}, "outputs": [], @@ -1032,7 +988,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "id": "d7343a7a-5a06-47c5-ad96-8b60b6948352", "metadata": {}, "outputs": [ @@ -1041,13 +997,13 @@ "text/plain": [ "[('Life to the great majority is only a constant struggle for mere existence, with the certainty of losing it at last.',\n", " 'schopenhauer'),\n", - " ('We give up leisure in order that we may have leisure, just as we go to war in order that we may have peace.',\n", - " 'aristotle'),\n", - " ('Perhaps the gods are kind to us, by making life more disagreeable as we grow older. 
In the end death seems less intolerable than the manifold burdens we carry',\n", - " 'freud')]" + " ('The meager satisfaction that man can extract from reality leaves him starving.',\n", + " 'freud'),\n", + " ('To live is to suffer, to survive is to find some meaning in the suffering.',\n", + " 'nietzsche')]" ] }, - "execution_count": 28, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1066,7 +1022,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "id": "d1abb677-5a8b-48c2-82c5-dbca94ef56f1", "metadata": {}, "outputs": [ @@ -1079,7 +1035,7 @@ " 'nietzsche')]" ] }, - "execution_count": 29, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1122,22 +1078,26 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "id": "1eb0fd16-7e15-4742-8fc5-94d9eeeda620", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# we peek at CassIO's config to get a direct handle to the DB connection\n", + "session = cassio.config.resolve_session()\n", + "keyspace = cassio.config.resolve_keyspace()\n", + "\n", "session.execute(f\"DROP TABLE IF EXISTS {keyspace}.philosophers_cassio;\")\n", "session.execute(f\"DROP TABLE IF EXISTS {keyspace}.philosophers_cassio_partitioned;\")" ]