diff --git a/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb b/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb index a47f078..5d17b7c 100644 --- a/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb +++ b/examples/vector_databases/cassandra_astradb/Philosophical_Quotes_cassIO.ipynb @@ -930,7 +930,9 @@ "source": [ "Now repeat the compute-embeddings-and-insert step on the new table.\n", "\n", - "The only difference is how one stores the quote's author as the _partition id_ in this new table, instead of adding it to the catch-all \"metadata\" dictionary.\n", + "Compared to what you have seen earlier, there is a crucial difference in that now the quote's author is stored as the _partition id_ for the inserted row, instead of being added to the catch-all \"metadata\" dictionary.\n", + "\n", + "While you are at it, by way of demonstration, you will insert all quotes by a given author _concurrently_: with CassIO, this is done by using the asynchronous `put_async` method for each quote, collecting the resulting list of `Future` objects, and calling the `result()` method on them all afterwards, to ensure they all have executed. 
Cassandra / Astra DB is well suited to a high degree of concurrency in I/O operations.\n", "\n", "_(Note: one could have cached the embeddings computed previously to save a few API tokens -- here, however, we wanted to keep the code easier to inspect.)_" ] @@ -945,15 +947,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "aristotle: ************************************************** Done (50 quotes inserted).\n", - "freud: ************************************************** Done (50 quotes inserted).\n", - "hegel: ************************************************** Done (50 quotes inserted).\n", - "kant: ************************************************** Done (50 quotes inserted).\n", - "nietzsche: ************************************************** Done (50 quotes inserted).\n", - "plato: ************************************************** Done (50 quotes inserted).\n", - "sartre: ************************************************** Done (50 quotes inserted).\n", - "schopenhauer: ************************************************** Done (50 quotes inserted).\n", - "spinoza: ************************************************** Done (50 quotes inserted).\n", + "aristotle: Done (50 quotes inserted).\n", + "freud: Done (50 quotes inserted).\n", + "hegel: Done (50 quotes inserted).\n", + "kant: Done (50 quotes inserted).\n", + "nietzsche: Done (50 quotes inserted).\n", + "plato: Done (50 quotes inserted).\n", + "sartre: Done (50 quotes inserted).\n", + "schopenhauer: Done (50 quotes inserted).\n", + "spinoza: Done (50 quotes inserted).\n", "Finished inserting.\n" ] } @@ -965,15 +967,17 @@ " input=[quote[\"body\"] for quote in quotes],\n", " engine=embedding_model_name,\n", " )\n", + " futures = []\n", " for quote_idx, (quote, q_data) in enumerate(zip(quotes, result.data)):\n", - " v_table_partitioned.put(\n", + " futures.append(v_table_partitioned.put_async(\n", " partition_id=philosopher,\n", " row_id=f\"q_{philosopher}_{quote_idx}\",\n", " body_blob=quote[\"body\"],\n", 
" vector=q_data.embedding,\n", " metadata={tag: True for tag in quote[\"tags\"]},\n", - " )\n", - " print(\"*\", end='')\n", + " ))\n", + " for future in futures:\n", + " future.result()\n", " print(f\" Done ({len(quotes)} quotes inserted).\")\n", "print(\"Finished inserting.\")" ] @@ -1118,7 +1122,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "1eb0fd16-7e15-4742-8fc5-94d9eeeda620", "metadata": {}, "outputs": [ @@ -1128,7 +1132,7 @@ "" ] }, - "execution_count": 31, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" }