Mirror of https://github.com/james-m-jordan/openai-cookbook.git, synced 2025-05-09 19:32:38 +00:00
Minor change: use SearchIndexingBufferedSender to support optimized batch indexing (#712)
commit 552262ea89
parent c777f1025a
@@ -55,6 +55,7 @@
     "from azure.search.documents import SearchClient \n",
     "from azure.search.documents.indexes import SearchIndexClient \n",
     "from azure.search.documents.models import Vector \n",
+    "from azure.search.documents import SearchIndexingBufferedSender\n",
     "from azure.search.documents.indexes.models import ( \n",
     "    SearchIndex, \n",
     "    SearchField, \n",
@@ -69,7 +70,7 @@
     "    SemanticSettings, \n",
     "    VectorSearch, \n",
     "    HnswVectorSearchAlgorithmConfiguration, \n",
-    ") \n"
+    ")"
    ]
   },
   {
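For reference, outside the notebook's JSON escaping, the pattern this new import enables looks roughly like the sketch below. The variable names (`search_service_endpoint`, `search_service_api_key`, `index_name`, `documents`) follow the notebook; the placeholder values are illustrative only, not the notebook's actual configuration.

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchIndexingBufferedSender

# Placeholder values: in the notebook these come from earlier configuration cells.
search_service_endpoint = "https://<your-service>.search.windows.net"
search_service_api_key = "<your-admin-key>"
index_name = "<your-index-name>"
documents = [{"id": "1", "title": "example", "content": "example text"}]

# The buffered sender queues actions and sends them in batches optimized for
# indexing; leaving the "with" block flushes anything still queued.
with SearchIndexingBufferedSender(
    search_service_endpoint,
    index_name,
    AzureKeyCredential(search_service_api_key),
) as batch_client:
    # Queue an upload action for every document.
    batch_client.upload_documents(documents=documents)

print(f"Uploaded {len(documents)} documents in total")

Compared with the manual approach the commit removes (slicing `documents` into fixed-size batches and calling `search_client.upload_documents` in a loop), the buffered sender handles batching, retries, and flushing automatically, which is the point of the change.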
@@ -394,26 +395,19 @@
     "# Convert the DataFrame to a list of dictionaries \n",
     "documents = article_df.to_dict(orient='records') \n",
     " \n",
-    "search_client = SearchClient(endpoint=search_service_endpoint, index_name=index_name, credential=credential) \n",
+    "# Use SearchIndexingBufferedSender to upload the documents in batches optimized for indexing \n",
+    "with SearchIndexingBufferedSender(search_service_endpoint, index_name, AzureKeyCredential(search_service_api_key)) as batch_client: \n",
+    "    # Add upload actions for all documents \n",
+    "    batch_client.upload_documents(documents=documents) \n",
     " \n",
-    "# Define the batch upload size \n",
-    "batch_size = 250 \n",
-    " \n",
-    "# Split the documents into batches \n",
-    "batches = [documents[i:i + batch_size] for i in range(0, len(documents), batch_size)] \n",
-    " \n",
-    "# Upload each batch of documents \n",
-    "for batch in batches: \n",
-    "    result = search_client.upload_documents(batch) \n",
-    " \n",
-    "print(f\"Uploaded {len(documents)} documents in total\") \n"
+    "print(f\"Uploaded {len(documents)} documents in total\") "
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If you dataset didn't already contain pre-computed embeddings, you can create embeddings by using the below function using the `openai` python library. You'll also notice the same function and model are being used to generate query embeddings for performing vector searches."
+    "If your dataset didn't already contain pre-computed embeddings, you can create embeddings by using the below function using the `openai` python library. You'll also notice the same function and model are being used to generate query embeddings for performing vector searches."
    ]
   },
   {
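The markdown cell edited above mentions creating embeddings with the `openai` Python library when the dataset has no pre-computed vectors. A minimal sketch of that idea is below; it assumes the openai>=1.0 SDK and the `text-embedding-ada-002` model, which may differ from the SDK version and deployment the notebook actually uses.

from openai import OpenAI  # assumes openai>=1.0; the notebook may pin an older SDK

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def generate_embeddings(text: str, model: str = "text-embedding-ada-002") -> list[float]:
    """Return the embedding vector for a piece of text.

    The same function and model should be used for both the documents being
    indexed and the query text at search time, so the vectors live in the
    same embedding space.
    """
    response = client.embeddings.create(input=text, model=model)
    return response.data[0].embedding

# Example: embed a search query before sending it to the vector index.
query_vector = generate_embeddings("modern art in Europe")
print(len(query_vector))  # 1536 dimensions for text-embedding-ada-002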