Mirror of https://github.com/james-m-jordan/openai-cookbook.git, synced 2025-05-09 19:32:38 +00:00
Minor change: use SearchIndexingBufferedSender to support optimized batch indexing (#712)
commit 552262ea89
parent c777f1025a
@@ -55,6 +55,7 @@
     "from azure.search.documents import SearchClient \n",
     "from azure.search.documents.indexes import SearchIndexClient \n",
     "from azure.search.documents.models import Vector \n",
+    "from azure.search.documents import SearchIndexingBufferedSender\n",
     "from azure.search.documents.indexes.models import ( \n",
     "    SearchIndex, \n",
     "    SearchField, \n",
@@ -69,7 +70,7 @@
     "    SemanticSettings, \n",
     "    VectorSearch, \n",
     "    HnswVectorSearchAlgorithmConfiguration, \n",
-    ") \n"
+    ")"
    ]
   },
   {
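For reference, outside the notebook's JSON escaping, the pattern this new import enables looks roughly like the sketch below. The variable names (`search_service_endpoint`, `search_service_api_key`, `index_name`, `documents`) follow the notebook; the placeholder values are illustrative only, not the notebook's actual configuration.

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchIndexingBufferedSender

# Placeholder values: in the notebook these come from earlier configuration cells.
search_service_endpoint = "https://<your-service>.search.windows.net"
search_service_api_key = "<your-admin-key>"
index_name = "<your-index-name>"
documents = [{"id": "1", "title": "example", "content": "example text"}]

# The buffered sender queues actions and sends them in batches optimized for
# indexing; leaving the "with" block flushes anything still queued.
with SearchIndexingBufferedSender(
    search_service_endpoint,
    index_name,
    AzureKeyCredential(search_service_api_key),
) as batch_client:
    # Queue an upload action for every document.
    batch_client.upload_documents(documents=documents)

print(f"Uploaded {len(documents)} documents in total")

Compared with the manual approach the commit removes (slicing `documents` into fixed-size batches and calling `search_client.upload_documents` in a loop), the buffered sender handles batching, retries, and flushing automatically, which is the point of the change.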
@@ -394,26 +395,19 @@
     "# Convert the DataFrame to a list of dictionaries \n",
     "documents = article_df.to_dict(orient='records') \n",
     " \n",
-    "search_client = SearchClient(endpoint=search_service_endpoint, index_name=index_name, credential=credential) \n",
+    "# Use SearchIndexingBufferedSender to upload the documents in batches optimized for indexing \n",
+    "with SearchIndexingBufferedSender(search_service_endpoint, index_name, AzureKeyCredential(search_service_api_key)) as batch_client: \n",
+    "    # Add upload actions for all documents \n",
+    "    batch_client.upload_documents(documents=documents) \n",
     " \n",
-    "# Define the batch upload size \n",
-    "batch_size = 250 \n",
-    " \n",
-    "# Split the documents into batches \n",
-    "batches = [documents[i:i + batch_size] for i in range(0, len(documents), batch_size)] \n",
-    " \n",
-    "# Upload each batch of documents \n",
-    "for batch in batches: \n",
-    "    result = search_client.upload_documents(batch) \n",
-    " \n",
-    "print(f\"Uploaded {len(documents)} documents in total\") \n"
+    "print(f\"Uploaded {len(documents)} documents in total\") "
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If you dataset didn't already contain pre-computed embeddings, you can create embeddings by using the below function using the `openai` python library. You'll also notice the same function and model are being used to generate query embeddings for performing vector searches."
+    "If your dataset didn't already contain pre-computed embeddings, you can create embeddings by using the below function using the `openai` python library. You'll also notice the same function and model are being used to generate query embeddings for performing vector searches."
    ]
   },
   {
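The markdown cell edited above mentions creating embeddings with the `openai` Python library when the dataset has no pre-computed vectors. A minimal sketch of that idea is below; it assumes the openai>=1.0 SDK and the `text-embedding-ada-002` model, which may differ from the SDK version and deployment the notebook actually uses.

from openai import OpenAI  # assumes openai>=1.0; the notebook may pin an older SDK

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def generate_embeddings(text: str, model: str = "text-embedding-ada-002") -> list[float]:
    """Return the embedding vector for a piece of text.

    The same function and model should be used for both the documents being
    indexed and the query text at search time, so the vectors live in the
    same embedding space.
    """
    response = client.embeddings.create(input=text, model=model)
    return response.data[0].embedding

# Example: embed a search query before sending it to the vector index.
query_vector = generate_embeddings("modern art in Europe")
print(len(query_vector))  # 1536 dimensions for text-embedding-ada-002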