pap-openai 872a322868
Adding GCP Bigquery vector search with ChatGPT cookbook (#1344)
Co-authored-by: Aaron Wilkowitz <157151487+aaronwilkowitz-openai@users.noreply.github.com>
2024-08-07 15:21:46 -07:00

77 lines
2.5 KiB
Python

from google.cloud import bigquery
import functions_framework
import os
from openai import OpenAI
import json
# Shared OpenAI client; the API key is taken from the OPENAI_API_KEY
# environment variable when the module is imported.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Name of the embeddings model passed to the OpenAI embeddings endpoint.
embeddings_model = os.environ.get("EMBEDDINGS_MODEL")

# The three parts of the fully-qualified BigQuery table name
# (`project.dataset.table`) that the search query runs against.
project_id = os.environ.get("PROJECT_ID")
dataset_id = os.environ.get("DATASET_ID")
table_id = os.environ.get("TABLE_ID")
def generate_embeddings(text, model):
    """Return the embedding vector for *text* computed by *model*.

    Issues one embeddings request through the module-level ``openai_client``
    and returns the ``embedding`` attribute of the first entry in the
    response's ``data`` list.

    Args:
        text: Input passed as ``input`` to the embeddings endpoint.
        model: Model name passed as ``model`` to the embeddings endpoint.

    Returns:
        The ``embedding`` of the first item in the response.
    """
    print(f'Generating embedding for: {text}')
    response = openai_client.embeddings.create(input=text, model=model)
    # Only the first result is used by this function.
    first_item = response.data[0]
    return first_item.embedding
def _json_response(payload, status):
    """Serialize *payload* to JSON and pair it with *status* and a JSON content type."""
    return json.dumps(payload), status, {'Content-Type': 'application/json'}


def _parse_top_k(value):
    """Return *value* as a positive int, or None when it is not one."""
    # bool is a subclass of int; `true`/`false` are not meaningful counts.
    if isinstance(value, bool):
        return None
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return parsed if parsed >= 1 else None


@functions_framework.http
def openai_docs_search(request):
    """HTTP entry point: embed the query text and run a BigQuery vector search.

    The JSON request body is read for these keys:
        query: Text to search for (required).
        top_k: Number of nearest neighbours requested from VECTOR_SEARCH
            (optional, defaults to 3, must be a positive integer).
        category: When non-empty, only rows whose ``category`` equals this
            value are returned (optional).

    Args:
        request: The incoming HTTP request object; only ``get_json`` is used.

    Returns:
        A ``(body, status, headers)`` tuple. On success the body is a JSON
        object ``{"items": [...]}`` where each item carries ``text``,
        ``title``, ``distance`` and ``category``, ordered by distance.
        Invalid input yields status 400 with ``{"error": "..."}``.
    """
    print('received a request')
    request_json = request.get_json(silent=True)
    print(request_json)
    # A JSON array or scalar parses fine but has no `.get`; treat it as invalid.
    if not request_json or not isinstance(request_json, dict):
        return _json_response({"error": "Invalid JSON in request"}, 400)

    query = request_json.get('query')
    if not query:
        return _json_response({"error": "Query parameter is required"}, 400)

    # top_k is interpolated into the SQL text below, so it must be reduced to
    # a plain integer before it gets anywhere near the query string.
    top_k = _parse_top_k(request_json.get('top_k', 3))
    if top_k is None:
        return _json_response({"error": "top_k must be a positive integer"}, 400)

    category = request_json.get('category', '')

    embedding_query = generate_embeddings(query, embeddings_model)
    embedding_query_list = ', '.join(map(str, embedding_query))

    sql_query = f"""
    WITH search_results AS (
    SELECT query.id AS query_id, base.id AS base_id, distance
    FROM VECTOR_SEARCH(
    TABLE `{project_id}.{dataset_id}.{table_id}`, 'content_vector',
    (SELECT ARRAY[{embedding_query_list}] AS content_vector, 'query_vector' AS id),
    top_k => {top_k}, distance_type => 'COSINE', options => '{{"use_brute_force": true}}')
    )
    SELECT sr.query_id, sr.base_id, sr.distance, ed.text, ed.title, ed.category
    FROM search_results sr
    JOIN `{project_id}.{dataset_id}.{table_id}` ed ON sr.base_id = ed.id
    """

    query_parameters = []
    if category:
        # Bind the caller-supplied category as a query parameter instead of
        # splicing it into the SQL string, which allowed SQL injection.
        # NOTE: the filter runs after the top_k nearest neighbours have been
        # selected, so fewer than top_k rows may be returned.
        sql_query += " WHERE ed.category = @category"
        query_parameters.append(
            bigquery.ScalarQueryParameter("category", "STRING", str(category))
        )
    sql_query += " ORDER BY sr.distance;"

    # The client is created only after validation so that rejected requests
    # never touch BigQuery.
    client = bigquery.Client()
    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    query_job = client.query(sql_query, job_config=job_config)  # Make an API request.

    rows = []
    for row in query_job:
        print(row.title)
        rows.append({
            "text": row.text,
            "title": row.title,
            "distance": row.distance,
            "category": row.category,
        })

    print('sending response')
    print(len(rows))
    return _json_response({"items": rows}, 200)