mirror of
https://github.com/james-m-jordan/openai-cookbook.git
synced 2025-05-09 19:32:38 +00:00

Co-authored-by: Aaron Wilkowitz <157151487+aaronwilkowitz-openai@users.noreply.github.com>
77 lines
2.5 KiB
Python
77 lines
2.5 KiB
Python
from google.cloud import bigquery
|
|
import functions_framework
|
|
import os
|
|
from openai import OpenAI
|
|
import json
|
|
|
|
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
embeddings_model = os.getenv('EMBEDDINGS_MODEL')
|
|
project_id = os.getenv('PROJECT_ID')
|
|
dataset_id = os.getenv('DATASET_ID')
|
|
table_id = os.getenv('TABLE_ID')
|
|
|
|
def generate_embeddings(text, model):
|
|
print(f'Generating embedding for: {text}')
|
|
# Generate embeddings for the provided text using the specified model
|
|
embeddings_response = openai_client.embeddings.create(model=model, input=text)
|
|
# Extract the embedding data from the response
|
|
embedding = embeddings_response.data[0].embedding
|
|
return embedding
|
|
|
|
@functions_framework.http
|
|
def openai_docs_search(request):
|
|
print('received a request')
|
|
client = bigquery.Client()
|
|
|
|
request_json = request.get_json(silent=True)
|
|
print(request_json)
|
|
|
|
if not request_json:
|
|
return json.dumps({"error": "Invalid JSON in request"}), 400, {'Content-Type': 'application/json'}
|
|
|
|
query = request_json.get('query')
|
|
top_k = request_json.get('top_k', 3)
|
|
category = request_json.get('category', '')
|
|
|
|
if not query:
|
|
return json.dumps({"error": "Query parameter is required"}), 400, {'Content-Type': 'application/json'}
|
|
|
|
embedding_query = generate_embeddings(query, embeddings_model)
|
|
embedding_query_list = ', '.join(map(str, embedding_query))
|
|
|
|
sql_query = f"""
|
|
WITH search_results AS (
|
|
SELECT query.id AS query_id, base.id AS base_id, distance
|
|
FROM VECTOR_SEARCH(
|
|
TABLE `{project_id}.{dataset_id}.{table_id}`, 'content_vector',
|
|
(SELECT ARRAY[{embedding_query_list}] AS content_vector, 'query_vector' AS id),
|
|
top_k => {top_k}, distance_type => 'COSINE', options => '{{"use_brute_force": true}}')
|
|
)
|
|
SELECT sr.query_id, sr.base_id, sr.distance, ed.text, ed.title, ed.category
|
|
FROM search_results sr
|
|
JOIN `{project_id}.{dataset_id}.{table_id}` ed ON sr.base_id = ed.id
|
|
"""
|
|
|
|
if category:
|
|
sql_query += f" WHERE ed.category = '{category}'"
|
|
|
|
sql_query += " ORDER BY sr.distance;"
|
|
|
|
query_job = client.query(sql_query) # Make an API request.
|
|
|
|
rows = []
|
|
for row in query_job:
|
|
print(row.title)
|
|
rows.append({
|
|
"text": row.text,
|
|
"title": row.title,
|
|
"distance": row.distance,
|
|
"category": row.category
|
|
})
|
|
|
|
response = {
|
|
"items": rows
|
|
}
|
|
print('sending response')
|
|
print(len(rows))
|
|
return json.dumps(response), 200 |