mirror of https://github.com/james-m-jordan/openai-cookbook.git
synced 2025-05-09 19:32:38 +00:00

fixed code repository for solution one in Sharepoint solution (#1264)

This commit is contained in:
parent 774c524bd8
commit fb202f0369
@@ -145,7 +145,7 @@ See the documentation [here](https://learn.microsoft.com/en-us/azure/azure-funct
 5. Leave all the other settings on this page as the default, but feel free to change based on your internal guidelines.
-6. On the **permissions** tab, click **Add Permission** and add **Files.Read.All**, then **Add.** This allows this application to read files which is important in order to use the Microsoft Graph Search API.
+6. On the **permissions** tab, click **Add Permission** and add **Files.Read.All** and **Sites.ReadAll**, then **Add.** This allows this application to read files which is important in order to use the Microsoft Graph Search API.
 4. Once it is created, **click on the enterprise application you just created** (so, leave the Function App page and land on the Enterprise Application that you just spun up)**.** We are now going to give it one more permission, to execute the Azure Function by impersonating the user logging into the application. See [here](https://learn.microsoft.com/en-us/azure/app-service/configure-authentication-provider-aad?tabs=workforce-tenant) for more details.
@@ -1,10 +1,9 @@
 const { Client } = require('@microsoft/microsoft-graph-client');
-const pdfParse = require('pdf-parse');
 const { Buffer } = require('buffer');
 const path = require('path');
 const axios = require('axios');
 const qs = require('querystring');
-const { OpenAI } = require("openai");
 
 //// --------- ENVIRONMENT CONFIGURATION AND INITIALIZATION ---------
 // Function to initialize Microsoft Graph client
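For orientation (a sketch, not part of the commit): the "initialize Microsoft Graph client" helper referenced above is unchanged and not shown in this diff. With `@microsoft/microsoft-graph-client`, a client is typically created from an access token as below — the helper name `initGraphClient` is an assumption, not necessarily the file's own:

```js
// Sketch only: create a Graph client from an access token (assumed helper name).
const initGraphClient = (accessToken) => {
    return Client.init({
        // The SDK calls authProvider before each request; hand it the token.
        authProvider: (done) => {
            done(null, accessToken); // first argument is an error, if any
        },
    });
};
```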
@@ -46,79 +45,36 @@ const getOboToken = async (userAccessToken) => {
 };
 //// --------- DOCUMENT PROCESSING ---------
 // Function to fetch drive item content and convert to text
 
 const getDriveItemContent = async (client, driveId, itemId, name) => {
     try {
-        const fileType = path.extname(name).toLowerCase();
+        // const fileType = path.extname(name).toLowerCase();
         // the below files types are the ones that are able to be converted to PDF to extract the text. See https://learn.microsoft.com/en-us/graph/api/driveitem-get-content-format?view=graph-rest-1.0&tabs=http
-        const allowedFileTypes = ['.pdf', '.doc', '.docx', '.odp', '.ods', '.odt', '.pot', '.potm', '.potx', '.pps', '.ppsx', '.ppsxm', '.ppt', '.pptm', '.pptx', '.rtf'];
+        // const allowedFileTypes = ['.pdf', '.doc', '.docx', '.odp', '.ods', '.odt', '.pot', '.potm', '.potx', '.pps', '.ppsx', '.ppsxm', '.ppt', '.pptm', '.pptx', '.rtf'];
         // filePath changes based on file type, adding ?format=pdf to convert non-pdf types to pdf for text extraction, so all files in allowedFileTypes above are converted to pdf
-        const filePath = `/drives/${driveId}/items/${itemId}/content` + ((fileType === '.pdf' || fileType === '.txt' || fileType === '.csv') ? '' : '?format=pdf');
-        if (allowedFileTypes.includes(fileType)) {
-            response = await client.api(filePath).getStream();
-            // The below takes the chunks in response and combines
-            let chunks = [];
-            for await (let chunk of response) {
+        const filePath = `/drives/${driveId}/items/${itemId}`;
+        const downloadPath = filePath + `/content`
+        const fileStream = await client.api(downloadPath).getStream();
+        let chunks = [];
+        for await (let chunk of fileStream) {
             chunks.push(chunk);
         }
-        let buffer = Buffer.concat(chunks);
-        // the below extracts the text from the PDF.
-        const pdfContents = await pdfParse(buffer);
-        return pdfContents.text;
-        } else if (fileType === '.txt') {
-            // If the type is txt, it does not need to create a stream and instead just grabs the content
-            response = await client.api(filePath).get();
-            return response;
-        } else if (fileType === '.csv') {
-            response = await client.api(filePath).getStream();
-            let chunks = [];
-            for await (let chunk of response) {
-                chunks.push(chunk);
-            }
-            let buffer = Buffer.concat(chunks);
-            let dataString = buffer.toString('utf-8');
-            return dataString
-
-        } else {
-            return 'Unsupported File Type';
-        }
-
+        const base64String = Buffer.concat(chunks).toString('base64');
+        const file = await client.api(filePath).get();
+        const mime_type = file.file.mimeType;
+        const name = file.name;
+        return {"name":name, "mime_type":mime_type, "content":base64String}
     } catch (error) {
         console.error('Error fetching drive content:', error);
         throw new Error(`Failed to fetch content for ${name}: ${error.message}`);
     }
 };
 
-// Function to get relevant parts of text using gpt-3.5-turbo.
-const getRelevantParts = async (text, query) => {
-    try {
-        // We use your OpenAI key to initialize the OpenAI client
-        const openAIKey = process.env["OPENAI_API_KEY"];
-        const openai = new OpenAI({
-            apiKey: openAIKey,
-        });
-        const response = await openai.chat.completions.create({
-            // Using gpt-3.5-turbo due to speed to prevent timeouts. You can tweak this prompt as needed
-            model: "gpt-3.5-turbo-0125",
-            messages: [
-                {"role": "system", "content": "You are a helpful assistant that finds relevant content in text based on a query. You only return the relevant sentences, and you return a maximum of 10 sentences"},
-                {"role": "user", "content": `Based on this question: **"${query}"**, get the relevant parts from the following text:*****\n\n${text}*****. If you cannot answer the question based on the text, respond with 'No information provided'`}
-            ],
-            // using temperature of 0 since we want to just extract the relevant content
-            temperature: 0,
-            // using max_tokens of 1000, but you can customize this based on the number of documents you are searching.
-            max_tokens: 1000
-        });
-        return response.choices[0].message.content;
-    } catch (error) {
-        console.error('Error with OpenAI:', error);
-        return 'Error processing text with OpenAI' + error;
-    }
-};
-
 //// --------- AZURE FUNCTION LOGIC ---------
 // Below is what the Azure Function executes
 module.exports = async function (context, req) {
-    const query = req.query.query || (req.body && req.body.query);
+    // const query = req.query.query || (req.body && req.body.query);
     const searchTerm = req.query.searchTerm || (req.body && req.body.searchTerm);
     if (!req.headers.authorization) {
         context.res = {
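The hunk header above references `getOboToken`, whose body the diff leaves out. For orientation, a minimal sketch of the OAuth 2.0 on-behalf-of exchange such a helper typically performs, using the `axios` and `qs` modules required at the top of the file — the environment variable names here are assumptions, not necessarily the file's own:

```js
// Sketch only: exchange the user's incoming token for a Graph token (OAuth 2.0 OBO flow).
const getOboToken = async (userAccessToken) => {
    const tokenUrl = `https://login.microsoftonline.com/${process.env.TENANT_ID}/oauth2/v2.0/token`; // assumed env var
    const params = {
        client_id: process.env.CLIENT_ID,                                    // assumed env var
        client_secret: process.env.MICROSOFT_PROVIDER_AUTHENTICATION_SECRET, // assumed env var
        grant_type: 'urn:ietf:params:oauth:grant-type:jwt-bearer',
        assertion: userAccessToken,           // the token the caller sent to the Function
        requested_token_use: 'on_behalf_of',
        scope: 'https://graph.microsoft.com/.default',
    };
    const response = await axios.post(tokenUrl, qs.stringify(params), {
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    });
    return response.data.access_token;
};
```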
@@ -157,25 +113,6 @@ module.exports = async function (context, req) {
     };
 
     try {
-        // Function to tokenize content (e.g., based on words).
-        const tokenizeContent = (content) => {
-            return content.split(/\s+/);
-        };
-
-        // Function to break tokens into 10k token windows for gpt-3.5-turbo
-        const breakIntoTokenWindows = (tokens) => {
-            const tokenWindows = []
-            const maxWindowTokens = 10000; // 10k tokens
-            let startIndex = 0;
-
-            while (startIndex < tokens.length) {
-                const window = tokens.slice(startIndex, startIndex + maxWindowTokens);
-                tokenWindows.push(window);
-                startIndex += maxWindowTokens;
-            }
-
-            return tokenWindows;
-        };
         // This is where we are doing the search
         const list = await client.api('/search/query').post(requestBody);
 
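`requestBody` is defined outside this hunk. For reference, a Microsoft Graph Search API request for drive items generally takes the shape below — the exact page size and any extra fields the cookbook file uses are assumptions:

```js
// Sketch only: the shape of a POST /search/query body for driveItems.
const requestBody = {
    requests: [
        {
            entityTypes: ['driveItem'],         // search files in OneDrive/SharePoint
            query: { queryString: searchTerm }, // the user's search term from the request
            from: 0,
            size: 10,                           // assumed page size
        },
    ],
};
```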
@@ -187,30 +124,9 @@ module.exports = async function (context, req) {
             for (const hit of container.hits) {
                 if (hit.resource["@odata.type"] === "#microsoft.graph.driveItem") {
                     const { name, id } = hit.resource;
-                    // We use the below to grab the URL of the file to include in the response
-                    const webUrl = hit.resource.webUrl.replace(/\s/g, "%20");
-                    // The Microsoft Graph API ranks the reponses, so we use this to order it
-                    const rank = hit.rank;
-                    // The below is where the file lives
                     const driveId = hit.resource.parentReference.driveId;
                     const contents = await getDriveItemContent(client, driveId, id, name);
-                    if (contents !== 'Unsupported File Type') {
-                        // Tokenize content using function defined previously
-                        const tokens = tokenizeContent(contents);
-
-                        // Break tokens into 10k token windows
-                        const tokenWindows = breakIntoTokenWindows(tokens);
-
-                        // Process each token window and combine results
-                        const relevantPartsPromises = tokenWindows.map(window => getRelevantParts(window.join(' '), query));
-                        const relevantParts = await Promise.all(relevantPartsPromises);
-                        const combinedResults = relevantParts.join('\n'); // Combine results
-
-                        results.push({ name, webUrl, rank, contents: combinedResults });
-                    }
-                    else {
-                        results.push({ name, webUrl, rank, contents: 'Unsupported File Type' });
-                    }
+                    results.push(contents)
                 }
             }
         }));
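With this change, `processList` no longer filters, ranks, or summarizes: each matching drive item is pushed as-is, in the shape the rewritten `getDriveItemContent` returns. A sketch of one such entry, with hypothetical values:

```js
// Sketch only: each entry in `results` after this commit (values hypothetical).
const exampleResult = {
    name: 'Q3-report.docx',
    mime_type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    content: 'UEsDBBQABgAIAAAAIQ...', // base64-encoded file bytes, truncated for illustration
};
```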
@@ -224,7 +140,8 @@ module.exports = async function (context, req) {
         } else {
             // If the Microsoft Graph API does return results, then run processList to iterate through.
             results = await processList();
-            results.sort((a, b) => a.rank - b.rank);
+            results = {'openaiFileResponse': results}
+            // results.sort((a, b) => a.rank - b.rank);
         }
         context.res = {
             status: 200,
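Net effect of the commit: instead of ranked, GPT-extracted snippets, the Function now hands back the raw files wrapped in `openaiFileResponse`, the format expected when the caller should ingest documents directly. Assuming the wrapped `results` object is what ends up in `body` (that assignment falls outside this diff), the 200 response looks roughly like:

```js
// Sketch only: final response shape, assuming `results` is assigned to `body` (values hypothetical).
context.res = {
    status: 200,
    body: {
        openaiFileResponse: [
            {
                name: 'Q3-report.docx',
                mime_type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                content: 'UEsDBBQABgAIAAAAIQ...', // base64-encoded file bytes, truncated
            },
        ],
    },
};
```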