mirror of
https://github.com/james-m-jordan/openai-cookbook.git
synced 2025-05-09 19:32:38 +00:00
File Q and A app
This commit is contained in:
parent
dad1c6af76
commit
e99e53aa1a
18
apps/file-q-and-a/README.md
Normal file
18
apps/file-q-and-a/README.md
Normal file
@ -0,0 +1,18 @@
|
||||
# File Q&A
|
||||
|
||||
File Q&A is a [Next.js](https://nextjs.org/) app that lets you find answers in your files using OpenAI APIs. You can upload files and ask questions related to their content, and the app will use embeddings and GPT to generate answers from the most relevant files.
|
||||
|
||||
This repo contains two versions of the app:
|
||||
|
||||
- `/nextjs`: A standalone Next.js app that stores embeddings locally in the browser. You will need an OpenAI API key to use this app. Read more in its [README](./nextjs/README.md).
|
||||
- `/nextjs-with-flask-server`: A Next.js app that uses a Flask server as a proxy to access the OpenAI APIs, and Pinecone as a vector database to store embeddings. You will need an OpenAI API key and a Pinecone API key to use this app. Read more in its [README](./nextjs-with-flask-server/README.md).
|
||||
|
||||
To run either version of the app, please follow the instructions in the respective README.md files in the subdirectories.
|
||||
|
||||
## How it works
|
||||
|
||||
When a file is uploaded, text is extracted from the file. This text is then split into shorter text chunks, and an embedding is created for each text chunk. When the user asks a question, an embedding is created for the question, and a similarity search is performed to find the file chunk embeddings that are most similar to the question (i.e. have the highest cosine similarities with the question embedding). An API call is then made to the completions endpoint, with the question and the most relevant file chunks included in the prompt. The generative model then gives the answer to the question found in the file chunks, if the answer can be found in the extracts.
|
||||
|
||||
## Limitations
|
||||
|
||||
The app may sometimes generate answers that are not in the files, or hallucinate about the existence of files that are not uploaded.
|
47
apps/file-q-and-a/nextjs-with-flask-server/README.md
Normal file
47
apps/file-q-and-a/nextjs-with-flask-server/README.md
Normal file
@ -0,0 +1,47 @@
|
||||
# File Q&A with Next.js and Flask
|
||||
|
||||
File Q&A is a web app that lets you find answers in your files. You can upload files and ask questions related to their content, and the app will use embeddings and GPT to generate answers from the most relevant files.
|
||||
|
||||
## Requirements
|
||||
|
||||
To run the app, you need:
|
||||
|
||||
- An OpenAI API key. You can create a new API key [here](https://beta.openai.com/account/api-keys).
|
||||
- A Pinecone API key and index name. You can create a new account and index [here](https://www.pinecone.io/).
|
||||
- Python 3.7 or higher and pipenv for the Flask server.
|
||||
- Node.js and npm for the Next.js client.
|
||||
|
||||
## Set-Up and Development
|
||||
|
||||
### Server
|
||||
|
||||
Fill out the config.yaml file with your Pinecone API key, index name and environment.
|
||||
|
||||
Run the Flask server:
|
||||
|
||||
```
|
||||
cd server
|
||||
bash script/start "<your OPENAI_API_KEY>"
|
||||
```
|
||||
|
||||
### Client
|
||||
|
||||
Navigate to the client directory and install Node dependencies:
|
||||
|
||||
```
|
||||
cd client
|
||||
npm install
|
||||
```
|
||||
|
||||
Run the Next.js client:
|
||||
|
||||
```
|
||||
cd client
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Open [http://localhost:3000](http://localhost:3000) with your browser to see the app.
|
||||
|
||||
## Limitations
|
||||
|
||||
The app may sometimes generate answers that are not in the files, or hallucinate about the existence of files that are not uploaded.
|
@ -0,0 +1,3 @@
|
||||
{
|
||||
"extends": "next/core-web-vitals"
|
||||
}
|
36
apps/file-q-and-a/nextjs-with-flask-server/client/.gitignore
vendored
Normal file
36
apps/file-q-and-a/nextjs-with-flask-server/client/.gitignore
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.js
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# local env files
|
||||
.env*.local
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
@ -0,0 +1,6 @@
|
||||
/** @type {import('next').NextConfig} */
|
||||
const nextConfig = {
|
||||
reactStrictMode: true,
|
||||
}
|
||||
|
||||
module.exports = nextConfig
|
8949
apps/file-q-and-a/nextjs-with-flask-server/client/package-lock.json
generated
Normal file
8949
apps/file-q-and-a/nextjs-with-flask-server/client/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,44 @@
|
||||
{
|
||||
"name": "file-q-and-a",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@headlessui/react": "^1.7.7",
|
||||
"@heroicons/react": "^2.0.13",
|
||||
"@next/font": "13.1.2",
|
||||
"@tailwindcss/line-clamp": "^0.4.2",
|
||||
"@tailwindcss/typography": "^0.5.9",
|
||||
"@types/formidable": "^2.0.5",
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/node": "18.11.18",
|
||||
"@types/pdf-parse": "^1.1.1",
|
||||
"@types/react": "18.0.27",
|
||||
"@types/react-dom": "18.0.10",
|
||||
"axios": "^1.2.3",
|
||||
"clsx": "^1.2.1",
|
||||
"eslint": "8.32.0",
|
||||
"eslint-config-next": "13.1.2",
|
||||
"formidable": "^2.1.1",
|
||||
"lodash": "^4.17.21",
|
||||
"mammoth": "^1.5.1",
|
||||
"next": "13.1.2",
|
||||
"node-html-markdown": "^1.3.0",
|
||||
"openai": "^3.1.0",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"react": "18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
"react-markdown": "^8.0.5",
|
||||
"typescript": "4.9.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"autoprefixer": "^10.4.13",
|
||||
"postcss": "^8.4.21",
|
||||
"tailwindcss": "^3.2.4"
|
||||
}
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
// PostCSS plugins, listed in the order they are registered:
// Tailwind CSS first, then Autoprefixer. Both use their default options.
const plugins = {
  tailwindcss: {},
  autoprefixer: {},
};

module.exports = { plugins };
|
Binary file not shown.
After Width: | Height: | Size: 262 KiB |
@ -0,0 +1 @@
|
||||
<svg id="openai-horizontal" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 120 29.53"><path d="M40.7,6.98s-.05,0-.07,0c-.02,0-.05,0-.07,0-4.67,0-7.58,2.91-7.58,7.6v2.53c0,4.69,2.9,7.6,7.58,7.6,.02,0,.05,0,.07,0,.02,0,.05,0,.07,0,4.67,0,7.58-2.91,7.58-7.6v-2.53c0-4.69-2.91-7.6-7.58-7.6Zm4.31,10.31c0,3.08-1.6,4.86-4.38,4.89-2.78-.03-4.38-1.81-4.38-4.89v-2.88c0-3.08,1.6-4.86,4.38-4.89,2.78,.03,4.38,1.81,4.38,4.89v2.88Zm40.57-5.79s-.06,0-.09,0c-.02,0-.03,0-.05,0-1.77,0-3.03,.6-3.65,1.75l-.19,.35v-1.8h-3.02v12.56h3.17v-7.48c0-1.76,.95-2.77,2.59-2.8,1.57,.03,2.47,1.02,2.47,2.73v7.55h3.17v-8.09c0-2.99-1.64-4.77-4.39-4.77Zm34.42-1.77v-2.4h-10.46v2.4h3.67v12.22h-3.67v2.4h10.46v-2.4h-3.67V9.73h3.67Zm-18.75-2.4h0s-3.28,0-3.28,0l-6.1,17.04h3.43l1.17-3.65h6.66v.04s1.17,3.62,1.17,3.62h3.43l-6.11-17.04h-.36Zm-4.03,10.98l2.57-8.05,2.55,8.05h-5.12Zm-39.45-6.81s-.05,0-.07,0c-.03,0-.05,0-.07,0-1.59,0-2.96,.66-3.68,1.76l-.18,.28v-1.74h-3.02V28.69h3.17v-5.9l.18,.27c.68,1.01,2.01,1.61,3.56,1.61,.03,0,.05,0,.08,0,.02,0,.04,0,.07,0,2.61,0,5.24-1.7,5.24-5.51v-2.14c0-2.74-1.62-5.51-5.26-5.51Zm2.1,7.5c0,2-1.15,3.24-3.01,3.28-1.73-.03-2.94-1.35-2.94-3.23v-1.89c0-1.9,1.22-3.24,2.97-3.28,1.84,.03,2.98,1.28,2.98,3.28v1.84Zm11.05-7.5h0c-.06,0-.12,.01-.18,.01-.06,0-.12-.01-.18-.01h0c-3.57,0-5.78,2.23-5.78,5.81v1.76c0,3.45,2.24,5.59,5.83,5.59,.08,0,.15,0,.22-.01,.05,0,.09,.01,.14,.01,2.41,0,4.09-.88,5.16-2.7l-2.13-1.23c-.71,1.05-1.66,1.84-3.02,1.84-1.82,0-2.91-1.12-2.91-3.01v-.5h8.44v-2.08c0-3.34-2.19-5.49-5.59-5.49Zm-2.86,5.54v-.3c0-2,.95-3.12,2.68-3.2,1.66,.08,2.66,1.18,2.66,2.99v.5s-5.34,0-5.34,0Z"></path><path 
d="M27.21,12.08c.67-2.01,.44-4.21-.63-6.04-1.61-2.8-4.85-4.24-8.01-3.57C17.16,.89,15.14-.01,13.02,0c-3.23,0-6.1,2.08-7.1,5.15-2.08,.43-3.87,1.73-4.92,3.57-1.62,2.8-1.25,6.32,.92,8.72-.67,2.01-.44,4.21,.63,6.03,1.61,2.81,4.85,4.25,8.02,3.58,1.4,1.58,3.42,2.49,5.54,2.48,3.23,0,6.1-2.08,7.1-5.15,2.08-.43,3.87-1.73,4.91-3.57,1.63-2.8,1.26-6.32-.91-8.72Zm-2.3-5.07c.64,1.12,.88,2.43,.66,3.7-.04-.03-.12-.07-.17-.1l-5.88-3.4c-.3-.17-.67-.17-.97,0l-6.89,3.98v-2.92l5.69-3.29c2.65-1.53,6.03-.62,7.56,2.03Zm-13.25,6.07l2.9-1.68,2.9,1.68v3.35l-2.9,1.68-2.9-1.68v-3.35ZM13.01,1.93c1.3,0,2.55,.45,3.55,1.28-.04,.02-.12,.07-.18,.1l-5.88,3.39c-.3,.17-.48,.49-.48,.84v7.96l-2.53-1.46V7.46c0-3.06,2.47-5.53,5.53-5.54ZM2.68,9.69h0c.65-1.12,1.66-1.98,2.88-2.43v6.99c0,.35,.18,.66,.48,.84l6.88,3.97-2.54,1.47-5.68-3.28c-2.64-1.53-3.55-4.91-2.02-7.56Zm1.55,12.83h0c-.65-1.11-.88-2.43-.66-3.7,.04,.03,.12,.07,.17,.1l5.88,3.4c.3,.17,.67,.17,.97,0l6.88-3.98v2.92l-5.69,3.28c-2.65,1.52-6.03,.62-7.56-2.02Zm11.89,5.08c-1.29,0-2.55-.45-3.54-1.28,.04-.02,.13-.07,.18-.1l5.88-3.39c.3-.17,.49-.49,.48-.84v-7.95l2.53,1.46v6.57c0,3.06-2.48,5.54-5.53,5.54Zm10.34-7.76c-.65,1.12-1.67,1.98-2.88,2.42v-6.99c0-.35-.18-.67-.48-.84h0l-6.89-3.98,2.53-1.46,5.69,3.28c2.65,1.53,3.55,4.91,2.02,7.56Z"></path></svg>
|
After Width: | Height: | Size: 2.8 KiB |
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" id="openai-symbol" viewBox="0 0 32 32"><path d="M29.71,13.09A8.09,8.09,0,0,0,20.34,2.68a8.08,8.08,0,0,0-13.7,2.9A8.08,8.08,0,0,0,2.3,18.9,8,8,0,0,0,3,25.45a8.08,8.08,0,0,0,8.69,3.87,8,8,0,0,0,6,2.68,8.09,8.09,0,0,0,7.7-5.61,8,8,0,0,0,5.33-3.86A8.09,8.09,0,0,0,29.71,13.09Zm-12,16.82a6,6,0,0,1-3.84-1.39l.19-.11,6.37-3.68a1,1,0,0,0,.53-.91v-9l2.69,1.56a.08.08,0,0,1,.05.07v7.44A6,6,0,0,1,17.68,29.91ZM4.8,24.41a6,6,0,0,1-.71-4l.19.11,6.37,3.68a1,1,0,0,0,1,0l7.79-4.49V22.8a.09.09,0,0,1,0,.08L13,26.6A6,6,0,0,1,4.8,24.41ZM3.12,10.53A6,6,0,0,1,6.28,7.9v7.57a1,1,0,0,0,.51.9l7.75,4.47L11.85,22.4a.14.14,0,0,1-.09,0L5.32,18.68a6,6,0,0,1-2.2-8.18Zm22.13,5.14-7.78-4.52L20.16,9.6a.08.08,0,0,1,.09,0l6.44,3.72a6,6,0,0,1-.9,10.81V16.56A1.06,1.06,0,0,0,25.25,15.67Zm2.68-4-.19-.12-6.36-3.7a1,1,0,0,0-1.05,0l-7.78,4.49V9.2a.09.09,0,0,1,0-.09L19,5.4a6,6,0,0,1,8.91,6.21ZM11.08,17.15,8.38,15.6a.14.14,0,0,1-.05-.08V8.1a6,6,0,0,1,9.84-4.61L18,3.6,11.61,7.28a1,1,0,0,0-.53.91ZM12.54,14,16,12l3.47,2v4L16,20l-3.47-2Z"/></svg>
|
After Width: | Height: | Size: 1.0 KiB |
@ -0,0 +1,77 @@
|
||||
import { useState, useCallback, memo } from "react";
|
||||
import { Transition } from "@headlessui/react";
|
||||
import {
|
||||
MagnifyingGlassMinusIcon,
|
||||
MagnifyingGlassPlusIcon,
|
||||
ArrowTopRightOnSquareIcon,
|
||||
} from "@heroicons/react/24/outline";
|
||||
|
||||
import { FileLite } from "../types/file";
|
||||
|
||||
type FileProps = {
  // File to render: its name is shown in the header, its url backs both the
  // "open in new tab" link and the inline preview.
  file: FileLite;
  // When true, the file's score is shown next to its name (if it has one).
  showScore?: boolean;
};

/**
 * Collapsible card for a single file.
 *
 * The header shows the file name, optionally its score (two decimals), a
 * magnifier icon reflecting the expanded state, and a link that opens the
 * file URL in a new tab. Clicking anywhere else on the card toggles an
 * inline iframe preview of the file URL.
 */
function File(props: FileProps) {
  const { file, showScore } = props;
  const [expanded, setExpanded] = useState(false);

  const handleExpand = useCallback(() => {
    setExpanded((prev) => !prev);
  }, []);

  return (
    <div
      className="border-gray-100 border rounded-md shadow p-2 cursor-pointer"
      onClick={handleExpand}
    >
      <div className="flex flex-row justify-between">
        <div className="flex hover:text-gray-600">{file.name}</div>

        <div className="flex flex-row space-x-2">
          {/*
            Check for a number explicitly: with a plain truthiness test a
            score of 0 is hidden and React renders the stray value `0`.
          */}
          {showScore && typeof file.score === "number" && (
            <div className="flex text-blue-600 mr-4">
              {file.score.toFixed(2)}
            </div>
          )}

          <div className="ml-auto w-max flex items-center justify-center">
            {expanded ? (
              <MagnifyingGlassMinusIcon className="text-gray-500 h-5" />
            ) : (
              <MagnifyingGlassPlusIcon className="text-gray-500 h-5" />
            )}
          </div>

          {/* Stop propagation so following the link does not also toggle the card. */}
          <a
            href={file.url}
            target="_blank"
            rel="noopener noreferrer"
            onClick={(e) => e.stopPropagation()}
          >
            <ArrowTopRightOnSquareIcon className="text-gray-500 h-5" />
          </a>
        </div>
      </div>
      <Transition
        show={expanded}
        enter="transition duration-75 ease-out"
        enterFrom="transform translate-y-4 opacity-0"
        enterTo="transform translate-y-0 opacity-100"
        leave="transition duration-100 ease-out"
        leaveFrom="transform translate-y-0 opacity-100"
        leaveTo="transform translate-y-4 opacity-0"
      >
        <div className="items-center mt-2 justify-center">
          <iframe
            src={file.url}
            className="h-full w-full"
            title={file.name}
          ></iframe>
        </div>
      </Transition>
    </div>
  );
}
|
||||
|
||||
export default memo(File);
|
@ -0,0 +1,147 @@
|
||||
import React, { memo, useCallback, useRef, useState } from "react";
|
||||
import { Transition } from "@headlessui/react";
|
||||
import axios from "axios";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
|
||||
import FileViewerList from "./FileViewerList";
|
||||
import LoadingText from "./LoadingText";
|
||||
import { isFileNameInString } from "../services/utils";
|
||||
import { FileChunk, FileLite } from "../types/file";
|
||||
import { SERVER_ADDRESS } from "../types/constants";
|
||||
|
||||
type FileQandAAreaProps = {
  // Files that have been uploaded so far; questions are refused while empty.
  files: FileLite[];
};

/**
 * Question box plus answer panel.
 *
 * Posts the typed question to `${SERVER_ADDRESS}/answer_question`, renders
 * the returned answer as Markdown and lists, under "Sources", every uploaded
 * file whose name appears in the answer text.
 */
function FileQandAArea(props: FileQandAAreaProps) {
  const searchBarRef = useRef<HTMLInputElement>(null);
  const [answerError, setAnswerError] = useState("");
  const [searchResultsLoading, setSearchResultsLoading] =
    useState<boolean>(false);
  const [answer, setAnswer] = useState("");

  const handleSearch = useCallback(async () => {
    // Ignore clicks / Enter presses while a request is already in flight.
    if (searchResultsLoading) {
      return;
    }

    const question = searchBarRef.current?.value ?? "";
    setAnswer("");

    if (!question) {
      setAnswerError("Please ask a question.");
      return;
    }
    if (props.files.length === 0) {
      setAnswerError("Please upload files before asking a question.");
      return;
    }

    setSearchResultsLoading(true);
    setAnswerError("");

    try {
      const answerResponse = await axios.post(
        `${SERVER_ADDRESS}/answer_question`,
        {
          question,
        }
      );

      if (answerResponse.status === 200) {
        setAnswer(answerResponse.data.answer);
      } else {
        setAnswerError("Sorry, something went wrong!");
      }
    } catch (err: any) {
      // Keep the user-facing message generic, but leave a trace for debugging.
      console.error(err);
      setAnswerError("Sorry, something went wrong!");
    } finally {
      // Always release the loading flag, whatever happened above.
      setSearchResultsLoading(false);
    }
  }, [props.files, searchResultsLoading]);

  const handleEnterInSearchBar = useCallback(
    async (event: React.KeyboardEvent<HTMLInputElement>) => {
      if (event.key === "Enter") {
        await handleSearch();
      }
    },
    [handleSearch]
  );

  // Files mentioned by name in the answer; computed once per render and
  // reused for both the visibility check and the list itself.
  const sourceFiles = props.files.filter((file) =>
    isFileNameInString(file.name, answer)
  );

  return (
    <div className="space-y-4 text-gray-800">
      <div className="mt-2">
        Ask a question based on the content of your files:
      </div>
      <div className="space-y-2">
        <input
          className="border rounded border-gray-200 w-full py-1 px-2"
          placeholder="e.g. What were the key takeaways from the Q1 planning meeting?"
          name="search"
          ref={searchBarRef}
          onKeyDown={handleEnterInSearchBar}
        />
        <div
          className="rounded-md bg-gray-50 py-1 px-4 w-max text-gray-500 hover:bg-gray-100 border border-gray-100 shadow cursor-pointer"
          onClick={handleSearch}
        >
          {searchResultsLoading ? (
            <LoadingText text="Answering question..." />
          ) : (
            "Ask question"
          )}
        </div>
      </div>
      <div className="">
        {answerError && <div className="text-red-500">{answerError}</div>}
        <Transition
          show={answer !== ""}
          enter="transition duration-600 ease-out"
          enterFrom="transform opacity-0"
          enterTo="transform opacity-100"
          leave="transition duration-125 ease-out"
          leaveFrom="transform opacity-100"
          leaveTo="transform opacity-0"
          className="mb-8"
        >
          {/* answer from files */}
          {answer && (
            <div className="">
              <ReactMarkdown className="prose" linkTarget="_blank">
                {answer}
              </ReactMarkdown>
            </div>
          )}

          <Transition
            show={sourceFiles.length > 0}
            enter="transition duration-600 ease-out"
            enterFrom="transform opacity-0"
            enterTo="transform opacity-100"
            leave="transition duration-125 ease-out"
            leaveFrom="transform opacity-100"
            leaveTo="transform opacity-0"
            className="mb-8"
          >
            <FileViewerList
              files={sourceFiles}
              title="Sources"
              listExpanded={true}
            />
          </Transition>
        </Transition>
      </div>
    </div>
  );
}
|
||||
|
||||
export default memo(FileQandAArea);
|
@ -0,0 +1,195 @@
|
||||
import React, {
|
||||
Dispatch,
|
||||
SetStateAction,
|
||||
useCallback,
|
||||
useState,
|
||||
memo,
|
||||
useRef,
|
||||
} from "react";
|
||||
import axios from "axios";
|
||||
import { ArrowUpTrayIcon } from "@heroicons/react/24/outline";
|
||||
import { compact } from "lodash";
|
||||
|
||||
import LoadingText from "./LoadingText";
|
||||
import { FileLite } from "../types/file";
|
||||
import FileViewerList from "./FileViewerList";
|
||||
import { SERVER_ADDRESS } from "../types/constants";
|
||||
|
||||
type FileUploadAreaProps = {
  // Setter of the parent's file list; successfully processed files are appended.
  handleSetFiles: Dispatch<SetStateAction<FileLite[]>>;
  // Maximum total number of files that may be uploaded.
  maxNumFiles: number;
  // Per-file size limit, in megabytes.
  maxFileSizeMB: number;
};

/**
 * Drag-and-drop / click-to-browse upload zone.
 *
 * Each accepted file (plain text, PDF or Word document below the size limit,
 * and not already uploaded under the same name) is posted to
 * `${SERVER_ADDRESS}/process_file`. Files the server reports as successfully
 * processed are appended to both the local list and the parent's list, and
 * shown under "Uploaded Files".
 */
function FileUploadArea(props: FileUploadAreaProps) {
  const handleSetFiles = props.handleSetFiles;

  const [files, setFiles] = useState<FileLite[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState("");
  const [dragOver, setDragOver] = useState(false);
  const dropzoneRef = useRef<HTMLLabelElement>(null);
  const fileInputRef = useRef<HTMLInputElement>(null);

  const handleFileChange = useCallback(
    async (selectedFiles: FileList | null) => {
      if (!selectedFiles || selectedFiles.length === 0) {
        return;
      }

      setError("");

      if (files.length + selectedFiles.length > props.maxNumFiles) {
        setError(`You can only upload up to ${props.maxNumFiles} files.`);
        // Clear the rejected selection on the <input> itself (the label has
        // no `value`), so picking the same files again fires `onChange`.
        if (fileInputRef.current) {
          fileInputRef.current.value = "";
        }
        return;
      }

      setLoading(true);

      try {
        const uploadedFiles = await Promise.all(
          Array.from(selectedFiles).map(async (file) => {
            // Check the file type AND that the file isn't too big
            const hasAllowedType = file.type.match(
              /(text\/plain|application\/(pdf|msword|vnd\.openxmlformats-officedocument\.wordprocessingml\.document))/
            );
            const isSmallEnough =
              file.size < props.maxFileSizeMB * 1024 * 1024;

            if (!hasAllowedType || !isSmallEnough) {
              alert(
                `Invalid file type or size. Only TXT, PDF or DOCX are allowed, up to ${props.maxFileSizeMB}MB.`
              );
              return null; // Skip this file
            }

            // Skip files whose name already exists in the files state
            if (files.find((f) => f.name === file.name)) {
              return null;
            }

            const formData = new FormData();
            formData.append("file", file);

            try {
              const processFileResponse = await axios.post(
                `${SERVER_ADDRESS}/process_file`,
                formData,
                {
                  headers: {
                    "Content-Type": "multipart/form-data",
                  },
                }
              );

              if (
                processFileResponse.status === 200 &&
                processFileResponse.data.success
              ) {
                const fileObject: FileLite = {
                  name: file.name,
                  url: URL.createObjectURL(file),
                  expanded: false,
                };

                return fileObject;
              } else {
                console.log("Error processing file");
                return null;
              }
            } catch (err: any) {
              console.log(`error processing file: ${err}`);
              return null;
            }
          })
        );

        // Filter out any null values from the uploadedFiles array
        const validFiles = compact(uploadedFiles);

        // Set the files state with the valid files and the existing files
        setFiles((prevFiles) => [...prevFiles, ...validFiles]);
        handleSetFiles((prevFiles) => [...prevFiles, ...validFiles]);
      } finally {
        // Never leave the dropzone stuck on "Uploading...".
        setLoading(false);
      }
    },
    [files, handleSetFiles, props.maxFileSizeMB, props.maxNumFiles]
  );

  const handleDragEnter = useCallback((event: React.DragEvent) => {
    event.preventDefault();
    setDragOver(true);
  }, []);

  const handleDragOver = useCallback((event: React.DragEvent) => {
    event.preventDefault();
  }, []);

  const handleDragLeave = useCallback((event: React.DragEvent) => {
    event.preventDefault();
    setDragOver(false);
  }, []);

  const handleDrop = useCallback(
    (event: React.DragEvent) => {
      event.preventDefault();
      setDragOver(false);
      const droppedFiles = event.dataTransfer.files;
      handleFileChange(droppedFiles);
    },
    [handleFileChange]
  );

  return (
    <div className="flex items-center justify-center w-full flex-col">
      <label
        htmlFor="dropzone-file"
        className={`flex flex-col shadow items-center justify-center w-full h-36 border-2 border-gray-300 border-dashed rounded-lg cursor-pointer bg-gray-50 hover:bg-gray-100 relative ${
          dragOver ? "border-blue-500 bg-blue-50" : ""
        }`}
        ref={dropzoneRef}
        onDragEnter={handleDragEnter}
        onDragOver={handleDragOver}
        onDragLeave={handleDragLeave}
        onDrop={handleDrop}
      >
        <div className="flex flex-col items-center justify-center pt-5 pb-6">
          {loading ? (
            <LoadingText text="Uploading..." />
          ) : (
            <div className="text-gray-500 flex flex-col items-center text-center">
              <ArrowUpTrayIcon className="w-7 h-7 mb-4" />
              <p className="mb-2 text-sm">
                <span className="font-semibold">Click to upload</span> or drag
                and drop
              </p>
              <p className="text-xs">
                PDF, DOCX or TXT (max {props.maxFileSizeMB}MB per file)
              </p>
              <p className="text-xs mt-1">
                You can upload up to {props.maxNumFiles - files.length} more{" "}
                {props.maxNumFiles - files.length === 1 ? "file" : "files"}
              </p>
              <input
                id="dropzone-file"
                type="file"
                className="hidden"
                multiple
                ref={fileInputRef}
                onChange={(event) => handleFileChange(event.target.files)}
              />
            </div>
          )}
        </div>
      </label>

      {error && (
        <div className="flex items-center justify-center w-full mt-4">
          <p className="text-sm text-red-500">{error}</p>
        </div>
      )}

      <FileViewerList files={files} title="Uploaded Files" />
    </div>
  );
}
|
||||
|
||||
export default memo(FileUploadArea);
|
@ -0,0 +1,73 @@
|
||||
import React, { memo, useCallback, useState } from "react";
|
||||
import { ChevronUpIcon } from "@heroicons/react/24/outline";
|
||||
import clsx from "clsx";
|
||||
import { Transition } from "@headlessui/react";
|
||||
|
||||
import File from "./File";
|
||||
import { FileLite } from "../types/file";
|
||||
|
||||
type FileViewerListProps = {
  // Files to list; nothing but the outer wrapper is rendered when empty.
  files: FileLite[];
  // Heading shown in the clickable header bar.
  title: string;
  // Initial expanded state of the list (collapsed when omitted).
  listExpanded?: boolean;
  // Forwarded to every File entry as `showScore`.
  showScores?: boolean;
};

/**
 * Collapsible list of files under a titled header with a count badge.
 * Clicking the header bar or the chevron toggles the list.
 */
function FileViewerList(props: FileViewerListProps) {
  const { files, title, showScores } = props;
  const [listExpanded, setListExpanded] = useState(props.listExpanded ?? false);

  const handleListExpand = useCallback(
    () => setListExpanded((wasExpanded) => !wasExpanded),
    []
  );

  const chevronClassName = clsx(
    "w-6 h-6 ml-2 stroke-slate-400 transition-transform cursor-pointer",
    !listExpanded && "-rotate-180"
  );

  const fileEntries = files.map((entry) => (
    <File key={entry.name} file={entry} showScore={showScores} />
  ));

  return (
    <div className="flex items-left justify-center w-full">
      {files.length > 0 && (
        <div className="flex flex-col items-left justify-center w-full mt-4">
          <div className="flex flex-row">
            <div
              className="rounded-md flex shadow p-2 mb-2 w-full bg-gray-50 items-center cursor-pointer "
              onClick={handleListExpand}
            >
              {title}
              <div className="bg-gray-300 ml-2 px-2 rounded-full w-max text-center text-sm ">
                {files.length}
              </div>
            </div>
            <div className="ml-auto w-max flex items-center justify-center">
              <ChevronUpIcon
                className={chevronClassName}
                onClick={handleListExpand}
              />
            </div>
          </div>

          <Transition
            show={listExpanded}
            enter="transition duration-125 ease-out"
            enterFrom="transform translate-y-4 opacity-0"
            enterTo="transform translate-y-0 opacity-100"
            leave="transition duration-125 ease-out"
            leaveFrom="transform translate-y-0 opacity-100"
            leaveTo="transform translate-y-4 opacity-0"
          >
            <div className="text-sm text-gray-500 space-y-2">{fileEntries}</div>
          </Transition>
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
export default memo(FileViewerList);
|
@ -0,0 +1,33 @@
|
||||
import clsx from "clsx";
|
||||
|
||||
type Props = {
  // Extra classes appended to the wrapper div.
  className?: string;
  // Spinner width/height in Tailwind spacing units (1 unit = 0.25rem); defaults to 5.
  size?: number;
};

/**
 * Spinning circular loading indicator.
 *
 * The size is applied through an inline style instead of interpolated
 * `w-${size} h-${size}` class names: Tailwind only generates classes it can
 * find as complete strings in the source, so dynamically built names may
 * never make it into the stylesheet.
 */
export default function LoadingSpinner(props: Props) {
  const size = props.size || 5;
  // Same scale as Tailwind's w-N / h-N utilities: N * 0.25rem.
  const dimension = `${size * 0.25}rem`;

  return (
    <div className={clsx("flex flex-row", props.className)}>
      <svg
        aria-hidden="true"
        className="mr-2 text-gray-200 animate-spin dark:text-gray-600 fill-black stroke-1"
        style={{ width: dimension, height: dimension }}
        viewBox="0 0 100 101"
        fill="none"
        xmlns="http://www.w3.org/2000/svg"
      >
        <path
          d="M100 50.5908C100 78.2051 77.6142 100.591 50 100.591C22.3858 100.591 0 78.2051 0 50.5908C0 22.9766 22.3858 0.59082 50 0.59082C77.6142 0.59082 100 22.9766 100 50.5908ZM9.08144 50.5908C9.08144 73.1895 27.4013 91.5094 50 91.5094C72.5987 91.5094 90.9186 73.1895 90.9186 50.5908C90.9186 27.9921 72.5987 9.67226 50 9.67226C27.4013 9.67226 9.08144 27.9921 9.08144 50.5908Z"
          fill="currentColor"
        />
        <path
          d="M93.9676 39.0409C96.393 38.4038 97.8624 35.9116 97.0079 33.5539C95.2932 28.8227 92.871 24.3692 89.8167 20.348C85.8452 15.1192 80.8826 10.7238 75.2124 7.41289C69.5422 4.10194 63.2754 1.94025 56.7698 1.05124C51.7666 0.367541 46.6976 0.446843 41.7345 1.27873C39.2613 1.69328 37.813 4.19778 38.4501 6.62326C39.0873 9.04874 41.5694 10.4717 44.0505 10.1071C47.8511 9.54855 51.7191 9.52689 55.5402 10.0491C60.8642 10.7766 65.9928 12.5457 70.6331 15.2552C75.2735 17.9648 79.3347 21.5619 82.5849 25.841C84.9175 28.9121 86.7997 32.2913 88.1811 35.8758C89.083 38.2158 91.5421 39.6781 93.9676 39.0409Z"
          fill="currentFill"
        />
      </svg>
    </div>
  );
}
|
@ -0,0 +1,18 @@
|
||||
import React, { memo } from "react";
|
||||
|
||||
import LoadingSpinner from "./LoadingSpinner";
|
||||
|
||||
type LoadingTextProps = {
  // Caption displayed to the right of the spinner.
  text: string;
};

/**
 * A loading spinner followed by a caption; the caption element is only
 * rendered when `text` is non-empty.
 */
function LoadingText({ text }: LoadingTextProps) {
  const caption = text && <div className="flex">{text}</div>;

  return (
    <div className="text-gray-500 text-md flex flex-row justify-center items-center">
      <LoadingSpinner />
      {caption}
    </div>
  );
}
|
||||
|
||||
export default memo(LoadingText);
|
@ -0,0 +1,6 @@
|
||||
import "@/styles/globals.css";
|
||||
import type { AppProps } from "next/app";
|
||||
|
||||
/**
 * Custom Next.js App component: renders the active page component with the
 * props Next.js supplies for it.
 */
export default function App(props: AppProps) {
  const { Component: Page, pageProps } = props;
  return <Page {...pageProps} />;
}
|
@ -0,0 +1,13 @@
|
||||
import { Html, Head, Main, NextScript } from "next/document";
|
||||
|
||||
/**
 * Custom Next.js Document: declares the page language as English and
 * otherwise renders the default head, main content and scripts.
 */
export default function Document() {
  const body = (
    <body>
      <Main />
      <NextScript />
    </body>
  );

  return (
    <Html lang="en">
      <Head />
      {body}
    </Html>
  );
}
|
@ -0,0 +1,35 @@
|
||||
import Head from "next/head";
|
||||
import { useState } from "react";
|
||||
|
||||
import FileQandAArea from "../components/FileQandAArea";
|
||||
import { FileLite } from "../types/file";
|
||||
import FileUploadArea from "../components/FileUploadArea";
|
||||
|
||||
/**
 * Page component for the File Q&A app: owns the list of uploaded files,
 * lets the upload area append to it and hands it to the question area.
 */
export default function FileQandA() {
  const [uploadedFiles, setUploadedFiles] = useState<FileLite[]>([]);

  // Upload limits passed down to the upload area.
  const maxNumFiles = 75;
  const maxFileSizeMB = 30;

  return (
    <div className="flex items-left text-left h-screen flex-col">
      <Head>
        <title>File Q&A</title>
      </Head>
      <div className="max-w-3xl mx-auto m-8 space-y-8 text-gray-800">
        <h1 className="text-4xl">File Q&A</h1>

        <div className="">
          To search for answers from the content in your files, upload them here
          and we will use OpenAI embeddings and GPT to find answers from the
          relevant documents.
        </div>

        <FileUploadArea
          handleSetFiles={setUploadedFiles}
          maxNumFiles={maxNumFiles}
          maxFileSizeMB={maxFileSizeMB}
        />

        <FileQandAArea files={uploadedFiles} />
      </div>
    </div>
  );
}
|
@ -0,0 +1,14 @@
|
||||
/**
 * Returns true if `fileName` occurs in `str`, comparing case-insensitively
 * and ignoring punctuation and whitespace on both sides.
 *
 * @param fileName - File name to look for.
 * @param str - Text to search in.
 * @returns Whether the normalized file name is contained in the normalized
 *   text. A file name with nothing left after normalization (e.g. "---")
 *   never matches.
 */
export const isFileNameInString = (fileName: string, str: string) => {
  // Lowercase, then drop punctuation and whitespace. The hyphen sits at the
  // end of the character class so it is a literal: written as `=-_` it formed
  // a range (U+003D..U+005F) and "-" itself was never removed. The other
  // characters that range used to cover (> ? @ [ \ ]) are listed explicitly
  // so previously matching names keep matching.
  const normalize = (value: string) =>
    value.toLowerCase().replace(/[.,/#!$%^&*;:{}=>?@[\\\]_~()\s-]/g, "");

  const normalizedFileName = normalize(fileName);

  // An empty needle is contained in every string; treat it as "no match"
  // rather than flagging the file as a source of every answer.
  if (normalizedFileName === "") {
    return false;
  }

  // Return true if the normalized file name is included in the normalized string
  return normalize(str).includes(normalizedFileName);
};
|
@ -0,0 +1,5 @@
|
||||
@import "./preflight.css";
|
||||
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
@ -0,0 +1,368 @@
|
||||
/* Using a custom preflight to fix conflicts with Ant Design */
|
||||
/* Original: https://unpkg.com/tailwindcss@3.2.4/src/css/preflight.css */
|
||||
|
||||
/*
|
||||
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
|
||||
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
|
||||
*/
|
||||
|
||||
*,
|
||||
::before,
|
||||
::after {
|
||||
box-sizing: border-box; /* 1 */
|
||||
border-width: 0; /* 2 */
|
||||
border-style: solid; /* 2 */
|
||||
border-color: theme("borderColor.DEFAULT"); /* 2 */
|
||||
}
|
||||
|
||||
::before,
|
||||
::after {
|
||||
--tw-content: "";
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use a consistent sensible line-height in all browsers.
|
||||
2. Prevent adjustments of font size after orientation changes in iOS.
|
||||
3. Use a more readable tab size.
|
||||
4. Use the user's configured `sans` font-family by default.
|
||||
5. Use the user's configured `sans` font-feature-settings by default.
|
||||
*/
|
||||
|
||||
html {
|
||||
line-height: 1.5; /* 1 */
|
||||
-webkit-text-size-adjust: 100%; /* 2 */
|
||||
-moz-tab-size: 4; /* 3 */
|
||||
tab-size: 4; /* 3 */
|
||||
font-family: theme("fontFamily.sans"); /* 4 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove the margin in all browsers.
|
||||
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
|
||||
*/
|
||||
|
||||
body {
|
||||
margin: 0; /* 1 */
|
||||
line-height: inherit; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Add the correct height in Firefox.
|
||||
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
|
||||
3. Ensure horizontal rules are visible by default.
|
||||
*/
|
||||
|
||||
hr {
|
||||
height: 0; /* 1 */
|
||||
color: inherit; /* 2 */
|
||||
border-top-width: 1px; /* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct text decoration in Chrome, Edge, and Safari.
|
||||
*/
|
||||
|
||||
abbr:where([title]) {
|
||||
text-decoration: underline dotted;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the default font size and weight for headings.
|
||||
*/
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6 {
|
||||
font-size: inherit;
|
||||
font-weight: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Reset links to optimize for opt-in styling instead of opt-out.
|
||||
*/
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font weight in Edge and Safari.
|
||||
*/
|
||||
|
||||
b,
|
||||
strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use the user's configured `mono` font family by default.
|
||||
2. Correct the odd `em` font sizing in all browsers.
|
||||
*/
|
||||
|
||||
code,
|
||||
kbd,
|
||||
samp,
|
||||
pre {
|
||||
font-family: theme("fontFamily.mono"); /* 1 */
|
||||
font-size: 1em; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font size in all browsers.
|
||||
*/
|
||||
|
||||
small {
|
||||
font-size: 80%;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
|
||||
*/
|
||||
|
||||
sub,
|
||||
sup {
|
||||
font-size: 75%;
|
||||
line-height: 0;
|
||||
position: relative;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
sub {
|
||||
bottom: -0.25em;
|
||||
}
|
||||
|
||||
sup {
|
||||
top: -0.5em;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
|
||||
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
|
||||
3. Remove gaps between table borders by default.
|
||||
*/
|
||||
|
||||
table {
|
||||
text-indent: 0; /* 1 */
|
||||
border-color: inherit; /* 2 */
|
||||
border-collapse: collapse; /* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Change the font styles in all browsers.
|
||||
2. Remove the margin in Firefox and Safari.
|
||||
3. Remove default padding in all browsers.
|
||||
*/
|
||||
|
||||
button,
|
||||
input,
|
||||
optgroup,
|
||||
select,
|
||||
textarea {
|
||||
font-family: inherit; /* 1 */
|
||||
font-size: 100%; /* 1 */
|
||||
font-weight: inherit; /* 1 */
|
||||
line-height: inherit; /* 1 */
|
||||
color: inherit; /* 1 */
|
||||
margin: 0; /* 2 */
|
||||
padding: 0; /* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inheritance of text transform in Edge and Firefox.
|
||||
*/
|
||||
|
||||
button,
|
||||
select {
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Remove default button styles.
|
||||
*/
|
||||
|
||||
button,
|
||||
[type="button"],
|
||||
[type="reset"],
|
||||
[type="submit"] {
|
||||
-webkit-appearance: button; /* 1 */
|
||||
background-image: none; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Use the modern Firefox focus style for all focusable elements.
|
||||
*/
|
||||
|
||||
:-moz-focusring {
|
||||
outline: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
|
||||
*/
|
||||
|
||||
:-moz-ui-invalid {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct vertical alignment in Chrome and Firefox.
|
||||
*/
|
||||
|
||||
progress {
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
/*
|
||||
Correct the cursor style of increment and decrement buttons in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-inner-spin-button,
|
||||
::-webkit-outer-spin-button {
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the odd appearance in Chrome and Safari.
|
||||
2. Correct the outline style in Safari.
|
||||
*/
|
||||
|
||||
[type="search"] {
|
||||
-webkit-appearance: textfield; /* 1 */
|
||||
outline-offset: -2px; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inner padding in Chrome and Safari on macOS.
|
||||
*/
|
||||
|
||||
::-webkit-search-decoration {
|
||||
-webkit-appearance: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Change font properties to `inherit` in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-file-upload-button {
|
||||
-webkit-appearance: button; /* 1 */
|
||||
font: inherit; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct display in Chrome and Safari.
|
||||
*/
|
||||
|
||||
summary {
|
||||
display: list-item;
|
||||
}
|
||||
|
||||
/*
|
||||
Removes the default spacing and border for appropriate elements.
|
||||
*/
|
||||
|
||||
blockquote,
|
||||
dl,
|
||||
dd,
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
hr,
|
||||
figure,
|
||||
p,
|
||||
pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
fieldset {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
legend {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ol,
|
||||
ul,
|
||||
menu {
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent resizing textareas horizontally by default.
|
||||
*/
|
||||
|
||||
textarea {
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
|
||||
2. Set the default placeholder color to the user's configured gray 400 color.
|
||||
*/
|
||||
|
||||
input::placeholder,
|
||||
textarea::placeholder {
|
||||
opacity: 1; /* 1 */
|
||||
color: theme("colors.gray.400"); /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Set the default cursor for buttons.
|
||||
*/
|
||||
|
||||
button,
|
||||
[role="button"] {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
Make sure disabled buttons don't get the pointer cursor.
|
||||
*/
|
||||
:disabled {
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
|
||||
This can trigger a poorly considered lint error in some tools but is included by design.
|
||||
*/
|
||||
|
||||
img,
|
||||
svg,
|
||||
video,
|
||||
canvas,
|
||||
audio,
|
||||
iframe,
|
||||
embed,
|
||||
object {
|
||||
display: block; /* 1 */
|
||||
vertical-align: middle; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
*/
|
||||
|
||||
img,
|
||||
video {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/* Make elements with the HTML hidden attribute stay hidden by default */
|
||||
[hidden] {
|
||||
display: none;
|
||||
}
|
@ -0,0 +1 @@
|
||||
export const SERVER_ADDRESS = "http://localhost:8080";
|
@ -0,0 +1,21 @@
|
||||
/** A piece of text together with its embedding vector. */
export interface TextEmbedding {
  text: string;
  embedding: number[];
}

/** A text embedding that also carries a `filename` and an optional score. */
export interface FileChunk extends TextEmbedding {
  filename: string;
  score?: number;
}

/** Lightweight description of a file; only `name` is required. */
export interface FileLite {
  name: string;
  type?: string;
  size?: number;
  url?: string;
  expanded?: boolean;
  score?: number;
  /** The file embedding -- or mean embedding if there are multiple embeddings for the file. */
  embedding?: number[];
  /** The chunks of text and their embeddings. */
  chunks?: TextEmbedding[];
  /** The extracted text from the file. */
  extractedText?: string;
}
|
@ -0,0 +1,28 @@
|
||||
const { fontFamily } = require("tailwindcss/defaultTheme");
|
||||
|
||||
/** @type {import('tailwindcss').Config} */
|
||||
module.exports = {
|
||||
content: [
|
||||
"./app/**/*.{js,ts,jsx,tsx}",
|
||||
"./src/**/*.{js,ts,jsx,tsx}",
|
||||
"./pages/**/*.{js,ts,jsx,tsx}",
|
||||
"./components/**/*.{js,ts,jsx,tsx}",
|
||||
],
|
||||
corePlugins: {
|
||||
preflight: false,
|
||||
},
|
||||
theme: {
|
||||
extend: {
|
||||
},
|
||||
},
|
||||
keyframes: {
|
||||
blink: {
|
||||
"0%, 100%": { opacity: 1 },
|
||||
"50%": { opacity: 0 },
|
||||
},
|
||||
},
|
||||
plugins: [
|
||||
require("@tailwindcss/line-clamp"),
|
||||
require("@tailwindcss/typography"),
|
||||
],
|
||||
};
|
@ -0,0 +1,24 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es5",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "node",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "preserve",
|
||||
"incremental": true,
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
@ -0,0 +1,77 @@
|
||||
from utils import get_embedding
|
||||
from flask import jsonify
|
||||
from config import *
|
||||
from flask import current_app
|
||||
|
||||
import openai
|
||||
|
||||
from config import *
|
||||
|
||||
TOP_K = 10
|
||||
|
||||
|
||||
def get_answer_from_files(question, session_id, pinecone_index):
    """Answer a question from the most relevant file chunks stored in Pinecone.

    The question is embedded, the TOP_K nearest chunks in the `session_id`
    namespace are fetched, and their text (looked up in the app's
    `file_text_dict`) is placed in a completion prompt.

    Args:
        question: The user's question.
        session_id: Pinecone namespace to query.
        pinecone_index: Index object exposing `query(...)`.

    Returns:
        A Flask JSON response `{"answer": ...}` on success. If anything fails
        after the question has been embedded, the error message is returned
        as a plain string.
    """
    logging.info(f"Getting answer for question: {question}")

    search_query_embedding = get_embedding(question, EMBEDDINGS_MODEL)

    try:
        query_response = pinecone_index.query(
            namespace=session_id,
            top_k=TOP_K,
            include_values=False,
            include_metadata=True,
            vector=search_query_embedding,
        )
        logging.info(
            f"[get_answer_from_files] received query response from Pinecone: {query_response}")

        file_text_dict = current_app.config["file_text_dict"]
        file_strings = []

        for i, result in enumerate(query_response.matches):
            score = result.score
            # The best match is always kept; later matches must clear the threshold.
            if score < COSINE_SIM_THRESHOLD and i > 0:
                logging.info(
                    f"[get_answer_from_files] score {score} is below threshold {COSINE_SIM_THRESHOLD} and i is {i}, breaking")
                break

            filename = result.metadata["filename"]
            file_text = file_text_dict.get(result.id)
            if file_text is None:
                # Without this check the literal text "None" would be sent to the model.
                logging.warning(
                    f"[get_answer_from_files] no stored text for chunk {result.id}, skipping")
                continue
            file_strings.append(f"###\n\"{filename}\"\n{file_text}\n")

        files_string = "".join(file_strings)

        # Adjacent literals are concatenated as-is, so every fragment carries
        # its own trailing space or newline.
        prompt = (
            "Given a question, try to answer it using the content of the file extracts below, and if you cannot answer, or find "
            "a relevant file, just output \"I couldn't find the answer to that question in your files.\".\n\n"
            "If the answer is not contained in the files or if there are no file extracts, respond with \"I couldn't find the answer "
            "to that question in your files.\" If the question is not actually a question, respond with \"That's not a valid question.\"\n\n"
            "In the cases where you can find the answer, first give the answer. Then explain how you found the answer from the source or sources, "
            "and use the exact filenames of the source files you mention. Do not make up the names of any other files other than those mentioned "
            "in the files context. Give the answer in markdown format. "
            "Use the following format:\n\nQuestion: <question>\n\nFiles:\n<###\n\"filename 1\"\nfile text>\n<###\n\"filename 2\"\nfile text>...\n\n"
            "Answer: <answer or \"I couldn't find the answer to that question in your files\" or \"That's not a valid question.\">\n\n"
            f"Question: {question}\n\n"
            f"Files:\n{files_string}\n"
            "Answer:"
        )

        logging.info(f"[get_answer_from_files] prompt: {prompt}")

        response = openai.Completion.create(
            prompt=prompt,
            temperature=0,
            max_tokens=1000,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            engine=GENERATIVE_MODEL,
        )

        answer = response.choices[0].text.strip()
        logging.info(f"[get_answer_from_files] answer: {answer}")

        return jsonify({"answer": answer})

    except Exception as e:
        # Logged at ERROR level with the traceback; the return value is unchanged.
        logging.exception(f"[get_answer_from_files] error: {e}")
        return str(e)
|
100
apps/file-q-and-a/nextjs-with-flask-server/server/app.py
Normal file
100
apps/file-q-and-a/nextjs-with-flask-server/server/app.py
Normal file
@ -0,0 +1,100 @@
|
||||
from __future__ import print_function
|
||||
from config import *
|
||||
|
||||
import tiktoken
|
||||
import pinecone
|
||||
import uuid
|
||||
import sys
|
||||
import logging
|
||||
|
||||
from flask import Flask, jsonify
|
||||
from flask_cors import CORS, cross_origin
|
||||
from flask import request
|
||||
|
||||
from handle_file import handle_file
|
||||
from answer_question import get_answer_from_files
|
||||
|
||||
# Configure root logging once: INFO and above go to debug.log and stdout.
# force=True is needed because modules imported above (e.g. `config`) log at
# import time through the module-level `logging` functions. Those install a
# default handler when the root logger has none, after which a plain
# basicConfig() call does nothing and INFO records are dropped.
# The second, identical basicConfig() call was removed: it could never take
# effect and only opened another handle on debug.log.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("debug.log"),
        logging.StreamHandler(sys.stdout)
    ],
    force=True,
)
|
||||
|
||||
def load_pinecone_index() -> pinecone.Index:
    """
    Load index from Pinecone, raise error if the index can't be found.

    Initializes the Pinecone client from PINECONE_API_KEY / PINECONE_ENV and
    returns a handle to the index named by PINECONE_INDEX.

    Raises:
        KeyError: If PINECONE_INDEX is not among the existing indexes.
    """
    pinecone.init(
        api_key=PINECONE_API_KEY,
        environment=PINECONE_ENV,
    )
    index_name = PINECONE_INDEX

    # Fetch the list once and reuse it for both the check and the diagnostic.
    available_indexes = pinecone.list_indexes()
    if index_name not in available_indexes:
        logging.error(f"Available Pinecone indexes: {available_indexes}")
        raise KeyError(f"Index '{index_name}' does not exist.")

    return pinecone.Index(index_name)
|
||||
|
||||
def create_app():
    """Build the Flask app and attach the state shared by the request handlers.

    The returned app carries `pinecone_index`, `tokenizer` and `session_id`
    attributes plus an empty `file_text_dict` in its config, and has CORS
    enabled with credentials support.
    """
    index = load_pinecone_index()
    encoder = tiktoken.get_encoding("gpt2")
    new_session_id = str(uuid.uuid4().hex)

    flask_app = Flask(__name__)
    flask_app.pinecone_index = index
    flask_app.tokenizer = encoder
    flask_app.session_id = new_session_id

    # Record the session id, which the handlers pass on as the Pinecone namespace.
    logging.info(f"session_id: {new_session_id}")

    flask_app.config["file_text_dict"] = {}
    CORS(flask_app, supports_credentials=True)
    return flask_app
|
||||
|
||||
app = create_app()
|
||||
|
||||
@app.route("/process_file", methods=["POST"])
@cross_origin(supports_credentials=True)
def process_file():
    """Process the uploaded `file` form field: extract, embed and upsert it.

    Returns:
        JSON `{"success": true}` when the file was handled, otherwise
        `{"success": false}`.
    """
    try:
        file = request.files['file']
        logging.info(str(file))
        handle_file(
            file, app.session_id, app.pinecone_index, app.tokenizer)
        return jsonify({"success": True})
    except Exception as e:
        # logging.exception keeps the traceback; the response body says nothing about the cause.
        logging.exception(str(e))
        return jsonify({"success": False})
|
||||
|
||||
@app.route("/answer_question", methods=["POST"])
@cross_origin(supports_credentials=True)
def answer_question():
    """Answer the `question` field of the JSON request body from the uploaded files.

    Returns:
        Whatever `get_answer_from_files` returns, or the error message as a
        plain string if reading the request or answering fails.
    """
    try:
        params = request.get_json()
        question = params["question"]

        answer_question_response = get_answer_from_files(
            question, app.session_id, app.pinecone_index)
        return answer_question_response
    except Exception as e:
        # Previously swallowed without a trace; now logged before the message is returned.
        logging.exception(f"[answer_question] error: {e}")
        return str(e)
|
||||
|
||||
@app.route("/healthcheck", methods=["GET"])
@cross_origin(supports_credentials=True)
def healthcheck():
    """Liveness probe: always responds with the plain text "OK"."""
    status = "OK"
    return status
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=SERVER_PORT, threaded=True)
|
35
apps/file-q-and-a/nextjs-with-flask-server/server/config.py
Normal file
35
apps/file-q-and-a/nextjs-with-flask-server/server/config.py
Normal file
@ -0,0 +1,35 @@
|
||||
from pathlib import Path
|
||||
import logging
|
||||
import sys
|
||||
from pprint import pformat
|
||||
import yaml
|
||||
|
||||
# Load config items from config.yaml.
|
||||
# Use Path.resolve() to get the absolute path of the parent directory
|
||||
yaml_dir = Path(__file__).resolve().parent
|
||||
yaml_path = yaml_dir / "config.yaml" # Use Path / operator to join paths
|
||||
|
||||
def load_yaml_config(path):
    """Load a yaml file and return a dictionary of its contents.

    Returns:
        The parsed mapping, or None (after logging the reason) when the file
        cannot be opened, cannot be parsed, or does not hold a mapping at the
        top level.
    """
    try:
        with open(path, "r") as stream:
            config = yaml.safe_load(stream)
    except OSError as exc:
        # A missing or unreadable file takes the same "return None" path as a parse error.
        logging.error(f"Failed to open {path}: {exc}")
        return None
    except yaml.YAMLError as exc:
        logging.error(f"Failed to load {path}: {exc}")
        return None

    if not isinstance(config, dict):
        # An empty file parses to None and a top-level list to a list.
        # Neither can be merged into the module globals by the caller.
        logging.error(f"Failed to load {path}: top-level content is not a mapping")
        return None

    return config
|
||||
|
||||
# Load the config and update the global variables
yaml_config = load_yaml_config(yaml_path)
if yaml_config is None:
    logging.error(f"Could not load config from {yaml_path}.")
    sys.exit(1)  # Exit the program if the config is invalid

# Never write credentials to the log: mask every setting whose name ends in "_KEY".
_loggable_config = {
    name: ("<redacted>" if str(name).upper().endswith("_KEY") else value)
    for name, value in yaml_config.items()
}
logging.info(f"Loaded config from {yaml_path}:")
logging.info(pformat(_loggable_config))
globals().update(yaml_config)

# Set a default value for SERVER_PORT if not specified in the config.
# The YAML value may be a quoted string, so it is normalized to int.
_configured_port = yaml_config.get("SERVER_PORT")
SERVER_PORT = int(_configured_port) if _configured_port is not None else 8080

# Use Path.resolve() to get the absolute path of the current directory
SERVER_DIR = Path(__file__).resolve().parent
|
@ -0,0 +1,18 @@
|
||||
# ----- PINECONE CONFIG -----
|
||||
PINECONE_API_KEY: "<your Pinecone API key>"
|
||||
PINECONE_INDEX: "<your Pinecone Index name>" # dimensions: 1536, metric: cosine similarity
|
||||
PINECONE_ENV: "<your Pinecone env e.g.us-west1-gcp>"
|
||||
|
||||
# ----- SERVER PORT ----
|
||||
SERVER_PORT: "8080"
|
||||
|
||||
# ---- OPENAI CONFIG -----
|
||||
EMBEDDINGS_MODEL: "text-embedding-ada-002"
|
||||
GENERATIVE_MODEL: "text-davinci-003"
|
||||
EMBEDDING_DIMENSIONS: 1536
|
||||
TEXT_EMBEDDING_CHUNK_SIZE: 200
|
||||
# This is the minimum cosine similarity score that a file must have with the search query to be considered relevant
|
||||
# This value is arbitrary; tune it or remove the threshold depending on the diversity of your dataset
|
||||
COSINE_SIM_THRESHOLD: 0.7
|
||||
MAX_TEXTS_TO_EMBED_BATCH_SIZE: 100
|
||||
MAX_PINECONE_VECTORS_TO_UPSERT_PATCH_SIZE: 100
|
168
apps/file-q-and-a/nextjs-with-flask-server/server/handle_file.py
Normal file
168
apps/file-q-and-a/nextjs-with-flask-server/server/handle_file.py
Normal file
@ -0,0 +1,168 @@
|
||||
import logging
|
||||
import sys
|
||||
import docx2txt
|
||||
|
||||
from PyPDF2 import PdfReader
|
||||
from numpy import array, average
|
||||
from flask import current_app
|
||||
from config import *
|
||||
|
||||
from utils import get_embeddings, get_pinecone_id_for_file_chunk
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler("debug.log"),
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
)
|
||||
|
||||
def handle_file(file, session_id, pinecone_index, tokenizer):
    """Extract a file's text, create embeddings for it, and upsert them to Pinecone.

    The extracted text is also stored under the file name in the app's
    `file_text_dict`. A ValueError raised during extraction is logged and
    re-raised.
    """
    name = file.filename
    logging.info("[handle_file] Handling file: {}".format(name))

    # In-memory text store kept on the current Flask app's config.
    text_store = current_app.config["file_text_dict"]

    try:
        text = extract_text_from_file(file)
    except ValueError as err:
        logging.error(
            "[handle_file] Error extracting text from file: {}".format(err))
        raise err

    text_store[name] = text

    # Embedding and upserting are done by the string handler.
    return handle_file_string(name, session_id, text, pinecone_index, tokenizer, text_store)
|
||||
|
||||
def extract_text_from_file(file):
    """Return the text content of a file, chosen by its mimetype.

    PDF, plain-text (decoded as UTF-8, after which the file is closed) and
    .docx files are supported. Any other mimetype raises ValueError.
    """
    mimetype = file.mimetype

    if mimetype == "application/pdf":
        # Concatenate the text of every page, in order.
        return "".join(page.extract_text() for page in PdfReader(file).pages)

    if mimetype == "text/plain":
        content = file.read().decode("utf-8")
        file.close()
        return content

    if mimetype == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return docx2txt.process(file)

    raise ValueError("Unsupported file type: {}".format(mimetype))
|
||||
|
||||
def handle_file_string(filename, session_id, file_body_string, pinecone_index, tokenizer, file_text_dict):
    """Handle a file string by creating embeddings and upserting them to Pinecone.

    Args:
        filename: Name of the file; prepended to the text and stored as metadata.
        session_id: Pinecone namespace, also used to build the chunk ids.
        file_body_string: Extracted text of the file.
        pinecone_index: Index object exposing `upsert(...)`.
        tokenizer: Tokenizer passed on to `create_embeddings_for_text`.
        file_text_dict: Dict that receives each chunk's text keyed by chunk id.

    Raises:
        Exception: Any error from embedding creation or from an upsert is
            logged and re-raised.
    """
    logging.info("[handle_file_string] Starting...")

    # Clean up the file string by replacing newlines and double spaces.
    # The double space is built explicitly so it cannot collapse into a
    # single space (which would make the replace a no-op).
    double_space = " " * 2
    clean_file_body_string = file_body_string.replace(
        "\n", "; ").replace(double_space, " ")
    # Add the filename to the text to embed
    text_to_embed = "Filename is: {}; {}".format(
        filename, clean_file_body_string)

    # Create embeddings for the text; the average embedding is not needed here.
    try:
        text_embeddings, _ = create_embeddings_for_text(
            text_to_embed, tokenizer)
        logging.info(
            "[handle_file_string] Created embedding for {}".format(filename))
    except Exception as e:
        logging.error(
            "[handle_file_string] Error creating embedding: {}".format(e))
        raise

    # Build (chunk id, embedding, metadata) triples, one per text chunk.
    # Metadata is a dict with keys: filename, file_chunk_index
    vectors = []
    for i, (text_chunk, embedding) in enumerate(text_embeddings):
        chunk_id = get_pinecone_id_for_file_chunk(session_id, filename, i)
        file_text_dict[chunk_id] = text_chunk
        vectors.append(
            (chunk_id, embedding, {"filename": filename, "file_chunk_index": i}))

        logging.info(
            "[handle_file_string] Text chunk {}: {}".format(i, text_chunk))

    # Upsert in batches of at most MAX_PINECONE_VECTORS_TO_UPSERT_PATCH_SIZE vectors.
    batch_size = MAX_PINECONE_VECTORS_TO_UPSERT_PATCH_SIZE
    for start in range(0, len(vectors), batch_size):
        batch = vectors[start:start + batch_size]
        try:
            pinecone_index.upsert(
                vectors=batch, namespace=session_id)

            logging.info(
                "[handle_file_string] Upserted batch of embeddings for {}".format(filename))
        except Exception as e:
            logging.error(
                "[handle_file_string] Error upserting batch of embeddings to Pinecone: {}".format(e))
            raise
|
||||
|
||||
def get_col_average_from_list_of_lists(list_of_lists):
    """Return the average of each column in a list of lists.

    Args:
        list_of_lists: Rows of equal length.

    Returns:
        A new list with one average per column. An empty input gives an
        empty list.
    """
    if len(list_of_lists) == 0:
        # numpy would produce NaN (a float, not a list) for an empty input.
        return []
    if len(list_of_lists) == 1:
        # Copy so the caller's row is not aliased by the result.
        return list(list_of_lists[0])
    return average(array(list_of_lists), axis=0).tolist()
|
||||
|
||||
def create_embeddings_for_text(text, tokenizer):
    """Return a list of tuples (text_chunk, embedding) and an average embedding for a text.

    The text is split into token chunks of about TEXT_EMBEDDING_CHUNK_SIZE
    tokens, each chunk is decoded back to text, and the texts are embedded
    with EMBEDDINGS_MODEL.
    """
    decoded_chunks = [
        tokenizer.decode(token_chunk)
        for token_chunk in list(chunks(text, TEXT_EMBEDDING_CHUNK_SIZE, tokenizer))
    ]

    # Request embeddings in batches of at most MAX_TEXTS_TO_EMBED_BATCH_SIZE texts.
    step = MAX_TEXTS_TO_EMBED_BATCH_SIZE
    vectors = []
    for start in range(0, len(decoded_chunks), step):
        batch = decoded_chunks[start:start + step]
        for item in get_embeddings(batch, EMBEDDINGS_MODEL):
            vectors.append(item["embedding"])

    paired = list(zip(decoded_chunks, vectors))
    mean_vector = get_col_average_from_list_of_lists(vectors)
    return (paired, mean_vector)
|
||||
|
||||
def chunks(text, n, tokenizer):
    """Yield successive token chunks of roughly n tokens from text.

    Each chunk preferably ends at the end of a sentence: the cut point is
    searched backwards from 1.5 * n tokens down to 0.5 * n tokens for a
    decoded chunk ending in "." or a newline. If none is found, the chunk is
    n tokens long.

    Args:
        text: The text to split.
        n: Target chunk size in tokens; must be positive.
        tokenizer: Object with `encode(text)` and `decode(tokens)`.

    Yields:
        Slices of the encoded token sequence, in order.

    Raises:
        ValueError: If n is not positive (raised when iteration starts).
    """
    if n <= 0:
        # With n <= 0 the cursor below would never advance.
        raise ValueError("n must be a positive integer, got {}".format(n))

    tokens = tokenizer.encode(text)
    min_len = int(0.5 * n)
    max_len = int(1.5 * n)

    i = 0
    while i < len(tokens):
        # Find the nearest end of sentence within a range of 0.5 * n and 1.5 * n tokens
        j = min(i + max_len, len(tokens))
        while j > i + min_len:
            # Decode the tokens and check for full stop or newline
            chunk = tokenizer.decode(tokens[i:j])
            if chunk.endswith((".", "\n")):
                break
            j -= 1
        # If no end of sentence found, use n tokens as the chunk size
        if j == i + min_len:
            j = min(i + n, len(tokens))
        yield tokens[i:j]
        i = j
|
@ -0,0 +1,11 @@
|
||||
Flask-Cors==3.0.10
|
||||
openai==0.13.0
|
||||
pinecone-client==2.0.13
|
||||
PyPDF2==2.10.4
|
||||
numpy==1.23.2
|
||||
scikit-learn==1.1.2
|
||||
docx2txt==0.8
|
||||
flask>=1.1.4
|
||||
jinja2==3.0.1
|
||||
PyYAML==6.0
|
||||
tiktoken==0.1.2
|
@ -0,0 +1,10 @@
|
||||
#!/bin/bash
# Creates a virtualenv, installs requirements.txt into it and starts app.py.
# Usage: pass the OpenAI API key as the first argument, or export
# OPENAI_API_KEY before running.
set -e

# Prefer the key given as $1; otherwise keep one already in the environment
# instead of overriding it with an empty string.
api_key="${1:-${OPENAI_API_KEY:-}}"
if [ -z "$api_key" ]; then
  echo "Usage: $0 <OPENAI_API_KEY> (or export OPENAI_API_KEY)" >&2
  exit 1
fi

echo "Starting Python server..."

pip3 install virtualenv
python3 -m virtualenv venv
source venv/bin/activate
pip3 install -r requirements.txt
OPENAI_API_KEY="$api_key" python3 app.py
|
38
apps/file-q-and-a/nextjs-with-flask-server/server/utils.py
Normal file
38
apps/file-q-and-a/nextjs-with-flask-server/server/utils.py
Normal file
@ -0,0 +1,38 @@
|
||||
import openai
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
|
||||
from config import *
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler("debug.log"),
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
)
|
||||
|
||||
def get_pinecone_id_for_file_chunk(session_id, filename, chunk_index):
    """Build a chunk id from session id, file name and chunk index, joined by "-!"."""
    parts = (session_id, filename, str(chunk_index))
    return "-!".join(parts)
|
||||
|
||||
def get_embedding(text, engine):
    """Return the embedding of a single text, computed with the given engine."""
    response = openai.Engine(id=engine).embeddings(input=[text])
    # One input was sent, so the first data item is the one to read.
    first_item = response["data"][0]
    return first_item["embedding"]
|
||||
|
||||
def get_embeddings(text_array, engine):
    """Return the "data" list of the embeddings response for a list of texts.

    A failed request is retried up to 5 times with exponential backoff
    (1, 2, 4, 8 and 16 seconds). If the request after the last wait also
    fails, its exception propagates to the caller.
    """
    # Waits between attempts: base delay of 1 second, doubled after each retry.
    retry_delays = [1 * 2 ** attempt for attempt in range(5)]

    for delay in retry_delays:
        try:
            return openai.Engine(id=engine).embeddings(input=text_array)["data"]
        except Exception:
            logging.info(f"Request failed. Retrying in {delay} seconds.")
            time.sleep(delay)

    # Final attempt: no retries left, so any error is raised as-is.
    return openai.Engine(id=engine).embeddings(input=text_array)["data"]
|
4
apps/file-q-and-a/nextjs/.env.local.example
Normal file
4
apps/file-q-and-a/nextjs/.env.local.example
Normal file
@ -0,0 +1,4 @@
|
||||
# create a copy of this file named .env.local
|
||||
|
||||
# Your own API key for OpenAI
|
||||
OPENAI_API_KEY='sk-......'
|
3
apps/file-q-and-a/nextjs/.eslintrc.json
Normal file
3
apps/file-q-and-a/nextjs/.eslintrc.json
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"extends": "next/core-web-vitals"
|
||||
}
|
36
apps/file-q-and-a/nextjs/.gitignore
vendored
Normal file
36
apps/file-q-and-a/nextjs/.gitignore
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.js
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# local env files
|
||||
.env*.local
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
39
apps/file-q-and-a/nextjs/README.md
Normal file
39
apps/file-q-and-a/nextjs/README.md
Normal file
@ -0,0 +1,39 @@
|
||||
# File Q&A
|
||||
|
||||
File Q&A is a [Next.js](https://nextjs.org/) app that lets you find answers in your files using OpenAI APIs. You can upload files and ask questions related to their content, and the app will use embeddings and GPT to generate answers from the most relevant files.
|
||||
|
||||
## Requirements
|
||||
|
||||
To run the app, you need an OpenAI API key. You can create a new API key [here](https://beta.openai.com/account/api-keys).
|
||||
|
||||
## Set Up
|
||||
|
||||
If you don't have Node.js and npm already, install them from [https://nodejs.org/en/download/](https://nodejs.org/en/download/).
|
||||
|
||||
In your terminal, navigate to the `nextjs` directory of this example app, and then install dependencies:
|
||||
|
||||
```
|
||||
npm install
|
||||
```
|
||||
|
||||
Copy the .env.local.example file into a .env.local file and fill out the OpenAI API key field.
|
||||
|
||||
## Development
|
||||
|
||||
Run the development server:
|
||||
|
||||
```
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Open [http://localhost:3000](http://localhost:3000) with your browser to see the app.
|
||||
|
||||
## Deployment
|
||||
|
||||
You can deploy the app on [Vercel](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme), the platform from the creators of Next.js. Check out the [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details.
|
||||
|
||||
## Limitations
|
||||
|
||||
Uploaded files and generated embeddings don't persist on browser refresh. If you want to store more embeddings, we recommend using a vector database (e.g. Pinecone, Weaviate, Milvus, Qdrant, Redis, FAISS, etc.). The `nextjs-with-flask-server` version of this demo uses a Pinecone vector database.
|
||||
|
||||
The app may sometimes generate answers that are not in the files, or hallucinate about the existence of files that are not uploaded.
|
6
apps/file-q-and-a/nextjs/next.config.js
Normal file
6
apps/file-q-and-a/nextjs/next.config.js
Normal file
@ -0,0 +1,6 @@
|
||||
/** @type {import('next').NextConfig} */
|
||||
const nextConfig = {
|
||||
reactStrictMode: true,
|
||||
}
|
||||
|
||||
module.exports = nextConfig
|
8949
apps/file-q-and-a/nextjs/package-lock.json
generated
Normal file
8949
apps/file-q-and-a/nextjs/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
44
apps/file-q-and-a/nextjs/package.json
Normal file
44
apps/file-q-and-a/nextjs/package.json
Normal file
@ -0,0 +1,44 @@
|
||||
{
|
||||
"name": "file-q-and-a",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@headlessui/react": "^1.7.7",
|
||||
"@heroicons/react": "^2.0.13",
|
||||
"@next/font": "13.1.2",
|
||||
"@tailwindcss/line-clamp": "^0.4.2",
|
||||
"@tailwindcss/typography": "^0.5.9",
|
||||
"@types/formidable": "^2.0.5",
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/node": "18.11.18",
|
||||
"@types/pdf-parse": "^1.1.1",
|
||||
"@types/react": "18.0.27",
|
||||
"@types/react-dom": "18.0.10",
|
||||
"axios": "^1.2.3",
|
||||
"clsx": "^1.2.1",
|
||||
"eslint": "8.32.0",
|
||||
"eslint-config-next": "13.1.2",
|
||||
"formidable": "^2.1.1",
|
||||
"lodash": "^4.17.21",
|
||||
"mammoth": "^1.5.1",
|
||||
"next": "13.1.2",
|
||||
"node-html-markdown": "^1.3.0",
|
||||
"openai": "^3.1.0",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"react": "18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
"react-markdown": "^8.0.5",
|
||||
"typescript": "4.9.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"autoprefixer": "^10.4.13",
|
||||
"postcss": "^8.4.21",
|
||||
"tailwindcss": "^3.2.4"
|
||||
}
|
||||
}
|
6
apps/file-q-and-a/nextjs/postcss.config.js
Normal file
6
apps/file-q-and-a/nextjs/postcss.config.js
Normal file
@ -0,0 +1,6 @@
|
||||
// PostCSS plugins, listed in the order they are declared: Tailwind CSS
// first, then Autoprefixer. Both use their default options.
const plugins = {
  tailwindcss: {},
  autoprefixer: {},
};

module.exports = { plugins };
|
BIN
apps/file-q-and-a/nextjs/public/favicon.ico
Normal file
BIN
apps/file-q-and-a/nextjs/public/favicon.ico
Normal file
Binary file not shown.
After Width: | Height: | Size: 262 KiB |
1
apps/file-q-and-a/nextjs/public/openai-horizontal.svg
Normal file
1
apps/file-q-and-a/nextjs/public/openai-horizontal.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg id="openai-horizontal" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 120 29.53"><path d="M40.7,6.98s-.05,0-.07,0c-.02,0-.05,0-.07,0-4.67,0-7.58,2.91-7.58,7.6v2.53c0,4.69,2.9,7.6,7.58,7.6,.02,0,.05,0,.07,0,.02,0,.05,0,.07,0,4.67,0,7.58-2.91,7.58-7.6v-2.53c0-4.69-2.91-7.6-7.58-7.6Zm4.31,10.31c0,3.08-1.6,4.86-4.38,4.89-2.78-.03-4.38-1.81-4.38-4.89v-2.88c0-3.08,1.6-4.86,4.38-4.89,2.78,.03,4.38,1.81,4.38,4.89v2.88Zm40.57-5.79s-.06,0-.09,0c-.02,0-.03,0-.05,0-1.77,0-3.03,.6-3.65,1.75l-.19,.35v-1.8h-3.02v12.56h3.17v-7.48c0-1.76,.95-2.77,2.59-2.8,1.57,.03,2.47,1.02,2.47,2.73v7.55h3.17v-8.09c0-2.99-1.64-4.77-4.39-4.77Zm34.42-1.77v-2.4h-10.46v2.4h3.67v12.22h-3.67v2.4h10.46v-2.4h-3.67V9.73h3.67Zm-18.75-2.4h0s-3.28,0-3.28,0l-6.1,17.04h3.43l1.17-3.65h6.66v.04s1.17,3.62,1.17,3.62h3.43l-6.11-17.04h-.36Zm-4.03,10.98l2.57-8.05,2.55,8.05h-5.12Zm-39.45-6.81s-.05,0-.07,0c-.03,0-.05,0-.07,0-1.59,0-2.96,.66-3.68,1.76l-.18,.28v-1.74h-3.02V28.69h3.17v-5.9l.18,.27c.68,1.01,2.01,1.61,3.56,1.61,.03,0,.05,0,.08,0,.02,0,.04,0,.07,0,2.61,0,5.24-1.7,5.24-5.51v-2.14c0-2.74-1.62-5.51-5.26-5.51Zm2.1,7.5c0,2-1.15,3.24-3.01,3.28-1.73-.03-2.94-1.35-2.94-3.23v-1.89c0-1.9,1.22-3.24,2.97-3.28,1.84,.03,2.98,1.28,2.98,3.28v1.84Zm11.05-7.5h0c-.06,0-.12,.01-.18,.01-.06,0-.12-.01-.18-.01h0c-3.57,0-5.78,2.23-5.78,5.81v1.76c0,3.45,2.24,5.59,5.83,5.59,.08,0,.15,0,.22-.01,.05,0,.09,.01,.14,.01,2.41,0,4.09-.88,5.16-2.7l-2.13-1.23c-.71,1.05-1.66,1.84-3.02,1.84-1.82,0-2.91-1.12-2.91-3.01v-.5h8.44v-2.08c0-3.34-2.19-5.49-5.59-5.49Zm-2.86,5.54v-.3c0-2,.95-3.12,2.68-3.2,1.66,.08,2.66,1.18,2.66,2.99v.5s-5.34,0-5.34,0Z"></path><path 
d="M27.21,12.08c.67-2.01,.44-4.21-.63-6.04-1.61-2.8-4.85-4.24-8.01-3.57C17.16,.89,15.14-.01,13.02,0c-3.23,0-6.1,2.08-7.1,5.15-2.08,.43-3.87,1.73-4.92,3.57-1.62,2.8-1.25,6.32,.92,8.72-.67,2.01-.44,4.21,.63,6.03,1.61,2.81,4.85,4.25,8.02,3.58,1.4,1.58,3.42,2.49,5.54,2.48,3.23,0,6.1-2.08,7.1-5.15,2.08-.43,3.87-1.73,4.91-3.57,1.63-2.8,1.26-6.32-.91-8.72Zm-2.3-5.07c.64,1.12,.88,2.43,.66,3.7-.04-.03-.12-.07-.17-.1l-5.88-3.4c-.3-.17-.67-.17-.97,0l-6.89,3.98v-2.92l5.69-3.29c2.65-1.53,6.03-.62,7.56,2.03Zm-13.25,6.07l2.9-1.68,2.9,1.68v3.35l-2.9,1.68-2.9-1.68v-3.35ZM13.01,1.93c1.3,0,2.55,.45,3.55,1.28-.04,.02-.12,.07-.18,.1l-5.88,3.39c-.3,.17-.48,.49-.48,.84v7.96l-2.53-1.46V7.46c0-3.06,2.47-5.53,5.53-5.54ZM2.68,9.69h0c.65-1.12,1.66-1.98,2.88-2.43v6.99c0,.35,.18,.66,.48,.84l6.88,3.97-2.54,1.47-5.68-3.28c-2.64-1.53-3.55-4.91-2.02-7.56Zm1.55,12.83h0c-.65-1.11-.88-2.43-.66-3.7,.04,.03,.12,.07,.17,.1l5.88,3.4c.3,.17,.67,.17,.97,0l6.88-3.98v2.92l-5.69,3.28c-2.65,1.52-6.03,.62-7.56-2.02Zm11.89,5.08c-1.29,0-2.55-.45-3.54-1.28,.04-.02,.13-.07,.18-.1l5.88-3.39c.3-.17,.49-.49,.48-.84v-7.95l2.53,1.46v6.57c0,3.06-2.48,5.54-5.53,5.54Zm10.34-7.76c-.65,1.12-1.67,1.98-2.88,2.42v-6.99c0-.35-.18-.67-.48-.84h0l-6.89-3.98,2.53-1.46,5.69,3.28c2.65,1.53,3.55,4.91,2.02,7.56Z"></path></svg>
|
After Width: | Height: | Size: 2.8 KiB |
1
apps/file-q-and-a/nextjs/public/openai.svg
Normal file
1
apps/file-q-and-a/nextjs/public/openai.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" id="openai-symbol" viewBox="0 0 32 32"><path d="M29.71,13.09A8.09,8.09,0,0,0,20.34,2.68a8.08,8.08,0,0,0-13.7,2.9A8.08,8.08,0,0,0,2.3,18.9,8,8,0,0,0,3,25.45a8.08,8.08,0,0,0,8.69,3.87,8,8,0,0,0,6,2.68,8.09,8.09,0,0,0,7.7-5.61,8,8,0,0,0,5.33-3.86A8.09,8.09,0,0,0,29.71,13.09Zm-12,16.82a6,6,0,0,1-3.84-1.39l.19-.11,6.37-3.68a1,1,0,0,0,.53-.91v-9l2.69,1.56a.08.08,0,0,1,.05.07v7.44A6,6,0,0,1,17.68,29.91ZM4.8,24.41a6,6,0,0,1-.71-4l.19.11,6.37,3.68a1,1,0,0,0,1,0l7.79-4.49V22.8a.09.09,0,0,1,0,.08L13,26.6A6,6,0,0,1,4.8,24.41ZM3.12,10.53A6,6,0,0,1,6.28,7.9v7.57a1,1,0,0,0,.51.9l7.75,4.47L11.85,22.4a.14.14,0,0,1-.09,0L5.32,18.68a6,6,0,0,1-2.2-8.18Zm22.13,5.14-7.78-4.52L20.16,9.6a.08.08,0,0,1,.09,0l6.44,3.72a6,6,0,0,1-.9,10.81V16.56A1.06,1.06,0,0,0,25.25,15.67Zm2.68-4-.19-.12-6.36-3.7a1,1,0,0,0-1.05,0l-7.78,4.49V9.2a.09.09,0,0,1,0-.09L19,5.4a6,6,0,0,1,8.91,6.21ZM11.08,17.15,8.38,15.6a.14.14,0,0,1-.05-.08V8.1a6,6,0,0,1,9.84-4.61L18,3.6,11.61,7.28a1,1,0,0,0-.53.91ZM12.54,14,16,12l3.47,2v4L16,20l-3.47-2Z"/></svg>
|
After Width: | Height: | Size: 1.0 KiB |
77
apps/file-q-and-a/nextjs/src/components/File.tsx
Normal file
77
apps/file-q-and-a/nextjs/src/components/File.tsx
Normal file
@ -0,0 +1,77 @@
|
||||
import { useState, useCallback, memo } from "react";
|
||||
import { Transition } from "@headlessui/react";
|
||||
import {
|
||||
MagnifyingGlassMinusIcon,
|
||||
MagnifyingGlassPlusIcon,
|
||||
ArrowTopRightOnSquareIcon,
|
||||
} from "@heroicons/react/24/outline";
|
||||
|
||||
import { FileLite } from "../types/file";
|
||||
|
||||
type FileProps = {
  /** File to render; its name, url and (optional) score are read here. */
  file: FileLite;
  /** When true, show the file's score next to its name if it has one. */
  showScore?: boolean;
};

/**
 * Card for a single file.
 *
 * Shows the file name, an optional score, a magnifier icon reflecting the
 * expanded state and a link that opens the file in a new tab. Clicking the
 * card toggles an inline preview of the file rendered in an iframe.
 */
function File(props: FileProps) {
  const [expanded, setExpanded] = useState(false);

  const handleExpand = useCallback(() => {
    setExpanded((prev) => !prev);
  }, []);

  return (
    <div
      className="border-gray-100 border rounded-md shadow p-2 cursor-pointer"
      onClick={handleExpand}
    >
      <div className="flex flex-row justify-between">
        <div className="flex hover:text-gray-600">{props.file.name}</div>

        <div className="flex flex-row space-x-2">
          {/* Check the type instead of truthiness: `0 && <div />` evaluates
              to 0, which React would render as a stray "0". */}
          {props.showScore && typeof props.file.score === "number" && (
            <div className="flex text-blue-600 mr-4">
              {props.file.score.toFixed(2)}
            </div>
          )}

          <div className="ml-auto w-max flex items-center justify-center">
            {expanded ? (
              <MagnifyingGlassMinusIcon className="text-gray-500 h-5" />
            ) : (
              <MagnifyingGlassPlusIcon className="text-gray-500 h-5" />
            )}
          </div>

          <a
            href={props.file.url}
            target="_blank"
            rel="noopener noreferrer"
            onClick={(e) => e.stopPropagation()} // prevent the click event from bubbling up to the list item
          >
            <ArrowTopRightOnSquareIcon className="text-gray-500 h-5" />
          </a>
        </div>
      </div>
      <Transition
        show={expanded}
        enter="transition duration-75 ease-out"
        enterFrom="transform translate-y-4 opacity-0"
        enterTo="transform translate-y-0 opacity-100"
        leave="transition duration-100 ease-out"
        leaveFrom="transform translate-y-0 opacity-100"
        leaveTo="transform translate-y-4 opacity-0"
      >
        <div className="items-center mt-2 justify-center">
          <iframe
            src={props.file.url}
            className="h-full w-full"
            title={props.file.name}
          ></iframe>
        </div>
      </Transition>
    </div>
  );
}
|
||||
|
||||
export default memo(File);
|
172
apps/file-q-and-a/nextjs/src/components/FileQandAArea.tsx
Normal file
172
apps/file-q-and-a/nextjs/src/components/FileQandAArea.tsx
Normal file
@ -0,0 +1,172 @@
|
||||
import React, { memo, useCallback, useRef, useState } from "react";
|
||||
import { Transition } from "@headlessui/react";
|
||||
import axios from "axios";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
|
||||
import FileViewerList from "./FileViewerList";
|
||||
import LoadingText from "./LoadingText";
|
||||
import { isFileNameInString } from "../services/utils";
|
||||
import { FileChunk, FileLite } from "../types/file";
|
||||
|
||||
type FileQandAAreaProps = {
  /** Uploaded files that questions are answered from. */
  files: FileLite[];
};

/**
 * Question box plus streamed answer.
 *
 * On submit it posts the question and the files to `/api/search-file-chunks`,
 * then posts the question and the returned chunks to
 * `/api/get-answer-from-files` and appends the streamed response body to the
 * displayed answer. Files whose names appear in the answer are listed as
 * "Sources".
 */
function FileQandAArea(props: FileQandAAreaProps) {
  const questionRef = useRef<HTMLInputElement>(null);
  const [hasAskedQuestion, setHasAskedQuestion] = useState(false);
  const [answerError, setAnswerError] = useState("");
  const [answerLoading, setAnswerLoading] = useState<boolean>(false);
  const [answer, setAnswer] = useState("");
  const [answerDone, setAnswerDone] = useState(false);

  const handleSearch = useCallback(async () => {
    // Ignore submissions while a previous question is still being answered.
    if (answerLoading) {
      return;
    }

    const question = questionRef.current?.value ?? "";
    setAnswer("");
    setAnswerDone(false);

    if (!question) {
      setAnswerError("Please ask a question.");
      return;
    }
    if (props.files.length === 0) {
      setAnswerError("Please upload files before asking a question.");
      return;
    }

    setAnswerLoading(true);
    setAnswerError("");

    try {
      let results: FileChunk[] = [];

      try {
        const searchResultsResponse = await axios.post(
          "/api/search-file-chunks",
          {
            searchQuery: question,
            files: props.files,
            maxResults: 10,
          }
        );

        if (searchResultsResponse.status !== 200) {
          setAnswerError("Sorry, something went wrong!");
          return;
        }
        results = searchResultsResponse.data.searchResults;
      } catch (err: any) {
        // Without search results there is nothing to answer from, so stop
        // here instead of sending an empty context to the answer endpoint.
        setAnswerError("Sorry, something went wrong!");
        return;
      }

      setHasAskedQuestion(true);

      const res = await fetch("/api/get-answer-from-files", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          question,
          fileChunks: results,
        }),
      });

      // A failed request carries an error payload, not an answer; do not
      // stream it into the answer area.
      if (!res.ok || !res.body) {
        setAnswerError("Sorry, something went wrong!");
        return;
      }

      const reader = res.body.getReader();
      // One decoder in streaming mode for the whole response, so a multi-byte
      // character split across two network chunks is decoded correctly.
      const decoder = new TextDecoder();

      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          // Flush any bytes the decoder is still holding.
          const tail = decoder.decode();
          if (tail) {
            setAnswer((prev) => prev + tail);
          }
          setAnswerDone(true);
          break;
        }
        // Decode outside the state updater: updaters may be invoked more than
        // once, and the streaming decoder is stateful.
        const text = decoder.decode(value, { stream: true });
        setAnswer((prev) => prev + text);
      }
    } catch (err: any) {
      setAnswerError("Sorry, something went wrong!");
    } finally {
      // Always leave the loading state, even when the request or stream fails.
      setAnswerLoading(false);
    }
  }, [props.files, answerLoading]);

  const handleEnterInSearchBar = useCallback(
    async (event: React.KeyboardEvent<HTMLInputElement>) => {
      if (event.key === "Enter") {
        await handleSearch();
      }
    },
    [handleSearch]
  );

  // Files mentioned by name in the answer; shown as the answer's sources.
  const sourceFiles = props.files.filter((file) =>
    isFileNameInString(file.name, answer)
  );

  return (
    <div className="space-y-4 text-gray-800">
      <div className="mt-2">
        Ask a question based on the content of your files:
      </div>
      <div className="space-y-2">
        <input
          className="border rounded border-gray-200 w-full py-1 px-2"
          placeholder="e.g. What were the key takeaways from the Q1 planning meeting?"
          name="search"
          ref={questionRef}
          onKeyDown={handleEnterInSearchBar}
        />
        <div
          className="rounded-md bg-gray-50 py-1 px-4 w-max text-gray-500 hover:bg-gray-100 border border-gray-100 shadow cursor-pointer"
          onClick={handleSearch}
        >
          {answerLoading ? (
            <LoadingText text="Answering question..." />
          ) : (
            "Ask question"
          )}
        </div>
      </div>
      <div className="">
        {answerError && <div className="text-red-500">{answerError}</div>}
        <Transition
          show={hasAskedQuestion}
          enter="transition duration-600 ease-out"
          enterFrom="transform opacity-0"
          enterTo="transform opacity-100"
          leave="transition duration-125 ease-out"
          leaveFrom="transform opacity-100"
          leaveTo="transform opacity-0"
          className="mb-8"
        >
          {answer && (
            <div className="">
              <ReactMarkdown className="prose" linkTarget="_blank">
                {`${answer}${answerDone ? "" : " |"}`}
              </ReactMarkdown>
            </div>
          )}

          <Transition
            show={sourceFiles.length > 0}
            enter="transition duration-600 ease-out"
            enterFrom="transform opacity-0"
            enterTo="transform opacity-100"
            leave="transition duration-125 ease-out"
            leaveFrom="transform opacity-100"
            leaveTo="transform opacity-0"
            className="mb-8"
          >
            <FileViewerList
              files={sourceFiles}
              title="Sources"
              listExpanded={true}
            />
          </Transition>
        </Transition>
      </div>
    </div>
  );
}
|
||||
|
||||
export default memo(FileQandAArea);
|
201
apps/file-q-and-a/nextjs/src/components/FileUploadArea.tsx
Normal file
201
apps/file-q-and-a/nextjs/src/components/FileUploadArea.tsx
Normal file
@ -0,0 +1,201 @@
|
||||
import React, {
|
||||
Dispatch,
|
||||
SetStateAction,
|
||||
useCallback,
|
||||
useState,
|
||||
memo,
|
||||
useRef,
|
||||
} from "react";
|
||||
import axios from "axios";
|
||||
import { ArrowUpTrayIcon } from "@heroicons/react/24/outline";
|
||||
import { compact } from "lodash";
|
||||
|
||||
import LoadingText from "./LoadingText";
|
||||
import { FileLite } from "../types/file";
|
||||
import FileViewerList from "./FileViewerList";
|
||||
|
||||
type FileUploadAreaProps = {
  /** State setter of the parent's file list; accepted files are appended to it. */
  handleSetFiles: Dispatch<SetStateAction<FileLite[]>>;
  /** Maximum total number of files that may be uploaded. */
  maxNumFiles: number;
  /** Maximum size of a single file, in megabytes. */
  maxFileSizeMB: number;
};

// MIME types accepted for upload: plain text, PDF, Word documents and Markdown.
const ACCEPTED_MIME_TYPES =
  /(text\/plain|application\/(pdf|msword|vnd\.openxmlformats-officedocument\.wordprocessingml\.document)|text\/(markdown|x-markdown))/;

/**
 * Drag-and-drop / click-to-select upload area.
 *
 * Each accepted file is posted to `/api/process-file`; the returned text,
 * mean embedding and chunks are stored with the file in local state and
 * appended to the parent's list through `handleSetFiles`. Files of an
 * unsupported type or size trigger an alert, files whose name was already
 * uploaded are skipped, and files that fail to process are dropped.
 */
function FileUploadArea(props: FileUploadAreaProps) {
  const handleSetFiles = props.handleSetFiles;

  const [files, setFiles] = useState<FileLite[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState("");
  const [dragOver, setDragOver] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);

  const handleFileChange = useCallback(
    async (selectedFiles: FileList | null) => {
      if (selectedFiles && selectedFiles.length > 0) {
        setError("");

        if (files.length + selectedFiles.length > props.maxNumFiles) {
          setError(`You can only upload up to ${props.maxNumFiles} files.`);
          // Clear the rejected selection on the file input itself, so that
          // picking the same files again fires a new change event.
          if (inputRef.current) {
            inputRef.current.value = "";
          }
          return;
        }

        setLoading(true);

        try {
          const uploadedFiles = await Promise.all(
            Array.from(selectedFiles).map(async (file) => {
              // Check the file type
              if (
                file.type.match(ACCEPTED_MIME_TYPES) && // AND file isn't too big
                file.size < props.maxFileSizeMB * 1024 * 1024
              ) {
                // Check if the file name already exists in the files state
                if (files.find((f) => f.name === file.name)) {
                  return null; // Skip this file
                }

                const formData = new FormData();
                formData.append("file", file);
                formData.append("filename", file.name);

                try {
                  const processFileResponse = await axios.post(
                    "/api/process-file",
                    formData,
                    {
                      headers: {
                        "Content-Type": "multipart/form-data",
                      },
                    }
                  );

                  if (processFileResponse.status === 200) {
                    const text = processFileResponse.data.text;
                    const meanEmbedding =
                      processFileResponse.data.meanEmbedding;
                    const chunks = processFileResponse.data.chunks;

                    const fileObject: FileLite = {
                      name: file.name,
                      url: URL.createObjectURL(file),
                      type: file.type,
                      size: file.size,
                      expanded: false,
                      embedding: meanEmbedding,
                      chunks,
                      extractedText: text,
                    };

                    return fileObject;
                  } else {
                    console.error("Error creating file embedding");
                    return null;
                  }
                } catch (err: any) {
                  console.error(`Error creating file embedding: ${err}`);
                  return null;
                }
              } else {
                alert(
                  `Invalid file type or size. Only TXT, PDF, DOCX or MD are allowed, up to ${props.maxFileSizeMB}MB.`
                );
                return null; // Skip this file
              }
            })
          );

          // Filter out any null values from the uploadedFiles array
          const validFiles = compact(uploadedFiles);

          // Set the files state with the valid files and the existing files
          setFiles((prevFiles) => [...prevFiles, ...validFiles]);
          handleSetFiles((prevFiles) => [...prevFiles, ...validFiles]);
        } finally {
          // Never leave the drop zone stuck on "Uploading...".
          setLoading(false);
        }
      }
    },
    [files, handleSetFiles, props.maxFileSizeMB, props.maxNumFiles]
  );

  const handleDragEnter = useCallback((event: React.DragEvent) => {
    event.preventDefault();
    setDragOver(true);
  }, []);

  const handleDragOver = useCallback((event: React.DragEvent) => {
    // Needed so the browser treats the label as a valid drop target.
    event.preventDefault();
  }, []);

  const handleDragLeave = useCallback((event: React.DragEvent) => {
    event.preventDefault();
    setDragOver(false);
  }, []);

  const handleDrop = useCallback(
    (event: React.DragEvent) => {
      event.preventDefault();
      setDragOver(false);
      const droppedFiles = event.dataTransfer.files;
      handleFileChange(droppedFiles);
    },
    [handleFileChange]
  );

  return (
    <div className="flex items-center justify-center w-full flex-col">
      <label
        htmlFor="dropzone-file"
        className={`flex flex-col shadow items-center justify-center w-full h-36 border-2 border-gray-300 border-dashed rounded-lg cursor-pointer bg-gray-50 hover:bg-gray-100 relative ${
          dragOver ? "border-blue-500 bg-blue-50" : ""
        }`}
        onDragEnter={handleDragEnter}
        onDragOver={handleDragOver}
        onDragLeave={handleDragLeave}
        onDrop={handleDrop}
      >
        <div className="flex flex-col items-center justify-center pt-5 pb-6">
          {loading ? (
            <LoadingText text="Uploading..." />
          ) : (
            <div className="text-gray-500 flex flex-col items-center text-center">
              <ArrowUpTrayIcon className="w-7 h-7 mb-4" />
              <p className="mb-2 text-sm">
                <span className="font-semibold">Click to upload</span> or drag
                and drop
              </p>
              <p className="text-xs">
                TXT, PDF, DOCX or MD (max {props.maxFileSizeMB}MB per file)
              </p>
              <p className="text-xs mt-1">
                You can upload up to {props.maxNumFiles - files.length} more{" "}
                {props.maxNumFiles - files.length === 1 ? "file" : "files"}
              </p>
              <input
                id="dropzone-file"
                type="file"
                className="hidden"
                multiple
                ref={inputRef}
                onChange={(event) => handleFileChange(event.target.files)}
              />
            </div>
          )}
        </div>
      </label>

      {error && (
        <div className="flex items-center justify-center w-full mt-4">
          <p className="text-sm text-red-500">{error}</p>
        </div>
      )}

      <FileViewerList files={files} title="Uploaded Files" />
    </div>
  );
}
|
||||
|
||||
export default memo(FileUploadArea);
|
73
apps/file-q-and-a/nextjs/src/components/FileViewerList.tsx
Normal file
73
apps/file-q-and-a/nextjs/src/components/FileViewerList.tsx
Normal file
@ -0,0 +1,73 @@
|
||||
import React, { memo, useCallback, useState } from "react";
|
||||
import { ChevronUpIcon } from "@heroicons/react/24/outline";
|
||||
import clsx from "clsx";
|
||||
import { Transition } from "@headlessui/react";
|
||||
|
||||
import File from "./File";
|
||||
import { FileLite } from "../types/file";
|
||||
|
||||
type FileViewerListProps = {
  /** Files to list; nothing but the outer wrapper is rendered when empty. */
  files: FileLite[];
  /** Heading shown in the list's header bar. */
  title: string;
  /** Initial expanded state of the list (collapsed when omitted). */
  listExpanded?: boolean;
  /** Forwarded to each File as its `showScore` prop. */
  showScores?: boolean;
};

/**
 * Collapsible list of files with a header showing a title and a count.
 * Clicking the header bar or the chevron toggles the list.
 */
function FileViewerList(props: FileViewerListProps) {
  const { files, title, showScores } = props;
  const [listExpanded, setListExpanded] = useState(props.listExpanded ?? false);

  const handleListExpand = useCallback(() => {
    setListExpanded((isOpen) => !isOpen);
  }, []);

  // The chevron points up while the list is open and flips when collapsed.
  const chevronClassName = clsx(
    "w-6 h-6 ml-2 stroke-slate-400 transition-transform cursor-pointer",
    { "-rotate-180": !listExpanded }
  );

  const header = (
    <div className="flex flex-row">
      <div
        className="rounded-md flex shadow p-2 mb-2 w-full bg-gray-50 items-center cursor-pointer "
        onClick={handleListExpand}
      >
        {title}
        <div className="bg-gray-300 ml-2 px-2 rounded-full w-max text-center text-sm ">
          {files.length}
        </div>
      </div>
      <div className="ml-auto w-max flex items-center justify-center">
        <ChevronUpIcon className={chevronClassName} onClick={handleListExpand} />
      </div>
    </div>
  );

  const rows = files.map((file) => (
    <File key={file.name} file={file} showScore={showScores} />
  ));

  return (
    <div className="flex items-left justify-center w-full">
      {files.length > 0 ? (
        <div className="flex flex-col items-left justify-center w-full mt-4">
          {header}

          <Transition
            show={listExpanded}
            enter="transition duration-125 ease-out"
            enterFrom="transform translate-y-4 opacity-0"
            enterTo="transform translate-y-0 opacity-100"
            leave="transition duration-125 ease-out"
            leaveFrom="transform translate-y-0 opacity-100"
            leaveTo="transform translate-y-4 opacity-0"
          >
            <div className="text-sm text-gray-500 space-y-2">{rows}</div>
          </Transition>
        </div>
      ) : null}
    </div>
  );
}
|
||||
|
||||
export default memo(FileViewerList);
|
33
apps/file-q-and-a/nextjs/src/components/LoadingSpinner.tsx
Normal file
33
apps/file-q-and-a/nextjs/src/components/LoadingSpinner.tsx
Normal file
@ -0,0 +1,33 @@
|
||||
import clsx from "clsx";
|
||||
|
||||
type Props = {
  /** Extra classes for the wrapper element. */
  className?: string;
  /**
   * Width and height of the spinner in Tailwind spacing units
   * (1 unit = 0.25rem, so the default of 5 equals `w-5 h-5`).
   */
  size?: number;
};

/**
 * Animated circular loading indicator.
 *
 * The size is applied as an inline style rather than as `w-${size} h-${size}`
 * classes: Tailwind only generates classes it finds as complete strings in
 * the source, so interpolated class names may be missing from the built CSS.
 */
export default function LoadingSpinner(props: Props) {
  // `||` on purpose: a size of 0 falls back to the default, as before.
  const size = props.size || 5;
  const edge = `${size * 0.25}rem`;

  return (
    <div className={clsx("flex flex-row", props.className)}>
      <svg
        aria-hidden="true"
        className="mr-2 text-gray-200 animate-spin dark:text-gray-600 fill-black stroke-1"
        style={{ width: edge, height: edge }}
        viewBox="0 0 100 101"
        fill="none"
        xmlns="http://www.w3.org/2000/svg"
      >
        <path
          d="M100 50.5908C100 78.2051 77.6142 100.591 50 100.591C22.3858 100.591 0 78.2051 0 50.5908C0 22.9766 22.3858 0.59082 50 0.59082C77.6142 0.59082 100 22.9766 100 50.5908ZM9.08144 50.5908C9.08144 73.1895 27.4013 91.5094 50 91.5094C72.5987 91.5094 90.9186 73.1895 90.9186 50.5908C90.9186 27.9921 72.5987 9.67226 50 9.67226C27.4013 9.67226 9.08144 27.9921 9.08144 50.5908Z"
          fill="currentColor"
        />
        <path
          d="M93.9676 39.0409C96.393 38.4038 97.8624 35.9116 97.0079 33.5539C95.2932 28.8227 92.871 24.3692 89.8167 20.348C85.8452 15.1192 80.8826 10.7238 75.2124 7.41289C69.5422 4.10194 63.2754 1.94025 56.7698 1.05124C51.7666 0.367541 46.6976 0.446843 41.7345 1.27873C39.2613 1.69328 37.813 4.19778 38.4501 6.62326C39.0873 9.04874 41.5694 10.4717 44.0505 10.1071C47.8511 9.54855 51.7191 9.52689 55.5402 10.0491C60.8642 10.7766 65.9928 12.5457 70.6331 15.2552C75.2735 17.9648 79.3347 21.5619 82.5849 25.841C84.9175 28.9121 86.7997 32.2913 88.1811 35.8758C89.083 38.2158 91.5421 39.6781 93.9676 39.0409Z"
          fill="currentFill"
        />
      </svg>
    </div>
  );
}
|
18
apps/file-q-and-a/nextjs/src/components/LoadingText.tsx
Normal file
18
apps/file-q-and-a/nextjs/src/components/LoadingText.tsx
Normal file
@ -0,0 +1,18 @@
|
||||
import React, { memo } from "react";
|
||||
|
||||
import LoadingSpinner from "./LoadingSpinner";
|
||||
|
||||
type LoadingTextProps = {
  /** Label shown next to the spinner; nothing is shown for an empty string. */
  text: string;
};

/** A loading spinner followed by an optional text label. */
function LoadingText({ text }: LoadingTextProps) {
  const label = text ? <div className="flex">{text}</div> : null;

  return (
    <div className="text-gray-500 text-md flex flex-row justify-center items-center">
      <LoadingSpinner />
      {label}
    </div>
  );
}
|
||||
|
||||
export default memo(LoadingText);
|
6
apps/file-q-and-a/nextjs/src/pages/_app.tsx
Normal file
6
apps/file-q-and-a/nextjs/src/pages/_app.tsx
Normal file
@ -0,0 +1,6 @@
|
||||
import "@/styles/globals.css";
|
||||
import type { AppProps } from "next/app";
|
||||
|
||||
/** Custom Next.js App: renders the active page with the props Next.js supplies. */
export default function App(props: AppProps) {
  const { Component: Page, pageProps } = props;
  return <Page {...pageProps} />;
}
|
13
apps/file-q-and-a/nextjs/src/pages/_document.tsx
Normal file
13
apps/file-q-and-a/nextjs/src/pages/_document.tsx
Normal file
@ -0,0 +1,13 @@
|
||||
import { Html, Head, Main, NextScript } from "next/document";
|
||||
|
||||
/** Custom Next.js Document: the HTML shell (English `lang`) around every page. */
export default function Document() {
  const body = (
    <body>
      <Main />
      <NextScript />
    </body>
  );

  return (
    <Html lang="en">
      <Head />
      {body}
    </Html>
  );
}
|
@ -0,0 +1,77 @@
|
||||
import type { NextApiRequest, NextApiResponse } from "next";
|
||||
|
||||
import { completionStream } from "../../services/openai";
|
||||
import { FileChunk } from "../../types/file";
|
||||
|
||||
type Data = {
  answer?: string;
  error?: string;
};

// Maximum number of characters of concatenated file extracts put in the prompt.
// NOTE(review): presumably ~2000 tokens at ~3 characters per token — confirm.
const MAX_FILES_LENGTH = 2000 * 3;

/**
 * POST /api/get-answer-from-files
 *
 * Body: `{ question: string, fileChunks: FileChunk[] }`.
 * Builds a prompt from the question and the file chunks and streams the
 * completion back to the client as it is produced.
 *
 * Responds 405 for non-POST requests, 400 for an invalid body, and 500 when
 * the completion fails before any output has been sent. If the failure
 * happens after streaming has started, the response is simply ended.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse<Data>
) {
  // Only accept POST requests
  if (req.method !== "POST") {
    res.status(405).json({ error: "Method not allowed" });
    return;
  }

  const fileChunks = req.body?.fileChunks as FileChunk[];

  const question = req.body?.question as string;

  if (!Array.isArray(fileChunks)) {
    res.status(400).json({ error: "fileChunks must be an array" });
    return;
  }

  if (typeof question !== "string" || !question) {
    res.status(400).json({ error: "question must be a string" });
    return;
  }

  try {
    // Each chunk is introduced by "###" and its quoted filename; the combined
    // text is truncated so the prompt stays within MAX_FILES_LENGTH characters.
    const filesString = fileChunks
      .map((fileChunk) => `###\n\"${fileChunk.filename}\"\n${fileChunk.text}`)
      .join("\n")
      .slice(0, MAX_FILES_LENGTH);

    const prompt =
      `Given a question, try to answer it using the content of the file extracts below, and if you cannot answer, or find a relevant file, just output \"I couldn't find the answer to that question in your files.\".\n\n` +
      `If the answer is not contained in the files or if there are no file extracts, respond with \"I couldn't find the answer to that question in your files.\" If the question is not actually a question, respond with \"That's not a valid question.\"\n\n` +
      // The trailing "\n\n" keeps this paragraph from running into the next one.
      `In the cases where you can find the answer, first give the answer. Then explain how you found the answer from the source or sources, and use the exact filenames of the source files you mention. Do not make up the names of any other files other than those mentioned in the files context. Give the answer in markdown format.\n\n` +
      `Use the following format:\n\nQuestion: <question>\n\nFiles:\n<###\n\"filename 1\"\nfile text>\n<###\n\"filename 2\"\nfile text>...\n\nAnswer: <answer or "I couldn't find the answer to that question in your files" or "That's not a valid question.">\n\n` +
      `Question: ${question}\n\n` +
      `Files:\n${filesString}\n\n` +
      `Answer:`;

    const stream = completionStream({
      prompt,
      model: "text-davinci-003",
    });

    const streamingHeaders = {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      Connection: "keep-alive",
    };

    // Write the data from the stream to the response. The headers are sent
    // with the first chunk, so a failure before any output can still be
    // reported with a 500 status.
    for await (const data of stream) {
      if (!res.headersSent) {
        res.writeHead(200, streamingHeaders);
      }
      res.write(data);
    }

    // The stream produced no data: still answer with an empty 200 response.
    if (!res.headersSent) {
      res.writeHead(200, streamingHeaders);
    }

    // End the response when the stream is done
    res.end();
  } catch (error) {
    console.error(error);
    if (res.headersSent) {
      // Status and headers are already on the wire; setting them again would
      // throw, so just close the response.
      res.end();
      return;
    }
    res.status(500).json({ error: "Something went wrong" });
  }
}
|
65
apps/file-q-and-a/nextjs/src/pages/api/process-file.ts
Normal file
65
apps/file-q-and-a/nextjs/src/pages/api/process-file.ts
Normal file
@ -0,0 +1,65 @@
|
||||
import type { NextApiRequest, NextApiResponse } from "next";
|
||||
import formidable, { Fields, Files } from "formidable"; // to handle file uploads
|
||||
|
||||
import { TextEmbedding } from "../../types/file";
|
||||
import extractTextFromFile from "../../services/extractTextFromFile";
|
||||
import { createEmbeddings } from "../../services/createEmbeddings";
|
||||
|
||||
// Disable the default body parser to handle file uploads
|
||||
export const config = { api: { bodyParser: false } };
|
||||
|
||||
type Data = {
  text?: string;
  meanEmbedding?: number[];
  chunks?: TextEmbedding[];
  error?: string;
};

/**
 * POST /api/process-file
 *
 * Receives a file as a multipart form (field name `file`), extracts its text
 * and creates embeddings for that text. Responds with the extracted text,
 * the mean embedding and the embedded chunks.
 *
 * Responds 405 for non-POST requests, 400 when the `file` field is missing,
 * repeated or empty, and 500 with the error message when processing fails.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse<Data>
) {
  if (req.method !== "POST") {
    res.status(405).json({ error: "Method not allowed" });
    return;
  }

  // formidable parses the multipart body; its callback API is wrapped in a
  // promise below so it can be awaited.
  const form = new formidable.IncomingForm();
  try {
    const parsed = await new Promise<{ fields: Fields; files: Files }>(
      (resolve, reject) => {
        form.parse(req, (err, fields, files) => {
          if (err) {
            reject(err);
            return;
          }
          resolve({ fields, files } as { fields: Fields; files: Files });
        });
      }
    );

    // Exactly one non-empty upload is expected under the "file" field.
    const upload = parsed.files.file;
    if (!upload || Array.isArray(upload) || upload.size === 0) {
      res.status(400).json({ error: "Invalid or missing file" });
      return;
    }

    const text = await extractTextFromFile({
      filepath: upload.filepath,
      filetype: upload.mimetype ?? "",
    });

    const embeddings = await createEmbeddings({ text });

    res.status(200).json({
      text,
      meanEmbedding: embeddings.meanEmbedding,
      chunks: embeddings.chunks,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  } finally {
    // Always send a response, even if it is empty
    res.end();
  }
}
|
59
apps/file-q-and-a/nextjs/src/pages/api/search-file-chunks.ts
Normal file
59
apps/file-q-and-a/nextjs/src/pages/api/search-file-chunks.ts
Normal file
@ -0,0 +1,59 @@
|
||||
import type { NextApiRequest, NextApiResponse } from "next";
|
||||
|
||||
import { searchFileChunks } from "../../services/searchFileChunks";
|
||||
import { FileChunk, FileLite } from "../../types/file";
|
||||
|
||||
type Data = {
|
||||
searchResults?: FileChunk[];
|
||||
error?: string;
|
||||
};
|
||||
|
||||
export const config = {
|
||||
api: {
|
||||
bodyParser: {
|
||||
sizeLimit: "30mb",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
// API route: ranks the chunks of the supplied files against a search query.
//
// Expects a POST with a JSON body { searchQuery, files, maxResults } and
// responds with:
//   200 { searchResults }  the chunks returned by searchFileChunks
//   400 { error }          when a body field is missing or has the wrong type
//   405 { error }          for any method other than POST
//   500 { error }          when the search itself throws
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse<Data>
) {
  if (req.method !== "POST") {
    res.setHeader("Allow", "POST");
    res.status(405).json({ error: "Method not allowed" });
    return;
  }

  try {
    // The body can be absent when no JSON payload was sent
    const body = (req.body ?? {}) as {
      searchQuery?: unknown;
      files?: unknown;
      maxResults?: unknown;
    };

    const searchQuery = body.searchQuery;
    const files = body.files as FileLite[];

    // Accept numeric strings ("10") as well as numbers for backward compatibility
    const maxResults =
      typeof body.maxResults === "string"
        ? Number(body.maxResults)
        : body.maxResults;

    if (typeof searchQuery !== "string" || !searchQuery) {
      res.status(400).json({ error: "searchQuery must be a string" });
      return;
    }

    if (!Array.isArray(files) || files.length === 0) {
      res.status(400).json({ error: "files must be a non-empty array" });
      return;
    }

    if (
      typeof maxResults !== "number" ||
      !Number.isFinite(maxResults) ||
      maxResults < 1
    ) {
      res
        .status(400)
        .json({ error: "maxResults must be a number greater than 0" });
      return;
    }

    const searchResults = await searchFileChunks({
      searchQuery,
      files,
      maxResults,
    });

    res.status(200).json({ searchResults });
  } catch (error) {
    // Log the details server-side; the client only gets a generic message
    console.error(error);

    res.status(500).json({ error: "Something went wrong" });
  }
}
|
35
apps/file-q-and-a/nextjs/src/pages/index.tsx
Normal file
35
apps/file-q-and-a/nextjs/src/pages/index.tsx
Normal file
@ -0,0 +1,35 @@
|
||||
import Head from "next/head";
|
||||
import { useState } from "react";
|
||||
|
||||
import FileQandAArea from "../components/FileQandAArea";
|
||||
import { FileLite } from "../types/file";
|
||||
import FileUploadArea from "../components/FileUploadArea";
|
||||
|
||||
// Landing page: holds the list of uploaded files in state, lets the upload
// area update it, and passes the current list to the question-answering area.
export default function FileQandA() {
  const [files, setFiles] = useState<FileLite[]>([]);

  // Introductory copy shown under the heading
  const intro =
    "To search for answers from the content in your files, upload them here and we will use OpenAI embeddings and GPT to find answers from the relevant documents.";

  // Limits handed to the upload area
  const uploadLimits = { maxNumFiles: 75, maxFileSizeMB: 30 };

  return (
    <div className="flex items-left text-left h-screen flex-col">
      <Head>
        <title>File Q&A</title>
      </Head>
      <div className="max-w-3xl mx-auto m-8 space-y-8 text-gray-800">
        <h1 className="text-4xl">File Q&A</h1>

        <div className="">{intro}</div>

        <FileUploadArea handleSetFiles={setFiles} {...uploadLimits} />

        <FileQandAArea files={files} />
      </div>
    </div>
  );
}
|
74
apps/file-q-and-a/nextjs/src/services/chunkText.ts
Normal file
74
apps/file-q-and-a/nextjs/src/services/chunkText.ts
Normal file
@ -0,0 +1,74 @@
|
||||
// Splits a text into smaller pieces of roughly equal length.
// The pieces are built from whole sentences (the text is split on periods), so
// words are never broken apart. This is useful for processing long texts with
// language models that have a limited input size.
//
// Parameters:
//   text          - the input text to be split
//   maxCharLength - the desired length of each piece in characters. This uses
//                   4 characters as an approximation of the average token
//                   length since there isn't a good JS tokenizer at the moment.
//
// Returns the pieces in their original order. Every sentence of the input ends
// up in exactly one piece. A piece is closed once it reaches 50% of
// maxCharLength and is never grown beyond 150% of it; a single sentence longer
// than that becomes an oversized piece of its own.
export function chunkText({
  text,
  maxCharLength = 250 * 4,
}: {
  text: string;
  maxCharLength?: number;
}): string[] {
  const chunks: string[] = [];
  let currentChunk = "";

  // Tolerance range of 50% around the maximum length
  const lowerBound = maxCharLength - maxCharLength * 0.5;
  const upperBound = maxCharLength + maxCharLength * 0.5;

  // Saves the current piece (if it has any content) and starts a new one.
  // Leading periods and spaces are leftovers from the previous sentence
  // boundary, so they are stripped before saving.
  const flushCurrentChunk = () => {
    const cleaned = currentChunk.replace(/^[. ]+/, "").trim();
    if (cleaned) chunks.push(cleaned);
    currentChunk = "";
  };

  // Replace newlines with spaces and split by periods, keeping each period as
  // its own element. This assumes that periods mark the end of sentences,
  // which may not be true for some languages.
  const sentences = text.replace(/\n/g, " ").split(/([.])/);

  for (const sentence of sentences) {
    const trimmedSentence = sentence.trim();

    // Skip empty fragments produced by the split
    if (!trimmedSentence) continue;

    // Length the current piece would have with this sentence (and a separator)
    const chunkLength = currentChunk.length + trimmedSentence.length + 1;

    if (chunkLength > upperBound) {
      // Too long: save what we have and start a new piece with this sentence
      flushCurrentChunk();
      currentChunk = trimmedSentence;
      continue;
    }

    // The sentence fits: append it, with a space unless it is a lone period
    const hadContent = currentChunk.length > 0;
    currentChunk += `${trimmedSentence === "." ? "" : " "}${trimmedSentence}`;

    // Just right: the piece (now including this sentence) is within the
    // tolerance range, so save it. A piece that was empty before this
    // sentence is kept open so its closing period can still be attached.
    if (hadContent && chunkLength >= lowerBound) {
      flushCurrentChunk();
    }
  }

  // Save any remaining piece, cleaned the same way as all the others
  flushCurrentChunk();

  return chunks;
}
|
54
apps/file-q-and-a/nextjs/src/services/createEmbeddings.ts
Normal file
54
apps/file-q-and-a/nextjs/src/services/createEmbeddings.ts
Normal file
@ -0,0 +1,54 @@
|
||||
import { TextEmbedding } from "../types/file";
|
||||
import { getEmbeddingsForText } from "./getEmbeddingsForText";
|
||||
|
||||
export type Embeddings = {
|
||||
meanEmbedding: number[];
|
||||
chunks: TextEmbedding[];
|
||||
};
|
||||
|
||||
// Computes the embeddings for every chunk of `text` and their element-wise mean.
//
// Parameters:
//   text          - the text to embed
//   maxCharLength - optional chunk length, forwarded to getEmbeddingsForText
//
// Returns { meanEmbedding, chunks }. With zero or one chunk the mean is simply
// that chunk's embedding (or an empty array). Any error is logged and turned
// into an empty result instead of being thrown.
export async function createEmbeddings({
  text,
  maxCharLength,
}: {
  text: string;
  maxCharLength?: number;
}): Promise<Embeddings> {
  try {
    const chunks = await getEmbeddingsForText({ text, maxCharLength });
    const chunkCount = chunks.length;

    // Nothing to average when there is at most one embedding
    if (chunkCount <= 1) {
      return { meanEmbedding: chunks[0]?.embedding ?? [], chunks };
    }

    // Average the embeddings dimension by dimension
    const dimensions = chunks[0].embedding.length;
    const meanEmbedding = Array.from({ length: dimensions }, (_, dimension) => {
      const total = chunks.reduce(
        (runningSum, chunk) => runningSum + chunk.embedding[dimension],
        0
      );
      return total / chunkCount;
    });

    return { meanEmbedding, chunks };
  } catch (error: any) {
    console.log("Error: ", error);
    return { meanEmbedding: [], chunks: [] };
  }
}
|
45
apps/file-q-and-a/nextjs/src/services/extractTextFromFile.ts
Normal file
45
apps/file-q-and-a/nextjs/src/services/extractTextFromFile.ts
Normal file
@ -0,0 +1,45 @@
|
||||
import fs from "fs";
|
||||
import mammoth from "mammoth";
|
||||
import pdfParse from "pdf-parse";
|
||||
import { NodeHtmlMarkdown } from "node-html-markdown";
|
||||
|
||||
// Extracts the text content of an uploaded file.
//
// Parameters:
//   filepath - path of the file on disk
//   filetype - MIME type of the file; selects the extraction strategy
//
// Returns the extracted text. Throws Error("Unsupported file type") for any
// MIME type not handled below, and rejects with the underlying error when the
// file cannot be read or parsed.
export default async function extractTextFromFile({
  filepath,
  filetype,
}: {
  filepath: string;
  filetype: string;
}): Promise<string> {
  // The file is only read for the types that need its contents in memory;
  // mammoth reads the docx from its path, and unsupported types need no read.
  switch (filetype) {
    case "application/pdf": {
      const pdfData = await pdfParse(await fs.promises.readFile(filepath));
      return pdfData.text;
    }
    case "application/vnd.openxmlformats-officedocument.wordprocessingml.document": {
      // i.e. docx file
      const docxResult = await mammoth.extractRawText({ path: filepath });
      return docxResult.value;
    }
    case "text/markdown":
    case "text/csv":
    case "text/html": {
      // These three types all go through the HTML-to-Markdown translator
      const html = await fs.promises.readFile(filepath, "utf8");
      return NodeHtmlMarkdown.translate(html);
    }
    case "text/plain":
      return fs.promises.readFile(filepath, "utf8");
    default:
      throw new Error("Unsupported file type");
  }
}
|
@ -0,0 +1,42 @@
|
||||
import { TextEmbedding } from "../types/file";
|
||||
import { chunkText } from "./chunkText";
|
||||
import { embedding } from "./openai";
|
||||
|
||||
// There isn't a good JS tokenizer at the moment, so we are using this approximation of 4 characters per token instead. This might break for some languages.
|
||||
const MAX_CHAR_LENGTH = 250 * 4;
|
||||
|
||||
// Splits a text into chunks and returns one { embedding, text } entry per chunk.
//
// Parameters:
//   text          - the text to embed
//   maxCharLength - maximum character length passed to chunkText
//   batchSize     - number of chunks sent to the embedding call at once
//
// All batches are requested concurrently. If any request fails, the error is
// logged and an empty array is returned.
export async function getEmbeddingsForText({
  text,
  maxCharLength = MAX_CHAR_LENGTH,
  batchSize = 20,
}: {
  text: string;
  maxCharLength?: number;
  batchSize?: number;
}): Promise<TextEmbedding[]> {
  const textChunks = chunkText({ text, maxCharLength });

  // Group the chunks into consecutive batches of at most `batchSize`
  const batches: string[][] = [];
  let start = 0;
  while (start < textChunks.length) {
    batches.push(textChunks.slice(start, start + batchSize));
    start += batchSize;
  }

  try {
    const vectorsPerBatch = await Promise.all(
      batches.map((batch) => embedding({ input: batch }))
    );

    // Flattening restores the chunk order, so positions line up with textChunks
    return vectorsPerBatch.flat().map((vector, position) => ({
      embedding: vector,
      text: textChunks[position],
    }));
  } catch (error: any) {
    console.log("Error: ", error);
    return [];
  }
}
|
111
apps/file-q-and-a/nextjs/src/services/openai.ts
Normal file
111
apps/file-q-and-a/nextjs/src/services/openai.ts
Normal file
@ -0,0 +1,111 @@
|
||||
import { IncomingMessage } from "http";
|
||||
import {
|
||||
Configuration,
|
||||
CreateCompletionRequest,
|
||||
CreateCompletionResponse,
|
||||
OpenAIApi,
|
||||
} from "openai";
|
||||
|
||||
// This file contains utility functions for interacting with the OpenAI API
|
||||
|
||||
// Fail fast at module load when the API key is not configured
const apiKey = process.env.OPENAI_API_KEY;
if (!apiKey) {
  throw new Error("Missing OPENAI_API_KEY environment variable");
}

// Shared OpenAI client used by the helpers in this file
export const openai = new OpenAIApi(new Configuration({ apiKey }));
|
||||
|
||||
type CompletionOptions = Partial<CreateCompletionRequest> & {
|
||||
prompt: string;
|
||||
fallback?: string;
|
||||
};
|
||||
|
||||
type EmbeddingOptions = {
|
||||
input: string | string[];
|
||||
model?: string;
|
||||
};
|
||||
|
||||
// Requests a single completion and returns the text of the first choice.
//
// Parameters (CompletionOptions):
//   prompt       - the prompt to complete
//   fallback     - text returned instead of throwing when the request fails
//   max_tokens, temperature, model - request settings with defaults below
//   otherOptions - any further request fields, forwarded unchanged
//
// Throws when the request fails or returns no text and no (non-empty)
// fallback was given.
export async function completion({
  prompt,
  fallback,
  max_tokens = 800,
  temperature = 0,
  model = "text-davinci-003",
  ...otherOptions
}: CompletionOptions) {
  try {
    const { data } = await openai.createCompletion({
      prompt,
      max_tokens,
      temperature,
      model,
      ...otherOptions,
    });

    const text = data.choices[0].text;
    if (!text) {
      throw new Error("No text returned from the completions endpoint.");
    }
    return text;
  } catch (error) {
    if (!fallback) throw error;
    return fallback;
  }
}
|
||||
|
||||
// Requests a streamed completion and yields the text of each streamed event as
// it arrives.
//
// Parameters (CompletionOptions):
//   prompt       - the prompt to complete
//   fallback     - text yielded instead of throwing when the request or the
//                  stream fails
//   max_tokens, temperature, model - request settings with defaults below
//   otherOptions - any further request fields, forwarded unchanged (as in
//                  `completion`); `stream` is always forced to true
//
// The response is a server-sent-event stream made of lines such as
// `data: {json}` and terminated by `data: [DONE]`. Network chunks do not align
// with events: one chunk may carry several events or only part of one, so the
// input is buffered and processed line by line.
export async function* completionStream({
  prompt,
  fallback,
  max_tokens = 800,
  temperature = 0,
  model = "text-davinci-003",
  ...otherOptions
}: CompletionOptions) {
  try {
    const result = await openai.createCompletion(
      {
        prompt,
        max_tokens,
        temperature,
        model,
        ...otherOptions,
        stream: true,
      },
      { responseType: "stream" }
    );

    const stream = result.data as any as IncomingMessage;

    // Streaming decoder so multi-byte characters split across chunks survive
    const decoder = new TextDecoder();
    // Text received so far that does not yet end in a newline
    let pending = "";

    for await (const chunk of stream) {
      pending +=
        typeof chunk === "string"
          ? chunk
          : decoder.decode(chunk, { stream: true });

      // Everything before the last newline is complete; keep the rest buffered
      const lines = pending.split("\n");
      pending = lines.pop() ?? "";

      for (const rawLine of lines) {
        const line = rawLine.trim();

        // Skip blank separator lines and anything that is not a data field
        if (!line.startsWith("data:")) continue;

        const message = line.slice("data:".length).trim();

        if (message === "[DONE]") {
          return;
        }

        const data = JSON.parse(message) as CreateCompletionResponse;

        yield data.choices[0].text;
      }
    }
  } catch (error) {
    if (fallback) yield fallback;
    else throw error;
  }
}
|
||||
|
||||
// Requests embeddings for one input string or a list of input strings.
//
// Parameters (EmbeddingOptions):
//   input - a string or an array of strings to embed
//   model - the embedding model to use
//
// Returns one embedding vector per item of the response, in response order.
// Throws when the response contains no embedding.
export async function embedding({
  input,
  model = "text-embedding-ada-002",
}: EmbeddingOptions): Promise<number[][]> {
  const result = await openai.createEmbedding({
    model,
    input,
  });

  const items = result.data.data;

  // Optional chaining also covers an empty or missing `data` array
  if (!items?.[0]?.embedding) {
    throw new Error("No embedding returned from the embeddings endpoint");
  }

  return items.map((item) => item.embedding);
}
|
53
apps/file-q-and-a/nextjs/src/services/searchFileChunks.ts
Normal file
53
apps/file-q-and-a/nextjs/src/services/searchFileChunks.ts
Normal file
@ -0,0 +1,53 @@
|
||||
import { FileLite, FileChunk } from "../types/file";
|
||||
import { embedding } from "./openai";
|
||||
|
||||
// This is the minimum cosine similarity score that a file must have with the search query to be considered relevant
|
||||
// This is an arbitrary value, and you should vary/ remove this depending on the diversity of your dataset
|
||||
const COSINE_SIM_THRESHOLD = 0.72;
|
||||
|
||||
// Finds the file chunks that are most semantically similar to a search query.
//
// Parameters:
//   searchQuery - the query text; it is embedded with the same embedding helper
//   files       - the files whose chunks are searched (files without chunks are skipped)
//   maxResults  - the maximum number of chunks to return
//
// Each chunk is scored with the dot product of its embedding and the query
// embedding (equal to cosine similarity when the embeddings are normalized).
// Returns the highest-scoring chunks above COSINE_SIM_THRESHOLD, best first,
// each annotated with its file name and score.
export async function searchFileChunks({
  searchQuery,
  files,
  maxResults,
}: {
  searchQuery: string;
  files: FileLite[];
  maxResults: number;
}): Promise<FileChunk[]> {
  // The helper returns one vector per input; fall back to an empty vector
  const [queryVector = []] = await embedding({ input: searchQuery });

  // Score every chunk of every file
  const scoredChunks: (FileChunk & { score: number })[] = [];
  for (const file of files) {
    if (!file.chunks) continue;

    for (const chunk of file.chunks) {
      let score = 0;
      chunk.embedding.forEach((value, position) => {
        score += value * queryVector[position];
      });
      scoredChunks.push({ ...chunk, filename: file.name, score });
    }
  }

  // Best matches first, drop everything at or below the threshold, cap the count
  return scoredChunks
    .sort((first, second) => second.score - first.score)
    .filter((candidate) => candidate.score > COSINE_SIM_THRESHOLD)
    .slice(0, maxResults);
}
|
14
apps/file-q-and-a/nextjs/src/services/utils.ts
Normal file
14
apps/file-q-and-a/nextjs/src/services/utils.ts
Normal file
@ -0,0 +1,14 @@
|
||||
// Characters removed before comparing: common punctuation and all whitespace.
// The hyphen is placed last so it is a literal; written as `=-_` it forms a
// range from "=" to "_" and the hyphen itself is never matched. The characters
// that range used to cover (> ? @ [ \ ] ^) are listed explicitly so they are
// still removed.
const PUNCTUATION_AND_WHITESPACE = /[.,\/#!$%^&*;:{}=>?@[\\\]_~()\s-]/g;

// Returns true if the file name is contained in the string, comparing both
// case-insensitively and after removing punctuation and whitespace from both.
//
// Parameters:
//   fileName - the file name to look for
//   str      - the text to search in
export const isFileNameInString = (fileName: string, str: string) => {
  // Convert both to lowercase and remove punctuation and whitespace
  const normalizedFileName = fileName
    .toLowerCase()
    .replace(PUNCTUATION_AND_WHITESPACE, "");
  const normalizedStr = str
    .toLowerCase()
    .replace(PUNCTUATION_AND_WHITESPACE, "");

  // Return true if the normalized file name is included in the normalized string
  return normalizedStr.includes(normalizedFileName);
};
|
5
apps/file-q-and-a/nextjs/src/styles/globals.css
Normal file
5
apps/file-q-and-a/nextjs/src/styles/globals.css
Normal file
@ -0,0 +1,5 @@
|
||||
@import "./preflight.css";
|
||||
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
368
apps/file-q-and-a/nextjs/src/styles/preflight.css
Normal file
368
apps/file-q-and-a/nextjs/src/styles/preflight.css
Normal file
@ -0,0 +1,368 @@
|
||||
/* Using a custom preflight to fix conflicts with Ant Design */
|
||||
/* Original: https://unpkg.com/tailwindcss@3.2.4/src/css/preflight.css */
|
||||
|
||||
/*
|
||||
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
|
||||
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
|
||||
*/
|
||||
|
||||
*,
|
||||
::before,
|
||||
::after {
|
||||
box-sizing: border-box; /* 1 */
|
||||
border-width: 0; /* 2 */
|
||||
border-style: solid; /* 2 */
|
||||
border-color: theme("borderColor.DEFAULT"); /* 2 */
|
||||
}
|
||||
|
||||
::before,
|
||||
::after {
|
||||
--tw-content: "";
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use a consistent sensible line-height in all browsers.
|
||||
2. Prevent adjustments of font size after orientation changes in iOS.
|
||||
3. Use a more readable tab size.
|
||||
4. Use the user's configured `sans` font-family by default.
|
||||
5. Use the user's configured `sans` font-feature-settings by default.
|
||||
*/
|
||||
|
||||
html {
|
||||
line-height: 1.5; /* 1 */
|
||||
-webkit-text-size-adjust: 100%; /* 2 */
|
||||
-moz-tab-size: 4; /* 3 */
|
||||
tab-size: 4; /* 3 */
|
||||
font-family: theme("fontFamily.sans"); /* 4 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove the margin in all browsers.
|
||||
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
|
||||
*/
|
||||
|
||||
body {
|
||||
margin: 0; /* 1 */
|
||||
line-height: inherit; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Add the correct height in Firefox.
|
||||
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
|
||||
3. Ensure horizontal rules are visible by default.
|
||||
*/
|
||||
|
||||
hr {
|
||||
height: 0; /* 1 */
|
||||
color: inherit; /* 2 */
|
||||
border-top-width: 1px; /* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct text decoration in Chrome, Edge, and Safari.
|
||||
*/
|
||||
|
||||
abbr:where([title]) {
|
||||
text-decoration: underline dotted;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the default font size and weight for headings.
|
||||
*/
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6 {
|
||||
font-size: inherit;
|
||||
font-weight: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Reset links to optimize for opt-in styling instead of opt-out.
|
||||
*/
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font weight in Edge and Safari.
|
||||
*/
|
||||
|
||||
b,
|
||||
strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use the user's configured `mono` font family by default.
|
||||
2. Correct the odd `em` font sizing in all browsers.
|
||||
*/
|
||||
|
||||
code,
|
||||
kbd,
|
||||
samp,
|
||||
pre {
|
||||
font-family: theme("fontFamily.mono"); /* 1 */
|
||||
font-size: 1em; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font size in all browsers.
|
||||
*/
|
||||
|
||||
small {
|
||||
font-size: 80%;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
|
||||
*/
|
||||
|
||||
sub,
|
||||
sup {
|
||||
font-size: 75%;
|
||||
line-height: 0;
|
||||
position: relative;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
sub {
|
||||
bottom: -0.25em;
|
||||
}
|
||||
|
||||
sup {
|
||||
top: -0.5em;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
|
||||
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
|
||||
3. Remove gaps between table borders by default.
|
||||
*/
|
||||
|
||||
table {
|
||||
text-indent: 0; /* 1 */
|
||||
border-color: inherit; /* 2 */
|
||||
border-collapse: collapse; /* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Change the font styles in all browsers.
|
||||
2. Remove the margin in Firefox and Safari.
|
||||
3. Remove default padding in all browsers.
|
||||
*/
|
||||
|
||||
button,
|
||||
input,
|
||||
optgroup,
|
||||
select,
|
||||
textarea {
|
||||
font-family: inherit; /* 1 */
|
||||
font-size: 100%; /* 1 */
|
||||
font-weight: inherit; /* 1 */
|
||||
line-height: inherit; /* 1 */
|
||||
color: inherit; /* 1 */
|
||||
margin: 0; /* 2 */
|
||||
padding: 0; /* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inheritance of text transform in Edge and Firefox.
|
||||
*/
|
||||
|
||||
button,
|
||||
select {
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Remove default button styles.
|
||||
*/
|
||||
|
||||
button,
|
||||
[type="button"],
|
||||
[type="reset"],
|
||||
[type="submit"] {
|
||||
-webkit-appearance: button; /* 1 */
|
||||
background-image: none; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Use the modern Firefox focus style for all focusable elements.
|
||||
*/
|
||||
|
||||
:-moz-focusring {
|
||||
outline: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
|
||||
*/
|
||||
|
||||
:-moz-ui-invalid {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct vertical alignment in Chrome and Firefox.
|
||||
*/
|
||||
|
||||
progress {
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
/*
|
||||
Correct the cursor style of increment and decrement buttons in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-inner-spin-button,
|
||||
::-webkit-outer-spin-button {
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the odd appearance in Chrome and Safari.
|
||||
2. Correct the outline style in Safari.
|
||||
*/
|
||||
|
||||
[type="search"] {
|
||||
-webkit-appearance: textfield; /* 1 */
|
||||
outline-offset: -2px; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inner padding in Chrome and Safari on macOS.
|
||||
*/
|
||||
|
||||
::-webkit-search-decoration {
|
||||
-webkit-appearance: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Change font properties to `inherit` in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-file-upload-button {
|
||||
-webkit-appearance: button; /* 1 */
|
||||
font: inherit; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct display in Chrome and Safari.
|
||||
*/
|
||||
|
||||
summary {
|
||||
display: list-item;
|
||||
}
|
||||
|
||||
/*
|
||||
Removes the default spacing and border for appropriate elements.
|
||||
*/
|
||||
|
||||
blockquote,
|
||||
dl,
|
||||
dd,
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
hr,
|
||||
figure,
|
||||
p,
|
||||
pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
fieldset {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
legend {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ol,
|
||||
ul,
|
||||
menu {
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent resizing textareas horizontally by default.
|
||||
*/
|
||||
|
||||
textarea {
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
|
||||
2. Set the default placeholder color to the user's configured gray 400 color.
|
||||
*/
|
||||
|
||||
input::placeholder,
|
||||
textarea::placeholder {
|
||||
opacity: 1; /* 1 */
|
||||
color: theme("colors.gray.400"); /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Set the default cursor for buttons.
|
||||
*/
|
||||
|
||||
button,
|
||||
[role="button"] {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
Make sure disabled buttons don't get the pointer cursor.
|
||||
*/
|
||||
:disabled {
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
|
||||
This can trigger a poorly considered lint error in some tools but is included by design.
|
||||
*/
|
||||
|
||||
img,
|
||||
svg,
|
||||
video,
|
||||
canvas,
|
||||
audio,
|
||||
iframe,
|
||||
embed,
|
||||
object {
|
||||
display: block; /* 1 */
|
||||
vertical-align: middle; /* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
*/
|
||||
|
||||
img,
|
||||
video {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/* Make elements with the HTML hidden attribute stay hidden by default */
|
||||
[hidden] {
|
||||
display: none;
|
||||
}
|
21
apps/file-q-and-a/nextjs/src/types/file.ts
Normal file
21
apps/file-q-and-a/nextjs/src/types/file.ts
Normal file
@ -0,0 +1,21 @@
|
||||
// A piece of text together with its embedding vector
export interface TextEmbedding {
  text: string;
  embedding: number[];
}

// A text chunk annotated with the name of the file it belongs to
export interface FileChunk extends TextEmbedding {
  filename: string;
  score?: number;
}

// Lightweight description of a file handled by the app
export interface FileLite {
  name: string;
  type?: string;
  size?: number;
  url?: string;
  expanded?: boolean;
  score?: number;
  // The extracted text from the file
  extractedText?: string;
  // The file embedding -- or mean embedding if there are multiple embeddings for the file
  embedding?: number[];
  // The chunks of text and their embeddings
  chunks?: TextEmbedding[];
}
|
28
apps/file-q-and-a/nextjs/tailwind.config.js
Normal file
28
apps/file-q-and-a/nextjs/tailwind.config.js
Normal file
@ -0,0 +1,28 @@
|
||||
const { fontFamily } = require("tailwindcss/defaultTheme");
|
||||
|
||||
/** @type {import('tailwindcss').Config} */
|
||||
module.exports = {
|
||||
content: [
|
||||
"./app/**/*.{js,ts,jsx,tsx}",
|
||||
"./src/**/*.{js,ts,jsx,tsx}",
|
||||
"./pages/**/*.{js,ts,jsx,tsx}",
|
||||
"./components/**/*.{js,ts,jsx,tsx}",
|
||||
],
|
||||
corePlugins: {
|
||||
preflight: false,
|
||||
},
|
||||
theme: {
|
||||
extend: {
|
||||
},
|
||||
},
|
||||
keyframes: {
|
||||
blink: {
|
||||
"0%, 100%": { opacity: 1 },
|
||||
"50%": { opacity: 0 },
|
||||
},
|
||||
},
|
||||
plugins: [
|
||||
require("@tailwindcss/line-clamp"),
|
||||
require("@tailwindcss/typography"),
|
||||
],
|
||||
};
|
24
apps/file-q-and-a/nextjs/tsconfig.json
Normal file
24
apps/file-q-and-a/nextjs/tsconfig.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es5",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "node",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "preserve",
|
||||
"incremental": true,
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user