matt HOFFNER
committed on
Commit
·
a73e8b4
1
Parent(s):
da2af98
refactor
Browse files- app/api/chat/route.ts +4 -1
- app/tools/odds.ts +48 -0
- package-lock.json +47 -0
- package.json +1 -0
- pages/api/functions/embed.ts +42 -0
- pages/api/functions/index.ts +6 -97
- pages/api/functions/utils.ts +40 -0
- pages/api/functions/vector-store.ts +23 -0
app/api/chat/route.ts
CHANGED
|
@@ -2,9 +2,11 @@ import { Configuration, OpenAIApi } from "openai-edge";
|
|
| 2 |
import { OpenAIStream, StreamingTextResponse } from "ai";
|
| 3 |
import { createUrlSurfer } from "@/app/tools/surfer";
|
| 4 |
import { createSearchApi } from "@/app/tools/search";
|
|
|
|
| 5 |
|
| 6 |
const [, urlSurferSchema] = createUrlSurfer();
|
| 7 |
const [, serpApiSchema] = createSearchApi({ apiKey: process.env.SERP_API_KEY || '' });
|
|
|
|
| 8 |
|
| 9 |
const config = new Configuration({
|
| 10 |
apiKey: process.env.OPENAI_API_KEY,
|
|
@@ -13,7 +15,8 @@ const openai = new OpenAIApi(config);
|
|
| 13 |
|
| 14 |
const functions: any[] = [
|
| 15 |
urlSurferSchema,
|
| 16 |
-
serpApiSchema
|
|
|
|
| 17 |
];
|
| 18 |
|
| 19 |
export async function POST(req: Request) {
|
|
|
|
| 2 |
import { OpenAIStream, StreamingTextResponse } from "ai";
|
| 3 |
import { createUrlSurfer } from "@/app/tools/surfer";
|
| 4 |
import { createSearchApi } from "@/app/tools/search";
|
| 5 |
+
import { createOddsApi } from "@/app/tools/odds";
|
| 6 |
|
| 7 |
const [, urlSurferSchema] = createUrlSurfer();
|
| 8 |
const [, serpApiSchema] = createSearchApi({ apiKey: process.env.SERP_API_KEY || '' });
|
| 9 |
+
const [, oddsApiSchema] = createOddsApi({ apiKey: process.env.ODDS_API_KEY || '' });
|
| 10 |
|
| 11 |
const config = new Configuration({
|
| 12 |
apiKey: process.env.OPENAI_API_KEY,
|
|
|
|
| 15 |
|
| 16 |
const functions: any[] = [
|
| 17 |
urlSurferSchema,
|
| 18 |
+
serpApiSchema,
|
| 19 |
+
oddsApiSchema
|
| 20 |
];
|
| 21 |
|
| 22 |
export async function POST(req: Request) {
|
app/tools/odds.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Tool } from 'openai-function-calling-tools';
|
| 2 |
+
import { z } from 'zod';
|
| 3 |
+
|
| 4 |
+
/**
 * Picks the sport key for the request from free-form text.
 * Matching is case-insensitive so "NBA" and "nba" behave the same; text that
 * mentions neither league falls back to the `upcoming` key.
 * Other keys (e.g. `americanfootball_nfl_super_bowl_winner`) are not mapped yet.
 */
function resolveSportKey(input: string): string {
  const text = input.toLowerCase();
  if (text.includes('nba')) {
    return 'basketball_nba';
  }
  if (text.includes('nfl')) {
    return 'americanfootball_nfl';
  }
  return 'upcoming';
}

/**
 * Picks the betting market for the request from free-form text.
 * Returns the plural keys used by the v4 `markets` parameter
 * (`spreads`, `totals`), defaulting to head-to-head (`h2h`).
 */
function resolveMarket(input: string): string {
  const text = input.toLowerCase();
  if (text.includes('spread')) {
    return 'spreads';
  }
  if (text.includes('o/u')) {
    return 'totals';
  }
  return 'h2h';
}

/**
 * Builds the `oddsApi` function-calling tool.
 *
 * @param apiKey - key sent as the `apiKey` query parameter of every request.
 * @returns the `.tool` value of the constructed `Tool` (callers destructure it
 *          as `[execute, schema]`).
 *
 * The executor resolves a sport and market from the input text, requests
 * American-format odds for the `us` region, and returns the parsed JSON body
 * re-serialized as a string.
 *
 * @throws Error prefixed with "Error in oddsApi:" when the request or the
 *         JSON parsing fails.
 */
function createOddsApi({ apiKey }: { apiKey: string }) {
  const paramsSchema = z.object({
    input: z.string(),
  });
  const name = 'oddsApi';
  const description = 'A realtime Sports Odds API. Useful for when you need to answer questions about sports odds, currently NBA and NFL. Input should be a sport and a corresponding game. Outputs a JSON array of results.';

  const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
    try {
      const sportKey = resolveSportKey(input);

      // URLSearchParams percent-encodes every value, so an API key containing
      // reserved characters cannot corrupt the query string.
      const query = new URLSearchParams({
        apiKey,
        oddsFormat: 'american',
        dateFormat: 'iso',
        markets: resolveMarket(input),
        regions: 'us',
      });

      const response = await fetch(`https://api.the-odds-api.com/v4/sports/${sportKey}/odds?${query.toString()}`);
      const oddsResponse = await response.json();
      return JSON.stringify(oddsResponse);
    } catch (error) {
      throw new Error(`Error in oddsApi: ${error}`);
    }
  };

  return new Tool(paramsSchema, name, description, execute).tool;
}
|
| 47 |
+
|
| 48 |
+
export { createOddsApi };
|
package-lock.json
CHANGED
|
@@ -30,6 +30,7 @@
|
|
| 30 |
"react-markdown": "^9.0.0",
|
| 31 |
"serpapi": "^2.0.0",
|
| 32 |
"sonner": "^1.1.0",
|
|
|
|
| 33 |
"zod": "^3.22.4",
|
| 34 |
"zod-to-json-schema": "^3.21.4"
|
| 35 |
},
|
|
@@ -7792,6 +7793,52 @@
|
|
| 7792 |
"integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==",
|
| 7793 |
"dev": true
|
| 7794 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7795 |
"node_modules/through": {
|
| 7796 |
"version": "2.3.8",
|
| 7797 |
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
|
|
|
|
| 30 |
"react-markdown": "^9.0.0",
|
| 31 |
"serpapi": "^2.0.0",
|
| 32 |
"sonner": "^1.1.0",
|
| 33 |
+
"the-odds-api": "^2.1.0",
|
| 34 |
"zod": "^3.22.4",
|
| 35 |
"zod-to-json-schema": "^3.21.4"
|
| 36 |
},
|
|
|
|
| 7793 |
"integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==",
|
| 7794 |
"dev": true
|
| 7795 |
},
|
| 7796 |
+
"node_modules/the-odds-api": {
|
| 7797 |
+
"version": "2.1.0",
|
| 7798 |
+
"resolved": "https://registry.npmjs.org/the-odds-api/-/the-odds-api-2.1.0.tgz",
|
| 7799 |
+
"integrity": "sha512-Xil75sSw/WJSD4Af5314AQKw7KFNIEiU9NQxVRYbouDzUmCy2HbaL/6PLRP3ExqYx6xp/3D3vL7rmHHXqHYLPw==",
|
| 7800 |
+
"dependencies": {
|
| 7801 |
+
"node-fetch": "^2.3.0"
|
| 7802 |
+
}
|
| 7803 |
+
},
|
| 7804 |
+
"node_modules/the-odds-api/node_modules/node-fetch": {
|
| 7805 |
+
"version": "2.7.0",
|
| 7806 |
+
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
| 7807 |
+
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
| 7808 |
+
"dependencies": {
|
| 7809 |
+
"whatwg-url": "^5.0.0"
|
| 7810 |
+
},
|
| 7811 |
+
"engines": {
|
| 7812 |
+
"node": "4.x || >=6.0.0"
|
| 7813 |
+
},
|
| 7814 |
+
"peerDependencies": {
|
| 7815 |
+
"encoding": "^0.1.0"
|
| 7816 |
+
},
|
| 7817 |
+
"peerDependenciesMeta": {
|
| 7818 |
+
"encoding": {
|
| 7819 |
+
"optional": true
|
| 7820 |
+
}
|
| 7821 |
+
}
|
| 7822 |
+
},
|
| 7823 |
+
"node_modules/the-odds-api/node_modules/tr46": {
|
| 7824 |
+
"version": "0.0.3",
|
| 7825 |
+
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
| 7826 |
+
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
|
| 7827 |
+
},
|
| 7828 |
+
"node_modules/the-odds-api/node_modules/webidl-conversions": {
|
| 7829 |
+
"version": "3.0.1",
|
| 7830 |
+
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
| 7831 |
+
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
|
| 7832 |
+
},
|
| 7833 |
+
"node_modules/the-odds-api/node_modules/whatwg-url": {
|
| 7834 |
+
"version": "5.0.0",
|
| 7835 |
+
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
| 7836 |
+
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
| 7837 |
+
"dependencies": {
|
| 7838 |
+
"tr46": "~0.0.3",
|
| 7839 |
+
"webidl-conversions": "^3.0.0"
|
| 7840 |
+
}
|
| 7841 |
+
},
|
| 7842 |
"node_modules/through": {
|
| 7843 |
"version": "2.3.8",
|
| 7844 |
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
|
package.json
CHANGED
|
@@ -31,6 +31,7 @@
|
|
| 31 |
"react-markdown": "^9.0.0",
|
| 32 |
"serpapi": "^2.0.0",
|
| 33 |
"sonner": "^1.1.0",
|
|
|
|
| 34 |
"zod": "^3.22.4",
|
| 35 |
"zod-to-json-schema": "^3.21.4"
|
| 36 |
},
|
|
|
|
| 31 |
"react-markdown": "^9.0.0",
|
| 32 |
"serpapi": "^2.0.0",
|
| 33 |
"sonner": "^1.1.0",
|
| 34 |
+
"the-odds-api": "^2.1.0",
|
| 35 |
"zod": "^3.22.4",
|
| 36 |
"zod-to-json-schema": "^3.21.4"
|
| 37 |
},
|
pages/api/functions/embed.ts
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { createSearchApi } from '../../../app/tools/search';
|
| 2 |
+
import { createOddsApi } from '@/app/tools/odds';
|
| 3 |
+
import { handleContentText } from './utils';
|
| 4 |
+
import { similaritySearch } from './vector-store';
|
| 5 |
+
|
| 6 |
+
// Global pattern matching every http(s) URL (a run of non-whitespace after the scheme).
const urlRegex = /(https?:\/\/[^\s]+)/g;

// Each tool factory's result is destructured for its first element only,
// which this module invokes as an async function taking `{ input }`.
const [serpApi] = createSearchApi({ apiKey: process.env.SERP_API_KEY || "" });
const [oddsApi] = createOddsApi({ apiKey: process.env.ODDS_API_KEY || "" });

// Handler signatures are intentionally loose for now.
type FunctionInput = any;
type FunctionOutput = any;
|
| 16 |
+
|
| 17 |
+
/**
 * Handler for odds questions: calls the odds tool with the raw input, then
 * runs a similarity search of that input over the tool's output.
 *
 * @param input - the text forwarded both to the odds tool and to the search.
 * @returns whatever `similaritySearch` resolves to for the fetched content.
 */
export const odds: FunctionOutput = async ({ input }: FunctionInput) => {
  const rawOdds = await oddsApi({ input });
  return similaritySearch(input, rawOdds);
};
|
| 22 |
+
|
| 23 |
+
/**
 * Handler for URL questions: extracts the first URL from the input, loads its
 * text, and runs a similarity search of the remaining prompt over that text.
 *
 * @param input - free-form text expected to contain an http(s) URL.
 * @returns the similarity-search results, or a plain-text fallback message
 *          when the input has no URL or the page yielded no content.
 */
export const surfer: FunctionOutput = async ({ input }: FunctionInput) => {
  const urls = input.match(urlRegex);
  const targetUrl = urls ? urls[0] : null;
  const promptWithoutUrl = urls ? input.replace(urlRegex, '').trim() : input;

  // Without a URL there is nothing to fetch; passing `null` on would make the
  // fetch throw before the fallback below could ever be reached.
  if (!targetUrl) {
    return `No URL found in the input, here is the prompt: ${promptWithoutUrl}`;
  }

  const content: string = await handleContentText(targetUrl);
  if (!content) {
    return `Couldn't find ${targetUrl}, here is the prompt: ${promptWithoutUrl}`;
  }

  return similaritySearch(promptWithoutUrl, content);
};
|
| 36 |
+
|
| 37 |
+
/**
 * Handler for web-search questions: calls the search tool with the raw input,
 * then runs a similarity search of that input over the tool's output.
 *
 * @param input - the text forwarded both to the search tool and to the search.
 * @returns whatever `similaritySearch` resolves to for the search output.
 */
export const serp: FunctionOutput = async ({ input }: FunctionInput) => {
  const searchOutput: string = await serpApi({ input });
  return similaritySearch(input, searchOutput);
};
|
| 42 |
+
|
pages/api/functions/index.ts
CHANGED
|
@@ -1,13 +1,5 @@
|
|
| 1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
| 2 |
-
import
|
| 3 |
-
import { JSDOM } from 'jsdom';
|
| 4 |
-
// @ts-ignore
|
| 5 |
-
import pdfParse from 'pdf-parse';
|
| 6 |
-
import puppeteer from 'puppeteer';
|
| 7 |
-
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 8 |
-
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
| 9 |
-
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
| 10 |
-
import { createSearchApi } from '../../../app/tools/search'
|
| 11 |
|
| 12 |
export const config = {
|
| 13 |
api: {
|
|
@@ -17,95 +9,12 @@ export const config = {
|
|
| 17 |
},
|
| 18 |
};
|
| 19 |
|
| 20 |
-
|
| 21 |
-
const VECTOR_STORE_SIZE = 10;
|
| 22 |
-
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: DEFAULT_CHUNK_SIZE });
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
const model = new HuggingFaceTransformersEmbeddings({
|
| 30 |
-
modelName: "Xenova/all-MiniLM-L6-v2",
|
| 31 |
-
});
|
| 32 |
-
|
| 33 |
-
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
| 34 |
-
|
| 35 |
-
const [serpApi] =
|
| 36 |
-
createSearchApi({
|
| 37 |
-
apiKey: process.env.SERP_API_KEY || "",
|
| 38 |
-
});
|
| 39 |
-
|
| 40 |
-
const handleContentText = async (targetUrl: string) => {
|
| 41 |
-
const response = await fetch(targetUrl);
|
| 42 |
-
const status = response.status;
|
| 43 |
-
const contentType = response.headers.get('content-type') || '';
|
| 44 |
-
let content;
|
| 45 |
-
|
| 46 |
-
if (status >= 400) {
|
| 47 |
-
// If status is 400 or greater, try using puppeteer
|
| 48 |
-
const browser = await puppeteer.launch();
|
| 49 |
-
const page = await browser.newPage();
|
| 50 |
-
await page.goto(targetUrl, { waitUntil: 'networkidle0' }); // waits for the network to be idle before considering the navigation to be finished.
|
| 51 |
-
content = await page.evaluate(() => document.body.innerText);
|
| 52 |
-
await browser.close();
|
| 53 |
-
return content;
|
| 54 |
-
} else if (contentType.includes('application/pdf')) {
|
| 55 |
-
const buffer = await response.arrayBuffer();
|
| 56 |
-
content = await extractTextFromPDF(buffer as any);
|
| 57 |
-
} else if (contentType.includes('text/html')) {
|
| 58 |
-
const html = await response.text();
|
| 59 |
-
const dom = new JSDOM(html);
|
| 60 |
-
const scripts = dom.window.document.querySelectorAll('script, style');
|
| 61 |
-
scripts.forEach(element => element.remove());
|
| 62 |
-
content = dom.window.document.body.textContent || '';
|
| 63 |
-
} else {
|
| 64 |
-
content = await response.text();
|
| 65 |
-
}
|
| 66 |
-
return content.trim();
|
| 67 |
-
}
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
const surferEmbedApi = async ({ input }: any) => {
|
| 71 |
-
const urls = input.match(urlRegex);
|
| 72 |
-
const targetUrl = urls ? urls[0] : null;
|
| 73 |
-
const promptWithoutUrl = urls ? input.replace(urlRegex, '').trim() : input;
|
| 74 |
-
|
| 75 |
-
const content: string = await handleContentText(targetUrl)
|
| 76 |
-
if (!content) {
|
| 77 |
-
return `Couldn't find ${targetUrl}, here is the prompt: ${promptWithoutUrl}`;
|
| 78 |
-
}
|
| 79 |
-
|
| 80 |
-
const documents = await textSplitter.createDocuments([content]);
|
| 81 |
-
|
| 82 |
-
const vectorStore = await MemoryVectorStore.fromTexts(
|
| 83 |
-
// @ts-ignore
|
| 84 |
-
[...documents.map(doc => doc.pageContent)],
|
| 85 |
-
// @ts-ignore
|
| 86 |
-
[...documents.map((v, k) => k)],
|
| 87 |
-
model
|
| 88 |
-
)
|
| 89 |
-
const queryResult = await vectorStore.similaritySearch(promptWithoutUrl, VECTOR_STORE_SIZE);
|
| 90 |
-
return `Here is the context: ${JSON.stringify(queryResult.map(result => result.pageContent))} from using the prompt to lookup relevant information. Here is the prompt: ${promptWithoutUrl}`;
|
| 91 |
-
}
|
| 92 |
-
|
| 93 |
-
const serpEmbedApi = async ({ input }: any) => {
|
| 94 |
-
const content: string = await serpApi({input})
|
| 95 |
-
const documents = await textSplitter.createDocuments([content]);
|
| 96 |
-
const vectorStore = await MemoryVectorStore.fromTexts(
|
| 97 |
-
// @ts-ignore
|
| 98 |
-
[...documents.map(doc => doc.pageContent)],
|
| 99 |
-
// @ts-ignore
|
| 100 |
-
[...documents.map((v, k) => k)],
|
| 101 |
-
model
|
| 102 |
-
)
|
| 103 |
-
const queryResult = await vectorStore.similaritySearch(input, VECTOR_STORE_SIZE);
|
| 104 |
-
return queryResult;
|
| 105 |
-
}
|
| 106 |
-
const handlers: any = {
|
| 107 |
-
'searchApi': serpEmbedApi,
|
| 108 |
-
'surfer': surferEmbedApi
|
| 109 |
};
|
| 110 |
|
| 111 |
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
|
|
|
| 1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
| 2 |
+
import { odds, serp, surfer } from './embed';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
export const config = {
|
| 5 |
api: {
|
|
|
|
| 9 |
},
|
| 10 |
};
|
| 11 |
|
| 12 |
+
type FunctionHandler = any;
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
const handlers: FunctionHandler = {
|
| 15 |
+
'searchApi': serp,
|
| 16 |
+
'surfer': surfer,
|
| 17 |
+
'oddsApi': odds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
};
|
| 19 |
|
| 20 |
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
pages/api/functions/utils.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fetch from 'node-fetch';
|
| 2 |
+
import { JSDOM } from 'jsdom';
|
| 3 |
+
import pdfParse from 'pdf-parse';
|
| 4 |
+
import puppeteer from 'puppeteer';
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
/**
 * Extracts the text of a PDF.
 *
 * @param buffer - the PDF bytes handed to `pdfParse`.
 * @returns the `text` field of the parse result.
 */
export const extractTextFromPDF = async (buffer: Buffer): Promise<string> => {
  const { text } = await pdfParse(buffer);
  return text;
};
|
| 11 |
+
|
| 12 |
+
/**
 * Downloads a URL and reduces the response to trimmed plain text.
 *
 * - status >= 400: the page is re-loaded in a puppeteer browser and the
 *   rendered body text is returned;
 * - `application/pdf`: the body is passed to `extractTextFromPDF`;
 * - `text/html`: `<script>` and `<style>` elements are removed and the body's
 *   text content is used;
 * - anything else: the raw response text is used.
 *
 * @param targetUrl - the URL to fetch.
 * @returns the extracted text with surrounding whitespace removed.
 */
export const handleContentText = async (targetUrl: string) => {
  const response = await fetch(targetUrl);
  const contentType = response.headers.get('content-type') || '';

  if (response.status >= 400) {
    // If status is 400 or greater, try using puppeteer
    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      // waits for the network to be idle before considering the navigation to be finished.
      await page.goto(targetUrl, { waitUntil: 'networkidle0' });
      const renderedText = await page.evaluate(() => document.body.innerText);
      // Trimmed like every other branch so whitespace-only pages read as empty.
      return renderedText.trim();
    } finally {
      // Always release the browser process, even when navigation fails.
      await browser.close();
    }
  }

  let content: string;
  if (contentType.includes('application/pdf')) {
    const arrayBuffer = await response.arrayBuffer();
    // extractTextFromPDF is typed to take a Buffer, so wrap the ArrayBuffer
    // instead of casting it away.
    content = await extractTextFromPDF(Buffer.from(arrayBuffer));
  } else if (contentType.includes('text/html')) {
    const html = await response.text();
    const dom = new JSDOM(html);
    const nonContentElements = dom.window.document.querySelectorAll('script, style');
    nonContentElements.forEach((element) => element.remove());
    content = dom.window.document.body.textContent || '';
  } else {
    content = await response.text();
  }

  return content.trim();
};
|
| 40 |
+
|
pages/api/functions/vector-store.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 2 |
+
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
| 3 |
+
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
| 4 |
+
|
| 5 |
+
// Result count requested from each similarity search in this module.
const VECTOR_STORE_SIZE = 3;
// `chunkSize` handed to the text splitter below.
const DEFAULT_CHUNK_SIZE = 1000;

// Embedding model shared by every in-memory vector store built in this module.
const model = new HuggingFaceTransformersEmbeddings({
  modelName: "Xenova/all-MiniLM-L6-v2",
});

// Splitter used to cut fetched content into chunks before embedding.
const textSplitter = new RecursiveCharacterTextSplitter({
  chunkSize: DEFAULT_CHUNK_SIZE,
});
|
| 12 |
+
|
| 13 |
+
/**
 * Splits `content` into chunks, indexes them in a throwaway in-memory vector
 * store, and returns the chunks most similar to `input`.
 *
 * @param input - the query text to search with.
 * @param content - the text to split, embed and search over.
 * @returns up to `VECTOR_STORE_SIZE` matching documents; an empty array when
 *          the splitter produced no chunks.
 */
export const similaritySearch = async (input: string, content: string) => {
  const documents = await textSplitter.createDocuments([content]);

  // Nothing to index: skip building a store and embedding the query.
  if (documents.length === 0) {
    return [];
  }

  const texts = documents.map((doc) => doc.pageContent);
  // One metadata object per chunk. `fromTexts` expects objects, and the bare
  // numeric indices used previously only type-checked behind `@ts-ignore`.
  const metadatas = documents.map((_doc, chunkIndex) => ({ chunkIndex }));

  const vectorStore = await MemoryVectorStore.fromTexts(texts, metadatas, model);
  return vectorStore.similaritySearch(input, VECTOR_STORE_SIZE);
};
|