|
import { NextApiRequest, NextApiResponse } from 'next'; |
|
import fetch from 'node-fetch'; |
|
import { JSDOM } from 'jsdom'; |
|
|
|
import pdfParse from 'pdf-parse'; |
|
import puppeteer from 'puppeteer'; |
|
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; |
|
import { MemoryVectorStore } from 'langchain/vectorstores/memory'; |
|
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers"; |
|
import { createSerpApi } from '../../../app/tools/serp-api' |
|
|
|
/**
 * Route-level configuration exported for Next.js.
 *
 * Caps the request body accepted by the built-in body parser at 1 MB.
 */
export const config = {
  api: {
    bodyParser: {
      sizeLimit: '1mb',
    },
  },
};
|
|
|
/** Chunk size handed to the shared text splitter as its `chunkSize` option. */
const DEFAULT_CHUNK_SIZE = 1000;

/** Second argument passed to every `similaritySearch` call in this file (number of results requested). */
const VECTOR_STORE_SIZE = 10;
|
/** Module-wide splitter used to break fetched text into chunks of `DEFAULT_CHUNK_SIZE` before embedding. */
const textSplitter = new RecursiveCharacterTextSplitter({
  chunkSize: DEFAULT_CHUNK_SIZE,
});
|
|
|
/**
 * Extracts the text of a PDF held in memory.
 *
 * @param buffer Raw bytes of the PDF document.
 * @returns The `text` field of the result produced by `pdf-parse`.
 */
async function extractTextFromPDF(buffer: Buffer): Promise<string> {
  const { text } = await pdfParse(buffer);
  return text;
}
|
|
|
/**
 * Embedding model handed to every `MemoryVectorStore` built in this file.
 * Created once at module load so it is shared by all requests.
 */
const model = new HuggingFaceTransformersEmbeddings({
  modelName: 'Xenova/all-MiniLM-L6-v2',
});
|
|
|
/**
 * Matches every `http://` or `https://` URL in a string, where a URL runs up
 * to the next whitespace character (so trailing punctuation is included).
 * Global so that `match` returns all URLs and `replace` strips all of them.
 */
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
|
|
/**
 * Search helper: the first element of the tuple returned by `createSerpApi`.
 * Configured from the `SERP_API_KEY` environment variable, falling back to an
 * empty key when the variable is unset or empty.
 */
const [serpApi] = createSerpApi({
  apiKey: process.env.SERP_API_KEY || '',
});
|
|
|
/**
 * Downloads `targetUrl` and returns its readable text.
 *
 * The strategy depends on the response's `content-type` header:
 * - `application/pdf`: the body is parsed with `extractTextFromPDF`.
 * - `text/html`: the markup is loaded into JSDOM, `<script>` and `<style>`
 *   elements are removed and the body's text content is used. When that text
 *   is blank, the URL is opened in a Puppeteer-launched browser and
 *   `document.body.innerText` is returned instead.
 * - anything else: the raw response text is returned.
 *
 * NOTE(review): the URL is taken from the request prompt and fetched without
 * any allow-list — confirm that server-side requests to arbitrary hosts are
 * acceptable for this deployment.
 *
 * @param targetUrl Absolute URL of the resource to fetch.
 * @returns The extracted text, which may be empty.
 */
const handleContentText = async (targetUrl: string): Promise<string> => {
  const response = await fetch(targetUrl);
  const contentType = response.headers.get('content-type') || '';

  if (contentType.includes('application/pdf')) {
    // The PDF helper is typed to take a Node Buffer, so wrap the ArrayBuffer
    // rather than silencing the mismatch with `as any`.
    const pdfBytes = Buffer.from(await response.arrayBuffer());
    return extractTextFromPDF(pdfBytes);
  }

  if (!contentType.includes('text/html')) {
    return response.text();
  }

  const html = await response.text();
  const dom = new JSDOM(html);
  dom.window.document
    .querySelectorAll('script, style')
    .forEach((element) => element.remove());

  const staticText = dom.window.document.body.textContent || '';
  if (staticText.trim()) {
    return staticText;
  }

  // The static markup carried no text: fall back to a real browser.
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(targetUrl);
    return await page.evaluate(() => document.body.innerText);
  } finally {
    // Always shut the browser down, even when navigation or evaluation
    // throws; otherwise each failed request leaks a browser process.
    await browser.close();
  }
};
|
|
|
/**
 * Answers a prompt that references a web page.
 *
 * The first URL found in `input` is fetched with `handleContentText`; all URLs
 * are stripped from the prompt. The fetched text is split into chunks,
 * embedded into an in-memory vector store, and the store is queried with the
 * URL-free prompt (requesting `VECTOR_STORE_SIZE` results).
 *
 * @param input Prompt text, expected to contain at least one http(s) URL.
 * @returns A string embedding the JSON-encoded matching chunks followed by the
 *   URL-free prompt, or a fallback message that still carries the prompt when
 *   no URL is present or no content could be extracted.
 */
const surferEmbedApi = async ({ input }: any) => {
  const urls: string[] | null = input.match(urlRegex);
  const promptWithoutUrl: string = urls ? input.replace(urlRegex, '').trim() : input;

  // Without a URL there is nothing to fetch. Previously a null URL reached
  // `fetch` and surfaced as an opaque error; answer gracefully instead.
  if (!urls) {
    return `Couldn't find a URL in the prompt, here is the prompt: ${promptWithoutUrl}`;
  }
  const targetUrl = urls[0];

  const content: string = await handleContentText(targetUrl);
  if (!content) {
    return `Couldn't find ${targetUrl}, here is the prompt: ${promptWithoutUrl}`;
  }

  const documents = await textSplitter.createDocuments([content]);
  const vectorStore = await MemoryVectorStore.fromTexts(
    documents.map((doc) => doc.pageContent),
    // Each chunk's metadata is simply its position in the split.
    documents.map((_, index) => index),
    model,
  );

  const queryResult = await vectorStore.similaritySearch(promptWithoutUrl, VECTOR_STORE_SIZE);

  return `Here is the context: ${JSON.stringify(queryResult.map((result) => result.pageContent))} from using the prompt to lookup relevant information. Here is the prompt: ${promptWithoutUrl}`;
};
|
|
|
/**
 * Answers a prompt from search results.
 *
 * Passes `input` to `serpApi`, splits the returned text into chunks, embeds
 * them into an in-memory vector store and queries that store with the same
 * `input` (requesting `VECTOR_STORE_SIZE` results).
 *
 * @param input Prompt text used both as the search query and as the
 *   similarity query.
 * @returns The documents returned by the similarity search.
 */
const serpEmbedApi = async ({ input }: any) => {
  const content: string = await serpApi({ input });
  const documents = await textSplitter.createDocuments([content]);

  const vectorStore = await MemoryVectorStore.fromTexts(
    documents.map((doc) => doc.pageContent),
    // Metadata stays the bare chunk index: these documents are returned to the
    // caller as-is, so their shape is part of the response.
    documents.map((_, index) => index),
    model,
  );

  return vectorStore.similaritySearch(input, VECTOR_STORE_SIZE);
};
|
|
|
/**
 * API route entry point.
 *
 * Reads `prompt` and `name` from the request body. When `name` is `'serpApi'`
 * the prompt is answered by `serpEmbedApi`; any other value is answered by
 * `surferEmbedApi`.
 *
 * Responses:
 * - 200 with the selected function's result.
 * - 400 with `{ error }` when `prompt` is missing or not a string.
 * - 500 with `{ error }` when the selected function throws.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  const body = req.body || {};
  const prompt = body.prompt;
  const functionName = body.name as string;

  // Reject malformed input up front instead of letting it fail deep inside
  // the embed helpers and come back as a confusing 500.
  if (typeof prompt !== 'string') {
    return res.status(400).json({ error: 'Request body must include a string `prompt`.' });
  }

  try {
    const result =
      functionName === 'serpApi'
        ? await serpEmbedApi({ input: prompt })
        : await surferEmbedApi({ input: prompt });

    return res.status(200).send(result);
  } catch (error) {
    console.error(error);

    // Anything can be thrown, so only read `.message` from real Error objects.
    const message = error instanceof Error ? error.message : String(error);
    return res.status(500).json({ error: message });
  }
}