import type { NextApiRequest, NextApiResponse } from 'next';
import fetch from 'node-fetch';
import { JSDOM } from 'jsdom';
// @ts-ignore -- kept from the original; presumably no type declarations are installed for pdf-parse
import pdfParse from 'pdf-parse';
import puppeteer from 'puppeteer';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
import { HuggingFaceTransformersEmbeddings } from 'langchain/embeddings/hf_transformers';
import { createSerpApi } from '../../../app/tools/serp-api';

/** Next.js API route configuration: request bodies are limited to 1 MB. */
export const config = {
  api: {
    bodyParser: {
      sizeLimit: '1mb',
    },
  },
};

/** Chunk size handed to the text splitter. */
const DEFAULT_CHUNK_SIZE = 1000;
/** Result count passed to every `similaritySearch` call. */
const VECTOR_STORE_SIZE = 10;

const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: DEFAULT_CHUNK_SIZE });

/**
 * Extracts the text of a PDF document.
 *
 * @param buffer Raw PDF bytes.
 * @returns The `text` field produced by pdf-parse.
 */
async function extractTextFromPDF(buffer: Buffer): Promise<string> {
  const data = await pdfParse(buffer);
  return data.text;
}

const model = new HuggingFaceTransformersEmbeddings({
  modelName: 'Xenova/all-MiniLM-L6-v2',
});

// Global flag: `match` returns every URL in the prompt and `replace` strips all of them.
const urlRegex = /(https?:\/\/[^\s]+)/g;

const [serpApi] = createSerpApi({
  apiKey: process.env.SERP_API_KEY ?? '',
});

/** Returns the text content of an HTML document's body with `<script>` and `<style>` elements removed. */
const extractTextFromHtml = (html: string): string => {
  const doc = new JSDOM(html).window.document;
  doc.querySelectorAll('script, style').forEach((element) => element.remove());
  return doc.body.textContent ?? '';
};

/** Loads the page in a Puppeteer browser and returns `document.body.innerText`. */
const renderPageText = async (targetUrl: string): Promise<string> => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(targetUrl);
    return await page.evaluate(() => document.body.innerText);
  } finally {
    // Close the browser even when navigation or evaluation throws, so no process is leaked.
    await browser.close();
  }
};

/**
 * Downloads `targetUrl` and converts the response to plain text.
 *
 * - `application/pdf` responses are parsed with pdf-parse.
 * - `text/html` responses are reduced to their body text; if that text is blank,
 *   the page is rendered with Puppeteer instead.
 * - Any other content type is returned as the raw response text.
 *
 * NOTE(security): the URL originates from the request prompt, so this performs a
 * server-side fetch of a caller-controlled address (SSRF surface). Consider an
 * allow-list or blocking private address ranges before exposing this publicly.
 *
 * @param targetUrl Absolute URL to download.
 * @returns The extracted text, or an empty string when the server answers with a non-2xx status.
 * @throws When the request itself, PDF parsing or the browser fallback fails.
 */
const handleContentText = async (targetUrl: string): Promise<string> => {
  const response = await fetch(targetUrl);
  if (!response.ok) {
    // An error page is not usable context; report "no content" to the caller instead.
    return '';
  }

  const contentType = response.headers.get('content-type') ?? '';

  if (contentType.includes('application/pdf')) {
    return extractTextFromPDF(Buffer.from(await response.arrayBuffer()));
  }

  if (contentType.includes('text/html')) {
    const staticText = extractTextFromHtml(await response.text());
    // Blank body text: fall back to rendering the page in a real browser.
    return staticText.trim() ? staticText : renderPageText(targetUrl);
  }

  return response.text();
};

/**
 * Splits `content` into chunks, embeds them into an in-memory vector store and
 * returns the chunks most similar to `query` (at most `VECTOR_STORE_SIZE`).
 */
const searchSimilarChunks = async (content: string, query: string) => {
  const documents = await textSplitter.createDocuments([content]);
  const texts = documents.map((doc) => doc.pageContent);
  // Each chunk's metadata is its numeric index, exactly as before, so the
  // documents returned to API clients keep the same JSON shape.
  const chunkIndexes = texts.map((_, index) => index);
  // @ts-ignore -- numeric metadata is intentional (see above); the original suppressed the same argument
  const vectorStore = await MemoryVectorStore.fromTexts(texts, chunkIndexes, model);
  return vectorStore.similaritySearch(query, VECTOR_STORE_SIZE);
};

/**
 * Answers a prompt that embeds a URL: downloads the first URL found, then returns
 * a string containing the page chunks most similar to the prompt (with all URLs
 * removed) followed by the prompt itself.
 *
 * @param input Prompt text, expected to contain at least one http(s) URL.
 * @returns A context + prompt string, or a fallback string when no URL is present
 *          or no content could be retrieved.
 */
const surferEmbedApi = async ({ input }: { input: string }): Promise<string> => {
  const urls = input.match(urlRegex);
  const targetUrl = urls ? urls[0] : null;
  const promptWithoutUrl = urls ? input.replace(urlRegex, '').trim() : input;

  if (!targetUrl) {
    // Previously a null URL was passed to fetch, which threw and became a 500.
    return `No URL found in the prompt, here is the prompt: ${promptWithoutUrl}`;
  }

  const content = await handleContentText(targetUrl);
  if (!content) {
    return `Couldn't find ${targetUrl}, here is the prompt: ${promptWithoutUrl}`;
  }

  const queryResult = await searchSimilarChunks(content, promptWithoutUrl);
  return `Here is the context: ${JSON.stringify(queryResult.map(result => result.pageContent))} from using the prompt to lookup relevant information. Here is the prompt: ${promptWithoutUrl}`;
};

/**
 * Runs the prompt through the SERP API tool and returns the result chunks most
 * similar to the prompt.
 *
 * @param input Search prompt.
 * @returns The documents returned by the similarity search.
 */
const serpEmbedApi = async ({ input }: { input: string }) => {
  const content: string = await serpApi({ input });
  return searchSimilarChunks(content, input);
};

/**
 * API route entry point. Reads `prompt` and `name` from the request body and
 * dispatches to the SERP search when `name` is `'serpApi'`, otherwise to the
 * URL surfer.
 *
 * Responds with 200 and the tool result, 400 when `prompt` is not a string, and
 * 500 with `{ error }` when the tool throws.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse): Promise<void> {
  const prompt: unknown = req.body?.prompt;
  const functionName: unknown = req.body?.name;

  if (typeof prompt !== 'string') {
    res.status(400).json({ error: 'Request body must include a string "prompt".' });
    return;
  }

  try {
    const result =
      functionName === 'serpApi'
        ? await serpEmbedApi({ input: prompt })
        : await surferEmbedApi({ input: prompt });
    res.status(200).send(result);
  } catch (error: unknown) {
    console.error(error);
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: message });
  }
}