matt HOFFNER
committed on
Commit
·
a98334b
1
Parent(s):
caaca78
fix
Browse files
- app/layout.tsx +1 -1
- app/tools/search.ts +1 -1
- app/tools/surfer.ts +1 -1
- pages/api/functions/index.ts +16 -13
app/layout.tsx
CHANGED
|
@@ -5,7 +5,7 @@ import './globals.css'
|
|
| 5 |
const inter = Inter({ subsets: ['latin'] })
|
| 6 |
|
| 7 |
export const metadata: Metadata = {
|
| 8 |
-
title: '
|
| 9 |
description: 'Navigate to URLs and perform realtime similarity search',
|
| 10 |
}
|
| 11 |
|
|
|
|
| 5 |
const inter = Inter({ subsets: ['latin'] })
|
| 6 |
|
| 7 |
export const metadata: Metadata = {
|
| 8 |
+
title: 'Functions Playground',
|
| 9 |
description: 'Navigate to URLs and perform realtime similarity search',
|
| 10 |
}
|
| 11 |
|
app/tools/search.ts
CHANGED
|
@@ -6,7 +6,7 @@ function createSearchApi({ apiKey }: { apiKey: string }) {
|
|
| 6 |
const paramsSchema = z.object({
|
| 7 |
input: z.string(),
|
| 8 |
});
|
| 9 |
-
const name = '
|
| 10 |
const description = 'A custom search engine. Useful for when you need to answer questions about current events. Input should be a search query. Outputs a JSON array of results.';
|
| 11 |
|
| 12 |
const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
|
|
|
|
| 6 |
const paramsSchema = z.object({
|
| 7 |
input: z.string(),
|
| 8 |
});
|
| 9 |
+
const name = 'searchApi';
|
| 10 |
const description = 'A custom search engine. Useful for when you need to answer questions about current events. Input should be a search query. Outputs a JSON array of results.';
|
| 11 |
|
| 12 |
const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
|
app/tools/surfer.ts
CHANGED
|
@@ -5,7 +5,7 @@ function createUrlSurfer() {
|
|
| 5 |
const paramsSchema = z.object({
|
| 6 |
input: z.string(),
|
| 7 |
});
|
| 8 |
-
const name = '
|
| 9 |
const description = 'A custom URL navigator. Useful when a URL is provided with a question. Input should be a prompt with a URL. Outputs a JSON array of relevant results.';
|
| 10 |
|
| 11 |
return new Tool(paramsSchema, name, description, {} as any).tool;
|
|
|
|
| 5 |
const paramsSchema = z.object({
|
| 6 |
input: z.string(),
|
| 7 |
});
|
| 8 |
+
const name = 'surfer';
|
| 9 |
const description = 'A custom URL navigator. Useful when a URL is provided with a question. Input should be a prompt with a URL. Outputs a JSON array of relevant results.';
|
| 10 |
|
| 11 |
return new Tool(paramsSchema, name, description, {} as any).tool;
|
pages/api/functions/index.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
| 2 |
-
import fetch from 'node-fetch';
|
| 3 |
import { JSDOM } from 'jsdom';
|
| 4 |
// @ts-ignore
|
| 5 |
import pdfParse from 'pdf-parse';
|
|
@@ -7,7 +7,7 @@ import puppeteer from 'puppeteer';
|
|
| 7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 8 |
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
| 9 |
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
| 10 |
-
import {
|
| 11 |
|
| 12 |
export const config = {
|
| 13 |
api: {
|
|
@@ -33,15 +33,25 @@ const model = new HuggingFaceTransformersEmbeddings({
|
|
| 33 |
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
| 34 |
|
| 35 |
const [serpApi] =
|
| 36 |
-
|
| 37 |
apiKey: process.env.SERP_API_KEY || "",
|
| 38 |
});
|
| 39 |
|
| 40 |
const handleContentText = async (targetUrl: string) => {
|
| 41 |
const response = await fetch(targetUrl);
|
|
|
|
| 42 |
const contentType = response.headers.get('content-type') || '';
|
| 43 |
let content;
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
const buffer = await response.arrayBuffer();
|
| 46 |
content = await extractTextFromPDF(buffer as any);
|
| 47 |
} else if (contentType.includes('text/html')) {
|
|
@@ -50,20 +60,13 @@ const handleContentText = async (targetUrl: string) => {
|
|
| 50 |
const scripts = dom.window.document.querySelectorAll('script, style');
|
| 51 |
scripts.forEach(element => element.remove());
|
| 52 |
content = dom.window.document.body.textContent || '';
|
| 53 |
-
|
| 54 |
-
if (!content.trim()) {
|
| 55 |
-
const browser = await puppeteer.launch();
|
| 56 |
-
const page = await browser.newPage();
|
| 57 |
-
await page.goto(targetUrl);
|
| 58 |
-
content = await page.evaluate(() => document.body.innerText);
|
| 59 |
-
await browser.close();
|
| 60 |
-
}
|
| 61 |
} else {
|
| 62 |
content = await response.text();
|
| 63 |
}
|
| 64 |
-
return content;
|
| 65 |
}
|
| 66 |
|
|
|
|
| 67 |
const surferEmbedApi = async ({ input }: any) => {
|
| 68 |
const urls = input.match(urlRegex);
|
| 69 |
const targetUrl = urls ? urls[0] : null;
|
|
|
|
| 1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
| 2 |
+
import fetch, { RequestInfo } from 'node-fetch';
|
| 3 |
import { JSDOM } from 'jsdom';
|
| 4 |
// @ts-ignore
|
| 5 |
import pdfParse from 'pdf-parse';
|
|
|
|
| 7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 8 |
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
| 9 |
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
| 10 |
+
import { createSearchApi } from '../../../app/tools/search'
|
| 11 |
|
| 12 |
export const config = {
|
| 13 |
api: {
|
|
|
|
| 33 |
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
| 34 |
|
| 35 |
const [serpApi] =
|
| 36 |
+
createSearchApi({
|
| 37 |
apiKey: process.env.SERP_API_KEY || "",
|
| 38 |
});
|
| 39 |
|
| 40 |
const handleContentText = async (targetUrl: string) => {
|
| 41 |
const response = await fetch(targetUrl);
|
| 42 |
+
const status = response.status;
|
| 43 |
const contentType = response.headers.get('content-type') || '';
|
| 44 |
let content;
|
| 45 |
+
|
| 46 |
+
if (status >= 400) {
|
| 47 |
+
// If status is 400 or greater, try using puppeteer
|
| 48 |
+
const browser = await puppeteer.launch();
|
| 49 |
+
const page = await browser.newPage();
|
| 50 |
+
await page.goto(targetUrl, { waitUntil: 'networkidle0' }); // waits for the network to be idle before considering the navigation to be finished.
|
| 51 |
+
content = await page.evaluate(() => document.body.innerText);
|
| 52 |
+
await browser.close();
|
| 53 |
+
return content;
|
| 54 |
+
} else if (contentType.includes('application/pdf')) {
|
| 55 |
const buffer = await response.arrayBuffer();
|
| 56 |
content = await extractTextFromPDF(buffer as any);
|
| 57 |
} else if (contentType.includes('text/html')) {
|
|
|
|
| 60 |
const scripts = dom.window.document.querySelectorAll('script, style');
|
| 61 |
scripts.forEach(element => element.remove());
|
| 62 |
content = dom.window.document.body.textContent || '';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
} else {
|
| 64 |
content = await response.text();
|
| 65 |
}
|
| 66 |
+
return content.trim();
|
| 67 |
}
|
| 68 |
|
| 69 |
+
|
| 70 |
const surferEmbedApi = async ({ input }: any) => {
|
| 71 |
const urls = input.match(urlRegex);
|
| 72 |
const targetUrl = urls ? urls[0] : null;
|