matt HOFFNER committed on
Commit
a98334b
•
1 Parent(s): caaca78
app/layout.tsx CHANGED
@@ -5,7 +5,7 @@ import './globals.css'
5
  const inter = Inter({ subsets: ['latin'] })
6
 
7
  export const metadata: Metadata = {
8
- title: '🔗 URL Surfer 🏄‍♂️',
9
  description: 'Navigate to URLs and perform realtime similarity search',
10
  }
11
 
 
5
  const inter = Inter({ subsets: ['latin'] })
6
 
7
  export const metadata: Metadata = {
8
+ title: 'Functions Playground',
9
  description: 'Navigate to URLs and perform realtime similarity search',
10
  }
11
 
app/tools/search.ts CHANGED
@@ -6,7 +6,7 @@ function createSearchApi({ apiKey }: { apiKey: string }) {
6
  const paramsSchema = z.object({
7
  input: z.string(),
8
  });
9
- const name = 'Search API';
10
  const description = 'A custom search engine. Useful for when you need to answer questions about current events. Input should be a search query. Outputs a JSON array of results.';
11
 
12
  const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
 
6
  const paramsSchema = z.object({
7
  input: z.string(),
8
  });
9
+ const name = 'searchApi';
10
  const description = 'A custom search engine. Useful for when you need to answer questions about current events. Input should be a search query. Outputs a JSON array of results.';
11
 
12
  const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
app/tools/surfer.ts CHANGED
@@ -5,7 +5,7 @@ function createUrlSurfer() {
5
  const paramsSchema = z.object({
6
  input: z.string(),
7
  });
8
- const name = 'URL Surfer';
9
  const description = 'A custom URL navigator. Useful when a URL is provided with a question. Input should be a prompt with a URL. Outputs a JSON array of relevant results.';
10
 
11
  return new Tool(paramsSchema, name, description, {} as any).tool;
 
5
  const paramsSchema = z.object({
6
  input: z.string(),
7
  });
8
+ const name = 'surfer';
9
  const description = 'A custom URL navigator. Useful when a URL is provided with a question. Input should be a prompt with a URL. Outputs a JSON array of relevant results.';
10
 
11
  return new Tool(paramsSchema, name, description, {} as any).tool;
pages/api/functions/index.ts CHANGED
@@ -1,5 +1,5 @@
1
  import { NextApiRequest, NextApiResponse } from 'next';
2
- import fetch from 'node-fetch';
3
  import { JSDOM } from 'jsdom';
4
  // @ts-ignore
5
  import pdfParse from 'pdf-parse';
@@ -7,7 +7,7 @@ import puppeteer from 'puppeteer';
7
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
8
  import { MemoryVectorStore } from 'langchain/vectorstores/memory';
9
  import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
10
- import { createSerpApi } from '../../../app/tools/search'
11
 
12
  export const config = {
13
  api: {
@@ -33,15 +33,25 @@ const model = new HuggingFaceTransformersEmbeddings({
33
  const urlRegex = /(https?:\/\/[^\s]+)/g;
34
 
35
  const [serpApi] =
36
- createSerpApi({
37
  apiKey: process.env.SERP_API_KEY || "",
38
  });
39
 
40
  const handleContentText = async (targetUrl: string) => {
41
  const response = await fetch(targetUrl);
 
42
  const contentType = response.headers.get('content-type') || '';
43
  let content;
44
- if (contentType.includes('application/pdf')) {
 
 
 
 
 
 
 
 
 
45
  const buffer = await response.arrayBuffer();
46
  content = await extractTextFromPDF(buffer as any);
47
  } else if (contentType.includes('text/html')) {
@@ -50,20 +60,13 @@ const handleContentText = async (targetUrl: string) => {
50
  const scripts = dom.window.document.querySelectorAll('script, style');
51
  scripts.forEach(element => element.remove());
52
  content = dom.window.document.body.textContent || '';
53
-
54
- if (!content.trim()) {
55
- const browser = await puppeteer.launch();
56
- const page = await browser.newPage();
57
- await page.goto(targetUrl);
58
- content = await page.evaluate(() => document.body.innerText);
59
- await browser.close();
60
- }
61
  } else {
62
  content = await response.text();
63
  }
64
- return content;
65
  }
66
 
 
67
  const surferEmbedApi = async ({ input }: any) => {
68
  const urls = input.match(urlRegex);
69
  const targetUrl = urls ? urls[0] : null;
 
1
  import { NextApiRequest, NextApiResponse } from 'next';
2
+ import fetch, { RequestInfo } from 'node-fetch';
3
  import { JSDOM } from 'jsdom';
4
  // @ts-ignore
5
  import pdfParse from 'pdf-parse';
 
7
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
8
  import { MemoryVectorStore } from 'langchain/vectorstores/memory';
9
  import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
10
+ import { createSearchApi } from '../../../app/tools/search'
11
 
12
  export const config = {
13
  api: {
 
33
  const urlRegex = /(https?:\/\/[^\s]+)/g;
34
 
35
  const [serpApi] =
36
+ createSearchApi({
37
  apiKey: process.env.SERP_API_KEY || "",
38
  });
39
 
40
  const handleContentText = async (targetUrl: string) => {
41
  const response = await fetch(targetUrl);
42
+ const status = response.status;
43
  const contentType = response.headers.get('content-type') || '';
44
  let content;
45
+
46
+ if (status >= 400) {
47
+ // If status is 400 or greater, try using puppeteer
48
+ const browser = await puppeteer.launch();
49
+ const page = await browser.newPage();
50
+ await page.goto(targetUrl, { waitUntil: 'networkidle0' }); // waits for the network to be idle before considering the navigation to be finished.
51
+ content = await page.evaluate(() => document.body.innerText);
52
+ await browser.close();
53
+ return content;
54
+ } else if (contentType.includes('application/pdf')) {
55
  const buffer = await response.arrayBuffer();
56
  content = await extractTextFromPDF(buffer as any);
57
  } else if (contentType.includes('text/html')) {
 
60
  const scripts = dom.window.document.querySelectorAll('script, style');
61
  scripts.forEach(element => element.remove());
62
  content = dom.window.document.body.textContent || '';
 
 
 
 
 
 
 
 
63
  } else {
64
  content = await response.text();
65
  }
66
+ return content.trim();
67
  }
68
 
69
+
70
  const surferEmbedApi = async ({ input }: any) => {
71
  const urls = input.match(urlRegex);
72
  const targetUrl = urls ? urls[0] : null;