matt HOFFNER
committed on
Commit
·
f4e05b5
1
Parent(s):
cb5d7d2
add serp api
Browse files
- README.md +0 -14
- app/api/chat/route.ts +5 -2
- app/page.tsx +4 -2
- app/tools/serp-api.ts +30 -0
- package-lock.json +6 -0
- package.json +1 -0
- pages/api/{surfer → functions}/index.ts +28 -16
README.md
CHANGED
|
@@ -7,17 +7,3 @@ app_port: 3000
|
|
| 7 |
|
| 8 |
# π url-surfer πββοΈ
|
| 9 |
|
| 10 |
-
Simple API to navigate to a URL from a prompt, returning relevant context from the prompt using a vector store.
|
| 11 |
-
|
| 12 |
-
## How it works
|
| 13 |
-
|
| 14 |
-
1. Navigate (parse url/fetch/mime-type check)
|
| 15 |
-
2. Extract text (jsdom/puppeteer/pdf-parse)
|
| 16 |
-
3. Create vector store (transformers.js)
|
| 17 |
-
4. Return vector results in prompt (langchain)
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
## Ideas
|
| 21 |
-
|
| 22 |
-
* Configuration: Vector search size, token counting
|
| 23 |
-
* OpenAI functions integration
|
|
|
|
| 7 |
|
| 8 |
# π url-surfer πββοΈ
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api/chat/route.ts
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
import { Configuration, OpenAIApi } from "openai-edge";
|
| 2 |
import { OpenAIStream, StreamingTextResponse } from "ai";
|
| 3 |
import { createUrlSurfer } from "@/app/tools/surfer";
|
|
|
|
| 4 |
|
| 5 |
-
const [,
|
|
|
|
| 6 |
|
| 7 |
const config = new Configuration({
|
| 8 |
apiKey: process.env.OPENAI_API_KEY,
|
|
@@ -10,7 +12,8 @@ const config = new Configuration({
|
|
| 10 |
const openai = new OpenAIApi(config);
|
| 11 |
|
| 12 |
const functions: any[] = [
|
| 13 |
-
|
|
|
|
| 14 |
];
|
| 15 |
|
| 16 |
export async function POST(req: Request) {
|
|
|
|
| 1 |
import { Configuration, OpenAIApi } from "openai-edge";
|
| 2 |
import { OpenAIStream, StreamingTextResponse } from "ai";
|
| 3 |
import { createUrlSurfer } from "@/app/tools/surfer";
|
| 4 |
+
import { createSerpApi } from "@/app/tools/serp-api";
|
| 5 |
|
| 6 |
+
const [, urlSurferSchema] = createUrlSurfer();
|
| 7 |
+
const [, serpApiSchema] = createSerpApi({ apiKey: process.env.SERP_API_KEY || '' });
|
| 8 |
|
| 9 |
const config = new Configuration({
|
| 10 |
apiKey: process.env.OPENAI_API_KEY,
|
|
|
|
| 12 |
const openai = new OpenAIApi(config);
|
| 13 |
|
| 14 |
const functions: any[] = [
|
| 15 |
+
urlSurferSchema,
|
| 16 |
+
serpApiSchema
|
| 17 |
];
|
| 18 |
|
| 19 |
export async function POST(req: Request) {
|
app/page.tsx
CHANGED
|
@@ -15,13 +15,15 @@ const Page: React.FC = () => {
|
|
| 15 |
functionCall,
|
| 16 |
) => {
|
| 17 |
let result;
|
| 18 |
-
const
|
|
|
|
| 19 |
method: "POST",
|
| 20 |
headers: {
|
| 21 |
"Content-Type": "application/json",
|
| 22 |
},
|
| 23 |
body: JSON.stringify({
|
| 24 |
-
prompt: input
|
|
|
|
| 25 |
})
|
| 26 |
} as any);
|
| 27 |
|
|
|
|
| 15 |
functionCall,
|
| 16 |
) => {
|
| 17 |
let result;
|
| 18 |
+
const { name, arguments: args } = functionCall;
|
| 19 |
+
const response = await fetch("/api/functions", {
|
| 20 |
method: "POST",
|
| 21 |
headers: {
|
| 22 |
"Content-Type": "application/json",
|
| 23 |
},
|
| 24 |
body: JSON.stringify({
|
| 25 |
+
prompt: input,
|
| 26 |
+
name: name
|
| 27 |
})
|
| 28 |
} as any);
|
| 29 |
|
app/tools/serp-api.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Tool } from 'openai-function-calling-tools';
|
| 2 |
+
import { z } from 'zod';
|
| 3 |
+
import { getJson } from 'serpapi';
|
| 4 |
+
|
| 5 |
+
function createSerpApi({ apiKey }: { apiKey: string }) {
|
| 6 |
+
const paramsSchema = z.object({
|
| 7 |
+
input: z.string(),
|
| 8 |
+
});
|
| 9 |
+
const name = 'serpApi';
|
| 10 |
+
const description = 'A custom search engine. Useful for when you need to answer questions about current events. Input should be a search query. Outputs a JSON array of results.';
|
| 11 |
+
|
| 12 |
+
const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
|
| 13 |
+
try {
|
| 14 |
+
const response = await getJson({
|
| 15 |
+
engine: "google",
|
| 16 |
+
api_key: apiKey,
|
| 17 |
+
q: input,
|
| 18 |
+
location: "Seattle, Washington",
|
| 19 |
+
});
|
| 20 |
+
|
| 21 |
+
return JSON.stringify(response);
|
| 22 |
+
} catch (error) {
|
| 23 |
+
throw new Error(`Error in serpApi: ${error}`);
|
| 24 |
+
}
|
| 25 |
+
};
|
| 26 |
+
|
| 27 |
+
return new Tool(paramsSchema, name, description, execute).tool;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
export { createSerpApi };
|
package-lock.json
CHANGED
|
@@ -28,6 +28,7 @@
|
|
| 28 |
"react": "^18",
|
| 29 |
"react-dom": "^18",
|
| 30 |
"react-markdown": "^9.0.0",
|
|
|
|
| 31 |
"sonner": "^1.1.0",
|
| 32 |
"zod": "^3.22.4",
|
| 33 |
"zod-to-json-schema": "^3.21.4"
|
|
@@ -7245,6 +7246,11 @@
|
|
| 7245 |
"node": ">=10"
|
| 7246 |
}
|
| 7247 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7248 |
"node_modules/set-function-length": {
|
| 7249 |
"version": "1.1.1",
|
| 7250 |
"resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.1.1.tgz",
|
|
|
|
| 28 |
"react": "^18",
|
| 29 |
"react-dom": "^18",
|
| 30 |
"react-markdown": "^9.0.0",
|
| 31 |
+
"serpapi": "^2.0.0",
|
| 32 |
"sonner": "^1.1.0",
|
| 33 |
"zod": "^3.22.4",
|
| 34 |
"zod-to-json-schema": "^3.21.4"
|
|
|
|
| 7246 |
"node": ">=10"
|
| 7247 |
}
|
| 7248 |
},
|
| 7249 |
+
"node_modules/serpapi": {
|
| 7250 |
+
"version": "2.0.0",
|
| 7251 |
+
"resolved": "https://registry.npmjs.org/serpapi/-/serpapi-2.0.0.tgz",
|
| 7252 |
+
"integrity": "sha512-LKrIBrXWhZDmS1urFwB8e/T8omW2C7r7ZmW231aVBtMbfNx/p9pi5gkiScM/VSAcsyhX1q3oPeWCvbzQiiwaCg=="
|
| 7253 |
+
},
|
| 7254 |
"node_modules/set-function-length": {
|
| 7255 |
"version": "1.1.1",
|
| 7256 |
"resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.1.1.tgz",
|
package.json
CHANGED
|
@@ -29,6 +29,7 @@
|
|
| 29 |
"react": "^18",
|
| 30 |
"react-dom": "^18",
|
| 31 |
"react-markdown": "^9.0.0",
|
|
|
|
| 32 |
"sonner": "^1.1.0",
|
| 33 |
"zod": "^3.22.4",
|
| 34 |
"zod-to-json-schema": "^3.21.4"
|
|
|
|
| 29 |
"react": "^18",
|
| 30 |
"react-dom": "^18",
|
| 31 |
"react-markdown": "^9.0.0",
|
| 32 |
+
"serpapi": "^2.0.0",
|
| 33 |
"sonner": "^1.1.0",
|
| 34 |
"zod": "^3.22.4",
|
| 35 |
"zod-to-json-schema": "^3.21.4"
|
pages/api/{surfer → functions}/index.ts
RENAMED
|
@@ -7,6 +7,7 @@ import puppeteer from 'puppeteer';
|
|
| 7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 8 |
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
| 9 |
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
|
|
|
| 10 |
|
| 11 |
export const config = {
|
| 12 |
api: {
|
|
@@ -31,6 +32,11 @@ const model = new HuggingFaceTransformersEmbeddings({
|
|
| 31 |
|
| 32 |
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
const handleContentText = async (targetUrl: string) => {
|
| 35 |
const response = await fetch(targetUrl);
|
| 36 |
const contentType = response.headers.get('content-type') || '';
|
|
@@ -60,6 +66,7 @@ const handleContentText = async (targetUrl: string) => {
|
|
| 60 |
|
| 61 |
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
| 62 |
const prompt = req.body.prompt as string;
|
|
|
|
| 63 |
const urls = prompt.match(urlRegex);
|
| 64 |
const targetUrl = urls ? urls[0] : null;
|
| 65 |
const promptWithoutUrl = urls ? prompt.replace(urlRegex, '').trim() : prompt;
|
|
@@ -69,23 +76,28 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|
| 69 |
}
|
| 70 |
|
| 71 |
try {
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
return
|
| 75 |
-
}
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
| 89 |
} catch (error) {
|
| 90 |
console.error(error);
|
| 91 |
// @ts-ignore
|
|
|
|
| 7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 8 |
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
| 9 |
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
| 10 |
+
import { createSerpApi } from '../../../app/tools/serp-api'
|
| 11 |
|
| 12 |
export const config = {
|
| 13 |
api: {
|
|
|
|
| 32 |
|
| 33 |
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
| 34 |
|
| 35 |
+
const [serpApi] =
|
| 36 |
+
createSerpApi({
|
| 37 |
+
apiKey: process.env.SERP_API_KEY || "",
|
| 38 |
+
});
|
| 39 |
+
|
| 40 |
const handleContentText = async (targetUrl: string) => {
|
| 41 |
const response = await fetch(targetUrl);
|
| 42 |
const contentType = response.headers.get('content-type') || '';
|
|
|
|
| 66 |
|
| 67 |
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
| 68 |
const prompt = req.body.prompt as string;
|
| 69 |
+
const functionName = req.body.name as string;
|
| 70 |
const urls = prompt.match(urlRegex);
|
| 71 |
const targetUrl = urls ? urls[0] : null;
|
| 72 |
const promptWithoutUrl = urls ? prompt.replace(urlRegex, '').trim() : prompt;
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
try {
|
| 79 |
+
if (functionName === 'serpApi') {
|
| 80 |
+
const result = await serpApi({ input: prompt });
|
| 81 |
+
return res.status(200).send(result);
|
| 82 |
+
} else {
|
| 83 |
+
const content: string = await handleContentText(targetUrl)
|
| 84 |
+
if (!content) {
|
| 85 |
+
return `Couldn't find ${targetUrl}, here is the prompt: ${promptWithoutUrl}`;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
const documents = await textSplitter.createDocuments([content]);
|
| 89 |
|
| 90 |
+
const vectorStore = await MemoryVectorStore.fromTexts(
|
| 91 |
+
// @ts-ignore
|
| 92 |
+
[...documents.map(doc => doc.pageContent)],
|
| 93 |
+
// @ts-ignore
|
| 94 |
+
[...documents.map((v, k) => k)],
|
| 95 |
+
model
|
| 96 |
+
)
|
| 97 |
+
const queryResult = await vectorStore.similaritySearch(promptWithoutUrl, VECTOR_STORE_SIZE);
|
| 98 |
+
return res.status(200).send(
|
| 99 |
+
`Here is the context: ${JSON.stringify(queryResult.map(result => result.pageContent))} from using the prompt to lookup relevant information. Here is the prompt: ${promptWithoutUrl}`);
|
| 100 |
+
}
|
| 101 |
} catch (error) {
|
| 102 |
console.error(error);
|
| 103 |
// @ts-ignore
|