Sheshank Joshi committed on
Commit 541377c · 1 Parent(s): 59e0000

latest changes

Files changed (4):
  1. agent.py +398 -4
  2. app.py +6 -3
  3. basic_tools.py +164 -0
  4. system_prompt.txt +17 -0
agent.py CHANGED
@@ -1,9 +1,403 @@
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+from langchain.tools.retriever import create_retriever_tool
+from langchain_core.tools import BaseTool
+from langgraph.graph import START, StateGraph, MessagesState, END
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
+from langchain_core.vectorstores import VectorStore
+from langchain_core.language_models import BaseChatModel
+from langgraph.prebuilt import tools_condition
+from langgraph.prebuilt import ToolNode
+from langchain_community.vectorstores import FAISS
+from langchain_core.documents import Document
+from langchain_groq import ChatGroq
+from basic_tools import *
+from typing import List
+from datetime import datetime, timedelta
+import heapq
+
+# Embeddings and the FAISS store that back the retriever.
+embeddings = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-mpnet-base-v2")  # dim=768
+# FAISS.from_texts() cannot infer the embedding dimension from an empty list,
+# so seed the index with a single placeholder text.
+vector_store: FAISS = FAISS.from_texts(
+    texts=["placeholder"],
+    embedding=embeddings)
+
+
+# Load the system prompt from the file.
+with open("system_prompt.txt", "r", encoding="utf-8") as f:
+    system_prompt = f.read()
+
+
+# System message
+sys_msg = SystemMessage(content=system_prompt)
+
+
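+# If seeding with a placeholder is undesirable, an empty FAISS index can be
+# constructed directly instead (a sketch, assuming the faiss package that
+# langchain_community wraps):
+#   import faiss
+#   from langchain_community.docstore.in_memory import InMemoryDocstore
+#   index = faiss.IndexFlatL2(768)  # all-mpnet-base-v2 embedding dimension
+#   vector_store = FAISS(embedding_function=embeddings, index=index,
+#                        docstore=InMemoryDocstore(), index_to_docstore_id={})
+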
 class BasicAgent:
+    tools: List[BaseTool] = [multiply,
+                             add,
+                             subtract,
+                             divide,
+                             modulus,
+                             wiki_search,
+                             web_search,
+                             arxiv_search,
+                             requests_get,
+                             requests_post]
+
-    def __init__(self):
+    def __init__(self, embeddings: HuggingFaceEmbeddings, vector_store: VectorStore, llm: BaseChatModel):
+        self.embedding_model = embeddings
+        self.vector_store = vector_store
+        ret = self.vector_store.as_retriever()
+        # Most providers require tool names to match ^[a-zA-Z0-9_-]+$, so avoid spaces.
+        self.retriever = create_retriever_tool(
+            retriever=ret,
+            name="question_search",
+            description="A tool to retrieve similar questions from a vector store."
+        )
+        self.llm = llm.bind_tools(self.tools)
+        self.graph = self.build_graph()
         print("BasicAgent initialized.")

     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
+
+        # Search for similar content to enhance context
+        similar_docs = self.vector_store.similarity_search(question, k=3)
+
+        # Create enhanced context with relevant past information
+        enhanced_context = question
+        if similar_docs:
+            context_additions = []
+            for doc in similar_docs:
+                # Extract relevant information from similar documents
+                content = doc.page_content
+                if "Question:" in content and "Final answer:" in content:
+                    q = content.split("Question:")[1].split("Final answer:")[0].strip()
+                    a = content.split("Final answer:")[1].split("Timestamp:", 1)[0].strip()
+                    # Only reuse it if it is not exactly the same question
+                    if question.lower() != q.lower():
+                        context_additions.append(f"Related Q: {q}\nRelated A: {a}")
+
+            if context_additions:
+                enhanced_context = (
+                    "I'll answer your question, but first consider this relevant information:\n\n" +
+                    "\n\n".join(context_additions) +
+                    "\n\nNow answering your original question: " + question
+                )
+
+        # Process with the graph
+        input_messages = [HumanMessage(content=enhanced_context)]
+        result = self.graph.invoke({"messages": input_messages})
+        answer = result["messages"][-1].content
+
+        # Store this Q&A pair for future reference
+        self._cache_result(question, answer)
+
+        print(f"Agent returning answer (first 50 chars): {answer[:50]}...")
+        return answer
+
+    def _cache_result(self, question: str, answer: str) -> None:
+        """Cache the question and answer in the vector store."""
+        timestamp = datetime.now().isoformat()
+        content = f"Question: {question}\nFinal answer: {answer}\nTimestamp: {timestamp}"
+
+        # Create document with metadata
+        doc = Document(
+            page_content=content,
+            metadata={
+                "question": question,
+                "timestamp": timestamp,
+                "type": "qa_pair"
+            }
+        )
+
+        # Add to vector store
+        self.vector_store.add_documents([doc])
+        print("Cached new Q&A in vector store")
+
+    def build_graph(self):
+        """Build the graph with context enhancement."""
+
+        def context_enhanced_generation(state: MessagesState):
+            """Node that enhances the prompt with relevant retrieved context."""
+            query = str(state["messages"][-1].content)
+
+            # Retrieve relevant information
+            similar_docs = self.vector_store.similarity_search(query, k=3)
+
+            # Extract relevant context
+            context = ""
+            if similar_docs:
+                context_pieces = []
+                for doc in similar_docs:
+                    content = doc.page_content
+                    # Extract the relevant parts
+                    if "Question:" in content:
+                        context_pieces.append(content)
+
+                if context_pieces:
+                    context = "Relevant context:\n\n" + "\n\n".join(context_pieces) + "\n\n"
+
+            # Create enhanced messages
+            enhanced_messages = state["messages"].copy()
+            if context:
+                # Append context to the system message if one exists, otherwise insert one
+                system_message_found = False
+                for i, msg in enumerate(enhanced_messages):
+                    if isinstance(msg, SystemMessage):
+                        enhanced_messages[i] = SystemMessage(content=f"{msg.content}\n\n{context}")
+                        system_message_found = True
+                        break
+
+                if not system_message_found:
+                    enhanced_messages.insert(0, SystemMessage(content=context))
+
+            # Process with LLM
+            response = self.llm.invoke(enhanced_messages)
+
+            return {"messages": state["messages"] + [response]}
+
+        # Tool handling node
+        tool_node = ToolNode(self.tools)
+
+        # Build graph with tool handling
+        builder = StateGraph(MessagesState)
+        builder.add_node("context_enhanced_generation", context_enhanced_generation)
+        builder.add_node("tools", tool_node)
+
+        # Connect nodes. tools_condition returns "tools" when the last message
+        # carries tool calls and END otherwise, so the path map must key on
+        # those two values (a None key would never match).
+        builder.set_entry_point("context_enhanced_generation")
+        builder.add_conditional_edges(
+            "context_enhanced_generation",
+            tools_condition,
+            {
+                "tools": "tools",
+                END: END
+            }
+        )
+        builder.add_edge("tools", "context_enhanced_generation")
+
+        return builder.compile()
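+    # Note: the compiled graph alternates context_enhanced_generation and
+    # tools until the LLM replies without tool calls, then routes to END.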
+
+    @staticmethod
+    def get_llm(provider: str = "groq") -> BaseChatModel:
+        # Assumes the relevant API keys are already present in the environment
+        # (e.g. GROQ_API_KEY or OPENAI_API_KEY).
+        if provider == "groq":
+            # Groq models: https://console.groq.com/docs/models
+            # alternatives: qwen-qwq-32b, gemma2-9b-it
+            llm = ChatGroq(model="qwen-qwq-32b", temperature=0)
+        elif provider == "huggingface":
+            # TODO: point this at a valid Hugging Face endpoint
+            llm = ChatHuggingFace(
+                llm=HuggingFaceEndpoint(
+                    model="Meta-DeepLearning/llama-2-7b-chat-hf",
+                    temperature=0,
+                ),
+            )
+        elif provider == "openai_local":
+            from langchain_openai import ChatOpenAI
+            llm = ChatOpenAI(
+                base_url="http://localhost:11432/v1",  # local LM Studio-style endpoint
+                api_key="not-used",  # required by the interface but ignored locally
+                model="mistral-nemo-instruct-2407",
+                temperature=0.2
+            )
+        elif provider == "openai":
+            from langchain_openai import ChatOpenAI
+            llm = ChatOpenAI(
+                model="gpt-4o",
+                temperature=0.2,
+            )
+        else:
+            raise ValueError(
+                "Invalid provider. Choose 'groq', 'huggingface', 'openai_local' or 'openai'.")
+        return llm
+
+    def manage_memory(self, max_documents: int = 1000, max_age_days: int = 30) -> None:
+        """
+        Manage memory by pruning old or less useful entries from the vector store.
+
+        This implementation works with various vector store types, not just FAISS.
+
+        Args:
+            max_documents: Maximum number of documents to keep
+            max_age_days: Remove documents older than this many days
+        """
+        print("Starting memory management...")
+
+        # Get all documents from the vector store
+        try:
+            # For vector stores that expose a get_all_documents method
+            if hasattr(self.vector_store, "get_all_documents"):
+                all_docs = self.vector_store.get_all_documents()
+                all_ids = [doc.metadata.get("id", i) for i, doc in enumerate(all_docs)]
+            # For FAISS and similar implementations
+            elif hasattr(self.vector_store, "docstore") and hasattr(self.vector_store, "index_to_docstore_id"):
+                # Access the docstore in a more robust way
+                if hasattr(self.vector_store.docstore, "docstore"):
+                    all_ids = list(self.vector_store.index_to_docstore_id.values())
+                    all_docs = []
+                    for doc_id in all_ids:
+                        doc = self.vector_store.docstore.search(doc_id)
+                        if doc:
+                            all_docs.append(doc)
+                else:
+                    # Fallback for newer FAISS implementations
+                    try:
+                        all_docs = []
+                        all_ids = []
+                        # Walk every index position
+                        for i in range(self.vector_store.index.ntotal):
+                            # Map index position to document ID
+                            if i in self.vector_store.index_to_docstore_id:
+                                doc_id = self.vector_store.index_to_docstore_id[i]
+                                doc = self.vector_store.docstore.search(doc_id)
+                                if doc:
+                                    all_docs.append(doc)
+                                    all_ids.append(doc_id)
+                    except Exception as e:
+                        print(f"Error accessing FAISS documents: {e}")
+                        all_docs = []
+                        all_ids = []
+            else:
+                print("Warning: Vector store doesn't expose required attributes for memory management")
+                return
+        except Exception as e:
+            print(f"Error accessing vector store documents: {e}")
+            return
+
+        if not all_docs:
+            print("No documents found in vector store")
+            return
+
+        print(f"Retrieved {len(all_docs)} documents for scoring")
+
+        # Score each document based on recency and other factors
+        scored_docs = []
+        cutoff_date = datetime.now() - timedelta(days=max_age_days)
+
+        for i, doc in enumerate(all_docs):
+            doc_id = all_ids[i] if i < len(all_ids) else i
+
+            # Extract timestamp from content or metadata
+            timestamp = None
+            if hasattr(doc, "metadata") and doc.metadata and "timestamp" in doc.metadata:
+                try:
+                    timestamp = datetime.fromisoformat(doc.metadata["timestamp"])
+                except (ValueError, TypeError):
+                    pass
+
+            # If no timestamp in metadata, try to extract it from the content
+            if not timestamp and hasattr(doc, "page_content") and "Timestamp:" in doc.page_content:
+                try:
+                    timestamp_str = doc.page_content.split("Timestamp:")[-1].strip().split('\n')[0]
+                    timestamp = datetime.fromisoformat(timestamp_str)
+                except (ValueError, TypeError):
+                    timestamp = datetime.now() - timedelta(days=max_age_days + 1)
+
+            # If still no timestamp, treat the document as expired
+            if not timestamp:
+                timestamp = datetime.now() - timedelta(days=max_age_days + 1)
+
+            # Calculate age score in [0, 1] (newer is better)
+            age_factor = max(0.0, min(1.0, (timestamp - cutoff_date).total_seconds() /
+                                      (datetime.now() - cutoff_date).total_seconds()))
+
+            # Calculate importance score - could be based on various factors
+            importance_factor = 1.0
+            # If the document has been accessed often, increase importance
+            if hasattr(doc, "metadata") and doc.metadata and "access_count" in doc.metadata:
+                importance_factor += min(1.0, doc.metadata["access_count"] / 10)
+
+            # Combined score (higher = more valuable to keep)
+            total_score = (0.7 * age_factor) + (0.3 * importance_factor)
+
+            # Push onto the priority queue (negated score for max-heap behavior)
+            heapq.heappush(scored_docs, (-total_score, i, doc))
+
+        # Select the top-scoring documents to keep
+        docs_to_keep = []
+        for _ in range(min(max_documents, len(scored_docs))):
+            if scored_docs:
+                _, _, doc = heapq.heappop(scored_docs)
+                docs_to_keep.append(doc)
+
+        # Only rebuild if we're actually pruning some documents
+        if len(docs_to_keep) < len(all_docs):
+            print(f"Memory management: Keeping {len(docs_to_keep)} documents out of {len(all_docs)}")
+
+            # Create a new vector store of the same type as the current one
+            vector_store_type = type(self.vector_store)
+
+            # Different approaches based on vector store type
+            if hasattr(vector_store_type, "from_documents"):
+                # Most langchain vector stores support this method
+                new_vector_store = vector_store_type.from_documents(
+                    docs_to_keep,
+                    embedding=self.embedding_model
+                )
+                self.vector_store = new_vector_store
+                print(f"Vector store rebuilt with {len(docs_to_keep)} documents")
+
+            elif hasattr(vector_store_type, "from_texts"):
+                # For vector stores that use from_texts
+                texts = [doc.page_content for doc in docs_to_keep]
+                metadatas = [doc.metadata if hasattr(doc, "metadata") else {} for doc in docs_to_keep]
+
+                new_vector_store = vector_store_type.from_texts(
+                    texts=texts,
+                    embedding=self.embedding_model,
+                    metadatas=metadatas
+                )
+                self.vector_store = new_vector_store
+                print(f"Vector store rebuilt with {len(docs_to_keep)} documents")
+
+            else:
+                print("Warning: Could not determine how to rebuild the vector store")
+                print(f"Vector store type: {vector_store_type.__name__}")
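+
+    # Note: rebuilding via from_documents/from_texts re-embeds every kept
+    # document, so each pruning pass costs O(n) embedding calls.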
+
+    def capture_tool_result(self, tool_name: str, tool_input: str, tool_output: str) -> None:
+        """
+        Capture knowledge gained from tool usage for future reference.
+
+        Args:
+            tool_name: Name of the tool used
+            tool_input: Input/query sent to the tool
+            tool_output: Result returned by the tool
+        """
+        # Format the content
+        timestamp = datetime.now().isoformat()
+        content = (
+            f"Tool Knowledge\n"
+            f"Tool: {tool_name}\n"
+            f"Query: {tool_input}\n"
+            f"Result: {tool_output}\n"
+            f"Timestamp: {timestamp}"
+        )
+
+        # Create document with metadata
+        doc = Document(
+            page_content=content,
+            metadata={
+                "type": "tool_knowledge",
+                "tool": tool_name,
+                "timestamp": timestamp,
+                "query": tool_input
+            }
+        )
+
+        # Add to vector store
+        self.vector_store.add_documents([doc])
+        print(f"Captured knowledge from tool '{tool_name}' in vector store")
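
A minimal usage sketch (not part of the commit; assumes GROQ_API_KEY is set and reuses the module-level embeddings and vector_store):

    from agent import BasicAgent, embeddings, vector_store

    llm = BasicAgent.get_llm("groq")
    agent = BasicAgent(embeddings, vector_store, llm)
    # Per the system prompt, the reply should end with "FINAL ANSWER: 144".
    print(agent("What is 12 * 12?"))
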
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-from agent import BasicAgent
+from agent import BasicAgent, embeddings, vector_store
 from dotenv import load_dotenv
 import os

@@ -17,7 +17,7 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------


-def run_and_submit_all( profile: gr.OAuthProfile | None):
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
@@ -38,7 +38,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):

     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
+        # llm = BasicAgent.get_llm("groq")
+        llm = BasicAgent.get_llm("openai_local")
+        agent = BasicAgent(embeddings, vector_store, llm)
+        print("Agent instantiated successfully.")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
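
A possible refinement (not in this commit) is selecting the provider from an environment variable instead of hard-coding it; a sketch, with LLM_PROVIDER as a hypothetical variable name:

    import os

    provider = os.getenv("LLM_PROVIDER", "openai_local")  # hypothetical env var
    llm = BasicAgent.get_llm(provider)
    agent = BasicAgent(embeddings, vector_store, llm)
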
basic_tools.py ADDED
@@ -0,0 +1,164 @@
+import os
+from dotenv import load_dotenv
+from langgraph.graph import START, StateGraph, MessagesState
+from langgraph.prebuilt import tools_condition
+from langgraph.prebuilt import ToolNode
+# from langchain_google_genai import ChatGoogleGenerativeAI
+# from langchain_groq import ChatGroq
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_community.document_loaders import ArxivLoader
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.tools import tool
+from langchain.tools.retriever import create_retriever_tool
+from langchain_community.utilities.requests import TextRequestsWrapper
+
+# from supabase.client import Client, create_client
+# from langchain.tools.requests.toolkit import RequestsToolkit  # alternative: the full RequestsToolkit
+from langchain_community.tools import RequestsPostTool, RequestsGetTool
+
+load_dotenv()
+requests_wrapper = TextRequestsWrapper()
+
+
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two numbers.
+
+    Args:
+        a: first int
+        b: second int
+    """
+    return a * b
+
+
+@tool
+def add(a: int, b: int) -> int:
+    """Add two numbers.
+
+    Args:
+        a: first int
+        b: second int
+    """
+    return a + b
+
+
+@tool
+def subtract(a: int, b: int) -> int:
+    """Subtract two numbers.
+
+    Args:
+        a: first int
+        b: second int
+    """
+    return a - b
+
+
+@tool
+def divide(a: int, b: int) -> float:
+    """Divide two numbers.
+
+    Args:
+        a: first int
+        b: second int
+    """
+    if b == 0:
+        raise ValueError("Cannot divide by zero.")
+    return a / b
+
+
+@tool
+def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers.
+
+    Args:
+        a: first int
+        b: second int
+    """
+    return a % b
+
+
+@tool
+def wiki_search(query: str) -> str:
+    """Search Wikipedia for a query and return a maximum of 2 results.
+
+    Args:
+        query: The search query."""
+    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
+            for doc in search_docs
+        ])
+    return formatted_search_docs
+
+
+@tool
+def web_search(query: str) -> str:
+    """Search Tavily for a query and return a maximum of 3 results.
+
+    Args:
+        query: The search query."""
+    # TavilySearchResults returns a list of dicts with "url" and "content"
+    # keys, not Document objects, so index into them directly.
+    search_results = TavilySearchResults(max_results=3).invoke(input=query)
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{res["url"]}">\n{res["content"]}\n</Document>'
+            for res in search_results
+        ])
+    return formatted_search_docs
+
+
+@tool
+def arxiv_search(query: str) -> str:
+    """Search Arxiv for a query and return a maximum of 3 results.
+
+    Args:
+        query: The search query."""
+    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+    # ArxivLoader metadata keys vary by version (e.g. "Title", "Published"),
+    # so read "source" defensively.
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata.get("source", doc.metadata.get("Title", ""))}" page="{doc.metadata.get("page", "")}">\n{doc.page_content[:1000]}\n</Document>'
+            for doc in search_docs
+        ])
+    return formatted_search_docs
+
+
+# A hand-rolled GET wrapper, kept for reference; superseded by the prebuilt
+# tool below.
+# @tool
+# def requests_get(url: str, params: dict = {}) -> str:
+#     """Perform an HTTP GET request using LangChain's requests tooling.
+#
+#     Args:
+#         url: The URL to send the GET request to.
+#         params: Optional dictionary of query parameters.
+#
+#     Returns:
+#         The response content as text.
+#     """
+#     toolkit = RequestsGetTool(requests_wrapper=requests_wrapper)
+#     response = toolkit.run(url, params=params)
+#     return response.text
+
+# Prebuilt request tools; recent langchain_community versions require an
+# explicit opt-in because these tools can reach arbitrary URLs.
+requests_get = RequestsGetTool(requests_wrapper=requests_wrapper,
+                               allow_dangerous_requests=True)
+requests_post = RequestsPostTool(requests_wrapper=requests_wrapper,
+                                 allow_dangerous_requests=True)
+
+# A hand-rolled POST wrapper, kept for reference; superseded by the prebuilt
+# tool above.
+# @tool
+# def requests_post(url: str, data: dict = {}, json: dict = {}, headers: dict = {}) -> str:
+#     """Perform an HTTP POST request using LangChain's requests tooling.
+#
+#     Args:
+#         url: The URL to send the POST request to.
+#         data: Optional dictionary of form data.
+#         json: Optional dictionary to send as JSON.
+#         headers: Optional dictionary of HTTP headers.
+#
+#     Returns:
+#         The response content as text.
+#     """
+#     toolkit = RequestsPostTool(requests_wrapper=requests_wrapper)
+#     response = toolkit.run(url, data=data, json=json, headers=headers)
+#     return response.text
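
A quick invocation sketch (not part of the commit; LangChain @tool objects are called via .invoke with a dict of arguments, and Tavily needs TAVILY_API_KEY in the environment):

    from basic_tools import multiply, wiki_search

    print(multiply.invoke({"a": 6, "b": 7}))  # 42
    print(wiki_search.invoke({"query": "Alan Turing"})[:200])
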
system_prompt.txt ADDED
@@ -0,0 +1,17 @@
+You are a helpful assistant tasked with answering questions using a set of tools.
+
+Your final answer must strictly follow this format:
+FINAL ANSWER: [ANSWER]
+
+Only write the answer in that exact format. Do not explain anything. Do not include any other text.
+
+If you are provided with a similar question and its final answer, and the current question is **exactly the same**, then simply return the same final answer without using any tools.
+
+Only use tools if the current question is different from the similar one.
+
+Examples:
+- FINAL ANSWER: FunkMonk
+- FINAL ANSWER: Paris
+- FINAL ANSWER: 128
+
+If you do not follow this format exactly, your response will be considered incorrect.
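
Since graders typically match this format mechanically, a small parser helps downstream; a sketch (hypothetical helper, not in the commit):

    import re

    def parse_final_answer(text: str) -> str | None:
        # Return the text after the last "FINAL ANSWER:" marker, if any.
        matches = re.findall(r"FINAL ANSWER:\s*(.+)", text)
        return matches[-1].strip() if matches else None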