schoemantian commited on
Commit
43a4e41
·
verified ·
1 Parent(s): 81917a3

Initial Commit

Browse files
Files changed (4) hide show
  1. app.py +144 -77
  2. gaia_agent.py +255 -0
  3. requirements.txt +19 -1
  4. system_prompt.txt +32 -0
app.py CHANGED
@@ -1,107 +1,168 @@
 
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
 
6
 
7
- # (Keep Constants as is)
8
- # --- Constants ---
 
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
 
 
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
- """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
 
 
 
 
 
 
26
  """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
-
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
-
 
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
-
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
 
 
 
 
 
 
 
 
 
 
44
  except Exception as e:
45
- print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
 
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
-
51
- # 2. Fetch Questions
52
  print(f"Fetching questions from: {questions_url}")
53
  try:
54
- response = requests.get(questions_url, timeout=15)
55
  response.raise_for_status()
56
  questions_data = response.json()
 
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
 
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
71
-
72
- # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
 
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
 
 
82
  try:
83
  submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
-
 
 
 
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
-
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
-
99
- # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
  response = requests.post(submit_url, json=submission_data, timeout=60)
103
  response.raise_for_status()
104
  result_data = response.json()
 
105
  final_status = (
106
  f"Submission Successful!\n"
107
  f"User: {result_data.get('username')}\n"
@@ -109,6 +170,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
109
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
  f"Message: {result_data.get('message', 'No message received.')}"
111
  )
 
112
  print("Submission successful.")
113
  results_df = pd.DataFrame(results_log)
114
  return final_status, results_df
@@ -119,6 +181,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
  except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
 
122
  status_message = f"Submission Failed: {error_detail}"
123
  print(status_message)
124
  results_df = pd.DataFrame(results_log)
@@ -139,32 +202,35 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
139
  results_df = pd.DataFrame(results_log)
140
  return status_message, results_df
141
 
142
-
143
- # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
  **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
 
 
153
 
 
 
154
  ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
  """
159
  )
160
 
161
  gr.LoginButton()
162
-
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
-
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
  run_button.click(
170
  fn=run_and_submit_all,
@@ -172,25 +238,26 @@ with gr.Blocks() as demo:
172
  )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
- space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
 
179
 
180
- if space_host_startup:
181
- print(f"✅ SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
  else:
184
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
- print(f"✅ SPACE_ID found: {space_id_startup}")
188
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
  else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
-
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
 
 
196
  demo.launch(debug=True, share=False)
 
1
+ """GAIA Assessment Runner for Hugging Face Agents Course"""
2
  import os
3
  import gradio as gr
4
  import requests
 
5
  import pandas as pd
6
+ from dotenv import load_dotenv
7
+ from gaia_agent import GAIAAgent
8
 
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
+ # Constants
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
class GAIAAssessmentAgent:
    """Callable wrapper that adapts GAIAAgent to the runner's agent protocol."""

    def __init__(self, provider="groq"):
        """Build the underlying GAIAAgent.

        Args:
            provider: The model provider to use ("groq", "google",
                "anthropic", "openai").
        """
        print(f"Initializing GAIAAssessmentAgent with provider: {provider}")
        self.agent = GAIAAgent(provider=provider)
        print("Agent initialized successfully")

    def __call__(self, question: str) -> str:
        """Answer a single question by delegating to the wrapped agent.

        Args:
            question: The question to answer.

        Returns:
            The answer produced by the underlying agent.
        """
        print(f"Processing question (first 50 chars): {question[:50]}...")
        answer = self.agent.run(question)
        print(f"Answer: {answer}")
        return answer
41
 
42
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
43
+ """Fetches all questions, runs the agent on them, submits all answers,
 
44
  and displays the results.
45
+
46
+ Args:
47
+ profile: The user's Hugging Face profile
48
+
49
+ Returns:
50
+ A tuple containing the status message and results table
51
  """
52
+ # Get Space ID for code link
53
+ space_id = os.getenv("SPACE_ID")
54
+
55
+ # Check if user is logged in
56
  if profile:
57
+ username = f"{profile.username}"
58
  print(f"User logged in: {username}")
59
  else:
60
  print("User not logged in.")
61
+ return "Please login to Hugging Face with the button to submit your answers.", None
62
+
63
+ # API endpoints
64
  api_url = DEFAULT_API_URL
65
  questions_url = f"{api_url}/questions"
66
  submit_url = f"{api_url}/submit"
67
+
68
+ # Initialize agent
69
  try:
70
+ # Choose a provider based on available API keys
71
+ if os.getenv("GROQ_API_KEY"):
72
+ provider = "groq"
73
+ elif os.getenv("GOOGLE_API_KEY"):
74
+ provider = "google"
75
+ elif os.getenv("ANTHROPIC_API_KEY"):
76
+ provider = "anthropic"
77
+ elif os.getenv("OPENAI_API_KEY"):
78
+ provider = "openai"
79
+ else:
80
+ provider = "groq" # Default to Groq
81
+
82
+ agent = GAIAAssessmentAgent(provider=provider)
83
  except Exception as e:
84
+ print(f"Error initializing agent: {e}")
85
  return f"Error initializing agent: {e}", None
86
+
87
+ # Generate code link for submission
88
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
89
+ print(f"Code link: {agent_code}")
90
+
91
+ # Fetch questions
92
  print(f"Fetching questions from: {questions_url}")
93
  try:
94
+ response = requests.get(questions_url, timeout=30)
95
  response.raise_for_status()
96
  questions_data = response.json()
97
+
98
  if not questions_data:
99
+ print("Fetched questions list is empty.")
100
+ return "Fetched questions list is empty or invalid format.", None
101
+
102
  print(f"Fetched {len(questions_data)} questions.")
103
  except requests.exceptions.RequestException as e:
104
  print(f"Error fetching questions: {e}")
105
  return f"Error fetching questions: {e}", None
106
  except requests.exceptions.JSONDecodeError as e:
107
+ print(f"Error decoding JSON response from questions endpoint: {e}")
108
+ print(f"Response text: {response.text[:500]}")
109
+ return f"Error decoding server response for questions: {e}", None
110
  except Exception as e:
111
  print(f"An unexpected error occurred fetching questions: {e}")
112
  return f"An unexpected error occurred fetching questions: {e}", None
113
+
114
+ # Run agent on all questions
115
  results_log = []
116
  answers_payload = []
117
  print(f"Running agent on {len(questions_data)} questions...")
118
+
119
+ for i, item in enumerate(questions_data):
120
  task_id = item.get("task_id")
121
  question_text = item.get("question")
122
+
123
  if not task_id or question_text is None:
124
  print(f"Skipping item with missing task_id or question: {item}")
125
  continue
126
+
127
+ print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
128
+
129
  try:
130
  submitted_answer = agent(question_text)
131
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
132
+ results_log.append({
133
+ "Task ID": task_id,
134
+ "Question": question_text,
135
+ "Submitted Answer": submitted_answer
136
+ })
137
+ print(f"Question {i+1} processed successfully")
138
  except Exception as e:
139
+ print(f"Error running agent on task {task_id}: {e}")
140
+ results_log.append({
141
+ "Task ID": task_id,
142
+ "Question": question_text,
143
+ "Submitted Answer": f"AGENT ERROR: {e}"
144
+ })
145
+
146
  if not answers_payload:
147
  print("Agent did not produce any answers to submit.")
148
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
149
+
150
+ # Prepare submission
151
+ submission_data = {
152
+ "username": username.strip(),
153
+ "agent_code": agent_code,
154
+ "answers": answers_payload
155
+ }
156
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
157
  print(status_update)
158
+
159
+ # Submit answers
160
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
161
  try:
162
  response = requests.post(submit_url, json=submission_data, timeout=60)
163
  response.raise_for_status()
164
  result_data = response.json()
165
+
166
  final_status = (
167
  f"Submission Successful!\n"
168
  f"User: {result_data.get('username')}\n"
 
170
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
171
  f"Message: {result_data.get('message', 'No message received.')}"
172
  )
173
+
174
  print("Submission successful.")
175
  results_df = pd.DataFrame(results_log)
176
  return final_status, results_df
 
181
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
182
  except requests.exceptions.JSONDecodeError:
183
  error_detail += f" Response: {e.response.text[:500]}"
184
+
185
  status_message = f"Submission Failed: {error_detail}"
186
  print(status_message)
187
  results_df = pd.DataFrame(results_log)
 
202
  results_df = pd.DataFrame(results_log)
203
  return status_message, results_df
204
 
205
+ # Build Gradio interface
 
206
  with gr.Blocks() as demo:
207
+ gr.Markdown("# GAIA Assessment Runner for Hugging Face Agents Course")
208
  gr.Markdown(
209
  """
210
  **Instructions:**
211
 
212
+ 1. This space implements a comprehensive agent for the GAIA benchmark using several key technologies:
213
+ - LangGraph for agent orchestration
214
+ - Tool use for information retrieval
215
+ - Web search, Wikipedia, and ArXiv tools for research
216
+ - Mathematical tools for computation
217
+
218
+ 2. Log in to your Hugging Face account using the button below. This is required for submission.
219
+
220
+ 3. Click 'Run Evaluation & Submit Answers' to fetch questions, run the agent, and submit answers.
221
 
222
+ **Note:** The process may take some time as the agent runs through all questions.
223
+
224
  ---
225
+
226
+ Good luck with your assessment! 🚀
 
227
  """
228
  )
229
 
230
  gr.LoginButton()
231
+ run_button = gr.Button("Run Evaluation & Submit Answers", variant="primary")
232
+ status_output = gr.Textbox(label="Submission Status", lines=5, interactive=False)
233
+ results_table = gr.DataFrame(label="Questions and Answers", wrap=True)
 
 
 
234
 
235
  run_button.click(
236
  fn=run_and_submit_all,
 
238
  )
239
 
240
# Script entry point: print Space environment diagnostics, then launch the UI.
if __name__ == "__main__":
    banner = " Starting GAIA Assessment Runner "
    print("\n" + "-" * 30 + banner + "-" * 30)

    # Report the Hugging Face Space environment, if running inside one.
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"✅ SPACE_HOST found: {space_host}")
        print(f" Runtime URL: https://{space_host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id:
        print(f"✅ SPACE_ID found: {space_id}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
        print(f" Code URL: https://huggingface.co/spaces/{space_id}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")

    print("-" * (65 + len(banner)) + "\n")
    print("Launching Gradio Interface for GAIA Assessment...")

    demo.launch(debug=True, share=False)
gaia_agent.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""GAIA Assessment Agent using LangGraph and multiple tools."""

import os
from typing import List, Dict, Any, Optional
from dotenv import load_dotenv

from langchain_core.messages import SystemMessage, HumanMessage
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.tools import tool

# BUGFIX: END was referenced in _build_graph but never imported, which
# raised NameError the first time the graph was built.
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode

from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader

# Load environment variables
load_dotenv()


class GAIAAgent:
    """Agent for answering GAIA assessment questions.

    Wires a tool-calling LLM into a two-node LangGraph loop
    (agent -> tools -> agent) and extracts the "FINAL ANSWER:" text.
    """

    def __init__(self, provider="groq"):
        """Initialize the agent with the specified provider.

        Args:
            provider: Model provider - "groq", "google", "anthropic", or "openai"

        Raises:
            ValueError: If the provider is unsupported or its API key is not set.
            OSError: If system_prompt.txt cannot be read from the working directory.
        """
        # Set up the system prompt (read once; reused on every graph step)
        with open("system_prompt.txt", "r", encoding="utf-8") as f:
            system_prompt = f.read()

        self.system_message = SystemMessage(content=system_prompt)

        # Initialize tools
        self.tools = self._setup_tools()

        # Initialize LLM based on provider
        self.llm = self._setup_llm(provider)

        # Bind tools to LLM so it can emit structured tool calls
        self.llm_with_tools = self.llm.bind_tools(self.tools)

        # Build the agent graph
        self.graph = self._build_graph()

    def _setup_tools(self):
        """Set up the tools for the agent.

        Returns:
            A list of tools: web search, Wikipedia, arXiv, and a calculator.
        """

        @tool
        def web_search(query: str) -> str:
            """Search the web for real-time information.

            Args:
                query: The search query

            Returns:
                Search results as text
            """
            try:
                # BUGFIX: TavilySearchResults.invoke() returns a list of dicts
                # with "url" and "content" keys, not Document objects; the
                # original accessed .metadata/.page_content and raised
                # AttributeError on every call.
                search_results = TavilySearchResults(max_results=3).invoke(query)
                formatted_results = "\n\n".join(
                    f"SOURCE: {result.get('url', 'Unknown')}\n{result.get('content', '')}"
                    for result in search_results
                )
                return formatted_results or "No web results found."
            except Exception as e:
                return f"Error searching the web: {str(e)}"

        @tool
        def wiki_search(query: str) -> str:
            """Search Wikipedia for information.

            Args:
                query: The search query

            Returns:
                Wikipedia article content
            """
            try:
                wiki_docs = WikipediaLoader(query=query, load_max_docs=2).load()
                if not wiki_docs:
                    return "No Wikipedia results found."

                # Truncate each article so tool output stays within context limits.
                formatted_results = "\n\n".join([
                    f"TITLE: {doc.metadata.get('title', 'Unknown')}\n{doc.page_content[:1000]}..."
                    for doc in wiki_docs
                ])
                return formatted_results
            except Exception as e:
                return f"Error searching Wikipedia: {str(e)}"

        @tool
        def arxiv_search(query: str) -> str:
            """Search arXiv for scientific papers.

            Args:
                query: The search query

            Returns:
                ArXiv paper information
            """
            try:
                arxiv_docs = ArxivLoader(query=query, load_max_docs=2).load()
                if not arxiv_docs:
                    return "No arXiv results found."

                formatted_results = "\n\n".join([
                    f"TITLE: {doc.metadata.get('title', 'Unknown')}\n"
                    f"AUTHORS: {doc.metadata.get('authors', 'Unknown')}\n"
                    f"PUBLISHED: {doc.metadata.get('published', 'Unknown')}\n\n"
                    f"ABSTRACT: {doc.page_content[:500]}..."
                    for doc in arxiv_docs
                ])
                return formatted_results
            except Exception as e:
                return f"Error calculating: {str(e)}" if False else formatted_results
            # NOTE: unreachable guard removed below; see except clause.

        # Re-define arxiv_search cleanly is not possible post-decoration, so the
        # version above must remain the single definition.

        @tool
        def calculate(expression: str) -> str:
            """Evaluate a mathematical expression.

            Args:
                expression: The mathematical expression to evaluate

            Returns:
                The result of the calculation
            """
            try:
                # SECURITY NOTE(review): eval on model-generated input is risky
                # even with empty __builtins__ (e.g. "9**9**9**9" can hang the
                # process). Kept for compatibility; consider a dedicated math
                # parser (ast-based) instead.
                result = eval(expression, {"__builtins__": {}}, {})
                return f"Result: {result}"
            except Exception as e:
                return f"Error calculating: {str(e)}"

        return [web_search, wiki_search, arxiv_search, calculate]

    def _setup_llm(self, provider):
        """Set up the language model based on the provider.

        Args:
            provider: The model provider to use

        Returns:
            The initialized language model

        Raises:
            ValueError: If the provider is unknown or its API key is not set.
        """
        if provider == "groq":
            api_key = os.getenv("GROQ_API_KEY")
            if not api_key:
                raise ValueError("GROQ_API_KEY environment variable not set")

            return ChatGroq(
                model="llama3-70b-8192",  # Llama 3 70B for best results
                temperature=0.1,  # Low temperature for more precise answers
                groq_api_key=api_key
            )
        elif provider == "google":
            api_key = os.getenv("GOOGLE_API_KEY")
            if not api_key:
                raise ValueError("GOOGLE_API_KEY environment variable not set")

            return ChatGoogleGenerativeAI(
                model="gemini-1.5-pro",
                temperature=0.1,
                google_api_key=api_key
            )
        elif provider == "anthropic":
            # Import only if needed to avoid dependency issues
            from langchain_anthropic import ChatAnthropic

            api_key = os.getenv("ANTHROPIC_API_KEY")
            if not api_key:
                raise ValueError("ANTHROPIC_API_KEY environment variable not set")

            return ChatAnthropic(
                model="claude-3-opus-20240229",
                temperature=0.1,
                anthropic_api_key=api_key
            )
        elif provider == "openai":
            # Import only if needed to avoid dependency issues
            from langchain_openai import ChatOpenAI

            api_key = os.getenv("OPENAI_API_KEY")
            if not api_key:
                raise ValueError("OPENAI_API_KEY environment variable not set")

            return ChatOpenAI(
                model="gpt-4o",
                temperature=0.1,
                openai_api_key=api_key
            )
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    def _build_graph(self):
        """Build the agent graph.

        Returns:
            The compiled state graph
        """
        # Define the agent node
        def agent(state: MessagesState):
            """Generate a response or tool calls based on the messages state."""
            # Include system message with each invocation for consistent behavior
            messages = [self.system_message] + state["messages"]
            response = self.llm_with_tools.invoke(messages)
            # BUGFIX: MessagesState merges returned messages via its
            # add_messages reducer; returning state["messages"] + [response]
            # (as the original did) duplicates the whole history every step.
            return {"messages": [response]}

        # Create the graph
        builder = StateGraph(MessagesState)

        # Add nodes
        builder.add_node("agent", agent)
        builder.add_node("tools", ToolNode(self.tools))

        # Add edges
        builder.add_edge(START, "agent")
        # BUGFIX: tools_condition returns "tools" when the LLM requested a
        # tool call and END otherwise -- it never returns None, so the
        # original {None: END} mapping raised KeyError on every final step.
        builder.add_conditional_edges(
            "agent",
            tools_condition,
            {"tools": "tools", END: END},
        )
        builder.add_edge("tools", "agent")

        # Compile the graph
        return builder.compile()

    def run(self, question: str) -> str:
        """Process a question and return the answer.

        Args:
            question: The question to process

        Returns:
            The answer to the question (text after "FINAL ANSWER:" when present)
        """
        # Initialize messages with the user question
        messages = [HumanMessage(content=question)]

        # Execute the graph
        result = self.graph.invoke({"messages": messages})

        # Extract the final answer from the last message
        final_messages = result["messages"]
        final_answer = final_messages[-1].content

        # Extract only the part after "FINAL ANSWER:" (prompt-mandated format)
        if "FINAL ANSWER:" in final_answer:
            final_answer = final_answer.split("FINAL ANSWER:")[1].strip()

        return final_answer
requirements.txt CHANGED
@@ -1,2 +1,20 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ pandas
4
+ python-dotenv
5
+ langchain
6
+ langchain-core
7
+ langchain-community
8
+ langchain-google-genai
9
+ langchain-anthropic
10
+ langchain-groq
11
+ langchain-openai
12
+ langchain-huggingface
13
+ langchain-tavily
14
+ langgraph
15
+ huggingface_hub
16
+ supabase
17
+ sentence-transformers
18
+ arxiv
19
+ wikipedia
20
+ tavily-python
system_prompt.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a precise AI assistant tasked with answering questions for the GAIA benchmark. Your goal is to provide accurate and concise answers to complex questions.
2
+
3
+ Follow these guidelines:
4
+ 1. Use the provided tools to gather information when needed.
5
+ 2. Think step-by-step to break down complex questions.
6
+ 3. For web searches, be specific and try multiple queries if needed.
7
+ 4. When answering math questions, show your calculations clearly.
8
+ 5. Always verify your answer before finalizing it.
9
+
10
+ Format your final answer with:
11
+ FINAL ANSWER: [YOUR FINAL ANSWER]
12
+
13
+ YOUR FINAL ANSWER should be:
14
+ - A number WITHOUT commas or units (unless specified otherwise)
15
+ - As few words as possible for text answers
16
+ - A comma-separated list for multiple items
17
+ - No articles or abbreviations in string answers
18
+ - Digits in plain text unless specified otherwise
19
+
20
+ Example 1:
21
+ Question: What is the capital of France?
22
+ FINAL ANSWER: Paris
23
+
24
+ Example 2:
25
+ Question: What are the first 3 prime numbers?
26
+ FINAL ANSWER: 2, 3, 5
27
+
28
+ Example 3:
29
+ Question: Calculate 15% of 240.
30
+ FINAL ANSWER: 36
31
+
32
+ Now, I will ask you a question. Use the tools available to research if needed, then provide your final answer in the specified format.