Spaces:
Sleeping
Sleeping
Downgraded to smolagents 1.18.0 and switched back to a single CodeAgent; scored 20% on the GAIA benchmark.
Browse files
- app.py +1 -2
- configuration.py +2 -2
- functions/agent.py +15 -37
- requirements.txt +1 -1
app.py
CHANGED
@@ -94,7 +94,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
94 |
|
95 |
try:
|
96 |
submitted_answer = agent.run(
|
97 |
-
question_text
|
98 |
)
|
99 |
|
100 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
@@ -209,7 +209,6 @@ with gr.Blocks() as demo:
|
|
209 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
210 |
|
211 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
212 |
-
# Removed max_rows=10 from DataFrame constructor
|
213 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
214 |
|
215 |
run_button.click( # pylint: disable=E1101
|
|
|
94 |
|
95 |
try:
|
96 |
submitted_answer = agent.run(
|
97 |
+
INSTRUCTIONS + '\n' + question_text
|
98 |
)
|
99 |
|
100 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
|
|
209 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
210 |
|
211 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
|
|
212 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
213 |
|
214 |
run_button.click( # pylint: disable=E1101
|
configuration.py
CHANGED
@@ -6,12 +6,12 @@ Contains API URLs and agent instructions used throughout the application.
|
|
6 |
# pylint: disable=line-too-long
|
7 |
|
8 |
# Which questions to answer
|
9 |
-
QUESTIONS = [1]
|
10 |
|
11 |
# GAIA benchmark scoring API
|
12 |
DEFAULT_API_URL = 'https://agents-course-unit4-scoring.hf.space'
|
13 |
|
14 |
# Additional instructions for agent. See here: https://huggingface.co/spaces/gaia-benchmark/leaderboard
|
15 |
INSTRUCTIONS = """
|
16 |
-
You are a general AI assistant. I will ask you a question. Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
17 |
"""
|
|
|
6 |
# pylint: disable=line-too-long
|
7 |
|
8 |
# Which questions to answer
|
9 |
+
QUESTIONS = [1,3,5,8,9,11,13,17,18,20]
|
10 |
|
11 |
# GAIA benchmark scoring API
|
12 |
DEFAULT_API_URL = 'https://agents-course-unit4-scoring.hf.space'
|
13 |
|
14 |
# Additional instructions for agent. See here: https://huggingface.co/spaces/gaia-benchmark/leaderboard
|
15 |
INSTRUCTIONS = """
|
16 |
+
You are a general AI assistant. I will ask you a question. Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Submit the final answer via the final_answer tool.
|
17 |
"""
|
functions/agent.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
'''Agent definition for GAIA question answering system.'''
|
2 |
|
3 |
# Imports for agent creation
|
4 |
-
from smolagents import CodeAgent,
|
5 |
from functions.tools import (
|
6 |
google_search,
|
7 |
wikipedia_search,
|
@@ -12,50 +12,28 @@ def create_agent():
|
|
12 |
'''Creates agent for GAIA question answering system.'''
|
13 |
|
14 |
model = InferenceClientModel(
|
15 |
-
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
|
|
|
|
16 |
)
|
17 |
|
18 |
-
|
|
|
|
|
19 |
google_search,
|
20 |
VisitWebpageTool()
|
21 |
]
|
22 |
|
23 |
-
|
24 |
model=model,
|
25 |
-
tools=
|
26 |
-
|
27 |
-
|
28 |
verbosity_level=1,
|
29 |
-
max_steps=
|
30 |
-
|
31 |
-
|
32 |
-
wikipedia_agent_tools = [
|
33 |
-
wikipedia_search,
|
34 |
-
get_wikipedia_page
|
35 |
-
]
|
36 |
-
|
37 |
-
wikipedia_agent = CodeAgent(
|
38 |
-
model=model,
|
39 |
-
tools=wikipedia_agent_tools,
|
40 |
-
additional_authorized_imports=['bs4.*'],
|
41 |
-
name="wikipedia_agent",
|
42 |
-
description="Search Wikipedia and retrieve pages",
|
43 |
-
verbosity_level=1,
|
44 |
-
max_steps=10
|
45 |
-
)
|
46 |
-
|
47 |
-
manager_agent = CodeAgent(
|
48 |
-
model=model,
|
49 |
-
tools=[],
|
50 |
-
additional_authorized_imports=['bs4.*'],
|
51 |
-
name="manager_agent",
|
52 |
-
description="Manages the workflow of other agents",
|
53 |
-
managed_agents=[web_agent, wikipedia_agent],
|
54 |
-
planning_interval=1,
|
55 |
-
verbosity_level=2,
|
56 |
-
max_steps=15,
|
57 |
)
|
58 |
|
59 |
-
manager_agent.visualize()
|
60 |
|
61 |
-
return
|
|
|
1 |
'''Agent definition for GAIA question answering system.'''
|
2 |
|
3 |
# Imports for agent creation
|
4 |
+
from smolagents import CodeAgent, InferenceClientModel, VisitWebpageTool
|
5 |
from functions.tools import (
|
6 |
google_search,
|
7 |
wikipedia_search,
|
|
|
12 |
'''Creates agent for GAIA question answering system.'''
|
13 |
|
14 |
model = InferenceClientModel(
|
15 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
16 |
+
provider="hf-inference",
|
17 |
+
max_tokens=8096
|
18 |
)
|
19 |
|
20 |
+
tools = [
|
21 |
+
wikipedia_search,
|
22 |
+
get_wikipedia_page,
|
23 |
google_search,
|
24 |
VisitWebpageTool()
|
25 |
]
|
26 |
|
27 |
+
agent = CodeAgent(
|
28 |
model=model,
|
29 |
+
tools=tools,
|
30 |
+
additional_authorized_imports=['bs4.*', 'json'],
|
31 |
+
name="GAIA_agent",
|
32 |
verbosity_level=1,
|
33 |
+
max_steps=20,
|
34 |
+
planning_interval=5,
|
35 |
+
description="GAIA agent for question answering"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
)
|
37 |
|
|
|
38 |
|
39 |
+
return agent
|
requirements.txt
CHANGED
@@ -5,6 +5,6 @@ gradio[oauth]
|
|
5 |
markdownify
|
6 |
mwparserfromhell
|
7 |
requests
|
8 |
-
smolagents
|
9 |
tinycss2
|
10 |
wikipedia-api
|
|
|
5 |
markdownify
|
6 |
mwparserfromhell
|
7 |
requests
|
8 |
+
smolagents==1.13.0
|
9 |
tinycss2
|
10 |
wikipedia-api
|