schoemantian committed
Commit a64f076 · verified · 1 parent: 31b01d1

Fix Errors in responses

Files changed (3)
  1. app.py +74 -131
  2. gaia_agent.py +156 -172
  3. system_prompt.txt +16 -21
app.py CHANGED
@@ -6,163 +6,114 @@ import pandas as pd
 from dotenv import load_dotenv
 from gaia_agent import GAIAAgent
 
-# Load environment variables
 load_dotenv()
 
-# Constants
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-class GAIAAssessmentAgent:
-    """Agent wrapper for the GAIA assessment."""
 
-    def __init__(self, provider="groq"):
-        """Initialize the agent with the specified provider.
-
-        Args:
-            provider: The model provider to use ("groq", "google", "anthropic", "openai")
-        """
-        print(f"Initializing GAIAAssessmentAgent with provider: {provider}")
         self.agent = GAIAAgent(provider=provider)
-        print("Agent initialized successfully")
-
     def __call__(self, question: str) -> str:
-        """Process a question and return the answer.
-
-        Args:
-            question: The question to answer
-
-        Returns:
-            The answer to the question
-        """
-        print(f"Processing question (first 50 chars): {question[:50]}...")
-        answer = self.agent.run(question)
-        print(f"Answer: {answer}")
-        return answer
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """Fetches all questions, runs the agent on them, submits all answers,
     and displays the results.
-
-    Args:
-        profile: The user's Hugging Face profile
-
-    Returns:
-        A tuple containing the status message and results table
     """
-    # Get Space ID for code link
-    space_id = os.getenv("SPACE_ID")
-
-    # Check if user is logged in
     if profile:
-        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
-        return "Please login to Hugging Face with the button to submit your answers.", None
-
-    # API endpoints
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
-
-    # Initialize agent
     try:
-        # Choose a provider based on available API keys
-        if os.getenv("GROQ_API_KEY"):
-            provider = "groq"
-        elif os.getenv("GOOGLE_API_KEY"):
-            provider = "google"
-        elif os.getenv("ANTHROPIC_API_KEY"):
-            provider = "anthropic"
-        elif os.getenv("OPENAI_API_KEY"):
-            provider = "openai"
-        else:
-            provider = "groq"  # Default to Groq
-
-        agent = GAIAAssessmentAgent(provider=provider)
     except Exception as e:
-        print(f"Error initializing agent: {e}")
         return f"Error initializing agent: {e}", None
-
-    # Generate code link for submission
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Code link: {agent_code}")
-
-    # Fetch questions
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
-
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
-
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
-
-    # Run agent on all questions
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-
-    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
-
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-
-        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
-
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": submitted_answer
-            })
-            print(f"Question {i+1} processed successfully")
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": f"AGENT ERROR: {e}"
-            })
-
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-
-    # Prepare submission
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload
-    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
-
-    # Submit answers
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
-
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
@@ -170,7 +121,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
@@ -181,7 +131,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
-
         status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
@@ -202,33 +151,29 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_df = pd.DataFrame(results_log)
     return status_message, results_df
 
-# Build Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Assessment Runner for Hugging Face Agents Course")
     gr.Markdown(
         """
        **Instructions:**
 
-        1. This space implements a comprehensive agent for the GAIA benchmark using several key technologies:
-           - LangGraph for agent orchestration
-           - Tool use for information retrieval
-           - Web search, Wikipedia, and ArXiv tools for research
-           - Mathematical tools for computation
 
-        2. Log in to your Hugging Face account using the button below. This is required for submission.
 
-        3. Click 'Run Evaluation & Submit Answers' to fetch questions, run the agent, and submit answers.
 
-        **Note:** The process may take some time as the agent runs through all questions.
-
-        ---
-
-        Good luck with your assessment! 🚀
        """
     )
 
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit Answers", variant="primary")
     status_output = gr.Textbox(label="Submission Status", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Answers", wrap=True)
 
@@ -238,26 +183,24 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " Starting GAIA Assessment Runner " + "-"*30)
-
-    # Check for environment variables
-    space_host = os.getenv("SPACE_HOST")
-    space_id = os.getenv("SPACE_ID")
 
-    if space_host:
-        print(f"✅ SPACE_HOST found: {space_host}")
-        print(f" Runtime URL: https://{space_host}.hf.space")
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id:
-        print(f"✅ SPACE_ID found: {space_id}")
-        print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
-        print(f" Code URL: https://huggingface.co/spaces/{space_id}/tree/main")
     else:
-        print("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")
 
-    print("-"*(65 + len(" Starting GAIA Assessment Runner ")) + "\n")
     print("Launching Gradio Interface for GAIA Assessment...")
-
     demo.launch(debug=True, share=False)
 
 from dotenv import load_dotenv
 from gaia_agent import GAIAAgent
 
 load_dotenv()
 
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+class BasicAgent:
+    """A simple wrapper for the GAIA Agent."""
 
+    def __init__(self):
+        print("BasicAgent initialized.")
+        if os.getenv("GROQ_API_KEY"):
+            provider = "groq"
+        elif os.getenv("GOOGLE_API_KEY"):
+            provider = "google"
+        else:
+            provider = "groq"
+
         self.agent = GAIAAgent(provider=provider)
+
     def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        try:
+            answer = self.agent.run(question)
+            print(f"Agent returning answer: {answer}")
+            return answer
+        except Exception as e:
+            print(f"Error processing question: {e}")
+            return f"Error: {str(e)}"
+
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
+    space_id = os.getenv("SPACE_ID")
+
     if profile:
+        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+
     try:
+        agent = BasicAgent()
     except Exception as e:
+        print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code: {agent_code}")
+
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
+
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            print(f"Processing question: {task_id}")
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            print(f"Question processed successfully")
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
+
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
         status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
     results_df = pd.DataFrame(results_log)
     return status_message, results_df
 
+
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Assessment Runner")
     gr.Markdown(
         """
        **Instructions:**
 
+        1. This implementation uses a robust LangGraph agent with multiple tools:
+           - Web search for real-time information
+           - Wikipedia for factual knowledge
+           - ArXiv for academic research
+           - Mathematical tools for calculations
 
+        2. Log in to your Hugging Face account using the button below.
 
+        3. Click 'Run Evaluation & Submit Answers' to run the agent and submit results.
 
+        **Note:** Processing may take some time as the agent works through all questions.
        """
     )
 
     gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit Answers")
     status_output = gr.Textbox(label="Submission Status", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Answers", wrap=True)
 
     )
 
 if __name__ == "__main__":
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")
 
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
+    if space_id_startup:
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
+        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+    print("-"*(60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for GAIA Assessment...")
     demo.launch(debug=True, share=False)
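
As a sanity check while reviewing, the new BasicAgent wrapper can be exercised outside the Gradio UI. This is only an illustrative sketch, not part of the commit: the file name smoke_test.py is made up, and it assumes the Space's dependencies are installed locally and that GROQ_API_KEY or GOOGLE_API_KEY is set so GAIAAgent can initialize.

# smoke_test.py -- illustrative sketch, not part of this commit
from app import BasicAgent  # importing app.py builds the Gradio Blocks but does not launch them

if __name__ == "__main__":
    agent = BasicAgent()
    # BasicAgent.__call__ catches exceptions and returns "Error: ..." strings,
    # so this call should not raise even if the LLM backend is unreachable.
    print(agent("What is the capital of France?"))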
gaia_agent.py CHANGED
@@ -1,254 +1,238 @@
 
 import os
-from typing import List, Dict, Any, Optional
 from dotenv import load_dotenv
-from langgraph.graph import START, END, StateGraph, MessagesState
-
-from langchain_core.messages import SystemMessage, HumanMessage
-from langchain_groq import ChatGroq
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_core.tools import tool
-
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition
 from langgraph.prebuilt import ToolNode
-
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.document_loaders import ArxivLoader
 
-# Load environment variables
 load_dotenv()
 
 class GAIAAgent:
-    """Agent for answering GAIA assessment questions."""
 
     def __init__(self, provider="groq"):
-        """Initialize the agent with the specified provider.
 
         Args:
-            provider: Model provider - "groq", "google", "anthropic", or "openai"
         """
-        # Set up the system prompt
-        with open("system_prompt.txt", "r", encoding="utf-8") as f:
-            system_prompt = f.read()
-
-        self.system_message = SystemMessage(content=system_prompt)
-
-        # Initialize tools
         self.tools = self._setup_tools()
-
-        # Initialize LLM based on provider
-        self.llm = self._setup_llm(provider)
-
-        # Bind tools to LLM
         self.llm_with_tools = self.llm.bind_tools(self.tools)
-
-        # Build the agent graph
         self.graph = self._build_graph()
 
     def _setup_tools(self):
         """Set up the tools for the agent."""
 
         @tool
-        def web_search(query: str) -> str:
-            """Search the web for real-time information.
 
             Args:
-                query: The search query
-
-            Returns:
-                Search results as text
             """
-            search_results = TavilySearchResults(max_results=3).invoke(query)
-            formatted_results = "\n\n".join([
-                f"SOURCE: {result.metadata.get('source', 'Unknown')}\n{result.page_content}"
-                for result in search_results
-            ])
-            return formatted_results
 
         @tool
-        def wiki_search(query: str) -> str:
-            """Search Wikipedia for information.
 
             Args:
-                query: The search query
-
-            Returns:
-                Wikipedia article content
             """
-            try:
-                wiki_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-                if not wiki_docs:
-                    return "No Wikipedia results found."
-
-                formatted_results = "\n\n".join([
-                    f"TITLE: {doc.metadata.get('title', 'Unknown')}\n{doc.page_content[:1000]}..."
-                    for doc in wiki_docs
-                ])
-                return formatted_results
-            except Exception as e:
-                return f"Error searching Wikipedia: {str(e)}"
-
         @tool
-        def arxiv_search(query: str) -> str:
-            """Search arXiv for scientific papers.
 
             Args:
-                query: The search query
-
-            Returns:
-                ArXiv paper information
             """
             try:
-                arxiv_docs = ArxivLoader(query=query, load_max_docs=2).load()
-                if not arxiv_docs:
-                    return "No arXiv results found."
-
-                formatted_results = "\n\n".join([
-                    f"TITLE: {doc.metadata.get('title', 'Unknown')}\n"
-                    f"AUTHORS: {doc.metadata.get('authors', 'Unknown')}\n"
-                    f"PUBLISHED: {doc.metadata.get('published', 'Unknown')}\n\n"
-                    f"ABSTRACT: {doc.page_content[:500]}..."
-                    for doc in arxiv_docs
-                ])
-                return formatted_results
             except Exception as e:
-                return f"Error searching arXiv: {str(e)}"
-
         @tool
-        def calculate(expression: str) -> str:
-            """Evaluate a mathematical expression.
 
             Args:
-                expression: The mathematical expression to evaluate
-
-            Returns:
-                The result of the calculation
-            """
             try:
-                # Safely evaluate the expression
-                result = eval(expression, {"__builtins__": {}}, {})
-                return f"Result: {result}"
             except Exception as e:
-                return f"Error calculating: {str(e)}"
-
-        return [web_search, wiki_search, arxiv_search, calculate]
-
-    def _setup_llm(self, provider):
-        """Set up the language model based on the provider.
-
-        Args:
-            provider: The model provider to use
 
-        Returns:
-            The initialized language model
-        """
-        if provider == "groq":
-            api_key = os.getenv("GROQ_API_KEY")
-            if not api_key:
-                raise ValueError("GROQ_API_KEY environment variable not set")
 
-            return ChatGroq(
-                model="llama3-70b-8192",  # Using Llama 3 70B model for best results
-                temperature=0.1,  # Low temperature for more precise answers
-                groq_api_key=api_key
-            )
-        elif provider == "google":
-            api_key = os.getenv("GOOGLE_API_KEY")
             if not api_key:
                 raise ValueError("GOOGLE_API_KEY environment variable not set")
 
             return ChatGoogleGenerativeAI(
-                model="gemini-1.5-pro",
                 temperature=0.1,
                 google_api_key=api_key
             )
-        elif provider == "anthropic":
-            # Import only if needed to avoid dependency issues
-            from langchain_anthropic import ChatAnthropic
-
-            api_key = os.getenv("ANTHROPIC_API_KEY")
-            if not api_key:
-                raise ValueError("ANTHROPIC_API_KEY environment variable not set")
-
-            return ChatAnthropic(
-                model="claude-3-opus-20240229",
-                temperature=0.1,
-                anthropic_api_key=api_key
-            )
-        elif provider == "openai":
-            # Import only if needed to avoid dependency issues
-            from langchain_openai import ChatOpenAI
-
-            api_key = os.getenv("OPENAI_API_KEY")
             if not api_key:
-                raise ValueError("OPENAI_API_KEY environment variable not set")
 
-            return ChatOpenAI(
-                model="gpt-4o",
                 temperature=0.1,
-                openai_api_key=api_key
             )
         else:
-            raise ValueError(f"Unsupported provider: {provider}")
 
     def _build_graph(self):
-        """Build the agent graph.
 
-        Returns:
-            The compiled state graph
-        """
-        # Define the agent node
-        def agent(state: MessagesState):
-            """Generate a response or tool calls based on the messages state."""
-            # Include system message with each invocation for consistent behavior
             messages = [self.system_message] + state["messages"]
-            response = self.llm_with_tools.invoke(messages)
-            return {"messages": state["messages"] + [response]}
 
-        # Create the graph
         builder = StateGraph(MessagesState)
-
-        # Add nodes
-        builder.add_node("agent", agent)
         builder.add_node("tools", ToolNode(self.tools))
-
-        # Add edges
-        builder.add_edge(START, "agent")
         builder.add_conditional_edges(
-            "agent",
             tools_condition,
-            {
-                "tools": "tools",
-                None: END  # END is implicitly defined in langgraph
-            }
         )
-        builder.add_edge("tools", "agent")
 
-        # Compile the graph
         return builder.compile()
 
     def run(self, question: str) -> str:
         """Process a question and return the answer.
 
         Args:
-            question: The question to process
 
         Returns:
             The answer to the question
         """
-        # Initialize messages with the user question
         messages = [HumanMessage(content=question)]
 
-        # Execute the graph
-        result = self.graph.invoke({"messages": messages})
-
-        # Extract the final answer
-        final_messages = result["messages"]
-        final_answer = final_messages[-1].content
-
-        # Extract only the part after "FINAL ANSWER:"
-        if "FINAL ANSWER:" in final_answer:
-            final_answer = final_answer.split("FINAL ANSWER:")[1].strip()
 
-        return final_answer
+"""LangGraph Agent for GAIA Assessment"""
 import os
+from typing import List, Dict, Any
 from dotenv import load_dotenv
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition
 from langgraph.prebuilt import ToolNode
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.tools import tool
+from langchain_groq import ChatGroq
+from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.document_loaders import ArxivLoader
 
 load_dotenv()
 
 class GAIAAgent:
+    """Agent for the GAIA assessment."""
 
     def __init__(self, provider="groq"):
+        """Initialize the agent.
 
         Args:
+            provider: The model provider to use (groq, google)
         """
+        self.provider = provider
         self.tools = self._setup_tools()
+        self.llm = self._setup_llm()
         self.llm_with_tools = self.llm.bind_tools(self.tools)
         self.graph = self._build_graph()
 
+        # Load system prompt
+        self.system_message = self._load_system_prompt()
+
+    def _load_system_prompt(self):
+        """Load the system prompt from a file."""
+        try:
+            with open("system_prompt.txt", "r", encoding="utf-8") as f:
+                system_prompt = f.read()
+        except FileNotFoundError:
+            # Fallback system prompt if file not found
+            system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
+Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+Your answer should only start with "FINAL ANSWER: ", then follows with the answer."""
+
+        return SystemMessage(content=system_prompt)
+
     def _setup_tools(self):
         """Set up the tools for the agent."""
 
         @tool
+        def multiply(a: int, b: int) -> int:
+            """Multiply two numbers.
+
+            Args:
+                a: first int
+                b: second int
+            """
+            return a * b
+
+        @tool
+        def add(a: int, b: int) -> int:
+            """Add two numbers.
 
             Args:
+                a: first int
+                b: second int
             """
+            return a + b
+
+        @tool
+        def subtract(a: int, b: int) -> int:
+            """Subtract two numbers.
 
+            Args:
+                a: first int
+                b: second int
+            """
+            return a - b
+
         @tool
+        def divide(a: int, b: int) -> float:
+            """Divide two numbers.
 
             Args:
+                a: first int
+                b: second int
             """
+            if b == 0:
+                raise ValueError("Cannot divide by zero.")
+            return a / b
+
         @tool
+        def modulus(a: int, b: int) -> int:
+            """Get the modulus of two numbers.
 
             Args:
+                a: first int
+                b: second int
             """
+            return a % b
+
+        @tool
+        def wiki_search(query: str) -> str:
+            """Search Wikipedia for a query and return maximum 2 results.
+
+            Args:
+                query: The search query."""
             try:
+                search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+                formatted_search_docs = "\n\n---\n\n".join(
+                    [
+                        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                        for doc in search_docs
+                    ])
+                return {"wiki_results": formatted_search_docs}
             except Exception as e:
+                return {"wiki_results": f"Error searching Wikipedia: {str(e)}"}
+
         @tool
+        def web_search(query: str) -> str:
+            """Search Tavily for a query and return maximum 3 results.
 
             Args:
+                query: The search query."""
             try:
+                search_docs = TavilySearchResults(max_results=3).invoke(query=query)
+                formatted_search_docs = "\n\n---\n\n".join(
+                    [
+                        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                        for doc in search_docs
+                    ])
+                return {"web_results": formatted_search_docs}
             except Exception as e:
+                return {"web_results": f"Error searching web: {str(e)}"}
+
+        @tool
+        def arxiv_search(query: str) -> str:
+            """Search Arxiv for a query and return maximum 3 result.
 
+            Args:
+                query: The search query."""
+            try:
+                search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+                formatted_search_docs = "\n\n---\n\n".join(
+                    [
+                        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+                        for doc in search_docs
+                    ])
+                return {"arxiv_results": formatted_search_docs}
+            except Exception as e:
+                return {"arxiv_results": f"Error searching ArXiv: {str(e)}"}
 
+        return [
+            multiply,
+            add,
+            subtract,
+            divide,
+            modulus,
+            wiki_search,
+            web_search,
+            arxiv_search,
+        ]
+
+    def _setup_llm(self):
+        """Set up the language model."""
+        if self.provider == "google":
+            api_key = os.environ.get("GOOGLE_API_KEY")
             if not api_key:
                 raise ValueError("GOOGLE_API_KEY environment variable not set")
 
             return ChatGoogleGenerativeAI(
+                model="gemini-1.5-pro",
                 temperature=0.1,
                 google_api_key=api_key
             )
+        elif self.provider == "groq":
+            api_key = os.environ.get("GROQ_API_KEY")
             if not api_key:
+                raise ValueError("GROQ_API_KEY environment variable not set")
 
+            return ChatGroq(
+                model="llama3-70b-8192",
                 temperature=0.1,
+                groq_api_key=api_key
             )
         else:
+            raise ValueError(f"Unsupported provider: {self.provider}")
 
     def _build_graph(self):
+        """Build the agent graph."""
 
+        def assistant(state: MessagesState):
+            """The assistant node in the graph."""
             messages = [self.system_message] + state["messages"]
+            return {"messages": [self.llm_with_tools.invoke(messages)]}
 
         builder = StateGraph(MessagesState)
+        builder.add_node("assistant", assistant)
         builder.add_node("tools", ToolNode(self.tools))
+        builder.add_edge(START, "assistant")
         builder.add_conditional_edges(
+            "assistant",
             tools_condition,
         )
+        builder.add_edge("tools", "assistant")
 
         return builder.compile()
 
     def run(self, question: str) -> str:
         """Process a question and return the answer.
 
         Args:
+            question: The question to answer
 
         Returns:
             The answer to the question
         """
         messages = [HumanMessage(content=question)]
 
+        try:
+            result = self.graph.invoke({"messages": messages})
 
+            final_answer = result["messages"][-1].content
+
+            if "FINAL ANSWER:" in final_answer:
+                final_answer = final_answer.split("FINAL ANSWER:")[1].strip()
+
+            return final_answer
+        except Exception as e:
+            print(f"Error running agent: {e}")
+            return f"Error: {str(e)}"
system_prompt.txt CHANGED
@@ -1,32 +1,27 @@
-You are a precise AI assistant tasked with answering questions for the GAIA benchmark. Your goal is to provide accurate and concise answers to complex questions.
 
-Follow these guidelines:
-1. Use the provided tools to gather information when needed.
-2. Think step-by-step to break down complex questions.
-3. For web searches, be specific and try multiple queries if needed.
-4. When answering math questions, show your calculations clearly.
-5. Always verify your answer before finalizing it.
 
-Format your final answer with:
-FINAL ANSWER: [YOUR FINAL ANSWER]
-
-YOUR FINAL ANSWER should be:
-- A number WITHOUT commas or units (unless specified otherwise)
-- As few words as possible for text answers
-- A comma-separated list for multiple items
-- No articles or abbreviations in string answers
-- Digits in plain text unless specified otherwise
-
-Example 1:
 Question: What is the capital of France?
 FINAL ANSWER: Paris
 
-Example 2:
 Question: What are the first 3 prime numbers?
 FINAL ANSWER: 2, 3, 5
 
-Example 3:
 Question: Calculate 15% of 240.
 FINAL ANSWER: 36
 
-Now, I will ask you a question. Use the tools available to research if needed, then provide your final answer in the specified format.
+You are a helpful assistant tasked with answering questions using a set of tools.
+Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
 
+Here are some example questions and answers:
 
 Question: What is the capital of France?
+Thought: The capital of France is Paris.
 FINAL ANSWER: Paris
 
 Question: What are the first 3 prime numbers?
+Thought: The first three prime numbers are 2, 3, and 5.
 FINAL ANSWER: 2, 3, 5
 
 Question: Calculate 15% of 240.
+Thought: To calculate 15% of 240, I multiply 240 by 0.15. This gives me 240 * 0.15 = 36.
 FINAL ANSWER: 36
 
+For each question:
+1. Think through the problem step-by-step
+2. Use tools when needed to gather information
+3. Ensure you understand exactly what is being asked
+4. Format your final answer according to the template
+
+If you need to search for information, be specific in your queries. If you need to perform calculations, show your work. Always double-check your answer before submitting it.
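
The "FINAL ANSWER:" template is the contract between this prompt and GAIAAgent.run(): the agent splits the model output on that marker and submits only what follows, so a response that omits the marker is submitted verbatim. A minimal illustration of that post-processing, mirroring the split in gaia_agent.py with a made-up model output:

raw = "Thought: The capital of France is Paris.\nFINAL ANSWER: Paris"
final = raw.split("FINAL ANSWER:")[1].strip() if "FINAL ANSWER:" in raw else raw
print(final)  # -> Paris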