import os

# This assumes 'file_handler.py' is in the same directory or accessible via PYTHONPATH
from file_handler import get_task_file_path, DEFAULT_FILES_DIR
from youtube_tool import get_youtube_video_transcript
from file_processing_tools import get_csv_data_summary, get_excel_data_summary
from audio_tools import transcribe_mp3_audio_file
from smolagents import (
    ToolCallingAgent,
    DuckDuckGoSearchTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    PythonInterpreterTool,
    LiteLLMModel,
)
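
# Note: file_handler, youtube_tool, file_processing_tools and audio_tools are
# assumed to be project-local modules; the imported functions are expected to be
# smolagents-compatible tools (e.g. decorated with @tool), and DEFAULT_FILES_DIR
# to be the directory where task files are stored.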


class BasicAgent:
    def __init__(self):
        print("BasicAgent initialized.")
        # This agent will use the DEFAULT_FILES_DIR from file_handler for context in prompts
        self.files_dir_for_prompt_context = os.path.abspath(DEFAULT_FILES_DIR)

    def __call__(self, task_id: str, question: str) -> str:
        print(f"\nProcessing Task ID: {task_id}")
        print(f"Original Question (first 70 chars): {question[:70]}...")

        # Use the function from file_handler.py to get the local file path.
        # It checks for a local copy first, then downloads if needed,
        # using DEFAULT_FILES_DIR ("files") by default.
        local_file_path_str = get_task_file_path(task_id)
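
        # get_task_file_path is assumed to return the path as a string, or a
        # falsy value (e.g. None) when no file is associated with this task,
        # which is what the check below relies on.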
        file_context_for_prompt: str
        if local_file_path_str:
            file_context_for_prompt = (
                f"--- File Information for Task ID '{task_id}' ---\n"
                "A file relevant to this task has been made available to the agent. "
                # local_file_path_str is the local path string returned above
                f"Its local path is: '{local_file_path_str}'. "
                "If the question requires information from this file, you MUST pass the full path to the proper tool "
                "to access and process its content from this exact local path.\n"
                "--- End File Information ---"
            )
        else:
            file_context_for_prompt = ""

        question_to_llm = (
            f"{question}\n\n"
            f"{file_context_for_prompt}\n"
        )
        print(
            f"\nContext for LLM (first 400 chars of question part):\n{question_to_llm[:400]}...")

        # LLM as specified by the user (direct Groq API access)
        model = LiteLLMModel(
            model_id="groq/deepseek-r1-distill-llama-70b",
            api_key=os.getenv("GROQ_API_KEY")
        )
        # Use a local LiteLLM proxy server to avoid rate-limit issues.
        # Note: this assignment overrides the direct-API model defined above.
        model = LiteLLMModel(
            model_id="groq/deepseek-r1-distill-llama-70b",
            api_base="http://127.0.0.1:4000"
        )

        agent_tools = [
            get_youtube_video_transcript,
            get_csv_data_summary,
            get_excel_data_summary,
            transcribe_mp3_audio_file,
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaSearchTool(),
            PythonInterpreterTool(authorized_imports=[
                'statistics', 'unicodedata', 'collections', 'queue', 'time', 'pandas', 'openpyxl',
                'stat', 'random', 'datetime', 're', 'math', 'itertools', 'os', 'sys',
                'io', 'csv', 'json', 'pathlib', 'subprocess', 'base64'
            ])
        ]
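        # The interpreter restricts generated code to its default safe imports
        # plus the modules listed in authorized_imports; note that 'os' and
        # 'subprocess' effectively grant file-system and shell access.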

        agent = ToolCallingAgent(
            tools=agent_tools,
            model=model,
            max_steps=8,
            name="TaskProcessorAgent",
            description=(
                "An agent designed to answer questions by searching the web, "
                "processing local files (if a path is provided in 'File Information'), "
                "and executing Python code."
            ),
            verbosity_level=2
        )
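        # max_steps caps the number of reasoning/tool-call iterations before the
        # agent gives up; verbosity_level=2 makes smolagents log each step.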

        # Simplified prompt template
        prompt_template = f"""
Your primary goal is to accurately and concisely answer the provided question using your available tools and any supplied information.
The framework will provide you with a list of available tools and their descriptions.

Key Instructions:

1. **Understand the Task**: Carefully read the entire question. Note any URLs, specific phrases to find, or file information (check the "File Information" section provided below the question; it states if a local file is available and its path).

2. **Strategize and Select Tools**:
   * Choose the most appropriate tool(s) for the task based on the question, any file details, URLs provided, and the capabilities of your available tools.
   * For general web research or finding information/URLs, consider tools like `web_search` or `wikipedia_search`.
   * To get content from specific web page URLs, use a tool like `visit_webpage`.
   * **For YouTube videos**: If the question is about the content of a YouTube video (e.g., "Examine the video at https://www.youtube.com/..."), consider using the `get_youtube_video_transcript` tool with the video URL or ID to get its transcript. **Once a transcript is retrieved, your primary focus should be to analyze this transcript to answer the question.**
   * **Working with Local Files**: If the "File Information" section indicates a local file path is available *and that file's content is needed to answer the question*, the `PythonInterpreterTool` is the primary tool to access this local file. Generate Python code for this tool to open the file (using its full, exact local path from "File Information"), then read, process, or execute its content as required.
   * For general calculations, data analysis not involving a mentioned local file, or other Python tasks, the `PythonInterpreterTool` is appropriate.

3. **Information Processing and Answer Extraction**:
   * **Direct Tool Output First**: When a tool (like `get_youtube_video_transcript`, `visit_webpage`, or `PythonInterpreterTool` reading a file) provides specific content (e.g., a transcript, web page text, file content):
     a. **Thoroughly analyze THIS content first to find the answer.**
     b. If the question asks for specific dialogue or phrases (e.g., "What does X say in response to Y?"), search for the quoted phrases (Y) in the content and identify the subsequent statement (X's response).
     c. Only consider using other tools (like `web_search`) to re-verify or find the same piece of information if the direct output is clearly nonsensical, an obvious error, or explicitly states it is incomplete for the *specific question asked*. Do not fall back to `web_search` merely because the formatting of the direct output is imperfect or seems slightly unclear at first glance, as long as the data is present.
   * **Critical Evaluation**: Pay attention to details like speaker identification (if inferable), names, dates, roles (e.g., distinguish between a nominator and a promoter), and exact phrasing.
   * **Synthesize if Necessary**: Combine information from multiple *different* pieces of evidence if the question requires it.
   * **Fact-Based Answers**: Base your final answer *only* on confirmed facts from the information gathered.

4. **Handling Tool Errors**:
   * If a tool call itself returns an error: analyze the error message and try to correct the input to the tool (e.g., fix code, verify the URL). Do not immediately retry the exact same call. Consider whether a different tool or approach is more suitable.

5. **Formulate Your Response**:
   * Provide only the final, concise answer to the question.
   * Do not include your reasoning steps, apologies, self-correction narratives, or any conversational filler in the final answer.
   * **Number Formatting**:
     * For a single large number, do not use commas as thousands separators (e.g., write `1234567` not `1,234,567`).
     * Do not include units such as `$` or percent signs `%` unless the question specifically asks for them.
   * **List Formatting**:
     * If the answer is a list of items (e.g., numbers, names, page numbers) and the question implies a comma-separated format or that is the most natural way to present it:
       * Separate items with a comma followed by a single space (e.g., `apple, pear, orange` or `132, 197, 245`).
       * If the question asks for the list to be sorted, ensure it is.
   * **"I don't know"**: If, after thorough investigation (including careful analysis of direct tool outputs and appropriate error handling), you cannot determine a definitive answer, respond with the exact phrase 'I don't know'.

--- Start of Question & File Information ---
{question_to_llm}
--- End of Question & File Information ---
"""

        answer = agent.run(prompt_template)
        print(f"Agent returning answer for Task ID {task_id}: {answer}")
        return answer
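

# Minimal usage sketch for local testing. It assumes a valid GROQ_API_KEY (or a
# running LiteLLM proxy on 127.0.0.1:4000) and uses a hypothetical task ID;
# real task IDs and questions come from the evaluation framework.
if __name__ == "__main__":
    agent = BasicAgent()
    sample_answer = agent(
        task_id="demo-task-001",  # placeholder, not a real task ID
        question="What is 2 + 2?",
    )
    print(f"Sample answer: {sample_answer}")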