Renzo
Update .gitignore, refactor app.py for improved agent functionality, and enhance README
6c75f10
raw
history blame
4.83 kB
import os
import asyncio
import argparse
import gradio as gr
import requests
import pandas as pd
from agno.agent import RunResponse
from agent import agent
# Base URL of the scoring service; run_agent appends "/questions" and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
async def _async_answer(answer_text: str) -> str:
    """Run the shared agent on one prompt and return its reply as text.

    Args:
        answer_text: The prompt handed to ``agent.arun``.

    Returns:
        The run's ``content`` as a string; an empty string when the run
        produced no content at all.
    """
    response: RunResponse = await agent.arun(answer_text)
    content = response.content
    # The signature promises a str. Without this coercion a run that yields
    # no content (None) or a structured object would leak a non-string value
    # to callers that serialise the answer.
    if content is None:
        return ""
    return content if isinstance(content, str) else str(content)
class BasicAgent:
    """Synchronous, callable front-end for the async ``_async_answer`` helper."""

    def __call__(self, question: str) -> str:
        """Answer ``question`` by driving ``_async_answer`` to completion."""
        pending = _async_answer(question)
        # asyncio.run blocks the calling thread until the coroutine finishes.
        return asyncio.run(pending)
def run_agent(profile: gr.OAuthProfile | None, task_id: str | None = None, submit: bool = True):
    """Fetch evaluation questions, answer them with the agent, optionally submit.

    Args:
        profile: OAuth profile of the logged-in user, or None. Only required
            when ``submit`` is true, because the username is part of the
            submission payload.
        task_id: When given and non-blank, only the question whose
            ``task_id`` matches (compared as strings) is run.
        submit: When false, answers are produced and reported but nothing is
            posted to the scoring service.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per attempted question, or None when the run stopped before
        any question was attempted (login missing, agent/question fetch
        failure, unknown task id).
    """
    # A dry run never sends the username anywhere, so it must not demand a
    # login. Gating unconditionally made every call with profile=None and
    # submit=False (the local `--task-id` mode) bail out before doing any work.
    if submit and not profile:
        return "Please log in to Hugging Face.", None
    username = f"{profile.username}" if profile else ""

    space_id = os.getenv("SPACE_ID")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent_instance = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Broad catch on purpose: this function is the UI boundary and reports
    # every failure as a status string instead of raising.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(questions_data, list):
        # Anything but a list of question objects would break the loop below.
        return "Error fetching questions: unexpected response format.", None

    # Strip so that stray whitespace typed into the UI still matches; a blank
    # value means "no filter".
    wanted = str(task_id).strip() if task_id is not None else ""
    if wanted:
        questions_data = [
            q
            for q in questions_data
            if isinstance(q, dict) and str(q.get("task_id")) == wanted
        ]
        if not questions_data:
            return f"Task {wanted} not found.", None

    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        tid = item.get("task_id")
        qtext = item.get("question")
        if not tid or qtext is None:
            continue
        try:
            submitted_answer = agent_instance(qtext)
        except Exception as e:
            # A failing question is logged but left out of the payload so the
            # remaining answers can still be submitted.
            results_log.append({"Task ID": tid, "Question": qtext, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": tid, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": tid, "Question": qtext, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "No answers produced.", pd.DataFrame(results_log)
    if not submit:
        return "Test mode: nothing submitted.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers_payload,
    }
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})\n"
            f"Message: {result_data.get('message', '')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
def run_agent_single(profile: gr.OAuthProfile | None, task_id: str):
    """Dry-run handler: run the agent without submitting the answers.

    An empty ``task_id`` is forwarded as None, i.e. no task filter.
    """
    selected = task_id or None
    return run_agent(profile, selected, submit=False)
def run_agent_all(profile: gr.OAuthProfile | None, task_id: str):
    """Submit handler: run the agent and post the answers for scoring.

    An empty ``task_id`` is forwarded as None, i.e. no task filter.
    """
    selected = task_id or None
    return run_agent(profile, selected, submit=True)
# UI definition. Components are created in the order they should appear.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.LoginButton()

    task_id_input = gr.Textbox(
        placeholder="e.g. 2023060607",
        label="Task ID (optional)",
    )
    run_test_button = gr.Button("Test Single Task (no submit)")
    run_all_button = gr.Button("Run & Submit All")

    status_output = gr.Textbox(interactive=False, lines=5, label="Status")
    results_table = gr.DataFrame(wrap=True, label="Results")

    # Only the task id is wired as an explicit input; the handlers' `profile`
    # parameter is not listed here and is expected to be supplied by Gradio
    # from its OAuth type hint.
    run_test_button.click(
        run_agent_single,
        [task_id_input],
        [status_output, results_table],
    )
    run_all_button.click(
        run_agent_all,
        [task_id_input],
        [status_output, results_table],
    )

    gr.Markdown("Running all tasks may take time. Use the single‑task button to debug quickly.")
if __name__ == "__main__":
    # Echo whichever Space environment variables are set.
    for env_name in ("SPACE_HOST", "SPACE_ID"):
        env_value = os.getenv(env_name)
        if env_value:
            print(f"{env_name}: {env_value}")

    cli = argparse.ArgumentParser()
    cli.add_argument("--task-id", help="Run a single task locally without submission")
    # parse_known_args leaves unrecognised arguments alone instead of exiting.
    options, _unknown = cli.parse_known_args()

    if not options.task_id:
        # No task requested on the command line: serve the web UI.
        demo.launch(debug=True, share=False)
    else:
        # One-off local run; nothing is submitted.
        status, table = run_agent(profile=None, task_id=options.task_id, submit=False)
        print(status)
        if table is not None:
            print(table)