|
import requests |
|
from bs4 import BeautifulSoup |
|
import pandas as pd |
|
import gradio as gr |
|
|
|
# Root URL of the leaderboard site; the path suffixes below are appended to it.
BASE_URL = "https://scale.com/leaderboard"

# Maps the display name offered in the UI to the path suffix of that page.
# An empty suffix selects the page at BASE_URL itself.
# NOTE(review): "Methodology" looks like a documentation page rather than a
# leaderboard -- confirm it actually contains the table the scraper expects.
LEADERBOARDS = {
    "Main Leaderboard": "",
    "Adversarial Robustness": "/adversarial_robustness",
    "Coding": "/coding",
    "Instruction Following": "/instruction_following",
    "Math": "/math",
    "Spanish": "/spanish",
    "Methodology": "/methodology",
}
|
|
|
def scrape_leaderboard(leaderboard):
    """Fetch one leaderboard page and return its table as a DataFrame.

    Args:
        leaderboard: A key of ``LEADERBOARDS`` naming the page to scrape.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``Model``,
        ``Score`` and ``95% Confidence``. Every cell holds the stripped
        text found on the page; nothing is converted to a number.

    Raises:
        KeyError: If ``leaderboard`` is not a key of ``LEADERBOARDS``.
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the expected container or table is missing, or a
            data row lacks its rank or model name.
    """
    url = BASE_URL + LEADERBOARDS[leaderboard]

    # Bound the wait so an unresponsive server cannot hang the caller, and
    # fail on HTTP errors instead of trying to parse an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    leaderboard_div = soup.find('div', class_='flex flex-col gap-4 sticky top-20')
    if leaderboard_div is None:
        raise ValueError("Leaderboard div not found. The page structure might have changed.")

    table = leaderboard_div.find('table', class_='w-full caption-bottom text-sm')
    if table is None:
        raise ValueError("Leaderboard table not found within the div.")

    # Fall back to the table itself when the markup carries no <tbody>.
    body = table.find('tbody') or table

    data = []
    for row in body.find_all('tr'):
        cols = row.find_all('td')
        if len(cols) < 3:
            # Rows without the three expected cells (e.g. header or spacer
            # rows) carry no leaderboard entry.
            continue

        # The first cell holds both the rank (first token of the flex div)
        # and the model name (text of the link).
        rank_div = cols[0].find('div', class_='flex')
        model_link = cols[0].find('a')
        rank_tokens = rank_div.text.split() if rank_div is not None else []
        if not rank_tokens or model_link is None:
            raise ValueError(
                "Leaderboard row is missing its rank or model name. "
                "The page structure might have changed."
            )

        data.append([
            rank_tokens[0],
            model_link.text.strip(),
            cols[1].text.strip(),
            cols[2].text.strip(),
        ])

    return pd.DataFrame(data, columns=['Rank', 'Model', 'Score', '95% Confidence'])
|
|
|
def update_leaderboard(leaderboard):
    """Return the selected leaderboard as an HTML string.

    Args:
        leaderboard: Name of the leaderboard to show; passed unchanged to
            ``scrape_leaderboard``.

    Returns:
        The markup produced by ``DataFrame.to_html(index=False)`` on
        success. On any failure, the text ``"An error occurred: ..."``
        with the exception message HTML-escaped.
    """
    # Local import keeps this function self-contained; ``html`` is stdlib.
    import html

    # This is the UI boundary: every failure is reported as text rather
    # than propagated to the caller.
    try:
        df = scrape_leaderboard(leaderboard)
        return df.to_html(index=False)
    except Exception as e:
        # The return value is consumed as HTML, so characters such as "<"
        # or "&" in the exception text must be escaped; otherwise parts of
        # the message would be parsed as markup and disappear.
        return f"An error occurred: {html.escape(str(e))}"
|
|
|
|
|
# One dropdown in, one HTML pane out: the chosen leaderboard name is handed to
# update_leaderboard and whatever string it returns is rendered as HTML.
iface = gr.Interface(
    fn=update_leaderboard,
    inputs=gr.Dropdown(choices=list(LEADERBOARDS), label="Select Leaderboard"),
    outputs=gr.HTML(label="Leaderboard Data"),
    title="Scale AI Leaderboard Viewer",
    description="Select a leaderboard to view the latest data from Scale.com",
)

# Launched unconditionally at module level, so this also runs on import.
iface.launch()