# leaderboard / app.py
# Author: burtenshaw
# Last commit: bfcd720 - "fix dataframe sorting again"
# File size: 3.53 kB
import json
import random
from pathlib import Path
import gradio as gr
import pandas as pd
from datasets import load_dataset
# Directory containing this file; data files are resolved relative to it.
abs_path = Path(__file__).parent

# Parsed contents of submissions.json. Read through Path.read_text so the file
# handle is closed immediately and the encoding does not depend on the locale.
submissions = json.loads(
    (abs_path / "submissions.json").read_text(encoding="utf-8")
)

# One (name, task, metric) triple per evaluated task: `name` is the key used
# for the leaderboard column, `task` is the key looked up in the parsed
# results, and `metric` is the key looked up inside that task's entry.
TASKS = [("gsm8k", "lighteval|gsm8k|0", "extractive_match")]

# Per-column datatypes handed to gr.Dataframe(datatype=...).
TYPES = [
    "markdown",
    "markdown",
    "number",
    "number",
    "markdown",
]

# Column headers handed to gr.Dataframe(headers=...).
# NOTE(review): the third header reads "MMLU" while the only task in TASKS is
# "gsm8k" - confirm which label is intended.
COLUMNS = ["User", "Model Name", "MMLU", "Average ⬆️", "Results"]

# Column widths handed to gr.Dataframe(column_widths=...).
WIDTHS = ["25%", "25%", "15%", "15%", "10%"]
def load_results(dataset, tasks=None):
    """Extract one score per task from the latest entry of a results dataset.

    Args:
        dataset: Object supporting ``dataset["latest"]["results"]``, which must
            yield a sequence of JSON strings. Only the last entry is used.
        tasks: Optional iterable of ``(name, task, metric)`` triples. When
            omitted, the module-level ``TASKS`` is used.

    Returns:
        A list of ``(name, score)`` tuples, one per task, in task order.

    Raises:
        ValueError: If the "latest"/"results" lookup raises ``KeyError``, if
            the last entry is not valid JSON, or if a task or metric key is
            missing from the parsed results.
    """
    if tasks is None:
        tasks = TASKS

    try:
        latest = dataset["latest"]["results"][-1]
    except KeyError as e:
        raise ValueError("Cannot find 'latest' key in the dataset") from e

    try:
        parsed = json.loads(latest)
    except ValueError as e:
        raise ValueError("Cannot parse the output as JSON") from e

    results = []
    for name, task, metric in tasks:
        # Always index into the full parsed payload; reusing one variable for
        # the payload and the per-task entry breaks every task after the first.
        try:
            task_scores = parsed[task]
        except KeyError as e:
            raise ValueError(f"Cannot find '{task}' key in the dataset") from e
        try:
            score = task_scores[metric]
        except KeyError as e:
            raise ValueError(f"Cannot find '{metric}' key in the dataset") from e
        results.append((name, score))
    return results
def load_submissions():
    """Build the leaderboard table from the entries in ``submissions``.

    For each entry in ``submissions["submissions"]`` the results dataset is
    loaded with ``load_dataset(<results-dataset>, "results")`` and scored with
    ``load_results``. Entries that fail to load or parse are reported with
    ``print`` and skipped, so one bad submission does not affect the others.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully loaded
        submission, float64 columns rounded to two decimals, sorted by
        "Average ⬆️" in descending order. When no submission loads, an empty
        frame with the expected columns is returned.
    """
    leaderboard = []
    for i, submission in enumerate(submissions["submissions"]):
        try:
            ds = load_dataset(submission["results-dataset"], "results")
            results = load_results(ds)
        except ValueError as e:
            # Report the submission's dataset id (not the loaded dataset) and
            # skip the row; otherwise `results` would be undefined or stale.
            print(f"Cannot load results for {submission['results-dataset']} {e}")
            continue
        except Exception as e:
            print(f"Cannot load dataset for {i} : {submission['results-dataset']} {e}")
            continue

        leaderboard_row = {}
        leaderboard_row["username"] = (
            f"[{submission['username']}](https://huggingface.co/{submission['username']})"
        )
        leaderboard_row["model_name"] = (
            f"[{submission['model_name']}](https://huggingface.co/{submission['username']}/{submission['model_name']})"
        )
        for name, result in results:
            leaderboard_row[name] = result
        leaderboard_row["Average ⬆️"] = sum(result for _, result in results) / len(
            results
        )
        leaderboard_row["results-dataset"] = (
            f"[🔗](https://huggingface.co/datasets/{submission['results-dataset']})"
        )
        leaderboard.append(leaderboard_row)

    if not leaderboard:
        # An empty DataFrame has no "Average ⬆️" column, so sort_values would
        # raise KeyError; return an empty frame with the expected columns.
        task_names = [name for name, _, _ in TASKS]
        return pd.DataFrame(
            columns=[
                "username",
                "model_name",
                *task_names,
                "Average ⬆️",
                "results-dataset",
            ]
        )

    df = pd.DataFrame(leaderboard)
    for column in df.columns:
        if df[column].dtype == "float64":
            df[column] = df[column].round(2)
    return df.sort_values(by="Average ⬆️", ascending=False)
# Gradio UI: a "Demo" tab with the leaderboard table and a "Docs" tab that
# renders docs.md from the directory containing this file.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 a smol course leaderboard
    A leaderboard of smol course students' submissions.
    """)
    with gr.Tabs():
        with gr.Tab("Demo"):
            # load_submissions() is called once here, while the UI is built.
            df = gr.Dataframe(
                label="a smol course leaderboard",
                value=load_submissions(),
                headers=COLUMNS,
                show_search="search",
                show_copy_button=True,
                show_fullscreen_button=True,
                show_row_numbers=True,
                pinned_columns=1,
                static_columns=[0],
                datatype=TYPES,
                column_widths=WIDTHS,
            )
        with gr.Tab("Docs"):
            # Explicit UTF-8 so the docs render the same regardless of locale.
            gr.Markdown(
                (Path(__file__).parent / "docs.md").read_text(encoding="utf-8")
            )

if __name__ == "__main__":
    demo.launch()