"""Gradio app that renders the smol course leaderboard.

Submissions are read from ``submissions.json`` next to this file; each
submission points at a results dataset from which the scores listed in
``TASKS`` are extracted.
"""

import json
import random
from pathlib import Path

import gradio as gr
import pandas as pd
from datasets import load_dataset

abs_path = Path(__file__).parent

# Use a context manager so the file handle is closed deterministically.
with open(abs_path / "submissions.json", encoding="utf-8") as _submissions_file:
    submissions = json.load(_submissions_file)

# Each entry is (row key in the leaderboard, task key in the results JSON,
# metric key inside that task's entry).
TASKS = [("gsm8k", "lighteval|gsm8k|0", "extractive_match")]

# Display datatypes, headers and widths for the five leaderboard columns,
# in the same order as the keys inserted by ``load_submissions``.
TYPES = [
    "markdown",
    "markdown",
    "number",
    "number",
    "markdown",
]
# NOTE(review): the third header reads "MMLU" while the only configured task
# is gsm8k -- confirm which label is intended.
COLUMNS = ["User", "Model Name", "MMLU", "Average ⬆️", "Results"]
WIDTHS = ["25%", "25%", "15%", "15%", "10%"]


def load_results(dataset) -> list[tuple]:
    """Extract the configured task metrics from a results dataset.

    Args:
        dataset: Object indexable as ``dataset["latest"]["results"]``; the
            last element of that sequence must be a JSON document (string)
            keyed by task name.

    Returns:
        A list of ``(name, value)`` tuples, one per entry in ``TASKS``.

    Raises:
        ValueError: If the ``latest``/``results`` entry is missing or empty,
            the payload is not valid JSON, or a task or metric key is absent.
    """
    try:
        raw = dataset["latest"]["results"][-1]
    except KeyError as e:
        raise ValueError("Cannot find 'latest' key in the dataset") from e
    except IndexError as e:
        raise ValueError("The 'latest' results in the dataset are empty") from e

    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError) as e:
        # TypeError covers a payload that is not str/bytes at all.
        raise ValueError("Cannot parse the output as JSON") from e

    results = []
    for name, task, metric in TASKS:
        # Always index into the parsed document itself; reusing one variable
        # here would make every task after the first look inside the previous
        # task's result instead of the JSON payload.
        try:
            task_scores = parsed[task]
        except KeyError as e:
            raise ValueError(f"Cannot find '{task}' key in the dataset") from e
        try:
            value = task_scores[metric]
        except KeyError as e:
            raise ValueError(f"Cannot find '{metric}' key in the dataset") from e
        results.append((name, value))
    return results


def load_submissions() -> pd.DataFrame:
    """Build the leaderboard table from every entry in ``submissions``.

    For each submission the results dataset is loaded with the ``"results"``
    configuration, the task scores are extracted, and a row with markdown
    links for the user, model and dataset is assembled.

    Returns:
        A DataFrame with one row per submission.

    Raises:
        ValueError: If the results of any submission cannot be loaded.
    """
    leaderboard = []
    for submission in submissions["submissions"]:
        username = submission["username"]
        model_name = submission["model_name"]
        results_dataset = submission["results-dataset"]

        ds = load_dataset(results_dataset, "results")
        try:
            results = load_results(ds)
        except ValueError as e:
            # Report the dataset id from the submission; the loaded dataset
            # object has no 'results-dataset' entry to look up.
            raise ValueError(
                f"Cannot load results for {results_dataset}"
            ) from e

        # Key insertion order defines the column order of the DataFrame.
        leaderboard_row = {
            "username": f"[{username}](https://huggingface.co/{username})",
            "model_name": (
                f"[{model_name}](https://huggingface.co/{username}/{model_name})"
            ),
        }
        for name, result in results:
            leaderboard_row[name] = result
        leaderboard_row["Average ⬆️"] = sum(
            result for _, result in results
        ) / len(results)
        leaderboard_row["results-dataset"] = (
            f"[🔗](https://huggingface.co/datasets/{results_dataset})"
        )
        leaderboard.append(leaderboard_row)

    return pd.DataFrame(leaderboard)


with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 a smol course leaderboard

    A leaderboard of smol course students' submissions.
    """)
    with gr.Tabs():
        with gr.Tab("Demo"):
            df = gr.Dataframe(
                label="a smol course leaderboard",
                value=load_submissions(),
                headers=COLUMNS,
                show_search="search",
                show_copy_button=True,
                show_fullscreen_button=True,
                show_row_numbers=True,
                pinned_columns=1,
                static_columns=[0],
                datatype=TYPES,
                column_widths=WIDTHS,
            )
        with gr.Tab("Docs"):
            gr.Markdown((abs_path / "docs.md").read_text(encoding="utf-8"))

if __name__ == "__main__":
    demo.launch()