File size: 3,189 Bytes
362d13b
8b0cd75
 
362d13b
 
 
8b0cd75
362d13b
 
 
 
1c7c01e
8b0cd75
 
 
 
 
 
 
 
 
362d13b
 
1c7c01e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362d13b
8b0cd75
362d13b
 
 
1c7c01e
 
 
 
362d13b
8b0cd75
362d13b
8b0cd75
362d13b
 
8b0cd75
 
4d25207
362d13b
 
1c7c01e
 
8b0cd75
1c7c01e
 
 
8b0cd75
 
362d13b
 
 
8b0cd75
362d13b
8b0cd75
362d13b
 
 
 
 
 
 
 
 
 
8b0cd75
 
362d13b
8b0cd75
 
 
 
 
 
 
 
 
362d13b
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import random
from pathlib import Path

import gradio as gr
import pandas as pd
from datasets import load_dataset

abs_path = Path(__file__).parent

# Registry of student submissions, loaded once at import time.
# Path.read_text + json.loads instead of json.load(open(...)): the original
# never closed the file handle it opened.
submissions = json.loads((abs_path / "submissions.json").read_text())

# (display name, lighteval task key, metric key) triples scored on the board.
TASKS = [("gsm8k", "lighteval|gsm8k|0", "extractive_match")]

# Per-column Gradio datatypes, parallel to COLUMNS / WIDTHS below.
TYPES = [
    "markdown",
    "markdown",
    "number",
    "number",
    "markdown",
]
# NOTE(review): the score column is labelled "MMLU" but TASKS evaluates
# gsm8k — confirm which header is intended.
COLUMNS = ["User", "Model Name", "MMLU", "Average ⬆️", "Results"]
WIDTHS = ["25%", "25%", "15%", "15%", "10%"]


def load_results(dataset):
    """Extract ``(name, metric value)`` pairs from an evaluation dataset.

    Args:
        dataset: Mapping with a ``"latest"`` split whose ``"results"`` column
            holds JSON strings; the last entry is taken as the most recent run.

    Returns:
        A list of ``(name, value)`` tuples, one per entry in ``TASKS``.

    Raises:
        ValueError: If an expected key is missing or the payload is not
            valid JSON.
    """
    try:
        raw = dataset["latest"]["results"][-1]
    except KeyError as e:
        raise ValueError("Cannot find 'latest' key in the dataset") from e

    try:
        parsed = json.loads(raw)
    except ValueError as e:
        raise ValueError("Cannot parse the output as JSON") from e

    results = []
    for name, task, metric in TASKS:
        # Use distinct variables per step: the original reused one `output`
        # variable for the parsed payload AND each task's entry, so a second
        # entry in TASKS would have indexed into the previous result tuple
        # instead of the parsed payload.
        try:
            task_results = parsed[task]
        except KeyError as e:
            raise ValueError(f"Cannot find '{task}' key in the dataset") from e

        try:
            # Report the actual metric name; the original hardcoded
            # 'extractive_match' in this message.
            results.append((name, task_results[metric]))
        except KeyError as e:
            raise ValueError(f"Cannot find '{metric}' key in the dataset") from e

    return results


def load_submissions():
    """Build the leaderboard as a DataFrame from every registered submission.

    For each entry in ``submissions["submissions"]``, loads the "results"
    config of its results dataset, scores it via :func:`load_results`, and
    emits one row with markdown links for user, model, and dataset.

    Returns:
        pd.DataFrame with one row per submission.

    Raises:
        ValueError: If a submission's results cannot be loaded or parsed.
    """
    leaderboard = []
    for submission in submissions["submissions"]:
        ds = load_dataset(submission["results-dataset"], "results")

        try:
            results = load_results(ds)
        except ValueError as e:
            # Bug fix: the dataset name lives on the submission record, not on
            # the loaded dataset object — ds['results-dataset'] would itself
            # fail inside this error path.
            raise ValueError(
                f"Cannot load results for {submission['results-dataset']}"
            ) from e

        leaderboard_row = {}

        leaderboard_row["username"] = (
            f"[{submission['username']}](https://huggingface.co/{submission['username']})"
        )

        leaderboard_row["model_name"] = (
            f"[{submission['model_name']}](https://huggingface.co/{submission['username']}/{submission['model_name']})"
        )

        # One column per scored task, keyed by the task's display name.
        for name, result in results:
            leaderboard_row[name] = result

        leaderboard_row["Average ⬆️"] = sum(result for _, result in results) / len(
            results
        )

        leaderboard_row["results-dataset"] = (
            f"[🔗](https://huggingface.co/datasets/{submission['results-dataset']})"
        )

        leaderboard.append(leaderboard_row)

    return pd.DataFrame(leaderboard)


# Build the Gradio UI at import time so `demo` is importable by hosting
# runners (e.g. Spaces) as well as runnable via the __main__ guard below.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 a smol course leaderboard
    
    A leaderboard of smol course students' submissions. 
    """)
    with gr.Tabs():
        with gr.Tab("Demo"):
            df = gr.Dataframe(
                label="a smol course leaderboard",
                value=load_submissions(),
                headers=COLUMNS,
                show_search="search",
                show_copy_button=True,
                show_fullscreen_button=True,
                show_row_numbers=True,
                pinned_columns=1,
                static_columns=[0],
                datatype=TYPES,
                column_widths=WIDTHS,
            )
        with gr.Tab("Docs"):
            # Reuse the module-level abs_path instead of recomputing
            # Path(__file__).parent a second time.
            gr.Markdown((abs_path / "docs.md").read_text())

if __name__ == "__main__":
    demo.launch()