iwonachristop committed on
Commit
17b190b
·
1 Parent(s): 7e588e5

Add application file

Browse files
Files changed (1) hide show
  1. app.py +120 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_leaderboard import Leaderboard
3
+ from pathlib import Path
4
+ import pandas as pd
5
+ from collections import defaultdict
6
+
7
+
8
# Directory containing this file; the results.jsonl data file is resolved relative to it.
abs_path = Path(__file__).parent
9
+
10
+
11
def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro"):
    """Build the overall ranking table.

    Only rows whose ``language`` column equals ``"all"`` are kept, and the
    result is restricted to the model, temperature and the three score
    columns.

    Args:
        df: Results table with at least the columns ``language``, ``model``,
            ``temperature``, ``f1_macro``, ``weighted_f1`` and ``accuracy``.
        sort_column: Column used to order the table, highest value first.

    Returns:
        A DataFrame whose first column is ``Rank`` (1-based, following the
        sort order), with ``temperature`` rounded to 1 decimal and the score
        columns rounded to 4 decimals.
    """
    score_columns = ["f1_macro", "weighted_f1", "accuracy"]

    # Select rows and columns in one step, then order by the requested column.
    is_aggregate = df["language"] == "all"
    board = df.loc[is_aggregate, ["model", "temperature", *score_columns]]
    board = board.sort_values(by=sort_column, ascending=False)
    board.insert(0, "Rank", range(1, len(board) + 1))

    # Display precision per column: temperature is coarse, scores are finer.
    precision = {"temperature": 1, **dict.fromkeys(score_columns, 4)}
    for column, places in precision.items():
        board[column] = board[column].round(places)

    return board
23
+
24
+
25
def build_lang_dict(df: pd.DataFrame):
    """Collect per-language scores into a nested mapping.

    Args:
        df: Results table with the columns ``model``, ``temperature``,
            ``language``, ``f1_macro``, ``weighted_f1`` and ``accuracy``.

    Returns:
        A nested ``defaultdict`` of the form
        ``{model: {temperature: {"<language>.<metric>": value}}}``.
        When several rows share model, temperature and language, the last
        row wins.
    """
    metric_names = ("f1_macro", "weighted_f1", "accuracy")
    lang_data = defaultdict(lambda: defaultdict(dict))
    for record in df.itertuples():
        lang_data[record.model][record.temperature].update(
            {
                f"{record.language}.{name}": getattr(record, name)
                for name in metric_names
            }
        )
    return lang_data
32
+
33
+
34
def build_ds_dict(df: pd.DataFrame):
    """Collect per-dataset scores into a nested mapping.

    Args:
        df: Results table with the columns ``model``, ``temperature``,
            ``dataset``, ``f1_macro``, ``weighted_f1`` and ``accuracy``.

    Returns:
        A nested ``defaultdict`` of the form
        ``{model: {temperature: {"<dataset>.<metric>": value}}}``.
        When several rows share model, temperature and dataset, the last
        row wins.
    """
    metric_names = ("f1_macro", "weighted_f1", "accuracy")
    ds_data = defaultdict(lambda: defaultdict(dict))
    for record in df.itertuples():
        bucket = ds_data[record.model][record.temperature]
        labels = [f"{record.dataset}.{name}" for name in metric_names]
        for label, name in zip(labels, metric_names):
            bucket[label] = getattr(record, name)
    return ds_data
41
+
42
+
43
def build_emo_dict(df: pd.DataFrame):
    """Collect per-emotion F1 scores into a nested mapping.

    Only rows whose ``language`` column equals ``"all"`` are used. The set of
    emotion labels is taken from the ``metrics_per_label`` dict of the first
    such row, excluding the summary entries ``"accuracy"``, ``"macro avg"``
    and ``"weighted avg"``.

    Args:
        df: Results table with the columns ``language``, ``model``,
            ``temperature``, ``f1_macro`` and ``metrics_per_label`` (a dict
            mapping each label to a dict that may contain ``"f1-score"``).

    Returns:
        A nested ``defaultdict`` of the form
        ``{model: {temperature: {emotion: f1, ..., "all": f1_macro}}}``.
        It is empty when no row has ``language == "all"``.

    Raises:
        KeyError: If a later row lacks one of the labels found in the first
            row.
    """
    df = df[df["language"] == "all"]
    emo_data = defaultdict(lambda: defaultdict(dict))

    # Without this guard, ``iloc[0]`` below raises IndexError on an empty frame.
    if df.empty:
        return emo_data

    summary_keys = {"accuracy", "macro avg", "weighted avg"}
    # Sorted so that insertion order does not depend on set iteration order.
    emotions = sorted(df.iloc[0]["metrics_per_label"].keys() - summary_keys)

    for row in df.itertuples():
        scores = emo_data[row.model][row.temperature]
        for emotion in emotions:
            scores[emotion] = row.metrics_per_label[emotion].get("f1-score")
        scores["all"] = row.f1_macro
    return emo_data
52
+
53
+
54
def leaderboard_per_group(lang_dict, metric: str = "f1_macro"):
    """Flatten a nested score mapping into a ranked table with one column per group.

    Args:
        lang_dict: Mapping ``{model: {temperature: {key: value}}}``. Keys of
            the form ``"<group>.<metric>"`` contribute a ``<group>`` column
            when their metric part equals ``metric``; keys without a dot are
            copied through as columns unchanged.
        metric: Name of the metric to extract from dotted keys.

    Returns:
        A DataFrame with columns ``Rank``, ``model``, ``temperature``,
        ``all`` and then the remaining group columns in sorted order. Rows
        are sorted by ``all``, highest first; ``temperature`` is rounded to
        1 decimal and score columns to 4. An empty mapping yields an empty
        table with the four fixed columns.

    Raises:
        KeyError: If the mapping is non-empty but produces no ``all`` column.
    """
    rows = []
    for model, inner in lang_dict.items():
        for temperature, metrics in inner.items():
            entry = {"model": model, "temperature": temperature}
            for key, value in metrics.items():
                # Split on the LAST dot so group names may themselves contain
                # dots, and compare the metric exactly: a substring test would
                # let e.g. "accuracy_set.f1_macro" match metric="accuracy".
                group, sep, name = key.rpartition(".")
                if not sep:
                    entry[key] = value
                elif name == metric:
                    entry[group] = value
            rows.append(entry)

    # Nothing to rank: return an empty table instead of failing on a missing column.
    if not rows:
        return pd.DataFrame(columns=["Rank", "model", "temperature", "all"])

    df = pd.DataFrame(rows)

    df["temperature"] = df["temperature"].round(1)

    id_columns = ["model", "temperature"]
    for col in df.columns.difference(id_columns):
        df[col] = df[col].round(4)

    # "all" goes right after the identifiers, remaining groups alphabetically.
    group_columns = sorted(df.columns.difference(id_columns + ["all"]))
    df = df[id_columns + ["all"] + group_columns]
    df = df.sort_values(by="all", ascending=False)
    df.insert(0, "Rank", range(1, len(df) + 1))

    return df
78
+
79
+
80
def app():
    """Build the Gradio Blocks UI for the leaderboard viewer.

    The UI has four tabs (overall, per language, per dataset, per emotion),
    each holding one dataframe component, plus a state component that keeps
    the raw results table. All of them are filled when the page loads.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 🏆 Leaderboard Viewer")

        with gr.Tabs():
            with gr.Tab("Overall Results"):
                overall_table = gr.Dataframe()

            with gr.Tab("Results per Language"):
                lang_table = gr.Dataframe()

            with gr.Tab("Results per Dataset"):
                dataset_table = gr.Dataframe()

            with gr.Tab("Results per Emotion"):
                emotion_table = gr.Dataframe()

        df_state = gr.State()

        def update_leaderboards(select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
            """Read results.jsonl and compute every leaderboard table.

            ``demo.load`` passes no inputs, so both metric arguments take
            their defaults. Returns one value per output component, in the
            order listed in ``outputs`` below.
            """
            df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
            lang_dict = build_lang_dict(df)
            ds_dict = build_ds_dict(df)
            emo_dict = build_emo_dict(df)
            overall = overall_leaderboard(df)
            by_lang = leaderboard_per_group(lang_dict, metric=select_lang_metric)
            by_dataset = leaderboard_per_group(ds_dict, metric=select_ds_metric)
            by_emotion = leaderboard_per_group(emo_dict)
            # Exactly five values for five outputs. An extra status string here
            # would shift into the df_state slot and displace the DataFrame.
            return overall, by_lang, by_dataset, by_emotion, df

        demo.load(
            update_leaderboards,
            inputs=[],
            outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state],
        )

    return demo
117
+
118
# Build the UI and start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo = app()
    demo.launch()