Commit 4f08cdd · Parent(s): 35b8093
Maciej committed

Add filters to tabs

Files changed:
- app.py (+45 -13)
- results.jsonl (+0 -0)
app.py
CHANGED
@@ -8,7 +8,7 @@ abs_path = Path(__file__).parent
 
 
 def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro"):
-    df = df[df["language"] == "
+    df = df[df["language"] == "All"]
     df = df[["model", "temperature", "f1_macro", "weighted_f1", "accuracy"]]
     df = df.sort_values(by=sort_column, ascending=False)
     df.insert(0, "Rank", range(1, len(df) + 1))
@@ -40,17 +40,17 @@ def build_ds_dict(df: pd.DataFrame):
 
 
 def build_emo_dict(df: pd.DataFrame):
-    df = df[df["language"] == "
+    df = df[df["language"] == "All"]
     emo_data = defaultdict(lambda: defaultdict(dict))
     emotions = df.iloc[0].metrics_per_label.keys() - ["accuracy", "macro avg", "weighted avg"]
     for row in df.itertuples():
         for emotion in emotions:
             emo_data[row.model][row.temperature][emotion] = row.metrics_per_label[emotion].get("f1-score")
-        emo_data[row.model][row.temperature]["
+        emo_data[row.model][row.temperature]["All"] = row.f1_macro
     return emo_data
 
 
-def leaderboard_per_group(lang_dict, metric: str = "f1_macro"):
+def leaderboard_per_group(lang_dict, use_cols, metric: str = "f1_macro"):
     df = []
     for model, inner in lang_dict.items():
         for temperature, metrics in inner.items():
@@ -69,8 +69,9 @@ def leaderboard_per_group(lang_dict, metric: str = "f1_macro"):
     for col in df.columns.difference(["model", "temperature"]):
         df[col] = df[col].round(4)
 
-    df = df[["model", "temperature"
-
+    df = df[["model", "temperature"] + sorted(use_cols)]
+    if "All" in use_cols:
+        df = df.sort_values(by="All", ascending=False)
     df.insert(0, "Rank", range(1, len(df) + 1))
 
     return df
@@ -80,38 +81,69 @@ def app():
     with gr.Blocks() as demo:
         gr.Markdown("# π Leaderboard Viewer")
 
+        languages = ['All', 'Bengali', 'English', 'French', 'German', 'Italian', 'Polish', 'Russian', 'Spanish']
+        datasets = ['All', 'CaFE', 'CREMA-D', 'EMNS', 'Emozionalmente', 'eNTERFACE', 'JL-Corpus', 'MESD', 'nEMO', 'Oreau', 'PAVOQUE', 'RAVDESS', 'RESD', 'SUBESCO']
+        emotions = ['All', 'anger', 'anxiety',
+                    'apology', 'assertiveness', 'calm', 'concern', 'disgust',
+                    'encouragement', 'enthusiasm', 'excitement', 'fear', 'happiness',
+                    'neutral', 'poker', 'sadness', 'sarcasm', 'surprise']
+        metric=["f1_macro", "accuracy", "weighted_f1"]
+
         with gr.Tabs():
             with gr.Tab("Overall Results"):
                 overall_table = gr.Dataframe()
 
             with gr.Tab("Results per Language"):
+                languages_filter = gr.CheckboxGroup(choices=languages, label="Filter by Language", value=languages)
+                select_lang_metric = gr.Radio(metric, value='f1_macro', label="Metric")
                 lang_table = gr.Dataframe()
 
             with gr.Tab("Results per Dataset"):
+                dataset_filter = gr.CheckboxGroup(choices=datasets, label="Filter by Dataset", value=datasets)
+                select_ds_metric = gr.Radio(metric, value='f1_macro', label="Metric")
                 dataset_table = gr.Dataframe()
 
             with gr.Tab("Results per Emotion"):
+                emo_filter = gr.CheckboxGroup(choices=emotions, label="Filter by Emotion", value=emotions)
                 emotion_table = gr.Dataframe()
 
         df_state = gr.State()
 
-        def update_leaderboards(select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
+        def update_leaderboards(languages=[], datasets=[], emotions=[], select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
             df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
             lang_dict = build_lang_dict(df)
             ds_dict = build_ds_dict(df)
             emo_dict = build_emo_dict(df)
             overall = overall_leaderboard(df)
-            by_lang = leaderboard_per_group(lang_dict, metric=select_lang_metric)
-            by_dataset = leaderboard_per_group(ds_dict, metric=select_ds_metric)
-            by_emotion = leaderboard_per_group(emo_dict)
-            return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
+            by_lang = leaderboard_per_group(lang_dict, languages, metric=select_lang_metric)
+            by_dataset = leaderboard_per_group(ds_dict, datasets, metric=select_ds_metric)
+            by_emotion = leaderboard_per_group(emo_dict, emotions)
+            return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
 
         demo.load(
             update_leaderboards,
-            inputs=[],
+            inputs=[languages_filter, dataset_filter, emo_filter],
             outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state]
         )
-
+
+        def on_change(selected_languages, selected_lang_metric, selected_datasets, selected_ds_metric, selected_emotions):
+            return update_leaderboards(languages=selected_languages, select_lang_metric=selected_lang_metric, datasets=selected_datasets, select_ds_metric=selected_ds_metric, emotions=selected_emotions)
+
+        languages_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                [overall_table, lang_table, dataset_table, emotion_table])
+
+        select_lang_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                  [overall_table, lang_table, dataset_table, emotion_table])
+
+        dataset_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                              [overall_table, lang_table, dataset_table, emotion_table])
+
+        select_ds_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                [overall_table, lang_table, dataset_table, emotion_table])
+
+        emo_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                          [overall_table, lang_table, dataset_table, emotion_table])
+
     return demo
 
 if __name__ == "__main__":
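The core of this change is the wiring pattern: each CheckboxGroup or Radio filter is connected to the tables through a .change event that rebuilds the leaderboards. Below is a minimal, self-contained sketch of that pattern; the toy data and component names are illustrative stand-ins, not the Space's real results or components.

# Minimal sketch of the filter-to-table wiring added in this commit (toy data; names are illustrative).
import gradio as gr
import pandas as pd

# Toy stand-in for the per-language leaderboard that leaderboard_per_group() would produce.
DATA = pd.DataFrame({
    "model": ["model-a", "model-b"],
    "temperature": [0.0, 0.7],
    "All": [0.71, 0.66],
    "English": [0.75, 0.69],
    "Polish": [0.64, 0.61],
})

def filter_table(selected_cols):
    # Keep the identifying columns plus only the ticked languages, and sort by "All"
    # when it is still selected -- mirroring the new use_cols logic in leaderboard_per_group().
    df = DATA[["model", "temperature"] + sorted(selected_cols)]
    if "All" in selected_cols:
        df = df.sort_values(by="All", ascending=False)
    return df

with gr.Blocks() as demo:
    cols = ["All", "English", "Polish"]
    col_filter = gr.CheckboxGroup(choices=cols, value=cols, label="Filter by Language")
    table = gr.Dataframe()

    # Populate the table on page load and refresh it whenever the filter changes.
    demo.load(filter_table, inputs=[col_filter], outputs=[table])
    col_filter.change(filter_table, inputs=[col_filter], outputs=[table])

if __name__ == "__main__":
    demo.launch()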
results.jsonl
CHANGED
The diff for this file is too large to render.
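Although the results.jsonl diff is not rendered, app.py shows how the file is consumed: one JSON object per line, read with pandas. A small sketch of loading it the same way follows; the column names in the comment are inferred from how app.py uses the frame, not from the raw file.

# Sketch: load results.jsonl the way app.py does (one JSON object per line).
# Assumes results.jsonl sits next to this script; column names are inferred from app.py.
from pathlib import Path

import pandas as pd

abs_path = Path(__file__).parent
df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)

# Columns app.py relies on: model, temperature, language, f1_macro, weighted_f1,
# accuracy, and metrics_per_label (a per-emotion dict containing "f1-score" values).
print(df[["model", "temperature", "language", "f1_macro"]].head())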