Maciej committed on
Commit 4f08cdd · 1 Parent(s): 35b8093

Add filters to tabs

Files changed (2)
  1. app.py +45 -13
  2. results.jsonl +0 -0
app.py CHANGED
@@ -8,7 +8,7 @@ abs_path = Path(__file__).parent
 
 
 def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro"):
-    df = df[df["language"] == "all"]
+    df = df[df["language"] == "All"]
     df = df[["model", "temperature", "f1_macro", "weighted_f1", "accuracy"]]
     df = df.sort_values(by=sort_column, ascending=False)
     df.insert(0, "Rank", range(1, len(df) + 1))
@@ -40,17 +40,17 @@ def build_ds_dict(df: pd.DataFrame):
 
 
 def build_emo_dict(df: pd.DataFrame):
-    df = df[df["language"] == "all"]
+    df = df[df["language"] == "All"]
     emo_data = defaultdict(lambda: defaultdict(dict))
     emotions = df.iloc[0].metrics_per_label.keys() - ["accuracy", "macro avg", "weighted avg"]
     for row in df.itertuples():
         for emotion in emotions:
             emo_data[row.model][row.temperature][emotion] = row.metrics_per_label[emotion].get("f1-score")
-        emo_data[row.model][row.temperature]["all"] = row.f1_macro
+        emo_data[row.model][row.temperature]["All"] = row.f1_macro
     return emo_data
 
 
-def leaderboard_per_group(lang_dict, metric: str = "f1_macro"):
+def leaderboard_per_group(lang_dict, use_cols, metric: str = "f1_macro"):
     df = []
     for model, inner in lang_dict.items():
         for temperature, metrics in inner.items():
@@ -69,8 +69,9 @@ def leaderboard_per_group(lang_dict, metric: str = "f1_macro"):
     for col in df.columns.difference(["model", "temperature"]):
         df[col] = df[col].round(4)
 
-    df = df[["model", "temperature", "all"] + sorted(df.columns.difference(["model", "temperature", "all"]))]
-    df = df.sort_values(by="all", ascending=False)
+    df = df[["model", "temperature"] + sorted(use_cols)]
+    if "All" in use_cols:
+        df = df.sort_values(by="All", ascending=False)
     df.insert(0, "Rank", range(1, len(df) + 1))
 
     return df
@@ -80,38 +81,69 @@ def app():
     with gr.Blocks() as demo:
         gr.Markdown("# πŸ† Leaderboard Viewer")
 
+        languages = ['All', 'Bengali', 'English', 'French', 'German', 'Italian', 'Polish', 'Russian', 'Spanish']
+        datasets = ['All', 'CaFE', 'CREMA-D', 'EMNS', 'Emozionalmente', 'eNTERFACE', 'JL-Corpus', 'MESD', 'nEMO', 'Oreau', 'PAVOQUE', 'RAVDESS', 'RESD', 'SUBESCO']
+        emotions = ['All', 'anger', 'anxiety',
+                    'apology', 'assertiveness', 'calm', 'concern', 'disgust',
+                    'encouragement', 'enthusiasm', 'excitement', 'fear', 'happiness',
+                    'neutral', 'poker', 'sadness', 'sarcasm', 'surprise']
+        metric=["f1_macro", "accuracy", "weighted_f1"]
+
         with gr.Tabs():
             with gr.Tab("Overall Results"):
                 overall_table = gr.Dataframe()
 
             with gr.Tab("Results per Language"):
+                languages_filter = gr.CheckboxGroup(choices=languages, label="Filter by Language", value=languages)
+                select_lang_metric = gr.Radio(metric, value='f1_macro', label="Metric")
                 lang_table = gr.Dataframe()
 
             with gr.Tab("Results per Dataset"):
+                dataset_filter = gr.CheckboxGroup(choices=datasets, label="Filter by Dataset", value=datasets)
+                select_ds_metric = gr.Radio(metric, value='f1_macro', label="Metric")
                 dataset_table = gr.Dataframe()
 
             with gr.Tab("Results per Emotion"):
+                emo_filter = gr.CheckboxGroup(choices=emotions, label="Filter by Emotion", value=emotions)
                 emotion_table = gr.Dataframe()
 
         df_state = gr.State()
 
-        def update_leaderboards(select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
+        def update_leaderboards(languages=[], datasets=[], emotions=[], select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
            df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
            lang_dict = build_lang_dict(df)
            ds_dict = build_ds_dict(df)
            emo_dict = build_emo_dict(df)
            overall = overall_leaderboard(df)
-            by_lang = leaderboard_per_group(lang_dict, metric=select_lang_metric)
-            by_dataset = leaderboard_per_group(ds_dict, metric=select_ds_metric)
-            by_emotion = leaderboard_per_group(emo_dict)
-            return overall, by_lang, by_dataset, by_emotion, "Loaded successfully.", df
+            by_lang = leaderboard_per_group(lang_dict, languages, metric=select_lang_metric)
+            by_dataset = leaderboard_per_group(ds_dict, datasets, metric=select_ds_metric)
+            by_emotion = leaderboard_per_group(emo_dict, emotions)
+            return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
 
        demo.load(
            update_leaderboards,
-            inputs=[],
+            inputs=[languages_filter, dataset_filter, emo_filter],
            outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state]
        )
-
+
+        def on_change(selected_languages, selected_lang_metric, selected_datasets, selected_ds_metric, selected_emotions):
+            return update_leaderboards(languages=selected_languages, select_lang_metric=selected_lang_metric, datasets=selected_datasets, select_ds_metric=selected_ds_metric, emotions=selected_emotions)
+
+        languages_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                [overall_table, lang_table, dataset_table, emotion_table])
+
+        select_lang_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                  [overall_table, lang_table, dataset_table, emotion_table])
+
+        dataset_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                              [overall_table, lang_table, dataset_table, emotion_table])
+
+        select_ds_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                [overall_table, lang_table, dataset_table, emotion_table])
+
+        emo_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                          [overall_table, lang_table, dataset_table, emotion_table])
+
     return demo
 
 if __name__ == "__main__":
results.jsonl CHANGED
The diff for this file is too large to render. See raw diff
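Note: the pattern this commit introduces is Gradio's event wiring: a CheckboxGroup filter (and, on two tabs, a Radio metric selector) whose .change() events re-run the leaderboard builder and refresh the Dataframe components. Below is a minimal, self-contained sketch of that same pattern, reduced to a single tab; the RESULTS frame, build_table helper, and component names are illustrative only and are not the app's actual code, which reads results.jsonl and rebuilds four leaderboards at once.

# Illustrative sketch only: CheckboxGroup/Radio -> .change() -> Dataframe wiring,
# analogous to the commit but with made-up toy data instead of results.jsonl.
import gradio as gr
import pandas as pd

RESULTS = pd.DataFrame({
    "model":    ["model-a", "model-a", "model-b", "model-b"],
    "language": ["English", "Polish", "English", "Polish"],
    "f1_macro": [0.61, 0.55, 0.58, 0.60],
    "accuracy": [0.70, 0.63, 0.66, 0.68],
})

def build_table(selected_languages, metric):
    # Keep only the checked languages and pivot to one column per language.
    df = RESULTS[RESULTS["language"].isin(selected_languages)]
    if df.empty:
        return pd.DataFrame()
    return (df.pivot(index="model", columns="language", values=metric)
              .round(4)
              .reset_index())

with gr.Blocks() as demo:
    languages = sorted(RESULTS["language"].unique())
    language_filter = gr.CheckboxGroup(choices=languages, value=languages,
                                       label="Filter by Language")
    metric_choice = gr.Radio(["f1_macro", "accuracy"], value="f1_macro", label="Metric")
    table = gr.Dataframe()

    # Re-render the table whenever either control changes, and once on page load.
    language_filter.change(build_table, [language_filter, metric_choice], table)
    metric_choice.change(build_table, [language_filter, metric_choice], table)
    demo.load(build_table, [language_filter, metric_choice], table)

if __name__ == "__main__":
    demo.launch()

In the commit itself, every filter's .change() handler passes all five controls back into update_leaderboards, so changing any single control refreshes all of the tabs' tables together.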