rzanoli committed on
Commit
cb9f237
·
1 Parent(s): a43032b

Add Size field to the leaderboard

Browse files
Files changed (3) hide show
  1. app.py +33 -33
  2. src/display/utils.py +27 -2
  3. src/leaderboard/read_evals.py +15 -4
app.py CHANGED
@@ -117,7 +117,7 @@ def barplot_mean_few_minus_zero_shot(dataframe, tasks=None):
117
 
118
  def boxplot_per_task(dataframe=None, baselines=None):
119
 
120
- print(dataframe.columns)
121
 
122
  tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
123
 
@@ -399,7 +399,7 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
399
  sorted_dataframe = dataframe.sort_values(by="Avg. Comb. Perf. ⬆️", ascending=False)
400
 
401
  sorted_dataframe = sorted_dataframe.reset_index(drop=True)
402
- sorted_dataframe["rank"] = sorted_dataframe.index + 1
403
 
404
  # Flag per sapere se la medaglia è già stata assegnata per categoria e tipo
405
  large_medal_fs_assigned = False
@@ -415,26 +415,26 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
415
 
416
  for _, row in sorted_dataframe.iterrows():
417
  if row['IS_FS']: # 5-Few-Shot
418
- if row["#Params (B)"] > 50 and not large_medal_fs_assigned:
419
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣0️⃣🅱️🏆")
420
  large_medal_fs_assigned = True
421
- elif 10 < row["#Params (B)"] <= 50 and not medium_medal_fs_assigned:
422
- new_model_column.append(f"{row['Model']} 5️⃣0️⃣🅱️🏆")
423
  medium_medal_fs_assigned = True
424
- elif row["#Params (B)"] <= 10 and not small_medal_fs_assigned:
425
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🏆")
426
  small_medal_fs_assigned = True
427
  else:
428
  new_model_column.append(row["Model"])
429
  else: # 0-Shot
430
- if row["#Params (B)"] > 50 and not large_medal_0shot_assigned:
431
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣0️⃣🅱️🎖️")
432
  large_medal_0shot_assigned = True
433
- elif 10 < row["#Params (B)"] <= 50 and not medium_medal_0shot_assigned:
434
- new_model_column.append(f"{row['Model']} 5️⃣0️⃣🅱️🎖️")
435
  medium_medal_0shot_assigned = True
436
- elif row["#Params (B)"] <= 10 and not small_medal_0shot_assigned:
437
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🎖️")
438
  small_medal_0shot_assigned = True
439
  else:
440
  new_model_column.append(row["Model"])
@@ -488,7 +488,7 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
488
 
489
  # aggiungo la colonna rank in base alla posizione
490
  sorted_dataframe = sorted_dataframe.reset_index(drop=True)
491
- sorted_dataframe["rank"] = sorted_dataframe.index + 1
492
 
493
  # Flag per sapere se la medaglia è già stata assegnata per categoria e tipo
494
  large_medal_fs_assigned = False
@@ -504,26 +504,26 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
504
 
505
  for _, row in sorted_dataframe.iterrows():
506
  if row['IS_FS']: # 5-Few-Shot
507
- if row["#Params (B)"] > 30 and not large_medal_fs_assigned:
508
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣0️⃣🅱️🏆")
509
  large_medal_fs_assigned = True
510
- elif 10 < row["#Params (B)"] <= 30 and not medium_medal_fs_assigned:
511
- new_model_column.append(f"{row['Model']} 5️⃣0️⃣🅱️🏆")
512
  medium_medal_fs_assigned = True
513
- elif row["#Params (B)"] <= 10 and not small_medal_fs_assigned:
514
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🏆")
515
  small_medal_fs_assigned = True
516
  else:
517
  new_model_column.append(row["Model"])
518
  else: # 0-Shot
519
- if row["#Params (B)"] > 30 and not large_medal_0shot_assigned:
520
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣0️⃣🅱️🎖️")
521
  large_medal_0shot_assigned = True
522
- elif 10 < row["#Params (B)"] <= 30 and not medium_medal_0shot_assigned:
523
- new_model_column.append(f"{row['Model']} 5️⃣0️⃣🅱️🎖️")
524
  medium_medal_0shot_assigned = True
525
- elif row["#Params (B)"] <= 10 and not small_medal_0shot_assigned:
526
- new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🎖️")
527
  small_medal_0shot_assigned = True
528
  else:
529
  new_model_column.append(row["Model"])
@@ -646,8 +646,8 @@ with demo:
646
 
647
  leaderboard = init_leaderboard(
648
  LEADERBOARD_DF,
649
- default_selection=['rank', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
650
- hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
651
  )
652
 
653
  gr.HTML(
@@ -693,8 +693,8 @@ with demo:
693
 
694
  leaderboard = update_task_leaderboard(
695
  LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average", f"{task} Prompt Std": "Prompt Std", f"{task} Best Prompt": "Best Prompt", f"{task} Best Prompt Id": "Best Prompt Id", task: "Combined Performance"}),
696
- default_selection=['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
697
- hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
698
  )
699
 
700
  # About tab
@@ -713,10 +713,10 @@ with demo:
713
  f"{task} Best Prompt": "Best Prompt",
714
  f"{task} Best Prompt Id": "Best Prompt Id",
715
  task: "Combined Performance"}),
716
- default_selection=['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
717
  'Best Prompt Id'],
718
  hidden_columns=[col for col in LEADERBOARD_DF.columns if
719
- col not in ['rank', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
720
  'Best Prompt', 'Best Prompt Id']]
721
  )
722
 
 
117
 
118
  def boxplot_per_task(dataframe=None, baselines=None):
119
 
120
+ #print(dataframe.columns)
121
 
122
  tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
123
 
 
399
  sorted_dataframe = dataframe.sort_values(by="Avg. Comb. Perf. ⬆️", ascending=False)
400
 
401
  sorted_dataframe = sorted_dataframe.reset_index(drop=True)
402
+ sorted_dataframe["Rank"] = sorted_dataframe.index + 1
403
 
404
  # Flag per sapere se la medaglia è già stata assegnata per categoria e tipo
405
  large_medal_fs_assigned = False
 
415
 
416
  for _, row in sorted_dataframe.iterrows():
417
  if row['IS_FS']: # 5-Few-Shot
418
+ if row["Size"] == "🔵🔵🔵" and not large_medal_fs_assigned:
419
+ new_model_column.append(f"{row['Model']} 🔵🔵🔵🏆")
420
  large_medal_fs_assigned = True
421
+ elif row["Size"] == "🔵🔵" and not medium_medal_fs_assigned:
422
+ new_model_column.append(f"{row['Model']} 🔵🔵🏆")
423
  medium_medal_fs_assigned = True
424
+ elif row["Size"] == "🔵" and not small_medal_fs_assigned:
425
+ new_model_column.append(f"{row['Model']} 🔵🏆")
426
  small_medal_fs_assigned = True
427
  else:
428
  new_model_column.append(row["Model"])
429
  else: # 0-Shot
430
+ if row["Size"] == "🔵🔵🔵" and not large_medal_0shot_assigned:
431
+ new_model_column.append(f"{row['Model']} 🔵🔵🔵🎖️")
432
  large_medal_0shot_assigned = True
433
+ elif row["Size"] == "🔵🔵" and not medium_medal_0shot_assigned:
434
+ new_model_column.append(f"{row['Model']} 🔵🔵🎖️")
435
  medium_medal_0shot_assigned = True
436
+ elif row["Size"] == "🔵" and not small_medal_0shot_assigned:
437
+ new_model_column.append(f"{row['Model']} 🔵🎖️")
438
  small_medal_0shot_assigned = True
439
  else:
440
  new_model_column.append(row["Model"])
 
488
 
489
  # aggiungo la colonna rank in base alla posizione
490
  sorted_dataframe = sorted_dataframe.reset_index(drop=True)
491
+ sorted_dataframe["Rank"] = sorted_dataframe.index + 1
492
 
493
  # Flag per sapere se la medaglia è già stata assegnata per categoria e tipo
494
  large_medal_fs_assigned = False
 
504
 
505
  for _, row in sorted_dataframe.iterrows():
506
  if row['IS_FS']: # 5-Few-Shot
507
+ if row["Size"] == "🔵🔵🔵" and not large_medal_fs_assigned:
508
+ new_model_column.append(f"{row['Model']} 🔵🔵🔵🏆")
509
  large_medal_fs_assigned = True
510
+ elif row["Size"] == "🔵🔵" and not medium_medal_fs_assigned:
511
+ new_model_column.append(f"{row['Model']} 🔵🔵🏆")
512
  medium_medal_fs_assigned = True
513
+ elif row["Size"] == "🔵" and not small_medal_fs_assigned:
514
+ new_model_column.append(f"{row['Model']} 🔵🏆")
515
  small_medal_fs_assigned = True
516
  else:
517
  new_model_column.append(row["Model"])
518
  else: # 0-Shot
519
+ if row["Size"] == "🔵🔵🔵" and not large_medal_0shot_assigned:
520
+ new_model_column.append(f"{row['Model']} 🔵🔵🔵🎖️")
521
  large_medal_0shot_assigned = True
522
+ elif row["Size"] == "🔵🔵" and not medium_medal_0shot_assigned:
523
+ new_model_column.append(f"{row['Model']} 🔵🔵🎖️")
524
  medium_medal_0shot_assigned = True
525
+ elif row["Size"] == "🔵" and not small_medal_0shot_assigned:
526
+ new_model_column.append(f"{row['Model']} 🔵🎖️")
527
  small_medal_0shot_assigned = True
528
  else:
529
  new_model_column.append(row["Model"])
 
646
 
647
  leaderboard = init_leaderboard(
648
  LEADERBOARD_DF,
649
+ default_selection=['Rank', 'Size', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
650
+ hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
651
  )
652
 
653
  gr.HTML(
 
693
 
694
  leaderboard = update_task_leaderboard(
695
  LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average", f"{task} Prompt Std": "Prompt Std", f"{task} Best Prompt": "Best Prompt", f"{task} Best Prompt Id": "Best Prompt Id", task: "Combined Performance"}),
696
+ default_selection=['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
697
+ hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
698
  )
699
 
700
  # About tab
 
713
  f"{task} Best Prompt": "Best Prompt",
714
  f"{task} Best Prompt Id": "Best Prompt Id",
715
  task: "Combined Performance"}),
716
+ default_selection=['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
717
  'Best Prompt Id'],
718
  hidden_columns=[col for col in LEADERBOARD_DF.columns if
719
+ col not in ['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
720
  'Best Prompt', 'Best Prompt Id']]
721
  )
722
 
src/display/utils.py CHANGED
@@ -25,7 +25,8 @@ auto_eval_column_dict = []
25
  # Init
26
  #auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
27
 
28
- auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("rank", "str", True, never_hidden=True)])
 
29
 
30
  auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])
31
  auto_eval_column_dict.append(["is_5fewshot", ColumnContent, ColumnContent("IS_FS", "bool", True)])
@@ -99,7 +100,7 @@ class FewShotDetails:
99
  symbol: str = "" # emoji
100
 
101
  class FewShotType(Enum):
102
- ZS = FewShotDetails(name="zero-shot", symbol="0️⃣")
103
  FS = FewShotDetails(name="5-few-shot", symbol="5️⃣")
104
  Unknown = FewShotDetails(name="unknown", symbol="❓")
105
 
@@ -115,6 +116,30 @@ class FewShotType(Enum):
115
  return FewShotType.FS
116
  return FewShotType.Unknown
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  class WeightType(Enum):
119
  Adapter = ModelDetails("Adapter")
120
  Original = ModelDetails("Original")
 
25
  # Init
26
  #auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
27
 
28
+ auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("Rank", "number", True, never_hidden=True)])
29
+ auto_eval_column_dict.append(["size_symbol", ColumnContent, ColumnContent("Size", "number", True, never_hidden=True)])
30
 
31
  auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])
32
  auto_eval_column_dict.append(["is_5fewshot", ColumnContent, ColumnContent("IS_FS", "bool", True)])
 
100
  symbol: str = "" # emoji
101
 
102
  class FewShotType(Enum):
103
+ ZS = FewShotDetails(name="zero-shot", symbol="🅾️")
104
  FS = FewShotDetails(name="5-few-shot", symbol="5️⃣")
105
  Unknown = FewShotDetails(name="unknown", symbol="❓")
106
 
 
116
  return FewShotType.FS
117
  return FewShotType.Unknown
118
 
119
+ @dataclass
120
+ class SizeDetails:
121
+ name: str
122
+ symbol: str = "" # emoji
123
+
124
+ class SizeType(Enum):
125
+ SMALL = SizeDetails(name="small", symbol="🔵")
126
+ MEDIUM = SizeDetails(name="medium", symbol="🔵🔵")
127
+ LARGE = SizeDetails(name="large", symbol="🔵🔵🔵")
128
+ Unknown = SizeDetails(name="unknown", symbol="❓")
129
+
130
+ def to_str(self, separator=" "):
131
+ return f"{self.value.symbol}{separator}{self.value.name}"
132
+
133
+ @staticmethod
134
+ def num2type(size):
135
+ """Determines SizeType based on the model size (<= 10 small, <= 50 medium, otherwise large)."""
136
+ if size <= 10:
137
+ return SizeType.SMALL
138
+ elif size > 10 and size <= 50:
139
+ return SizeType.MEDIUM
140
+ else:
141
+ return SizeType.LARGE
142
+
143
  class WeightType(Enum):
144
  Adapter = ModelDetails("Adapter")
145
  Original = ModelDetails("Original")
src/leaderboard/read_evals.py CHANGED
@@ -11,7 +11,7 @@ from datetime import datetime
11
 
12
  #from get_model_info import num_params
13
  from src.display.formatting import make_clickable_model
14
- from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType
15
  from src.submission.check_validity import is_model_on_hub
16
 
17
 
@@ -36,7 +36,8 @@ class EvalResult:
36
  num_params: int = 0
37
  date: str = "" # submission date of request file
38
  still_on_hub: bool = False
39
- rank: int = field(default=0) # 👈 nuovo campo con default = 0
 
40
 
41
  @classmethod
42
  def init_from_json_file(self, json_filepath):
@@ -52,6 +53,8 @@ class EvalResult:
52
  # Get number of fewshot
53
  fewshot = config.get("num_fewshot", False)
54
 
 
 
55
  try:
56
  if fewshot == "5":
57
  is_5fewshot = True
@@ -68,6 +71,8 @@ class EvalResult:
68
  if num_params_billion is not None:
69
  num_params = math.ceil(num_params_billion)
70
 
 
 
71
  # Get model and org
72
  org_and_model = config.get("model_name", config.get("model_args", None))
73
  org_and_model = org_and_model.split("/", 1)
@@ -121,7 +126,8 @@ class EvalResult:
121
  still_on_hub=still_on_hub,
122
  architecture=architecture,
123
  num_params=num_params,
124
- rank = 0,
 
125
  #submitted_time=config.get("submitted_time", ""),
126
  )
127
 
@@ -151,6 +157,10 @@ class EvalResult:
151
  self.fewshot_symbol.value.symbol if isinstance(self.fewshot_symbol, FewShotType) else "❓"
152
  )
153
 
 
 
 
 
154
  data_dict = {
155
  "eval_name": self.eval_name, # not a column, just a save name,
156
  #AutoEvalColumn.precision.name: self.precision.value.name,
@@ -169,7 +179,8 @@ class EvalResult:
169
  AutoEvalColumn.likes.name: self.likes,
170
  AutoEvalColumn.params.name: self.num_params,
171
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
172
- "rank": self.rank
 
173
  }
174
 
175
  for task in Tasks:
 
11
 
12
  #from get_model_info import num_params
13
  from src.display.formatting import make_clickable_model
14
+ from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType, SizeType
15
  from src.submission.check_validity import is_model_on_hub
16
 
17
 
 
36
  num_params: int = 0
37
  date: str = "" # submission date of request file
38
  still_on_hub: bool = False
39
+ rank: int = 0#str = field(default=0) # nuovo campo con default = 0
40
+ size_symbol: SizeType = SizeType.Unknown
41
 
42
  @classmethod
43
  def init_from_json_file(self, json_filepath):
 
53
  # Get number of fewshot
54
  fewshot = config.get("num_fewshot", False)
55
 
56
+ rank = 0
57
+
58
  try:
59
  if fewshot == "5":
60
  is_5fewshot = True
 
71
  if num_params_billion is not None:
72
  num_params = math.ceil(num_params_billion)
73
 
74
+ size_symbol = SizeType.num2type(num_params)
75
+
76
  # Get model and org
77
  org_and_model = config.get("model_name", config.get("model_args", None))
78
  org_and_model = org_and_model.split("/", 1)
 
126
  still_on_hub=still_on_hub,
127
  architecture=architecture,
128
  num_params=num_params,
129
+ rank = rank,
130
+ size_symbol=size_symbol
131
  #submitted_time=config.get("submitted_time", ""),
132
  )
133
 
 
157
  self.fewshot_symbol.value.symbol if isinstance(self.fewshot_symbol, FewShotType) else "❓"
158
  )
159
 
160
+ size_symbol = (
161
+ self.size_symbol.value.symbol if isinstance(self.size_symbol, SizeType) else "❓"
162
+ )
163
+
164
  data_dict = {
165
  "eval_name": self.eval_name, # not a column, just a save name,
166
  #AutoEvalColumn.precision.name: self.precision.value.name,
 
179
  AutoEvalColumn.likes.name: self.likes,
180
  AutoEvalColumn.params.name: self.num_params,
181
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
182
+ AutoEvalColumn.rank.name: self.rank,
183
+ AutoEvalColumn.size_symbol.name: size_symbol
184
  }
185
 
186
  for task in Tasks: