Add Size field to the leaderboard
- app.py +33 -33
- src/display/utils.py +27 -2
- src/leaderboard/read_evals.py +15 -4
app.py
CHANGED
@@ -117,7 +117,7 @@ def barplot_mean_few_minus_zero_shot(dataframe, tasks=None):
 
 def boxplot_per_task(dataframe=None, baselines=None):
 
-    print(dataframe.columns)
+    #print(dataframe.columns)
 
     tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
 
@@ -399,7 +399,7 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
     sorted_dataframe = dataframe.sort_values(by="Avg. Comb. Perf. ⬆️", ascending=False)
 
     sorted_dataframe = sorted_dataframe.reset_index(drop=True)
-    sorted_dataframe["
+    sorted_dataframe["Rank"] = sorted_dataframe.index + 1
 
     # Flag to track whether the medal has already been assigned per category and type
     large_medal_fs_assigned = False
@@ -415,26 +415,26 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
 
     for _, row in sorted_dataframe.iterrows():
         if row['IS_FS']:  # 5-Few-Shot
-            if row["
-                new_model_column.append(f"{row['Model']}
+            if row["Size"] == "🔵🔵🔵" and not large_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🔵🏆")
                 large_medal_fs_assigned = True
-            elif
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵🔵" and not medium_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🏆")
                 medium_medal_fs_assigned = True
-            elif row["
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵" and not small_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🏆")
                 small_medal_fs_assigned = True
             else:
                 new_model_column.append(row["Model"])
         else:  # 0-Shot
-            if row["
-                new_model_column.append(f"{row['Model']}
+            if row["Size"] == "🔵🔵🔵" and not large_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🔵🎖️")
                 large_medal_0shot_assigned = True
-            elif
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵🔵" and not medium_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🎖️")
                 medium_medal_0shot_assigned = True
-            elif row["
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵" and not small_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🎖️")
                 small_medal_0shot_assigned = True
             else:
                 new_model_column.append(row["Model"])
@@ -488,7 +488,7 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=None):
 
     # add the rank column based on position
     sorted_dataframe = sorted_dataframe.reset_index(drop=True)
-    sorted_dataframe["
+    sorted_dataframe["Rank"] = sorted_dataframe.index + 1
 
     # Flag to track whether the medal has already been assigned per category and type
     large_medal_fs_assigned = False
@@ -504,26 +504,26 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=None):
 
     for _, row in sorted_dataframe.iterrows():
         if row['IS_FS']:  # 5-Few-Shot
-            if row["
-                new_model_column.append(f"{row['Model']}
+            if row["Size"] == "🔵🔵🔵" and not large_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🔵🏆")
                 large_medal_fs_assigned = True
-            elif
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵🔵" and not medium_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🏆")
                 medium_medal_fs_assigned = True
-            elif row["
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵" and not small_medal_fs_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🏆")
                 small_medal_fs_assigned = True
             else:
                 new_model_column.append(row["Model"])
         else:  # 0-Shot
-            if row["
-                new_model_column.append(f"{row['Model']}
+            if row["Size"] == "🔵🔵🔵" and not large_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🔵🎖️")
                 large_medal_0shot_assigned = True
-            elif
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵🔵" and not medium_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🔵🎖️")
                 medium_medal_0shot_assigned = True
-            elif row["
-                new_model_column.append(f"{row['Model']}
+            elif row["Size"] == "🔵" and not small_medal_0shot_assigned:
+                new_model_column.append(f"{row['Model']} 🔵🎖️")
                 small_medal_0shot_assigned = True
             else:
                 new_model_column.append(row["Model"])
@@ -646,8 +646,8 @@ with demo:
 
         leaderboard = init_leaderboard(
             LEADERBOARD_DF,
-            default_selection=['
-            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['
+            default_selection=['Rank', 'Size', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
+            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
        )
 
        gr.HTML(
@@ -693,8 +693,8 @@ with demo:
 
        leaderboard = update_task_leaderboard(
            LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average", f"{task} Prompt Std": "Prompt Std", f"{task} Best Prompt": "Best Prompt", f"{task} Best Prompt Id": "Best Prompt Id", task: "Combined Performance"}),
-            default_selection=['
-            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['
+            default_selection=['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
+            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
        )
 
        # About tab
@@ -713,10 +713,10 @@ with demo:
                f"{task} Best Prompt": "Best Prompt",
                f"{task} Best Prompt Id": "Best Prompt Id",
                task: "Combined Performance"}),
-            default_selection=['
+            default_selection=['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
                                'Best Prompt Id'],
            hidden_columns=[col for col in LEADERBOARD_DF.columns if
-                            col not in ['
+                            col not in ['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
                                        'Best Prompt', 'Best Prompt Id']]
        )
 
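The medal logic added above decorates only the best-ranked model within each size tier, separately for the 5-few-shot (🏆) and 0-shot (🎖️) groups. Below is a minimal standalone sketch of that idea, assuming a DataFrame already sorted by "Avg. Comb. Perf. ⬆️" with the Model and Size columns used in app.py; the helper name decorate_models is hypothetical and not part of the codebase.

# Sketch only: first-per-tier trophy assignment, mirroring init_leaderboard.
import pandas as pd

def decorate_models(df, medal="🏆"):
    """Append `medal` to the first (best-ranked) model of each Size tier."""
    decorated, seen_tiers = [], set()
    for _, row in df.iterrows():
        if row["Size"] not in seen_tiers:
            decorated.append(f"{row['Model']} {row['Size']}{medal}")
            seen_tiers.add(row["Size"])
        else:
            decorated.append(row["Model"])
    return decorated

toy = pd.DataFrame({
    "Model": ["A", "B", "C", "D"],
    "Size": ["🔵🔵🔵", "🔵🔵🔵", "🔵", "🔵🔵"],
})  # assumed already sorted by "Avg. Comb. Perf. ⬆️"
print(decorate_models(toy))
# ['A 🔵🔵🔵🏆', 'B', 'C 🔵🏆', 'D 🔵🔵🏆']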
src/display/utils.py
CHANGED
@@ -25,7 +25,8 @@ auto_eval_column_dict = []
 # Init
 #auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 
-auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("
+auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("Rank", "number", True, never_hidden=True)])
+auto_eval_column_dict.append(["size_symbol", ColumnContent, ColumnContent("Size", "number", True, never_hidden=True)])
 
 auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["is_5fewshot", ColumnContent, ColumnContent("IS_FS", "bool", True)])
@@ -99,7 +100,7 @@ class FewShotDetails:
     symbol: str = "" # emoji
 
 class FewShotType(Enum):
-    ZS = FewShotDetails(name="zero-shot", symbol="
+    ZS = FewShotDetails(name="zero-shot", symbol="🅾️")
     FS = FewShotDetails(name="5-few-shot", symbol="5️⃣")
     Unknown = FewShotDetails(name="unknown", symbol="❓")
 
@@ -115,6 +116,30 @@ class FewShotType(Enum):
             return FewShotType.FS
         return FewShotType.Unknown
 
+@dataclass
+class SizeDetails:
+    name: str
+    symbol: str = "" # emoji
+
+class SizeType(Enum):
+    SMALL = SizeDetails(name="small", symbol="🔵")
+    MEDIUM = SizeDetails(name="medium", symbol="🔵🔵")
+    LARGE = SizeDetails(name="large", symbol="🔵🔵🔵")
+    Unknown = SizeDetails(name="unknown", symbol="❓")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def num2type(size):
+        """Determines SizeType based on the number of parameters (in billions)."""
+        if size <= 10:
+            return SizeType.SMALL
+        elif size > 10 and size <= 50:
+            return SizeType.MEDIUM
+        else:
+            return SizeType.LARGE
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
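The new SizeType enum buckets a model's parameter count (in billions, as computed in read_evals.py) into three tiers: up to 10B is small (🔵), over 10B and up to 50B is medium (🔵🔵), and anything larger is large (🔵🔵🔵). A short usage sketch, assuming the enum is imported from src.display.utils as in the diff above:

from src.display.utils import SizeType

for params_b in (7, 13, 70):
    tier = SizeType.num2type(params_b)   # SMALL, MEDIUM, LARGE respectively
    print(params_b, tier.to_str())       # e.g. "7 🔵 small", "13 🔵🔵 medium", "70 🔵🔵🔵 large"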
src/leaderboard/read_evals.py
CHANGED
@@ -11,7 +11,7 @@ from datetime import datetime
 
 #from get_model_info import num_params
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType
+from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType, SizeType
 from src.submission.check_validity import is_model_on_hub
 
 
@@ -36,7 +36,8 @@ class EvalResult:
     num_params: int = 0
     date: str = "" # submission date of request file
     still_on_hub: bool = False
-    rank: int = field(default=0) #
+    rank: int = 0  # new field with default = 0
+    size_symbol: SizeType = SizeType.Unknown
 
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -52,6 +53,8 @@ class EvalResult:
         # Get number of fewshot
         fewshot = config.get("num_fewshot", False)
 
+        rank = 0
+
         try:
             if fewshot == "5":
                 is_5fewshot = True
@@ -68,6 +71,8 @@ class EvalResult:
         if num_params_billion is not None:
             num_params = math.ceil(num_params_billion)
 
+        size_symbol = SizeType.num2type(num_params)
+
         # Get model and org
         org_and_model = config.get("model_name", config.get("model_args", None))
         org_and_model = org_and_model.split("/", 1)
@@ -121,7 +126,8 @@ class EvalResult:
             still_on_hub=still_on_hub,
             architecture=architecture,
             num_params=num_params,
-            rank =
+            rank=rank,
+            size_symbol=size_symbol
             #submitted_time=config.get("submitted_time", ""),
         )
 
@@ -151,6 +157,10 @@ class EvalResult:
             self.fewshot_symbol.value.symbol if isinstance(self.fewshot_symbol, FewShotType) else "❓"
         )
 
+        size_symbol = (
+            self.size_symbol.value.symbol if isinstance(self.size_symbol, SizeType) else "❓"
+        )
+
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name
             #AutoEvalColumn.precision.name: self.precision.value.name,
@@ -169,7 +179,8 @@ class EvalResult:
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
-
+            AutoEvalColumn.rank.name: self.rank,
+            AutoEvalColumn.size_symbol.name: size_symbol
         }
 
         for task in Tasks:
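Taken together, the read_evals.py changes mean every EvalResult now derives a SizeType from its rounded-up parameter count and emits the matching emoji under the new Size column, alongside the Rank placeholder that app.py overwrites after sorting. A condensed sketch of that flow, using a made-up parameter count rather than the real JSON parsing:

import math
from src.display.utils import AutoEvalColumn, SizeType

num_params_billion = 46.7                    # illustrative value; normally read from the result JSON config
num_params = math.ceil(num_params_billion)   # 47
size_symbol = SizeType.num2type(num_params)  # SizeType.MEDIUM

row = {
    AutoEvalColumn.params.name: num_params,
    AutoEvalColumn.rank.name: 0,                                 # placeholder; Rank is recomputed in app.py
    AutoEvalColumn.size_symbol.name: size_symbol.value.symbol,   # "🔵🔵" shown in the Size column
}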