rename to benchmark score
- src/display/utils.py +1 -1
- src/leaderboard/read_evals.py +1 -1
- src/populate.py +1 -1
src/display/utils.py
@@ -32,7 +32,7 @@ auto_eval_column_dict.append(["model_category", ColumnContent, ColumnContent("Ca
 #auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
-auto_eval_column_dict.append(["
+auto_eval_column_dict.append(["average_score", ColumnContent, ColumnContent("Benchmark Score", "number", True)])
 for eval_dim in EvalDimensions:
     auto_eval_column_dict.append([eval_dim.name, ColumnContent, ColumnContent(eval_dim.value.col_name, "number", True)])
 # Model information
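For context, the auto_eval_column_dict entries here follow the Hugging Face leaderboard-template pattern: each [field_name, type, default] triple is later fed to dataclasses.make_dataclass to build the AutoEvalColumn class that the other two files reference. A minimal sketch of that mechanism, assuming the template's usual ColumnContent dataclass (the exact field list in this repo may differ):

    from dataclasses import dataclass, make_dataclass

    @dataclass(frozen=True)
    class ColumnContent:
        name: str                   # display string shown as the table header
        type: str                   # gradio column type ("markdown", "number", ...)
        displayed_by_default: bool
        hidden: bool = False
        never_hidden: bool = False

    auto_eval_column_dict = []
    auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
    auto_eval_column_dict.append(["average_score", ColumnContent, ColumnContent("Benchmark Score", "number", True)])

    # Each triple becomes (attribute name, type, default value) on the class.
    AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

    print(AutoEvalColumn.average_score.name)  # -> "Benchmark Score"

Under this scheme the attribute name and display string live in one place, which is why the rename in utils.py is mirrored by the AutoEvalColumn.average_score lookups in the two files below.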
src/leaderboard/read_evals.py
@@ -127,7 +127,7 @@ class EvalResult:
             #AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             #AutoEvalColumn.revision.name: self.revision,
-            AutoEvalColumn.
+            AutoEvalColumn.average_score.name: average_score,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
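The replacement line stores an average_score local that the context lines don't define; presumably it is computed from the per-dimension results earlier in the same method. A hypothetical sketch of such a computation (the helper name, the rounding, and the results mapping are assumptions, not shown in this diff):

    def compute_average_score(results: dict) -> float | None:
        """Mean of the per-dimension scores, skipping missing entries.

        Hypothetical helper: the diff only shows where the value is stored,
        not how it is produced.
        """
        scores = [v for v in results.values() if v is not None]
        return round(sum(scores) / len(scores), 2) if scores else None

    # e.g. inside EvalResult.to_dict(), before building the row dict:
    #     average_score = compute_average_score(self.results)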
src/populate.py
@@ -16,7 +16,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = pd.DataFrame.from_records(all_data_json)
 
     if not df.empty:
-        df = df.sort_values(by=[AutoEvalColumn.
+        df = df.sort_values(by=[AutoEvalColumn.average_score.name], ascending=False)
 
 
         # filter out if any of the benchmarks have not been produced
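The sort itself is plain pandas. A toy illustration of the resulting ordering, using made-up rows whose keys mirror the display names defined in utils.py (so AutoEvalColumn.average_score.name resolves to "Benchmark Score"):

    import pandas as pd

    # Made-up rows shaped like EvalResult.to_dict() output.
    all_data_json = [
        {"Model": "model-a", "Benchmark Score": 61.2},
        {"Model": "model-b", "Benchmark Score": 74.8},
    ]
    df = pd.DataFrame.from_records(all_data_json)

    if not df.empty:
        # Best score first, matching the new sort key.
        df = df.sort_values(by=["Benchmark Score"], ascending=False)

    print(df)  # model-b (74.8) ranks above model-a (61.2)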