Alexis Gobé committed on
Commit
71fffd4
·
1 Parent(s): 4a5f8bc

Change GPQA metric

Browse files
backend/app/services/leaderboard.py CHANGED
@@ -91,7 +91,7 @@ class Task:
91
  col_name: str
92
 
93
  class Tasks(Enum):
94
- task0 = Task("community|gpqa-fr|0", "acc", "GPQA-fr") # On pourrait vouloir mettre "Connaissances"
95
  task1 = Task("community|ifeval-fr|0", "prompt_level_strict_acc", "IFEval-fr") # FIXME norm_acc should be acc # et "Suivi d'instructions"
96
  task2 = Task("community|bac-fr|0", "bac-fr-qem", "bac-fr") # et "Suivi d'instructions"
97
 
@@ -187,7 +187,7 @@ class EvalResult:
187
  #FIXME postprocessing of metrics is done here ftm
188
  display = True # Do not display models evaluation if something went wrong (missing task, 0 score, ...)
189
  if(task.col_name == "GPQA-fr"):
190
- accs = np.array([v.get("acc", None) for k, v in data["results"].items() if task.benchmark == k])
191
  if accs.size == 0 or any([acc is None for acc in accs]):
192
  display = False
193
  continue
@@ -268,7 +268,8 @@ class LeaderboardService:
268
 
269
  for root, _, files in os.walk(results_path):
270
  #FIXME We will remove this check when results will be homogeneous
271
- folderName = "clearML-sprint1-wr"
 
272
  normalized_root = os.path.normpath(root)
273
  path_components = normalized_root.split(os.sep)
274
  if folderName in path_components:
 
91
  col_name: str
92
 
93
  class Tasks(Enum):
94
+ task0 = Task("community|gpqa-fr|0", "new_acc", "GPQA-fr") # On pourrait vouloir mettre "Connaissances"
95
  task1 = Task("community|ifeval-fr|0", "prompt_level_strict_acc", "IFEval-fr") # FIXME norm_acc should be acc # et "Suivi d'instructions"
96
  task2 = Task("community|bac-fr|0", "bac-fr-qem", "bac-fr") # et "Suivi d'instructions"
97
 
 
187
  #FIXME postprocessing of metrics is done here ftm
188
  display = True # Do not display models evaluation if something went wrong (missing task, 0 score, ...)
189
  if(task.col_name == "GPQA-fr"):
190
+ accs = np.array([v.get("new_acc", None) for k, v in data["results"].items() if task.benchmark == k])
191
  if accs.size == 0 or any([acc is None for acc in accs]):
192
  display = False
193
  continue
 
268
 
269
  for root, _, files in os.walk(results_path):
270
  #FIXME We will remove this check when results will be homogeneous
271
+ folderName = "clearML-sprint1.5"
272
+
273
  normalized_root = os.path.normpath(root)
274
  path_components = normalized_root.split(os.sep)
275
  if folderName in path_components: