Spaces:
Runtime error
missing
Browse files
src/leaderboard/read_evals.py
CHANGED
|
@@ -456,15 +456,26 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
|
|
| 456 |
if task_name in missing_results_for_task:
|
| 457 |
missing_results_for_task[task_name].append(f"{v.full_model}|{v.org_and_model}")
|
| 458 |
if v.still_on_hub and task.value.benchmark in all_tasks:
|
| 459 |
-
|
|
|
|
| 460 |
else:
|
| 461 |
missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
|
| 462 |
if v.still_on_hub and task.value.benchmark in all_tasks:
|
| 463 |
-
|
|
|
|
| 464 |
if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
|
| 465 |
missing_metadata.append(f"{v.full_model}")
|
| 466 |
all_models.append((v.full_model, v.num_params, v.still_on_hub))
|
| 467 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
# print('missing_results_for_task', missing_results_for_task)
|
| 469 |
for task, models in missing_results_for_task.items():
|
| 470 |
print(f"Missing results for {task} for {len(models)} models")
|
|
|
|
| 456 |
if task_name in missing_results_for_task:
|
| 457 |
missing_results_for_task[task_name].append(f"{v.full_model}|{v.org_and_model}")
|
| 458 |
if v.still_on_hub and task.value.benchmark in all_tasks:
|
| 459 |
+
for_run.append([r["n_shot"], task.value.benchmark, v.full_model])
|
| 460 |
+
# print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
|
| 461 |
else:
|
| 462 |
missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
|
| 463 |
if v.still_on_hub and task.value.benchmark in all_tasks:
|
| 464 |
+
for_run.append([r["n_shot"], task.value.benchmark, v.full_model])
|
| 465 |
+
# print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
|
| 466 |
if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
|
| 467 |
missing_metadata.append(f"{v.full_model}")
|
| 468 |
all_models.append((v.full_model, v.num_params, v.still_on_hub))
|
| 469 |
|
| 470 |
+
print(f"Missing sbatch results:")
|
| 471 |
+
for r in for_run:
|
| 472 |
+
fm=r[2].replace(',multiturn','')
|
| 473 |
+
if ',chat' in fm:
|
| 474 |
+
fm=fm.replace(',chat','')
|
| 475 |
+
print(f'sbatch start.sh "bash eval_model_task_bs1_chat.sh {r[0]} {r[1]} {fm}"')
|
| 476 |
+
else:
|
| 477 |
+
print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r[0]} {r[1]} {fm}"')
|
| 478 |
+
|
| 479 |
# print('missing_results_for_task', missing_results_for_task)
|
| 480 |
for task, models in missing_results_for_task.items():
|
| 481 |
print(f"Missing results for {task} for {len(models)} models")
|