Spaces:
Runtime error
Runtime error
Update src/backend/run_eval_suite.py
Browse files
src/backend/run_eval_suite.py
CHANGED
|
@@ -15,16 +15,14 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
|
|
| 15 |
print(
|
| 16 |
"WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
|
| 17 |
)
|
| 18 |
-
|
| 19 |
-
task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
|
| 20 |
|
| 21 |
print(f"Selected Tasks: {task_names}")
|
| 22 |
-
|
| 23 |
results = evaluator.simple_evaluate(
|
| 24 |
model="hf-causal-experimental", # "hf-causal"
|
| 25 |
model_args=eval_request.get_model_args(),
|
| 26 |
tasks=task_names,
|
| 27 |
-
num_fewshot=num_fewshot,
|
| 28 |
batch_size=batch_size,
|
| 29 |
device=device,
|
| 30 |
no_cache=no_cache,
|
|
@@ -54,4 +52,4 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
|
|
| 54 |
repo_type="dataset",
|
| 55 |
)
|
| 56 |
|
| 57 |
-
return results
|
|
|
|
| 15 |
print(
|
| 16 |
"WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
|
| 17 |
)
|
| 18 |
+
task_names = ["medmcqa", "medqa_4options", "mmlu_anatomy", "mmlu_clinical_knowledge", "mmlu_college_biology", "mmlu_college_medicine", "mmlu_medical_genetics", "mmlu_professional_medicine", "pubmedqa"]
|
|
|
|
| 19 |
|
| 20 |
print(f"Selected Tasks: {task_names}")
|
|
|
|
| 21 |
results = evaluator.simple_evaluate(
|
| 22 |
model="hf-causal-experimental", # "hf-causal"
|
| 23 |
model_args=eval_request.get_model_args(),
|
| 24 |
tasks=task_names,
|
| 25 |
+
# num_fewshot=num_fewshot,
|
| 26 |
batch_size=batch_size,
|
| 27 |
device=device,
|
| 28 |
no_cache=no_cache,
|
|
|
|
| 52 |
repo_type="dataset",
|
| 53 |
)
|
| 54 |
|
| 55 |
+
return results
|