| import json | |
| import os | |
| from collections import defaultdict | |
| from refactor_eval_results import AGENTIC_LOG_MODEL_NAME_MAP, AGENTIC_TASKS | |
def _read_task_name(log_path):
    """Return the task name recorded in the JSON eval log at ``log_path``.

    The task name is the last ``/``-separated component of
    ``result["eval"]["task"]``.
    """
    with open(log_path, "r", encoding="utf-8") as f:
        result = json.load(f)
    return result["eval"]["task"].split("/")[-1]


def _collect_task_logs(log_dir):
    """Map task name -> log file name for every ``.json`` file in ``log_dir``.

    Files are visited in sorted order so that, when several logs record the
    same task, the winner (the last one in sorted order) is deterministic
    instead of depending on the order ``os.listdir`` happens to return.
    """
    task_logs = {}
    for task_log_file in sorted(os.listdir(log_dir)):
        # Only JSON logs can be parsed; skip anything else found in the folder.
        if not task_log_file.endswith(".json"):
            continue
        task_name = _read_task_name(os.path.join(log_dir, task_log_file))
        task_logs[task_name] = task_log_file
    return task_logs


def main(
    base_bm_input_path="./base_benchmarking_logs",
    agentic_bm_input_path="/fs01/projects/aieng/public/inspect_evals/agentic_benchmarking_runs",
    output_path="./inspect_log_file_names.json",
    agentic_model_name_map=None,
    agentic_tasks=None,
):
    """Build a ``{model: {task: log file name}}`` index and write it as JSON.

    Two log trees are scanned:

    * ``base_bm_input_path/<model_name>/<log>.json``
    * ``agentic_bm_input_path/<model_dir>/<task>/<log>.json`` where
      ``<model_dir>`` is looked up in ``agentic_model_name_map``.

    Args:
        base_bm_input_path: Directory holding one sub-directory of logs per
            model. Must exist.
        agentic_bm_input_path: Root directory of the agentic benchmarking runs.
        output_path: Destination of the JSON index.
        agentic_model_name_map: Mapping of model name -> directory name under
            ``agentic_bm_input_path``. Defaults to
            ``AGENTIC_LOG_MODEL_NAME_MAP``.
        agentic_tasks: Iterable of task sub-directory names to scan for each
            agentic model. Defaults to ``AGENTIC_TASKS``.

    Raises:
        FileNotFoundError: If ``base_bm_input_path`` does not exist.
        KeyError: If a log file lacks the ``["eval"]["task"]`` field.
        json.JSONDecodeError: If a ``.json`` log is not valid JSON.
    """
    if agentic_model_name_map is None:
        agentic_model_name_map = AGENTIC_LOG_MODEL_NAME_MAP
    if agentic_tasks is None:
        agentic_tasks = AGENTIC_TASKS

    log_file_map = {}

    for model_name in sorted(os.listdir(base_bm_input_path)):
        # Every entry gets a mapping, even when it is not a directory (in
        # which case the mapping simply stays empty).
        model_logs = log_file_map[model_name] = {}
        model_dir = os.path.join(base_bm_input_path, model_name)
        if os.path.isdir(model_dir):
            model_logs.update(_collect_task_logs(model_dir))

    for model_name, log_dir_name in agentic_model_name_map.items():
        model_dir = os.path.join(agentic_bm_input_path, log_dir_name)
        if not os.path.isdir(model_dir):
            continue
        # A model may have agentic logs without any base logs, so create its
        # entry on demand rather than assuming the first loop already did.
        model_logs = log_file_map.setdefault(model_name, {})
        for task in agentic_tasks:
            task_dir = os.path.join(model_dir, task)
            # Not every model has been run on every task.
            if not os.path.isdir(task_dir):
                continue
            model_logs.update(_collect_task_logs(task_dir))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(log_file_map, f, indent=4)
# Script entry point: build the log-file index only when this file is run
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    main()