#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Build a leaderboard CSV from per-model race_result.txt / fact_result.txt files."""

import csv
import json
import os
from collections import defaultdict
from pathlib import Path

# Labels recognised in race_result.txt -> key used in the parsed score dict.
_RACE_FIELDS = {
    'Comprehensiveness': 'comprehensiveness',
    'Insight': 'insight',
    'Instruction Following': 'instruction_following',
    'Readability': 'readability',
    'Overall Score': 'overall_score',
}

# Labels recognised in fact_result.txt -> (output key, multiplier).
# Both citation-count labels feed the same output key; when a file contains
# both, the one appearing later in the file wins.
_FACT_FIELDS = {
    'valid_rate': ('citation_accuracy', 100),
    'total_valid_citations': ('effective_citations', 1),
    'supported_per_task': ('effective_citations', 1),
}

# Column order of the generated CSV.
_CSV_FIELDS = [
    'model',
    'overall_score',
    'comprehensiveness',
    'insight',
    'instruction_following',
    'readability',
    'citation_accuracy',
    'effective_citations',
]


def _iter_key_values(path):
    """Yield ``(key, raw_value)`` for every line of *path* that contains a colon."""
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            key, sep, raw = line.strip().partition(':')
            if sep:
                yield key.strip(), raw.strip()


def _format_cell(value):
    """Render a numeric CSV cell with two decimals, or ``"-"`` when missing."""
    return "-" if value is None else f"{value:.2f}"


def parse_race_result(race_result_file):
    """Parse a race_result.txt file into per-dimension scores.

    Each recognised ``Label: number`` line is stored under its snake_case key
    with the number multiplied by 100. Lines with an unrecognised label (for
    example a header that merely contains a colon) are ignored instead of
    aborting the whole file.

    Args:
        race_result_file: Path (``str`` or ``Path``) of the file to read.

    Returns:
        Dict mapping score keys to floats; empty if nothing was recognised.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a recognised label has a non-numeric value.
    """
    scores = {}
    for key, raw in _iter_key_values(race_result_file):
        field = _RACE_FIELDS.get(key)
        if field is None:
            continue
        scores[field] = float(raw) * 100
    return scores


def parse_fact_result(fact_result_file):
    """Parse a fact_result.txt file into citation metrics.

    ``valid_rate`` is multiplied by 100 and stored as ``citation_accuracy``;
    ``total_valid_citations`` / ``supported_per_task`` are stored unscaled as
    ``effective_citations``. Unrecognised labels are ignored.

    Args:
        fact_result_file: Path (``str`` or ``Path``) of the file to read.

    Returns:
        Dict of citation metrics; empty if the file does not exist.

    Raises:
        ValueError: If a recognised label has a non-numeric value.
    """
    citation_scores = {}
    fact_result_file = Path(fact_result_file)
    if not fact_result_file.exists():
        return citation_scores

    for key, raw in _iter_key_values(fact_result_file):
        spec = _FACT_FIELDS.get(key)
        if spec is None:
            continue
        field, multiplier = spec
        citation_scores[field] = float(raw) * multiplier
    return citation_scores


def process_model_data(model_dir, fact_results_dir=None):
    """Collect the leaderboard row for a single model directory.

    Args:
        model_dir: Directory named after the model, containing race_result.txt.
        fact_results_dir: Directory holding ``<model>/fact_result.txt``.
            Defaults to ``<project_root>/data/fact_results`` where the project
            root is the parent of this file's directory.

    Returns:
        Dict with the model name, the five race scores and the two citation
        metrics (``None`` when unavailable), or ``None`` if the model has no
        usable race result or processing failed.
    """
    model_dir = Path(model_dir)
    model_name = model_dir.name
    race_result_file = model_dir / "race_result.txt"

    if not race_result_file.exists():
        print(f"警告: 模型 {model_name} 的文件夹中未找到 race_result.txt")
        return None

    print(f"正在处理模型: {model_name}")

    try:
        scores = parse_race_result(race_result_file)
        if not scores:
            print(" - 警告: 未能解析到有效分数")
            return None

        # Locate the matching fact_result.txt for this model.
        if fact_results_dir is None:
            project_root = Path(__file__).parent.parent
            fact_results_dir = project_root / "data" / "fact_results"
        fact_result_file = Path(fact_results_dir) / model_name / "fact_result.txt"
        citation_scores = parse_fact_result(fact_result_file)

        overall = scores['overall_score']
        if citation_scores:
            # Only apply the numeric format when the metric exists; formatting
            # the 'N/A' placeholder with ':.2f' raises ValueError, which used
            # to drop the whole model from the leaderboard.
            accuracy = citation_scores.get('citation_accuracy')
            accuracy_text = 'N/A' if accuracy is None else f"{accuracy:.2f}%"
            effective = citation_scores.get('effective_citations', 'N/A')
            print(f" - 总分: {overall:.2f}, 引用准确率: {accuracy_text}, 有效引用数: {effective}")
        else:
            print(f" - 总分: {overall:.2f}, 引用数据: 未找到")

        return {
            'model': model_name,
            'overall_score': overall,
            'comprehensiveness': scores['comprehensiveness'],
            'insight': scores['insight'],
            'instruction_following': scores['instruction_following'],
            'readability': scores['readability'],
            'citation_accuracy': citation_scores.get('citation_accuracy'),
            'effective_citations': citation_scores.get('effective_citations'),
        }
    except Exception as e:
        # Deliberate best-effort boundary: one bad model must not stop the run.
        print(f" - 错误: 处理文件时出错: {e}")
        return None


def rank_leaderboard(input_dir=None, output_file=None, fact_results_dir=None):
    """Compute the leaderboard and write it to a CSV file.

    Every sub-directory of *input_dir* is treated as one model. Rows are
    sorted by ``overall_score`` in descending order; missing citation metrics
    are written as ``"-"``.

    Args:
        input_dir: Directory with one sub-directory per model. Defaults to
            ``<project_root>/data/raw_results``.
        output_file: Destination CSV path. Defaults to
            ``<project_root>/data/leaderboard.csv``.
        fact_results_dir: Forwarded to :func:`process_model_data`.

    Returns:
        None. Nothing is written when no model directory is found.
    """
    project_root = Path(__file__).parent.parent
    input_dir = project_root / "data" / "raw_results" if input_dir is None else Path(input_dir)
    output_file = project_root / "data" / "leaderboard.csv" if output_file is None else Path(output_file)

    # A missing input directory is handled like an empty one instead of
    # crashing in iterdir(); sorting makes the order of tied scores stable.
    if input_dir.is_dir():
        model_dirs = sorted(d for d in input_dir.iterdir() if d.is_dir())
    else:
        model_dirs = []
    print(f"找到 {len(model_dirs)} 个模型文件夹")

    if not model_dirs:
        print("未找到任何模型文件夹")
        return

    model_results = []
    for model_dir in model_dirs:
        try:
            result = process_model_data(model_dir, fact_results_dir=fact_results_dir)
        except Exception as e:
            print(f"处理文件夹 {model_dir.name} 时出错: {e}")
            continue
        if result:
            model_results.append(result)

    # Highest overall score first.
    model_results.sort(key=lambda x: x['overall_score'], reverse=True)

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_CSV_FIELDS)
        writer.writeheader()
        for result in model_results:
            row = {'model': result['model']}
            # Every non-name column is numeric: two decimals, "-" for None.
            for field in _CSV_FIELDS[1:]:
                row[field] = _format_cell(result[field])
            writer.writerow(row)

    print(f"\n排行榜已保存到: {output_file}")
    print(f"共处理了 {len(model_results)} 个模型")


if __name__ == "__main__":
    rank_leaderboard()
    print("排行榜计算完成!")