#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
from pathlib import Path


def parse_race_result(race_result_file):
    """Parse race_result.txt and extract the per-dimension scores."""
    scores = {}
    with open(race_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                value = float(value.strip())
                if key == 'Comprehensiveness':
                    scores['comprehensiveness'] = value * 100
                elif key == 'Insight':
                    scores['insight'] = value * 100
                elif key == 'Instruction Following':
                    scores['instruction_following'] = value * 100
                elif key == 'Readability':
                    scores['readability'] = value * 100
                elif key == 'Overall Score':
                    scores['overall_score'] = value * 100
    return scores
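
# For reference, parse_race_result() expects one "Key: value" pair per line,
# with values apparently on a 0-1 scale (the parser rescales them by 100).
# A hypothetical example, with invented numbers, not real evaluation output:
#
#   Comprehensiveness: 0.4523
#   Insight: 0.4211
#   Instruction Following: 0.4876
#   Readability: 0.4632
#   Overall Score: 0.4561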


def parse_fact_result(fact_result_file):
    """Parse fact_result.txt and extract the citation-related metrics."""
    citation_scores = {}
    if not fact_result_file.exists():
        return citation_scores
    with open(fact_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                value = float(value.strip())
                if key == 'valid_rate':
                    citation_scores['citation_accuracy'] = value * 100
                elif key == 'total_valid_citations':
                    citation_scores['effective_citations'] = value
                elif key == 'supported_per_task':
                    # Both key names map to effective_citations; if both are
                    # present in the file, the later line wins.
                    citation_scores['effective_citations'] = value
    return citation_scores
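
# For reference, parse_fact_result() expects the same "key: value" layout.
# A hypothetical example (field names taken from the parser above, values
# invented for illustration); either total_valid_citations or
# supported_per_task may appear:
#
#   valid_rate: 0.78
#   total_valid_citations: 36.5
#   supported_per_task: 34.2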


def process_model_data(model_dir):
    """Process the data for a single model folder."""
    model_name = model_dir.name
    race_result_file = model_dir / "race_result.txt"
    if not race_result_file.exists():
        print(f"Warning: race_result.txt not found in the folder for model {model_name}")
        return None
    print(f"Processing model: {model_name}")
    try:
        scores = parse_race_result(race_result_file)
        if not scores:
            print("  - Warning: no valid scores were parsed")
            return None
        # Locate the corresponding fact_result.txt for this model.
        project_root = Path(__file__).parent.parent
        fact_results_dir = project_root / "data" / "fact_results"
        fact_result_file = fact_results_dir / model_name / "fact_result.txt"
        citation_scores = parse_fact_result(fact_result_file)
        if citation_scores:
            accuracy = citation_scores.get('citation_accuracy')
            citations = citation_scores.get('effective_citations')
            accuracy_str = f"{accuracy:.2f}%" if accuracy is not None else "N/A"
            citations_str = str(citations) if citations is not None else "N/A"
            print(f"  - Overall score: {scores['overall_score']:.2f}, "
                  f"citation accuracy: {accuracy_str}, effective citations: {citations_str}")
        else:
            print(f"  - Overall score: {scores['overall_score']:.2f}, citation data: not found")
        result = {
            'model': model_name,
            'overall_score': scores['overall_score'],
            'comprehensiveness': scores['comprehensiveness'],
            'insight': scores['insight'],
            'instruction_following': scores['instruction_following'],
            'readability': scores['readability'],
            'citation_accuracy': citation_scores.get('citation_accuracy', None),
            'effective_citations': citation_scores.get('effective_citations', None)
        }
        return result
    except Exception as e:
        print(f"  - Error while processing files: {e}")
        return None


def rank_leaderboard():
    """Compute the leaderboard and save it to CSV."""
    project_root = Path(__file__).parent.parent
    input_dir = project_root / "data" / "raw_results"
    output_file = project_root / "data" / "leaderboard.csv"
    model_dirs = [d for d in input_dir.iterdir() if d.is_dir()]
    print(f"Found {len(model_dirs)} model folders")
    if not model_dirs:
        print("No model folders found")
        return
    model_results = []
    for model_dir in model_dirs:
        try:
            result = process_model_data(model_dir)
            if result:
                model_results.append(result)
        except Exception as e:
            print(f"Error while processing folder {model_dir.name}: {e}")
            continue
    # Sort by overall_score in descending order.
    model_results.sort(key=lambda x: x['overall_score'], reverse=True)
    # Write the CSV file.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['model', 'overall_score', 'comprehensiveness', 'insight',
                      'instruction_following', 'readability', 'citation_accuracy',
                      'effective_citations']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for result in model_results:
            # Format numeric values; use "-" for missing (None) values.
            row = {
                'model': result['model'],
                'overall_score': f"{result['overall_score']:.2f}",
                'comprehensiveness': f"{result['comprehensiveness']:.2f}",
                'insight': f"{result['insight']:.2f}",
                'instruction_following': f"{result['instruction_following']:.2f}",
                'readability': f"{result['readability']:.2f}",
                'citation_accuracy': f"{result['citation_accuracy']:.2f}" if result['citation_accuracy'] is not None else "-",
                'effective_citations': f"{result['effective_citations']:.2f}" if result['effective_citations'] is not None else "-"
            }
            writer.writerow(row)
    print(f"\nLeaderboard saved to: {output_file}")
    print(f"Processed {len(model_results)} models")


if __name__ == "__main__":
    rank_leaderboard()
    print("Leaderboard computation complete!")