DeepResearch-Leaderboard / tabs /leaderboard_tab.py
Ayanami0730's picture
Add DeepResearch Bench application with LFS support
927e909
raw
history blame
6.02 kB
import gradio as gr
import pandas as pd
from pathlib import Path
# Paths are resolved from the directory one level above the folder that
# contains this file (i.e. relative to the main script's location).
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

# Constants used for row highlighting: rows whose category equals
# CATEGORY_TO_HIGHLIGHT are given HIGHLIGHT_COLOR as their background.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_COLOR = "#E6D8FF"
# Mapping from the CSV's full column names to the abbreviated headers
# shown in the leaderboard table.
COLUMN_RENAME_MAP = {
    "overall_score": "overall",
    "comprehensiveness": "comp.",
    "insight": "insight",
    "instruction_following": "inst.",
    "readability": "read.",
    "citation_accuracy": "c.acc.",
    "effective_citations": "eff.c.",
}
# Mapping from a category label to the model names that belong to it.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "perplexity-sonar-reasoning-pro",
        "perplexity-sonar-reasoning",
        "perplexity-sonar-pro",
        "gemini-2.5-pro-with-grounding",
        "gpt-4o-search-preview",
        "perplexity-sonar",
        "gpt-4.1-with-search",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "gpt-4.1-mini-with-search",
        "claude-3-5-sonnet-with-search",
    ],
}
def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag each row with a model category.

    Column headers are stripped of surrounding whitespace. A ``category``
    column is then added by looking every ``model`` value up in
    ``MODEL_CATEGORIES``; models listed under no category get ``"Others"``.

    Returns:
        The loaded frame with the additional ``category`` column.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    frame = pd.read_csv(DATA_PATH).rename(columns=str.strip)

    def category_of(name):
        # The first category whose model list contains the name wins;
        # names found in no list fall back to the default "Others" label.
        return next(
            (label for label, members in MODEL_CATEGORIES.items() if name in members),
            "Others",
        )

    frame["category"] = frame["model"].apply(category_of)
    return frame
def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` ordered by score with a leading ``Rank`` column.

    Rows are sorted by ``overall_score`` in descending order, the index is
    reset, a 1-based ``Rank`` column is inserted as the first column, and
    the columns are renamed to their abbreviated forms via
    ``COLUMN_RENAME_MAP``.
    """
    ordered = df.sort_values(by="overall_score", ascending=False)
    ordered = ordered.reset_index(drop=True)

    row_count = len(ordered)
    ordered.insert(0, "Rank", range(1, row_count + 1))

    # Switch the column names to their short display form.
    return ordered.rename(columns=COLUMN_RENAME_MAP)
def filter_data(search_text: str, selected_categories: list):
    """Load the leaderboard, apply the UI filters, and return it ranked.

    Args:
        search_text: Text to look for in the ``model`` column. Matching is a
            case-insensitive literal substring test; blank (or ``None``)
            text disables the search filter.
        selected_categories: Category labels to keep. An empty selection
            disables the category filter.

    Returns:
        The filtered rows passed through ``make_ranked``.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: the query is typed by a user, so it must be matched
        # literally. With the default regex matching, "." would match any
        # character and input such as "gpt(" would raise re.error.
        matches = df['model'].str.contains(query, case=False, na=False, regex=False)
        df = df[matches]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)
# Helper used when styling the DataFrame.
def _style_specific_rows(row, category_column_name='category', target_category=CATEGORY_TO_HIGHLIGHT, color=HIGHLIGHT_COLOR):
    """
    Build the per-cell style list for one row.

    Every cell in the row receives the same ``background-color`` declaration:
    ``color`` when the row's value under ``category_column_name`` equals
    ``target_category``, and an empty colour value otherwise.
    """
    if row.get(category_column_name) == target_category:
        background = color
    else:
        background = ''
    # One identical style entry per element of the row.
    return [f'background-color: {background}'] * len(row)
def _apply_table_styling(df: pd.DataFrame) -> pd.io.formats.style.Styler:
"""
应用表格样式:
- 高亮显示 CATEGORY_TO_HIGHLIGHT 的行
- 保留 'category' 列显示
- 格式化数值为两位小数
返回 Pandas Styler 对象。
"""
if df.empty:
return df.style
styled_df = df.copy()
# 获取数值列(排除 Rank, model, category 列)
numeric_columns = []
for col in styled_df.columns:
if col not in ['Rank', 'model', 'category']:
# 检查是否为数值类型
if styled_df[col].dtype in ['float64', 'int64'] or pd.api.types.is_numeric_dtype(styled_df[col]):
numeric_columns.append(col)
# 应用行样式 - 高亮特定类别的行
styler = styled_df.style.apply(
_style_specific_rows,
axis=1,
category_column_name='category',
target_category=CATEGORY_TO_HIGHLIGHT,
color=HIGHLIGHT_COLOR
)
# 使用 Styler 的 format 方法格式化数值列为两位小数
if numeric_columns:
format_dict = {col: '{:.2f}' for col in numeric_columns}
styler = styler.format(format_dict)
return styler
def create_leaderboard_tab():
    """Build the "Leaderboard" tab and wire up its filter controls.

    The tab contains a model search box and a category checkbox group, a
    read-only styled table of the ranked leaderboard, and a Markdown legend
    for the abbreviated column names. Changing either control re-filters
    the table through ``filter_data``.

    Returns:
        The search ``gr.Textbox`` component.
    """
    # Load the data first so the category choices can reflect what is
    # actually in the table.
    leaderboard = load_leaderboard()

    # Start from the predefined categories, then append any other label found
    # in the data (load_leaderboard assigns a fallback label to models that
    # are in no predefined category). Without this, such rows appear in the
    # initial table but vanish after the first filter change and can never
    # be selected again.
    category_choices = list(MODEL_CATEGORIES)
    for label in leaderboard['category'].dropna().unique().tolist():
        if label not in category_choices:
            category_choices.append(label)

    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            search_box = gr.Textbox(
                label="Model Search",
                placeholder="Entering model name to search...",
                value=""
            )
            category_checkboxes = gr.CheckboxGroup(
                label="Model Categories",
                choices=category_choices,
                value=list(category_choices)
            )

        # Initial table: the full ranked leaderboard, which matches what the
        # callbacks produce for the default (all categories checked) state.
        styled_initial_value = _apply_table_styling(make_ranked(leaderboard))
        table = gr.Dataframe(
            interactive=False,
            wrap=False,
            value=styled_initial_value,
        )

        def update_display(search_text, selected_categories):
            # Recompute the filtered, ranked and styled table for the UI.
            filtered_df_raw = filter_data(search_text, selected_categories)
            return _apply_table_styling(filtered_df_raw)

        # Both controls trigger the same refresh with the same inputs.
        search_box.change(
            fn=update_display,
            inputs=[search_box, category_checkboxes],
            outputs=table
        )
        category_checkboxes.change(
            fn=update_display,
            inputs=[search_box, category_checkboxes],
            outputs=table
        )

        # Legend for the abbreviated column names, shown at the bottom.
        gr.Markdown(
            "\n"
            "### Column Abbreviations\n"
            "The leaderboard uses abbreviated column names for compact display: "
            "(i) **overall** - Overall Score; "
            "(ii) **comp.** - Comprehensiveness; "
            "(iii) **insight** - Insight quality; "
            "(iv) **inst.** - Instruction Following; "
            "(v) **read.** - Readability; "
            "(vi) **c.acc.** - Citation Accuracy; "
            "(vii) **eff.c.** - Effective Citations.\n"
        )

    return search_box