|
import gradio as gr |
|
import pandas as pd |
|
from pathlib import Path |
|
|
|
|
|
# Directory two levels above this file; the leaderboard data lives beneath it.
BASE_DIR = Path(__file__).resolve().parent.parent
# CSV file that load_leaderboard() reads.
DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")
|
|
|
|
|
# Background color (CSS hex) given to highlighted table rows.
HIGHLIGHT_COLOR = "#E6D8FF"
# Rows whose "category" equals this value are the ones that get highlighted.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
|
|
|
|
|
# Raw leaderboard column name -> abbreviated header used for display.
COLUMN_RENAME_MAP = {
    "overall_score": "overall",
    "comprehensiveness": "comp.",
    "insight": "insight",
    "instruction_following": "inst.",
    "readability": "read.",
    "citation_accuracy": "c.acc.",
    "effective_citations": "eff.c.",
}
|
|
|
|
|
# Category name -> model names belonging to it. Models that appear in no
# list are labelled "Others" when the leaderboard is loaded.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "perplexity-sonar-reasoning-pro",
        "perplexity-sonar-reasoning",
        "perplexity-sonar-pro",
        "gemini-2.5-pro-with-grounding",
        "gpt-4o-search-preview",
        "perplexity-sonar",
        "gpt-4.1-with-search",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "gpt-4.1-mini-with-search",
        "claude-3-5-sonnet-with-search",
    ],
}
|
|
|
def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag every row with its model category.

    Column headers are stripped of surrounding whitespace. A ``category``
    column is added: the first ``MODEL_CATEGORIES`` entry whose list contains
    the row's ``model`` value, or ``"Others"`` when no list does.

    Returns:
        The CSV contents with the extra ``category`` column.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    frame = pd.read_csv(DATA_PATH)
    frame.columns = [header.strip() for header in frame.columns]

    # Reverse lookup model -> category. setdefault keeps the first category
    # that lists a model, matching a top-to-bottom scan of MODEL_CATEGORIES.
    category_of = {}
    for category, models in MODEL_CATEGORIES.items():
        for model in models:
            category_of.setdefault(model, category)

    frame["category"] = frame["model"].map(
        lambda name: category_of.get(name, "Others")
    )
    return frame
|
|
|
def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Return a ranked copy of ``df`` with display-ready column names.

    Rows are ordered by ``overall_score`` from highest to lowest, the index
    is renumbered, a 1-based ``Rank`` column is placed first, and columns are
    renamed according to ``COLUMN_RENAME_MAP``.

    Args:
        df: Leaderboard rows; must contain an ``overall_score`` column.

    Returns:
        A new, sorted and renamed DataFrame.
    """
    ordered = df.sort_values(by="overall_score", ascending=False)
    ordered = ordered.reset_index(drop=True)

    # Position in the sorted frame is the rank (ties are not merged).
    positions = range(1, len(ordered) + 1)
    ordered.insert(0, "Rank", positions)

    return ordered.rename(columns=COLUMN_RENAME_MAP)
|
|
|
def filter_data(search_text: str, selected_categories: list):
    """Load the leaderboard, filter it, and return the ranked result.

    Args:
        search_text: Text to look for in the ``model`` column. Matching is a
            case-insensitive *literal* substring test. Blank, whitespace-only
            or ``None`` values disable the search filter.
        selected_categories: Category names to keep. An empty or ``None``
            selection disables the category filter (all rows are kept).

    Returns:
        The filtered rows passed through ``make_ranked``.
    """
    df = load_leaderboard()

    # Tolerate None so a missing value behaves like an empty query instead of
    # raising AttributeError on .strip().
    query = (search_text or "").strip()
    if query:
        # regex=False: the query is free text typed by a user, not a pattern.
        # With the pandas default (regex=True) input such as "(" or "["
        # raises re.error, and "." silently matches any character.
        matches = df["model"].str.contains(
            query, case=False, na=False, regex=False
        )
        df = df[matches]

    if selected_categories:
        df = df[df["category"].isin(selected_categories)]

    return make_ranked(df)
|
|
|
|
|
def _style_specific_rows(row, category_column_name='category', target_category=CATEGORY_TO_HIGHLIGHT, color=HIGHLIGHT_COLOR):
    """Return one CSS declaration per cell of ``row``.

    When the row's value under ``category_column_name`` equals
    ``target_category``, every cell gets ``background-color: <color>``;
    otherwise every cell gets a ``background-color`` declaration with an
    empty value.
    """
    if row.get(category_column_name) == target_category:
        css = f'background-color: {color}'
    else:
        css = 'background-color: '
    # Same declaration for every cell so the whole row is styled uniformly.
    return [css] * len(row)
|
|
|
def _apply_table_styling(df: pd.DataFrame) -> "pd.io.formats.style.Styler":
    """Wrap ``df`` in a pandas Styler for display.

    Styling applied:
      - rows whose ``category`` equals ``CATEGORY_TO_HIGHLIGHT`` get the
        ``HIGHLIGHT_COLOR`` background;
      - the ``category`` column itself is kept visible;
      - every numeric column other than ``Rank``, ``model`` and ``category``
        is formatted with two decimal places.

    Args:
        df: Table to style. It is copied, so the caller's frame is untouched.

    Returns:
        A Styler for the table; an unstyled one when ``df`` is empty.
    """
    # The return annotation is a string on purpose: evaluating
    # ``pd.io.formats.style`` eagerly only works if that submodule has
    # already been imported somewhere else.
    if df.empty:
        return df.style

    styled_df = df.copy()

    # is_numeric_dtype already covers float64/int64, so one check suffices.
    numeric_columns = [
        col
        for col in styled_df.columns
        if col not in ('Rank', 'model', 'category')
        and pd.api.types.is_numeric_dtype(styled_df[col])
    ]

    styler = styled_df.style.apply(
        _style_specific_rows,
        axis=1,
        category_column_name='category',
        target_category=CATEGORY_TO_HIGHLIGHT,
        color=HIGHLIGHT_COLOR,
    )

    if numeric_columns:
        styler = styler.format(dict.fromkeys(numeric_columns, '{:.2f}'))

    return styler
|
|
|
def create_leaderboard_tab():
    """Build the "🏆Leaderboard" tab and wire up its filters.

    The tab holds a model search box, a category checkbox group (all
    ``MODEL_CATEGORIES`` keys, all ticked by default), a read-only styled
    table and a legend for the abbreviated column names. The initial table is
    built from the unfiltered leaderboard; the search/category filters only
    take effect once one of the two inputs fires a change event.

    Returns:
        The search ``gr.Textbox`` component.
    """
    # NOTE(review): presumably called inside an active gr.Blocks context —
    # confirm against the caller.
    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            search_box = gr.Textbox(
                label="Model Search",
                placeholder="Entering model name to search...",
                value="",
            )
            category_checkboxes = gr.CheckboxGroup(
                label="Model Categories",
                choices=list(MODEL_CATEGORIES),
                value=list(MODEL_CATEGORIES),
            )

        full_ranking = make_ranked(load_leaderboard())
        table = gr.Dataframe(
            interactive=False,
            wrap=False,
            value=_apply_table_styling(full_ranking.copy()),
        )

        def update_display(search_text, selected_categories):
            """Re-filter the leaderboard and return it as a styled table."""
            filtered = filter_data(search_text, selected_categories)
            return _apply_table_styling(filtered.copy())

        # Both inputs trigger the same refresh with the same arguments.
        for component in (search_box, category_checkboxes):
            component.change(
                fn=update_display,
                inputs=[search_box, category_checkboxes],
                outputs=table,
            )

        gr.Markdown("""
        ### Column Abbreviations
        The leaderboard uses abbreviated column names for compact display: (i) **overall** - Overall Score; (ii) **comp.** - Comprehensiveness; (iii) **insight** - Insight quality; (iv) **inst.** - Instruction Following; (v) **read.** - Readability; (vi) **c.acc.** - Citation Accuracy; (vii) **eff.c.** - Effective Citations.
        """)

    return search_box