from __future__ import annotations import gradio as gr import pandas as pd from pathlib import Path from typing import Union # 相对于主脚本的路径调整 BASE_DIR = Path(__file__).resolve().parent.parent DATA_PATH = BASE_DIR / "data" / "leaderboard.csv" # 用于标注的常量 CATEGORY_TO_HIGHLIGHT = "Deep Research Agent" HIGHLIGHT_EMOJI = "🚀" # 列名重命名映射 COLUMN_RENAME_MAP = { 'overall_score': 'overall', 'comprehensiveness': 'comp.', 'insight': 'insight', 'instruction_following': 'inst.', 'readability': 'read.', 'citation_accuracy': 'c.acc.', 'effective_citations': 'eff.c.' } # 模型分类映射 MODEL_CATEGORIES = { "Deep Research Agent": [ "gemini-2.5-pro-deepresearch", "grok-deeper-search", "openai-deepresearch", "perplexity-Research", "doubao-deepresearch", "kimi-researcher", "claude-research", "langchain-open-deep-research" ], "LLM with Search": [ "claude-3-7-sonnet-with-search", "claude-3-5-sonnet-with-search", "sonar-reasoning-pro", "sonar-reasoning", "sonar-pro", "sonar", "gemini-2.5-pro-preview-05-06", "gpt-4o-search-preview", "gpt-4.1", "gemini-2.5-flash-preview-04-17", "gpt-4o-mini-search-preview", "nvidia-aiq-research-assistant", "gpt-4.1-mini" ] } # 模型链接映射(目前都设置为空,可以后续添加具体链接) MODEL_LINKS = { # Deep Research Agent "gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/", "grok-deeper-search": "https://x.ai/news/grok-3", "openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/", "perplexity-Research": "https://www.perplexity.ai/hub/blog/introducing-perplexity-deep-research", "doubao-deepresearch": "https://www.doubao.com/chat/", "kimi-researcher": "https://moonshotai.github.io/Kimi-Researcher/", "claude-research": "https://www.anthropic.com/news/research", "nvidia-aiq-research-assistant": "https://github.com/NVIDIA-AI-Blueprints/aiq-research-assistant", "langchain-open-deep-research": "https://github.com/langchain-ai/open_deep_research", # LLM with Search "claude-3-7-sonnet-with-search": "", "claude-3-5-sonnet-with-search": "", "sonar-reasoning-pro": "", "sonar-reasoning": "", "sonar-pro": "", "sonar": "", "gemini-2.5-pro-preview-05-06": "", "gpt-4o-search-preview": "", "gpt-4.1": "", "gemini-2.5-flash-preview-04-17": "", "gpt-4o-mini-search-preview": "", "gpt-4.1-mini": "" } # 模型许可证类型映射 MODEL_LICENSE_TYPE = { # Deep Research Agent "gemini-2.5-pro-deepresearch": "Proprietary", "grok-deeper-search": "Proprietary", "openai-deepresearch": "Proprietary", "perplexity-Research": "Proprietary", "doubao-deepresearch": "Proprietary", "kimi-researcher": "Proprietary", "claude-research": "Proprietary", "nvidia-aiq-research-assistant": "Apache 2.0", "langchain-open-deep-research": "MIT", # 需要确认具体许可证 # LLM with Search "claude-3-7-sonnet-with-search": "Proprietary", "claude-3-5-sonnet-with-search": "Proprietary", "sonar-reasoning-pro": "Proprietary", "sonar-reasoning": "Proprietary", "sonar-pro": "Proprietary", "sonar": "Proprietary", "gemini-2.5-pro-preview-05-06": "Proprietary", "gpt-4o-search-preview": "Proprietary", "gpt-4.1": "Proprietary", "gemini-2.5-flash-preview-04-17": "Proprietary", "gpt-4o-mini-search-preview": "Proprietary", "gpt-4.1-mini": "Proprietary" } def load_leaderboard() -> pd.DataFrame: if not DATA_PATH.exists(): raise FileNotFoundError( f"Leaderboard file not found: {DATA_PATH}.\n" "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv" ) df = pd.read_csv(DATA_PATH) df.columns = [c.strip() for c in df.columns] def get_category(model_name): for category, models in MODEL_CATEGORIES.items(): if model_name in models: return category return "Others" def get_license_type(model_name): return MODEL_LICENSE_TYPE.get(model_name, "Unknown") df['category'] = df['model'].apply(get_category) df['license_type'] = df['model'].apply(get_license_type) return df def make_ranked(df: pd.DataFrame) -> pd.DataFrame: ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True) ranked.insert(0, "Rank", range(1, len(ranked) + 1)) # 重命名列名为简写形式 ranked = ranked.rename(columns=COLUMN_RENAME_MAP) # 格式化数值列为两位小数,但跳过包含"-"的值 numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.'] for col in numeric_columns: if col in ranked.columns: # 只对数值进行round操作,保持"-"不变 ranked[col] = ranked[col].apply( lambda x: round(float(x), 2) if x != "-" and pd.notna(x) else x ) # 为模型添加链接和高亮样式 def format_model_name(row): model_name = row['model'] link = MODEL_LINKS.get(model_name, "") # 根据类别决定是否高亮 if row['category'] == CATEGORY_TO_HIGHLIGHT: display_name = f'{HIGHLIGHT_EMOJI} {model_name}' else: display_name = model_name # 如果有链接,包装成标签 if link and link.strip(): return f'{display_name}' else: # 没有链接时,为将来添加链接做准备(可以添加点击事件等) return f'{display_name}' ranked['model'] = ranked.apply(format_model_name, axis=1) return ranked def filter_data(search_text: str, selected_categories: list): df = load_leaderboard() if search_text.strip(): df = df[df['model'].str.contains(search_text.strip(), case=False, na=False)] if selected_categories: df = df[df['category'].isin(selected_categories)] ranked_df = make_ranked(df) return ranked_df def create_leaderboard_tab(): with gr.Tab("🏆Leaderboard"): with gr.Row(): with gr.Column(scale=1): search_box = gr.Textbox( label="Model Search", placeholder="Entering model name to search...", value="" ) with gr.Column(scale=2): category_checkboxes = gr.CheckboxGroup( label="Model Categories", choices=list(MODEL_CATEGORIES.keys()), value=list(MODEL_CATEGORIES.keys()) ) # 初始化数据(不使用样式) initial_df = make_ranked(load_leaderboard()) # 获取列数据类型,将 model 列设置为 html column_count = len(initial_df.columns) datatypes = ["str"] * column_count model_col_index = initial_df.columns.get_loc('model') datatypes[model_col_index] = "html" # 创建 Dataframe 组件 table = gr.Dataframe( value=initial_df, datatype=datatypes, # 设置数据类型,model 列为 html max_height=600, # 设置表格最大高度 show_label=False, # 不显示标签 elem_id="leaderboard_table", # 添加元素ID interactive=False, # 禁用编辑功能 wrap=False, # 不换行 column_widths=["80px", "350px", "100px", "100px", "100px", "100px", "100px", "100px", "100px", "200px", "150px"] # 设置各列宽度,model列设置为350px ) def update_display(search_text, selected_categories): df = filter_data(search_text, selected_categories) return df # 绑定搜索框和复选框的变化事件 search_box.change( fn=update_display, inputs=[search_box, category_checkboxes], outputs=table ) category_checkboxes.change( fn=update_display, inputs=[search_box, category_checkboxes], outputs=table ) # 在底部添加说明 with gr.Row(): gr.Markdown(f""" ### 📊 Column Descriptions - **Rank**: Model ranking based on overall score - **model**: Model name ({HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}) - **overall**: Overall Score (weighted average of all metrics) - **comp.**: Comprehensiveness - How thorough and complete the research is - **insight**: Insight Quality - Depth and value of analysis - **inst.**: Instruction Following - Adherence to user instructions - **read.**: Readability - Clarity and organization of content - **c.acc.**: Citation Accuracy - Correctness of references - **eff.c.**: Effective Citations - Relevance and quality of sources - **category**: Model category - **license_type**: The software license type of the model/service 💡 **Tip**: Model names are clickable when links are available. Visit the GitHub repositories for more details! """) return search_box