Commit
·
71d9111
1
Parent(s):
141f575
fix bugs
Browse files
- requirements.txt +2 -2
- tabs/__pycache__/leaderboard_tab.cpython-39.pyc +0 -0
- tabs/leaderboard_tab.py +57 -62
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
gradio
|
| 2 |
-
pandas
|
| 3 |
numpy
|
| 4 |
plotly
|
| 5 |
pathlib
|
|
|
|
| 1 |
+
gradio>=5.31.0
|
| 2 |
+
pandas>=1.5
|
| 3 |
numpy
|
| 4 |
plotly
|
| 5 |
pathlib
|
tabs/__pycache__/leaderboard_tab.cpython-39.pyc
CHANGED
|
Binary files a/tabs/__pycache__/leaderboard_tab.cpython-39.pyc and b/tabs/__pycache__/leaderboard_tab.cpython-39.pyc differ
|
|
|
tabs/leaderboard_tab.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
from pathlib import Path
|
|
@@ -7,9 +8,9 @@ from typing import Union
|
|
| 7 |
BASE_DIR = Path(__file__).resolve().parent.parent
|
| 8 |
DATA_PATH = BASE_DIR / "data" / "leaderboard.csv"
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
HIGHLIGHT_COLOR = "#E6D8FF"
|
| 12 |
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
|
|
|
|
| 13 |
|
| 14 |
# 列名重命名映射
|
| 15 |
COLUMN_RENAME_MAP = {
|
|
@@ -59,7 +60,7 @@ def load_leaderboard() -> pd.DataFrame:
|
|
| 59 |
for category, models in MODEL_CATEGORIES.items():
|
| 60 |
if model_name in models:
|
| 61 |
return category
|
| 62 |
-
return "Others"
|
| 63 |
|
| 64 |
df['category'] = df['model'].apply(get_category)
|
| 65 |
return df
|
|
@@ -77,32 +78,15 @@ def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 77 |
if col in ranked.columns:
|
| 78 |
ranked[col] = ranked[col].round(2)
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
if row['category'] == CATEGORY_TO_HIGHLIGHT:
|
| 88 |
-
return [f'background-color: {HIGHLIGHT_COLOR}'] * len(row)
|
| 89 |
-
else:
|
| 90 |
-
return [''] * len(row)
|
| 91 |
-
|
| 92 |
-
# 创建 styler 对象
|
| 93 |
-
styler = df.style.apply(highlight_row, axis=1)
|
| 94 |
-
|
| 95 |
-
# 格式化数值列显示
|
| 96 |
-
numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.']
|
| 97 |
-
format_dict = {}
|
| 98 |
-
for col in numeric_columns:
|
| 99 |
-
if col in df.columns:
|
| 100 |
-
format_dict[col] = '{:.2f}'
|
| 101 |
|
| 102 |
-
|
| 103 |
-
styler = styler.format(format_dict)
|
| 104 |
-
|
| 105 |
-
return styler
|
| 106 |
|
| 107 |
def filter_data(search_text: str, selected_categories: list):
|
| 108 |
df = load_leaderboard()
|
|
@@ -114,37 +98,47 @@ def filter_data(search_text: str, selected_categories: list):
|
|
| 114 |
df = df[df['category'].isin(selected_categories)]
|
| 115 |
|
| 116 |
ranked_df = make_ranked(df)
|
| 117 |
-
return
|
| 118 |
|
| 119 |
def create_leaderboard_tab():
|
| 120 |
with gr.Tab("🏆Leaderboard"):
|
| 121 |
with gr.Row():
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
|
| 133 |
-
#
|
| 134 |
initial_df = make_ranked(load_leaderboard())
|
| 135 |
-
styled_initial_df = apply_styling(initial_df)
|
| 136 |
|
| 137 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
table = gr.Dataframe(
|
| 139 |
-
value=
|
| 140 |
-
|
| 141 |
-
wrap=False,
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
| 143 |
)
|
| 144 |
|
| 145 |
def update_display(search_text, selected_categories):
|
| 146 |
-
|
| 147 |
-
return
|
| 148 |
|
| 149 |
# 绑定搜索框和复选框的变化事件
|
| 150 |
search_box.change(
|
|
@@ -158,19 +152,20 @@ def create_leaderboard_tab():
|
|
| 158 |
outputs=table
|
| 159 |
)
|
| 160 |
|
| 161 |
-
#
|
| 162 |
-
gr.
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
| 175 |
|
| 176 |
return search_box
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd
|
| 4 |
from pathlib import Path
|
|
|
|
| 8 |
BASE_DIR = Path(__file__).resolve().parent.parent
|
| 9 |
DATA_PATH = BASE_DIR / "data" / "leaderboard.csv"
|
| 10 |
|
| 11 |
+
# 用于标注的常量
|
|
|
|
| 12 |
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
|
| 13 |
+
HIGHLIGHT_EMOJI = "🚀"
|
| 14 |
|
| 15 |
# 列名重命名映射
|
| 16 |
COLUMN_RENAME_MAP = {
|
|
|
|
| 60 |
for category, models in MODEL_CATEGORIES.items():
|
| 61 |
if model_name in models:
|
| 62 |
return category
|
| 63 |
+
return "Others"
|
| 64 |
|
| 65 |
df['category'] = df['model'].apply(get_category)
|
| 66 |
return df
|
|
|
|
| 78 |
if col in ranked.columns:
|
| 79 |
ranked[col] = ranked[col].round(2)
|
| 80 |
|
| 81 |
+
# 为 Deep Research Agent 添加 HTML 格式(加粗 + 颜色)
|
| 82 |
+
ranked['model'] = ranked.apply(
|
| 83 |
+
lambda row: f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {row["model"]}</span>'
|
| 84 |
+
if row['category'] == CATEGORY_TO_HIGHLIGHT
|
| 85 |
+
else row['model'],
|
| 86 |
+
axis=1
|
| 87 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
+
return ranked
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
def filter_data(search_text: str, selected_categories: list):
|
| 92 |
df = load_leaderboard()
|
|
|
|
| 98 |
df = df[df['category'].isin(selected_categories)]
|
| 99 |
|
| 100 |
ranked_df = make_ranked(df)
|
| 101 |
+
return ranked_df
|
| 102 |
|
| 103 |
def create_leaderboard_tab():
|
| 104 |
with gr.Tab("🏆Leaderboard"):
|
| 105 |
with gr.Row():
|
| 106 |
+
with gr.Column(scale=1):
|
| 107 |
+
search_box = gr.Textbox(
|
| 108 |
+
label="Model Search",
|
| 109 |
+
placeholder="Entering model name to search...",
|
| 110 |
+
value=""
|
| 111 |
+
)
|
| 112 |
+
with gr.Column(scale=2):
|
| 113 |
+
category_checkboxes = gr.CheckboxGroup(
|
| 114 |
+
label="Model Categories",
|
| 115 |
+
choices=list(MODEL_CATEGORIES.keys()),
|
| 116 |
+
value=list(MODEL_CATEGORIES.keys())
|
| 117 |
+
)
|
| 118 |
|
| 119 |
+
# 初始化数据(不使用样式)
|
| 120 |
initial_df = make_ranked(load_leaderboard())
|
|
|
|
| 121 |
|
| 122 |
+
# 获取列数据类型,将 model 列设置为 html
|
| 123 |
+
column_count = len(initial_df.columns)
|
| 124 |
+
datatypes = ["str"] * column_count
|
| 125 |
+
model_col_index = initial_df.columns.get_loc('model')
|
| 126 |
+
datatypes[model_col_index] = "html"
|
| 127 |
+
|
| 128 |
+
# 创建 Dataframe 组件
|
| 129 |
table = gr.Dataframe(
|
| 130 |
+
value=initial_df,
|
| 131 |
+
datatype=datatypes, # 设置数据类型,model 列为 html
|
| 132 |
+
wrap=False, # 防止文本换行
|
| 133 |
+
line_breaks=False, # 单元格内不换行
|
| 134 |
+
max_height=600, # 设置表格最大高度
|
| 135 |
+
show_label=False, # 不显示标签
|
| 136 |
+
elem_id="leaderboard_table" # 添加元素ID
|
| 137 |
)
|
| 138 |
|
| 139 |
def update_display(search_text, selected_categories):
|
| 140 |
+
df = filter_data(search_text, selected_categories)
|
| 141 |
+
return df
|
| 142 |
|
| 143 |
# 绑定搜索框和复选框的变化事件
|
| 144 |
search_box.change(
|
|
|
|
| 152 |
outputs=table
|
| 153 |
)
|
| 154 |
|
| 155 |
+
# 在底部添加说明
|
| 156 |
+
with gr.Row():
|
| 157 |
+
gr.Markdown(f"""
|
| 158 |
+
### 📊 Column Descriptions
|
| 159 |
+
- **Rank**: Model ranking based on overall score
|
| 160 |
+
- **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
|
| 161 |
+
- **overall**: Overall Score (weighted average of all metrics)
|
| 162 |
+
- **comp.**: Comprehensiveness - How thorough and complete the research is
|
| 163 |
+
- **insight**: Insight Quality - Depth and value of analysis
|
| 164 |
+
- **inst.**: Instruction Following - Adherence to user instructions
|
| 165 |
+
- **read.**: Readability - Clarity and organization of content
|
| 166 |
+
- **c.acc.**: Citation Accuracy - Correctness of references
|
| 167 |
+
- **eff.c.**: Effective Citations - Relevance and quality of sources
|
| 168 |
+
- **category**: Model category
|
| 169 |
+
""")
|
| 170 |
|
| 171 |
return search_box
|