Spaces:

Ayanami0730
/

DeepResearch-Leaderboard

Running

App Files Files Community

DeepResearch-Leaderboard / tabs /leaderboard_tab.py

Ayanami0730

Add DeepResearch Bench application with LFS support

927e909 4 months ago

raw

history blame

6.02 kB

	import gradio as gr
	import pandas as pd
	from pathlib import Path

	# Paths are anchored two directory levels above this file so they do not
	# depend on the current working directory.
	BASE_DIR = Path(__file__).resolve().parents[1]
	DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

	# Row highlighting: the category whose rows are tinted, and the tint colour.
	CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
	HIGHLIGHT_COLOR = "#E6D8FF"

	# Maps raw CSV column names to the abbreviated headers shown in the table.
	COLUMN_RENAME_MAP = {
		"overall_score": "overall",
		"comprehensiveness": "comp.",
		"insight": "insight",
		"instruction_following": "inst.",
		"readability": "read.",
		"citation_accuracy": "c.acc.",
		"effective_citations": "eff.c.",
	}

	# Maps each display category to the model names that belong to it.
	MODEL_CATEGORIES = {
		"Deep Research Agent": [
			"gemini-2.5-pro-deepresearch",
			"grok-deeper-search",
			"openai-deepresearch",
			"perplexity-Research",
		],
		"LLM with Search": [
			"claude-3-7-sonnet-with-search",
			"perplexity-sonar-reasoning-pro",
			"perplexity-sonar-reasoning",
			"perplexity-sonar-pro",
			"gemini-2.5-pro-with-grounding",
			"gpt-4o-search-preview",
			"perplexity-sonar",
			"gpt-4.1-with-search",
			"gemini-2.5-flash-preview-04-17",
			"gpt-4o-mini-search-preview",
			"gpt-4.1-mini-with-search",
			"claude-3-5-sonnet-with-search",
		],
	}

	def load_leaderboard() -> pd.DataFrame:
		"""Read the leaderboard CSV and tag every row with its model category.

		Column names are stripped of surrounding whitespace. A ``category``
		column is added by looking each ``model`` value up in
		``MODEL_CATEGORIES``; models that appear in no category are labelled
		``"Others"``.

		Returns:
			The loaded DataFrame with the extra ``category`` column.

		Raises:
			FileNotFoundError: If ``DATA_PATH`` does not exist.
		"""
		if not DATA_PATH.exists():
			message = (
				f"Leaderboard file not found: {DATA_PATH}.\n"
				"→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
			)
			raise FileNotFoundError(message)

		board = pd.read_csv(DATA_PATH)
		board.columns = [name.strip() for name in board.columns]

		# Invert the category table once. setdefault keeps the first category
		# for a model listed under several, matching a first-match scan.
		category_of = {}
		for category_name, member_models in MODEL_CATEGORIES.items():
			for member in member_models:
				category_of.setdefault(member, category_name)

		# Models outside every predefined category fall back to "Others".
		board['category'] = board['model'].apply(
			lambda model_name: category_of.get(model_name, "Others")
		)
		return board

	def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
		"""Return ``df`` ordered by score, with a rank column and short headers.

		Rows are sorted by ``overall_score`` in descending order and the index
		is renumbered from zero. A 1-based ``Rank`` column is placed first and
		the columns are renamed according to ``COLUMN_RENAME_MAP``.

		Args:
			df: Leaderboard rows; must contain an ``overall_score`` column.

		Returns:
			A new DataFrame; ``df`` itself is not modified.
		"""
		by_score = df.sort_values(by='overall_score', ascending=False)
		by_score = by_score.reset_index(drop=True)

		# Switch to the abbreviated column names before adding the rank column.
		abbreviated = by_score.rename(columns=COLUMN_RENAME_MAP)
		abbreviated.insert(0, "Rank", range(1, len(abbreviated) + 1))
		return abbreviated

	def filter_data(search_text: str, selected_categories: list):
		"""Load the leaderboard, apply the UI filters, and return it ranked.

		Args:
			search_text: Text to look for in the ``model`` column. It is matched
				as a case-insensitive literal substring. Blank or ``None``
				disables the search filter.
			selected_categories: Category names to keep. An empty selection
				disables the category filter.

		Returns:
			The filtered rows passed through ``make_ranked``.

		Raises:
			FileNotFoundError: Propagated from ``load_leaderboard`` when the
				CSV file is missing.
		"""
		df = load_leaderboard()

		# Tolerate a missing value from the UI layer instead of failing on .strip().
		query = (search_text or "").strip()
		if query:
			# regex=False: the box holds a model name, not a pattern. With the
			# default regex=True, input such as "(" or "[" raises re.error and
			# "." in "gpt-4.1" would match any character.
			name_matches = df['model'].str.contains(
				query, case=False, na=False, regex=False
			)
			df = df[name_matches]

		if selected_categories:
			df = df[df['category'].isin(selected_categories)]

		return make_ranked(df)

	# Helper used when styling the DataFrame row by row.
	def _style_specific_rows(row, category_column_name='category', target_category=CATEGORY_TO_HIGHLIGHT, color=HIGHLIGHT_COLOR):
		"""Build the per-cell CSS for one row.

		Every cell of the row receives the same ``background-color``
		declaration: ``color`` when the row's value under
		``category_column_name`` equals ``target_category``, otherwise an
		empty value.

		Returns:
			A list with one CSS string per element of ``row``.
		"""
		if row.get(category_column_name) == target_category:
			cell_css = f'background-color: {color}'
		else:
			cell_css = 'background-color: '
		return [cell_css] * len(row)

	def _apply_table_styling(df: pd.DataFrame) -> "pd.io.formats.style.Styler":
		"""Wrap ``df`` in a pandas Styler configured for the leaderboard table.

		The styling:
		- highlights rows whose ``category`` equals ``CATEGORY_TO_HIGHLIGHT``;
		- keeps the ``category`` column visible;
		- renders numeric columns with two decimal places.

		Args:
			df: Ranked leaderboard data. It is copied, so the caller's frame is
				not referenced by the returned Styler.

		Returns:
			A pandas Styler. For an empty frame this is the unmodified
			``df.style``.
		"""
		# The return annotation is quoted on purpose. Unquoted, the attribute
		# chain is evaluated when the function is defined, and ``import pandas``
		# is not guaranteed to have loaded the ``pandas.io.formats.style``
		# submodule yet, so it could raise AttributeError depending on import
		# order.
		if df.empty:
			return df.style

		styled_df = df.copy()

		# Numeric columns to format; Rank, model and category are never formatted.
		# is_numeric_dtype already covers float64/int64, so no separate dtype test.
		unformatted_columns = {'Rank', 'model', 'category'}
		numeric_columns = [
			col
			for col in styled_df.columns
			if col not in unformatted_columns
			and pd.api.types.is_numeric_dtype(styled_df[col])
		]

		# Row-wise styling: tint every cell of rows in the highlighted category.
		styler = styled_df.style.apply(
			_style_specific_rows,
			axis=1,
			category_column_name='category',
			target_category=CATEGORY_TO_HIGHLIGHT,
			color=HIGHLIGHT_COLOR,
		)

		# Display-only formatting; the underlying values are left untouched.
		if numeric_columns:
			styler = styler.format(dict.fromkeys(numeric_columns, '{:.2f}'))

		return styler

	def create_leaderboard_tab():
		"""Build the "Leaderboard" tab and wire its filter controls to the table.

		The tab contains a model-name search box and a category checkbox group
		side by side, a read-only table holding the styled leaderboard, and a
		Markdown legend for the abbreviated column names. Changing either
		control re-runs the filter and replaces the table contents.

		Returns:
			The search ``gr.Textbox`` component.
		"""
		category_names = list(MODEL_CATEGORIES.keys())

		def update_display(search_text, selected_categories):
			# Recompute the filtered, ranked and styled table for the current inputs.
			refreshed = filter_data(search_text, selected_categories)
			return _apply_table_styling(refreshed.copy())

		with gr.Tab("🏆Leaderboard"):
			with gr.Row():
				search_box = gr.Textbox(
					label="Model Search",
					placeholder="Entering model name to search...",
					value="",
				)
				category_checkboxes = gr.CheckboxGroup(
					label="Model Categories",
					choices=category_names,
					value=list(category_names),
				)

			# NOTE(review): the initial table is unfiltered, so it includes models
			# that load_leaderboard labels "Others". "Others" is not a checkbox
			# choice, so those rows disappear after the first filter event while
			# any category is ticked. Confirm whether that is intended.
			starting_table = make_ranked(load_leaderboard())
			table = gr.Dataframe(
				interactive=False,
				wrap=False,
				value=_apply_table_styling(starting_table.copy()),
			)

			# Both controls refresh the table through the same callback.
			for control in (search_box, category_checkboxes):
				control.change(
					fn=update_display,
					inputs=[search_box, category_checkboxes],
					outputs=table,
				)

			# Legend for the abbreviated column names, shown below the table.
			gr.Markdown("""
	### Column Abbreviations
	The leaderboard uses abbreviated column names for compact display: (i) overall - Overall Score; (ii) comp. - Comprehensiveness; (iii) insight - Insight quality; (iv) inst. - Instruction Following; (v) read. - Readability; (vi) c.acc. - Citation Accuracy; (vii) eff.c. - Effective Citations.
	""")

		return search_box