import gradio as gr
import json
import pandas as pd
from urllib.request import urlopen
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/open-compass/opencompass}},
year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
Predictions_BUTTON_LABEL = "All model predictions are stored at the URL below; visit it for full details."
Predictions_BUTTON_TEXT = "https://huggingface.co/datasets/opencompass/compass_academic_predictions"
head_style = """
<style>
@media (min-width: 1536px)
{
.gradio-container {
min-width: var(--size-full) !important;
}
}
</style>
"""
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"
MAIN_LEADERBOARD_DESCRIPTION = """## Compass Academic Leaderboard (Full Version)
CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
- The evaluation currently targets chat models; the leaderboard is updated with the latest community models at irregular intervals.
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
"""
Initial_title = 'Compass Academic Leaderboard'
MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
MODEL_TYPE = ['API', 'OpenSource']
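# Fetch the model metadata and the aggregated benchmark results as JSON from the OSS bucket.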
def findfile():
model_meta_info = 'model-meta-info'
results_sum = 'hf-academic'
url = f"{DATA_URL_BASE}{model_meta_info}.json"
response = urlopen(url)
model_info = json.loads(response.read().decode('utf-8'))
url = f"{DATA_URL_BASE}{results_sum}.json"
response = urlopen(url)
results = json.loads(response.read().decode('utf-8'))
return model_info, results
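# Load both JSON files once at import time; every session shares the same data.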
model_info, results = findfile()
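# Per-sample predictions are read from a local JSON file bundled with the Space.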
def findfile_predictions():
    with open('data/hf-academic-predictions.json', 'r') as file:
        predictions = json.load(file)
return predictions
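# Build the leaderboard DataFrame: one row per model, one column per dataset score.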
def make_results_tab(model_info, results):
    models_list = list(model_info)
    datasets_list = list(results.keys())
result_list = []
index = 1
for model in models_list:
this_result = {}
this_result['Index'] = index
this_result['Model Name'] = model['display_name']
this_result['Release Time'] = model['release_time']
this_result['Parameters'] = model['num_param']
this_result['OpenSource'] = model['release_type']
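        # Track whether this model has at least one reported score; models with no scores are skipped.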
is_all_results_none = 1
for dataset in datasets_list:
if results[dataset][model['abbr']] != '-':
is_all_results_none = 0
this_result[dataset] = results[dataset][model['abbr']]
if is_all_results_none == 0:
result_list.append(this_result)
index += 1
df = pd.DataFrame(result_list)
return df, models_list, datasets_list
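# Heuristic per-column pixel widths based on header and cell lengths, clamped to [160, 400].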
def calculate_column_widths(df):
column_widths = []
for column in df.columns:
header_length = len(str(column))
max_content_length = df[column].astype(str).map(len).max()
width = max(header_length * 10, max_content_length * 8) + 20
width = max(160, min(400, width))
column_widths.append(width)
return column_widths
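# Results tab: a model-name search box, size/type filters, and the leaderboard table.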
def show_results_tab(df):
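    # filter_df rebuilds the full table and re-applies the name, size, and type filters on every control change.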
def filter_df(model_name, size_ranges, model_types):
newdf, modellist, datasetlist = make_results_tab(model_info, results)
        # Search by model name; skip if the textbox still holds the placeholder text.
        default_val = 'Input the Model Name'
        if model_name != default_val:
            method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in newdf['Model Name']]
            flag = [model_name.lower() in name for name in method_names]
            newdf = newdf[flag]
# filter size
if size_ranges:
            def get_size_in_B(param):
                if param == 'N/A':
                    return None
                try:
                    return float(param.replace('B', ''))
                except (ValueError, AttributeError):
                    return None
newdf['size_in_B'] = newdf['Parameters'].apply(get_size_in_B)
mask = pd.Series(False, index=newdf.index)
for size_range in size_ranges:
if size_range == '<10B':
mask |= (newdf['size_in_B'] < 10) & (newdf['size_in_B'].notna())
elif size_range == '10B-70B':
mask |= (newdf['size_in_B'] >= 10) & (newdf['size_in_B'] < 70)
elif size_range == '>70B':
mask |= newdf['size_in_B'] >= 70
elif size_range == 'Unknown':
mask |= newdf['size_in_B'].isna()
newdf = newdf[mask]
newdf.drop('size_in_B', axis=1, inplace=True)
# filter opensource
if model_types:
type_mask = pd.Series(False, index=newdf.index)
for model_type in model_types:
if model_type == 'API':
type_mask |= newdf['OpenSource'] == 'API'
elif model_type == 'OpenSource':
type_mask |= newdf['OpenSource'] == 'OpenSource'
newdf = newdf[type_mask]
# for i in range(len(newdf)):
# newdf.loc[i, 'Index'] = i+1
return newdf
with gr.Row():
with gr.Column():
model_name = gr.Textbox(
value='Input the Model Name',
label='Search Model Name',
interactive=True
)
with gr.Column():
size_filter = gr.CheckboxGroup(
choices=MODEL_SIZE,
value=MODEL_SIZE,
label='Model Size',
interactive=True,
)
with gr.Column():
type_filter = gr.CheckboxGroup(
choices=MODEL_TYPE,
value=MODEL_TYPE,
label='Model Type',
interactive=True,
)
# with gr.Row():
    # btn = gr.Button(value="Generate Table", interactive=True)
with gr.Column():
table = gr.DataFrame(
value=df,
interactive=False,
wrap=False,
column_widths=calculate_column_widths(df),
)
model_name.submit(
fn=filter_df,
inputs=[model_name, size_filter, type_filter],
outputs=table
)
size_filter.change(
fn=filter_df,
inputs=[model_name, size_filter, type_filter],
outputs=table,
)
type_filter.change(
fn=filter_df,
inputs=[model_name, size_filter, type_filter],
outputs=table,
)
# def download_table():
# newdf, modellist, datasetlist = make_results_tab(model_info, results)
# return newdf.to_csv('df.csv',index=False,sep=',',encoding='utf-8',header=True)
# download_btn = gr.File(visible=True)
# btn.click(fn=download_table, inputs=None, outputs=download_btn)
with gr.Row():
with gr.Accordion("Storage of Model Predictions", open=True):
citation_button = gr.Textbox(
value=Predictions_BUTTON_TEXT,
label=Predictions_BUTTON_LABEL,
elem_id='predictions-button',
                lines=2,  # show two lines by default
                max_lines=4,  # limit the maximum height
                show_copy_button=True  # add a copy button for convenience
)
with gr.Row():
with gr.Accordion("Citation", open=True):
citation_button = gr.Textbox(
value=CITATION_BUTTON_TEXT,
label=CITATION_BUTTON_LABEL,
elem_id='citation-button',
                lines=6,  # show six lines by default
                max_lines=8,  # limit the maximum height
                show_copy_button=True  # add a copy button for convenience
)
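# Fallback table shown when no predictions exist for the selected model/dataset pair.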
ERROR_DF = {
    "Type": ['NoneType'],
    "Details": ["No predictions were found for the selected model and dataset."]
}
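# Predictions tab: pick a model and a dataset to inspect their per-sample predictions.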
def show_predictions_tab(model_list, dataset_list, predictions):
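    # Return the prediction table for a (model, dataset) pair, or the fallback table if missing.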
def get_pre_df(model_name, dataset_name):
if dataset_name not in predictions.keys() or model_name not in predictions[dataset_name].keys():
return pd.DataFrame(ERROR_DF)
this_predictions = predictions[dataset_name][model_name]['predictions']
for i in range(len(this_predictions)):
this_predictions[i]['origin_prompt'] = str(this_predictions[i]['origin_prompt'])
this_predictions[i]['gold'] = str(this_predictions[i]['gold'])
this_predictions = pd.DataFrame(this_predictions)
return this_predictions
model_list = [i['abbr'] for i in model_list]
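    # Default view shown before the user makes a selection.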
initial_predictions = get_pre_df('MiniMax-Text-01', 'IFEval')
with gr.Row():
with gr.Column():
model_drop = gr.Dropdown(
label="Model Name",
                choices=model_list,  # model abbreviations taken from the metadata
interactive=True
)
with gr.Column():
dataset_drop = gr.Dropdown(
label="Dataset Name",
                choices=dataset_list,  # dataset names taken from the results file
interactive=True
)
with gr.Column():
table = gr.DataFrame(
value=initial_predictions,
interactive=False,
wrap=False,
max_height=1000,
column_widths=calculate_column_widths(initial_predictions),
)
model_drop.change(
fn=get_pre_df,
inputs=[model_drop, dataset_drop],
outputs=table,
)
dataset_drop.change(
fn=get_pre_df,
inputs=[model_drop, dataset_drop],
outputs=table,
)
with gr.Row():
with gr.Accordion("Citation", open=False):
citation_button = gr.Textbox(
value=CITATION_BUTTON_TEXT,
label=CITATION_BUTTON_LABEL,
elem_id='citation-button',
                lines=6,  # show six lines by default
                max_lines=8,  # limit the maximum height
                show_copy_button=True  # add a copy button for convenience
)
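# Assemble the Gradio Blocks app; the Predictions tab is currently commented out.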
def create_interface():
df, model_list, dataset_list = make_results_tab(model_info, results)
predictions = findfile_predictions()
with gr.Blocks() as demo:
# title_comp = gr.Markdown(Initial_title)
gr.Markdown(MAIN_LEADERBOARD_DESCRIPTION)
with gr.Tabs(elem_classes='tab-buttons') as tabs:
with gr.TabItem('Results', elem_id='main', id=0):
show_results_tab(df)
# with gr.TabItem('Predictions', elem_id='notmain', id=1):
# show_predictions_tab(model_list, dataset_list, predictions)
return demo
# model_info, results = findfile()
# breakpoint()
if __name__ == '__main__':
demo = create_interface()
demo.queue()
demo.launch(server_name='0.0.0.0')