import gradio as gr
import json
import pandas as pd
from urllib.request import urlopen
from urllib.error import URLError
import re
from datetime import datetime

CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

Predictions_BUTTON_LABEL = "All model predictions are listed here. Access this URL for more details."
Predictions_BUTTON_TEXT = "https://huggingface.co/datasets/opencompass/compass_academic_predictions"

head_style = """ """

DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"

MAIN_LEADERBOARD_DESCRIPTION = """## Compass Academic Leaderboard (Full Version)

CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.

- The datasets selected so far cover General Knowledge Reasoning (MMLU-Pro / GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
- The evaluation currently focuses primarily on chat models, with the latest community models added at irregular intervals.
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
"""

Initial_title = 'Compass Academic Leaderboard'

MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
MODEL_TYPE = ['API', 'OpenSource']


def findfile():
    """Fetch the model metadata and the aggregated results from the OSS bucket."""
    model_meta_info = 'model-meta-info'
    results_sum = 'hf-academic'

    url = f"{DATA_URL_BASE}{model_meta_info}.json"
    response = urlopen(url)
    model_info = json.loads(response.read().decode('utf-8'))

    url = f"{DATA_URL_BASE}{results_sum}.json"
    response = urlopen(url)
    results = json.loads(response.read().decode('utf-8'))
    return model_info, results


model_info, results = findfile()


def findfile_predictions():
    """Load the raw model predictions shipped with this Space."""
    with open('data/hf-academic-predictions.json', 'r') as file:
        predictions = json.load(file)
    return predictions


def make_results_tab(model_info, results):
    """Join model metadata with per-dataset scores into a leaderboard DataFrame."""
    models_list = list(model_info)
    datasets_list = list(results.keys())

    result_list = []
    index = 1
    for model in models_list:
        this_result = {}
        this_result['Index'] = index
        this_result['Model Name'] = model['display_name']
        this_result['Release Time'] = model['release_time']
        this_result['Parameters'] = model['num_param']
        this_result['OpenSource'] = model['release_type']

        # Skip models that have no score on any dataset ('-' marks a missing result).
        has_any_result = False
        for dataset in datasets_list:
            if results[dataset][model['abbr']] != '-':
                has_any_result = True
            this_result[dataset] = results[dataset][model['abbr']]
        if has_any_result:
            result_list.append(this_result)
            index += 1

    df = pd.DataFrame(result_list)
    return df, models_list, datasets_list


def calculate_column_widths(df):
    """Estimate a pixel width per column from header and cell lengths, clamped to 160-400 px."""
    column_widths = []
    for column in df.columns:
        header_length = len(str(column))
        max_content_length = df[column].astype(str).map(len).max()
        width = max(header_length * 10, max_content_length * 8) + 20
        width = max(160, min(400, width))
        column_widths.append(width)
    return column_widths
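
# Illustrative only: a minimal, self-contained example of the input shapes that
# make_results_tab() expects, inferred from the field accesses above. The model
# entry and scores below are toy placeholders, not real leaderboard data, and
# this helper is not used by the app itself.
def _example_make_results_tab():
    toy_model_info = [{
        'abbr': 'toy-model',
        'display_name': 'Toy-Model-7B',
        'release_time': '2024-01',
        'num_param': '7B',
        'release_type': 'OpenSource',
    }]
    toy_results = {'IFEval': {'toy-model': 55.0}, 'BBH': {'toy-model': '-'}}
    toy_df, _, _ = make_results_tab(toy_model_info, toy_results)
    # -> one row with columns: Index, Model Name, Release Time, Parameters,
    #    OpenSource, IFEval, BBH
    return toy_df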
def show_results_tab(df):

    def filter_df(model_name, size_ranges, model_types):
        newdf, modellist, datasetlist = make_results_tab(model_info, results)

        # search by model name (the textbox default means "no filter")
        default_val = 'Input the Model Name'
        if model_name != default_val:
            # 'Model Name' holds plain display names, so a case-insensitive
            # substring match is sufficient.
            method_names = [str(x).lower() for x in newdf['Model Name']]
            flag = [model_name.lower() in name for name in method_names]
            newdf = newdf[flag]

        # filter by parameter size
        if size_ranges:
            def get_size_in_B(param):
                if param == 'N/A':
                    return None
                try:
                    return float(str(param).replace('B', ''))
                except ValueError:
                    return None

            newdf['size_in_B'] = newdf['Parameters'].apply(get_size_in_B)
            mask = pd.Series(False, index=newdf.index)
            for size_range in size_ranges:
                if size_range == '<10B':
                    mask |= (newdf['size_in_B'] < 10) & (newdf['size_in_B'].notna())
                elif size_range == '10B-70B':
                    mask |= (newdf['size_in_B'] >= 10) & (newdf['size_in_B'] < 70)
                elif size_range == '>70B':
                    mask |= newdf['size_in_B'] >= 70
                elif size_range == 'Unknown':
                    mask |= newdf['size_in_B'].isna()
            newdf = newdf[mask]
            newdf = newdf.drop('size_in_B', axis=1)

        # filter by API vs. open-source release
        if model_types:
            type_mask = pd.Series(False, index=newdf.index)
            for model_type in model_types:
                if model_type == 'API':
                    type_mask |= newdf['OpenSource'] == 'API'
                elif model_type == 'OpenSource':
                    type_mask |= newdf['OpenSource'] == 'OpenSource'
            newdf = newdf[type_mask]

        # for i in range(len(newdf)):
        #     newdf.loc[i, 'Index'] = i + 1
        return newdf

    with gr.Row():
        with gr.Column():
            model_name = gr.Textbox(
                value='Input the Model Name',
                label='Search Model Name',
                interactive=True,
            )
        with gr.Column():
            size_filter = gr.CheckboxGroup(
                choices=MODEL_SIZE,
                value=MODEL_SIZE,
                label='Model Size',
                interactive=True,
            )
        with gr.Column():
            type_filter = gr.CheckboxGroup(
                choices=MODEL_TYPE,
                value=MODEL_TYPE,
                label='Model Type',
                interactive=True,
            )
    # with gr.Row():
    #     btn = gr.Button(value="Generate table", interactive=True)

    with gr.Column():
        table = gr.DataFrame(
            value=df,
            interactive=False,
            wrap=False,
            column_widths=calculate_column_widths(df),
        )

    model_name.submit(
        fn=filter_df,
        inputs=[model_name, size_filter, type_filter],
        outputs=table,
    )
    size_filter.change(
        fn=filter_df,
        inputs=[model_name, size_filter, type_filter],
        outputs=table,
    )
    type_filter.change(
        fn=filter_df,
        inputs=[model_name, size_filter, type_filter],
        outputs=table,
    )

    # def download_table():
    #     newdf, modellist, datasetlist = make_results_tab(model_info, results)
    #     return newdf.to_csv('df.csv', index=False, sep=',', encoding='utf-8', header=True)
    # download_btn = gr.File(visible=True)
    # btn.click(fn=download_table, inputs=None, outputs=download_btn)

    with gr.Row():
        with gr.Accordion("Storage of Model Predictions", open=True):
            predictions_button = gr.Textbox(
                value=Predictions_BUTTON_TEXT,
                label=Predictions_BUTTON_LABEL,
                elem_id='predictions-button',
                lines=2,                 # show the URL without scrolling
                max_lines=4,             # cap the textbox height
                show_copy_button=True,   # copy button for convenience
            )
    with gr.Row():
        with gr.Accordion("Citation", open=True):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button',
                lines=6,                 # show the whole BibTeX entry
                max_lines=8,             # cap the textbox height
                show_copy_button=True,   # copy button for convenience
            )


ERROR_DF = {
    "Type": ['NoneType'],
    "Details": ["Could not find predictions for the combination of the two options above."],
}
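
# Illustrative only: the nested layout that get_pre_df() below assumes for the
# predictions file, inferred from its lookups (dataset -> model abbr ->
# 'predictions' -> list of records). The values are placeholders, not real
# predictions, and 'prediction' is an assumed field name; 'origin_prompt' and
# 'gold' are the keys the code actually touches, and any extra keys in a record
# simply become extra columns in the rendered DataFrame.
_EXAMPLE_PREDICTIONS_SHAPE = {
    'IFEval': {
        'some-model-abbr': {
            'predictions': [
                {'origin_prompt': '<prompt sent to the model>',
                 'prediction': '<model output>',
                 'gold': '<reference answer>'},
            ],
        },
    },
}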
def show_predictions_tab(model_list, dataset_list, predictions):

    def get_pre_df(model_name, dataset_name):
        if dataset_name not in predictions or model_name not in predictions[dataset_name]:
            return pd.DataFrame(ERROR_DF)
        this_predictions = predictions[dataset_name][model_name]['predictions']
        # Stringify nested fields so they render cleanly in the DataFrame.
        for i in range(len(this_predictions)):
            this_predictions[i]['origin_prompt'] = str(this_predictions[i]['origin_prompt'])
            this_predictions[i]['gold'] = str(this_predictions[i]['gold'])
        return pd.DataFrame(this_predictions)

    model_list = [i['abbr'] for i in model_list]
    initial_predictions = get_pre_df('MiniMax-Text-01', 'IFEval')

    with gr.Row():
        with gr.Column():
            model_drop = gr.Dropdown(
                label="Model Name",
                choices=model_list,      # model abbreviations from the metadata file
                interactive=True,
            )
        with gr.Column():
            dataset_drop = gr.Dropdown(
                label="Dataset Name",
                choices=dataset_list,    # dataset names from the results file
                interactive=True,
            )
    with gr.Column():
        table = gr.DataFrame(
            value=initial_predictions,
            interactive=False,
            wrap=False,
            max_height=1000,
            column_widths=calculate_column_widths(initial_predictions),
        )

    model_drop.change(
        fn=get_pre_df,
        inputs=[model_drop, dataset_drop],
        outputs=table,
    )
    dataset_drop.change(
        fn=get_pre_df,
        inputs=[model_drop, dataset_drop],
        outputs=table,
    )

    with gr.Row():
        with gr.Accordion("Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button',
                lines=6,                 # show the whole BibTeX entry
                max_lines=8,             # cap the textbox height
                show_copy_button=True,   # copy button for convenience
            )


def create_interface():
    df, model_list, dataset_list = make_results_tab(model_info, results)
    predictions = findfile_predictions()

    with gr.Blocks() as demo:
        # title_comp = gr.Markdown(Initial_title)
        gr.Markdown(MAIN_LEADERBOARD_DESCRIPTION)
        with gr.Tabs(elem_classes='tab-buttons') as tabs:
            with gr.TabItem('Results', elem_id='main', id=0):
                show_results_tab(df)
            # with gr.TabItem('Predictions', elem_id='notmain', id=1):
            #     show_predictions_tab(model_list, dataset_list, predictions)

    return demo


# model_info, results = findfile()
# breakpoint()

if __name__ == '__main__':
    demo = create_interface()
    demo.queue()
    demo.launch(server_name='0.0.0.0')
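
# Usage sketch (not part of the app): run this module directly and open
# http://localhost:7860, Gradio's default port. Importing the module needs
# network access to DATA_URL_BASE, and create_interface() also reads
# data/hf-academic-predictions.json from disk even though the Predictions tab
# is currently commented out. To expose a temporary public link instead of only
# binding locally, one could call demo.launch(server_name='0.0.0.0', share=True).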