"""Gradio app for the OpenCompass CompassAcademic leaderboard.

Downloads the model metadata and the aggregated evaluation results, then
builds a tabbed Gradio interface. The tab bodies are still placeholders.
"""

import json
import re
from datetime import datetime
from urllib.error import URLError
from urllib.request import urlopen

import gradio as gr
import pandas as pd

CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# Extra markup passed to ``gr.Blocks(head=...)``; currently whitespace only.
head_style = """ """

DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"

# Upper bound (seconds) for each blocking network operation in a download,
# so an unresponsive server cannot hang application startup indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def _fetch_json(url):
    """Download *url* and return its body parsed as UTF-8 encoded JSON."""
    # The context manager closes the HTTP response even when reading or
    # decoding fails, so no connection is leaked.
    with urlopen(url, timeout=REQUEST_TIMEOUT_SECONDS) as response:
        payload = response.read().decode('utf-8')
    return json.loads(payload)


def findfile():
    """Fetch the leaderboard data files located under ``DATA_URL_BASE``.

    Returns:
        A ``(model_info, results)`` tuple holding the parsed contents of
        ``model-meta-info.json`` and ``hf-academic.json`` respectively.

    Raises:
        urllib.error.URLError: If a file cannot be downloaded (a stalled
            connection may instead surface as a timeout error).
        json.JSONDecodeError: If a downloaded file is not valid JSON.
    """
    model_meta_info = 'model-meta-info'
    results_sum = 'hf-academic'

    model_info = _fetch_json(f"{DATA_URL_BASE}{model_meta_info}.json")
    results = _fetch_json(f"{DATA_URL_BASE}{results_sum}.json")
    return model_info, results


MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
"""


def create_interface():
    """Build the leaderboard UI.

    Returns:
        The ``gr.Blocks`` app containing the "Results" and "Predictions" tabs.
    """
    # Fetched before any UI is constructed so a download failure aborts early.
    # The values are not consumed yet because the tab bodies are placeholders.
    model_info, results = findfile()

    with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
        with gr.Tabs(elem_classes='tab-buttons') as tabs:
            with gr.TabItem('Results', elem_id='main', id=0):
                # TODO: render the main table, e.g. math_main_tab(results)
                pass
            # Each tab needs its own id so it can be selected unambiguously.
            with gr.TabItem('Predictions', elem_id='notmain', id=1):
                # TODO: render per-dataset predictions, e.g.
                # dataset_tab(results, structs[i], dataset)
                pass

    return demo


if __name__ == '__main__':
    demo = create_interface()
    demo.queue()
    demo.launch(server_name='0.0.0.0')