import json
import re
from datetime import datetime
from urllib.error import URLError
from urllib.request import urlopen

import gradio as gr
import pandas as pd
|
|
|
# BibTeX entry for citing OpenCompass. Kept as a raw string so that the
# backslash in ``\url`` is not treated as the start of an escape sequence.
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/open-compass/opencompass}},
year={2023}
}"""

# Caption text that accompanies the citation snippet above.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
|
|
|
|
# Extra markup handed to ``gr.Blocks(head=...)``: on viewports at least
# 1536px wide, ``.gradio-container`` gets ``min-width: var(--size-full)``.
head_style = """
<style>
@media (min-width: 1536px)
{
.gradio-container {
min-width: var(--size-full) !important;
}
}
</style>
"""
|
|
|
|
|
# Base URL (note the trailing slash) to which ``findfile`` appends
# ``<name>.json`` when downloading the leaderboard data.
# NOTE(review): this is plain HTTP; confirm whether the bucket can be served
# over HTTPS before switching the scheme.
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"
|
|
|
def _fetch_json(name):
    """Download ``<DATA_URL_BASE><name>.json`` and return the decoded JSON."""
    url = f"{DATA_URL_BASE}{name}.json"
    # The ``with`` block closes the HTTP response even when reading or
    # parsing fails, so the underlying connection is never leaked.
    with urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))


def findfile():
    """Download the model metadata and the evaluation results.

    Fetches ``model-meta-info.json`` and ``hf-academic.json`` from
    ``DATA_URL_BASE`` and parses each body as UTF-8 encoded JSON.

    Returns:
        A ``(model_info, results)`` tuple with the two decoded JSON documents,
        in that order.

    Raises:
        urllib.error.URLError: If either file cannot be retrieved.
        ValueError: If a response body is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are both
            subclasses of ``ValueError``).
    """
    model_info = _fetch_json('model-meta-info')
    results = _fetch_json('hf-academic')
    return model_info, results
|
|
|
|
|
# Markdown introduction for the main results view: a heading, one summary
# sentence and three bullet points.
MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass).
"""
|
|
|
|
|
|
|
|
|
def create_interface():
    """Build the Gradio app for the leaderboard.

    Downloads the leaderboard data through ``findfile()`` and assembles a
    ``gr.Blocks`` page, styled with ``head_style``, that holds two tabs:
    "Results" and "Predictions".

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.
    """
    # The data is fetched but not rendered yet: both tab bodies below are
    # still placeholders.
    model_info, results = findfile()

    with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
        with gr.Tabs(elem_classes='tab-buttons') as tabs:
            with gr.TabItem('Results', elem_id='main', id=0):
                pass

            # Each tab gets its own id (this one used to repeat id=0) so the
            # two tabs can be told apart when a tab is selected by id.
            with gr.TabItem('Predictions', elem_id='notmain', id=1):
                pass

    return demo
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the app, enable Gradio's request queue, then
    # start the web server.
    demo = create_interface()
    demo.queue()
    # '0.0.0.0' makes the server listen on every network interface rather
    # than on localhost only.
    demo.launch(server_name='0.0.0.0')
|
|
|
|
|
|