Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,76 @@
|
|
1 |
import gradio as gr
|
2 |
import json
|
3 |
import pandas as pd
|
4 |
-
from urllib.request import urlopen
|
|
|
5 |
import re
|
6 |
from datetime import datetime
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
if __name__ == '__main__':
|
11 |
demo = create_interface()
|
12 |
demo.queue()
|
|
|
1 |
import gradio as gr
|
2 |
import json
|
3 |
import pandas as pd
|
4 |
+
from urllib.request import urlopen
|
5 |
+
from urllib.error import URLError
|
6 |
import re
|
7 |
from datetime import datetime
|
8 |
|
9 |
+
# BibTeX entry offered to users so they can cite the OpenCompass results.
# Raw string (r"""...""") so the \url macro's backslash survives verbatim.
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
# Label shown above the citation snippet in the UI.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
16 |
|
17 |
|
18 |
+
head_style = """
|
19 |
+
<style>
|
20 |
+
@media (min-width: 1536px)
|
21 |
+
{
|
22 |
+
.gradio-container {
|
23 |
+
min-width: var(--size-full) !important;
|
24 |
+
}
|
25 |
+
}
|
26 |
+
</style>
|
27 |
+
"""
|
28 |
+
|
29 |
+
|
30 |
+
# Root URL of the OSS bucket hosting the leaderboard JSON assets
# (model metadata and summary results are fetched relative to this).
# NOTE(review): served over plain http — confirm whether the bucket
# supports https before shipping.
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"
|
31 |
+
|
32 |
+
def _fetch_json(name):
    """Download and parse one JSON asset from ``DATA_URL_BASE``.

    Parameters
    ----------
    name : str
        Basename of the remote asset, without the ``.json`` suffix.

    Returns
    -------
    The parsed JSON payload (dict or list, per the remote document).

    Raises
    ------
    urllib.error.URLError
        If the asset cannot be reached.
    json.JSONDecodeError
        If the downloaded body is not valid JSON.
    """
    url = f"{DATA_URL_BASE}{name}.json"
    # Context manager guarantees the HTTP response is closed even on
    # error — the original implementation leaked both sockets.
    with urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))


def findfile():
    """Fetch the model metadata and the aggregated results from the CDN.

    Returns
    -------
    tuple
        ``(model_info, results)`` — the parsed ``model-meta-info.json``
        and ``hf-academic.json`` documents.
    """
    model_info = _fetch_json('model-meta-info')
    results = _fetch_json('hf-academic')
    return model_info, results
|
45 |
+
|
46 |
+
|
47 |
+
MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
|
48 |
+
The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
|
49 |
+
- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
|
50 |
+
- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
|
51 |
+
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
|
52 |
+
"""
|
53 |
+
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
def create_interface():
    """Build the Gradio Blocks UI for the leaderboard.

    Returns
    -------
    gr.Blocks
        The assembled (not yet launched) demo application.
    """
    # Fetched eagerly so a broken data source fails fast at startup;
    # the data is not rendered yet — the tab bodies are still stubs.
    model_info, results = findfile()

    with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
        with gr.Tabs(elem_classes='tab-buttons'):
            with gr.TabItem('Results', elem_id='main', id=0):
                # TODO: render the main results table.
                # math_main_tab(results)
                pass
            # BUG FIX: this tab previously reused id=0, colliding with the
            # first tab — tab ids must be unique for selection to work.
            with gr.TabItem('Predictions', elem_id='notmain', id=1):
                # TODO: render per-dataset predictions.
                # dataset_tab(results, structs[i], dataset)
                pass

    return demo
|
73 |
+
|
74 |
# Script entry point: build the UI and enable the request queue.
if __name__ == '__main__':
    demo = create_interface()
    # NOTE(review): no demo.launch() is visible in this chunk — confirm the
    # app is actually started after queue(), otherwise it never serves.
    demo.queue()
|