myhs committed
Commit 5f54938 · verified · Parent: d868240

Update app.py

Files changed (1): app.py (+65 −1)
app.py CHANGED
@@ -1,12 +1,76 @@
 import gradio as gr
 import json
 import pandas as pd
-from urllib.request import urlopen, URLError
+from urllib.request import urlopen
+from urllib.error import URLError
 import re
 from datetime import datetime
 
+CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
+    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
+    author={OpenCompass Contributors},
+    howpublished = {\url{https://github.com/open-compass/opencompass}},
+    year={2023}
+}"""
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 
 
+head_style = """
+<style>
+@media (min-width: 1536px)
+{
+    .gradio-container {
+        min-width: var(--size-full) !important;
+    }
+}
+</style>
+"""
+
+
+DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"
+
+def findfile():
+    model_meta_info = 'model-meta-info'
+    results_sum = 'hf-academic'
+
+    url = f"{DATA_URL_BASE}{model_meta_info}.json"
+    response = urlopen(url)
+    model_info = json.loads(response.read().decode('utf-8'))
+
+    url = f"{DATA_URL_BASE}{results_sum}.json"
+    response = urlopen(url)
+    results = json.loads(response.read().decode('utf-8'))
+
+    return model_info, results
+
+
+MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
+The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
+- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
+- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
+- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
+"""
+
+
+
+
+def create_interface():
+    model_info, results = findfile()
+
+    with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
+        with gr.Tabs(elem_classes='tab-buttons') as tabs:
+            with gr.TabItem('Results', elem_id='main', id=0):
+                # math_main_tab(results)
+                pass
+            with gr.TabItem('Predictions', elem_id='notmain', id=1):
+                # dataset_tab(results, structs[i], dataset)
+                pass
+
+    return demo
+
+# model_info, results = findfile()
+# breakpoint()
+
 if __name__ == '__main__':
     demo = create_interface()
     demo.queue()
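
The import change is the substantive fix in this commit: URLError is defined in urllib.error, not urllib.request, so the old combined import raised an ImportError as soon as the module loaded. The new code imports URLError but never catches it, though, so any OSS outage still crashes findfile(). Below is a minimal sketch of how the exception could be put to use, built around the commit's own DATA_URL_BASE; the fetch_json helper, its timeout, and the empty-dict fallback are assumptions for illustration, not part of the commit.

import json
from urllib.request import urlopen
from urllib.error import URLError

DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"

def fetch_json(name, timeout=10):
    # Hypothetical helper: download one leaderboard JSON file, returning
    # None instead of raising when the bucket is unreachable.
    url = f"{DATA_URL_BASE}{name}.json"
    try:
        with urlopen(url, timeout=timeout) as response:
            return json.loads(response.read().decode('utf-8'))
    except URLError as err:
        print(f"Failed to fetch {url}: {err.reason}")
        return None

def findfile():
    # Same return shape as the committed findfile(), but degrades to empty
    # dicts on a network error instead of crashing the interface.
    model_info = fetch_json('model-meta-info') or {}
    results = fetch_json('hf-academic') or {}
    return model_info, results

Returning empty containers keeps create_interface() renderable even when the fetch fails; whether that is preferable to failing fast is a design choice the commit leaves open.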