myhs committed
Commit 5f54938 · verified · Parent: d868240

Update app.py

Files changed (1): app.py (+65 −1)
app.py CHANGED
@@ -1,12 +1,76 @@
 import gradio as gr
 import json
 import pandas as pd
-from urllib.request import urlopen, URLError
+from urllib.request import urlopen
+from urllib.error import URLError
 import re
 from datetime import datetime
 
+CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
+    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
+    author={OpenCompass Contributors},
+    howpublished = {\url{https://github.com/open-compass/opencompass}},
+    year={2023}
+}"""
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 
 
+head_style = """
+<style>
+@media (min-width: 1536px)
+{
+    .gradio-container {
+        min-width: var(--size-full) !important;
+    }
+}
+</style>
+"""
+
+
+DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"
+
+def findfile():
+    model_meta_info = 'model-meta-info'
+    results_sum = 'hf-academic'
+
+    url = f"{DATA_URL_BASE}{model_meta_info}.json"
+    response = urlopen(url)
+    model_info = json.loads(response.read().decode('utf-8'))
+
+    url = f"{DATA_URL_BASE}{results_sum}.json"
+    response = urlopen(url)
+    results = json.loads(response.read().decode('utf-8'))
+
+    return model_info, results
+
+
+MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
+The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
+- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
+- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
+- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
+"""
+
+
+
+
+def create_interface():
+    model_info, results = findfile()
+
+    with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
+        with gr.Tabs(elem_classes='tab-buttons') as tabs:
+            with gr.TabItem('Results', elem_id='main', id=0):
+                # math_main_tab(results)
+                pass
+            with gr.TabItem('Predictions', elem_id='notmain', id=1):
+                # dataset_tab(results, structs[i], dataset)
+                pass
+
+    return demo
+
+# model_info, results = findfile()
+# breakpoint()
+
 if __name__ == '__main__':
     demo = create_interface()
     demo.queue()
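
The import change is the substantive fix in this commit: URLError is defined in urllib.error, not urllib.request, so the old combined import raised an ImportError as soon as the module loaded. The new code imports URLError but never catches it, though, so any OSS outage still crashes findfile(). Below is a minimal sketch of how the exception could be put to use, built around the commit's own DATA_URL_BASE; the fetch_json helper, its timeout, and the empty-dict fallback are assumptions for illustration, not part of the commit.

import json
from urllib.request import urlopen
from urllib.error import URLError

DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"

def fetch_json(name, timeout=10):
    # Hypothetical helper: download one leaderboard JSON file, returning
    # None instead of raising when the bucket is unreachable.
    url = f"{DATA_URL_BASE}{name}.json"
    try:
        with urlopen(url, timeout=timeout) as response:
            return json.loads(response.read().decode('utf-8'))
    except URLError as err:
        print(f"Failed to fetch {url}: {err.reason}")
        return None

def findfile():
    # Same return shape as the committed findfile(), but degrades to empty
    # dicts on a network error instead of crashing the interface.
    model_info = fetch_json('model-meta-info') or {}
    results = fetch_json('hf-academic') or {}
    return model_info, results

Returning empty containers keeps create_interface() renderable even when the fetch fails; whether that is preferable to failing fast is a design choice the commit leaves open.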