import json
from urllib.error import URLError
from urllib.request import urlopen

import gradio as gr
import pandas as pd

CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
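
# The two constants above are presumably wired into a copyable citation
# widget. A minimal sketch of how this is commonly done in Gradio
# leaderboards; render_citation_accordion is a hypothetical helper, not part
# of the original file, and must be called inside a gr.Blocks context:
def render_citation_accordion():
    with gr.Accordion(CITATION_BUTTON_LABEL, open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            lines=7,
            label="BibTeX",
            show_copy_button=True,  # lets users copy the snippet in one click
        )
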
head_style = """
<style>
@media (min-width: 1536px)
{
.gradio-container {
min-width: var(--size-full) !important;
}
}
</style>
"""
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"

def findfile():
    """Fetch model metadata and aggregated results from the OpenCompass OSS bucket."""
    def fetch_json(name):
        url = f"{DATA_URL_BASE}{name}.json"
        try:
            with urlopen(url) as response:
                return json.loads(response.read().decode('utf-8'))
        except URLError as err:
            raise RuntimeError(f'Failed to download {url}') from err

    model_info = fetch_json('model-meta-info')
    results = fetch_json('hf-academic')
    return model_info, results
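
# pandas is imported above, but the table-building code is not part of this
# snippet. A minimal sketch of how the fetched results might be turned into a
# leaderboard table, assuming `results` maps model names to per-benchmark
# scores; both the schema and build_main_table itself are assumptions for
# illustration, not the original implementation:
def build_main_table(results):
    df = pd.DataFrame.from_dict(results, orient='index')  # one row per model
    df.index.name = 'Model'
    # Rank models by their mean score across the benchmark columns.
    df['Average'] = df.mean(axis=1, numeric_only=True)
    return df.sort_values('Average', ascending=False).reset_index()
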
MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass) 🏆.
"""
def create_interface():
    model_info, results = findfile()
    with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
        with gr.Tabs(elem_classes='tab-buttons') as tabs:
            with gr.TabItem('Results', elem_id='main', id=0):
                # math_main_tab(results)
                pass
            with gr.TabItem('Predictions', elem_id='notmain', id=1):
                # dataset_tab(results, structs[i], dataset)
                pass
    return demo

if __name__ == '__main__':
    demo = create_interface()
    demo.queue()
    demo.launch(server_name='0.0.0.0')