Spaces:

ferid197
/

LLaMA-Factory

Runtime error

App Files Files Community

LLaMA-Factory / evaluation / mmlu / mmlu.py

ferid197

Upload folder using huggingface_hub

e81015c verified 6 months ago

raw

history blame contribute delete

4.76 kB

	# Copyright 2025 the LlamaFactory team.
	# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import os

	import datasets
	import pandas as pd


	# BibTeX entry for the MMLU paper; passed to DatasetInfo(citation=...) in MMLU._info.
	_CITATION = """\
	@article{hendryckstest2021,
	title={Measuring Massive Multitask Language Understanding},
	author={Dan Hendrycks and Collin Burns and others},
	journal={Proceedings of the International Conference on Learning Representations (ICLR)},
	year={2021}
	}
	"""

	# Human-readable summary; passed to DatasetInfo(description=...) in MMLU._info.
	_DESCRIPTION = """\
	Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart,
	Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
	"""

	# Project homepage; passed to DatasetInfo(homepage=...) in MMLU._info.
	_HOMEPAGE = "https://github.com/hendrycks/test"

	# License identifier; passed to DatasetInfo(license=...) in MMLU._info.
	_LICENSE = "MIT"

	# Archive handed to dl_manager.download_and_extract() in MMLU._split_generators.
	# NOTE(review): this is a bare file name, presumably resolved relative to this
	# script's location by the download manager -- confirm.
	_URL = "mmlu.zip"

	# Names of the 57 MMLU subjects. Each name becomes one builder configuration
	# (see MMLU.BUILDER_CONFIGS, which iterates this list in order) and is also the
	# prefix of the per-split CSV file names built in MMLU._split_generators
	# ("<name>_test.csv", "<name>_val.csv", "<name>_dev.csv").
	task_list = [
	"high_school_european_history",
	"business_ethics",
	"clinical_knowledge",
	"medical_genetics",
	"high_school_us_history",
	"high_school_physics",
	"high_school_world_history",
	"virology",
	"high_school_microeconomics",
	"econometrics",
	"college_computer_science",
	"high_school_biology",
	"abstract_algebra",
	"professional_accounting",
	"philosophy",
	"professional_medicine",
	"nutrition",
	"global_facts",
	"machine_learning",
	"security_studies",
	"public_relations",
	"professional_psychology",
	"prehistory",
	"anatomy",
	"human_sexuality",
	"college_medicine",
	"high_school_government_and_politics",
	"college_chemistry",
	"logical_fallacies",
	"high_school_geography",
	"elementary_mathematics",
	"human_aging",
	"college_mathematics",
	"high_school_psychology",
	"formal_logic",
	"high_school_statistics",
	"international_law",
	"high_school_mathematics",
	"high_school_computer_science",
	"conceptual_physics",
	"miscellaneous",
	"high_school_chemistry",
	"marketing",
	"professional_law",
	"management",
	"college_physics",
	"jurisprudence",
	"world_religions",
	"sociology",
	"us_foreign_policy",
	"high_school_macroeconomics",
	"computer_security",
	"moral_scenarios",
	"moral_disputes",
	"electrical_engineering",
	"astronomy",
	"college_biology",
	]


	class MMLUConfig(datasets.BuilderConfig):
	    """Builder configuration for a single MMLU subject, pinned to version 1.0.0."""

	    def __init__(self, **kwargs):
	        """Initialise the base config with a fixed version.

	        Args:
	            **kwargs: Forwarded unchanged to ``datasets.BuilderConfig``
	                (for example ``name``). Passing ``version`` here raises
	                ``TypeError`` because the version is always supplied below.
	        """
	        pinned_version = datasets.Version("1.0.0")
	        super().__init__(version=pinned_version, **kwargs)


	class MMLU(datasets.GeneratorBasedBuilder):
	    """Dataset builder that exposes every MMLU subject as its own configuration.

	    One ``MMLUConfig`` is created per entry of ``task_list``. Each example has a
	    ``question``, four options ``A``-``D`` and an ``answer``, all declared as
	    string features.
	    """

	    BUILDER_CONFIGS = [MMLUConfig(name=task_name) for task_name in task_list]

	    def _info(self):
	        """Return the dataset metadata together with the all-string feature schema."""
	        features = datasets.Features(
	            {
	                "question": datasets.Value("string"),
	                "A": datasets.Value("string"),
	                "B": datasets.Value("string"),
	                "C": datasets.Value("string"),
	                "D": datasets.Value("string"),
	                "answer": datasets.Value("string"),
	            }
	        )
	        return datasets.DatasetInfo(
	            description=_DESCRIPTION,
	            features=features,
	            homepage=_HOMEPAGE,
	            license=_LICENSE,
	            citation=_CITATION,
	        )

	    def _split_generators(self, dl_manager):
	        """Download and extract the archive, then describe the three splits.

	        Args:
	            dl_manager: Download manager supplied by ``datasets``; only its
	                ``download_and_extract`` method is used.

	        Returns:
	            A list of ``datasets.SplitGenerator`` for TEST, VALIDATION and TRAIN
	            (in that order), each carrying the ``filepath`` of the selected
	            subject's CSV inside the extracted archive.
	        """
	        data_dir = dl_manager.download_and_extract(_URL)
	        task_name = self.config.name
	        # (split, sub-directory) pairs. The sub-directory name is also the CSV
	        # file suffix, e.g. data/val/<task>_val.csv. Note that the TRAIN split
	        # is read from the "dev" files.
	        split_dirs = (
	            (datasets.Split.TEST, "test"),
	            (datasets.Split.VALIDATION, "val"),
	            (datasets.Split.TRAIN, "dev"),
	        )
	        return [
	            datasets.SplitGenerator(
	                name=split,
	                gen_kwargs={
	                    "filepath": os.path.join(data_dir, "data", subdir, f"{task_name}_{subdir}.csv"),
	                },
	            )
	            for split, subdir in split_dirs
	        ]

	    def _generate_examples(self, filepath):
	        """Yield ``(row_index, example)`` pairs from one headerless subject CSV.

	        Args:
	            filepath: Path of the CSV file to read. The file has no header row
	                and its six columns are taken, in order, as question, options
	                A-D and answer.

	        Yields:
	            Tuples of the zero-based row position and a dict mapping each
	            column name to the cell text.

	        Raises:
	            ValueError: If the file does not contain exactly six columns
	                (raised by pandas when the column names are assigned).
	        """
	        # Keep every cell exactly as written in the CSV. With pandas' defaults,
	        # cells such as "N/A", "NA", "null" or an empty field are parsed as NaN
	        # and all-numeric option columns are inferred as int/float, so the text
	        # would come out as "nan" or "4.0" once converted to the string features.
	        df = pd.read_csv(filepath, header=None, dtype=str, keep_default_na=False)
	        df.columns = ["question", "A", "B", "C", "D", "answer"]

	        yield from enumerate(df.to_dict(orient="records"))