Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation
Paper
•
2412.03304
•
Published
•
17
None defined yet.
ds = load_dataset("HuggingFaceH4/OpenHermesPreferences", split="train")
# Get the categories of the source dataset
# ['airoboros2.2', 'CamelAI', 'caseus_custom', ...]
sources = ds.unique("source")
# Filter for a subset
ds_filtered = ds.filter(lambda x : x["source"] in ["metamath", "EvolInstruct_70k"], num_proc=6)
from datasets import load_dataset
# Instead of this:
# dataset = load_dataset("Intel/orca_dpo_pairs", split="train")
# use this:
dataset = load_dataset("argilla/distilabel-intel-orca-dpo-pairs", split="train")
dataset = dataset.filter(
lambda r:
r["status"] != "tie" and
r["chosen_score"] >= 8 and
not r["in_gsm8k_train"]
)