from collections import defaultdict

import pandas as pd

# Metric columns that every leaderboard view reads from the results frame.
_METRICS = ("f1_macro", "weighted_f1", "accuracy")

# Keys of a row's ``metrics_per_label`` mapping that are left out of the
# per-emotion view.
_NON_EMOTION_KEYS = frozenset({"accuracy", "macro avg", "weighted avg"})


def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro") -> pd.DataFrame:
    """Return the rows whose language is "All", ranked by ``sort_column``.

    Args:
        df: Results frame with at least the columns ``language``, ``model``,
            ``temperature``, ``f1_macro``, ``weighted_f1`` and ``accuracy``.
        sort_column: One of the returned columns; rows are sorted on it in
            descending order.

    Returns:
        A new frame with the columns ``model``, ``temperature`` and the three
        metrics. ``temperature`` is rounded to 1 decimal, metrics to 4.

    Raises:
        KeyError: If a required column is missing or ``sort_column`` is not
            one of the returned columns.
    """
    overall = df.loc[df["language"] == "All", ["model", "temperature", *_METRICS]]
    overall = overall.sort_values(by=sort_column, ascending=False)
    return overall.round({"temperature": 1, **dict.fromkeys(_METRICS, 4)})


def _build_group_dict(df: pd.DataFrame, group_col: str) -> defaultdict:
    """Nest metrics as ``result[model][temperature]["<group>.<metric>"]``."""
    nested = defaultdict(lambda: defaultdict(dict))
    for row in df.itertuples():
        group = getattr(row, group_col)
        scores = nested[row.model][row.temperature]
        for metric in _METRICS:
            scores[f"{group}.{metric}"] = getattr(row, metric)
    return nested


def build_lang_dict(df: pd.DataFrame) -> defaultdict:
    """Collect per-language metrics for every (model, temperature) pair.

    Args:
        df: Results frame with the columns ``model``, ``temperature``,
            ``language`` and the three metric columns.

    Returns:
        ``result[model][temperature]["<language>.<metric>"] -> value`` as
        nested ``defaultdict`` objects.
    """
    return _build_group_dict(df, "language")


def build_ds_dict(df: pd.DataFrame) -> defaultdict:
    """Collect per-dataset metrics for every (model, temperature) pair.

    Args:
        df: Results frame with the columns ``model``, ``temperature``,
            ``dataset`` and the three metric columns.

    Returns:
        ``result[model][temperature]["<dataset>.<metric>"] -> value`` as
        nested ``defaultdict`` objects.
    """
    return _build_group_dict(df, "dataset")


def build_emo_dict(df: pd.DataFrame) -> defaultdict:
    """Collect per-emotion F1 scores from the rows whose language is "All".

    The set of emotions is taken from the first matching row's
    ``metrics_per_label`` mapping, minus the keys ``accuracy``, ``macro avg``
    and ``weighted avg``. Every other row is expected to carry the same keys.

    Args:
        df: Results frame with the columns ``language``, ``model``,
            ``temperature``, ``f1_macro`` and ``metrics_per_label`` (a mapping
            from label to a mapping that may contain ``"f1-score"``).

    Returns:
        ``result[model][temperature][emotion] -> f1-score`` plus an ``"All"``
        entry holding the row's ``f1_macro``. Empty when no row has language
        "All".

    Raises:
        KeyError: If a later row lacks an emotion present in the first row.
    """
    overall = df[df["language"] == "All"]
    emo_data = defaultdict(lambda: defaultdict(dict))
    if overall.empty:
        # Nothing to read the emotion list from; avoid indexing row 0.
        return emo_data

    first_report = overall["metrics_per_label"].iloc[0]
    # Keep the mapping's own key order so the result is deterministic.
    emotions = [label for label in first_report if label not in _NON_EMOTION_KEYS]

    for row in overall.itertuples():
        scores = emo_data[row.model][row.temperature]
        for emotion in emotions:
            scores[emotion] = row.metrics_per_label[emotion].get("f1-score")
        scores["All"] = row.f1_macro
    return emo_data


def leaderboard_per_group(lang_dict, use_cols, metric: str = "f1_macro") -> pd.DataFrame:
    """Flatten a nested ``model -> temperature -> scores`` mapping into a table.

    Score keys of the form ``"<group>.<metric>"`` contribute a ``<group>``
    column only when the part after the last dot equals ``metric``. Keys
    without a dot (such as those produced by ``build_emo_dict``) are copied
    through under their own name.

    Args:
        lang_dict: Mapping ``model -> temperature -> {key: value}``.
        use_cols: Names of the group columns to keep; they appear in sorted
            order after ``model`` and ``temperature``.
        metric: Metric name selecting which dotted keys are used.

    Returns:
        A frame with the columns ``model``, ``temperature`` and the sorted
        ``use_cols``; scores are rounded to 4 decimals and ``temperature`` to
        1. Rows are sorted by ``"All"`` in descending order when ``"All"`` is
        among ``use_cols``. An empty frame with those columns is returned
        when ``lang_dict`` has no entries.

    Raises:
        KeyError: If a name in ``use_cols`` was not produced by any entry.
    """
    columns = ["model", "temperature", *sorted(use_cols)]

    rows = []
    for model, by_temperature in lang_dict.items():
        for temperature, scores in by_temperature.items():
            entry = {"model": model, "temperature": temperature}
            for key, value in scores.items():
                # Split on the LAST dot so group names may contain dots.
                group, sep, name = key.rpartition(".")
                if not sep:
                    entry[key] = value
                elif name == metric:
                    # Exact match: a group called "f1_macro_set" must not
                    # make its other metrics look like "f1_macro".
                    entry[group] = value
            rows.append(entry)

    if not rows:
        return pd.DataFrame(columns=columns)

    table = pd.DataFrame(rows)
    table["temperature"] = table["temperature"].round(1)
    for col in table.columns.difference(["model", "temperature"]):
        table[col] = table[col].round(4)

    table = table[columns]
    if "All" in use_cols:
        table = table.sort_values(by="All", ascending=False)
    return table