File size: 2,408 Bytes
9cd3569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
from collections import defaultdict


def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro"):
    """Build the overall leaderboard table from the ``language == "All"`` rows.

    Args:
        df: Results frame; must contain the columns ``language``, ``model``,
            ``temperature``, ``f1_macro``, ``weighted_f1`` and ``accuracy``.
        sort_column: Column to rank by, in descending order. Sorting happens
            after the column selection, so it has to be one of the columns
            kept in the output.

    Returns:
        A new DataFrame with the columns ``model``, ``temperature``,
        ``f1_macro``, ``weighted_f1`` and ``accuracy``; ``temperature`` is
        rounded to 1 decimal and the three scores to 4 decimals.
    """
    score_columns = ["f1_macro", "weighted_f1", "accuracy"]

    is_overall = df["language"] == "All"
    board = df.loc[is_overall, ["model", "temperature", *score_columns]]
    board = board.sort_values(by=sort_column, ascending=False)

    # Rounding is applied after ranking, so ties are decided on the raw values.
    rounded = {"temperature": board["temperature"].round(1)}
    rounded.update({name: board[name].round(4) for name in score_columns})

    return board.assign(**rounded)


def build_lang_dict(df: pd.DataFrame):
    """Index per-language scores by model and temperature.

    Args:
        df: Frame whose rows provide ``model``, ``temperature``, ``language``,
            ``f1_macro``, ``weighted_f1`` and ``accuracy``.

    Returns:
        A nested ``defaultdict`` shaped ``{model: {temperature: {key: score}}}``
        where each key is ``"<language>.<metric>"``. When two rows share the
        same model, temperature and language, the later row wins.
    """
    metric_names = ("f1_macro", "weighted_f1", "accuracy")
    nested = defaultdict(lambda: defaultdict(dict))

    for record in df.itertuples():
        scores = nested[record.model][record.temperature]
        scores.update(
            {f"{record.language}.{name}": getattr(record, name) for name in metric_names}
        )

    return nested


def build_ds_dict(df: pd.DataFrame):
    """Index per-dataset scores by model and temperature.

    Args:
        df: Frame whose rows provide ``model``, ``temperature``, ``dataset``,
            ``f1_macro``, ``weighted_f1`` and ``accuracy``.

    Returns:
        A nested ``defaultdict`` shaped ``{model: {temperature: {key: score}}}``
        where each key is ``"<dataset>.<metric>"``. When two rows share the
        same model, temperature and dataset, the later row wins.
    """
    tracked = ("f1_macro", "weighted_f1", "accuracy")
    scores_by_run = defaultdict(lambda: defaultdict(dict))

    for record in df.itertuples():
        per_run = scores_by_run[record.model][record.temperature]
        prefix = record.dataset
        for name in tracked:
            per_run[f"{prefix}.{name}"] = getattr(record, name)

    return scores_by_run


def build_emo_dict(df: pd.DataFrame):
    """Index per-emotion F1 scores by model and temperature.

    Only rows with ``language == "All"`` are used. The emotion labels are
    taken from the ``metrics_per_label`` mapping of the first such row, and
    every other row is expected to carry the same labels.

    Args:
        df: Frame whose rows provide ``language``, ``model``, ``temperature``,
            ``f1_macro`` and ``metrics_per_label`` (a mapping from label to a
            mapping that may contain ``"f1-score"``).

    Returns:
        A nested ``defaultdict`` shaped ``{model: {temperature: {label: f1}}}``.
        Each innermost dict also has an ``"All"`` entry holding the row's
        ``f1_macro``. The result is empty when no row has
        ``language == "All"``.

    Raises:
        KeyError: If a later row lacks one of the labels found in the first row.
    """
    df = df[df["language"] == "All"]
    emo_data = defaultdict(lambda: defaultdict(dict))

    # Without this guard, ``df.iloc[0]`` raises IndexError on an empty selection.
    if df.empty:
        return emo_data

    # Summary entries that sit alongside the labels are not emotions.
    # NOTE(review): these names look like scikit-learn classification_report
    # keys -- confirm against whatever produces ``metrics_per_label``.
    summary_keys = {"accuracy", "macro avg", "weighted avg"}

    # A list (not a set difference) keeps the labels in the mapping's own
    # order, so the output key order is the same on every run.
    emotions = [
        label for label in df.iloc[0].metrics_per_label if label not in summary_keys
    ]

    for row in df.itertuples():
        scores = emo_data[row.model][row.temperature]
        for emotion in emotions:
            scores[emotion] = row.metrics_per_label[emotion].get("f1-score")
        scores["All"] = row.f1_macro

    return emo_data


def leaderboard_per_group(lang_dict, use_cols, metric: str = "f1_macro"):
    """Turn a nested score dict into a leaderboard with one column per group.

    Args:
        lang_dict: Mapping shaped ``{model: {temperature: {key: value}}}``.
            Keys are either ``"<group>.<metric>"`` or a bare name with no dot.
        use_cols: Group columns to keep in the output; each must be present
            in the collected data unless ``lang_dict`` is empty.
        metric: Metric suffix to select from the dotted keys. Only keys whose
            part after the last dot equals this value are used.

    Returns:
        A DataFrame with the columns ``model``, ``temperature`` and then
        ``sorted(use_cols)``. ``temperature`` is rounded to 1 decimal and the
        group values to 4 decimals. Rows are sorted by ``"All"`` in descending
        order when ``"All"`` is in ``use_cols``. An empty ``lang_dict`` gives
        an empty frame with those columns.

    Raises:
        KeyError: If a requested column is absent from the collected data.
    """
    columns = ["model", "temperature"] + sorted(use_cols)

    rows = []
    for model, by_temperature in lang_dict.items():
        for temperature, metrics in by_temperature.items():
            entry = {"model": model, "temperature": temperature}
            for key, value in metrics.items():
                # Split on the LAST dot so group names that contain dots stay
                # intact, and compare the suffix exactly so that e.g. "f1"
                # does not match both "f1_macro" and "weighted_f1".
                group, sep, suffix = key.rpartition(".")
                if not sep:
                    # Bare key with no metric suffix: pass it through as-is.
                    entry[key] = value
                elif suffix == metric:
                    entry[group] = value
            rows.append(entry)

    # pd.DataFrame([]) has no columns, so the lookups below would raise.
    if not rows:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rows)

    df["temperature"] = df["temperature"].round(1)

    for col in df.columns.difference(["model", "temperature"]):
        df[col] = df[col].round(4)

    df = df[columns]
    if "All" in use_cols:
        df = df.sort_values(by="All", ascending=False)

    return df