import gradio as gr
from pathlib import Path
import pandas as pd
from src.process_data import *
abs_path = Path(__file__).parent
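# BibTeX entry displayed in the "πŸ“™ Citation" accordion at the bottom of the page.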
CITATION_TEXT = open("pages/citation.bib", "r").read()
title = (
    """
    <center>
        <h1>CAMEO Leaderboard</h1>
    </center>
    """
)
description = (
    """
    <center>
        <h3>The CAMEO Leaderboard ranks and evaluates Speech Emotion Recognition (SER) models on the CAMEO dataset.</h3>
    </center>
    """
)
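# Build the full Gradio UI: the leaderboard tabs, the static About / Evaluate / Submit pages,
# and a collapsible citation box.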
def app():
    with gr.Blocks() as demo:
        gr.HTML("""
            <link href="https://fonts.googleapis.com/css2?family=Inter&display=swap" rel="stylesheet">
        """)
        demo.css = """
            .tab-item {
                font-size: 14px;
                padding: 10px 20px;
                font-family: 'Inter', sans-serif;
            }
        """
        gr.HTML(title, elem_classes='tab-item')
        gr.HTML(description, elem_classes='tab-item')

        with gr.Tabs():
            with gr.Tab("πŸ† Leaderboard", elem_classes='tab-item'):
                languages = ['All', 'Bengali', 'English', 'French', 'German', 'Italian', 'Polish', 'Russian', 'Spanish']
                datasets = ['All', 'CaFE', 'CREMA-D', 'EMNS', 'Emozionalmente', 'eNTERFACE', 'JL-Corpus', 'MESD', 'nEMO', 'Oreau', 'PAVOQUE', 'RAVDESS', 'RESD', 'SUBESCO']
                emotions = ['All', 'anger', 'anxiety', 'apology', 'assertiveness', 'calm', 'concern', 'disgust',
                            'encouragement', 'enthusiasm', 'excitement', 'fear', 'happiness',
                            'neutral', 'poker', 'sadness', 'sarcasm', 'surprise']
                metric = ["f1_macro", "accuracy", "weighted_f1"]
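                # Choice lists for the filter widgets in the tabs below; 'All' is offered
                # alongside the individual languages, datasets, and emotions.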
                with gr.Tab("Overall Results", elem_classes='tab-item'):
                    gr.Markdown('The table reports the `f1_macro`, `weighted_f1` and `accuracy` metrics computed between the expected emotion labels and those generated by each model at the given `temperature`.', elem_classes='tab-item')
                    overall_table = gr.Dataframe()
                with gr.Tab("Results per Language", elem_classes='tab-item'):
                    gr.Markdown('The table reports the selected metric (`f1_macro`, `weighted_f1` or `accuracy`) computed between the expected emotion labels and those generated by each model at a given `temperature`, for each `language` in the CAMEO dataset.', elem_classes='tab-item')
                    languages_filter = gr.CheckboxGroup(choices=languages, label="Select columns", value=languages)
                    select_lang_metric = gr.Radio(metric, value='f1_macro', label="Select metric")
                    lang_table = gr.Dataframe()
                with gr.Tab("Results per Dataset", elem_classes='tab-item'):
                    gr.Markdown(
                        'The table reports the selected metric (`f1_macro`, `weighted_f1` or `accuracy`) computed between the expected emotion labels and those generated by each model at a given `temperature`, for each `dataset` in the CAMEO dataset.',
                        elem_classes='tab-item')
                    dataset_filter = gr.CheckboxGroup(choices=datasets, label="Select columns", value=datasets)
                    select_ds_metric = gr.Radio(metric, value='f1_macro', label="Select metric")
                    dataset_table = gr.Dataframe()
                with gr.Tab("Results per Emotion", elem_classes='tab-item'):
                    gr.Markdown(
                        'The table reports the `f1_macro` metric computed between the expected emotion labels and those generated by each model at a given `temperature`, for each `emotion` in the CAMEO dataset.',
                        elem_classes='tab-item')
                    emo_filter = gr.CheckboxGroup(choices=emotions, label="Select columns", value=emotions)
                    emotion_table = gr.Dataframe()
                df_state = gr.State()

                def update_leaderboards(languages=[], datasets=[], emotions=[], select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
                    # Rebuild every leaderboard table from results.jsonl.
                    df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
                    lang_dict = build_lang_dict(df)
                    ds_dict = build_ds_dict(df)
                    emo_dict = build_emo_dict(df)
                    overall = overall_leaderboard(df)
                    by_lang = leaderboard_per_group(lang_dict, languages, metric=select_lang_metric)
                    by_dataset = leaderboard_per_group(ds_dict, datasets, metric=select_ds_metric)
                    by_emotion = leaderboard_per_group(emo_dict, emotions)
                    return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
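                # demo.load runs once when the page is first rendered, so all tables are
                # populated with the default filter values; the status string fills df_state.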
                demo.load(
                    update_leaderboards,
                    inputs=[languages_filter, dataset_filter, emo_filter],
                    outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state],
                )

                def on_change(selected_languages, selected_lang_metric, selected_datasets, selected_ds_metric, selected_emotions):
                    # Drop the status string so the number of returned values matches the four table outputs.
                    overall, by_lang, by_dataset, by_emotion, _ = update_leaderboards(
                        languages=selected_languages,
                        select_lang_metric=selected_lang_metric,
                        datasets=selected_datasets,
                        select_ds_metric=selected_ds_metric,
                        emotions=selected_emotions,
                    )
                    return overall, by_lang, by_dataset, by_emotion

                # Every filter widget triggers the same refresh of all four tables.
                change_inputs = [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter]
                change_outputs = [overall_table, lang_table, dataset_table, emotion_table]
                for component in (languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter):
                    component.change(on_change, change_inputs, change_outputs)
            with gr.Tab("πŸ“ About", elem_classes='tab-item'):
                gr.Markdown(open("pages/about.md", "r").read(), elem_classes='tab-item')
            with gr.Tab("πŸ”’ Evaluate your model", elem_classes='tab-item'):
                gr.Markdown(open("pages/evaluate.md", "r").read(), elem_classes='tab-item')
            with gr.Tab("πŸš€ Submit here!", elem_classes='tab-item'):
                gr.Markdown(open("pages/submit.md", "r").read(), elem_classes='tab-item')

        with gr.Column():
            with gr.Accordion("πŸ“™ Citation", open=False, elem_classes='tab-item'):
                citation_button = gr.Textbox(
                    label="",
                    value=CITATION_TEXT,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )

    return demo
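# Run locally with `python app.py`; Gradio serves the app on http://127.0.0.1:7860 by default.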
if __name__ == "__main__":
    demo = app()
    demo.launch()