Maciej committed on
Commit 3de0d0d · 1 Parent(s): 4f08cdd

add tabs: about and submit; add: citation

Files changed (4):
  1. app.py +99 -62
  2. pages/about.md +14 -0
  3. pages/citation.bib +205 -0
  4. pages/submit.md +15 -0
app.py CHANGED
@@ -5,6 +5,14 @@ from collections import defaultdict
 
 
 abs_path = Path(__file__).parent
+CITATION_TEXT = open("pages/citation.bib", "r").read()
+title = (
+    """
+    <center>
+    <h1> CAMEO 🎧🌍😊</h1>
+    </center>
+    """
+)
 
 
 def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro"):
@@ -77,73 +85,102 @@ def leaderboard_per_group(lang_dict, use_cols, metric: str = "f1_macro"):
     return df
 
 
+
 def app():
     with gr.Blocks() as demo:
-        gr.Markdown("# 🏆 Leaderboard Viewer")
-
-        languages = ['All', 'Bengali', 'English', 'French', 'German', 'Italian', 'Polish', 'Russian', 'Spanish']
-        datasets = ['All', 'CaFE', 'CREMA-D', 'EMNS', 'Emozionalmente', 'eNTERFACE', 'JL-Corpus', 'MESD', 'nEMO', 'Oreau', 'PAVOQUE', 'RAVDESS', 'RESD', 'SUBESCO']
-        emotions = ['All', 'anger', 'anxiety',
-                    'apology', 'assertiveness', 'calm', 'concern', 'disgust',
-                    'encouragement', 'enthusiasm', 'excitement', 'fear', 'happiness',
-                    'neutral', 'poker', 'sadness', 'sarcasm', 'surprise']
-        metric = ["f1_macro", "accuracy", "weighted_f1"]
-
-        with gr.Tabs():
-            with gr.Tab("Overall Results"):
-                overall_table = gr.Dataframe()
-
-            with gr.Tab("Results per Language"):
-                languages_filter = gr.CheckboxGroup(choices=languages, label="Filter by Language", value=languages)
-                select_lang_metric = gr.Radio(metric, value='f1_macro', label="Metric")
-                lang_table = gr.Dataframe()
-
-            with gr.Tab("Results per Dataset"):
-                dataset_filter = gr.CheckboxGroup(choices=datasets, label="Filter by Dataset", value=datasets)
-                select_ds_metric = gr.Radio(metric, value='f1_macro', label="Metric")
-                dataset_table = gr.Dataframe()
-
-            with gr.Tab("Results per Emotion"):
-                emo_filter = gr.CheckboxGroup(choices=emotions, label="Filter by Emotion", value=emotions)
-                emotion_table = gr.Dataframe()
-
-        df_state = gr.State()
-
-        def update_leaderboards(languages=[], datasets=[], emotions=[], select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
-            df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
-            lang_dict = build_lang_dict(df)
-            ds_dict = build_ds_dict(df)
-            emo_dict = build_emo_dict(df)
-            overall = overall_leaderboard(df)
-            by_lang = leaderboard_per_group(lang_dict, languages, metric=select_lang_metric)
-            by_dataset = leaderboard_per_group(ds_dict, datasets, metric=select_ds_metric)
-            by_emotion = leaderboard_per_group(emo_dict, emotions)
-            return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
-
-        demo.load(
-            update_leaderboards,
-            inputs=[languages_filter, dataset_filter, emo_filter],
-            outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state]
-        )
-
-        def on_change(selected_languages, selected_lang_metric, selected_datasets, selected_ds_metric, selected_emotions):
-            return update_leaderboards(languages=selected_languages, select_lang_metric=selected_lang_metric, datasets=selected_datasets, select_ds_metric=selected_ds_metric, emotions=selected_emotions)
-
-        languages_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
-                                [overall_table, lang_table, dataset_table, emotion_table])
-
-        select_lang_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
-                                  [overall_table, lang_table, dataset_table, emotion_table])
-
-        dataset_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
-                              [overall_table, lang_table, dataset_table, emotion_table])
-
-        select_ds_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
-                                [overall_table, lang_table, dataset_table, emotion_table])
-
-        emo_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
-                          [overall_table, lang_table, dataset_table, emotion_table])
+        gr.HTML("""
+            <link href="https://fonts.googleapis.com/css2?family=Inter&display=swap" rel="stylesheet">
+        """)
+
+        demo.css = """
+        .tab-item {
+            font-size: 14px;
+            padding: 10px 20px;
+            font-family: 'Inter', sans-serif;
+        }
+        """
+
+        gr.HTML(title, elem_classes='tab-item')
+
+        with gr.Tabs():
+            with gr.Tab("🏆 Leaderboard", elem_classes='tab-item'):
+                gr.Markdown("# 🏆 Leaderboard", elem_classes='tab-item')
+
+                languages = ['All', 'Bengali', 'English', 'French', 'German', 'Italian', 'Polish', 'Russian', 'Spanish']
+                datasets = ['All', 'CaFE', 'CREMA-D', 'EMNS', 'Emozionalmente', 'eNTERFACE', 'JL-Corpus', 'MESD', 'nEMO', 'Oreau', 'PAVOQUE', 'RAVDESS', 'RESD', 'SUBESCO']
+                emotions = ['All', 'anger', 'anxiety',
+                            'apology', 'assertiveness', 'calm', 'concern', 'disgust',
+                            'encouragement', 'enthusiasm', 'excitement', 'fear', 'happiness',
+                            'neutral', 'poker', 'sadness', 'sarcasm', 'surprise']
+                metric = ["f1_macro", "accuracy", "weighted_f1"]
+
+                # with gr.Tabs():
+                with gr.Tab("Overall Results", elem_classes='tab-item'):
+                    overall_table = gr.Dataframe()
+
+                with gr.Tab("Results per Language", elem_classes='tab-item'):
+                    languages_filter = gr.CheckboxGroup(choices=languages, label="Filter by Language", value=languages)
+                    select_lang_metric = gr.Radio(metric, value='f1_macro', label="Metric")
+                    lang_table = gr.Dataframe()
+
+                with gr.Tab("Results per Dataset", elem_classes='tab-item'):
+                    dataset_filter = gr.CheckboxGroup(choices=datasets, label="Filter by Dataset", value=datasets)
+                    select_ds_metric = gr.Radio(metric, value='f1_macro', label="Metric")
+                    dataset_table = gr.Dataframe()
+
+                with gr.Tab("Results per Emotion", elem_classes='tab-item'):
+                    emo_filter = gr.CheckboxGroup(choices=emotions, label="Filter by Emotion", value=emotions)
+                    emotion_table = gr.Dataframe()
+
+                df_state = gr.State()
+
+                def update_leaderboards(languages=[], datasets=[], emotions=[], select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
+                    df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
+                    lang_dict = build_lang_dict(df)
+                    ds_dict = build_ds_dict(df)
+                    emo_dict = build_emo_dict(df)
+                    overall = overall_leaderboard(df)
+                    by_lang = leaderboard_per_group(lang_dict, languages, metric=select_lang_metric)
+                    by_dataset = leaderboard_per_group(ds_dict, datasets, metric=select_ds_metric)
+                    by_emotion = leaderboard_per_group(emo_dict, emotions)
+                    return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
+
+                demo.load(
+                    update_leaderboards,
+                    inputs=[languages_filter, dataset_filter, emo_filter],
+                    outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state]
+                )
+
+                def on_change(selected_languages, selected_lang_metric, selected_datasets, selected_ds_metric, selected_emotions):
+                    return update_leaderboards(languages=selected_languages, select_lang_metric=selected_lang_metric, datasets=selected_datasets, select_ds_metric=selected_ds_metric, emotions=selected_emotions)[:4]  # drop the status string; the handlers below update only the four tables
+
+                languages_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                        [overall_table, lang_table, dataset_table, emotion_table])
+
+                select_lang_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                          [overall_table, lang_table, dataset_table, emotion_table])
+
+                dataset_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                      [overall_table, lang_table, dataset_table, emotion_table])
+
+                select_ds_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                        [overall_table, lang_table, dataset_table, emotion_table])
+
+                emo_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
+                                  [overall_table, lang_table, dataset_table, emotion_table])
+            with gr.Tab("📝 About", elem_classes='tab-item'):
+                gr.Markdown(open("pages/about.md", "r").read(), elem_classes='tab-item')
+            with gr.Tab("🚀 Submit here!", elem_classes='tab-item'):
+                gr.Markdown(open("pages/submit.md", "r").read(), elem_classes='tab-item')
+        with gr.Column():
+            with gr.Accordion("📙 Citation", open=False, elem_classes='tab-item'):
+                citation_button = gr.Textbox(
+                    label="",
+                    value=CITATION_TEXT,
+                    lines=66,
+                    elem_id="citation-button",
+                    show_copy_button=True,
+                )
     return demo
 
 if __name__ == "__main__":
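A note on the wiring in this hunk (a reviewer-style observation, not part of the commit): the five `.change(...)` registrations repeat identical handler, input, and output lists. A minimal sketch of an equivalent loop, assuming the component names from the diff above:

```python
# Sketch (assumption: same components as in the diff above). Every filter
# and metric control triggers the same handler with the same argument
# lists, so the five .change(...) calls can be registered in a loop.
controls = [languages_filter, select_lang_metric, dataset_filter,
            select_ds_metric, emo_filter]
tables = [overall_table, lang_table, dataset_table, emotion_table]
for control in controls:
    control.change(on_change, inputs=controls, outputs=tables)
```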
 
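One more observation on the added code (not part of the commit): `CITATION_TEXT` is read from a path relative to the current working directory, while `results.jsonl` is resolved against `abs_path`. A sketch of a variant that resolves the bundled file the same way, assuming the repository layout shown in this commit:

```python
from pathlib import Path

abs_path = Path(__file__).parent
# Resolve against the app directory so the read works regardless of the
# process's working directory, and close the file handle implicitly.
CITATION_TEXT = (abs_path / "pages" / "citation.bib").read_text(encoding="utf-8")
```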
pages/about.md ADDED
@@ -0,0 +1,14 @@
+# 📝 About
+
+The **CAMEO Leaderboard** is part of the **Speech Emotion Recognition (SER)** benchmark, which is performed on the **CAMEO** dataset.
+<br>
+
+## Evaluation
+The model receives an audio sample as input and returns an emotion label. The model's raw response is then post-processed to map it onto the nearest label in the dataset.
+
+Once the label is resolved, f1_macro, accuracy, and weighted_f1 are computed.
+<br>
+
+## Software Library
+
+The dataset is available on the Hugging Face Hub ([here](https://huggingface.co/datasets/amu-cai/CAMEO)).
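The evaluation flow described above translates to a short scoring routine. A minimal sketch, assuming a `difflib`-based nearest-label match (the page does not specify the exact matching rule) and scikit-learn for the three reported metrics:

```python
import difflib
from sklearn.metrics import accuracy_score, f1_score

# The emotion labels listed in the leaderboard filters.
LABELS = ["anger", "anxiety", "apology", "assertiveness", "calm", "concern",
          "disgust", "encouragement", "enthusiasm", "excitement", "fear",
          "happiness", "neutral", "poker", "sadness", "sarcasm", "surprise"]

def to_nearest_label(response: str) -> str:
    """Map a free-form model response onto the closest dataset label.

    Assumption: string similarity stands in for whatever post-processing
    CAMEO actually applies; cutoff=0.0 guarantees a match is returned.
    """
    return difflib.get_close_matches(response.strip().lower(), LABELS,
                                     n=1, cutoff=0.0)[0]

def score(references: list[str], responses: list[str]) -> dict:
    """Compute the three metrics reported on the leaderboard."""
    predictions = [to_nearest_label(r) for r in responses]
    return {
        "f1_macro": f1_score(references, predictions, average="macro"),
        "accuracy": accuracy_score(references, predictions),
        "weighted_f1": f1_score(references, predictions, average="weighted"),
    }
```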
pages/citation.bib ADDED
@@ -0,0 +1,205 @@
+@inproceedings{cafe,
+  author = {Gournay, Philippe and Lahaie, Olivier and Lefebvre, Roch},
+  title = {{A Canadian French Emotional Speech Dataset}},
+  year = {2018},
+  isbn = {9781450351928},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/3204949.3208121},
+  doi = {10.1145/3204949.3208121},
+  booktitle = {Proceedings of the 9th ACM Multimedia Systems Conference},
+  pages = {399–402},
+  numpages = {4},
+  keywords = {canadian french, digital recording, emotional speech, speech dataset},
+  location = {Amsterdam, Netherlands},
+  series = {MMSys '18}
+}
+
+@article{cremad,
+  author = {Cao, Houwei and Cooper, David and Keutmann, Michael and Gur, Ruben and Nenkova, Ani and Verma, Ragini},
+  year = {2014},
+  month = {10},
+  pages = {377-390},
+  title = {{CREMA-D: Crowd-sourced emotional multimodal actors dataset}},
+  volume = {5},
+  journal = {IEEE Transactions on Affective Computing},
+  doi = {10.1109/TAFFC.2014.2336244}
+}
+
+@misc{emns,
+  title = {{EMNS /Imz/ Corpus: An emotive single-speaker dataset for narrative storytelling in games, television and graphic novels}},
+  author = {Kari Ali Noriy and Xiaosong Yang and Jian Jun Zhang},
+  year = {2023},
+  eprint = {2305.13137},
+  archivePrefix = {arXiv},
+  primaryClass = {cs.CL},
+  url = {https://arxiv.org/abs/2305.13137},
+}
+
+@article{emozionalmente,
+  author = {Catania, Fabio and Wilke, Jordan and Garzotto, Franca},
+  year = {2025},
+  month = {01},
+  pages = {1-14},
+  title = {{Emozionalmente: A Crowdsourced Corpus of Simulated Emotional Speech in Italian}},
+  volume = {PP},
+  journal = {IEEE Transactions on Audio, Speech and Language Processing},
+  doi = {10.1109/TASLPRO.2025.3540662}
+}
+
+@inproceedings{enterface,
+  author = {Martin, O. and Kotsia, I. and Macq, B. and Pitas, I.},
+  booktitle = {22nd International Conference on Data Engineering Workshops (ICDEW'06)},
+  title = {{The eNTERFACE' 05 Audio-Visual Emotion Database}},
+  year = {2006},
+  volume = {},
+  number = {},
+  pages = {8-8},
+  keywords = {Audio databases;Image databases;Emotion recognition;Spatial databases;Visual databases;Signal processing algorithms;Protocols;Speech analysis;Humans;Informatics},
+  doi = {10.1109/ICDEW.2006.145}}
+
+@inproceedings{jlcorpus,
+  author = {James, Jesin and Tian, Li and Watson, Catherine},
+  year = {2018},
+  month = {09},
+  pages = {2768-2772},
+  title = {{An Open Source Emotional Speech Corpus for Human Robot Interaction Applications}},
+  doi = {10.21437/Interspeech.2018-1349}
+}
+
+@inproceedings{mesd,
+  author = {Duville, Mathilde Marie and Alonso-Valerdi, Luz and Ibarra-Zarate, David I.},
+  year = {2021},
+  month = {12},
+  pages = {},
+  title = {{The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning}},
+  volume = {2021},
+  doi = {10.1109/EMBC46164.2021.9629934}
+}
+
+@article{mesd2,
+  author = {Duville, Mathilde Marie and Alonso-Valerdi, Luz and Ibarra-Zarate, David I.},
+  year = {2021},
+  month = {12},
+  pages = {},
+  title = {{Mexican Emotional Speech Database Based on Semantic, Frequency, Familiarity, Concreteness, and Cultural Shaping of Affective Prosody}},
+  volume = {6},
+  journal = {Data},
+  doi = {10.3390/data6120130}
+}
+
+@misc{christop2024nemodatasetemotionalspeech,
+  title = {{nEMO: Dataset of Emotional Speech in Polish}},
+  author = {Iwona Christop},
+  year = {2024},
+  eprint = {2404.06292},
+  archivePrefix = {arXiv},
+  primaryClass = {cs.CL},
+  url = {https://arxiv.org/abs/2404.06292},
+}
+
+@misc{oreau,
+  title = {{French emotional speech database - Or{\'e}au}},
+  author = {Kerkeni, Leila and Cleder, Catherine and Serrestou, Youssef and
+            Raoof, Kosai},
+  abstract = {This document presents the French emotional speech database -
+              Or{\'e}au, recorded in a quiet environment. The database is
+              designed for general study of emotional speech and analysis of
+              emotion characteristics for speech synthesis purposes. It
+              contains 79 utterances which could be used in everyday life in
+              the classroom. Between 10 and 13 utterances were written for
+              each of the 7 emotions in French language by 32 non-professional
+              speakers. 2 versions are available, the first one contains 502
+              sentences. A perception test was performed to evaluate the
+              recognition of emotions and their naturalness. 90\% of
+              utterances (434 utterances) were correctly identified and
+              retained after the test and various analyses, which constitutes
+              the second version of database.},
+  publisher = {Zenodo},
+  year = {2020}
+}
+
+@inproceedings{pavoque,
+  author = {Steiner, Ingmar and Schröder, Marc and Klepp, Annette},
+  title = {{The PAVOQUE corpus as a resource for analysis and synthesis of expressive speech}},
+  booktitle = {Phonetik & Phonologie 9. Phonetik & Phonologie (P&P-9), October 11-12, Zurich, Switzerland},
+  year = {2013},
+  month = {10},
+  pages = {83--84},
+  organization = {UZH},
+  publisher = {Peter Lang}
+}
+
+@article{ravdess,
+  doi = {10.1371/journal.pone.0196391},
+  author = {Livingstone, Steven R. AND Russo, Frank A.},
+  journal = {PLOS ONE},
+  publisher = {Public Library of Science},
+  title = {{The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English}},
+  year = {2018},
+  month = {05},
+  volume = {13},
+  url = {https://doi.org/10.1371/journal.pone.0196391},
+  pages = {1-35},
+  abstract = {The RAVDESS is a validated multimodal database of emotional speech and song. The database is gender balanced consisting of 24 professional actors, vocalizing lexically-matched statements in a neutral North American accent. Speech includes calm, happy, sad, angry, fearful, surprise, and disgust expressions, and song contains calm, happy, sad, angry, and fearful emotions. Each expression is produced at two levels of emotional intensity, with an additional neutral expression. All conditions are available in face-and-voice, face-only, and voice-only formats. The set of 7356 recordings were each rated 10 times on emotional validity, intensity, and genuineness. Ratings were provided by 247 individuals who were characteristic of untrained research participants from North America. A further set of 72 participants provided test-retest data. High levels of emotional validity and test-retest intrarater reliability were reported. Corrected accuracy and composite "goodness" measures are presented to assist researchers in the selection of stimuli. All recordings are made freely available under a Creative Commons license and can be downloaded at https://doi.org/10.5281/zenodo.1188976.},
+  number = {5},
+}
+
+@misc{resd,
+  author = {Artem Amentes and Nikita Davidchuk and Ilya Lubenets},
+  title = {{Russian Emotional Speech Dialogs with annotated text}},
+  year = {2022},
+  publisher = {Hugging Face},
+  journal = {Hugging Face Hub},
+  howpublished = {\url{https://huggingface.co/datasets/Aniemore/resd_annotated}},
+}
+
+@article{subesco,
+  doi = {10.1371/journal.pone.0250173},
+  author = {Sultana, Sadia AND Rahman, M. Shahidur AND Selim, M. Reza AND Iqbal, M. Zafar},
+  journal = {PLOS ONE},
+  publisher = {Public Library of Science},
+  title = {{SUST Bangla Emotional Speech Corpus (SUBESCO): An audio-only emotional speech corpus for Bangla}},
+  year = {2021},
+  month = {04},
+  volume = {16},
+  url = {https://doi.org/10.1371/journal.pone.0250173},
+  pages = {1-27},
+  abstract = {SUBESCO is an audio-only emotional speech corpus for Bangla language. The total duration of the corpus is in excess of 7 hours containing 7000 utterances, and it is the largest emotional speech corpus available for this language. Twenty native speakers participated in the gender-balanced set, each recording of 10 sentences simulating seven targeted emotions. Fifty university students participated in the evaluation of this corpus. Each audio clip of this corpus, except those of Disgust emotion, was validated four times by male and female raters. Raw hit rates and unbiased rates were calculated producing scores above chance level of responses. Overall recognition rate was reported to be above 70% for human perception tests. Kappa statistics and intra-class correlation coefficient scores indicated high-level of inter-rater reliability and consistency of this corpus evaluation. SUBESCO is an Open Access database, licensed under Creative Common Attribution 4.0 International, and can be downloaded free of charge from the web link: https://doi.org/10.5281/zenodo.4526477.},
+  number = {4},
+}
+
+@misc{chu2024qwen2audiotechnicalreport,
+  title = {Qwen2-Audio Technical Report},
+  author = {Yunfei Chu and Jin Xu and Qian Yang and Haojie Wei and Xipin Wei and Zhifang Guo and Yichong Leng and Yuanjun Lv and Jinzheng He and Junyang Lin and Chang Zhou and Jingren Zhou},
+  year = {2024},
+  eprint = {2407.10759},
+  archivePrefix = {arXiv},
+  primaryClass = {eess.AS},
+  url = {https://arxiv.org/abs/2407.10759},
+}
+
+@article{Ichigo,
+  title = {Llama3-S},
+  author = {Homebrew Research},
+  year = {2024},
+  month = {August},
+  url = {https://huggingface.co/homebrewltd/llama3.1-s-2024-08-20}
+}
+
+@misc{zhang2024seallms3openfoundation,
+  title = {SeaLLMs 3: Open Foundation and Chat Multilingual Large Language Models for Southeast Asian Languages},
+  author = {Wenxuan Zhang and Hou Pong Chan and Yiran Zhao and Mahani Aljunied and Jianyu Wang and Chaoqun Liu and Yue Deng and Zhiqiang Hu and Weiwen Xu and Yew Ken Chia and Xin Li and Lidong Bing},
+  year = {2024},
+  eprint = {2407.19672},
+  archivePrefix = {arXiv},
+  primaryClass = {cs.CL},
+  url = {https://arxiv.org/abs/2407.19672},
+}
+
+@misc{ultravox2024,
+  author = {{Fixie AI}},
+  title = {{Ultravox v0.5 (Llama 3.1 8B)}},
+  year = {2024},
+  howpublished = {\url{https://huggingface.co/fixie-ai/ultravox-v0_5-llama-3_1-8b}},
+}
pages/submit.md ADDED
@@ -0,0 +1,15 @@
+# ✉️✨ Submit Your Model Here! ✨✉️
+
+Help us improve the leaderboard by submitting your model.
+
+<br>
+
+## 📌 How to Submit Your Model:
+
+✉️ **Step 1:** Send an email to [`[email protected]`](mailto:[email protected]).
+
+🔗 **Step 2:** Include a link to your speech emotion recognition model.
+
+🏆 **Step 3:** Once evaluated, your model will join the leaderboard.
+
+Thanks for sharing your work with us and making this project even better!