Spaces:

amu-cai
/

cameo-leaderboard

Running

App Files Files Community

iwonachristop committed on May 20

Commit

08336d1

verified ·

1 Parent(s): 2921097

Update pages/citation.bib

Browse files

Files changed (1) hide show

pages/citation.bib +117 -98

pages/citation.bib CHANGED Viewed

@@ -1,50 +1,60 @@
 @inproceedings{cafe,
-    author = {Gournay, Philippe and Lahaie, Olivier and Lefebvre, Roch},
-    title = {{A Canadian French Emotional Speech Dataset}},
-    year = {2018},
-    isbn = {9781450351928},
-    publisher = {Association for Computing Machinery},
-    address = {New York, NY, USA},
-    url = {https://doi.org/10.1145/3204949.3208121},
-    doi = {10.1145/3204949.3208121},
-    booktitle = {Proceedings of the 9th ACM Multimedia Systems Conference},
-    pages = {399–402},
-    numpages = {4},
-    keywords = {canadian french, digital recording, emotional speech, speech dataset},
-    location = {Amsterdam, Netherlands},
-    series = {MMSys '18}
 }
 @article{cremad,
-author = {Cao, Houwei and Cooper, David and Keutmann, Michael and Gur, Ruben and Nenkova, Ani and Verma, Ragini},
-year = {2014},
-month = {10},
-pages = {377-390},
-title = {{CREMA-D: Crowd-sourced emotional multimodal actors dataset}},
-volume = {5},
-journal = {IEEE transactions on affective computing},
-doi = {10.1109/TAFFC.2014.2336244}
 }
 @misc{emns,
-      title={{EMNS /Imz/ Corpus: An emotive single-speaker dataset for narrative storytelling in games, television and graphic novels}},
-      author={Kari Ali Noriy and Xiaosong Yang and Jian Jun Zhang},
-      year={2023},
-      eprint={2305.13137},
-      archivePrefix={arXiv},
-      primaryClass={cs.CL},
-      url={https://arxiv.org/abs/2305.13137},
 }
 @article{emozionalmente,
-author = {Catania, Fabio and Wilke, Jordan and Garzotto, Franca},
-year = {2025},
-month = {01},
-pages = {1-14},
-title = {{Emozionalmente: A Crowdsourced Corpus of Simulated Emotional Speech in Italian}},
-volume = {PP},
-journal = {IEEE Transactions on Audio, Speech and Language Processing},
-doi = {10.1109/TASLPRO.2025.3540662}
 }
 @INPROCEEDINGS{enterface,
@@ -59,43 +69,52 @@ doi = {10.1109/TASLPRO.2025.3540662}
   doi={10.1109/ICDEW.2006.145}}
 @inproceedings{jlcorpus,
-author = {James, Jesin and Tian, Li and Watson, Catherine},
-year = {2018},
-month = {09},
-pages = {2768-2772},
-title = {{An Open Source Emotional Speech Corpus for Human Robot Interaction Applications}},
-doi = {10.21437/Interspeech.2018-1349}
 }
 @inproceedings{mesd,
-author = {Duville, Mathilde Marie and Alonso-Valerdi, Luz and Ibarra-Zarate, David I.},
-year = {2021},
-month = {12},
-pages = {},
-title = {{The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning}},
-volume = {2021},
-doi = {10.1109/EMBC46164.2021.9629934}
 }
 @article{mesd2,
-author = {Duville, Mathilde Marie and Alonso-Valerdi, Luz and Ibarra-Zarate, David I.},
-year = {2021},
-month = {12},
-pages = {},
-title = {{Mexican Emotional Speech Database Based on Semantic, Frequency, Familiarity, Concreteness, and Cultural Shaping of Affective Prosody}},
-volume = {6},
-journal = {Data},
-doi = {10.3390/data6120130}
 }
-@misc{christop2024nemodatasetemotionalspeech,
-      title={{nEMO: Dataset of Emotional Speech in Polish}},
-      author={Iwona Christop},
-      year={2024},
-      eprint={2404.06292},
-      archivePrefix={arXiv},
-      primaryClass={cs.CL},
-      url={https://arxiv.org/abs/2404.06292},
 }
 @MISC{oreau,
@@ -120,29 +139,29 @@ doi = {10.3390/data6120130}
 }
 @inproceedings{pavoque,
-    author = {Steiner, Ingmar and Schröder, Marc and Klepp, Annette},
-    title = {{The PAVOQUE corpus as a resource for analysis and synthesis of expressive speech}},
-    booktitle = {Phonetik & Phonologie 9. Phonetik & Phonologie (P&P-9), October 11-12, Zurich, Switzerland},
-    year = {2013},
-    month = {10},
-    pages = {83--84},
-    organization = {UZH},
-    publisher = {Peter Lang}
 }
 @article{ravdess,
-    doi = {10.1371/journal.pone.0196391},
-    author = {Livingstone, Steven R. AND Russo, Frank A.},
-    journal = {PLOS ONE},
-    publisher = {Public Library of Science},
-    title = {{The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English}},
-    year = {2018},
-    month = {05},
-    volume = {13},
-    url = {https://doi.org/10.1371/journal.pone.0196391},
-    pages = {1-35},
-    abstract = {The RAVDESS is a validated multimodal database of emotional speech and song. The database is gender balanced consisting of 24 professional actors, vocalizing lexically-matched statements in a neutral North American accent. Speech includes calm, happy, sad, angry, fearful, surprise, and disgust expressions, and song contains calm, happy, sad, angry, and fearful emotions. Each expression is produced at two levels of emotional intensity, with an additional neutral expression. All conditions are available in face-and-voice, face-only, and voice-only formats. The set of 7356 recordings were each rated 10 times on emotional validity, intensity, and genuineness. Ratings were provided by 247 individuals who were characteristic of untrained research participants from North America. A further set of 72 participants provided test-retest data. High levels of emotional validity and test-retest intrarater reliability were reported. Corrected accuracy and composite "goodness" measures are presented to assist researchers in the selection of stimuli. All recordings are made freely available under a Creative Commons license and can be downloaded at https://doi.org/10.5281/zenodo.1188976.},
-    number = {5},
 }
 @misc{resd,
@@ -155,16 +174,16 @@ doi = {10.3390/data6120130}
 }
 @article{subesco,
-    doi = {10.1371/journal.pone.0250173},
-    author = {Sultana, Sadia AND Rahman, M. Shahidur AND Selim, M. Reza AND Iqbal, M. Zafar},
-    journal = {PLOS ONE},
-    publisher = {Public Library of Science},
-    title = {{SUST Bangla Emotional Speech Corpus (SUBESCO): An audio-only emotional speech corpus for Bangla}},
-    year = {2021},
-    month = {04},
-    volume = {16},
-    url = {https://doi.org/10.1371/journal.pone.0250173},
-    pages = {1-27},
-    abstract = {SUBESCO is an audio-only emotional speech corpus for Bangla language. The total duration of the corpus is in excess of 7 hours containing 7000 utterances, and it is the largest emotional speech corpus available for this language. Twenty native speakers participated in the gender-balanced set, each recording of 10 sentences simulating seven targeted emotions. Fifty university students participated in the evaluation of this corpus. Each audio clip of this corpus, except those of Disgust emotion, was validated four times by male and female raters. Raw hit rates and unbiased rates were calculated producing scores above chance level of responses. Overall recognition rate was reported to be above 70% for human perception tests. Kappa statistics and intra-class correlation coefficient scores indicated high-level of inter-rater reliability and consistency of this corpus evaluation. SUBESCO is an Open Access database, licensed under Creative Common Attribution 4.0 International, and can be downloaded free of charge from the web link: https://doi.org/10.5281/zenodo.4526477.},
-    number = {4},
 }

+@misc{christop2025cameocollectionmultilingualemotional,
+  title={{CAMEO}: Collection of Multilingual Emotional Speech Corpora},
+  author={Christop, Iwona and Czajka, Maciej},
+  year={2025},
+  eprint={2505.11051},
+  archivePrefix={arXiv},
+  primaryClass={cs.CL},
+  url={https://arxiv.org/abs/2505.11051},
+}
 @inproceedings{cafe,
+  author = {Gournay, Philippe and Lahaie, Olivier and Lefebvre, Roch},
+  title = {{A Canadian French Emotional Speech Dataset}},
+  year = {2018},
+  isbn = {9781450351928},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/3204949.3208121},
+  doi = {10.1145/3204949.3208121},
+  booktitle = {Proceedings of the 9th ACM Multimedia Systems Conference},
+  pages = {399--402},
+  numpages = {4},
+  keywords = {canadian french, digital recording, emotional speech, speech dataset},
+  location = {Amsterdam, Netherlands},
+  series = {MMSys '18}
 }
 @article{cremad,
+  author = {Cao, Houwei and Cooper, David and Keutmann, Michael and Gur, Ruben and Nenkova, Ani and Verma, Ragini},
+  year = {2014},
+  month = oct,
+  pages = {377--390},
+  title = {{CREMA-D: Crowd-sourced emotional multimodal actors dataset}},
+  volume = {5},
+  journal = {IEEE Transactions on Affective Computing},
+  doi = {10.1109/TAFFC.2014.2336244}
 }
 @misc{emns,
+  author        = {Kari Ali Noriy and Xiaosong Yang and Jian Jun Zhang},
+  title         = {{EMNS /Imz/ Corpus: An emotive single-speaker dataset for narrative storytelling in games, television and graphic novels}},
+  year          = {2023},
+  eprint        = {2305.13137},
+  archivePrefix = {arXiv},
+  primaryClass  = {cs.CL},
+  url           = {https://arxiv.org/abs/2305.13137}
 }
 @article{emozionalmente,
+  author = {Catania, Fabio and Wilke, Jordan and Garzotto, Franca},
+  year = {2025},
+  month = jan,
+  pages = {1--14},
+  title = {{Emozionalmente: A Crowdsourced Corpus of Simulated Emotional Speech in Italian}},
+  volume = {PP},
+  journal = {IEEE Transactions on Audio, Speech and Language Processing},
+  doi = {10.1109/TASLPRO.2025.3540662}
 }
 @INPROCEEDINGS{enterface,
   doi={10.1109/ICDEW.2006.145}}
 @inproceedings{jlcorpus,
+  author = {James, Jesin and Tian, Li and Watson, Catherine},
+  year = {2018},
+  month = sep,
+  pages = {2768--2772},
+  title = {{An Open Source Emotional Speech Corpus for Human Robot Interaction Applications}},
+  booktitle = {Interspeech 2018},
+  doi = {10.21437/Interspeech.2018-1349}
 }
 @inproceedings{mesd,
+  author = {Duville, Mathilde Marie and Alonso-Valerdi, Luz and Ibarra-Zarate, David I.},
+  year = {2021},
+  month = dec,
+  title = {{The Mexican Emotional Speech Database (MESD): elaboration and assessment based on machine learning}},
+  booktitle = {2021 43rd Annual International Conference of the {IEEE} Engineering in Medicine \& Biology Society ({EMBC})},
+  volume = {2021},
+  doi = {10.1109/EMBC46164.2021.9629934}
 }
 @article{mesd2,
+  author = {Duville, Mathilde Marie and Alonso-Valerdi, Luz and Ibarra-Zarate, David I.},
+  year = {2021},
+  month = dec,
+  pages = {130},
+  title = {{Mexican Emotional Speech Database Based on Semantic, Frequency, Familiarity, Concreteness, and Cultural Shaping of Affective Prosody}},
+  volume = {6},
+  number = {12},
+  journal = {Data},
+  doi = {10.3390/data6120130}
 }
+@inproceedings{christop-2024-nemo,
+  author    = {Christop, Iwona},
+  title     = {n{EMO}: Dataset of Emotional Speech in {P}olish},
+  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
+  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
+  year      = {2024},
+  month     = may,
+  address   = {Torino, Italia},
+  publisher = {ELRA and ICCL},
+  pages     = {12111--12116},
+  url       = {https://aclanthology.org/2024.lrec-main.1059/},
+  abstract  = {Speech emotion recognition has become increasingly important in recent years due to its potential applications in healthcare, customer service, and personalization of dialogue systems. However, a major issue in this field is the lack of datasets that adequately represent basic emotional states across various language families. As datasets covering Slavic languages are rare, there is a need to address this research gap. This paper presents the development of nEMO, a novel corpus of emotional speech in Polish. The dataset comprises over 3 hours of samples recorded with the participation of nine actors portraying six emotional states: anger, fear, happiness, sadness, surprise, and a neutral state. The text material used was carefully selected to represent the phonetics of the Polish language adequately. The corpus is freely available under the terms of a Creative Commons license (CC BY-NC-SA 4.0).}
 }
 @MISC{oreau,
 }
 @inproceedings{pavoque,
+  author = {Steiner, Ingmar and Schr{\"o}der, Marc and Klepp, Annette},
+  title = {{The PAVOQUE corpus as a resource for analysis and synthesis of expressive speech}},
+  booktitle = {Phonetik \& Phonologie 9. Phonetik \& Phonologie (P\&P-9), October 11--12, Zurich, Switzerland},
+  year = {2013},
+  month = oct,
+  pages = {83--84},
+  organization = {UZH},
+  publisher = {Peter Lang}
 }
 @article{ravdess,
+  doi = {10.1371/journal.pone.0196391},
+  author = {Livingstone, Steven R. AND Russo, Frank A.},
+  journal = {PLOS ONE},
+  publisher = {Public Library of Science},
+  title = {{The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English}},
+  year = {2018},
+  month = may,
+  volume = {13},
+  url = {https://doi.org/10.1371/journal.pone.0196391},
+  pages = {1--35},
+  abstract = {The RAVDESS is a validated multimodal database of emotional speech and song. The database is gender balanced consisting of 24 professional actors, vocalizing lexically-matched statements in a neutral North American accent. Speech includes calm, happy, sad, angry, fearful, surprise, and disgust expressions, and song contains calm, happy, sad, angry, and fearful emotions. Each expression is produced at two levels of emotional intensity, with an additional neutral expression. All conditions are available in face-and-voice, face-only, and voice-only formats. The set of 7356 recordings were each rated 10 times on emotional validity, intensity, and genuineness. Ratings were provided by 247 individuals who were characteristic of untrained research participants from North America. A further set of 72 participants provided test-retest data. High levels of emotional validity and test-retest intrarater reliability were reported. Corrected accuracy and composite "goodness" measures are presented to assist researchers in the selection of stimuli. All recordings are made freely available under a Creative Commons license and can be downloaded at https://doi.org/10.5281/zenodo.1188976.},
+  number = {5},
 }
 @misc{resd,
 }
 @article{subesco,
+  doi = {10.1371/journal.pone.0250173},
+  author = {Sultana, Sadia AND Rahman, M. Shahidur AND Selim, M. Reza AND Iqbal, M. Zafar},
+  journal = {PLOS ONE},
+  publisher = {Public Library of Science},
+  title = {{SUST Bangla Emotional Speech Corpus (SUBESCO): An audio-only emotional speech corpus for Bangla}},
+  year = {2021},
+  month = apr,
+  volume = {16},
+  url = {https://doi.org/10.1371/journal.pone.0250173},
+  pages = {1--27},
+  abstract = {SUBESCO is an audio-only emotional speech corpus for Bangla language. The total duration of the corpus is in excess of 7 hours containing 7000 utterances, and it is the largest emotional speech corpus available for this language. Twenty native speakers participated in the gender-balanced set, each recording of 10 sentences simulating seven targeted emotions. Fifty university students participated in the evaluation of this corpus. Each audio clip of this corpus, except those of Disgust emotion, was validated four times by male and female raters. Raw hit rates and unbiased rates were calculated producing scores above chance level of responses. Overall recognition rate was reported to be above 70\% for human perception tests. Kappa statistics and intra-class correlation coefficient scores indicated high-level of inter-rater reliability and consistency of this corpus evaluation. SUBESCO is an Open Access database, licensed under Creative Common Attribution 4.0 International, and can be downloaded free of charge from the web link: https://doi.org/10.5281/zenodo.4526477.},
+  number = {4},
 }