Upload folder using huggingface_hub
Browse files- app/content.py +32 -2
- app/pages.py +0 -5
app/content.py
CHANGED
@@ -69,6 +69,13 @@ displayname2datasetname = {
|
|
69 |
'YouTube ASR: Chinese with English Prompt': 'ytb_asr_batch3_chinese',
|
70 |
'YouTube ASR: Chinese with Chinese Prompt': 'ytb_asr_batch3_zh_zh_prompt',
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
'SEAME-Dev-Mandarin' : 'seame_dev_man',
|
74 |
'SEAME-Dev-Singlish' : 'seame_dev_sge',
|
@@ -158,21 +165,44 @@ dataset_diaplay_information = {
|
|
158 |
|
159 |
'YouTube ASR: Chinese with Chinese Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Chinese and some Chinese-English codeswitch audio clips, featuring with Chinese prompts. <br> It includes approximately 3.32 hours of audio, with individual clips ranging from 17 seconds to 1966 seconds in length.',
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
'SEAME-Dev-Mandarin' : 'Under Development',
|
162 |
'SEAME-Dev-Singlish' : 'Under Development',
|
163 |
|
164 |
'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 7.6 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 5.4 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 41.4 hours of audio, with individual clips ranging from 41 seconds to 83 seconds in length.',
|
169 |
|
170 |
|
171 |
}
|
172 |
|
173 |
|
174 |
-
|
175 |
-
|
176 |
metrics_info = {
|
177 |
'wer' : 'Word Error Rate (WER) - The Lower, the better.',
|
178 |
'llama3_70b_judge_binary': 'Model-as-a-Judge Peformance. Using LLAMA-3-70B. Scale from 0-100. The higher, the better.',
|
|
|
69 |
'YouTube ASR: Chinese with English Prompt': 'ytb_asr_batch3_chinese',
|
70 |
'YouTube ASR: Chinese with Chinese Prompt': 'ytb_asr_batch3_zh_zh_prompt',
|
71 |
|
72 |
+
'YouTube SQA: Malay': 'ytb_sqa_batch3_malay',
|
73 |
+
'YouTube SQA: Chinese': 'ytb_sqa_batch3_chinese',
|
74 |
+
'YouTube SQA: Tamil': 'ytb_sqa_batch3_tamil',
|
75 |
+
|
76 |
+
'YouTube SDS: Malay': 'ytb_sds_batch3_malay',
|
77 |
+
'YouTube SDS: Chinese': 'ytb_sds_batch3_chinese',
|
78 |
+
'YouTube SDS: Tamil': 'ytb_sds_batch3_tamil',
|
79 |
|
80 |
'SEAME-Dev-Mandarin' : 'seame_dev_man',
|
81 |
'SEAME-Dev-Singlish' : 'seame_dev_sge',
|
|
|
165 |
|
166 |
'YouTube ASR: Chinese with Chinese Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Chinese and some Chinese-English codeswitch audio clips, featuring with Chinese prompts. <br> It includes approximately 3.32 hours of audio, with individual clips ranging from 17 seconds to 1966 seconds in length.',
|
167 |
|
168 |
+
'YouTube ASR: Tamil with Tamil Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Tamil and some Tamil-English codeswitch audio clips, featuring with Tamil prompts. <br> It includes approximately 2.44 hours of audio, with individual clips ranging from 30 seconds to 324 seconds in length.',
|
169 |
+
|
170 |
+
'YouTube ASR: Tamil with English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Tamil and some Tamil-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.44 hours of audio, with individual clips ranging from 30 seconds to 324 seconds in length.',
|
171 |
+
|
172 |
+
'YouTube ASR Translation: Malay2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Malay</i>',
|
173 |
+
|
174 |
+
# 'YouTube ASR Translation: Chinese2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Chinese</i>',
|
175 |
+
|
176 |
+
# 'YouTube ASR Translation: Tamil2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Tamil</i>',
|
177 |
+
|
178 |
+
|
179 |
+
|
180 |
'SEAME-Dev-Mandarin' : 'Under Development',
|
181 |
'SEAME-Dev-Singlish' : 'Under Development',
|
182 |
|
183 |
'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 7.6 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
184 |
|
185 |
+
'YouTube SQA: Malay': 'YouTube Evaluation Dataset for Speech-QA Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Malay</i>; it contains Malay and some Malay-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.',
|
186 |
+
|
187 |
+
'YouTube SQA: Chinese': 'YouTube Evaluation Dataset for Speech-QA Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Chinese</i>',
|
188 |
+
|
189 |
+
'YouTube SQA: Tamil': 'YouTube Evaluation Dataset for Speech-QA Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Tamil</i>',
|
190 |
+
|
191 |
'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 5.4 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
192 |
|
193 |
+
'YouTube SDS: Malay': 'YouTube Evaluation Dataset for Summary Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Malay</i>; it contains Malay and some Malay-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.',
|
194 |
+
|
195 |
+
'YouTube SDS: Chinese': 'YouTube Evaluation Dataset for Summary Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Chinese</i>',
|
196 |
+
|
197 |
+
'YouTube SDS: Tamil': 'YouTube Evaluation Dataset for Summary Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Tamil</i>',
|
198 |
+
|
199 |
+
|
200 |
'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 41.4 hours of audio, with individual clips ranging from 41 seconds to 83 seconds in length.',
|
201 |
|
202 |
|
203 |
}
|
204 |
|
205 |
|
|
|
|
|
206 |
metrics_info = {
|
207 |
'wer' : 'Word Error Rate (WER) - The Lower, the better.',
|
208 |
'llama3_70b_judge_binary': 'Model-as-a-Judge Peformance. Using LLAMA-3-70B. Scale from 0-100. The higher, the better.',
|
app/pages.py
CHANGED
@@ -522,11 +522,6 @@ def music_understanding():
|
|
522 |
|
523 |
|
524 |
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
def under_development():
|
531 |
st.title("Task: Under Development")
|
532 |
|
|
|
522 |
|
523 |
|
524 |
|
|
|
|
|
|
|
|
|
|
|
525 |
def under_development():
|
526 |
st.title("Task: Under Development")
|
527 |
|