update ctc results
Browse files- app.py +2 -2
- app/pages.py +1 -1
- model_information.py +4 -0
- results_organized/wer/asr_english.csv +14 -13
- results_organized/wer/asr_indonesian.csv +12 -11
- results_organized/wer/asr_malay.csv +6 -5
- results_organized/wer/asr_mandarin.csv +14 -13
- results_organized/wer/asr_private.csv +14 -13
- results_organized/wer/asr_singlish.csv +14 -13
- results_organized/wer/asr_tamil.csv +10 -9
- results_organized/wer/asr_thai.csv +11 -10
- results_organized/wer/asr_vietnamese.csv +11 -10
app.py
CHANGED
@@ -24,7 +24,7 @@ pages = {
|
|
24 |
'ASR-Indonesian' : asr_indonesian,
|
25 |
'ASR-Thai' : asr_thai,
|
26 |
'ASR-Vietnamese' : asr_vietnamese,
|
27 |
-
'ASR-
|
28 |
'Speech Translation' : speech_translation,
|
29 |
'SQA-English' : speech_question_answering_english,
|
30 |
'SQA-Singlish' : speech_question_answering_singlish,
|
@@ -58,7 +58,7 @@ menu_items = [
|
|
58 |
sac.MenuItem(label='ASR-Indonesian', icon='mic'),
|
59 |
sac.MenuItem(label='ASR-Thai', icon='mic'),
|
60 |
sac.MenuItem(label='ASR-Vietnamese', icon='mic'),
|
61 |
-
sac.MenuItem(label='ASR-
|
62 |
]
|
63 |
),
|
64 |
|
|
|
24 |
'ASR-Indonesian' : asr_indonesian,
|
25 |
'ASR-Thai' : asr_thai,
|
26 |
'ASR-Vietnamese' : asr_vietnamese,
|
27 |
+
'ASR-Other' : asr_private,
|
28 |
'Speech Translation' : speech_translation,
|
29 |
'SQA-English' : speech_question_answering_english,
|
30 |
'SQA-Singlish' : speech_question_answering_singlish,
|
|
|
58 |
sac.MenuItem(label='ASR-Indonesian', icon='mic'),
|
59 |
sac.MenuItem(label='ASR-Thai', icon='mic'),
|
60 |
sac.MenuItem(label='ASR-Vietnamese', icon='mic'),
|
61 |
+
sac.MenuItem(label='ASR-Other', icon='mic'),
|
62 |
]
|
63 |
),
|
64 |
|
app/pages.py
CHANGED
@@ -282,7 +282,7 @@ def asr_vietnamese():
|
|
282 |
|
283 |
|
284 |
def asr_private():
|
285 |
-
st.title("Task: Automatic Speech Recognition -
|
286 |
|
287 |
sum = ['Overall']
|
288 |
|
|
|
282 |
|
283 |
|
284 |
def asr_private():
|
285 |
+
st.title("Task: Automatic Speech Recognition - Other Datasets")
|
286 |
|
287 |
sum = ['Overall']
|
288 |
|
model_information.py
CHANGED
@@ -48,6 +48,10 @@ data['Original Name'].append('MERaLiON-AudioLLM-v2-9b-asr')
|
|
48 |
data['Proper Display Name'].append('Fusion: MERaLiON-2-10B-ASR')
|
49 |
data['Link'].append('https://huggingface.co/MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION')
|
50 |
|
|
|
|
|
|
|
|
|
51 |
data['Original Name'].append('phi_4_multimodal_instruct')
|
52 |
data['Proper Display Name'].append('Fusion: Phi-4-multimodal-instruct')
|
53 |
data['Link'].append('https://huggingface.co/microsoft/Phi-4-multimodal-instruct')
|
|
|
48 |
data['Proper Display Name'].append('Fusion: MERaLiON-2-10B-ASR')
|
49 |
data['Link'].append('https://huggingface.co/MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION')
|
50 |
|
51 |
+
data['Original Name'].append('MERaLiON-SpeechEncoder2-ASR-CTC')
|
52 |
+
data['Proper Display Name'].append('Fusion: MERaLiON-SpeechEncoder2-ASR-CTC')
|
53 |
+
data['Link'].append('https://huggingface.co/MERaLiON/MERaLiON-SpeechEncoder-2')
|
54 |
+
|
55 |
data['Original Name'].append('phi_4_multimodal_instruct')
|
56 |
data['Proper Display Name'].append('Fusion: Phi-4-multimodal-instruct')
|
57 |
data['Link'].append('https://huggingface.co/microsoft/Phi-4-multimodal-instruct')
|
results_organized/wer/asr_english.csv
CHANGED
@@ -4,16 +4,17 @@ Qwen2-Audio-7B-Instruct,0.0351416606934017,0.0604157603041594,0.114388725008194,
|
|
4 |
old_models,,,,,,,,,
|
5 |
gemini-1.5-flash,,,,,,,,,
|
6 |
WavLLM_fairseq,0.0210321801788206,0.0479883481188643,0.1453332562130063,0.3792176325635977,0.154917784145464,0.6447482518259942,0.6671766188447099,0.0662148255917107,0.4536784258110264
|
7 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.
|
8 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
9 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
10 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
11 |
-
Qwen2.5-Omni-3B,0.
|
12 |
-
Qwen2.5-Omni-7B,0.
|
13 |
-
SALMONN_7B,0.
|
14 |
-
SeaLLMs-Audio-7B,0.
|
15 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.
|
16 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
17 |
-
hy_whisper_local_cs,0.
|
18 |
-
phi_4_multimodal_instruct,0.
|
19 |
-
whisper_large_v3,0.
|
|
|
|
4 |
old_models,,,,,,,,,
|
5 |
gemini-1.5-flash,,,,,,,,,
|
6 |
WavLLM_fairseq,0.0210321801788206,0.0479883481188643,0.1453332562130063,0.3792176325635977,0.154917784145464,0.6447482518259942,0.6671766188447099,0.0662148255917107,0.4536784258110264
|
7 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.0239370732259403,0.0422569845082944,0.0773423490014847,0.2162032352994574,0.1447721045203051,0.1383892341385865,0.1655357488642665,0.0815443028991164,0.1051232051054777
|
8 |
+
MERaLiON-AudioLLM-v2-2b,0.0271249104010261,0.0509580645771464,0.0927619696492502,0.2062705589729962,0.0923790829027624,0.2188608242265233,0.2393591837520922,0.0345622937440119,0.1383797199078177
|
9 |
+
MERaLiON-AudioLLM-v2-9b,0.024974535028483,0.0466075245427204,0.0900174185793895,0.2047653079245195,0.0902306155346474,0.1084090226901313,0.1506214218439992,0.0351300521628047,0.0435738344265201
|
10 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.020956728411363,0.0404032761457998,0.0759154395459599,0.1957668115250735,0.0876810340721353,0.0921084812842547,0.1277414998676963,0.0313686526383024,0.0349583407197305
|
11 |
+
Qwen2.5-Omni-3B,0.0211076319462783,0.044924054703312,0.0882305679999485,0.2615060102759792,0.1144654277255075,0.1465408944869984,0.1968800659389456,0.048046556190341,0.0714766885304024
|
12 |
+
Qwen2.5-Omni-7B,0.0440449692534047,0.068776363326839,0.0796241186263104,0.3124105638254503,0.1396754485583708,0.1893975608942646,0.2410502378931979,0.049146588126752,0.0838149264314837
|
13 |
+
SALMONN_7B,0.0963896329271513,0.1177672271927667,0.3197948335593678,0.2415894922913651,0.1102487158081571,0.2773315471756845,0.3795646042497366,0.0393527554025762,0.1413933699698634
|
14 |
+
SeaLLMs-Audio-7B,0.0510619836269664,0.0971116197249702,0.1575192343538092,0.3754099415889971,0.1272885439307677,0.3793954486331447,0.4555525478853205,0.0471239487598027,0.0899485906754121
|
15 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.0329912853208586,0.0538142886867043,0.1053020613056864,0.2028589866953632,0.0999425905452394,0.1409183889006236,0.1718792295362679,0.0493949824349739,0.0863676653075695
|
16 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.0180329724223789,0.035504189759207,0.0979489783456848,0.1454201251404983,0.0950164080734239,0.1087230825671754,0.1459710229559586,0.0381462687626414,0.0493529516043254
|
17 |
+
hy_whisper_local_cs,0.0290866563549251,0.0559138971381012,0.1060026609933089,0.1787914748654434,0.102128662359704,0.1492507031606096,0.1701445810737788,0.0466626450445335,0.0697394079063995
|
18 |
+
phi_4_multimodal_instruct,0.0168446070849209,0.0385117370003972,0.0794891407048418,0.2147161396912585,0.0988294989332872,0.1306461295594268,0.2257202440876468,0.028636315247862,0.0506293210423683
|
19 |
+
whisper_large_v3,0.0189383936318708,0.0363175515917301,0.0981932241083422,0.1455769221247146,0.0959389704905374,0.1078364887549888,0.1409171231397644,0.0382882083673397,0.045559297996809
|
20 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.0412,0.0623,0.12480000000000001,0.2095,0.1334,0.153,0.16820000000000002,0.042199999999999994,0.044000000000000004
|
results_organized/wer/asr_indonesian.csv
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
Model,commonvoice_17_id_asr,gigaspeech2_id_test
|
2 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.
|
3 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
4 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
5 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
6 |
-
Qwen2.5-Omni-3B,0.
|
7 |
-
Qwen2.5-Omni-7B,0.
|
8 |
SALMONN_7B,1.1888858220627472,2.1181172136986777
|
9 |
-
SeaLLMs-Audio-7B,0.
|
10 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.
|
11 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
12 |
-
hy_whisper_local_cs,0.
|
13 |
phi_4_multimodal_instruct,1.327169012788665,5.803850364012302
|
14 |
-
whisper_large_v3,0.
|
|
|
|
1 |
Model,commonvoice_17_id_asr,gigaspeech2_id_test
|
2 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.259545496365811,0.337184855698226
|
3 |
+
MERaLiON-AudioLLM-v2-2b,0.0854724445671174,0.1784268413462373
|
4 |
+
MERaLiON-AudioLLM-v2-9b,0.1133498941944981,0.1722759890883186
|
5 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.0792161192382003,0.1628238319462061
|
6 |
+
Qwen2.5-Omni-3B,0.1357990615512006,0.2746338157871875
|
7 |
+
Qwen2.5-Omni-7B,0.1099457171772932,0.227373030076625
|
8 |
SALMONN_7B,1.1888858220627472,2.1181172136986777
|
9 |
+
SeaLLMs-Audio-7B,0.1243444659122274,0.3161086303059788
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.0997791885178029,0.2191718937327333
|
11 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.0781580642193394,0.1926224523482703
|
12 |
+
hy_whisper_local_cs,0.1026773392216395,0.2138203047625666
|
13 |
phi_4_multimodal_instruct,1.327169012788665,5.803850364012302
|
14 |
+
whisper_large_v3,0.0781580642193394,0.1960453842545132
|
15 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.0559,0.16620000000000001
|
results_organized/wer/asr_malay.csv
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
Model,ytb_asr_batch3_malay
|
2 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.
|
3 |
MERaLiON-AudioLLM-v2-2b,0.2798911851169321
|
4 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
5 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
6 |
Qwen2.5-Omni-3B,2.943749725768944
|
7 |
-
Qwen2.5-Omni-7B,1.
|
8 |
SALMONN_7B,1.0858672282918695
|
9 |
SeaLLMs-Audio-7B,0.7655653547452942
|
10 |
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.3143784827344127
|
11 |
cascade_whisper_large_v3_llama_3_8b_instruct,0.3119213724715897
|
12 |
hy_whisper_local_cs,0.2421569917950068
|
13 |
phi_4_multimodal_instruct,3.762932736606555
|
14 |
-
whisper_large_v3,0.
|
|
|
|
1 |
Model,ytb_asr_batch3_malay
|
2 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.2898951340441402
|
3 |
MERaLiON-AudioLLM-v2-2b,0.2798911851169321
|
4 |
+
MERaLiON-AudioLLM-v2-9b,0.2090737571848536
|
5 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.1946382343907682
|
6 |
Qwen2.5-Omni-3B,2.943749725768944
|
7 |
+
Qwen2.5-Omni-7B,1.460664297310342
|
8 |
SALMONN_7B,1.0858672282918695
|
9 |
SeaLLMs-Audio-7B,0.7655653547452942
|
10 |
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.3143784827344127
|
11 |
cascade_whisper_large_v3_llama_3_8b_instruct,0.3119213724715897
|
12 |
hy_whisper_local_cs,0.2421569917950068
|
13 |
phi_4_multimodal_instruct,3.762932736606555
|
14 |
+
whisper_large_v3,0.2196919836777675
|
15 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.1559
|
results_organized/wer/asr_mandarin.csv
CHANGED
@@ -4,16 +4,17 @@ Qwen2-Audio-7B-Instruct,0.0926035912969452,,
|
|
4 |
old_models,,,
|
5 |
gemini-1.5-flash,,,
|
6 |
WavLLM_fairseq,0.7054601967888183,,
|
7 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.
|
8 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
9 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
10 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
11 |
-
Qwen2.5-Omni-3B,0.
|
12 |
-
Qwen2.5-Omni-7B,0.
|
13 |
-
SALMONN_7B,0.9314703727900854,1.
|
14 |
-
SeaLLMs-Audio-7B,0.
|
15 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.
|
16 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
17 |
-
hy_whisper_local_cs,0.
|
18 |
-
phi_4_multimodal_instruct,0.
|
19 |
-
whisper_large_v3,0.
|
|
|
|
4 |
old_models,,,
|
5 |
gemini-1.5-flash,,,
|
6 |
WavLLM_fairseq,0.7054601967888183,,
|
7 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.1284670665795569,0.3269799259362027,0.418102808691044
|
8 |
+
MERaLiON-AudioLLM-v2-2b,0.0501078972896992,0.1313938721278934,0.2561314255431902
|
9 |
+
MERaLiON-AudioLLM-v2-9b,0.0578982795826651,0.1468469526055729,0.1913301536830948
|
10 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.0433172972223872,0.1183419954537208,0.1494223635400106
|
11 |
+
Qwen2.5-Omni-3B,0.0280730929896458,0.1130806911198147,0.250132485426603
|
12 |
+
Qwen2.5-Omni-7B,0.0243808279384688,0.0764756731374662,0.2064016958134605
|
13 |
+
SALMONN_7B,0.9314703727900854,1.0013340021130597,0.8858293587705353
|
14 |
+
SeaLLMs-Audio-7B,0.1775256874740155,0.0899224144371044,0.661897191308956
|
15 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.2088950921581437,0.3193814499002166,0.3469210386857446
|
16 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.1245075330126111,0.1962263748225777,0.2698675145733969
|
17 |
+
hy_whisper_local_cs,0.1567579339153847,0.287290695068461,0.2752093269740328
|
18 |
+
phi_4_multimodal_instruct,0.1223297895507909,0.154221316286565,0.4400847906730259
|
19 |
+
whisper_large_v3,0.1245471283482805,0.1975710489525415,0.190821409644939
|
20 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.0747,0.16510000000000002,0.1769
|
results_organized/wer/asr_private.csv
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
Model,cna_test,idpc_short_test,idpc_test,mediacorp_short_test,mediacorp_test,parliament_test,ukusnews_test,ytb_asr_batch1,ytb_asr_batch2
|
2 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.
|
3 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
4 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
5 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
6 |
-
Qwen2.5-Omni-3B,0.
|
7 |
-
Qwen2.5-Omni-7B,0.
|
8 |
-
SALMONN_7B,0.1492577165438428,0.2398190045248869,0.5414884516680923,0.
|
9 |
-
SeaLLMs-Audio-7B,0.
|
10 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.
|
11 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
12 |
-
hy_whisper_local_cs,0.
|
13 |
-
phi_4_multimodal_instruct,0.
|
14 |
-
whisper_large_v3,0.
|
|
|
|
1 |
Model,cna_test,idpc_short_test,idpc_test,mediacorp_short_test,mediacorp_test,parliament_test,ukusnews_test,ytb_asr_batch1,ytb_asr_batch2
|
2 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.1450389832318701,0.1649843369300382,0.2035928143712574,0.1282887339779626,0.1225089839921594,0.0587803954962626,0.1128757799205899,0.1072443727433356,0.1326846145529246
|
3 |
+
MERaLiON-AudioLLM-v2-2b,0.1349460642956317,0.1510616080751827,0.1774165953806672,0.1208680008994828,0.1225089839921594,0.1854480083262371,0.1738324825108716,0.0993316432357686,0.1599091793707427
|
4 |
+
MERaLiON-AudioLLM-v2-9b,0.1333440136708319,0.1566306996171249,0.1603079555175363,0.1169327636608949,0.1045409996733093,0.0602469486233323,0.0697296275288334,0.098486594453407,0.1110174072872743
|
5 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.1270960162341129,0.1400974591019839,0.1661248930710008,0.1178322464582864,0.1037242731133616,0.0528432207398997,0.0559652108148988,0.0923023738188522,0.0993620931992647
|
6 |
+
Qwen2.5-Omni-3B,0.1739826978532521,0.2111033762617473,0.1986313088109495,0.1476276141218799,0.1515844495262986,0.1004825432869713,0.0907543959160521,0.1622877775217024,0.2445453562547302
|
7 |
+
Qwen2.5-Omni-7B,0.1832211897895973,0.4138531152105812,0.2203592814371257,0.1414436698898133,0.2354622672329304,0.1102280253571766,0.176214785403668,0.1736191134670046,0.3505243810141636
|
8 |
+
SALMONN_7B,0.1492577165438428,0.2398190045248869,0.5414884516680923,0.1990105689228693,0.3636883371447239,0.2043003122338915,0.191869918699187,0.2207497887378044,0.3495513028435506
|
9 |
+
SeaLLMs-Audio-7B,0.2734700416533162,0.7187608771319178,1.1654405474764755,0.1875421632561277,0.3995426331264293,0.1938688617655407,0.1761391567404046,0.4502573557655374,0.9038382527840848
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.1517141941685357,0.1973546815175774,0.1704020530367835,0.1541488644029683,0.157546553413917,0.0900747469013151,0.1227831348080922,0.1247599293231927,0.125527084009082
|
11 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.1381501655452312,0.1534492642843493,0.1618477331052181,0.1143467506183944,0.1512577589023195,0.0653798845680764,0.089430894308943,0.1081662441422754,0.0838793383068439
|
12 |
+
hy_whisper_local_cs,0.1467478372316565,0.1830838844413505,0.1757057313943541,0.1288509107263323,0.1256125449199608,0.072570725707257,0.1694838343732274,0.1284858262272413,0.1431506108768515
|
13 |
+
phi_4_multimodal_instruct,0.1908042294136494,0.5388096066829099,0.2607356715141146,0.1217674836968743,0.1981378634433191,0.2778645094143249,0.0752127056154282,0.169393869555197,0.2323278192236998
|
14 |
+
whisper_large_v3,0.1416212752322973,0.1879568395405499,0.1654405474764756,0.1163705869125253,0.1128716105847762,0.0670829785220929,0.0697674418604651,0.106860259660444,0.0816953184128013
|
15 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.1628,0.1753,0.1855,0.1176,0.1052,0.0616,0.08039999999999999,0.1152,0.1192
|
results_organized/wer/asr_singlish.csv
CHANGED
@@ -4,16 +4,17 @@ Qwen2-Audio-7B-Instruct,0.0719771779679613,0.1905689473257041,0.3507616694273223
|
|
4 |
old_models,,,,,,
|
5 |
gemini-1.5-flash,,,,,,
|
6 |
WavLLM_fairseq,0.1007729256577182,0.4463923382842302,0.7540934640345399,1.143645714142011,0.3979658840524726,0.4254106170965293
|
7 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.
|
8 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
9 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
10 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
11 |
-
Qwen2.5-Omni-3B,0.
|
12 |
-
Qwen2.5-Omni-7B,0.
|
13 |
-
SALMONN_7B,0.
|
14 |
-
SeaLLMs-Audio-7B,0.
|
15 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.
|
16 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
17 |
-
hy_whisper_local_cs,0.
|
18 |
-
phi_4_multimodal_instruct,0.
|
19 |
-
whisper_large_v3,0.
|
|
|
|
4 |
old_models,,,,,,
|
5 |
gemini-1.5-flash,,,,,,
|
6 |
WavLLM_fairseq,0.1007729256577182,0.4463923382842302,0.7540934640345399,1.143645714142011,0.3979658840524726,0.4254106170965293
|
7 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.0430351352010338,0.0473581689797906,0.2129958997474678,0.296608784217078,0.1540616655236316,0.1087388362215152
|
8 |
+
MERaLiON-AudioLLM-v2-2b,0.0490576158778923,0.0581933284635987,0.2641404404377223,0.3595795244502006,0.2020253607856298,0.1493725673864242
|
9 |
+
MERaLiON-AudioLLM-v2-9b,0.0519591349084436,0.145320996672348,0.2265457408966247,0.2948987161915779,0.1676029825918197,0.1265524314023159
|
10 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.0436203155097164,0.0540946351757162,0.1962283107502647,0.2457091123992505,0.1403598371539887,0.0989680065892537
|
11 |
+
Qwen2.5-Omni-3B,0.0529832004486382,0.0947975002029056,0.4752084068753903,1.2504495215581737,0.2798879339277115,0.1830294416899497
|
12 |
+
Qwen2.5-Omni-7B,0.0529100529100529,0.0941076211346481,0.5354359573139272,1.3034993524374756,0.3737578614057871,0.2747137389169721
|
13 |
+
SALMONN_7B,0.0927510789261941,0.4578362145929713,0.681280039101746,0.7865181254636674,0.3753337905473435,0.2552205300473198
|
14 |
+
SeaLLMs-Audio-7B,0.1289834930387925,0.2900738576414252,1.195359383061341,1.8651561065774749,0.6310282937648656,0.6653369724963258
|
15 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.0705386097091166,0.3298433568703839,0.2810437993863198,0.4594298934979693,0.2182953699785498,0.1751481774576462
|
16 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.0692219540145807,0.3191299407515623,0.2770250088250468,0.4581096203900464,0.2139177890297821,0.1722411537654032
|
17 |
+
hy_whisper_local_cs,0.0669299978055738,0.2735167600032465,0.2558041654221087,0.3612895924757007,0.186411988735025,0.1441722250036337
|
18 |
+
phi_4_multimodal_instruct,0.0576158778923755,0.3451018586153721,0.4381839411301491,1.4697028756805697,0.2385927536443361,0.1439784234241509
|
19 |
+
whisper_large_v3,0.0691975715017189,0.319251684116549,0.2671952643440953,0.4567139031045279,0.2108972581751324,0.1704161888919394
|
20 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.048600000000000004,0.051100000000000007,0.1856,0.26780000000000004,0.1363,0.0968
|
results_organized/wer/asr_tamil.csv
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
Model,commonvoice_17_ta_asr,fleurs_tamil_ta_30_asr,ytb_asr_batch3_tamil
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.5284951114826634,0.4624736472241743,0.6929759165018962
|
3 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
4 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
5 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
6 |
-
Qwen2.5-Omni-3B,0.8307319012713203,1.653935347856641,1.
|
7 |
Qwen2.5-Omni-7B,0.8465494917777076,0.8666549543218552,1.3615441962983372
|
8 |
SALMONN_7B,1.4272941368377052,1.507519325368939,0.985267900554277
|
9 |
-
SeaLLMs-Audio-7B,1.
|
10 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.
|
11 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
12 |
-
hy_whisper_local_cs,0.
|
13 |
phi_4_multimodal_instruct,1.1784589191228196,1.7016514406184118,2.7500567242552916
|
14 |
-
whisper_large_v3,0.
|
|
|
|
1 |
Model,commonvoice_17_ta_asr,fleurs_tamil_ta_30_asr,ytb_asr_batch3_tamil
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.5284951114826634,0.4624736472241743,0.6929759165018962
|
3 |
+
MERaLiON-AudioLLM-v2-2b,0.1385300804387941,0.1432185523541813,0.7504943113675407
|
4 |
+
MERaLiON-AudioLLM-v2-9b,0.1559177057102368,0.1608573436401967,0.6644679264853651
|
5 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.1287122656417262,0.1383345045678145,0.5467894071504975
|
6 |
+
Qwen2.5-Omni-3B,0.8307319012713203,1.653935347856641,1.460763022268322
|
7 |
Qwen2.5-Omni-7B,0.8465494917777076,0.8666549543218552,1.3615441962983372
|
8 |
SALMONN_7B,1.4272941368377052,1.507519325368939,0.985267900554277
|
9 |
+
SeaLLMs-Audio-7B,1.2968793010286783,2.061876317638791,3.6174516223137014
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.2380539724938065,0.2724525650035137,0.9665002755178114
|
11 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.2440435531721838,0.283977512297962,0.8976532365239376
|
12 |
+
hy_whisper_local_cs,0.3179371374392121,0.3311314125087842,0.8339924151567211
|
13 |
phi_4_multimodal_instruct,1.1784589191228196,1.7016514406184118,2.7500567242552916
|
14 |
+
whisper_large_v3,0.2448438631011245,0.2314476458186929,0.8481572720495284
|
15 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.1442,0.1632,0.6578
|
results_organized/wer/asr_thai.csv
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
Model,gigaspeech2_th_test,lotus_thai_th_30_asr
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.9866395307075302,0.8520208370756243
|
3 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
4 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
5 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.
|
6 |
-
Qwen2.5-Omni-3B,0.3000742248294026,0.
|
7 |
-
Qwen2.5-Omni-7B,0.
|
8 |
SALMONN_7B,1.2470441757452413,1.1351535836177475
|
9 |
-
SeaLLMs-Audio-7B,0.2088686699389441,0.
|
10 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.276058900993655,0.
|
11 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
12 |
-
hy_whisper_local_cs,0.
|
13 |
phi_4_multimodal_instruct,1.7344522925894887,1.2856834920064666
|
14 |
-
whisper_large_v3,0.
|
|
|
|
1 |
Model,gigaspeech2_th_test,lotus_thai_th_30_asr
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.9866395307075302,0.8520208370756243
|
3 |
+
MERaLiON-AudioLLM-v2-2b,0.199683945887705,0.0148733608765942
|
4 |
+
MERaLiON-AudioLLM-v2-9b,0.2000478869867113,0.0186815160768816
|
5 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.1823823775888902,0.0106700197592958
|
6 |
+
Qwen2.5-Omni-3B,0.3000742248294026,0.0262259744925453
|
7 |
+
Qwen2.5-Omni-7B,0.2315096372560756,0.0214837434884138
|
8 |
SALMONN_7B,1.2470441757452413,1.1351535836177475
|
9 |
+
SeaLLMs-Audio-7B,0.2088686699389441,0.0254356026585234
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.276058900993655,0.0681516076881623
|
11 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.2087202202801388,0.0317945033231543
|
12 |
+
hy_whisper_local_cs,0.2648629235005387,0.076019400035926
|
13 |
phi_4_multimodal_instruct,1.7344522925894887,1.2856834920064666
|
14 |
+
whisper_large_v3,0.2139446905303483,0.0282019040776001
|
15 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.16699999999999998,0.0137
|
results_organized/wer/asr_vietnamese.csv
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
Model,commonvoice_17_vi_asr,gigaspeech2_vi_test
|
2 |
-
MERaLiON-AudioLLM-Whisper-SEA-LION,0.
|
3 |
-
MERaLiON-AudioLLM-v2-2b,0.
|
4 |
-
MERaLiON-AudioLLM-v2-9b,0.
|
5 |
-
MERaLiON-AudioLLM-v2-9b-asr,0.1423883125132331,0.
|
6 |
-
Qwen2.5-Omni-3B,0.
|
7 |
-
Qwen2.5-Omni-7B,0.
|
8 |
SALMONN_7B,1.496294727927165,1.5460526688938172
|
9 |
-
SeaLLMs-Audio-7B,0.
|
10 |
-
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.1567859411391065,0.
|
11 |
-
cascade_whisper_large_v3_llama_3_8b_instruct,0.
|
12 |
hy_whisper_local_cs,0.1681134871903451,0.1781020821398794
|
13 |
phi_4_multimodal_instruct,1.1070294304467498,2.5042567310800923
|
14 |
-
whisper_large_v3,0.
|
|
|
|
1 |
Model,commonvoice_17_vi_asr,gigaspeech2_vi_test
|
2 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,0.9221892864704636,0.9818897503814326
|
3 |
+
MERaLiON-AudioLLM-v2-2b,0.1419648528477662,0.1682557328326971
|
4 |
+
MERaLiON-AudioLLM-v2-9b,0.1564683463900063,0.1131479391295963
|
5 |
+
MERaLiON-AudioLLM-v2-9b-asr,0.1423883125132331,0.094997986489629
|
6 |
+
Qwen2.5-Omni-3B,0.196485284776625,0.1770868191640812
|
7 |
+
Qwen2.5-Omni-7B,0.1836756298962523,0.2273054693747908
|
8 |
SALMONN_7B,1.496294727927165,1.5460526688938172
|
9 |
+
SeaLLMs-Audio-7B,0.2551344484437857,0.18886727279946
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.1567859411391065,0.1713695840824915
|
11 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.1167690027524878,0.1553806101787203
|
12 |
hy_whisper_local_cs,0.1681134871903451,0.1781020821398794
|
13 |
phi_4_multimodal_instruct,1.1070294304467498,2.5042567310800923
|
14 |
+
whisper_large_v3,0.1366716070294304,0.1602754255313115
|
15 |
+
MERaLiON-SpeechEncoder2-ASR-CTC,0.19879999999999998,0.1322
|