YingxuHe's picture
update seallm-audio and whisper
b82cc59
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test
Qwen-Audio-Chat,0.0202587995623797,0.043467569561352,0.1127242112839891,0.3141914474672335,0.1301891002258773,0.2655529121410546,0.3664994875132684,0.0405237571413363,0.2911540507002305
Qwen2-Audio-7B-Instruct,0.0351416606934017,0.0604157603041594,0.114388725008194,0.2165498391593041,0.1172381289030281,0.1887221931940723,0.2354255566133092,0.06114048472375,0.0873958517993263
old_models,,,,,,,,,
gemini-1.5-flash,,,,,,,,,
WavLLM_fairseq,0.0210321801788206,0.0479883481188643,0.1453332562130063,0.3792176325635977,0.154917784145464,0.6447482518259942,0.6671766188447099,0.0662148255917107,0.4536784258110264
MERaLiON-AudioLLM-Whisper-SEA-LION,0.023937073225940318,0.0422569845082944,0.07734234900148476,0.21620323529945748,0.14477210452030514,0.13838923413858656,0.16553574886426656,0.08154430289911642,0.10512320510547775
MERaLiON-AudioLLM-v2-2b,0.027124910401026145,0.050958064577146425,0.09276196964925024,0.20627055897299626,0.09237908290276242,0.21886082422652334,0.23935918375209228,0.03456229374401192,0.13837971990781775
MERaLiON-AudioLLM-v2-9b,0.02497453502848304,0.046607524542720415,0.09001741857938951,0.20476530792451958,0.09023061553464748,0.1084090226901313,0.15062142184399924,0.03513005216280473,0.043573834426520124
MERaLiON-AudioLLM-v2-9b-asr,0.020956728411363035,0.04040327614579984,0.07591543954595999,0.1957668115250735,0.08768103407213536,0.09210848128425476,0.1277414998676963,0.0313686526383024,0.03495834071973054
Qwen2.5-Omni-3B,0.021107631946278342,0.04492405470331209,0.08823056799994858,0.2615060102759792,0.11446542772550759,0.14654089448699847,0.19688006593894564,0.04804655619034101,0.07147668853040241
Qwen2.5-Omni-7B,0.04404496925340476,0.06877636332683905,0.07962411862631041,0.3124105638254503,0.13967544855837088,0.18939756089426465,0.24105023789319796,0.049146588126752065,0.08381492643148378
SALMONN_7B,0.09638963292715132,0.11776722719276675,0.3197948335593678,0.24158949229136512,0.11024871580815716,0.27733154717568453,0.37956460424973665,0.039352755402576205,0.14139336996986349
SeaLLMs-Audio-7B,0.05106198362696646,0.0971116197249702,0.15751923435380927,0.37540994158899715,0.12728854393076772,0.3793954486331447,0.45555254788532057,0.047123948759802706,0.08994859067541217
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.03299128532085864,0.05381428868670437,0.10530206130568642,0.20285898669536326,0.09994259054523941,0.14091838890062366,0.17187922953626794,0.04939498243497392,0.08636766530756958
cascade_whisper_large_v3_llama_3_8b_instruct,0.018032972422378994,0.035504189759207064,0.09794897834568489,0.14542012514049835,0.09501640807342393,0.10872308256717546,0.1459710229559586,0.038146268762641496,0.04935295160432548
hy_whisper_local_cs,0.029086656354925113,0.05591389713810127,0.10600266099330895,0.17879147486544342,0.10212866235970408,0.14925070316060968,0.17014458107377883,0.04666264504453355,0.06973940790639957
phi_4_multimodal_instruct,0.016844607084920964,0.03851173700039722,0.07948914070484185,0.2147161396912585,0.0988294989332872,0.1306461295594268,0.22572024408764688,0.028636315247862035,0.05062932104236838
whisper_large_v3,0.018938393631870828,0.03631755159173019,0.09819322410834228,0.14557692212471468,0.09593897049053747,0.10783648875498883,0.1409171231397644,0.0382882083673397,0.04555929799680908