File size: 4,115 Bytes
b064a39 5ccc223 2deac9d b064a39 329b392 7feaac0 b064a39 fdd2a40 0493b36 ed12281 fdd2a40 b064a39 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
from dataclasses import dataclass
# These classes hold the user-facing column names in one place, so that
# renaming a column does not require changes throughout the code.
@dataclass
class ColumnContent:
    """Description of one user-facing table column.

    Attributes:
        name: Header text shown to the user for this column.
        type: Kind of content held by the column; the values used in this
            file are "markdown" and "number".
    """

    name: str
    type: str
def fields(raw_class):
    """Return the values of the non-dunder attributes set on *raw_class*.

    Only attributes stored directly in ``raw_class.__dict__`` are considered
    (inherited ones are not), and they are returned in definition order.
    Any attribute whose name starts with ``__`` or ends with ``__`` is
    skipped, which drops the machinery Python and ``@dataclass`` add to a
    class.
    """
    return [
        value
        for attr_name, value in raw_class.__dict__.items()
        if not (attr_name.startswith("__") or attr_name.endswith("__"))
    ]
@dataclass(frozen=True)
class AutoEvalColumn:
    """User-facing columns of the auto-eval table.

    Each class attribute is a ``ColumnContent`` giving the header text and
    content type of one column. The attributes carry no annotations, so they
    are plain class attributes rather than dataclass fields; the
    declaration order below is the order in which ``fields()`` returns them.
    """

    # Model identifier, rendered as markdown.
    model = ColumnContent(name="Model", type="markdown")

    # Summary columns.
    avg_wer = ColumnContent(name="Average WER ⬇️", type="number")
    rtf = ColumnContent(name="RTFx ⬆️️", type="number")

    # Per-benchmark columns (presumably one WER value per dataset).
    ami_wer = ColumnContent(name="AMI", type="number")
    e22_wer = ColumnContent(name="Earnings22", type="number")
    gs_wer = ColumnContent(name="Gigaspeech", type="number")
    lsc_wer = ColumnContent(name="LS Clean", type="number")
    lso_wer = ColumnContent(name="LS Other", type="number")
    ss_wer = ColumnContent(name="SPGISpeech", type="number")
    tl_wer = ColumnContent(name="Tedlium", type="number")
    vp_wer = ColumnContent(name="Voxpopuli", type="number")
@dataclass(frozen=True)
class MultilingualColumn:
    """User-facing columns of the multilingual benchmark table.

    Each class attribute is a ``ColumnContent`` giving the header text and
    content type of one column. The attributes carry no annotations, so they
    are plain class attributes rather than dataclass fields; the
    declaration order below is the order in which ``fields()`` returns them.
    """

    # Model identifier, rendered as markdown.
    model = ColumnContent(name="Model", type="markdown")

    # Summary columns.
    avg_multilingual = ColumnContent(name="Average WER ⬇️", type="number")
    rtf = ColumnContent(name="RTFx ⬆️️", type="number")

    # One column per language.
    de_avg = ColumnContent(name="🇩🇪 German", type="number")
    fr_avg = ColumnContent(name="🇫🇷 French", type="number")
    es_avg = ColumnContent(name="🇪🇸 Spanish", type="number")
    it_avg = ColumnContent(name="🇮🇹 Italian", type="number")
    nl_avg = ColumnContent(name="🇳🇱 Dutch", type="number")
    pl_avg = ColumnContent(name="🇵🇱 Polish", type="number")
    pt_avg = ColumnContent(name="🇵🇹 Portuguese", type="number")
    cs_avg = ColumnContent(name="🇨🇿 Czech", type="number")
    ro_avg = ColumnContent(name="🇷🇴 Romanian", type="number")
    hu_avg = ColumnContent(name="🇭🇺 Hungarian", type="number")
@dataclass(frozen=True)
class LongformColumn:
    """User-facing columns of the long-form ASR benchmark table.

    Each class attribute is a ``ColumnContent`` giving the header text and
    content type of one column. The attributes carry no annotations, so they
    are plain class attributes rather than dataclass fields; the
    declaration order below is the order in which ``fields()`` returns them.
    """

    # Model identifier, rendered as markdown.
    model = ColumnContent(name="Model", type="markdown")

    # Summary columns.
    avg_longform = ColumnContent(name="Average WER ⬇️", type="number")
    rtf = ColumnContent(name="RTFx ⬆️️", type="number")

    # Per-benchmark columns.
    earnings21_wer = ColumnContent(name="Earnings21", type="number")
    mustc_wer = ColumnContent(name="MustC", type="number")
# Providers whose models are not hosted under huggingface.co/<model_name>:
# the first path segment of the model name selects a fixed landing page.
_PROVIDER_LINKS = {
    "trt-llm": "https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/whisper",
    "faster-whisper": "https://github.com/guillaumekln/faster-whisper",
    "Whisper.cpp": "https://github.com/ggerganov/whisper.cpp",
    "WhisperKit": "https://github.com/argmaxinc/WhisperKit",
    "WhisperMLX": "https://huggingface.co/collections/mlx-community/whisper-663256f9964fbb1177db93dc",
    "elevenlabs": "https://elevenlabs.io/speech-to-text",
    "assemblyai": "https://www.assemblyai.com/docs",
    "revai": "https://docs.rev.ai/api/asynchronous/get-started/",
    "speechmatics": "https://www.speechmatics.com/",
    "ultravox": "https://huggingface.co/fixie-ai",
}

# "openai/<model>" names that link to the OpenAI API docs; every other
# "openai/..." name falls through to the Hugging Face link.
_OPENAI_API_MODELS = frozenset(
    {"whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"}
)
_OPENAI_API_DOCS = "https://platform.openai.com/docs/guides/speech-to-text"


def make_clickable_model(model_name: str) -> str:
    """Return an HTML anchor that links *model_name* to its home page.

    The text before the first "/" selects a provider-specific link when the
    provider is listed in ``_PROVIDER_LINKS``. For the "openai" provider,
    the models in ``_OPENAI_API_MODELS`` link to the OpenAI API docs. Any
    other name links to ``https://huggingface.co/<model_name>``.

    Args:
        model_name: Model identifier, usually "<provider>/<model>". A name
            without "/" is accepted too (the original raised ``IndexError``
            for a bare "openai").

    Returns:
        An ``<a>`` element that opens the link in a new tab, with
        *model_name* as its visible text. The name is inserted as-is (no
        HTML escaping).
    """
    segments = model_name.split("/")
    provider = segments[0]
    # Guard the second segment: a name without "/" has only one segment.
    model_id = segments[1] if len(segments) > 1 else None

    if provider in _PROVIDER_LINKS:
        link = _PROVIDER_LINKS[provider]
    elif provider == "openai" and model_id in _OPENAI_API_MODELS:
        link = _OPENAI_API_DOCS
    else:
        link = f"https://huggingface.co/{model_name}"

    return (
        f'<a target="_blank" href="{link}" '
        'style="color: var(--link-text-color); text-decoration: underline;'
        'text-decoration-style: dotted;">'
        f"{model_name}</a>"
    )
def styled_error(error):
    """Wrap *error* in a red, centered, 20px HTML paragraph."""
    css = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{error}</p>"
def styled_warning(warn):
    """Wrap *warn* in an orange, centered, 20px HTML paragraph."""
    css = "color: orange; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{warn}</p>"
def styled_message(message):
    """Wrap *message* in a green, centered, 20px HTML paragraph."""
    css = "color: green; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{message}</p>"
|