Recommend faster-whisper for GPU inference
Browse files
app.py
CHANGED
|
@@ -399,16 +399,31 @@ def create_ui(app_config: ApplicationConfig):
|
|
| 399 |
ui.set_parallel_devices(app_config.vad_parallel_devices)
|
| 400 |
ui.set_auto_parallel(app_config.auto_parallel)
|
| 401 |
|
| 402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
|
| 404 |
ui_description += " as well as speech translation and language identification. "
|
| 405 |
|
| 406 |
ui_description += "\n\n\n\nFor longer audio files (>10 minutes) not in English, it is recommended that you select Silero VAD (Voice Activity Detector) in the VAD option."
|
| 407 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
if app_config.input_audio_max_duration > 0:
|
| 409 |
ui_description += "\n\n" + "Max audio file length: " + str(app_config.input_audio_max_duration) + " s"
|
| 410 |
|
| 411 |
-
ui_article = "Read the [documentation here](https://gitlab.com/aadnk/whisper-webui/-/blob/main/docs/options.md)"
|
| 412 |
|
| 413 |
whisper_models = app_config.get_model_names()
|
| 414 |
|
|
|
|
| 399 |
ui.set_parallel_devices(app_config.vad_parallel_devices)
|
| 400 |
ui.set_auto_parallel(app_config.auto_parallel)
|
| 401 |
|
| 402 |
+
is_whisper = False
|
| 403 |
+
|
| 404 |
+
if app_config.whisper_implementation == "whisper":
|
| 405 |
+
implementation_name = "Whisper"
|
| 406 |
+
is_whisper = True
|
| 407 |
+
elif app_config.whisper_implementation in ["faster-whisper", "faster_whisper"]:
|
| 408 |
+
implementation_name = "Faster Whisper"
|
| 409 |
+
else:
|
| 410 |
+
# Try to convert from snake-case or kebab-case to title-case (e.g. "my_impl" -> "My Impl")
|
| 411 |
+
implementation_name = app_config.whisper_implementation.title().replace("_", " ").replace("-", " ")
|
| 412 |
+
|
| 413 |
+
ui_description = implementation_name + " is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
|
| 414 |
ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
|
| 415 |
ui_description += " as well as speech translation and language identification. "
|
| 416 |
|
| 417 |
ui_description += "\n\n\n\nFor longer audio files (>10 minutes) not in English, it is recommended that you select Silero VAD (Voice Activity Detector) in the VAD option."
|
| 418 |
|
| 419 |
+
# Recommend faster-whisper
|
| 420 |
+
if is_whisper:
|
| 421 |
+
ui_description += "\n\n\n\nFor faster inference on GPU, try [faster-whisper](https://huggingface.co/spaces/aadnk/faster-whisper-webui)."
|
| 422 |
+
|
| 423 |
if app_config.input_audio_max_duration > 0:
|
| 424 |
ui_description += "\n\n" + "Max audio file length: " + str(app_config.input_audio_max_duration) + " s"
|
| 425 |
|
| 426 |
+
ui_article = "Read the [documentation here](https://gitlab.com/aadnk/whisper-webui/-/blob/main/docs/options.md)."
|
| 427 |
|
| 428 |
whisper_models = app_config.get_model_names()
|
| 429 |
|