Spaces:
Runtime error
Runtime error
Refactor language list
Browse files- app.py +2 -21
- cli.py +3 -2
- src/languages.py +147 -0
- src/whisper/fasterWhisperContainer.py +6 -20
app.py
CHANGED
|
@@ -16,6 +16,7 @@ from src.config import ApplicationConfig
|
|
| 16 |
from src.hooks.progressListener import ProgressListener
|
| 17 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
| 18 |
from src.hooks.whisperProgressHook import create_progress_listener_handle
|
|
|
|
| 19 |
from src.modelCache import ModelCache
|
| 20 |
from src.source import get_audio_source_collection
|
| 21 |
from src.vadParallel import ParallelContext, ParallelTranscription
|
|
@@ -40,26 +41,6 @@ MAX_FILE_PREFIX_LENGTH = 17
|
|
| 40 |
# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
|
| 41 |
MAX_AUTO_CPU_CORES = 8
|
| 42 |
|
| 43 |
-
LANGUAGES = [
|
| 44 |
-
"English", "Chinese", "German", "Spanish", "Russian", "Korean",
|
| 45 |
-
"French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
|
| 46 |
-
"Dutch", "Arabic", "Swedish", "Italian", "Indonesian", "Hindi",
|
| 47 |
-
"Finnish", "Vietnamese", "Hebrew", "Ukrainian", "Greek", "Malay",
|
| 48 |
-
"Czech", "Romanian", "Danish", "Hungarian", "Tamil", "Norwegian",
|
| 49 |
-
"Thai", "Urdu", "Croatian", "Bulgarian", "Lithuanian", "Latin",
|
| 50 |
-
"Maori", "Malayalam", "Welsh", "Slovak", "Telugu", "Persian",
|
| 51 |
-
"Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
|
| 52 |
-
"Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
|
| 53 |
-
"Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
|
| 54 |
-
"Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer",
|
| 55 |
-
"Shona", "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian",
|
| 56 |
-
"Belarusian", "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish",
|
| 57 |
-
"Lao", "Uzbek", "Faroese", "Haitian Creole", "Pashto", "Turkmen",
|
| 58 |
-
"Nynorsk", "Maltese", "Sanskrit", "Luxembourgish", "Myanmar", "Tibetan",
|
| 59 |
-
"Tagalog", "Malagasy", "Assamese", "Tatar", "Hawaiian", "Lingala",
|
| 60 |
-
"Hausa", "Bashkir", "Javanese", "Sundanese"
|
| 61 |
-
]
|
| 62 |
-
|
| 63 |
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
|
| 64 |
|
| 65 |
class WhisperTranscriber:
|
|
@@ -433,7 +414,7 @@ def create_ui(app_config: ApplicationConfig):
|
|
| 433 |
|
| 434 |
simple_inputs = lambda : [
|
| 435 |
gr.Dropdown(choices=whisper_models, value=app_config.default_model_name, label="Model"),
|
| 436 |
-
gr.Dropdown(choices=sorted(
|
| 437 |
gr.Text(label="URL (YouTube, etc.)"),
|
| 438 |
gr.File(label="Upload Files", file_count="multiple"),
|
| 439 |
gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
|
|
|
|
| 16 |
from src.hooks.progressListener import ProgressListener
|
| 17 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
| 18 |
from src.hooks.whisperProgressHook import create_progress_listener_handle
|
| 19 |
+
from src.languages import get_language_names
|
| 20 |
from src.modelCache import ModelCache
|
| 21 |
from src.source import get_audio_source_collection
|
| 22 |
from src.vadParallel import ParallelContext, ParallelTranscription
|
|
|
|
| 41 |
# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
|
| 42 |
MAX_AUTO_CPU_CORES = 8
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
|
| 45 |
|
| 46 |
class WhisperTranscriber:
|
|
|
|
| 414 |
|
| 415 |
simple_inputs = lambda : [
|
| 416 |
gr.Dropdown(choices=whisper_models, value=app_config.default_model_name, label="Model"),
|
| 417 |
+
gr.Dropdown(choices=sorted(get_language_names()), label="Language", value=app_config.language),
|
| 418 |
gr.Text(label="URL (YouTube, etc.)"),
|
| 419 |
gr.File(label="Upload Files", file_count="multiple"),
|
| 420 |
gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
|
cli.py
CHANGED
|
@@ -6,9 +6,10 @@ import warnings
|
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
import torch
|
| 9 |
-
from app import
|
| 10 |
from src.config import ApplicationConfig
|
| 11 |
from src.download import download_url
|
|
|
|
| 12 |
|
| 13 |
from src.utils import optional_float, optional_int, str2bool
|
| 14 |
from src.whisper.whisperFactory import create_whisper_container
|
|
@@ -41,7 +42,7 @@ def cli():
|
|
| 41 |
|
| 42 |
parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
|
| 43 |
help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
|
| 44 |
-
parser.add_argument("--language", type=str, default=app_config.language, choices=sorted(
|
| 45 |
help="language spoken in the audio, specify None to perform language detection")
|
| 46 |
|
| 47 |
parser.add_argument("--vad", type=str, default=app_config.default_vad, choices=["none", "silero-vad", "silero-vad-skip-gaps", "silero-vad-expand-into-gaps", "periodic-vad"], \
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
import torch
|
| 9 |
+
from app import WhisperTranscriber
|
| 10 |
from src.config import ApplicationConfig
|
| 11 |
from src.download import download_url
|
| 12 |
+
from src.languages import get_language_names
|
| 13 |
|
| 14 |
from src.utils import optional_float, optional_int, str2bool
|
| 15 |
from src.whisper.whisperFactory import create_whisper_container
|
|
|
|
| 42 |
|
| 43 |
parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
|
| 44 |
help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
|
| 45 |
+
parser.add_argument("--language", type=str, default=app_config.language, choices=sorted(get_language_names()), \
|
| 46 |
help="language spoken in the audio, specify None to perform language detection")
|
| 47 |
|
| 48 |
parser.add_argument("--vad", type=str, default=app_config.default_vad, choices=["none", "silero-vad", "silero-vad-skip-gaps", "silero-vad-expand-into-gaps", "periodic-vad"], \
|
src/languages.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Language:
    """A supported language: a short code (e.g. 'en') paired with a display name (e.g. 'English')."""

    def __init__(self, code, name):
        """Store the language code and its display name on the instance."""
        self.code = code
        self.name = name

    def __str__(self):
        """Return a readable description that shows both the code and the name."""
        template = "Language(code={}, name={})"
        return template.format(self.code, self.name)
|
| 8 |
+
|
| 9 |
+
# Every language offered to the user, as (code, display name) pairs.
# The display names are what the UI dropdown and the CLI --language choices
# show, so their capitalisation is user-visible.
LANGUAGES = [
    Language('en', 'English'),
    Language('zh', 'Chinese'),
    Language('de', 'German'),
    Language('es', 'Spanish'),
    Language('ru', 'Russian'),
    Language('ko', 'Korean'),
    Language('fr', 'French'),
    Language('ja', 'Japanese'),
    Language('pt', 'Portuguese'),
    Language('tr', 'Turkish'),
    Language('pl', 'Polish'),
    Language('ca', 'Catalan'),
    Language('nl', 'Dutch'),
    Language('ar', 'Arabic'),
    Language('sv', 'Swedish'),
    Language('it', 'Italian'),
    Language('id', 'Indonesian'),
    Language('hi', 'Hindi'),
    Language('fi', 'Finnish'),
    Language('vi', 'Vietnamese'),
    Language('he', 'Hebrew'),
    Language('uk', 'Ukrainian'),
    Language('el', 'Greek'),
    Language('ms', 'Malay'),
    Language('cs', 'Czech'),
    Language('ro', 'Romanian'),
    Language('da', 'Danish'),
    Language('hu', 'Hungarian'),
    Language('ta', 'Tamil'),
    Language('no', 'Norwegian'),
    Language('th', 'Thai'),
    Language('ur', 'Urdu'),
    Language('hr', 'Croatian'),
    Language('bg', 'Bulgarian'),
    Language('lt', 'Lithuanian'),
    Language('la', 'Latin'),
    Language('mi', 'Maori'),
    Language('ml', 'Malayalam'),
    Language('cy', 'Welsh'),
    Language('sk', 'Slovak'),
    Language('te', 'Telugu'),
    Language('fa', 'Persian'),
    Language('lv', 'Latvian'),
    Language('bn', 'Bengali'),
    Language('sr', 'Serbian'),
    Language('az', 'Azerbaijani'),
    Language('sl', 'Slovenian'),
    Language('kn', 'Kannada'),
    Language('et', 'Estonian'),
    Language('mk', 'Macedonian'),
    Language('br', 'Breton'),
    Language('eu', 'Basque'),
    Language('is', 'Icelandic'),
    Language('hy', 'Armenian'),
    Language('ne', 'Nepali'),
    Language('mn', 'Mongolian'),
    Language('bs', 'Bosnian'),
    Language('kk', 'Kazakh'),
    Language('sq', 'Albanian'),
    Language('sw', 'Swahili'),
    Language('gl', 'Galician'),
    Language('mr', 'Marathi'),
    Language('pa', 'Punjabi'),
    Language('si', 'Sinhala'),
    Language('km', 'Khmer'),
    Language('sn', 'Shona'),
    Language('yo', 'Yoruba'),
    Language('so', 'Somali'),
    Language('af', 'Afrikaans'),
    Language('oc', 'Occitan'),
    Language('ka', 'Georgian'),
    Language('be', 'Belarusian'),
    Language('tg', 'Tajik'),
    Language('sd', 'Sindhi'),
    Language('gu', 'Gujarati'),
    Language('am', 'Amharic'),
    Language('yi', 'Yiddish'),
    Language('lo', 'Lao'),
    Language('uz', 'Uzbek'),
    Language('fo', 'Faroese'),
    # Capitalised as "Haitian Creole" to match the name the previous language
    # list exposed; name lookups are case-insensitive, so they are unaffected.
    Language('ht', 'Haitian Creole'),
    Language('ps', 'Pashto'),
    Language('tk', 'Turkmen'),
    Language('nn', 'Nynorsk'),
    Language('mt', 'Maltese'),
    Language('sa', 'Sanskrit'),
    Language('lb', 'Luxembourgish'),
    Language('my', 'Myanmar'),
    Language('bo', 'Tibetan'),
    Language('tl', 'Tagalog'),
    Language('mg', 'Malagasy'),
    Language('as', 'Assamese'),
    Language('tt', 'Tatar'),
    Language('haw', 'Hawaiian'),
    Language('ln', 'Lingala'),
    Language('ha', 'Hausa'),
    Language('ba', 'Bashkir'),
    Language('jw', 'Javanese'),
    Language('su', 'Sundanese'),
]
|
| 110 |
+
|
| 111 |
+
# Lookup table behind get_language_from_code(): language code -> Language.
_TO_LANGUAGE_CODE = {language.code: language for language in LANGUAGES}

# Alternative names accepted as keys as well. Each alias is resolved to the
# Language object of its canonical code, so every value in the table is a
# Language (a bare code string here would break callers that read
# `.code` / `.name` from the lookup result).
_TO_LANGUAGE_CODE.update(
    (alias, _TO_LANGUAGE_CODE[code])
    for alias, code in (
        ("burmese", "my"),
        ("valencian", "ca"),
        ("flemish", "nl"),
        ("haitian", "ht"),
        ("letzeburgesch", "lb"),
        ("pushto", "ps"),
        ("panjabi", "pa"),
        ("moldavian", "ro"),
        ("moldovan", "ro"),
        ("sinhalese", "si"),
        ("castilian", "es"),
    )
)
|
| 125 |
+
|
| 126 |
+
# Lookup table behind get_language_from_name(): lower-cased display name -> Language.
_FROM_LANGUAGE_NAME = {entry.name.lower(): entry for entry in LANGUAGES}
|
| 129 |
+
|
| 130 |
+
def get_language_from_code(language_code, default=None) -> Language:
    """Return the entry registered in _TO_LANGUAGE_CODE for language_code.

    Entries keyed by a language code (e.g. 'en') are Language objects.
    `default` is returned when the key is not present in the table.
    """
    try:
        return _TO_LANGUAGE_CODE[language_code]
    except KeyError:
        return default
|
| 133 |
+
|
| 134 |
+
def get_language_from_name(language, default=None) -> Language:
    """Return the Language whose display name matches `language`, ignoring case.

    `default` is returned when `language` is empty/None or matches no name.
    """
    if not language:
        return default
    return _FROM_LANGUAGE_NAME.get(language.lower(), default)
|
| 137 |
+
|
| 138 |
+
def get_language_names():
    """Return the display name of every entry in LANGUAGES, in list order."""
    return [entry.name for entry in LANGUAGES]
|
| 141 |
+
|
| 142 |
+
if __name__ == "__main__":
    # Manual smoke test: exercise each lookup once and print what it returns.
    by_code = get_language_from_code('en')
    print(by_code)
    by_name = get_language_from_name('English')
    print(by_name)

    print(get_language_names())
|
src/whisper/fasterWhisperContainer.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import List, Union
|
|
| 4 |
from faster_whisper import WhisperModel, download_model
|
| 5 |
from src.config import ModelConfig
|
| 6 |
from src.hooks.progressListener import ProgressListener
|
|
|
|
| 7 |
from src.modelCache import ModelCache
|
| 8 |
from src.whisper.abstractWhisperContainer import AbstractWhisperCallback, AbstractWhisperContainer
|
| 9 |
|
|
@@ -179,24 +180,9 @@ class FasterWhisperCallback(AbstractWhisperCallback):
|
|
| 179 |
return [int(token) for token in suppress_tokens.split(",")]
|
| 180 |
|
| 181 |
def _lookup_language_code(self, language: str):
|
| 182 |
-
|
| 183 |
-
"english": "en", "chinese": "zh", "german": "de", "spanish": "es", "russian": "ru", "korean": "ko",
|
| 184 |
-
"french": "fr", "japanese": "ja", "portuguese": "pt", "turkish": "tr", "polish": "pl", "catalan": "ca",
|
| 185 |
-
"dutch": "nl", "arabic": "ar", "swedish": "sv", "italian": "it", "indonesian": "id", "hindi": "hi",
|
| 186 |
-
"finnish": "fi", "vietnamese": "vi", "hebrew": "he", "ukrainian": "uk", "greek": "el", "malay": "ms",
|
| 187 |
-
"czech": "cs", "romanian": "ro", "danish": "da", "hungarian": "hu", "tamil": "ta", "norwegian": "no",
|
| 188 |
-
"thai": "th", "urdu": "ur", "croatian": "hr", "bulgarian": "bg", "lithuanian": "lt", "latin": "la",
|
| 189 |
-
"maori": "mi", "malayalam": "ml", "welsh": "cy", "slovak": "sk", "telugu": "te", "persian": "fa",
|
| 190 |
-
"latvian": "lv", "bengali": "bn", "serbian": "sr", "azerbaijani": "az", "slovenian": "sl",
|
| 191 |
-
"kannada": "kn", "estonian": "et", "macedonian": "mk", "breton": "br", "basque": "eu", "icelandic": "is",
|
| 192 |
-
"armenian": "hy", "nepali": "ne", "mongolian": "mn", "bosnian": "bs", "kazakh": "kk", "albanian": "sq",
|
| 193 |
-
"swahili": "sw", "galician": "gl", "marathi": "mr", "punjabi": "pa", "sinhala": "si", "khmer": "km",
|
| 194 |
-
"shona": "sn", "yoruba": "yo", "somali": "so", "afrikaans": "af", "occitan": "oc", "georgian": "ka",
|
| 195 |
-
"belarusian": "be", "tajik": "tg", "sindhi": "sd", "gujarati": "gu", "amharic": "am", "yiddish": "yi",
|
| 196 |
-
"lao": "lo", "uzbek": "uz", "faroese": "fo", "haitian creole": "ht", "pashto": "ps", "turkmen": "tk",
|
| 197 |
-
"nynorsk": "nn", "maltese": "mt", "sanskrit": "sa", "luxembourgish": "lb", "myanmar": "my", "tibetan": "bo",
|
| 198 |
-
"tagalog": "tl", "malagasy": "mg", "assamese": "as", "tatar": "tt", "hawaiian": "haw", "lingala": "ln",
|
| 199 |
-
"hausa": "ha", "bashkir": "ba", "javanese": "jv", "sundanese": "su"
|
| 200 |
-
}
|
| 201 |
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from faster_whisper import WhisperModel, download_model
|
| 5 |
from src.config import ModelConfig
|
| 6 |
from src.hooks.progressListener import ProgressListener
|
| 7 |
+
from src.languages import get_language_from_name
|
| 8 |
from src.modelCache import ModelCache
|
| 9 |
from src.whisper.abstractWhisperContainer import AbstractWhisperCallback, AbstractWhisperContainer
|
| 10 |
|
|
|
|
| 180 |
return [int(token) for token in suppress_tokens.split(",")]
|
| 181 |
|
| 182 |
def _lookup_language_code(self, language: str):
    """Resolve a language name to its language code.

    Parameters
    ----------
    language: str
        The language name, looked up through get_language_from_name.

    Returns
    -------
    The `code` attribute of the matching language entry.

    Raises
    ------
    ValueError
        If get_language_from_name finds no entry for `language`.
    """
    # Keep the caller's argument under its own name: the error message below
    # needs the original text, not the (None) lookup result.
    resolved = get_language_from_name(language)

    if resolved is None:
        # str() keeps the message well-formed even if `language` is not a str.
        raise ValueError("Invalid language: " + str(language))

    return resolved.code
|