Integration of progress bar with translation model compatibility
Browse files
- app.py +27 -12
- src/hooks/progressListener.py +1 -1
- src/hooks/subTaskProgressListener.py +4 -4
- src/nllb/nllbModel.py +3 -0
- src/vad.py +6 -3
- src/vadParallel.py +2 -2
- src/whisper/fasterWhisperContainer.py +1 -1
app.py
CHANGED
|
@@ -156,13 +156,15 @@ class WhisperTranscriber:
|
|
| 156 |
word_timestamps=word_timestamps, prepend_punctuations=prepend_punctuations, append_punctuations=append_punctuations, highlight_words=highlight_words,
|
| 157 |
progress=progress)
|
| 158 |
|
| 159 |
-
def transcribe_webui(self, modelName, languageName, nllbModelName, nllbLangName, urlData, multipleFiles, microphoneData, task,
|
| 160 |
vadOptions: VadOptions, progress: gr.Progress = None, highlight_words: bool = False,
|
| 161 |
**decodeOptions: dict):
|
| 162 |
try:
|
|
|
|
| 163 |
sources = self.__get_source(urlData, multipleFiles, microphoneData)
|
| 164 |
|
| 165 |
try:
|
|
|
|
| 166 |
whisper_lang = get_language_from_name(languageName)
|
| 167 |
selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
|
| 168 |
selectedModel = modelName if modelName is not None else "base"
|
|
@@ -170,13 +172,15 @@ class WhisperTranscriber:
|
|
| 170 |
model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
|
| 171 |
model_name=selectedModel, compute_type=self.app_config.compute_type,
|
| 172 |
cache=self.model_cache, models=self.app_config.models)
|
| 173 |
-
|
|
|
|
| 174 |
nllb_lang = get_nllb_lang_from_name(nllbLangName)
|
| 175 |
selectedNllbModelName = nllbModelName if nllbModelName is not None and len(nllbModelName) > 0 else "nllb-200-distilled-600M/facebook"
|
| 176 |
selectedNllbModel = next((modelConfig for modelConfig in self.app_config.nllb_models if modelConfig.name == selectedNllbModelName), None)
|
| 177 |
-
|
| 178 |
nllb_model = NllbModel(model_config=selectedNllbModel, whisper_lang=whisper_lang, nllb_lang=nllb_lang) # load_model=True
|
| 179 |
-
|
|
|
|
| 180 |
# Result
|
| 181 |
download = []
|
| 182 |
zip_file_lookup = {}
|
|
@@ -186,6 +190,7 @@ class WhisperTranscriber:
|
|
| 186 |
# Write result
|
| 187 |
downloadDirectory = tempfile.mkdtemp()
|
| 188 |
source_index = 0
|
|
|
|
| 189 |
|
| 190 |
outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
|
| 191 |
|
|
@@ -195,9 +200,10 @@ class WhisperTranscriber:
|
|
| 195 |
|
| 196 |
# A listener that will report progress to Gradio
|
| 197 |
root_progress_listener = self._create_progress_listener(progress)
|
|
|
|
| 198 |
|
| 199 |
# Execute whisper
|
| 200 |
-
for source in sources:
|
| 201 |
source_prefix = ""
|
| 202 |
source_audio_duration = source.get_audio_duration()
|
| 203 |
|
|
@@ -208,9 +214,9 @@ class WhisperTranscriber:
|
|
| 208 |
print("Transcribing ", source.source_path)
|
| 209 |
|
| 210 |
scaled_progress_listener = SubTaskProgressListener(root_progress_listener,
|
| 211 |
-
base_task_total=
|
| 212 |
-
sub_task_start=
|
| 213 |
-
sub_task_total=
|
| 214 |
|
| 215 |
# Transcribe
|
| 216 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
|
@@ -219,7 +225,7 @@ class WhisperTranscriber:
|
|
| 219 |
# Update progress
|
| 220 |
current_progress += source_audio_duration
|
| 221 |
|
| 222 |
-
source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix, outputDirectory, highlight_words)
|
| 223 |
|
| 224 |
if len(sources) > 1:
|
| 225 |
# Add new line separators
|
|
@@ -377,9 +383,9 @@ class WhisperTranscriber:
|
|
| 377 |
def __init__(self, progress: gr.Progress):
|
| 378 |
self.progress = progress
|
| 379 |
|
| 380 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
| 381 |
# From 0 to 1
|
| 382 |
-
self.progress(current / total)
|
| 383 |
|
| 384 |
def on_finished(self):
|
| 385 |
self.progress(1)
|
|
@@ -435,7 +441,7 @@ class WhisperTranscriber:
|
|
| 435 |
|
| 436 |
return config
|
| 437 |
|
| 438 |
-
def write_result(self, result: dict, nllb_model: NllbModel, source_name: str, output_dir: str, highlight_words: bool = False):
|
| 439 |
if not os.path.exists(output_dir):
|
| 440 |
os.makedirs(output_dir)
|
| 441 |
|
|
@@ -446,6 +452,10 @@ class WhisperTranscriber:
|
|
| 446 |
|
| 447 |
if nllb_model.nllb_lang is not None:
|
| 448 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
pbar = tqdm.tqdm(total=len(segments))
|
| 450 |
perf_start_time = time.perf_counter()
|
| 451 |
nllb_model.load_model()
|
|
@@ -456,9 +466,14 @@ class WhisperTranscriber:
|
|
| 456 |
if nllb_model.nllb_lang is not None:
|
| 457 |
segment["text"] = nllb_model.translation(seg_text)
|
| 458 |
pbar.update(1)
|
|
|
|
| 459 |
|
| 460 |
nllb_model.release_vram()
|
| 461 |
perf_end_time = time.perf_counter()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
print("\n\nprocess segments took {} seconds.\n\n".format(perf_end_time - perf_start_time))
|
| 463 |
except Exception as e:
|
| 464 |
# Ignore error - it's just a cleanup
|
|
|
|
| 156 |
word_timestamps=word_timestamps, prepend_punctuations=prepend_punctuations, append_punctuations=append_punctuations, highlight_words=highlight_words,
|
| 157 |
progress=progress)
|
| 158 |
|
| 159 |
+
def transcribe_webui(self, modelName: str, languageName: str, nllbModelName: str, nllbLangName: str, urlData: str, multipleFiles, microphoneData: str, task: str,
|
| 160 |
vadOptions: VadOptions, progress: gr.Progress = None, highlight_words: bool = False,
|
| 161 |
**decodeOptions: dict):
|
| 162 |
try:
|
| 163 |
+
progress(0, desc="init audio sources")
|
| 164 |
sources = self.__get_source(urlData, multipleFiles, microphoneData)
|
| 165 |
|
| 166 |
try:
|
| 167 |
+
progress(0, desc="init whisper model")
|
| 168 |
whisper_lang = get_language_from_name(languageName)
|
| 169 |
selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
|
| 170 |
selectedModel = modelName if modelName is not None else "base"
|
|
|
|
| 172 |
model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
|
| 173 |
model_name=selectedModel, compute_type=self.app_config.compute_type,
|
| 174 |
cache=self.model_cache, models=self.app_config.models)
|
| 175 |
+
|
| 176 |
+
progress(0, desc="init translate model")
|
| 177 |
nllb_lang = get_nllb_lang_from_name(nllbLangName)
|
| 178 |
selectedNllbModelName = nllbModelName if nllbModelName is not None and len(nllbModelName) > 0 else "nllb-200-distilled-600M/facebook"
|
| 179 |
selectedNllbModel = next((modelConfig for modelConfig in self.app_config.nllb_models if modelConfig.name == selectedNllbModelName), None)
|
| 180 |
+
|
| 181 |
nllb_model = NllbModel(model_config=selectedNllbModel, whisper_lang=whisper_lang, nllb_lang=nllb_lang) # load_model=True
|
| 182 |
+
|
| 183 |
+
progress(0, desc="init transcribe")
|
| 184 |
# Result
|
| 185 |
download = []
|
| 186 |
zip_file_lookup = {}
|
|
|
|
| 190 |
# Write result
|
| 191 |
downloadDirectory = tempfile.mkdtemp()
|
| 192 |
source_index = 0
|
| 193 |
+
extra_tasks_count = 1 if nllb_lang is not None else 0
|
| 194 |
|
| 195 |
outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
|
| 196 |
|
|
|
|
| 200 |
|
| 201 |
# A listener that will report progress to Gradio
|
| 202 |
root_progress_listener = self._create_progress_listener(progress)
|
| 203 |
+
sub_task_total = 1/(len(sources)+extra_tasks_count*len(sources))
|
| 204 |
|
| 205 |
# Execute whisper
|
| 206 |
+
for idx, source in enumerate(sources):
|
| 207 |
source_prefix = ""
|
| 208 |
source_audio_duration = source.get_audio_duration()
|
| 209 |
|
|
|
|
| 214 |
print("Transcribing ", source.source_path)
|
| 215 |
|
| 216 |
scaled_progress_listener = SubTaskProgressListener(root_progress_listener,
|
| 217 |
+
base_task_total=1,
|
| 218 |
+
sub_task_start=idx*1/len(sources),
|
| 219 |
+
sub_task_total=sub_task_total)
|
| 220 |
|
| 221 |
# Transcribe
|
| 222 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
|
|
|
| 225 |
# Update progress
|
| 226 |
current_progress += source_audio_duration
|
| 227 |
|
| 228 |
+
source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix, outputDirectory, highlight_words, scaled_progress_listener)
|
| 229 |
|
| 230 |
if len(sources) > 1:
|
| 231 |
# Add new line separators
|
|
|
|
| 383 |
def __init__(self, progress: gr.Progress):
|
| 384 |
self.progress = progress
|
| 385 |
|
| 386 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
| 387 |
# From 0 to 1
|
| 388 |
+
self.progress(current / total, desc=desc)
|
| 389 |
|
| 390 |
def on_finished(self):
|
| 391 |
self.progress(1)
|
|
|
|
| 441 |
|
| 442 |
return config
|
| 443 |
|
| 444 |
+
def write_result(self, result: dict, nllb_model: NllbModel, source_name: str, output_dir: str, highlight_words: bool = False, progressListener: ProgressListener = None):
|
| 445 |
if not os.path.exists(output_dir):
|
| 446 |
os.makedirs(output_dir)
|
| 447 |
|
|
|
|
| 452 |
|
| 453 |
if nllb_model.nllb_lang is not None:
|
| 454 |
try:
|
| 455 |
+
segments_progress_listener = SubTaskProgressListener(progressListener,
|
| 456 |
+
base_task_total=progressListener.sub_task_total,
|
| 457 |
+
sub_task_start=1,
|
| 458 |
+
sub_task_total=1)
|
| 459 |
pbar = tqdm.tqdm(total=len(segments))
|
| 460 |
perf_start_time = time.perf_counter()
|
| 461 |
nllb_model.load_model()
|
|
|
|
| 466 |
if nllb_model.nllb_lang is not None:
|
| 467 |
segment["text"] = nllb_model.translation(seg_text)
|
| 468 |
pbar.update(1)
|
| 469 |
+
segments_progress_listener.on_progress(idx+1, len(segments), "Process segments")
|
| 470 |
|
| 471 |
nllb_model.release_vram()
|
| 472 |
perf_end_time = time.perf_counter()
|
| 473 |
+
# Call the finished callback
|
| 474 |
+
if segments_progress_listener is not None:
|
| 475 |
+
segments_progress_listener.on_finished()
|
| 476 |
+
|
| 477 |
print("\n\nprocess segments took {} seconds.\n\n".format(perf_end_time - perf_start_time))
|
| 478 |
except Exception as e:
|
| 479 |
# Ignore error - it's just a cleanup
|
src/hooks/progressListener.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from typing import Union
|
| 2 |
|
| 3 |
class ProgressListener:
|
| 4 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
| 5 |
self.total = total
|
| 6 |
|
| 7 |
def on_finished(self):
|
|
|
|
| 1 |
from typing import Union
|
| 2 |
|
| 3 |
class ProgressListener:
|
| 4 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
| 5 |
self.total = total
|
| 6 |
|
| 7 |
def on_finished(self):
|
src/hooks/subTaskProgressListener.py
CHANGED
|
@@ -25,13 +25,13 @@ class SubTaskProgressListener(ProgressListener):
|
|
| 25 |
):
|
| 26 |
self.base_task_listener = base_task_listener
|
| 27 |
self.base_task_total = base_task_total
|
| 28 |
-
self.sub_task_start = sub_task_start
|
| 29 |
-
self.sub_task_total = sub_task_total
|
| 30 |
|
| 31 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
| 32 |
sub_task_progress_frac = current / total
|
| 33 |
sub_task_progress = self.sub_task_start + self.sub_task_total * sub_task_progress_frac
|
| 34 |
-
self.base_task_listener.on_progress(sub_task_progress, self.base_task_total)
|
| 35 |
|
| 36 |
def on_finished(self):
|
| 37 |
self.base_task_listener.on_progress(self.sub_task_start + self.sub_task_total, self.base_task_total)
|
|
|
|
| 25 |
):
|
| 26 |
self.base_task_listener = base_task_listener
|
| 27 |
self.base_task_total = base_task_total
|
| 28 |
+
self.sub_task_start = base_task_total*sub_task_start
|
| 29 |
+
self.sub_task_total = base_task_total*sub_task_total
|
| 30 |
|
| 31 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
| 32 |
sub_task_progress_frac = current / total
|
| 33 |
sub_task_progress = self.sub_task_start + self.sub_task_total * sub_task_progress_frac
|
| 34 |
+
self.base_task_listener.on_progress(sub_task_progress, self.base_task_total, desc=desc)
|
| 35 |
|
| 36 |
def on_finished(self):
|
| 37 |
self.base_task_listener.on_progress(self.sub_task_start + self.sub_task_total, self.base_task_total)
|
src/nllb/nllbModel.py
CHANGED
|
@@ -54,6 +54,9 @@ class NllbModel:
|
|
| 54 |
self.nllb_lang = nllb_lang
|
| 55 |
self.model_config = model_config
|
| 56 |
|
|
|
|
|
|
|
|
|
|
| 57 |
if os.path.isdir(model_config.url):
|
| 58 |
self.model_path = model_config.url
|
| 59 |
else:
|
|
|
|
| 54 |
self.nllb_lang = nllb_lang
|
| 55 |
self.model_config = model_config
|
| 56 |
|
| 57 |
+
if nllb_lang is None:
|
| 58 |
+
return
|
| 59 |
+
|
| 60 |
if os.path.isdir(model_config.url):
|
| 61 |
self.model_path = model_config.url
|
| 62 |
else:
|
src/vad.py
CHANGED
|
@@ -181,9 +181,10 @@ class AbstractTranscription(ABC):
|
|
| 181 |
# Calculate progress
|
| 182 |
progress_start_offset = merged[0]['start'] if len(merged) > 0 else 0
|
| 183 |
progress_total_duration = sum([segment['end'] - segment['start'] for segment in merged])
|
|
|
|
| 184 |
|
| 185 |
# For each time segment, run whisper
|
| 186 |
-
for segment in merged:
|
| 187 |
segment_index += 1
|
| 188 |
segment_start = segment['start']
|
| 189 |
segment_end = segment['end']
|
|
@@ -208,8 +209,10 @@ class AbstractTranscription(ABC):
|
|
| 208 |
|
| 209 |
perf_start_time = time.perf_counter()
|
| 210 |
|
| 211 |
-
scaled_progress_listener = SubTaskProgressListener(progressListener,
|
| 212 |
-
|
|
|
|
|
|
|
| 213 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
| 214 |
|
| 215 |
perf_end_time = time.perf_counter()
|
|
|
|
| 181 |
# Calculate progress
|
| 182 |
progress_start_offset = merged[0]['start'] if len(merged) > 0 else 0
|
| 183 |
progress_total_duration = sum([segment['end'] - segment['start'] for segment in merged])
|
| 184 |
+
sub_task_total = 1/len(merged)
|
| 185 |
|
| 186 |
# For each time segment, run whisper
|
| 187 |
+
for idx, segment in enumerate(merged):
|
| 188 |
segment_index += 1
|
| 189 |
segment_start = segment['start']
|
| 190 |
segment_end = segment['end']
|
|
|
|
| 209 |
|
| 210 |
perf_start_time = time.perf_counter()
|
| 211 |
|
| 212 |
+
scaled_progress_listener = SubTaskProgressListener(progressListener,
|
| 213 |
+
base_task_total=progressListener.sub_task_total if isinstance(progressListener, SubTaskProgressListener) else progress_total_duration,
|
| 214 |
+
sub_task_start=idx*(1/len(merged)),
|
| 215 |
+
sub_task_total=1/len(merged))
|
| 216 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
| 217 |
|
| 218 |
perf_end_time = time.perf_counter()
|
src/vadParallel.py
CHANGED
|
@@ -18,7 +18,7 @@ class _ProgressListenerToQueue(ProgressListener):
|
|
| 18 |
self.progress_total = 0
|
| 19 |
self.prev_progress = 0
|
| 20 |
|
| 21 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
| 22 |
delta = current - self.prev_progress
|
| 23 |
self.prev_progress = current
|
| 24 |
self.progress_total = total
|
|
@@ -178,7 +178,7 @@ class ParallelTranscription(AbstractTranscription):
|
|
| 178 |
|
| 179 |
total_progress += delta
|
| 180 |
if progress_listener is not None:
|
| 181 |
-
progress_listener.on_progress(total_progress, total_duration)
|
| 182 |
|
| 183 |
results = results_async.get()
|
| 184 |
|
|
|
|
| 18 |
self.progress_total = 0
|
| 19 |
self.prev_progress = 0
|
| 20 |
|
| 21 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
| 22 |
delta = current - self.prev_progress
|
| 23 |
self.prev_progress = current
|
| 24 |
self.progress_total = total
|
|
|
|
| 178 |
|
| 179 |
total_progress += delta
|
| 180 |
if progress_listener is not None:
|
| 181 |
+
progress_listener.on_progress(total_progress, total_duration, desc="Transcribe parallel")
|
| 182 |
|
| 183 |
results = results_async.get()
|
| 184 |
|
src/whisper/fasterWhisperContainer.py
CHANGED
|
@@ -150,7 +150,7 @@ class FasterWhisperCallback(AbstractWhisperCallback):
|
|
| 150 |
segments.append(segment)
|
| 151 |
|
| 152 |
if progress_listener is not None:
|
| 153 |
-
progress_listener.on_progress(segment.end, info.duration)
|
| 154 |
if verbose:
|
| 155 |
print("[{}->{}] {}".format(format_timestamp(segment.start, True), format_timestamp(segment.end, True),
|
| 156 |
segment.text))
|
|
|
|
| 150 |
segments.append(segment)
|
| 151 |
|
| 152 |
if progress_listener is not None:
|
| 153 |
+
progress_listener.on_progress(segment.end, info.duration, "Transcribe")
|
| 154 |
if verbose:
|
| 155 |
print("[{}->{}] {}".format(format_timestamp(segment.start, True), format_timestamp(segment.end, True),
|
| 156 |
segment.text))
|