Add output_dir to the WebUI
app.py CHANGED
@@ -60,7 +60,8 @@ LANGUAGES = [
 WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
 
 class WhisperTranscriber:
-    def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None, vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES):
+    def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None,
+                 vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES, output_dir: str = None):
         self.model_cache = ModelCache()
         self.parallel_device_list = None
         self.gpu_parallel_context = None
@@ -71,6 +72,7 @@ class WhisperTranscriber:
         self.vad_model = None
         self.inputAudioMaxDuration = input_audio_max_duration
         self.deleteUploadedFiles = delete_uploaded_files
+        self.output_dir = output_dir
 
     def set_parallel_devices(self, vad_parallel_devices: str):
         self.parallel_device_list = [ device.strip() for device in vad_parallel_devices.split(",") ] if vad_parallel_devices else None
@@ -103,6 +105,8 @@ class WhisperTranscriber:
         downloadDirectory = tempfile.mkdtemp()
         source_index = 0
 
+        outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
+
         # Execute whisper
         for source in sources:
             source_prefix = ""
@@ -117,7 +121,7 @@ class WhisperTranscriber:
             result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow)
             filePrefix = slugify(source_prefix + source.get_short_name(), allow_unicode=True)
 
-            source_download, source_text, source_vtt = self.write_result(result, filePrefix, downloadDirectory)
+            source_download, source_text, source_vtt = self.write_result(result, filePrefix, outputDirectory)
 
             if len(sources) > 1:
                 # Add new line separators
@@ -332,8 +336,10 @@ class WhisperTranscriber:
 
 
 def create_ui(input_audio_max_duration, share=False, server_name: str = None, server_port: int = 7860,
-              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None, vad_process_timeout: float = None, vad_cpu_cores: int = 1, auto_parallel: bool = False):
-    ui = WhisperTranscriber(input_audio_max_duration, vad_process_timeout, vad_cpu_cores, DELETE_UPLOADED_FILES)
+              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None,
+              vad_process_timeout: float = None, vad_cpu_cores: int = 1, auto_parallel: bool = False,
+              output_dir: str = None):
+    ui = WhisperTranscriber(input_audio_max_duration, vad_process_timeout, vad_cpu_cores, DELETE_UPLOADED_FILES, output_dir)
 
     # Specify a list of devices to use for parallel processing
     ui.set_parallel_devices(vad_parallel_devices)
@@ -385,6 +391,7 @@ if __name__ == '__main__':
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
    parser.add_argument("--vad_process_timeout", type=float, default="1800", help="The number of seconds before inactivate processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
     parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
+    parser.add_argument("--output_dir", "-o", type=str, default=None, help="directory to save the outputs")
 
     args = parser.parse_args().__dict__
     create_ui(**args)
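For context, the core of the change is a fallback: results are still written to a throwaway tempfile.mkdtemp() download directory unless an output directory was explicitly configured, in which case files persist there. Below is a minimal sketch of that pattern; resolve_output_directory is a hypothetical helper name, not part of app.py, and it assumes a configured directory already exists.

import tempfile

def resolve_output_directory(output_dir: str = None) -> str:
    # Prefer the configured output directory; otherwise fall back to a
    # fresh temporary download directory (the WebUI's pre-change behaviour).
    return output_dir if output_dir is not None else tempfile.mkdtemp()

print(resolve_output_directory())               # e.g. /tmp/tmpk3x9d...
print(resolve_output_directory("transcripts"))  # transcripts

With the new --output_dir / -o flag wired through argparse, create_ui, and WhisperTranscriber, launching the app as, presumably, python app.py --output_dir ./transcripts would make the generated text, VTT, and download files land in ./transcripts rather than only in a temporary directory.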