	Fix WHISPER_IMPLEMENTATION argument
Files changed:
- app.py +24 -19
- cli.py +7 -3
- dockerfile +12 -2
- requirements-fastWhisper.txt → requirements-fasterWhisper.txt +2 -1
- src/whisper/whisperFactory.py +2 -0
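Taken together, the commit threads the implementation choice through every layer: app.py fixes a reference to the undefined app_config inside WhisperTranscriber (now self.app_config), app.py and cli.py let a WHISPER_IMPLEMENTATION environment variable override the configured default, the dockerfile turns the same name into a build argument that selects which requirements file to install, and the requirements file is renamed to match the path the dockerfile expects.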
    	
app.py CHANGED

@@ -125,7 +125,7 @@ class WhisperTranscriber:
             selectedLanguage = languageName.lower() if len(languageName) > 0 else None
             selectedModel = modelName if modelName is not None else "base"
 
-            model = create_whisper_container(whisper_implementation=app_config.whisper_implementation,
+            model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
                                              model_name=selectedModel, cache=self.model_cache, models=self.app_config.models)
 
             # Result
@@ -485,38 +485,43 @@ def create_ui(app_config: ApplicationConfig):
     ui.close()
 
 if __name__ == '__main__':
-    app_config = ApplicationConfig.create_default()
-    whisper_models = app_config.get_model_names()
+    default_app_config = ApplicationConfig.create_default()
+    whisper_models = default_app_config.get_model_names()
+
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", default_app_config.whisper_implementation)
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--input_audio_max_duration", type=int, default=app_config.input_audio_max_duration, \
+    parser.add_argument("--input_audio_max_duration", type=int, default=default_app_config.input_audio_max_duration, \
                         help="Maximum audio file length in seconds, or -1 for no limit.") # 600
-    parser.add_argument("--share", type=bool, default=app_config.share, \
+    parser.add_argument("--share", type=bool, default=default_app_config.share, \
                         help="True to share the app on HuggingFace.") # False
-    parser.add_argument("--server_name", type=str, default=app_config.server_name, \
+    parser.add_argument("--server_name", type=str, default=default_app_config.server_name, \
                         help="The host or IP to bind to. If None, bind to localhost.") # None
-    parser.add_argument("--server_port", type=int, default=app_config.server_port, \
+    parser.add_argument("--server_port", type=int, default=default_app_config.server_port, \
                         help="The port to bind to.") # 7860
-    parser.add_argument("--queue_concurrency_count", type=int, default=app_config.queue_concurrency_count, \
+    parser.add_argument("--queue_concurrency_count", type=int, default=default_app_config.queue_concurrency_count, \
                         help="The number of concurrent requests to process.") # 1
-    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=app_config.default_model_name, \
+    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=default_app_config.default_model_name, \
                         help="The default model name.") # medium
-    parser.add_argument("--default_vad", type=str, default=app_config.default_vad, \
+    parser.add_argument("--default_vad", type=str, default=default_app_config.default_vad, \
                         help="The default VAD.") # silero-vad
-    parser.add_argument("--vad_parallel_devices", type=str, default=app_config.vad_parallel_devices, \
+    parser.add_argument("--vad_parallel_devices", type=str, default=default_app_config.vad_parallel_devices, \
                         help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.") # ""
-    parser.add_argument("--vad_cpu_cores", type=int, default=app_config.vad_cpu_cores, \
+    parser.add_argument("--vad_cpu_cores", type=int, default=default_app_config.vad_cpu_cores, \
                         help="The number of CPU cores to use for VAD pre-processing.") # 1
-    parser.add_argument("--vad_process_timeout", type=float, default=app_config.vad_process_timeout, \
+    parser.add_argument("--vad_process_timeout", type=float, default=default_app_config.vad_process_timeout, \
                         help="The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.") # 1800
-    parser.add_argument("--auto_parallel", type=bool, default=app_config.auto_parallel, \
+    parser.add_argument("--auto_parallel", type=bool, default=default_app_config.auto_parallel, \
                         help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.") # False
-    parser.add_argument("--output_dir", "-o", type=str, default=app_config.output_dir, \
-                        help="directory to save the outputs")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
-                        help="the Whisper implementation to use")
+    parser.add_argument("--output_dir", "-o", type=str, default=default_app_config.output_dir, \
+                        help="directory to save the outputs")
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"],\
+                        help="the Whisper implementation to use")
 
     args = parser.parse_args().__dict__
 
-    updated_config = app_config.update(**args)
+    updated_config = default_app_config.update(**args)
+
+    print(f"Using {updated_config.whisper_implementation} for Whisper")
     create_ui(app_config=updated_config)
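Why the extra default_whisper_implementation variable: argparse only consults default= when the flag is absent, so the effective precedence is command-line flag, then the WHISPER_IMPLEMENTATION environment variable, then the config default. A minimal standalone sketch of that chain (the config object is reduced to a plain string here; the other names mirror the diff):

    import argparse
    import os

    # Stand-in for default_app_config.whisper_implementation in the real app.
    config_default = "whisper"

    # The environment variable overrides the config default...
    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", config_default)

    parser = argparse.ArgumentParser()
    parser.add_argument("--whisper_implementation", type=str,
                        default=default_whisper_implementation,
                        choices=["whisper", "faster-whisper"])

    # ...and an explicit flag overrides both.
    args = parser.parse_args(["--whisper_implementation", "faster-whisper"])
    print(args.whisper_implementation)  # faster-whisper, whatever the env var says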
    	
cli.py CHANGED

@@ -20,6 +20,9 @@ def cli():
     # For the CLI, we fall back to saving the output to the current directory
     output_dir = app_config.output_dir if app_config.output_dir is not None else "."
 
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", app_config.whisper_implementation)
+
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument("audio", nargs="+", type=str, \
                         help="audio file(s) to transcribe")
@@ -32,9 +35,9 @@ def cli():
     parser.add_argument("--output_dir", "-o", type=str, default=output_dir, \
                         help="directory to save the outputs")
     parser.add_argument("--verbose", type=str2bool, default=app_config.verbose, \
-                        help="whether to print out the progress and debug messages")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
-                        help="the Whisper implementation to use")
+                        help="whether to print out the progress and debug messages")
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"],\
+                        help="the Whisper implementation to use")
 
     parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
                         help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
@@ -95,6 +98,7 @@ def cli():
     os.makedirs(output_dir, exist_ok=True)
 
     whisper_implementation = args.pop("whisper_implementation")
+    print(f"Using {whisper_implementation} for Whisper")
 
     if model_name.endswith(".en") and args["language"] not in {"en", "English"}:
         warnings.warn(f"{model_name} is an English-only model but received '{args['language']}'; using English instead.")
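Two details in cli.py worth noting: the environment override falls back to the loaded app_config, so a config-file value still applies when WHISPER_IMPLEMENTATION is unset; and the chosen value is removed from args with pop() before use, presumably so the remaining argument dict can be forwarded to the transcriber without an unexpected whisper_implementation key. With this in place, an invocation along the lines of WHISPER_IMPLEMENTATION=faster-whisper python cli.py audio.mp3 (a hypothetical example, not shown in the diff) should select the faster-whisper backend without any flag.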
    	
dockerfile CHANGED

@@ -1,13 +1,23 @@
+# docker build -t whisper-webui --build-arg WHISPER_IMPLEMENTATION=whisper .
+
 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
 
+ARG WHISPER_IMPLEMENTATION=whisper
+ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
+
 ADD . /opt/whisper-webui/
 
 # Latest version of transformers-pytorch-gpu seems to lack tk.
 # Further, pip install fails, so we must upgrade pip first.
 RUN apt-get -y install python3-tk
-RUN  python3 -m pip install --upgrade pip &&\
-     python3 -m pip install -r /opt/whisper-webui/requirements.txt
+RUN  python3 -m pip install --upgrade pip
+
+RUN if [ "${WHISPER_IMPLEMENTATION}" = "whisper" ]; then \
+    python3 -m pip install -r /opt/whisper-webui/requirements.txt; \
+  else \
+    python3 -m pip install -r /opt/whisper-webui/requirements-fasterWhisper.txt; \
+  fi
 
 # Note: Models will be downloaded on demand to the directory /root/.cache/whisper.
 # You can also bind this directory in the container to somewhere on the host.
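The ARG/ENV pair makes the selection stick at both build time and run time: ARG decides which requirements file pip installs while the image builds, and ENV bakes the same value into the container so the os.environ.get() calls in app.py and cli.py pick it up on startup. Going by the comment at the top of the file, the alternative image should build with docker build -t whisper-webui --build-arg WHISPER_IMPLEMENTATION=faster-whisper . (an inference from the diff, not a command shown in it).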
    	
requirements-fastWhisper.txt → requirements-fasterWhisper.txt RENAMED

@@ -5,4 +5,5 @@ gradio==3.23.0
 yt-dlp
 json5
 torch
-torchaudio
+torchaudio
+more_itertools
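The rename is load-bearing for the dockerfile change above: its else branch installs /opt/whisper-webui/requirements-fasterWhisper.txt, so under the old name requirements-fastWhisper.txt the faster-whisper build would fail at that pip install step.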
    	
src/whisper/whisperFactory.py CHANGED

@@ -6,6 +6,8 @@ from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 def create_whisper_container(whisper_implementation: str,
                              model_name: str, device: str = None, download_root: str = None,
                              cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
+    print("Creating whisper container for " + whisper_implementation)
+
     if (whisper_implementation == "whisper"):
         from src.whisper.whisperContainer import WhisperContainer
         return WhisperContainer(model_name, device, download_root, cache, models)
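For reference, a hypothetical call site for the factory; the import path and parameter names come from the diff, while the model name and the existence of a matching faster-whisper branch are assumptions:

    from src.whisper.whisperFactory import create_whisper_container

    # The diff shows the "whisper" branch; "faster-whisper" presumably selects
    # a container that wraps the faster-whisper backend.
    container = create_whisper_container(whisper_implementation="faster-whisper",
                                         model_name="base")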