Support CLI in faster-whisper
- app.py +4 -1
- cli.py +6 -2
- config.json5 +3 -1
- src/config.py +2 -3
- src/whisper/abstractWhisperContainer.py +12 -3
- src/whisper/fasterWhisperContainer.py +41 -8
- src/whisper/whisperContainer.py +14 -4
- src/whisper/whisperFactory.py +4 -3
    	
app.py
CHANGED

@@ -126,7 +126,8 @@ class WhisperTranscriber:
             selectedModel = modelName if modelName is not None else "base"
 
             model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation, 
-                                             model_name=selectedModel, cache=self.model_cache, models=self.app_config.models)
+                                             model_name=selectedModel, compute_type=self.app_config.compute_type, 
+                                             cache=self.model_cache, models=self.app_config.models)
 
             # Result
             download = []
@@ -518,6 +519,8 @@ if __name__ == '__main__':
                         help="directory to save the outputs")
     parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"],\
                         help="the Whisper implementation to use")
+    parser.add_argument("--compute_type", type=str, default=default_app_config.compute_type, choices=["int8", "int8_float16", "int16", "float16"], \
+                        help="the compute type to use for inference")
 
     args = parser.parse_args().__dict__
 
cli.py
CHANGED

@@ -80,6 +80,8 @@ def cli():
                         help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
     parser.add_argument("--fp16", type=str2bool, default=app_config.fp16, \
                         help="whether to perform inference in fp16; True by default")
+    parser.add_argument("--compute_type", type=str, default=app_config.compute_type, choices=["int8", "int8_float16", "int16", "float16"], \
+                        help="the compute type to use for inference")
 
     parser.add_argument("--temperature_increment_on_fallback", type=optional_float, default=app_config.temperature_increment_on_fallback, \
                         help="temperature to increase when falling back when the decoding fails to meet either of the thresholds below")
@@ -119,12 +121,14 @@ def cli():
     vad_cpu_cores = args.pop("vad_cpu_cores")
     auto_parallel = args.pop("auto_parallel")
 
+    compute_type = args.pop("compute_type")
+
     transcriber = WhisperTranscriber(delete_uploaded_files=False, vad_cpu_cores=vad_cpu_cores, app_config=app_config)
     transcriber.set_parallel_devices(args.pop("vad_parallel_devices"))
     transcriber.set_auto_parallel(auto_parallel)
 
-    model = create_whisper_container(whisper_implementation=whisper_implementation, 
-                                     device=device, download_root=model_dir, models=app_config.models)
+    model = create_whisper_container(whisper_implementation=whisper_implementation, model_name=model_name, 
+                                     device=device, compute_type=compute_type, download_root=model_dir, models=app_config.models)
 
     if (transcriber._has_parallel_devices()):
        print("Using parallel devices:", transcriber.parallel_device_list)
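With both flags wired through cli.py, the implementation and compute type can be chosen per run, for example (the positional audio argument is assumed to follow the existing cli.py interface and is not part of this change):

    python cli.py --whisper_implementation faster-whisper --compute_type int8 audio.mp3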
    	
config.json5
CHANGED

@@ -104,7 +104,7 @@
     // Number of beams in beam search, only applicable when temperature is zero
     "beam_size": 5,
     // Optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search
-    "patience": null,
+    "patience": 1,
     // Optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default
     "length_penalty": null,
     // Comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations
@@ -115,6 +115,8 @@
     "condition_on_previous_text": true,
     // Whether to perform inference in fp16; True by default
     "fp16": true,
+    // The compute type used by faster-whisper. Can be "int8", "int16" or "float16".
+    "compute_type": "float16",
     // Temperature to increase when falling back when the decoding fails to meet either of the thresholds below
     "temperature_increment_on_fallback": 0.2,
     // If the gzip compression ratio is higher than this value, treat the decoding as failed
src/config.py
CHANGED

@@ -39,12 +39,10 @@ class ApplicationConfig:
                  patience: float = None, length_penalty: float = None,
                  suppress_tokens: str = "-1", initial_prompt: str = None,
                  condition_on_previous_text: bool = True, fp16: bool = True,
+                 compute_type: str = "float16", 
                  temperature_increment_on_fallback: float = 0.2, compression_ratio_threshold: float = 2.4,
                  logprob_threshold: float = -1.0, no_speech_threshold: float = 0.6):
 
-        if device is None:
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-
         self.models = models
 
         # WebUI settings
@@ -82,6 +80,7 @@ class ApplicationConfig:
         self.initial_prompt = initial_prompt
         self.condition_on_previous_text = condition_on_previous_text
         self.fp16 = fp16
+        self.compute_type = compute_type
         self.temperature_increment_on_fallback = temperature_increment_on_fallback
         self.compression_ratio_threshold = compression_ratio_threshold
         self.logprob_threshold = logprob_threshold
src/whisper/abstractWhisperContainer.py
CHANGED

@@ -33,10 +33,12 @@ class AbstractWhisperCallback:
             return prompt1 + " " + prompt2
 
 class AbstractWhisperContainer:
-    def __init__(self, model_name: str, device: str = None, download_root: str = None,
-                 cache: ModelCache = None, models: List[ModelConfig] = []):
+    def __init__(self, model_name: str, device: str = None, compute_type: str = "float16",
+                 download_root: str = None,
+                 cache: ModelCache = None, models: List[ModelConfig] = []):
         self.model_name = model_name
         self.device = device
+        self.compute_type = compute_type
         self.download_root = download_root
         self.cache = cache
 
@@ -87,13 +89,20 @@ class AbstractWhisperContainer:
 
     # This is required for multiprocessing
     def __getstate__(self):
-        return { "model_name": self.model_name, "device": self.device, "download_root": self.download_root, "models": self.models }
+        return { 
+            "model_name": self.model_name, 
+            "device": self.device, 
+            "download_root": self.download_root, 
+            "models": self.models, 
+            "compute_type": self.compute_type 
+        }
 
     def __setstate__(self, state):
         self.model_name = state["model_name"]
         self.device = state["device"]
         self.download_root = state["download_root"]
         self.models = state["models"]
+        self.compute_type = state["compute_type"]
         self.model = None
         # Depickled objects must use the global cache
         self.cache = GLOBAL_MODEL_CACHE
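The explicit __getstate__/__setstate__ pair is what lets a container be pickled into worker processes when parallel VAD devices are used: only the lightweight settings travel, and the model itself is reloaded on the other side. A minimal sketch of that round trip, assuming the base class also initializes the models and model attributes referenced above (the "tiny" model name and int8 compute type are illustrative):

    import pickle

    from src.whisper.fasterWhisperContainer import FasterWhisperContainer

    # Illustrative container; real code passes the model list from config.json5.
    container = FasterWhisperContainer("tiny", device="auto", compute_type="int8")

    # __getstate__ keeps model_name, device, compute_type, download_root and models;
    # the loaded model and the per-process cache are rebuilt on the other side.
    restored = pickle.loads(pickle.dumps(container))

    assert restored.compute_type == "int8"
    assert restored.model is None  # reloaded lazily via get_model() in the worker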
    	
src/whisper/fasterWhisperContainer.py
CHANGED

@@ -1,5 +1,5 @@
 import os
-from typing import List
+from typing import List, Union
 
 from faster_whisper import WhisperModel, download_model
 from src.config import ModelConfig
@@ -8,10 +8,10 @@ from src.modelCache import ModelCache
 from src.whisper.abstractWhisperContainer import AbstractWhisperCallback, AbstractWhisperContainer
 
 class FasterWhisperContainer(AbstractWhisperContainer):
-    def __init__(self, model_name: str, device: str = None, download_root: str = None,
-                 cache: ModelCache = None,
-                 models: List[ModelConfig] = []):
-        super().__init__(model_name, device, download_root, cache, models)
+    def __init__(self, model_name: str, device: str = None, compute_type: str = "float16",
+                 download_root: str = None,
+                 cache: ModelCache = None, models: List[ModelConfig] = []):
+        super().__init__(model_name, device, compute_type, download_root, cache, models)
 
     def ensure_downloaded(self):
         """
@@ -35,7 +35,7 @@ class FasterWhisperContainer(AbstractWhisperContainer):
         return None
 
     def _create_model(self):
-        print("Loading faster whisper model " + self.model_name)
+        print("Loading faster whisper model " + self.model_name + " for device " + str(self.device))
         model_config = self._get_model_config()
 
         if model_config.type == "whisper" and model_config.url not in ["tiny", "base", "small", "medium", "large", "large-v2"]:
@@ -46,7 +46,7 @@ class FasterWhisperContainer(AbstractWhisperContainer):
         if (device is None):
             device = "auto"
 
-        model = WhisperModel(model_config.url, device=device, compute_type="float16")
+        model = WhisperModel(model_config.url, device=device, compute_type=self.compute_type)
         return model
 
     def create_callback(self, language: str = None, task: str = None, initial_prompt: str = None, **decodeOptions: dict):
@@ -96,10 +96,33 @@ class FasterWhisperCallback(AbstractWhisperCallback):
         model: WhisperModel = self.model_container.get_model()
         language_code = self._lookup_language_code(self.language) if self.language else None
 
+        # Copy decode options and remove options that are not supported by faster-whisper
+        decodeOptions = self.decodeOptions.copy()
+        verbose = decodeOptions.pop("verbose", None)
+
+        logprob_threshold = decodeOptions.pop("logprob_threshold", None)
+
+        patience = decodeOptions.pop("patience", None)
+        length_penalty = decodeOptions.pop("length_penalty", None)
+        suppress_tokens = decodeOptions.pop("suppress_tokens", None)
+
+        if (decodeOptions.pop("fp16", None) is not None):
+            print("WARNING: fp16 option is ignored by faster-whisper - use compute_type instead.")
+
+        # Fix up decode options
+        if (logprob_threshold is not None):
+            decodeOptions["log_prob_threshold"] = logprob_threshold
+
+        decodeOptions["patience"] = float(patience) if patience is not None else 1.0
+        decodeOptions["length_penalty"] = float(length_penalty) if length_penalty is not None else 1.0
+
+        # See if suppress_tokens is a string - if so, convert it to a list of ints
+        decodeOptions["suppress_tokens"] = self._split_suppress_tokens(suppress_tokens)
+
         segments_generator, info = model.transcribe(audio, \
             language=language_code if language_code else detected_language, task=self.task, \
             initial_prompt=self._concat_prompt(self.initial_prompt, prompt) if segment_index == 0 else prompt, \
-            **self.decodeOptions
+            **decodeOptions
         )
 
         segments = []
@@ -109,6 +132,8 @@ class FasterWhisperCallback(AbstractWhisperCallback):
 
             if progress_listener is not None:
                 progress_listener.on_progress(segment.end, info.duration)
+            if verbose:
+                print(segment.text)
 
         text = " ".join([segment.text for segment in segments])
 
@@ -141,6 +166,14 @@ class FasterWhisperCallback(AbstractWhisperCallback):
             progress_listener.on_finished()
         return result
 
+    def _split_suppress_tokens(self, suppress_tokens: Union[str, List[int]]):
+        if (suppress_tokens is None):
+            return None
+        if (isinstance(suppress_tokens, list)):
+            return suppress_tokens
+
+        return [int(token) for token in suppress_tokens.split(",")]
+
     def _lookup_language_code(self, language: str):
         lookup = {
             "english": "en", "chinese": "zh-cn", "german": "de", "spanish": "es", "russian": "ru", "korean": "ko",
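Most of the new callback code is option translation: the webui-level decode options are renamed or converted to what faster-whisper's transcribe() expects (logprob_threshold becomes log_prob_threshold, suppress_tokens may arrive as a comma-separated string, and fp16 is dropped in favour of compute_type). A standalone sketch of that mapping, mirroring the diff above rather than any public faster-whisper API:

    # Sketch of the option translation performed in FasterWhisperCallback above.
    def translate_decode_options(options: dict) -> dict:
        result = dict(options)
        result.pop("verbose", None)                      # handled by printing segments
        result.pop("fp16", None)                         # superseded by compute_type
        logprob = result.pop("logprob_threshold", None)  # renamed by faster-whisper
        if logprob is not None:
            result["log_prob_threshold"] = logprob
        result["patience"] = float(result.pop("patience", None) or 1.0)
        result["length_penalty"] = float(result.pop("length_penalty", None) or 1.0)
        tokens = result.pop("suppress_tokens", None)
        if isinstance(tokens, str):
            tokens = [int(t) for t in tokens.split(",")]
        result["suppress_tokens"] = tokens
        return result

    # Example with the webui defaults from config.json5:
    print(translate_decode_options({"logprob_threshold": -1.0, "suppress_tokens": "-1", "fp16": True}))
    # -> {'log_prob_threshold': -1.0, 'patience': 1.0, 'length_penalty': 1.0, 'suppress_tokens': [-1]}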
    	
src/whisper/whisperContainer.py
CHANGED

@@ -4,6 +4,7 @@ import os
 import sys
 from typing import List
 from urllib.parse import urlparse
+import torch
 import urllib3
 from src.hooks.progressListener import ProgressListener
 
@@ -18,9 +19,12 @@ from src.utils import download_file
 from src.whisper.abstractWhisperContainer import AbstractWhisperCallback, AbstractWhisperContainer
 
 class WhisperContainer(AbstractWhisperContainer):
-    def __init__(self, model_name: str, device: str = None, download_root: str = None,
-                 cache: ModelCache = None, models: List[ModelConfig] = []):
-        super().__init__(model_name, device, download_root, cache, models)
+    def __init__(self, model_name: str, device: str = None, compute_type: str = "float16",
+                 download_root: str = None,
+                 cache: ModelCache = None, models: List[ModelConfig] = []):
+        if device is None:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        super().__init__(model_name, device, compute_type, download_root, cache, models)
 
     def ensure_downloaded(self):
         """
@@ -184,8 +188,14 @@ class WhisperCallback(AbstractWhisperCallback):
             return self._transcribe(model, audio, segment_index, prompt, detected_language)
 
     def _transcribe(self, model: Whisper, audio, segment_index: int, prompt: str, detected_language: str):
+        decodeOptions = self.decodeOptions.copy()
+
+        # Add fp16
+        if self.model_container.compute_type in ["fp16", "float16"]:
+            decodeOptions["fp16"] = True
+
         return model.transcribe(audio, \
             language=self.language if self.language else detected_language, task=self.task, \
             initial_prompt=self._concat_prompt(self.initial_prompt, prompt) if segment_index == 0 else prompt, \
-            **self.decodeOptions
+            **decodeOptions
         )
src/whisper/whisperFactory.py
CHANGED

@@ -4,15 +4,16 @@ from src.config import ModelConfig
 from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 
 def create_whisper_container(whisper_implementation: str, 
-                             model_name: str, device: str = None, download_root: str = None,
+                             model_name: str, device: str = None, compute_type: str = "float16",
+                             download_root: str = None,
                              cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
     print("Creating whisper container for " + whisper_implementation)
 
     if (whisper_implementation == "whisper"):
         from src.whisper.whisperContainer import WhisperContainer
-        return WhisperContainer(model_name, device, download_root, cache, models)
+        return WhisperContainer(model_name=model_name, device=device, compute_type=compute_type, download_root=download_root, cache=cache, models=models)
     elif (whisper_implementation == "faster-whisper" or whisper_implementation == "faster_whisper"):
         from src.whisper.fasterWhisperContainer import FasterWhisperContainer
-        return FasterWhisperContainer(model_name, device, download_root, cache, models)
+        return FasterWhisperContainer(model_name=model_name, device=device, compute_type=compute_type, download_root=download_root, cache=cache, models=models)
     else:
         raise ValueError("Unknown Whisper implementation: " + whisper_implementation)
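Both entry points now go through this factory in the same way (see app.py and cli.py above). For reference, a minimal call might look like the following; the ModelConfig keyword arguments are an assumption about its constructor, and the model name is illustrative:

    from src.config import ModelConfig
    from src.whisper.whisperFactory import create_whisper_container

    # Assumed ModelConfig fields; name/url/type are referenced elsewhere in the repo.
    models = [ModelConfig(name="medium", url="medium", type="whisper")]

    container = create_whisper_container(whisper_implementation="faster-whisper",
                                         model_name="medium", device=None,
                                         compute_type="float16", models=models)

    # Remaining decode options (e.g. beam_size) are forwarded via **decodeOptions.
    callback = container.create_callback(language="english", task="transcribe", beam_size=5)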
