Update whisper_cs_dev.py
Deleted the fake model hack
whisper_cs_dev.py  +5 -20  CHANGED

@@ -11,7 +11,7 @@ from pathlib import Path
 import glob
 import ctypes
 
-from settings import DEBUG_MODE, MODEL_PATH_V2_FAST, MODEL_PATH_V2, LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH,
+from settings import DEBUG_MODE, MODEL_PATH_V2_FAST, MODEL_PATH_V2, LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH, RESAMPLING_FREQ
 
 def load_cudnn():
 

@@ -93,15 +93,9 @@ def load_model(use_v2_fast, device, compute_type):
         device = device,
     )
 
-    # HACK we need to do this for strange reasons.
-    # If we don't do this, we get:
-    #Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory
-    #fake_model = whisper_ts.load_model(MODEL_PATH_V2, device=device)
-    fake_model = None
-
     if DEBUG_MODE: print(f"Exiting load_model function...")
 
-    return model
+    return model
 
 
 def split_input_stereo_channels(audio_path):

@@ -202,19 +196,10 @@ def transcribe_audio_no_fast_model(model, audio_path):
     if DEBUG_MODE: print(f"Exited transcribe_audio_no_fast_model function.")
 
 
-def transcribe_channels(left_waveform, right_waveform, model, use_v2_fast
+def transcribe_channels(left_waveform, right_waveform, model, use_v2_fast):
 
     if DEBUG_MODE: print(f"Entering transcribe_channels function...")
 
-    # HACK we need to do this for strange reasons.
-    # If we don't do this, we get:
-    #Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory
-    #fake_result = whisper_ts.transcribe(
-    #    fake_model,
-    #    FAKE_AUDIO_PATH,
-    #    beam_size=1,
-    #)
-
     if DEBUG_MODE: print(f"Preparing to transcribe...")
 
     if use_v2_fast:

@@ -354,10 +339,10 @@ def generate(audio_path, use_v2_fast):
 
     load_cudnn()
     device, compute_type = get_settings()
-    model
+    model = load_model(use_v2_fast, device, compute_type)
     split_input_stereo_channels(audio_path)
     left_waveform, right_waveform = process_waveforms()
-    left_result, right_result = transcribe_channels(left_waveform, right_waveform, model, use_v2_fast
+    left_result, right_result = transcribe_channels(left_waveform, right_waveform, model, use_v2_fast)
     output = post_process_transcripts(left_result, right_result, use_v2_fast)
     cleanup_temp_files(LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH)
 
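
The diff only shows the call site of load_cudnn(); its body lies outside the changed hunks. Given the glob and ctypes imports and the libcudnn_ops_infer.so.8 error quoted in the deleted comments, the function presumably preloads cuDNN's shared libraries before any model is created, which is what would make the fake-model workaround unnecessary. A minimal sketch of that pattern, assuming cuDNN is installed from the nvidia-cudnn pip wheel; the package path, library names and docstring below are assumptions, not the file's actual code:

import ctypes
import glob
import os

import nvidia.cudnn  # assumption: cuDNN ships via an nvidia-cudnn-cu* pip wheel


def load_cudnn():
    """Sketch: preload the bundled cuDNN shared objects with RTLD_GLOBAL so that
    libraries loaded later (e.g. a CTranslate2/faster-whisper backend) can resolve
    symbols such as libcudnn_ops_infer.so.8 without LD_LIBRARY_PATH tweaks."""
    # The wheel places its shared objects under .../site-packages/nvidia/cudnn/lib
    lib_dir = os.path.join(os.path.dirname(nvidia.cudnn.__file__), "lib")
    for lib_path in sorted(glob.glob(os.path.join(lib_dir, "libcudnn*.so*"))):
        # RTLD_GLOBAL makes the symbols visible to libraries dlopen'ed afterwards
        ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)

With the symbols made visible this way, there is no need to load a throwaway whisper_ts model just to drag cuDNN into the process, which is why the fake_model/fake_result blocks could be deleted.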
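
For context, a hypothetical driver for the rewired generate() entry point after this change; the audio path is invented, and whether generate() returns the post-processed output is not visible in the hunk shown:

# Hypothetical usage: the model is now loaded once inside generate() and passed
# explicitly to transcribe_channels(), instead of relying on the removed fake_model hack.
if __name__ == "__main__":
    result = generate("stereo_call_example.wav", use_v2_fast=True)  # invented file name
    print(result)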