Spaces:
Running
on
Zero
Running
on
Zero
Update whisper_cs.py
#35
by
ssolito
- opened
- whisper_cs.py +5 -27
whisper_cs.py
CHANGED
|
@@ -105,6 +105,7 @@ def cleanup_temp_files(*file_paths):
|
|
| 105 |
if path and os.path.exists(path):
|
| 106 |
os.remove(path)
|
| 107 |
|
|
|
|
| 108 |
try:
|
| 109 |
faster_model = WhisperModel(
|
| 110 |
MODEL_PATH_V2_FAST,
|
|
@@ -118,6 +119,9 @@ except RuntimeError as e:
|
|
| 118 |
device="cpu",
|
| 119 |
compute_type="int8"
|
| 120 |
)
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
def load_whisper_model(model_path: str):
|
| 123 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -165,33 +169,7 @@ def transcribe_audio(model, audio_path: str) -> Dict:
|
|
| 165 |
'error': str(e),
|
| 166 |
'success': False
|
| 167 |
}
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
diarization_pipeline = DiarizationPipeline.from_pretrained("./pyannote/config.yaml")
|
| 172 |
-
align_model, metadata = whisperx.load_align_model(language_code="en", device=DEVICE)
|
| 173 |
-
|
| 174 |
-
asr_pipe = pipeline(
|
| 175 |
-
task="automatic-speech-recognition",
|
| 176 |
-
model=MODEL_PATH_1,
|
| 177 |
-
chunk_length_s=30,
|
| 178 |
-
device=DEVICE,
|
| 179 |
-
return_timestamps=True)
|
| 180 |
-
|
| 181 |
-
def diarization(audio_path):
|
| 182 |
-
diarization_result = diarization_pipeline(audio_path)
|
| 183 |
-
diarized_segments = list(diarization_result.itertracks(yield_label=True))
|
| 184 |
-
print('diarized_segments',diarized_segments)
|
| 185 |
-
return diarized_segments
|
| 186 |
-
|
| 187 |
-
def asr(audio_path):
|
| 188 |
-
print(f"[DEBUG] Starting ASR on audio: {audio_path}")
|
| 189 |
-
asr_result = asr_pipe(audio_path, return_timestamps=True)
|
| 190 |
-
print(f"[DEBUG] Raw ASR result: {asr_result}")
|
| 191 |
-
asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
|
| 192 |
-
asr_segments = assign_timestamps(asr_segments, audio_path)
|
| 193 |
-
return asr_segments
|
| 194 |
-
|
| 195 |
|
| 196 |
def generate(audio_path, use_v2_fast):
|
| 197 |
|
|
|
|
| 105 |
if path and os.path.exists(path):
|
| 106 |
os.remove(path)
|
| 107 |
|
| 108 |
+
'''
|
| 109 |
try:
|
| 110 |
faster_model = WhisperModel(
|
| 111 |
MODEL_PATH_V2_FAST,
|
|
|
|
| 119 |
device="cpu",
|
| 120 |
compute_type="int8"
|
| 121 |
)
|
| 122 |
+
'''
|
| 123 |
+
|
| 124 |
+
faster_model = WhisperModel(MODEL_PATH_V2_FAST, device=DEVICE, compute_type="int8")
|
| 125 |
|
| 126 |
def load_whisper_model(model_path: str):
|
| 127 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 169 |
'error': str(e),
|
| 170 |
'success': False
|
| 171 |
}
|
| 172 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
def generate(audio_path, use_v2_fast):
|
| 175 |
|