asr-inference

Running on Zero

App · Files · Community

Update whisper_cs.py

#35

by ssolito - opened Aug 4

base: refs/heads/main

←

from: refs/pr/35

Discussion Files changed

-27

Files changed (1) hide show

whisper_cs.py +5 -27

whisper_cs.py CHANGED Viewed

@@ -105,6 +105,7 @@ def cleanup_temp_files(*file_paths):
         if path and os.path.exists(path):
             os.remove(path)
 try:
     faster_model = WhisperModel(
         MODEL_PATH_V2_FAST,
@@ -118,6 +119,9 @@ except RuntimeError as e:
         device="cpu",
         compute_type="int8"
     )
 def load_whisper_model(model_path: str):
     device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -165,33 +169,7 @@ def transcribe_audio(model, audio_path: str) -> Dict:
             'error': str(e),
             'success': False
         }
-diarization_pipeline = DiarizationPipeline.from_pretrained("./pyannote/config.yaml")
-align_model, metadata = whisperx.load_align_model(language_code="en", device=DEVICE)
-asr_pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_PATH_1,
-    chunk_length_s=30,
-    device=DEVICE,
-    return_timestamps=True)
-def diarization(audio_path):
-    diarization_result = diarization_pipeline(audio_path)
-    diarized_segments = list(diarization_result.itertracks(yield_label=True))
-    print('diarized_segments',diarized_segments)
-    return diarized_segments
-def asr(audio_path):
-    print(f"[DEBUG] Starting ASR on audio: {audio_path}")
-    asr_result = asr_pipe(audio_path, return_timestamps=True)
-    print(f"[DEBUG] Raw ASR result: {asr_result}")
-    asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
-    asr_segments = assign_timestamps(asr_segments, audio_path)
-    return asr_segments
 def generate(audio_path, use_v2_fast):

         if path and os.path.exists(path):
             os.remove(path)
+'''
 try:
     faster_model = WhisperModel(
         MODEL_PATH_V2_FAST,
         device="cpu",
         compute_type="int8"
     )
+'''
+faster_model = WhisperModel(MODEL_PATH_V2_FAST, device=DEVICE, compute_type="int8")
 def load_whisper_model(model_path: str):
     device = "cuda" if torch.cuda.is_available() else "cpu"
             'error': str(e),
             'success': False
         }
 def generate(audio_path, use_v2_fast):