Update whisper.py
whisper.py  CHANGED  (+10 -10)
@@ -204,8 +204,8 @@ def processing_vad_threshold(audio, output_vad, threshold, max_duration, concate
 def format_audio(audio_path):
     input_audio, sample_rate = torchaudio.load(audio_path)
 
-    if input_audio.shape[0] == 2: #stereo2mono
-        input_audio = torch.mean(input_audio, dim=0, keepdim=True)
+    #if input_audio.shape[0] == 2: #stereo2mono
+    #    input_audio = torch.mean(input_audio, dim=0, keepdim=True)
 
     resampler = torchaudio.transforms.Resample(sample_rate, 16000)
     input_audio = resampler(input_audio)
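This hunk disables the torch-side downmix in format_audio, so the function now resamples the audio with whatever channel layout torchaudio.load returns. For reference, a minimal standalone sketch of the step that was commented out; the function name and the return value here are illustrative, not taken from the file:

import torch
import torchaudio

def downmix_and_resample(audio_path, target_rate=16000):
    # torchaudio.load returns a (channels, samples) float tensor and the sample rate
    input_audio, sample_rate = torchaudio.load(audio_path)

    # The branch the commit comments out: average the two channels into one
    if input_audio.shape[0] == 2:  # stereo2mono
        input_audio = torch.mean(input_audio, dim=0, keepdim=True)

    # Whisper-style models expect 16 kHz input, so resample unconditionally
    resampler = torchaudio.transforms.Resample(sample_rate, target_rate)
    return resampler(input_audio)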
@@ -220,12 +220,12 @@ def transcribe_pipeline(audio, task):
 def generate(audio_path, use_v5):
     audio = AudioSegment.from_wav(audio_path)
 
-    temp_mono_path = None
-    if audio.channels != 1: #stereo2mono
-        audio = audio.set_channels(1)
-        temp_mono_path = "temp_mono.wav"
-        audio.export(temp_mono_path, format="wav")
-        audio_path = temp_mono_path
+    #temp_mono_path = None
+    #if audio.channels != 1: #stereo2mono
+    #    audio = audio.set_channels(1)
+    #    temp_mono_path = "temp_mono.wav"
+    #    audio.export(temp_mono_path, format="wav")
+    #    audio_path = temp_mono_path
 
     output_vad = pipeline_vad(audio_path)
     concatenated_segment = AudioSegment.empty()
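The second hunk comments out the pydub-based equivalent in generate: a stereo file was previously downmixed with set_channels(1), exported to temp_mono.wav, and that temporary path was passed to the VAD stage instead of the original. A small standalone sketch of that step; the helper name and the returned flag are assumptions for illustration:

from pydub import AudioSegment

def ensure_mono_wav(audio_path, temp_mono_path="temp_mono.wav"):
    # Returns (path_to_use, created_temp_file)
    audio = AudioSegment.from_wav(audio_path)
    if audio.channels != 1:  # stereo2mono
        audio = audio.set_channels(1)
        audio.export(temp_mono_path, format="wav")
        return temp_mono_path, True
    return audio_path, False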
@@ -239,7 +239,7 @@ def generate(audio_path, use_v5):
 
     clean_output = post_process_transcription(output)
 
-    if temp_mono_path and os.path.exists(temp_mono_path):
-        os.remove(temp_mono_path)
+    #if temp_mono_path and os.path.exists(temp_mono_path):
+    #    os.remove(temp_mono_path)
 
     return clean_output
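The last hunk drops the matching cleanup of temp_mono.wav, which is consistent now that no temporary file is written. If the conversion were ever re-enabled, a try/finally block would guarantee cleanup even when transcription fails; this is a sketch only, built on the hypothetical ensure_mono_wav helper above rather than the committed code:

import os

def transcribe_mono(audio_path, transcribe):
    # 'transcribe' stands in for the VAD + Whisper pipeline used in generate()
    path_to_use, created_temp = ensure_mono_wav(audio_path)
    try:
        return transcribe(path_to_use)
    finally:
        if created_temp and os.path.exists(path_to_use):
            os.remove(path_to_use)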