Athspi committed on
Commit
13b616e
·
verified ·
1 Parent(s): 3a2dc34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -2
app.py CHANGED
@@ -187,6 +187,21 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
187
 
188
  return output_path
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  def detect_voice_activity(audio_file, threshold=0.02):
191
  """
192
  Detect voice activity in the audio file and trim the audio to include only voice segments.
@@ -198,8 +213,11 @@ def detect_voice_activity(audio_file, threshold=0.02):
198
  Returns:
199
  str: Path to the output audio file with only voice segments.
200
  """
201
- # Load the audio file
202
- sample_rate, data = wavfile.read(audio_file)
 
 
 
203
 
204
  # Normalize the audio data
205
  if data.dtype != np.float32:
@@ -230,6 +248,9 @@ def detect_voice_activity(audio_file, threshold=0.02):
230
  output_path = "voice_trimmed_audio.wav"
231
  wavfile.write(output_path, sample_rate, trimmed_audio)
232
 
 
 
 
233
  return output_path
234
 
235
  def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
 
187
 
188
  return output_path
189
 
190
def convert_to_wav(audio_file):
    """
    Convert the input audio file to WAV format.

    The WAV data is written to a newly created, uniquely named temporary
    file instead of a fixed name in the working directory. This way the
    output can never overwrite the input file, and two conversions can
    never clobber each other's result. The caller owns the returned file
    and is responsible for deleting it once it is no longer needed.

    Args:
        audio_file (str): Path to the input audio file.

    Returns:
        str: Path to the converted WAV file.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # Load first: if the input cannot be read, no temporary file is created.
    audio = AudioSegment.from_file(audio_file)

    # Reserve a unique path and close the handle before exporting, so the
    # export can open the path itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    try:
        audio.export(wav_path, format="wav")
    except Exception:
        # Do not leave an empty/partial temporary file behind on failure.
        os.remove(wav_path)
        raise
    return wav_path
204
+
205
  def detect_voice_activity(audio_file, threshold=0.02):
206
  """
207
  Detect voice activity in the audio file and trim the audio to include only voice segments.
 
213
  Returns:
214
  str: Path to the output audio file with only voice segments.
215
  """
216
+ # Convert the input audio to WAV format
217
+ wav_path = convert_to_wav(audio_file)
218
+
219
+ # Load the WAV file
220
+ sample_rate, data = wavfile.read(wav_path)
221
 
222
  # Normalize the audio data
223
  if data.dtype != np.float32:
 
248
  output_path = "voice_trimmed_audio.wav"
249
  wavfile.write(output_path, sample_rate, trimmed_audio)
250
 
251
+ # Clean up the converted WAV file
252
+ os.remove(wav_path)
253
+
254
  return output_path
255
 
256
  def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):