Update app.py
Browse files
app.py
CHANGED
@@ -187,6 +187,21 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
187 |
|
188 |
return output_path
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
def detect_voice_activity(audio_file, threshold=0.02):
|
191 |
"""
|
192 |
Detect voice activity in the audio file and trim the audio to include only voice segments.
|
@@ -198,8 +213,11 @@ def detect_voice_activity(audio_file, threshold=0.02):
|
|
198 |
Returns:
|
199 |
str: Path to the output audio file with only voice segments.
|
200 |
"""
|
201 |
-
#
|
202 |
-
|
|
|
|
|
|
|
203 |
|
204 |
# Normalize the audio data
|
205 |
if data.dtype != np.float32:
|
@@ -230,6 +248,9 @@ def detect_voice_activity(audio_file, threshold=0.02):
|
|
230 |
output_path = "voice_trimmed_audio.wav"
|
231 |
wavfile.write(output_path, sample_rate, trimmed_audio)
|
232 |
|
|
|
|
|
|
|
233 |
return output_path
|
234 |
|
235 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
|
|
187 |
|
188 |
return output_path
|
189 |
|
190 |
+
def convert_to_wav(audio_file):
    """
    Convert the input audio file to WAV format.

    Args:
        audio_file (str): Path to the input audio file (any container/codec
            that pydub/ffmpeg can decode).

    Returns:
        str: Path to the converted WAV file. The file is created fresh on
        every call; the caller is expected to delete it when finished
        (detect_voice_activity does so via os.remove).
    """
    import os
    import tempfile

    # Decode with pydub so arbitrary input formats are accepted, then
    # re-encode as WAV for downstream scipy.io.wavfile consumption.
    audio = AudioSegment.from_file(audio_file)

    # Use a unique temporary file rather than the original hard-coded
    # "converted_audio.wav" in the working directory: with a fixed name,
    # two concurrent requests clobber each other's intermediate file, and
    # the caller's os.remove() then deletes it out from under the other
    # request. mkstemp guarantees a per-call unique path.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pydub opens the path itself; we only need the name
    audio.export(wav_path, format="wav")
    return wav_path
|
204 |
+
|
205 |
def detect_voice_activity(audio_file, threshold=0.02):
|
206 |
"""
|
207 |
Detect voice activity in the audio file and trim the audio to include only voice segments.
|
|
|
213 |
Returns:
|
214 |
str: Path to the output audio file with only voice segments.
|
215 |
"""
|
216 |
+
# Convert the input audio to WAV format
|
217 |
+
wav_path = convert_to_wav(audio_file)
|
218 |
+
|
219 |
+
# Load the WAV file
|
220 |
+
sample_rate, data = wavfile.read(wav_path)
|
221 |
|
222 |
# Normalize the audio data
|
223 |
if data.dtype != np.float32:
|
|
|
248 |
output_path = "voice_trimmed_audio.wav"
|
249 |
wavfile.write(output_path, sample_rate, trimmed_audio)
|
250 |
|
251 |
+
# Clean up the converted WAV file
|
252 |
+
os.remove(wav_path)
|
253 |
+
|
254 |
return output_path
|
255 |
|
256 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|