Update app.py
app.py
CHANGED
@@ -2,11 +2,10 @@ import gradio as gr
 import whisper
 import torch
 import os
-import numpy as np
 from pydub import AudioSegment, silence
 from faster_whisper import WhisperModel  # Import faster-whisper
-import
-from
+import numpy as np
+from scipy.io import wavfile
 
 # Mapping of model names to Whisper model sizes
 MODELS = {
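Note on the import swap: the new voice-activity code reads audio with scipy.io.wavfile instead of the removed noisereduce path. wavfile.read returns the sample rate plus a raw NumPy array whose dtype mirrors the file's on-disk encoding, which is why the added function normalizes integer samples before comparing them to a float threshold. A minimal sketch of that pattern (the file name is a placeholder, not part of the commit):

import numpy as np
from scipy.io import wavfile

# wavfile.read returns (sample_rate, data); data is int16 for 16-bit PCM,
# int32 for 32-bit PCM, and float32 for IEEE-float WAV files.
rate, data = wavfile.read("example.wav")  # placeholder path

# Scale integer PCM into [-1.0, 1.0] so a fixed amplitude threshold is meaningful.
if data.dtype != np.float32:
    data = data.astype(np.float32) / np.iinfo(data.dtype).max

print(rate, data.dtype, data.shape)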
@@ -188,63 +187,48 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
 
     return output_path
 
-def
+def detect_voice_activity(audio_file, threshold=0.02):
     """
-
+    Detect voice activity in the audio file and trim the audio to include only voice segments.
 
     Args:
         audio_file (str): Path to the input audio file.
-
+        threshold (float): Amplitude threshold for voice detection. Default is 0.02.
 
     Returns:
-        str: Path to the output audio file with
+        str: Path to the output audio file with only voice segments.
     """
     # Load the audio file
-
-
-    # Convert audio to numpy array for noisereduce
-    samples = np.array(audio.get_array_of_samples())
-    sample_rate = audio.frame_rate
-
-    # Perform noise reduction
-    reduced_noise = nr.reduce_noise(
-        y=samples,
-        sr=sample_rate,
-        prop_decrease=noise_reduce_level
-    )
+    sample_rate, data = wavfile.read(audio_file)
 
-    #
-
-
-        frame_rate=sample_rate,
-        sample_width=audio.sample_width,
-        channels=audio.channels
-    )
-
-    # Export the processed audio
-    output_path = "noise_reduced_audio.wav"
-    reduced_audio.export(output_path, format="wav")
+    # Normalize the audio data
+    if data.dtype != np.float32:
+        data = data.astype(np.float32) / np.iinfo(data.dtype).max
 
+    # Detect voice activity
+    voice_segments = []
+    is_voice = False
+    start = 0
+    for i, sample in enumerate(data):
+        if abs(sample) > threshold and not is_voice:
+            is_voice = True
+            start = i
+        elif abs(sample) <= threshold and is_voice:
+            is_voice = False
+            voice_segments.append((start, i))
+
+    # If the last segment is voice, add it
+    if is_voice:
+        voice_segments.append((start, len(data)))
+
+    # Trim the audio to include only voice segments
+    trimmed_audio = np.array([], dtype=np.float32)
+    for segment in voice_segments:
+        trimmed_audio = np.concatenate((trimmed_audio, data[segment[0]:segment[1]]))
+
+    # Export the trimmed audio
+    output_path = "voice_trimmed_audio.wav"
+    wavfile.write(output_path, sample_rate, trimmed_audio)
 
     return output_path
 
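For orientation: detect_voice_activity walks the normalized waveform one sample at a time, opens a segment when the absolute amplitude rises above the threshold, closes it when the amplitude falls back below, and concatenates the kept segments before writing them out. A rough usage sketch on a synthetic mono clip (the file name and test signal are illustrative only; it assumes detect_voice_activity from this app.py is in scope, and that the input is mono, since scipy returns a 2-D array for stereo files which the per-sample loop does not handle):

import numpy as np
from scipy.io import wavfile

# One second of 16 kHz mono audio: silence, a 440 Hz burst at 0.5 amplitude, then silence.
sr = 16000
clip = np.zeros(sr, dtype=np.float32)
clip[6000:10000] = 0.5 * np.sin(2 * np.pi * 440.0 * np.arange(4000) / sr)
wavfile.write("synthetic_test.wav", sr, clip)

trimmed_path = detect_voice_activity("synthetic_test.wav", threshold=0.02)
_, trimmed = wavfile.read(trimmed_path)

# Only the burst should survive; samples near the zero crossings fall under the
# threshold, so the result is slightly shorter than the 4000-sample burst.
print(len(trimmed))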
@@ -339,22 +323,18 @@ with gr.Blocks() as demo:
         silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
         silence_button = gr.Button("Remove Silence")
 
-    with gr.Tab("
-        gr.Markdown("Upload
-
-
-
-
-
+    with gr.Tab("Voice Detection and Trimming"):
+        gr.Markdown("Upload two audio files to detect voice activity and trim the audio.")
+        voice_audio_input1 = gr.Audio(type="filepath", label="Upload Audio File 1")
+        voice_audio_input2 = gr.Audio(type="filepath", label="Upload Audio File 2")
+        voice_threshold_slider = gr.Slider(
+            minimum=0.01, maximum=0.1, value=0.02, step=0.01,
+            label="Voice Detection Threshold",
+            info="Higher values detect louder sounds as voice."
         )
-
-
-
-    with gr.Tab("Remove Background Music"):
-        gr.Markdown("Upload an audio file to remove background music.")
-        music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
-        music_output = gr.Audio(label="Processed Audio (Music Removed)", type="filepath")
-        music_button = gr.Button("Remove Background Music")
+        voice_output1 = gr.Audio(label="Trimmed Audio 1", type="filepath")
+        voice_output2 = gr.Audio(label="Trimmed Audio 2", type="filepath")
+        voice_button = gr.Button("Detect and Trim Voice")
 
     # Link buttons to functions
     detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
@@ -368,15 +348,10 @@ with gr.Blocks() as demo:
         inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
         outputs=silence_output
     )
-
-
-        inputs=[
-        outputs=
-    )
-    music_button.click(
-        remove_background_music,
-        inputs=music_audio_input,
-        outputs=music_output
+    voice_button.click(
+        lambda audio1, audio2, threshold: (detect_voice_activity(audio1, threshold), detect_voice_activity(audio2, threshold)),
+        inputs=[voice_audio_input1, voice_audio_input2, voice_threshold_slider],
+        outputs=[voice_output1, voice_output2]
     )
 
     # Launch the Gradio interface
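On the event wiring: Gradio passes the values of the three listed input components to the handler positionally and spreads the returned tuple across the two output components, which is exactly what the inline lambda does. An equivalent named helper (hypothetical, not part of the commit) may be easier to read:

def trim_both(audio1, audio2, threshold):
    # Apply the same voice-activity trimming to each uploaded file and return
    # one output path per declared output component.
    return (
        detect_voice_activity(audio1, threshold),
        detect_voice_activity(audio2, threshold),
    )

voice_button.click(
    trim_both,
    inputs=[voice_audio_input1, voice_audio_input2, voice_threshold_slider],
    outputs=[voice_output1, voice_output2],
)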