Update app.py
Browse files
app.py
CHANGED
@@ -2,10 +2,9 @@ import gradio as gr
|
|
2 |
import whisper
|
3 |
import torch
|
4 |
import os
|
5 |
-
import numpy as np
|
6 |
from pydub import AudioSegment, silence
|
7 |
from faster_whisper import WhisperModel # Import faster-whisper
|
8 |
-
|
9 |
|
10 |
# Mapping of model names to Whisper model sizes
|
11 |
MODELS = {
|
@@ -187,44 +186,29 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
187 |
|
188 |
return output_path
|
189 |
|
190 |
-
def
|
191 |
"""
|
192 |
-
Remove background
|
193 |
|
194 |
Args:
|
195 |
audio_file (str): Path to the input audio file.
|
196 |
-
noise_reduce_level (float): Noise reduction level (0.0 to 1.0). Default is 0.5.
|
197 |
|
198 |
Returns:
|
199 |
-
str: Path to the output audio file with background
|
200 |
"""
|
201 |
-
#
|
202 |
-
|
203 |
|
204 |
-
#
|
205 |
-
|
206 |
-
|
207 |
|
208 |
-
#
|
209 |
-
|
210 |
-
|
211 |
-
sr=sample_rate,
|
212 |
-
prop_decrease=noise_reduce_level
|
213 |
-
)
|
214 |
-
|
215 |
-
# Convert back to AudioSegment
|
216 |
-
reduced_audio = AudioSegment(
|
217 |
-
reduced_noise.tobytes(),
|
218 |
-
frame_rate=sample_rate,
|
219 |
-
sample_width=audio.sample_width,
|
220 |
-
channels=audio.channels
|
221 |
-
)
|
222 |
|
223 |
-
#
|
224 |
-
|
225 |
-
reduced_audio.export(output_path, format="wav")
|
226 |
-
|
227 |
-
return output_path
|
228 |
|
229 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
230 |
"""Transcribe the audio file."""
|
@@ -317,16 +301,11 @@ with gr.Blocks() as demo:
|
|
317 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
318 |
silence_button = gr.Button("Remove Silence")
|
319 |
|
320 |
-
with gr.Tab("Remove Background
|
321 |
-
gr.Markdown("Upload an audio file to remove background
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
label="Noise Reduction Level",
|
326 |
-
info="Higher values remove more noise."
|
327 |
-
)
|
328 |
-
noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
|
329 |
-
noise_button = gr.Button("Remove Background Noise")
|
330 |
|
331 |
# Link buttons to functions
|
332 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
@@ -340,10 +319,10 @@ with gr.Blocks() as demo:
|
|
340 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
341 |
outputs=silence_output
|
342 |
)
|
343 |
-
|
344 |
-
|
345 |
-
inputs=
|
346 |
-
outputs=
|
347 |
)
|
348 |
|
349 |
# Launch the Gradio interface
|
|
|
2 |
import whisper
|
3 |
import torch
|
4 |
import os
|
|
|
5 |
from pydub import AudioSegment, silence
|
6 |
from faster_whisper import WhisperModel # Import faster-whisper
|
7 |
+
from spleeter.separator import Separator # Import Spleeter for music separation
|
8 |
|
9 |
# Mapping of model names to Whisper model sizes
|
10 |
MODELS = {
|
|
|
186 |
|
187 |
return output_path
|
188 |
|
189 |
def remove_background_music(audio_file, output_folder="output"):
    """
    Remove background music from an audio file using Spleeter.

    Separates the input into two stems (vocals and accompaniment) with the
    pretrained 'spleeter:2stems' model and returns the isolated vocals track.

    Args:
        audio_file (str): Path to the input audio file.
        output_folder (str): Directory where Spleeter writes the separated
            stems. Defaults to "output" (the previously hard-coded value).

    Returns:
        str: Path to the separated vocals WAV file (background music removed).

    Raises:
        FileNotFoundError: If Spleeter did not produce the expected vocals
            file (e.g. the separation failed silently).
    """
    # NOTE(review): constructing a Separator loads the pretrained model on
    # every call; if this becomes a hot path, cache one at module level.
    separator = Separator('spleeter:2stems')

    # Separate the audio into vocals and accompaniment stems on disk.
    separator.separate_to_file(audio_file, output_folder)

    # Spleeter writes stems under <output_folder>/<input basename>/.
    base_name = os.path.splitext(os.path.basename(audio_file))[0]
    vocals_path = os.path.join(output_folder, base_name, "vocals.wav")

    # Fail loudly instead of handing callers a path that does not exist.
    if not os.path.isfile(vocals_path):
        raise FileNotFoundError(
            f"Expected separated vocals not found: {vocals_path}"
        )

    return vocals_path
|
|
|
|
|
|
|
212 |
|
213 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
214 |
"""Transcribe the audio file."""
|
|
|
301 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
302 |
silence_button = gr.Button("Remove Silence")
|
303 |
|
304 |
    # Tab: isolate vocals by stripping background music via Spleeter.
    with gr.Tab("Remove Background Music"):
        gr.Markdown("Upload an audio file to remove background music.")
        # type="filepath" hands the upload to remove_background_music as a path.
        bg_music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
        bg_music_output = gr.Audio(label="Processed Audio (Background Music Removed)", type="filepath")
        bg_music_button = gr.Button("Remove Background Music")
|
|
|
|
|
|
|
|
|
|
|
309 |
|
310 |
# Link buttons to functions
|
311 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
|
|
319 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
320 |
outputs=silence_output
|
321 |
)
|
322 |
    # Wire the button to the Spleeter-based remover; input and output are both
    # file paths, matching the filepath-typed gr.Audio components.
    bg_music_button.click(
        remove_background_music,
        inputs=bg_music_audio_input,
        outputs=bg_music_output
    )
|
327 |
|
328 |
# Launch the Gradio interface
|