Athspi committed · verified
Commit 34dc965 · 1 Parent(s): e710c16

Update app.py

Files changed (1): app.py (+23 -44)
app.py CHANGED
@@ -2,10 +2,9 @@ import gradio as gr
 import whisper
 import torch
 import os
-import numpy as np
 from pydub import AudioSegment, silence
 from faster_whisper import WhisperModel  # Import faster-whisper
-import noisereduce as nr  # Import noisereduce for background noise removal
+from spleeter.separator import Separator  # Import Spleeter for music separation
 
 # Mapping of model names to Whisper model sizes
 MODELS = {
@@ -187,44 +186,29 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
 
     return output_path
 
-def remove_background_noise(audio_file, noise_reduce_level=0.5):
+def remove_background_music(audio_file):
     """
-    Remove background noise from the audio file using AI-based noise reduction.
+    Remove background music from the audio file using Spleeter.
 
     Args:
         audio_file (str): Path to the input audio file.
-        noise_reduce_level (float): Noise reduction level (0.0 to 1.0). Default is 0.5.
 
     Returns:
-        str: Path to the output audio file with background noise removed.
+        str: Path to the output audio file with background music removed.
     """
-    # Load the audio file
-    audio = AudioSegment.from_file(audio_file)
+    # Initialize Spleeter separator (2 stems: vocals and accompaniment)
+    separator = Separator('spleeter:2stems')
 
-    # Convert audio to numpy array for noisereduce
-    samples = np.array(audio.get_array_of_samples())
-    sample_rate = audio.frame_rate
+    # Separate the audio into vocals and accompaniment
+    output_folder = "output"
+    separator.separate_to_file(audio_file, output_folder)
 
-    # Perform noise reduction
-    reduced_noise = nr.reduce_noise(
-        y=samples,
-        sr=sample_rate,
-        prop_decrease=noise_reduce_level
-    )
-
-    # Convert back to AudioSegment
-    reduced_audio = AudioSegment(
-        reduced_noise.tobytes(),
-        frame_rate=sample_rate,
-        sample_width=audio.sample_width,
-        channels=audio.channels
-    )
+    # Load the separated vocals
+    base_name = os.path.splitext(os.path.basename(audio_file))[0]
+    vocals_path = os.path.join(output_folder, base_name, "vocals.wav")
 
-    # Export the processed audio
-    output_path = "noise_reduced_audio.wav"
-    reduced_audio.export(output_path, format="wav")
-
-    return output_path
+    # Return the path to the vocals file
+    return vocals_path
 
 def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
     """Transcribe the audio file."""
@@ -317,16 +301,11 @@ with gr.Blocks() as demo:
         silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
         silence_button = gr.Button("Remove Silence")
 
-    with gr.Tab("Remove Background Noise"):
-        gr.Markdown("Upload an audio file to remove background noise.")
-        noise_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
-        noise_reduce_slider = gr.Slider(
-            minimum=0.0, maximum=1.0, value=0.5, step=0.1,
-            label="Noise Reduction Level",
-            info="Higher values remove more noise."
-        )
-        noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
-        noise_button = gr.Button("Remove Background Noise")
+    with gr.Tab("Remove Background Music"):
+        gr.Markdown("Upload an audio file to remove background music.")
+        bg_music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+        bg_music_output = gr.Audio(label="Processed Audio (Background Music Removed)", type="filepath")
+        bg_music_button = gr.Button("Remove Background Music")
 
     # Link buttons to functions
     detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
@@ -340,10 +319,10 @@ with gr.Blocks() as demo:
         inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
         outputs=silence_output
     )
-    noise_button.click(
-        remove_background_noise,
-        inputs=[noise_audio_input, noise_reduce_slider],
-        outputs=noise_output
+    bg_music_button.click(
+        remove_background_music,
+        inputs=bg_music_audio_input,
+        outputs=bg_music_output
     )
 
     # Launch the Gradio interface
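Outside the Gradio UI, the changed path can be smoke-tested directly. A sketch under the same assumptions (interview.mp3 is a hypothetical input; transcribe_audio's return value is not shown in this diff, so it is only captured):

    # Separate the vocals, then transcribe them, mirroring the new tab's wiring.
    vocals_path = remove_background_music("interview.mp3")  # hypothetical file
    # e.g. vocals_path == "output/interview/vocals.wav"
    result = transcribe_audio(vocals_path, language="Auto Detect",
                              model_size="Faster Whisper Large v3")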