"""Gradio app: apply a chain of audio effects and optionally isolate vocals."""

import functools
import os
import subprocess
import tempfile

import gradio as gr
import noisereduce as nr
import numpy as np
import torch
from demucs import pretrained
from demucs.apply import apply_model
from demucs.audio import save_audio
from pydub import AudioSegment
from scipy.io import wavfile

# NOTE(review): `load_audio` is kept importable for backward compatibility but
# is no longer used in this module. It may not be exported by every demucs
# release (TODO confirm), so a missing name must not stop the app from starting.
try:
    from demucs.audio import load_audio
except ImportError:
    load_audio = None


# Helper functions
def audiosegment_to_array(audio):
    """Return ``(samples, frame_rate)`` for *audio*.

    ``samples`` is the flat 1-D array produced by
    ``audio.get_array_of_samples()``; for multi-channel audio pydub
    interleaves the channels in that array.
    """
    return np.array(audio.get_array_of_samples()), audio.frame_rate


def array_to_audiosegment(samples, frame_rate, channels=1):
    """Build an ``AudioSegment`` from a flat integer sample array.

    The sample width is taken from ``samples.dtype.itemsize``, so the array
    must already have the integer dtype the resulting segment should use.
    """
    return AudioSegment(
        samples.tobytes(),
        frame_rate=frame_rate,
        sample_width=samples.dtype.itemsize,
        channels=channels,
    )


def _to_channel_matrix(audio):
    """Return the samples of *audio* as a ``(channels, frames)`` array."""
    samples = np.array(audio.get_array_of_samples())
    return samples.reshape(-1, audio.channels).T


def _from_channel_matrix(matrix, dtype, frame_rate):
    """Convert a ``(channels, frames)`` array back into an ``AudioSegment``.

    Values are rounded and clipped to the range of the integer *dtype* so
    that float processing results cannot wrap around when cast.
    """
    limits = np.iinfo(dtype)
    clipped = np.clip(np.rint(matrix), limits.min, limits.max).astype(dtype)
    interleaved = np.ascontiguousarray(clipped.T).reshape(-1)
    return array_to_audiosegment(
        interleaved, frame_rate, channels=matrix.shape[0]
    )


def _new_temp_wav(prefix):
    """Create a uniquely named, closed, empty ``.wav`` temp file; return its path."""
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=".wav")
    os.close(fd)
    return path


# Effect Functions
def apply_normalize(audio):
    """Return *audio* normalized with pydub's ``normalize`` effect."""
    return audio.normalize()


def apply_noise_reduction(audio):
    """Return *audio* after ``noisereduce`` noise reduction.

    Samples are handed to ``reduce_noise`` as a ``(channels, frames)`` matrix
    so each channel is processed as its own signal instead of treating the
    interleaved stereo stream as one mono signal.
    """
    if not audio.raw_data:
        return audio
    channels = _to_channel_matrix(audio)
    reduced = nr.reduce_noise(y=channels, sr=audio.frame_rate)
    return _from_channel_matrix(reduced, channels.dtype, audio.frame_rate)


def apply_compression(audio):
    """Return *audio* after pydub's ``compress_dynamic_range`` effect."""
    return audio.compress_dynamic_range()


def apply_reverb(audio):
    """Overlay a copy of *audio*, 10 dB quieter, delayed by 1000 ms."""
    reverb = audio - 10
    return audio.overlay(reverb, position=1000)


def apply_pitch_shift(audio, semitones=-2):
    """Shift the pitch of *audio* by *semitones* via resampling.

    Each channel is linearly interpolated at a step of ``2 ** (semitones / 12)``
    source frames and the result keeps the original frame rate, so the pitch
    moves by exactly *semitones*. As with any plain resampling shift, the
    duration changes by the inverse of that ratio.
    """
    channels = _to_channel_matrix(audio)
    n_frames = channels.shape[1]
    if n_frames == 0 or semitones == 0:
        return audio

    ratio = 2 ** (semitones / 12)
    read_positions = np.arange(0, n_frames, ratio)
    frame_index = np.arange(n_frames)
    # Interpolate per channel: interpolating the interleaved stream would
    # blend left and right samples together.
    shifted = np.stack(
        [np.interp(read_positions, frame_index, ch) for ch in channels]
    )
    return _from_channel_matrix(shifted, channels.dtype, audio.frame_rate)


def apply_echo(audio, delay_ms=500, decay=0.5):
    """Overlay one attenuated, delayed copy of *audio* onto itself.

    Args:
        audio: Segment to process.
        delay_ms: Offset of the echo in milliseconds.
        decay: Linear amplitude of the echo relative to the dry signal
            (0.5 is roughly -6 dB). Values <= 0 produce no echo.
    """
    if decay <= 0:
        return audio
    attenuation_db = 20 * float(np.log10(decay))
    return audio.overlay(audio + attenuation_db, position=delay_ms)


def apply_stereo_widen(audio, pan_amount=0.3):
    """Widen the stereo image of *audio* using mid/side processing.

    The side (left minus right) component is scaled by ``1 + pan_amount``.
    Mono input is returned as two identical channels, since it carries no
    side information to widen; audio with more than two channels is returned
    unchanged.
    """
    if audio.channels == 1:
        return audio.set_channels(2)
    if audio.channels != 2 or not audio.raw_data:
        return audio

    channels = _to_channel_matrix(audio)
    left, right = channels.astype(np.float64)
    mid = (left + right) / 2
    side = (left - right) / 2 * (1 + pan_amount)
    widened = np.stack([mid + side, mid - side])
    return _from_channel_matrix(widened, channels.dtype, audio.frame_rate)


def apply_bass_boost(audio, gain=10):
    """Mix a low-passed (100 Hz) copy, raised by *gain* dB, onto *audio*."""
    boosted_band = audio.low_pass_filter(100).apply_gain(gain)
    return audio.overlay(boosted_band)


def apply_treble_boost(audio, gain=10):
    """Mix a high-passed (4000 Hz) copy, raised by *gain* dB, onto *audio*."""
    boosted_band = audio.high_pass_filter(4000).apply_gain(gain)
    return audio.overlay(boosted_band)


# Vocal Isolation using Demucs
@functools.lru_cache(maxsize=1)
def _load_demucs_model(name="htdemucs"):
    """Load the pretrained Demucs model once and reuse it across requests."""
    model = pretrained.get_model(name=name)
    model.eval()
    return model


def apply_vocal_isolation(audio_path):
    """Separate the vocal stem of the file at *audio_path* with Demucs.

    Returns:
        Path of a newly created temporary WAV file containing the vocals.
        The caller owns the file and is responsible for deleting it.

    Raises:
        ValueError: If the input file contains no audio frames.
    """
    model = _load_demucs_model()

    # Match the sample rate and channel count the model reports for itself.
    segment = (
        AudioSegment.from_file(audio_path)
        .set_frame_rate(model.samplerate)
        .set_channels(model.audio_channels)
    )
    samples = _to_channel_matrix(segment)
    if samples.shape[1] == 0:
        raise ValueError("Cannot isolate vocals from empty audio.")

    full_scale = float(1 << (8 * segment.sample_width - 1))
    wav = torch.from_numpy(
        np.ascontiguousarray(samples, dtype=np.float32) / full_scale
    )

    # Standardise with the statistics of the mono mix before separation and
    # undo it afterwards. NOTE(review): mirrors the Demucs CLI as recalled - confirm.
    ref = wav.mean(0)
    mean = ref.mean()
    std = ref.std() + 1e-8  # guard against division by zero on silence
    wav = (wav - mean) / std
    with torch.no_grad():
        sources = apply_model(model, wav[None])[0]
    sources = sources * std + mean

    vocal_track = sources[model.sources.index("vocals")]
    # A unique file name keeps concurrent requests from overwriting each other.
    out_path = _new_temp_wav("vocals_")
    save_audio(vocal_track, out_path, samplerate=model.samplerate)
    return out_path


# Effect name (as shown in the UI) -> function applying it.
EFFECT_MAP = {
    "Noise Reduction": apply_noise_reduction,
    "Compress Dynamic Range": apply_compression,
    "Add Reverb": apply_reverb,
    "Pitch Shift": apply_pitch_shift,
    "Echo": apply_echo,
    "Stereo Widening": apply_stereo_widen,
    "Bass Boost": apply_bass_boost,
    "Treble Boost": apply_treble_boost,
    "Normalize": apply_normalize,
}


# Apply selected effects in order
def process_audio(audio_file, effects, isolate_vocals):
    """Apply the selected effects to an uploaded file and return a WAV path.

    Args:
        audio_file: Path of the uploaded audio file.
        effects: Iterable of effect names; unknown names are ignored and
            ``None`` is treated as "no effects".
        isolate_vocals: When true, run vocal isolation after the effects.

    Returns:
        Path of a temporary WAV file holding the processed audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    audio = AudioSegment.from_file(audio_file)
    for effect_name in effects or ():
        effect = EFFECT_MAP.get(effect_name)
        if effect is not None:
            audio = effect(audio)

    if not isolate_vocals:
        out_path = _new_temp_wav("processed_")
        # export() returns an open file handle; close it to avoid a leak.
        audio.export(out_path, format="wav").close()
        return out_path

    temp_input = _new_temp_wav("input_")
    try:
        audio.export(temp_input, format="wav").close()
        return apply_vocal_isolation(temp_input)
    finally:
        # The intermediate file is only needed while Demucs reads it.
        if os.path.exists(temp_input):
            os.remove(temp_input)


# Gradio Interface
effect_choices = list(EFFECT_MAP)

interface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.CheckboxGroup(choices=effect_choices, label="Apply Effects in Order"),
        gr.Checkbox(label="Isolate Vocals After Effects"),
    ],
    outputs=gr.Audio(label="Processed Audio", type="filepath"),
    title="Fix My Recording - Studio Pro",
    description="Apply multiple effects in sequence and optionally isolate vocals!",
    allow_flagging="never",
)

if __name__ == "__main__":
    interface.launch()