"""Gradio front end that applies pydub-based effects and optional Demucs vocal isolation."""

import functools
import json
import math
import os
import tempfile
import warnings
from pathlib import Path

import gradio as gr
import noisereduce as nr
import numpy as np
import torch
import torchaudio
from demucs import pretrained
from demucs.apply import apply_model
from pydub import AudioSegment


# === Helper Functions ===

def audiosegment_to_array(audio):
    """Return ``(samples, frame_rate)`` for a pydub segment.

    ``samples`` is a 1-D integer array; for multi-channel audio the channels
    are interleaved frame by frame (L, R, L, R, ...).
    """
    return np.array(audio.get_array_of_samples()), audio.frame_rate


def array_to_audiosegment(samples, frame_rate, channels=1):
    """Build a pydub segment from an interleaved integer sample array.

    The sample width is taken from ``samples.dtype``, so the array must
    already have the integer dtype the audio should be stored with.
    """
    return AudioSegment(
        samples.tobytes(),
        frame_rate=frame_rate,
        sample_width=samples.dtype.itemsize,
        channels=channels,
    )


def _to_pcm(values, dtype):
    """Round, clip and cast ``values`` to the integer PCM ``dtype``.

    Clipping prevents integer wrap-around (audible as loud clicks) when
    processing pushes a sample past the representable range.
    """
    dtype = np.dtype(dtype)
    limits = np.iinfo(dtype)
    values = np.asarray(values)
    if np.issubdtype(values.dtype, np.floating):
        values = np.rint(np.nan_to_num(values))
    return np.clip(values, limits.min, limits.max).astype(dtype)


def _new_temp_path(suffix):
    """Create an empty, uniquely named temp file and return its path.

    The descriptor is closed immediately so other libraries can reopen the
    path for writing on every platform.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


# === Effect Functions ===

def apply_normalize(audio):
    """Normalize the segment's peak level using pydub's ``normalize`` effect."""
    return audio.normalize()


def apply_noise_reduction(audio):
    """Run noisereduce over the segment, one channel at a time.

    pydub stores multi-channel audio interleaved, so the samples are
    reshaped to ``(channels, frames)`` before denoising; feeding the
    interleaved stream as a single signal would treat it as mono audio.
    """
    samples, frame_rate = audiosegment_to_array(audio)
    if samples.size == 0:
        return audio
    if audio.channels == 1:
        reduced = nr.reduce_noise(y=samples, sr=frame_rate)
    else:
        per_channel = samples.reshape(-1, audio.channels).T
        reduced = nr.reduce_noise(y=per_channel, sr=frame_rate)
        reduced = np.asarray(reduced).T.reshape(-1)  # back to interleaved
    return array_to_audiosegment(
        _to_pcm(reduced, samples.dtype), frame_rate, channels=audio.channels
    )


def apply_compression(audio):
    """Apply pydub's dynamic range compressor with its default settings."""
    return audio.compress_dynamic_range()


def apply_reverb(audio):
    """Overlay one copy of the audio, 10 dB quieter and delayed by 1000 ms.

    This is a single delayed reflection rather than a true reverb tail.
    """
    reverb = audio - 10
    return audio.overlay(reverb, position=1000)


def apply_pitch_shift(audio, semitones=-2):
    """Shift pitch by ``semitones`` by resampling each channel.

    The audio is resampled by ``2 ** (semitones / 12)`` and kept at the
    original frame rate, so the pitch moves by exactly ``semitones`` (and
    the duration scales by the inverse factor). Changing the frame rate as
    well would apply the shift twice and produce a non-standard rate.
    """
    factor = 2 ** (semitones / 12)
    samples = np.array(audio.get_array_of_samples())
    if samples.size == 0:
        return audio
    frames = samples.reshape(-1, audio.channels)
    source_positions = np.arange(len(frames))
    target_positions = np.arange(0, len(frames), factor)
    # Interpolate channel by channel; interpolating the interleaved stream
    # would blend left and right samples together.
    shifted = np.column_stack([
        np.interp(target_positions, source_positions, frames[:, channel])
        for channel in range(audio.channels)
    ])
    return array_to_audiosegment(
        _to_pcm(shifted.reshape(-1), samples.dtype),
        audio.frame_rate,
        channels=audio.channels,
    )


def apply_echo(audio, delay_ms=500, decay=0.5):
    """Overlay a delayed copy of the audio scaled by ``decay``.

    ``decay`` is a linear amplitude factor (0.5 is roughly -6 dB). A
    non-positive ``decay`` means a silent echo, so the audio is returned
    unchanged. The result keeps the original length.
    """
    if decay <= 0:
        return audio
    echo = audio.apply_gain(20 * math.log10(decay))
    return audio.overlay(echo, position=delay_ms)


def apply_stereo_widen(audio, pan_amount=0.3):
    """Widen the stereo image by boosting the side (L - R) signal.

    The side component is scaled by ``1 + pan_amount`` while the mid
    component is left untouched. Mono input is converted to stereo first;
    it has no side signal, so it comes back as dual mono.

    Raises:
        ValueError: if the audio has more than two channels.
    """
    stereo = audio.set_channels(2) if audio.channels == 1 else audio
    if stereo.channels != 2:
        raise ValueError("Stereo widening supports mono or stereo audio only")
    samples = np.array(stereo.get_array_of_samples())
    if samples.size == 0:
        return stereo
    frames = samples.reshape(-1, 2).astype(np.float64)
    mid = frames.mean(axis=1)
    side = (frames[:, 0] - frames[:, 1]) / 2 * (1 + pan_amount)
    widened = np.column_stack([mid + side, mid - side])
    return array_to_audiosegment(
        _to_pcm(widened.reshape(-1), samples.dtype),
        stereo.frame_rate,
        channels=2,
    )


def apply_bass_boost(audio, gain=10):
    """Boost content below 100 Hz by mixing an amplified low band back in.

    The filtered band is overlaid on the original so the rest of the
    spectrum is preserved instead of being filtered away.
    """
    lows = audio.low_pass_filter(100).apply_gain(gain)
    return audio.overlay(lows)


def apply_treble_boost(audio, gain=10):
    """Boost content above 4 kHz by mixing an amplified high band back in.

    The filtered band is overlaid on the original so the rest of the
    spectrum is preserved instead of being filtered away.
    """
    highs = audio.high_pass_filter(4000).apply_gain(gain)
    return audio.overlay(highs)


# === Vocal Isolation Helpers ===

def load_track_local(path, sample_rate, channels=2):
    """Load an audio file as a ``(channels, frames)`` tensor.

    The signal is resampled to ``sample_rate`` when needed and converted to
    the requested channel count: down-mixed by averaging for mono, up-mixed
    by duplication from mono, or truncated to the first ``channels``.

    Raises:
        ValueError: if the file has fewer channels than requested and is
            not mono.
    """
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)
    available = sig.shape[0]
    if available == channels:
        return sig
    if channels == 1:
        # keepdim keeps the channel axis that torchaudio.save expects.
        return sig.mean(0, keepdim=True)
    if available == 1:
        # clone() so later in-place ops do not hit an expanded view.
        return sig.expand(channels, -1).clone()
    if available > channels:
        return sig[:channels]
    raise ValueError(
        f"Cannot convert audio with {available} channels to {channels} channels"
    )


def save_track(path, wav, sample_rate):
    """Write the ``wav`` tensor to ``path`` with torchaudio."""
    path = Path(path)
    torchaudio.save(str(path), wav, sample_rate)


@functools.lru_cache(maxsize=1)
def _get_demucs_model():
    """Load the pretrained ``htdemucs`` model once and reuse it across calls."""
    model = pretrained.get_model(name='htdemucs')
    model.eval()
    return model


def apply_vocal_isolation(audio_path):
    """Separate the vocal stem of ``audio_path`` with Demucs.

    Returns the path of a newly created temporary WAV file holding the
    vocals; the caller is responsible for deleting it.
    """
    model = _get_demucs_model()
    wav = load_track_local(audio_path, model.samplerate, channels=2)

    # Standardize the mix before separation and undo it afterwards. The
    # epsilon avoids dividing by zero on silent input.
    ref = wav.mean(0)
    mean = ref.mean()
    std = ref.std() + 1e-8
    normalized = (wav - mean) / std

    with torch.no_grad():
        sources = apply_model(model, normalized[None])[0]
    sources = sources * std + mean

    # Look the stem up by name instead of relying on a fixed index.
    vocal_track = sources[model.sources.index("vocals")].cpu()

    out_path = _new_temp_path(".wav")
    save_track(out_path, vocal_track, model.samplerate)
    return out_path


# === Preset Loader ===

def load_presets():
    """Load effect presets from ``presets/*.json`` in the working directory.

    Each file must contain ``"name"`` and ``"effects"`` keys. Returns a dict
    mapping preset name to its effects. A missing directory yields an empty
    dict, and unreadable or malformed files are skipped with a warning so
    one bad preset cannot stop the app from starting.
    """
    if not os.path.isdir("presets"):
        return {}
    presets = {}
    # Sorted so the preset order is stable across platforms.
    for filename in sorted(os.listdir("presets")):
        if not filename.endswith(".json"):
            continue
        preset_path = os.path.join("presets", filename)
        try:
            with open(preset_path, encoding="utf-8") as infile:
                data = json.load(infile)
            presets[data["name"]] = data["effects"]
        except (OSError, ValueError, KeyError, TypeError) as exc:
            warnings.warn(f"Skipping invalid preset file {preset_path}: {exc!r}")
    return presets


preset_choices = load_presets()

# Maps the labels shown in the UI (and used in preset files) to effects.
EFFECT_MAP = {
    "Noise Reduction": apply_noise_reduction,
    "Compress Dynamic Range": apply_compression,
    "Add Reverb": apply_reverb,
    "Pitch Shift": apply_pitch_shift,
    "Echo": apply_echo,
    "Stereo Widening": apply_stereo_widen,
    "Bass Boost": apply_bass_boost,
    "Treble Boost": apply_treble_boost,
    "Normalize": apply_normalize,
}

# Dropdown entry meaning "use the checkbox selection instead of a preset".
NO_PRESET = "None (use selected effects)"


# === Main Processing Function ===

def _isolate_vocals_segment(audio):
    """Round-trip ``audio`` through Demucs and return the vocal stem."""
    temp_input = _new_temp_path(".wav")
    vocal_path = None
    try:
        audio.export(temp_input, format="wav")
        vocal_path = apply_vocal_isolation(temp_input)
        return AudioSegment.from_wav(vocal_path)
    finally:
        # The intermediate files are no longer needed once the result has
        # been read back into memory.
        for leftover in (temp_input, vocal_path):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)


def process_audio(audio_file, selected_effects, isolate_vocals, preset_name):
    """Apply effects (and optionally vocal isolation) and export an MP3.

    Args:
        audio_file: path of the uploaded audio file.
        selected_effects: effect labels chosen via the checkboxes; used
            when ``preset_name`` does not name a loaded preset.
        isolate_vocals: when true, run Demucs after the effects.
        preset_name: name of a loaded preset whose effects take priority
            over ``selected_effects``.

    Returns:
        Path of a temporary ``.mp3`` file with the processed audio.

    Raises:
        gr.Error: if no audio file was provided.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file before processing.")

    audio = AudioSegment.from_file(audio_file)

    # Apply selected preset or custom effects
    effects_to_apply = preset_choices.get(preset_name)
    if effects_to_apply is None:
        effects_to_apply = selected_effects or []

    for effect_name in effects_to_apply:
        effect = EFFECT_MAP.get(effect_name)
        if effect is not None:  # unknown labels are ignored
            audio = effect(audio)

    if isolate_vocals:
        audio = _isolate_vocals_segment(audio)

    # The suffix matches the exported format so players and browsers do not
    # misidentify the file.
    output_path = _new_temp_path(".mp3")
    audio.export(output_path, format="mp3")
    return output_path


# === Gradio Interface ===

effect_options = list(EFFECT_MAP)

preset_names = list(preset_choices.keys())

interface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
        gr.Checkbox(label="Isolate Vocals After Effects"),
        # Defaults to NO_PRESET so the checkbox selection is honoured unless
        # the user explicitly picks a preset.
        gr.Dropdown(
            choices=[NO_PRESET] + preset_names,
            label="Select Preset",
            value=NO_PRESET,
        ),
    ],
    outputs=gr.Audio(label="Processed Audio (MP3)", type="filepath"),
    title="AI Audio Studio - Pro Edition",
    description="Apply multiple effects, isolate vocals, and export polished tracks -- all powered by AI!",
    allow_flagging="never",
)

if __name__ == "__main__":
    interface.launch()