Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from pydub import AudioSegment | |
| import numpy as np | |
| import tempfile | |
| import os | |
| import noisereduce as nr | |
| import json | |
| import torch | |
| from demucs import pretrained | |
| from demucs.apply import apply_model | |
| import torchaudio | |
| from pathlib import Path | |
| # === Helper Functions === | |
def audiosegment_to_array(audio):
    """Convert an audio segment into a NumPy sample array plus its frame rate.

    Args:
        audio: Segment exposing ``get_array_of_samples()`` and ``frame_rate``
            (a pydub ``AudioSegment`` everywhere in this file).

    Returns:
        A ``(samples, frame_rate)`` tuple. ``samples`` is the array NumPy
        builds from ``get_array_of_samples()``; multi-channel audio is not
        reshaped here.
    """
    sample_values = audio.get_array_of_samples()
    as_array = np.array(sample_values)
    return as_array, audio.frame_rate
def array_to_audiosegment(samples, frame_rate, channels=1):
    """Wrap a NumPy sample array in an ``AudioSegment``.

    Args:
        samples: NumPy array of samples. The item size of its dtype is used
            as the sample width in bytes.
        frame_rate: Frame rate handed to the new segment.
        channels: Channel count handed to the new segment (default 1).

    Returns:
        An ``AudioSegment`` built from the raw bytes of ``samples``.
    """
    raw_bytes = samples.tobytes()
    bytes_per_sample = samples.dtype.itemsize
    return AudioSegment(
        raw_bytes,
        sample_width=bytes_per_sample,
        channels=channels,
        frame_rate=frame_rate,
    )
| # === Effect Functions === | |
def apply_normalize(audio):
    """Return ``audio`` after applying its ``normalize()`` effect."""
    normalized = audio.normalize()
    return normalized
def apply_noise_reduction(audio):
    """Reduce background noise with ``noisereduce``, channel by channel.

    The segment's samples are interleaved per frame (L, R, L, R, ...). For
    multi-channel audio they are reshaped to ``(channels, frames)`` before
    being handed to ``reduce_noise`` so each channel is analysed on its own
    instead of the interleaved stream being treated as one mono signal.
    The result is cast back to the input sample dtype so the rebuilt segment
    keeps the original sample width.

    Args:
        audio: pydub ``AudioSegment`` to clean up.

    Returns:
        A new ``AudioSegment`` with the same frame rate and channel count,
        or ``audio`` itself when it contains no samples.
    """
    samples, frame_rate = audiosegment_to_array(audio)
    channels = audio.channels
    if samples.size == 0:
        # Nothing to analyse; avoid feeding an empty signal to the library.
        return audio

    sample_dtype = samples.dtype
    if channels > 1:
        # De-interleave: (frames * channels,) -> (channels, frames).
        signal = samples.reshape(-1, channels).T
    else:
        signal = samples

    reduced = np.asarray(nr.reduce_noise(y=signal, sr=frame_rate))
    if reduced.ndim > 1:
        # Re-interleave: (channels, frames) -> (frames * channels,).
        reduced = reduced.T.reshape(-1)

    # Guard against a floating-point result: round and clip before casting
    # so values cannot wrap around in the integer sample type.
    if np.issubdtype(sample_dtype, np.integer) and not np.issubdtype(
        reduced.dtype, np.integer
    ):
        limits = np.iinfo(sample_dtype)
        reduced = np.clip(np.rint(reduced), limits.min, limits.max)
    reduced = reduced.astype(sample_dtype)

    return array_to_audiosegment(reduced, frame_rate, channels=channels)
def apply_compression(audio):
    """Return ``audio`` after ``compress_dynamic_range()`` with its defaults."""
    compressed = audio.compress_dynamic_range()
    return compressed
def apply_reverb(audio):
    """Approximate reverb by mixing in one quieter, delayed copy of ``audio``.

    The copy is ``audio - 10`` (a 10 dB reduction in pydub) and is overlaid
    at position 1000 (milliseconds in pydub) on top of the original.
    """
    quieter_copy = audio - 10
    mixed = audio.overlay(quieter_copy, position=1000)
    return mixed
def apply_pitch_shift(audio, semitones=-2):
    """Shift the pitch of ``audio`` by resampling it.

    The samples are re-read at a step of ``2 ** (semitones / 12)`` and played
    back at the original frame rate, so the pitch moves by ``semitones`` and
    the duration changes by the inverse factor (no time-stretch correction).

    Compared with the previous implementation this:
      * keeps the original frame rate, because resampling *and* changing the
        frame rate applied the shift twice;
      * interpolates each channel separately, because interpolating across
        the interleaved stream blended left and right samples;
      * keeps the input sample dtype instead of forcing 16-bit.

    Args:
        audio: pydub ``AudioSegment`` to shift.
        semitones: Number of semitones to shift by; negative lowers the pitch.

    Returns:
        A new ``AudioSegment`` with the same frame rate, sample width and
        channel count, or ``audio`` itself when it contains no samples.
    """
    factor = 2 ** (semitones / 12)
    channels = audio.channels
    samples = np.array(audio.get_array_of_samples())

    # One row per frame, one column per channel.
    frames = samples.reshape(-1, channels)
    frame_count = len(frames)
    if frame_count == 0:
        return audio

    source_positions = np.arange(frame_count)
    read_positions = np.arange(0, frame_count, factor)
    resampled = np.stack(
        [
            np.interp(read_positions, source_positions, frames[:, channel])
            for channel in range(channels)
        ],
        axis=1,
    )

    # np.interp yields floats; round and clip before casting back so the
    # values stay inside the original integer sample range.
    if np.issubdtype(samples.dtype, np.integer):
        limits = np.iinfo(samples.dtype)
        resampled = np.clip(np.rint(resampled), limits.min, limits.max)
    resampled = resampled.astype(samples.dtype)

    return AudioSegment(
        resampled.tobytes(),  # C order == interleaved frames
        frame_rate=audio.frame_rate,
        sample_width=audio.sample_width,
        channels=channels,
    )
def apply_echo(audio, delay_ms=500, decay=0.5):
    """Overlay a single delayed, attenuated copy of ``audio`` onto itself.

    ``decay`` was previously accepted but ignored (the echo was always
    10 dB quieter). It is now honoured as a linear amplitude ratio, so the
    default of 0.5 gives an echo about 6 dB below the original.

    Args:
        audio: pydub ``AudioSegment`` to process.
        delay_ms: Offset of the echo from the start, in milliseconds.
        decay: Linear amplitude of the echo relative to the original. Values
            less than or equal to 0 mean "no echo".

    Returns:
        The mixed ``AudioSegment``, or ``audio`` unchanged when ``decay``
        is not positive.
    """
    if decay <= 0:
        # A non-positive ratio has no dB equivalent and means a silent echo.
        return audio

    # Convert the amplitude ratio to the dB gain pydub works with.
    gain_db = float(20 * np.log10(decay))
    echo = audio.apply_gain(gain_db)
    return audio.overlay(echo, position=delay_ms)
def apply_stereo_widen(audio, pan_amount=0.3):
    """Widen the stereo image with mid/side processing.

    The previous implementation passed the results of ``audio.pan(...)`` to
    ``AudioSegment.from_mono_audiosegments``. ``pan`` returns stereo
    segments, which that constructor rejects, so the effect always raised
    ``ValueError``. This version splits the signal into mid ``(L + R) / 2``
    and side ``(L - R) / 2`` components and scales the side component by
    ``1 + pan_amount``.

    Args:
        audio: pydub ``AudioSegment``; mono input is converted to stereo
            first (and, having no side signal, is returned unwidened).
        pan_amount: Extra side gain. 0 leaves the image unchanged, positive
            values widen it, negative values narrow it.

    Returns:
        A stereo ``AudioSegment`` with the frame rate and sample width of
        the stereo-converted input.
    """
    stereo = audio.set_channels(2)
    samples = np.array(stereo.get_array_of_samples())
    if samples.size == 0:
        return stereo

    # Work in float to avoid integer overflow while summing channels.
    frames = samples.reshape(-1, 2).astype(np.float64)
    left, right = frames[:, 0], frames[:, 1]
    mid = (left + right) / 2.0
    side = (left - right) / 2.0 * (1.0 + pan_amount)
    widened = np.stack([mid + side, mid - side], axis=1)

    # Boosting the side signal can exceed full scale; clip instead of wrapping.
    if np.issubdtype(samples.dtype, np.integer):
        limits = np.iinfo(samples.dtype)
        widened = np.clip(np.rint(widened), limits.min, limits.max)
    widened = widened.astype(samples.dtype)

    return AudioSegment(
        widened.tobytes(),  # C order == interleaved L/R frames
        frame_rate=stereo.frame_rate,
        sample_width=stereo.sample_width,
        channels=2,
    )
def apply_bass_boost(audio, gain=10, cutoff_hz=100):
    """Boost low frequencies while keeping the rest of the signal.

    The previous implementation returned only the amplified low-passed band,
    which removed everything above the cutoff instead of boosting the bass.
    The amplified band is now mixed back onto the original audio.

    Args:
        audio: pydub ``AudioSegment`` to process.
        gain: Gain in dB applied to the low band before mixing. Because the
            original band is still present, the net boost is somewhat larger.
        cutoff_hz: Low-pass cutoff in Hz selecting the band to boost.

    Returns:
        A new ``AudioSegment`` with the boosted band overlaid on ``audio``.
    """
    boosted_band = audio.low_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(boosted_band)
def apply_treble_boost(audio, gain=10, cutoff_hz=4000):
    """Boost high frequencies while keeping the rest of the signal.

    The previous implementation returned only the amplified high-passed
    band, which removed everything below the cutoff instead of boosting the
    treble. The amplified band is now mixed back onto the original audio.

    Args:
        audio: pydub ``AudioSegment`` to process.
        gain: Gain in dB applied to the high band before mixing. Because the
            original band is still present, the net boost is somewhat larger.
        cutoff_hz: High-pass cutoff in Hz selecting the band to boost.

    Returns:
        A new ``AudioSegment`` with the boosted band overlaid on ``audio``.
    """
    boosted_band = audio.high_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(boosted_band)
| # === Vocal Isolation Helpers === | |
def load_track_local(path, sample_rate, channels=2):
    """Load an audio file as a tensor at the requested rate and channel count.

    Args:
        path: File to read with ``torchaudio.load``.
        sample_rate: Target sample rate; the signal is resampled when the
            file's rate differs.
        channels: Desired channel count. ``1`` averages all channels.

    Returns:
        A tensor of shape ``(channels, frames)``. For ``channels == 1`` the
        channel axis is dropped (1-D tensor), matching the previous
        behaviour.

    Raises:
        ValueError: If the file has fewer channels than requested and is not
            mono, so there is no sensible way to fill the missing channels.
    """
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)

    if channels == 1:
        return sig.mean(0)

    source_channels = sig.shape[0]
    if source_channels == 1:
        # Mono file: duplicate the channel. ``repeat`` copies the data, so
        # callers may safely modify the result in place.
        sig = sig.repeat(channels, 1)
    elif source_channels > channels:
        # More channels than needed: keep the leading ones.
        sig = sig[:channels]
    elif source_channels < channels:
        raise ValueError(
            f"Audio has {source_channels} channels; cannot expand to {channels}"
        )
    return sig
def save_track(path, wav, sample_rate):
    """Write ``wav`` to ``path`` at ``sample_rate`` using ``torchaudio.save``.

    ``path`` is normalised through ``pathlib.Path`` and passed on as a string.
    """
    destination = str(Path(path))
    torchaudio.save(destination, wav, sample_rate)
def apply_vocal_isolation(audio_path):
    """Separate the vocal stem of an audio file with the ``htdemucs`` model.

    Fixes relative to the previous implementation:
      * the input is normalised with the scalar mean/std of the mono
        reference (as Demucs' own separation script does); the earlier
        ``wav -= ref[:, None]`` had an incompatible shape and raised on
        broadcast;
      * the vocals stem is looked up by name rather than a hard-coded index;
      * the result goes to a unique temporary file instead of a shared
        ``vocals.wav`` that concurrent requests would overwrite.

    Args:
        audio_path: Path of the audio file to separate.

    Returns:
        Path of a newly created temporary WAV file with the vocal stem. The
        caller is responsible for deleting it.
    """
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)
    if wav.shape[0] == 1:
        # Mono input: duplicate the channel so the model gets stereo.
        wav = wav.repeat(2, 1)

    # Normalise using statistics of the mono mix-down, then undo afterwards.
    ref = wav.mean(0)
    mean = ref.mean()
    std = ref.std()
    if not torch.isfinite(std) or std == 0:
        # Silent or single-sample input: skip scaling to avoid NaN/inf.
        std = torch.ones_like(std)

    normalized = (wav - mean) / std
    with torch.no_grad():
        sources = apply_model(model, normalized[None])[0]
    sources = sources * std + mean

    vocal_index = model.sources.index("vocals")
    vocal_track = sources[vocal_index].cpu()

    # mkstemp yields a unique path; close the descriptor so the file can be
    # reopened by name on every platform.
    fd, out_path = tempfile.mkstemp(prefix="vocals_", suffix=".wav")
    os.close(fd)
    save_track(out_path, vocal_track, model.samplerate)
    return out_path
| # === Preset Loader === | |
def load_presets():
    """Load effect presets from ``*.json`` files in the ``presets`` folder.

    Each file must hold a JSON object with a string ``"name"`` and an
    ``"effects"`` entry. Files are read in sorted filename order so the
    resulting preset order is deterministic. Files that cannot be read,
    parsed, or that have the wrong shape (including valid JSON that is not
    an object, which previously crashed with ``TypeError``) are reported on
    stdout and skipped.

    Returns:
        Dict mapping preset name to its ``"effects"`` value; empty when the
        folder is missing or holds no valid preset.
    """
    try:
        preset_files = sorted(
            f for f in os.listdir("presets") if f.endswith(".json")
        )
    except (FileNotFoundError, NotADirectoryError):
        print("Presets folder not found")
        return {}

    presets = {}
    for f in preset_files:
        path = os.path.join("presets", f)
        try:
            with open(path, "r", encoding="utf-8") as infile:
                data = json.load(infile)
        except ValueError:
            # Covers json.JSONDecodeError and undecodable (non-UTF-8) bytes.
            print(f"Failed to parse {f} — invalid JSON")
            continue
        except OSError:
            print(f"Failed to read {f}")
            continue

        is_valid = (
            isinstance(data, dict)
            and isinstance(data.get("name"), str)
            and "effects" in data
        )
        if is_valid:
            presets[data["name"]] = data["effects"]
        else:
            print(f"Invalid format in {f}")
    return presets
# Load presets once at import time. When none are found, fall back to a
# single empty "Default" preset so preset_choices is always defined and
# never empty.
preset_choices = load_presets() or {"Default": []}
| # === Main Processing Function === | |
def _isolate_vocals_via_tempfiles(audio):
    """Run vocal isolation on ``audio`` and clean up the intermediate files."""
    # Unique input path so concurrent requests cannot overwrite each other.
    fd, temp_input = tempfile.mkstemp(prefix="input_", suffix=".wav")
    os.close(fd)
    vocal_path = None
    try:
        audio.export(temp_input, format="wav")
        vocal_path = apply_vocal_isolation(temp_input)
        # from_wav reads the file into memory, so it can be deleted below.
        return AudioSegment.from_wav(vocal_path)
    finally:
        for leftover in (temp_input, vocal_path):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)


def process_audio(audio_file, selected_effects, isolate_vocals, preset_name):
    """Apply the chosen effects to an uploaded file and export it as MP3.

    Effect selection: explicitly ticked effects take priority; the preset is
    used only when no effect is ticked. Previously the preset always won
    (the dropdown always carries a value), so ticked effects were never
    applied. Unknown effect names are skipped.

    Args:
        audio_file: Path of the uploaded audio file.
        selected_effects: Effect names ticked by the user, or ``None``.
        isolate_vocals: When true, run vocal isolation after the effects.
        preset_name: Name of the preset chosen in the dropdown.

    Returns:
        Path of a temporary ``.mp3`` file holding the processed audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    audio = AudioSegment.from_file(audio_file)
    effect_map = {
        "Noise Reduction": apply_noise_reduction,
        "Compress Dynamic Range": apply_compression,
        "Add Reverb": apply_reverb,
        "Pitch Shift": apply_pitch_shift,
        "Echo": apply_echo,
        "Stereo Widening": apply_stereo_widen,
        "Bass Boost": apply_bass_boost,
        "Treble Boost": apply_treble_boost,
        "Normalize": apply_normalize,
    }

    # Custom selection first, then the preset, then nothing at all.
    effects_to_apply = selected_effects or preset_choices.get(preset_name) or []
    for effect_name in effects_to_apply:
        effect = effect_map.get(effect_name)
        if effect is not None:
            audio = effect(audio)

    final_audio = _isolate_vocals_via_tempfiles(audio) if isolate_vocals else audio

    # The suffix matches the exported format, and the handle is closed
    # before exporting so the file can be reopened by name on any platform.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    final_audio.export(output_path, format="mp3")
    return output_path
| # === Gradio Interface === | |
# Effect names offered in the UI; each matches a key handled by process_audio.
effect_options = [
    "Noise Reduction",
    "Compress Dynamic Range",
    "Add Reverb",
    "Pitch Shift",
    "Echo",
    "Stereo Widening",
    "Bass Boost",
    "Treble Boost",
    "Normalize",
]
preset_names = list(preset_choices)

# Pre-select the first preset when there is one.
_default_preset = preset_names[0] if preset_names else None

# Components are created in the same order they are passed to the interface.
_audio_input = gr.Audio(label="Upload Audio", type="filepath")
_effects_input = gr.CheckboxGroup(
    choices=effect_options,
    label="Apply Effects in Order",
)
_vocals_input = gr.Checkbox(label="Isolate Vocals After Effects")
_preset_input = gr.Dropdown(
    choices=preset_names,
    label="Select Preset",
    value=_default_preset,
)
_audio_output = gr.Audio(label="Processed Audio (MP3)", type="filepath")

interface = gr.Interface(
    fn=process_audio,
    inputs=[_audio_input, _effects_input, _vocals_input, _preset_input],
    outputs=_audio_output,
    title="AI Audio Studio - Pro Edition",
    description="Apply multiple effects, isolate vocals, and export polished tracks -- all powered by AI!",
    allow_flagging="never",
)
interface.launch()