Spaces:

tee342
/

AudioMaster

Sleeping

File size: 5,557 Bytes

import gradio as gr
from pydub import AudioSegment
import numpy as np
import tempfile
import os
import noisereduce as nr
import json
import torch
from demucs import pretrained
from demucs.apply import apply_model
import torchaudio
from pathlib import Path

# === Helper Functions ===
def audiosegment_to_array(audio):
    """Unpack an audio segment into a NumPy sample array and its frame rate.

    Args:
        audio: Object exposing ``get_array_of_samples()`` and ``frame_rate``
            (an ``AudioSegment`` in this file).

    Returns:
        A ``(samples, frame_rate)`` tuple, where ``samples`` is the NumPy
        array built from ``audio.get_array_of_samples()``.
    """
    raw_samples = audio.get_array_of_samples()
    sample_array = np.array(raw_samples)
    return sample_array, audio.frame_rate

def array_to_audiosegment(samples, frame_rate, channels=1):
    """Wrap a NumPy sample array in a new ``AudioSegment``.

    Args:
        samples: NumPy array of samples; its raw bytes become the audio data
            and its dtype's item size becomes the sample width.
        frame_rate: Frame rate to record on the segment.
        channels: Channel count to record on the segment (default 1).

    Returns:
        The newly constructed ``AudioSegment``.
    """
    raw_bytes = samples.tobytes()
    bytes_per_sample = samples.dtype.itemsize
    return AudioSegment(
        raw_bytes,
        sample_width=bytes_per_sample,
        frame_rate=frame_rate,
        channels=channels,
    )

# === Effect Functions ===
def apply_normalize(audio):
    """Return the result of ``audio.normalize()``."""
    normalized = audio.normalize()
    return normalized

def apply_noise_reduction(audio):
    """Run noisereduce over ``audio`` and return the cleaned segment.

    The samples are de-interleaved into a ``(channels, frames)`` array before
    being handed to ``nr.reduce_noise`` so that each channel is processed as
    its own signal at the true frame rate. Passing the interleaved stream
    directly would mix the channels and misstate the sample rate for
    multi-channel audio.

    Args:
        audio: The ``AudioSegment`` to clean.

    Returns:
        A new ``AudioSegment`` with the same frame rate, channel count and
        sample dtype as the input. Empty input is returned unchanged.
    """
    samples, frame_rate = audiosegment_to_array(audio)
    channels = audio.channels
    if samples.size == 0:
        return audio

    if channels > 1:
        # Interleaved [L0, R0, L1, R1, ...] -> rows of per-channel samples.
        signal = samples.reshape(-1, channels).T
    else:
        signal = samples

    reduced = np.asarray(nr.reduce_noise(y=signal, sr=frame_rate))

    # Make sure the result fits the original integer sample format so the
    # sample width derived from the dtype stays valid.
    if np.issubdtype(samples.dtype, np.integer):
        limits = np.iinfo(samples.dtype)
        reduced = np.clip(np.rint(reduced), limits.min, limits.max)

    # Back to a flat, interleaved array in the original dtype.
    interleaved = np.atleast_2d(reduced).T.reshape(-1).astype(samples.dtype)
    return array_to_audiosegment(interleaved, frame_rate, channels=channels)

def apply_compression(audio):
    """Return the result of ``audio.compress_dynamic_range()`` with its defaults."""
    compressed = audio.compress_dynamic_range()
    return compressed

def apply_reverb(audio):
    """Overlay a quieter copy of ``audio`` onto itself at a fixed offset.

    The copy is ``audio - 10`` (pydub treats subtracting a number as a gain
    reduction in dB) and is overlaid at ``position=1000``.
    """
    tail_offset = 1000
    quieter_copy = audio - 10
    return audio.overlay(quieter_copy, position=tail_offset)

def apply_pitch_shift(audio, semitones=-2):
    """Shift the pitch of ``audio`` by ``semitones`` via linear resampling.

    Each channel is resampled by the ratio ``2 ** (semitones / 12)`` and the
    result keeps the original frame rate, so playback is shifted by exactly
    ``semitones``. This is a simple "tape speed" shift: the duration changes
    by the inverse ratio.

    Fixes over the previous implementation:
    * the shift was applied twice (resampling *and* changing the frame rate);
    * samples were always cast to int16 regardless of the sample width;
    * interpolation ran across interleaved channels, mixing left and right
      and sometimes producing a byte count that does not divide into whole
      frames.

    Args:
        audio: The ``AudioSegment`` to shift.
        semitones: Number of semitones to shift; negative lowers the pitch.

    Returns:
        A new segment with the same frame rate, sample width and channel
        count. ``audio`` itself is returned when there is nothing to do
        (zero shift or empty audio).
    """
    ratio = 2 ** (semitones / 12)
    samples = np.array(audio.get_array_of_samples())
    channels = audio.channels

    # One row per frame, one column per channel.
    frames = samples.reshape(-1, channels)
    frame_count = frames.shape[0]
    if frame_count == 0 or ratio == 1:
        return audio

    source_positions = np.arange(frame_count)
    target_positions = np.arange(0, frame_count, ratio)

    # Interpolate every channel independently so they never bleed together.
    resampled = np.stack(
        [
            np.interp(target_positions, source_positions, frames[:, ch])
            for ch in range(channels)
        ],
        axis=1,
    )
    # Interpolated values stay within the input range, so rounding back to
    # the original sample dtype cannot overflow.
    shifted = np.rint(resampled).astype(samples.dtype)

    # _spawn reuses the source segment's frame rate, width and channels.
    return audio._spawn(shifted.tobytes())

def apply_echo(audio, delay_ms=500, decay=0.5):
    """Overlay a single delayed, attenuated copy of ``audio`` onto itself.

    ``decay`` was previously ignored (the echo was always 10 dB quieter).
    It now controls the echo level as a linear amplitude ratio, converted to
    decibels with ``20 * log10(decay)``; the default 0.5 is about -6 dB.

    Args:
        audio: The ``AudioSegment`` to process.
        delay_ms: Offset at which the echo is overlaid, passed to
            ``overlay`` as ``position``.
        decay: Linear amplitude of the echo relative to the source.
            Values <= 0 mean "no echo".

    Returns:
        The segment with the echo mixed in, or ``audio`` unchanged when
        ``decay`` is not positive.
    """
    import math

    if decay <= 0:
        # log10 is undefined here, and a silent echo is a no-op anyway.
        return audio

    echo_gain_db = 20 * math.log10(decay)
    echo = audio.apply_gain(echo_gain_db)
    return audio.overlay(echo, position=delay_ms)

def apply_stereo_widen(audio, pan_amount=0.3):
    """Widen the stereo image of ``audio`` using mid/side processing.

    The previous implementation passed the stereo results of ``pan()`` to
    ``AudioSegment.from_mono_audiosegments``, which only accepts mono
    segments, so it raised ``ValueError`` on every call.

    The signal is split into mid ``(L + R) / 2`` and side ``(L - R) / 2``;
    the side component is scaled by ``1 + pan_amount`` and the channels are
    rebuilt as ``mid + side`` and ``mid - side``.

    Args:
        audio: The ``AudioSegment`` to widen. Non-stereo input is first
            converted with ``set_channels(2)``; a mono source has no side
            component, so it comes back as unchanged dual mono.
        pan_amount: Extra side gain as a fraction. 0 leaves the image
            unchanged, positive values widen it, negative values narrow it.

    Returns:
        A stereo segment with the same frame rate and sample width.
    """
    stereo = audio if audio.channels == 2 else audio.set_channels(2)

    samples = np.array(stereo.get_array_of_samples())
    # Work in float64 so L +/- R cannot overflow the integer sample type.
    frames = samples.reshape(-1, 2).astype(np.float64)
    left, right = frames[:, 0], frames[:, 1]

    mid = (left + right) / 2
    side = (left - right) / 2 * (1 + pan_amount)
    widened = np.stack([mid + side, mid - side], axis=1)

    # Boosting the side signal can exceed full scale; clip rather than wrap.
    limits = np.iinfo(samples.dtype)
    clipped = np.clip(np.rint(widened), limits.min, limits.max)

    return stereo._spawn(clipped.astype(samples.dtype).tobytes())

def apply_bass_boost(audio, gain=10, cutoff_hz=100):
    """Boost the low end of ``audio`` while keeping the rest of the signal.

    The previous version returned only the low-passed band, which threw away
    everything above the cutoff instead of boosting the bass. The amplified
    low band is now mixed back onto the original segment.

    Args:
        audio: The ``AudioSegment`` to process.
        gain: Gain in dB applied to the low band before mixing.
        cutoff_hz: Low-pass cutoff that defines the band to boost
            (default 100, the value that used to be hard-coded).

    Returns:
        The original segment with the boosted low band overlaid on it.
    """
    boosted_band = audio.low_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(boosted_band)

def apply_treble_boost(audio, gain=10, cutoff_hz=4000):
    """Boost the high end of ``audio`` while keeping the rest of the signal.

    The previous version returned only the high-passed band, which threw
    away everything below the cutoff instead of boosting the treble. The
    amplified high band is now mixed back onto the original segment.

    Args:
        audio: The ``AudioSegment`` to process.
        gain: Gain in dB applied to the high band before mixing.
        cutoff_hz: High-pass cutoff that defines the band to boost
            (default 4000, the value that used to be hard-coded).

    Returns:
        The original segment with the boosted high band overlaid on it.
    """
    boosted_band = audio.high_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(boosted_band)

# === Vocal Isolation Helpers ===
def load_track_local(path, sample_rate, channels=2):
    """Load an audio file as a tensor at the requested rate and channel count.

    Args:
        path: File to read with ``torchaudio.load``.
        sample_rate: Target sample rate; the signal is resampled when the
            file's rate differs.
        channels: Desired channel count. ``1`` averages all channels and
            returns a 1-D tensor (unchanged from the previous behaviour).

    Returns:
        The loaded signal. For ``channels > 1`` a mono file is duplicated up
        to ``channels`` and surplus channels are dropped, so callers that
        need stereo actually get it. A multi-channel file with fewer
        channels than requested is returned as loaded.
    """
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)

    if channels == 1:
        return sig.mean(0)

    source_channels = sig.shape[0]
    if source_channels == 1:
        # Mono file but several channels requested: duplicate the channel.
        sig = sig.repeat(channels, 1)
    elif source_channels > channels:
        # E.g. surround input when only stereo is wanted: keep the first ones.
        sig = sig[:channels]
    return sig

def save_track(path, wav, sample_rate):
    """Write ``wav`` to ``path`` via ``torchaudio.save``.

    ``path`` is normalised through ``pathlib.Path`` and passed on as a
    string.
    """
    destination = str(Path(path))
    torchaudio.save(destination, wav, sample_rate)

def apply_vocal_isolation(audio_path):
    """Separate the vocal stem of an audio file with the ``htdemucs`` model.

    Fixes over the previous implementation:
    * normalisation used ``wav -= ref[:, None]``, whose shapes do not
      broadcast against a ``(channels, samples)`` tensor; it now follows the
      Demucs recipe (subtract the mean and divide by the standard deviation
      of the mono reference, then undo both on the separated sources);
    * the vocal stem is looked up by name instead of a hard-coded index;
    * the result goes to a unique temporary file rather than a shared
      ``vocals.wav`` that concurrent requests would overwrite.

    Args:
        audio_path: Path of the audio file to separate.

    Returns:
        Path of a newly created WAV file containing the vocal stem. The
        caller owns the file and is responsible for deleting it.
    """
    model = pretrained.get_model(name='htdemucs')
    model.eval()

    wav = load_track_local(audio_path, model.samplerate, channels=2)
    if wav.shape[0] == 1:
        # The model is fed stereo; duplicate a mono signal if one slipped through.
        wav = wav.repeat(2, 1)

    # Normalise against the mono mix; the epsilon avoids dividing by zero
    # on silent input.
    ref = wav.mean(0)
    mean = ref.mean()
    std = ref.std() + 1e-8
    normalized = (wav - mean) / std

    with torch.no_grad():
        # apply_model expects a batch dimension; take the single result back out.
        sources = apply_model(model, normalized[None])[0]
    sources = sources * std + mean

    vocals_index = model.sources.index("vocals")
    vocal_track = sources[vocals_index].cpu()

    fd, out_path = tempfile.mkstemp(prefix="vocals_", suffix=".wav")
    os.close(fd)  # save_track reopens the path itself
    save_track(out_path, vocal_track, model.samplerate)
    return out_path

# === Preset Loader ===
def load_presets(preset_dir="presets"):
    """Load effect presets from the ``*.json`` files in ``preset_dir``.

    Each file must contain an object with a ``"name"`` and an ``"effects"``
    entry. Files are read in sorted order so the result is deterministic;
    when two files share a name, the later one wins.

    Args:
        preset_dir: Directory to scan. Defaults to ``"presets"``, the
            location that used to be hard-coded.

    Returns:
        A dict mapping preset name to its ``"effects"`` value. Empty when
        the directory does not exist, so a missing preset folder no longer
        crashes the app at import time.

    Raises:
        json.JSONDecodeError: If a preset file is not valid JSON.
        KeyError: If a preset lacks ``"name"`` or ``"effects"``.
    """
    presets = {}
    try:
        file_names = sorted(os.listdir(preset_dir))
    except FileNotFoundError:
        return presets

    for file_name in file_names:
        if not file_name.endswith(".json"):
            continue
        preset_path = os.path.join(preset_dir, file_name)
        with open(preset_path, encoding="utf-8") as infile:
            data = json.load(infile)
        presets[data["name"]] = data["effects"]
    return presets

# Presets are loaded once, at import time. process_audio() looks effects up
# in this name -> effects mapping, and the UI dropdown lists its keys.
preset_choices = load_presets()

# === Main Processing Function ===
def process_audio(audio_file, selected_effects, isolate_vocals, preset_name):
    """Apply the chosen effects to an uploaded file and export it as MP3.

    Fixes over the previous implementation:
    * the MP3 output was written to a file with a ``.wav`` suffix;
    * the intermediate WAV used a fixed ``input.wav`` path shared by all
      requests and was never deleted;
    * the output was exported while its ``NamedTemporaryFile`` handle was
      still open;
    * a missing upload or a ``None`` effect list crashed with an unrelated
      error.

    Args:
        audio_file: Path of the uploaded audio file.
        selected_effects: Effect names to apply, in order, when
            ``preset_name`` is not a known preset.
        isolate_vocals: When truthy, run vocal isolation after the effects.
        preset_name: Name of a preset in ``preset_choices``; if present, its
            effect list replaces ``selected_effects``.

    Returns:
        Path of the exported MP3 file.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    audio = AudioSegment.from_file(audio_file)

    effect_map = {
        "Noise Reduction": apply_noise_reduction,
        "Compress Dynamic Range": apply_compression,
        "Add Reverb": apply_reverb,
        "Pitch Shift": apply_pitch_shift,
        "Echo": apply_echo,
        "Stereo Widening": apply_stereo_widen,
        "Bass Boost": apply_bass_boost,
        "Treble Boost": apply_treble_boost,
        "Normalize": apply_normalize,
    }

    # A known preset takes precedence; otherwise use the custom selection.
    effects_to_apply = preset_choices.get(preset_name, selected_effects) or []
    for effect_name in effects_to_apply:
        effect = effect_map.get(effect_name)
        if effect is not None:  # unknown names are skipped, as before
            audio = effect(audio)

    if isolate_vocals:
        # Hand the processed audio to the separator through a unique temp
        # file so concurrent requests cannot clobber each other.
        fd, temp_input = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        vocal_path = None
        try:
            audio.export(temp_input, format="wav")
            vocal_path = apply_vocal_isolation(temp_input)
            audio = AudioSegment.from_wav(vocal_path)
        finally:
            # Best-effort cleanup of the intermediate files; a failure to
            # delete must not mask the real result or error.
            for scratch_path in (temp_input, vocal_path):
                if not scratch_path:
                    continue
                try:
                    os.remove(scratch_path)
                except OSError:
                    pass

    # Reserve a unique output name and close the handle before exporting so
    # the path can be reopened on every platform.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    audio.export(output_path, format="mp3")
    return output_path

# === Gradio Interface ===
# Effect names offered in the UI; they must match the keys that
# process_audio() knows how to dispatch.
effect_options = [
    "Noise Reduction",
    "Compress Dynamic Range",
    "Add Reverb",
    "Pitch Shift",
    "Echo",
    "Stereo Widening",
    "Bass Boost",
    "Treble Boost",
    "Normalize",
]

# Dropdown entry that is deliberately NOT a key of preset_choices, so that
# process_audio() falls back to the checkbox selection. Previously the
# dropdown defaulted to the first preset, which meant the selected effects
# were always ignored whenever at least one preset existed.
CUSTOM_PRESET_LABEL = "Custom (use selected effects)"

preset_names = list(preset_choices.keys())

interface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
        gr.Checkbox(label="Isolate Vocals After Effects"),
        gr.Dropdown(
            choices=[CUSTOM_PRESET_LABEL] + preset_names,
            label="Select Preset",
            value=CUSTOM_PRESET_LABEL,
        ),
    ],
    outputs=gr.Audio(label="Processed Audio (MP3)", type="filepath"),
    title="AI Audio Studio - Pro Edition",
    description="Apply multiple effects, isolate vocals, and export polished tracks -- all powered by AI!",
    allow_flagging="never"
)

# Only start the server when run as a script, so the module can be imported
# (e.g. for testing the effect functions) without launching the UI.
if __name__ == "__main__":
    interface.launch()