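"""AI Audio Studio -- Pro Edition.

A Gradio app that applies a chain of pydub effects to an uploaded audio
file, optionally isolates vocals with Demucs (htdemucs), and exports the
result as MP3.
"""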
import gradio as gr
from pydub import AudioSegment
import numpy as np
import tempfile
import os
import noisereduce as nr
import json
import torch
from demucs import pretrained
from demucs.apply import apply_model
import torchaudio
from pathlib import Path

# === Helper Functions ===
def audiosegment_to_array(audio):
    return np.array(audio.get_array_of_samples()), audio.frame_rate

def array_to_audiosegment(samples, frame_rate, channels=1):
    return AudioSegment(
        samples.tobytes(),
        frame_rate=frame_rate,
        sample_width=samples.dtype.itemsize,
        channels=channels
    )
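
# Note: get_array_of_samples() returns an interleaved buffer for
# multi-channel audio; the effect functions below operate on that raw buffer.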

# === Effect Functions ===
def apply_normalize(audio):
    return audio.normalize()

def apply_noise_reduction(audio):
    samples, frame_rate = audiosegment_to_array(audio)
    # noisereduce returns float64; cast back so the sample width stays correct.
    reduced = nr.reduce_noise(y=samples, sr=frame_rate).astype(samples.dtype)
    return array_to_audiosegment(reduced, frame_rate, channels=audio.channels)

def apply_compression(audio):
    return audio.compress_dynamic_range()

def apply_reverb(audio):
    # Cheap reverb approximation: overlay a quieter copy of the signal 1 s later.
    reverb = audio - 10
    return audio.overlay(reverb, position=1000)

def apply_pitch_shift(audio, semitones=-2):
    # Resample the waveform, then keep the original frame rate so the pitch
    # moves by `semitones` (duration changes too with this simple method).
    # Note: this interpolates the interleaved buffer, so it is best suited
    # to mono input.
    factor = 2 ** (semitones / 12)
    samples = np.array(audio.get_array_of_samples())
    resampled = np.interp(
        np.arange(0, len(samples), factor),
        np.arange(len(samples)),
        samples
    ).astype(np.int16)
    return AudioSegment(
        resampled.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=2,  # matches the int16 buffer above
        channels=audio.channels
    )

def apply_echo(audio, delay_ms=500, decay=0.5):
    # Attenuate the delayed copy by the decay factor (converted to dB).
    echo = audio + (20 * np.log10(decay))
    return audio.overlay(echo, position=delay_ms)

def apply_stereo_widen(audio, pan_amount=0.3):
    # pan() returns stereo segments, so mix the two panned copies rather
    # than calling from_mono_audiosegments (which requires mono inputs).
    left = audio.pan(-pan_amount)
    right = audio.pan(pan_amount)
    return left.overlay(right)

def apply_bass_boost(audio, gain=10):
    # Mix a boosted copy of the sub-100 Hz band back into the original,
    # rather than returning only the filtered band.
    return audio.overlay(audio.low_pass_filter(100).apply_gain(gain))

def apply_treble_boost(audio, gain=10):
    # Mix a boosted copy of the 4 kHz+ band back into the original.
    return audio.overlay(audio.high_pass_filter(4000).apply_gain(gain))

# === Vocal Isolation Helpers ===
def load_track_local(path, sample_rate, channels=2):
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)
    if channels == 1:
        sig = sig.mean(0, keepdim=True)
    elif channels == 2 and sig.shape[0] == 1:
        sig = sig.repeat(2, 1)  # duplicate mono input so the model gets stereo
    return sig

def save_track(path, wav, sample_rate):
    path = Path(path)
    torchaudio.save(str(path), wav, sample_rate)

def apply_vocal_isolation(audio_path):
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)
    # Normalize as in Demucs' own separation script, then undo afterwards.
    ref = wav.mean(0)
    wav = (wav - ref.mean()) / ref.std()
    sources = apply_model(model, wav[None])[0]
    sources = sources * ref.std() + ref.mean()

    vocal_track = sources[3].cpu()  # htdemucs order: drums, bass, other, vocals
    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
    save_track(out_path, vocal_track, model.samplerate)
    return out_path
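
# Performance note (assumes a CUDA-capable GPU may be present): Demucs runs
# much faster on GPU, and apply_model() accepts a device argument, e.g.:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   sources = apply_model(model, wav[None], device=device)[0]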

# === Preset Loader ===
def load_presets():
    presets = {}
    if not os.path.isdir("presets"):
        return presets  # no presets directory; run with custom effects only
    for f in os.listdir("presets"):
        if not f.endswith(".json"):
            continue
        with open(os.path.join("presets", f)) as infile:
            data = json.load(infile)
            presets[data["name"]] = data["effects"]
    return presets
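
# Each presets/*.json file is expected to look like this (hypothetical example;
# effect names must match the keys of effect_map below):
# {
#     "name": "Podcast Voice",
#     "effects": ["Noise Reduction", "Compress Dynamic Range", "Normalize"]
# }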

preset_choices = load_presets()

# === Main Processing Function ===
def process_audio(audio_file, selected_effects, isolate_vocals, preset_name):
    audio = AudioSegment.from_file(audio_file)

    effect_map = {
        "Noise Reduction": apply_noise_reduction,
        "Compress Dynamic Range": apply_compression,
        "Add Reverb": apply_reverb,
        "Pitch Shift": lambda x: apply_pitch_shift(x),
        "Echo": apply_echo,
        "Stereo Widening": apply_stereo_widen,
        "Bass Boost": apply_bass_boost,
        "Treble Boost": apply_treble_boost,
        "Normalize": apply_normalize,
    }

    # A selected preset takes precedence over the checkbox selection.
    effects_to_apply = preset_choices.get(preset_name, selected_effects)
    for effect_name in effects_to_apply:
        if effect_name in effect_map:
            audio = effect_map[effect_name](audio)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        if isolate_vocals:
            temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
            audio.export(temp_input, format="wav")
            vocal_path = apply_vocal_isolation(temp_input)
            final_audio = AudioSegment.from_wav(vocal_path)
        else:
            final_audio = audio

        output_path = f.name
        final_audio.export(output_path, format="mp3")
        return output_path

# === Gradio Interface ===
effect_options = [
    "Noise Reduction",
    "Compress Dynamic Range",
    "Add Reverb",
    "Pitch Shift",
    "Echo",
    "Stereo Widening",
    "Bass Boost",
    "Treble Boost",
    "Normalize"
]

preset_names = list(preset_choices.keys())

interface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
        gr.Checkbox(label="Isolate Vocals After Effects"),
        gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None)
    ],
    outputs=gr.Audio(label="Processed Audio (MP3)", type="filepath"),
    title="AI Audio Studio - Pro Edition",
    description="Apply multiple effects, isolate vocals, and export polished tracks -- all powered by AI!",
    allow_flagging="never"
)

if __name__ == "__main__":
    interface.launch()