# AudioMaster / app.py
# tee342's picture
# Update app.py
# 0663ef0 verified
# raw
# history blame
# 6.14 kB
import gradio as gr
from pydub import AudioSegment
import numpy as np
import tempfile
import os
import noisereduce as nr
import json
import torch
from demucs import pretrained
from demucs.apply import apply_model
import torchaudio
from pathlib import Path
# === Helper Functions ===
def audiosegment_to_array(audio):
    """Return ``(samples, frame_rate)`` for an audio segment.

    ``samples`` is a NumPy array built from ``audio.get_array_of_samples()``
    and ``frame_rate`` is read straight from ``audio.frame_rate``.
    """
    sample_array = np.array(audio.get_array_of_samples())
    rate = audio.frame_rate
    return sample_array, rate
def array_to_audiosegment(samples, frame_rate, channels=1):
    """Wrap a NumPy sample array in a new ``AudioSegment``.

    The sample width (bytes per sample) is taken from the array's dtype, so
    the caller is responsible for passing an array whose dtype matches the
    intended PCM width.
    """
    raw_bytes = samples.tobytes()
    bytes_per_sample = samples.dtype.itemsize
    return AudioSegment(
        raw_bytes,
        frame_rate=frame_rate,
        sample_width=bytes_per_sample,
        channels=channels,
    )
# === Effect Functions ===
def apply_normalize(audio):
    """Return the result of calling ``normalize()`` on the segment."""
    normalized = audio.normalize()
    return normalized
def apply_noise_reduction(audio):
    """Run noisereduce over the segment and return a new segment.

    Multi-channel audio is stored interleaved (L, R, L, R, ...). Feeding that
    flat stream to ``reduce_noise`` would treat it as one mono signal, so the
    samples are first reshaped to ``(channels, frames)`` and interleaved again
    afterwards.
    """
    samples, frame_rate = audiosegment_to_array(audio)
    channels = audio.channels

    if channels > 1:
        # (frames, channels) -> (channels, frames), one row per channel.
        planar = samples.reshape((-1, channels)).T
        reduced = nr.reduce_noise(y=planar, sr=frame_rate)
        # Back to the interleaved layout expected by AudioSegment.
        reduced = np.ascontiguousarray(reduced.T).reshape(-1)
    else:
        reduced = nr.reduce_noise(y=samples, sr=frame_rate)

    # Keep the original integer dtype so the derived sample width is unchanged.
    reduced = reduced.astype(samples.dtype, copy=False)
    return array_to_audiosegment(reduced, frame_rate, channels=channels)
def apply_compression(audio):
    """Return the segment after ``compress_dynamic_range()`` with default settings."""
    compressed = audio.compress_dynamic_range()
    return compressed
def apply_reverb(audio):
    """Overlay a quieter copy of the segment onto itself, offset by one second.

    The copy is produced with ``audio - 10`` and overlaid at position 1000.
    """
    attenuation = 10
    offset = 1000
    quieter_copy = audio - attenuation
    return audio.overlay(quieter_copy, position=offset)
def apply_pitch_shift(audio, semitones=-2):
    """Shift the pitch by ``semitones`` by resampling (duration changes too).

    Each channel is resampled with linear interpolation at a step of
    ``2 ** (semitones / 12)`` source frames per output frame, and the result
    keeps the segment's original frame rate.

    Differences from the previous implementation:
    * the frame rate is no longer scaled as well, which applied the shift
      twice;
    * channels are resampled independently instead of interpolating across
      the interleaved stream;
    * the output dtype follows the input samples rather than assuming int16.

    Args:
        audio: segment to process.
        semitones: shift in semitones; negative lowers the pitch.

    Returns:
        A new ``AudioSegment`` (or ``audio`` itself when it holds no frames).
    """
    factor = 2 ** (semitones / 12)
    channels = audio.channels
    samples = np.array(audio.get_array_of_samples())

    # One row per frame, one column per channel.
    frames = samples.reshape((-1, channels))
    frame_count = frames.shape[0]
    if frame_count == 0:
        # np.interp rejects empty sample points; nothing to shift anyway.
        return audio

    source_positions = np.arange(frame_count)
    target_positions = np.arange(0, frame_count, factor)
    shifted = np.stack(
        [
            np.interp(target_positions, source_positions, frames[:, ch])
            for ch in range(channels)
        ],
        axis=1,
    )

    limits = np.iinfo(samples.dtype)
    shifted = np.clip(np.rint(shifted), limits.min, limits.max).astype(samples.dtype)

    return AudioSegment(
        shifted.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=audio.sample_width,
        channels=channels,
    )
def apply_echo(audio, delay_ms=500, decay=0.5):
    """Overlay one delayed, attenuated copy of the segment onto itself.

    ``decay`` used to be ignored (the copy was always 10 dB quieter). It is
    now honoured as a linear amplitude ratio: 0.5 means the echo has half the
    amplitude of the original (about -6 dB).

    Args:
        audio: segment to process.
        delay_ms: offset of the echo in milliseconds.
        decay: linear amplitude ratio of the echo; values <= 0 disable it.

    Returns:
        The segment with the echo mixed in, or ``audio`` unchanged when
        ``decay`` is not positive.
    """
    if decay <= 0:
        # A non-positive ratio has no dB equivalent; treat it as "no echo".
        return audio

    gain_db = float(20 * np.log10(decay))
    echo = audio.apply_gain(gain_db)
    return audio.overlay(echo, position=delay_ms)
def apply_stereo_widen(audio, pan_amount=0.3):
    """Widen the stereo image with mid/side processing.

    The previous version passed the results of ``pan()`` to
    ``AudioSegment.from_mono_audiosegments``; those results are stereo, so
    the call raised ``ValueError`` for every input.

    The signal is split into mid ``(L + R) / 2`` and side ``(L - R) / 2``;
    the side component is scaled by ``1 + pan_amount`` and the channels are
    rebuilt as ``mid + side`` / ``mid - side``.

    Args:
        audio: segment to process; mono input is converted to stereo first
            (it has no side component, so it comes back as dual mono).
        pan_amount: extra side gain as a fraction; 0 leaves the image
            unchanged, negative values narrow it.

    Returns:
        A new two-channel ``AudioSegment``.
    """
    stereo = audio.set_channels(2)
    samples = np.array(stereo.get_array_of_samples())

    # Work in float to avoid integer overflow while mixing.
    frames = samples.reshape((-1, 2)).astype(np.float64)
    left, right = frames[:, 0], frames[:, 1]
    mid = (left + right) / 2.0
    side = (left - right) / 2.0 * (1.0 + pan_amount)

    widened = np.stack((mid + side, mid - side), axis=1)
    limits = np.iinfo(samples.dtype)
    # Boosting the side signal can exceed the PCM range; clip instead of wrapping.
    widened = np.clip(np.rint(widened), limits.min, limits.max).astype(samples.dtype)

    return AudioSegment(
        widened.tobytes(),
        frame_rate=stereo.frame_rate,
        sample_width=stereo.sample_width,
        channels=2,
    )
def apply_bass_boost(audio, gain=10, cutoff_hz=100):
    """Boost the low end by mixing an amplified low-passed copy over the original.

    The previous version returned only the low-passed band, which discarded
    everything above the cutoff instead of boosting the bass.

    Args:
        audio: segment to process.
        gain: gain in dB applied to the low-passed copy before mixing.
        cutoff_hz: low-pass cutoff frequency in Hz.

    Returns:
        The original segment with the boosted low band overlaid.
    """
    boosted_lows = audio.low_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(boosted_lows)
def apply_treble_boost(audio, gain=10, cutoff_hz=4000):
    """Boost the high end by mixing an amplified high-passed copy over the original.

    The previous version returned only the high-passed band, which discarded
    everything below the cutoff instead of boosting the treble.

    Args:
        audio: segment to process.
        gain: gain in dB applied to the high-passed copy before mixing.
        cutoff_hz: high-pass cutoff frequency in Hz.

    Returns:
        The original segment with the boosted high band overlaid.
    """
    boosted_highs = audio.high_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(boosted_highs)
# === Vocal Isolation Helpers ===
def load_track_local(path, sample_rate, channels=2):
    """Load an audio file as a tensor at ``sample_rate`` with ``channels`` channels.

    Args:
        path: file to load with ``torchaudio.load``.
        sample_rate: target sample rate; the signal is resampled if needed.
        channels: requested channel count.

    Returns:
        For ``channels == 1`` the channel-averaged 1-D signal (unchanged
        behaviour). Otherwise a ``(channels, frames)`` tensor: a mono source
        is duplicated and surplus channels are dropped. Previously a mono
        file was returned with a single channel even when two were requested.

    Raises:
        ValueError: if the file has more than one but fewer than
            ``channels`` channels, which cannot be converted unambiguously.
    """
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)

    if channels == 1:
        return sig.mean(0)

    source_channels = sig.shape[0]
    if source_channels == 1:
        # repeat() copies the data, so later in-place maths on the result is safe.
        sig = sig.repeat(channels, 1)
    elif source_channels > channels:
        sig = sig[:channels]
    elif source_channels < channels:
        raise ValueError(
            f"Cannot convert audio with {source_channels} channels to {channels} channels"
        )
    return sig
def save_track(path, wav, sample_rate):
    """Write ``wav`` to ``path`` at ``sample_rate`` using ``torchaudio.save``.

    ``path`` is normalised through ``pathlib.Path`` and handed over as a
    string.
    """
    destination = str(Path(path))
    torchaudio.save(destination, wav, sample_rate)
def apply_vocal_isolation(audio_path):
    """Separate the vocal stem of ``audio_path`` with the ``htdemucs`` model.

    Fixes over the previous version:
    * the input is normalised with the scalar mean/std of the channel-averaged
      signal and the separated sources are de-normalised afterwards; the old
      ``wav -= ref[:, None]`` could not broadcast against a
      ``(channels, frames)`` tensor;
    * the vocal stem is looked up by name in ``model.sources`` instead of a
      hard-coded index;
    * the result is written to a unique temporary file, so concurrent
      requests no longer overwrite one shared ``vocals.wav``.

    Args:
        audio_path: path of the audio file to separate.

    Returns:
        Path of a WAV file holding the isolated vocals. The caller owns the
        file and should delete it when done.
    """
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)

    ref = wav.mean(0)
    mean = ref.mean()
    # Guard against division by zero on silent input.
    std = ref.std().clamp(min=1e-8)

    normalized = (wav - mean) / std
    sources = apply_model(model, normalized[None])[0]
    sources = sources * std + mean

    vocal_index = model.sources.index("vocals")
    vocal_track = sources[vocal_index].cpu()

    # Reserve a unique name; the handle is closed before torchaudio writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        out_path = handle.name
    save_track(out_path, vocal_track, model.samplerate)
    return out_path
# === Preset Loader ===
def load_presets():
    """Load effect presets from ``*.json`` files in the ``presets`` folder.

    Each file must contain a JSON object with a string ``"name"`` and a list
    ``"effects"``. Files are read in sorted order so the resulting preset
    order is stable across runs.

    Problems are reported with ``print`` and never raise:
    * a missing ``presets`` folder yields an empty dict;
    * a file that cannot be read or parsed is skipped, without discarding the
      presets already loaded;
    * JSON that is not an object of the expected shape is skipped (a top-level
      list previously crashed with ``TypeError``).

    Returns:
        Dict mapping preset name to its list of effect names.
    """
    presets = {}
    try:
        preset_files = sorted(
            f for f in os.listdir("presets") if f.endswith(".json")
        )
    except (FileNotFoundError, NotADirectoryError):
        print("Presets folder not found")
        return presets

    for f in preset_files:
        path = os.path.join("presets", f)
        try:
            with open(path, "r", encoding="utf-8") as infile:
                data = json.load(infile)
        except json.JSONDecodeError:
            print(f"Failed to parse {f} — invalid JSON")
            continue
        except (OSError, UnicodeDecodeError):
            print(f"Failed to read {f}")
            continue

        is_valid = (
            isinstance(data, dict)
            and isinstance(data.get("name"), str)
            and isinstance(data.get("effects"), list)
        )
        if is_valid:
            presets[data["name"]] = data["effects"]
        else:
            print(f"Invalid format in {f}")
    return presets
# preset_choices is always defined: the presets loaded from disk or, when none
# were found, a single "Default" preset with no effects.
preset_choices = load_presets() or {"Default": []}
# === Main Processing Function ===
def process_audio(audio_file, selected_effects, isolate_vocals, preset_name):
    """Apply effects (and optionally vocal isolation) and export the result as MP3.

    Fixes over the previous version:
    * a preset only overrides the checkbox selection when it actually lists
      effects; the empty "Default" preset used to win every time, so no
      effect was ever applied;
    * the exported MP3 is written to a ``.mp3`` file instead of a ``.wav``
      one, and only after the temporary handle has been closed;
    * the intermediate WAV for vocal isolation uses a unique temporary file
      that is removed afterwards, instead of a shared ``input.wav``;
    * a missing upload produces a readable error.

    Args:
        audio_file: path of the uploaded audio file.
        selected_effects: effect names chosen in the UI, applied in order.
        isolate_vocals: when true, run vocal isolation after the effects.
        preset_name: key into ``preset_choices``; unknown or empty presets
            fall back to ``selected_effects``.

    Returns:
        Path of the exported MP3 file.

    Raises:
        gr.Error: if no audio file was provided.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    audio = AudioSegment.from_file(audio_file)

    effect_map = {
        "Noise Reduction": apply_noise_reduction,
        "Compress Dynamic Range": apply_compression,
        "Add Reverb": apply_reverb,
        "Pitch Shift": apply_pitch_shift,
        "Echo": apply_echo,
        "Stereo Widening": apply_stereo_widen,
        "Bass Boost": apply_bass_boost,
        "Treble Boost": apply_treble_boost,
        "Normalize": apply_normalize,
    }

    # A preset with effects wins; otherwise honour the checkbox selection.
    effects_to_apply = preset_choices.get(preset_name) or selected_effects or []
    for effect_name in effects_to_apply:
        effect = effect_map.get(effect_name)
        if effect is not None:
            audio = effect(audio)

    if isolate_vocals:
        # Reserve a unique path, then let pydub write to it once it is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            temp_input = tmp.name
        try:
            audio.export(temp_input, format="wav")
            vocal_path = apply_vocal_isolation(temp_input)
            final_audio = AudioSegment.from_wav(vocal_path)
        finally:
            if os.path.exists(temp_input):
                os.remove(temp_input)
    else:
        final_audio = audio

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out:
        output_path = out.name
    final_audio.export(output_path, format="mp3")
    return output_path
# === Gradio Interface ===
# Effect names offered in the UI; each must match a key of the effect map
# built inside process_audio.
effect_options = [
    "Noise Reduction", "Compress Dynamic Range", "Add Reverb",
    "Pitch Shift", "Echo", "Stereo Widening",
    "Bass Boost", "Treble Boost", "Normalize",
]

preset_names = list(preset_choices)

# Input components are listed in the same order as process_audio's parameters.
interface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.CheckboxGroup(
            choices=effect_options,
            label="Apply Effects in Order",
        ),
        gr.Checkbox(label="Isolate Vocals After Effects"),
        gr.Dropdown(
            choices=preset_names,
            label="Select Preset",
            # First preset is preselected; None when there are no presets.
            value=next(iter(preset_names), None),
        ),
    ],
    outputs=gr.Audio(label="Processed Audio (MP3)", type="filepath"),
    title="AI Audio Studio - Pro Edition",
    description="Apply multiple effects, isolate vocals, and export polished tracks -- all powered by AI!",
    allow_flagging="never",
)

interface.launch()