import gradio as gr
from pydub import AudioSegment
import numpy as np
import tempfile
import os
import noisereduce as nr
import json
import torch
from demucs import pretrained
from demucs.apply import apply_model
import torchaudio
from pathlib import Path
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image
import zipfile
import datetime
import librosa
import librosa.display
# === Helper Functions ===
def audiosegment_to_array(audio):
return np.array(audio.get_array_of_samples()), audio.frame_rate
def array_to_audiosegment(samples, frame_rate, channels=1):
return AudioSegment(
samples.tobytes(),
frame_rate=frame_rate,
sample_width=samples.dtype.itemsize,
channels=channels
)
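# Round-trip usage sketch (hypothetical mono, 16-bit "input.wav"):
#   seg = AudioSegment.from_file("input.wav")
#   samples, sr = audiosegment_to_array(seg)
#   restored = array_to_audiosegment(samples, sr, channels=seg.channels)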
# === Effect Functions ===
def apply_normalize(audio):
return audio.normalize()
def apply_noise_reduction(audio):
    samples, frame_rate = audiosegment_to_array(audio)
    # noisereduce returns floats; cast back to the segment's integer dtype
    # so array_to_audiosegment derives the correct sample width
    reduced = nr.reduce_noise(y=samples.astype(np.float32), sr=frame_rate)
    return array_to_audiosegment(reduced.astype(samples.dtype), frame_rate, channels=audio.channels)
def apply_compression(audio):
return audio.compress_dynamic_range()
def apply_reverb(audio):
    # crude reverb approximation: overlay a quieter copy delayed by 1 s
    reverb = audio - 10
    return audio.overlay(reverb, position=1000)
def apply_pitch_shift(audio, semitones=-2):
    # tape-style shift: resampling by the pitch ratio changes pitch and
    # duration together; assumes mono samples (interleaved stereo would
    # need per-channel resampling)
    ratio = 2 ** (semitones / 12)
    samples = np.array(audio.get_array_of_samples())
    resampled = np.interp(
        np.arange(0, len(samples), ratio),
        np.arange(len(samples)),
        samples
    ).astype(np.int16)
    return AudioSegment(
        resampled.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=2,  # must match the int16 cast above
        channels=audio.channels
    )
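# The 2 ** (semitones / 12) factor is the equal-tempered frequency ratio:
# semitones=-2 gives about 0.891 (roughly one whole tone down), while
# semitones=12 doubles the frequency (one octave up).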
def apply_echo(audio, delay_ms=500, decay=0.5):
    # attenuate the delayed copy by the decay factor, expressed in dB
    echo = audio.apply_gain(20 * np.log10(decay))
    return audio.overlay(echo, position=delay_ms)
def apply_stereo_widen(audio, pan_amount=0.3):
    # pan() returns stereo segments, so take one mono channel from each
    # side before recombining them into a widened stereo image
    left = audio.pan(-pan_amount).split_to_mono()[0]
    right = audio.pan(pan_amount).split_to_mono()[1]
    return AudioSegment.from_mono_audiosegments(left, right)
def apply_bass_boost(audio, gain=10):
    # boost the low band and mix it back over the original signal rather
    # than returning only the filtered band
    low = audio.low_pass_filter(100).apply_gain(gain)
    return audio.overlay(low)
def apply_treble_boost(audio, gain=10):
    high = audio.high_pass_filter(4000).apply_gain(gain)
    return audio.overlay(high)
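# Example chain (hypothetical filename), applying effects in sequence the
# same way process_audio does further below:
#   seg = AudioSegment.from_file("song.wav")
#   seg = apply_bass_boost(apply_normalize(seg))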
# === Vocal Isolation Helpers ===
def load_track_local(path, sample_rate, channels=2):
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)
    if channels == 1:
        sig = sig.mean(0, keepdim=True)
    elif sig.shape[0] == 1:
        # duplicate a mono signal so the model sees the stereo input it expects
        sig = sig.repeat(2, 1)
    return sig
def save_track(path, wav, sample_rate):
path = Path(path)
torchaudio.save(str(path), wav, sample_rate)
def apply_vocal_isolation(audio_path):
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)
    # normalize as in the Demucs reference code, then undo after separation
    ref = wav.mean(0)
    wav = (wav - ref.mean()) / ref.std()
    sources = apply_model(model, wav[None])[0]
    sources = sources * ref.std() + ref.mean()
    vocal_track = sources[model.sources.index('vocals')].cpu()
    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
    save_track(out_path, vocal_track, model.samplerate)
    return out_path
# === Stem Splitting (Drums, Bass, Other, Vocals) ===
def stem_split(audio_path):
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)
    sources = apply_model(model, wav[None])[0]
    output_dir = tempfile.mkdtemp()
    stem_paths = []
    # model.sources gives the output order; for htdemucs it is
    # ['drums', 'bass', 'other', 'vocals']
    for name, source in zip(model.sources, sources):
        path = os.path.join(output_dir, f"{name}.wav")
        save_track(path, source.cpu(), model.samplerate)
        stem_paths.append(path)
    return stem_paths
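# Usage sketch: the returned paths follow model.sources order, so
# (assuming a hypothetical "song.wav"):
#   drums, bass, other, vocals = stem_split("song.wav")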
# === Preset Loader with Fallback ===
def load_presets():
try:
preset_files = [f for f in os.listdir("presets") if f.endswith(".json")]
presets = {}
for f in preset_files:
path = os.path.join("presets", f)
try:
with open(path, "r") as infile:
data = json.load(infile)
if "name" in data and "effects" in data:
presets[data["name"]] = data["effects"]
except json.JSONDecodeError:
print(f"Invalid JSON: {f}")
return presets
except FileNotFoundError:
print("Presets folder not found")
return {}
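# load_presets() expects each JSON file under presets/ to have this shape
# (illustrative example):
#   {"name": "Clean Podcast", "effects": ["Noise Reduction", "Normalize"]}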
preset_choices = load_presets()
if not preset_choices:
preset_choices = {
"Default": [],
"Clean Podcast": ["Noise Reduction", "Normalize"],
"Music Remix": ["Bass Boost", "Stereo Widening"]
}
preset_names = list(preset_choices.keys())
# === Waveform + Spectrogram Generator ===
def show_waveform(audio_file):
audio = AudioSegment.from_file(audio_file)
samples = np.array(audio.get_array_of_samples())
plt.figure(figsize=(10, 2))
    # plot only the first 10,000 samples to keep the preview fast
    plt.plot(samples[:10000], color="blue")
plt.axis("off")
buf = BytesIO()
plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
plt.close()
buf.seek(0)
return Image.open(buf)
def show_spectrogram(audio_file):
y, sr = torchaudio.load(audio_file)
    # downmix to mono; flatten() would splice stereo channels end to end
    y_np = y.mean(0).numpy()
S = librosa.feature.melspectrogram(y=y_np, sr=sr)
plt.figure(figsize=(10, 2))
librosa.display.specshow(librosa.power_to_db(S, ref=np.max), sr=sr, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram')
plt.tight_layout()
buf = BytesIO()
plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
plt.close()
buf.seek(0)
return Image.open(buf)
# === Session Info Export ===
def generate_session_log(audio_path, effects, isolate_vocals, export_format):
log = {
"timestamp": str(datetime.datetime.now()),
"filename": os.path.basename(audio_path),
"effects_applied": effects,
"isolate_vocals": isolate_vocals,
"export_format": export_format
}
return json.dumps(log, indent=2)
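# Example log (illustrative values):
#   {
#     "timestamp": "2025-01-01 12:00:00.000000",
#     "filename": "song.wav",
#     "effects_applied": ["Normalize"],
#     "isolate_vocals": false,
#     "export_format": "MP3"
#   }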
# === Main Processing Function ===
def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
audio = AudioSegment.from_file(audio_file)
effect_map = {
"Noise Reduction": apply_noise_reduction,
"Compress Dynamic Range": apply_compression,
"Add Reverb": apply_reverb,
"Pitch Shift": lambda x: apply_pitch_shift(x),
"Echo": apply_echo,
"Stereo Widening": apply_stereo_widen,
"Bass Boost": apply_bass_boost,
"Treble Boost": apply_treble_boost,
"Normalize": apply_normalize,
}
effects_to_apply = preset_choices.get(preset_name, selected_effects)
for effect_name in effects_to_apply:
if effect_name in effect_map:
audio = effect_map[effect_name](audio)
    # give the temp file an extension matching the chosen export format
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{export_format.lower()}") as f:
if isolate_vocals:
temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
audio.export(temp_input, format="wav")
vocal_path = apply_vocal_isolation(temp_input)
final_audio = AudioSegment.from_wav(vocal_path)
else:
final_audio = audio
output_path = f.name
final_audio.export(output_path, format=export_format.lower())
waveform_image = show_waveform(output_path)
spectrogram_image = show_spectrogram(output_path)
session_log = generate_session_log(audio_file, effects_to_apply, isolate_vocals, export_format)
return output_path, waveform_image, spectrogram_image, session_log
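# Direct (non-UI) usage sketch, assuming a hypothetical "song.wav":
#   out_path, wave_img, spec_img, log = process_audio(
#       "song.wav", ["Normalize"], False, "Default", "WAV")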
# === Batch Processing Function ===
def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
output_dir = tempfile.mkdtemp()
results = []
session_logs = []
    for file in files:
        # gr.File(type="filepath") passes plain path strings, not file objects
        processed_path, _, _, log = process_audio(file, selected_effects, isolate_vocals, preset_name, export_format)
results.append(processed_path)
session_logs.append(log)
zip_path = os.path.join(output_dir, "batch_output.zip")
with zipfile.ZipFile(zip_path, 'w') as zipf:
for i, res in enumerate(results):
filename = f"processed_{i}.{export_format.lower()}"
zipf.write(res, filename)
zipf.writestr(f"session_info_{i}.json", session_logs[i])
return zip_path
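# Each entry in the ZIP pairs processed_<i>.<ext> with a matching
# session_info_<i>.json describing how that file was processed.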
# === Gradio Interface Setup ===
effect_options = [
"Noise Reduction",
"Compress Dynamic Range",
"Add Reverb",
"Pitch Shift",
"Echo",
"Stereo Widening",
"Bass Boost",
"Treble Boost",
"Normalize"
]
# === Multi-Tab UI ===
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
gr.Markdown("""
# 🎧 AI Audio Studio – Powered by Hugging Face & Demucs
Upload, edit, and export audio with AI-powered tools.
""")
# ----- Single File Studio Tab -----
with gr.Tab("🎡 Single File Studio"):
gr.Interface(
fn=process_audio,
inputs=[
gr.Audio(label="Upload Audio", type="filepath"),
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
gr.Checkbox(label="Isolate Vocals After Effects"),
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
],
outputs=[
gr.Audio(label="Processed Audio", type="filepath"),
gr.Image(label="Waveform Preview"),
gr.Image(label="Spectrogram View"),
gr.Textbox(label="Session Log (JSON)", lines=5)
],
title="Edit One File at a Time",
description="Apply effects, preview waveform and spectrogram, and get full session log.",
flagging_mode="never",
submit_btn="Process Audio",
clear_btn=None
)
# ----- Batch Processing Tab -----
with gr.Tab("πŸ”Š Batch Processing"):
gr.Interface(
fn=batch_process_audio,
inputs=[
gr.File(label="Upload Multiple Audio Files", file_count="multiple"),
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
gr.Checkbox(label="Isolate Vocals After Effects"),
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
],
outputs=gr.File(label="Download ZIP of All Processed Files"),
title="Batch Audio Processor",
description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
flagging_mode="never",
submit_btn="Process All Files",
clear_btn=None
)
# ----- Remix Mode Tab -----
with gr.Tab("πŸŽ› Remix Mode (Split Stems)"):
        def remix_mode(audio_file):
            # gr.Audio(type="filepath") passes a plain path string; the
            # stems come back in model order: drums, bass, other, vocals
            return stem_split(audio_file)
gr.Interface(
fn=remix_mode,
inputs=gr.Audio(label="Upload Music Track", type="filepath"),
            outputs=[
                gr.File(label="Drums"),
                gr.File(label="Bass"),
                gr.File(label="Other"),
                gr.File(label="Vocals")
            ],
title="Split Into Drums, Bass, Vocals, and More",
description="Use AI to separate musical elements like vocals, drums, and bass.",
flagging_mode="never",
clear_btn=None
)
# ----- Session Info Tab -----
with gr.Tab("πŸ“ Session Info"):
def get_session_info(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
return generate_session_log(audio_file, selected_effects, isolate_vocals, export_format)
gr.Interface(
fn=get_session_info,
inputs=[
gr.Audio(label="Upload Audio", type="filepath"),
gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
gr.Checkbox(label="Isolate Vocals After Effects"),
gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
],
outputs=gr.Textbox(label="Your Session Info (Copy or Save This)", lines=10),
title="Save Your Session Settings",
description="Get a full log of what was done to your track.",
flagging_mode="never",
clear_btn=None
)
demo.launch()