Spaces:

tee342
/

AudioMaster

Sleeping

App Files Files Community

AudioMaster / app.py

tee342

Update app.py

3af2469 verified 4 months ago

raw

history blame

11.7 kB

	import gradio as gr
	from pydub import AudioSegment
	import numpy as np
	import tempfile
	import os
	import noisereduce as nr
	import json
	import torch
	from demucs import pretrained
	from demucs.apply import apply_model
	import torchaudio
	from pathlib import Path
	import matplotlib.pyplot as plt
	from io import BytesIO
	from PIL import Image
	import zipfile
	import datetime

	# === Helper Functions ===
	def audiosegment_to_array(audio):
	    """Convert an audio segment into a NumPy array plus its frame rate.

	    Args:
	        audio: Object exposing ``get_array_of_samples()`` and ``frame_rate``
	            (a pydub ``AudioSegment`` in this app).

	    Returns:
	        Tuple ``(samples, frame_rate)`` where ``samples`` is the NumPy array
	        built from ``audio.get_array_of_samples()``.
	    """
	    raw_samples = audio.get_array_of_samples()
	    sample_array = np.array(raw_samples)
	    return sample_array, audio.frame_rate

	def array_to_audiosegment(samples, frame_rate, channels=1):
	    """Wrap a NumPy sample array in a new ``AudioSegment``.

	    Args:
	        samples: NumPy array of samples; its raw bytes become the audio data
	            and its dtype item size becomes the sample width.
	        frame_rate: Frame rate passed through to the segment.
	        channels: Channel count passed through to the segment (default 1).

	    Returns:
	        The newly constructed ``AudioSegment``.
	    """
	    segment_kwargs = {
	        "frame_rate": frame_rate,
	        "sample_width": samples.dtype.itemsize,
	        "channels": channels,
	    }
	    return AudioSegment(samples.tobytes(), **segment_kwargs)

	# === Effect Functions ===
	def apply_normalize(audio):
	    """Return the result of calling ``audio.normalize()``."""
	    normalized = audio.normalize()
	    return normalized

	def apply_noise_reduction(audio):
	    """Run ``noisereduce`` over ``audio`` and return the cleaned segment.

	    pydub returns multi-channel audio as a single interleaved 1-D array.
	    Passing that straight to ``nr.reduce_noise`` makes it treat the stream as
	    one mono signal, so multi-channel input is reshaped to
	    ``(channels, frames)`` first (the layout noisereduce documents for
	    multi-channel data) and re-interleaved afterwards.

	    Args:
	        audio: Source ``AudioSegment``.

	    Returns:
	        A new ``AudioSegment`` with the same frame rate and channel count, or
	        ``audio`` itself when it contains no samples.
	    """
	    samples, frame_rate = audiosegment_to_array(audio)
	    channels = audio.channels
	    if samples.size == 0:
	        # Nothing to denoise.
	        return audio

	    if channels > 1:
	        planar = samples.reshape(-1, channels).T
	        reduced = nr.reduce_noise(y=planar, sr=frame_rate)
	        # Back to interleaved order for pydub.
	        reduced = np.ascontiguousarray(np.asarray(reduced).T).reshape(-1)
	    else:
	        reduced = nr.reduce_noise(y=samples, sr=frame_rate)

	    # The sample width is derived from the dtype downstream, so keep the
	    # original integer dtype even if the library hands back something else.
	    reduced = np.asarray(reduced).astype(samples.dtype, copy=False)
	    return array_to_audiosegment(reduced, frame_rate, channels=channels)

	def apply_compression(audio):
	    """Return the result of ``audio.compress_dynamic_range()`` with its defaults."""
	    compressed = audio.compress_dynamic_range()
	    return compressed

	def apply_reverb(audio):
	    """Overlay a copy of ``audio`` reduced by 10 onto itself at position 1000.

	    With pydub segments, subtracting a number lowers the gain in dB and
	    ``position`` is given in milliseconds.
	    """
	    attenuation = 10
	    offset = 1000
	    wet_signal = audio - attenuation
	    return audio.overlay(wet_signal, position=offset)

	def apply_pitch_shift(audio, semitones=-2):
	    """Shift the pitch of ``audio`` by ``semitones`` using linear resampling.

	    The samples are re-read at a step of ``2 ** (semitones / 12)`` frames and
	    played back at the original frame rate, so the pitch moves by exactly
	    ``semitones`` (and the duration changes by the inverse ratio).

	    The previous version both resampled and relabelled the frame rate, which
	    applied the shift twice and produced non-standard frame rates. It also
	    interpolated across interleaved channels and forced int16 regardless of
	    the real sample width.

	    Args:
	        audio: Source ``AudioSegment``.
	        semitones: Signed shift in semitones (negative lowers the pitch).

	    Returns:
	        A new ``AudioSegment`` with the original frame rate, sample width and
	        channel count, or ``audio`` itself when there is nothing to do.
	    """
	    ratio = 2 ** (semitones / 12)
	    samples = np.array(audio.get_array_of_samples())
	    channels = audio.channels

	    # One row per frame, one column per channel.
	    frames = samples.reshape(-1, channels)
	    frame_count = frames.shape[0]
	    if frame_count == 0 or ratio == 1:
	        return audio

	    source_positions = np.arange(frame_count)
	    read_positions = np.arange(0, frame_count, ratio)
	    # Interpolate every channel on its own so left/right never bleed together.
	    resampled = np.stack(
	        [
	            np.interp(read_positions, source_positions, frames[:, ch])
	            for ch in range(channels)
	        ],
	        axis=1,
	    )

	    limits = np.iinfo(samples.dtype)
	    resampled = np.clip(np.rint(resampled), limits.min, limits.max)
	    resampled = resampled.astype(samples.dtype)

	    return AudioSegment(
	        resampled.tobytes(),
	        frame_rate=audio.frame_rate,
	        sample_width=audio.sample_width,
	        channels=channels,
	    )

	def apply_echo(audio, delay_ms=500, decay=0.5):
	    """Overlay a delayed, attenuated copy of ``audio`` onto itself.

	    ``decay`` used to be ignored (the echo was always 10 dB down). It is now
	    treated as a linear amplitude factor and converted to decibels, so
	    ``decay=0.5`` yields an echo roughly 6 dB quieter than the source.

	    Args:
	        audio: Source segment; must provide ``apply_gain`` and ``overlay``.
	        delay_ms: Offset of the echo, passed as ``position`` to ``overlay``.
	        decay: Linear amplitude of the echo relative to the source. Values
	            less than or equal to zero mean "no echo".

	    Returns:
	        The segment with the echo mixed in, or ``audio`` unchanged when
	        ``decay`` is not positive.
	    """
	    if decay <= 0:
	        # log10 is undefined here and a silent echo is a no-op anyway.
	        return audio

	    echo_gain_db = 20 * float(np.log10(decay))
	    echo = audio.apply_gain(echo_gain_db)
	    return audio.overlay(echo, position=delay_ms)

	def apply_stereo_widen(audio, pan_amount=0.3):
	    """Widen the stereo image of ``audio`` with mid/side processing.

	    The previous version passed the stereo results of ``audio.pan()`` to
	    ``AudioSegment.from_mono_audiosegments``, which only accepts mono
	    segments and therefore raised ``ValueError`` on every call.

	    The signal is split into mid ``(L + R) / 2`` and side ``(L - R) / 2``;
	    the side component is scaled by ``1 + pan_amount`` and the channels are
	    rebuilt as ``mid + side`` / ``mid - side``.

	    Args:
	        audio: Source ``AudioSegment``.
	        pan_amount: Extra side gain; ``0`` leaves the image unchanged.

	    Returns:
	        A stereo ``AudioSegment``. Mono input is converted to stereo (it has
	        no side signal to widen); input with more than two channels is
	        returned unchanged.
	    """
	    if audio.channels > 2:
	        return audio
	    stereo = audio if audio.channels == 2 else audio.set_channels(2)

	    samples = np.array(stereo.get_array_of_samples())
	    if samples.size == 0:
	        return stereo

	    # Work in float to avoid integer overflow while summing channels.
	    frames = samples.reshape(-1, 2).astype(np.float64)
	    left, right = frames[:, 0], frames[:, 1]
	    mid = (left + right) / 2.0
	    side = (left - right) / 2.0 * (1.0 + pan_amount)
	    widened = np.stack([mid + side, mid - side], axis=1)

	    # Boosting the side signal can exceed the integer range; clip, don't wrap.
	    limits = np.iinfo(samples.dtype)
	    widened = np.clip(np.rint(widened), limits.min, limits.max)
	    widened = widened.astype(samples.dtype)

	    return AudioSegment(
	        widened.tobytes(),
	        frame_rate=stereo.frame_rate,
	        sample_width=stereo.sample_width,
	        channels=2,
	    )

	def apply_bass_boost(audio, gain=10):
	    """Boost the low end of ``audio`` while keeping the rest of the mix.

	    The previous version returned only the low-passed band, which removed
	    everything above the cutoff instead of boosting the bass. The amplified
	    low band is now mixed back onto the untouched original.

	    Args:
	        audio: Source segment; must provide ``low_pass_filter``,
	            ``apply_gain`` and ``overlay``.
	        gain: Gain applied to the band below the 100 Hz cutoff.

	    Returns:
	        The original audio with the amplified low band overlaid.
	    """
	    boosted_lows = audio.low_pass_filter(100).apply_gain(gain)
	    return audio.overlay(boosted_lows)

	def apply_treble_boost(audio, gain=10):
	    """Boost the high end of ``audio`` while keeping the rest of the mix.

	    The previous version returned only the high-passed band, which stripped
	    everything below the cutoff instead of boosting the treble. The amplified
	    high band is now mixed back onto the untouched original.

	    Args:
	        audio: Source segment; must provide ``high_pass_filter``,
	            ``apply_gain`` and ``overlay``.
	        gain: Gain applied to the band above the 4000 Hz cutoff.

	    Returns:
	        The original audio with the amplified high band overlaid.
	    """
	    boosted_highs = audio.high_pass_filter(4000).apply_gain(gain)
	    return audio.overlay(boosted_highs)

	# === Vocal Isolation Helpers ===
	def load_track_local(path, sample_rate, channels=2):
	    """Load an audio file as a tensor at the requested rate and channel count.

	    Args:
	        path: File to read with ``torchaudio.load``.
	        sample_rate: Target sample rate; the signal is resampled when the
	            file's rate differs.
	        channels: Desired channel count. ``1`` averages all channels (the
	            result is 1-D, as before). Otherwise a mono file is duplicated
	            across ``channels`` and surplus channels are dropped.

	    Returns:
	        The loaded (and possibly resampled / re-channelled) tensor.
	    """
	    sig, rate = torchaudio.load(path)
	    if rate != sample_rate:
	        sig = torchaudio.functional.resample(sig, rate, sample_rate)
	    if channels == 1:
	        sig = sig.mean(0)
	    elif sig.shape[0] == 1:
	        # Mono file but a multi-channel tensor was requested (the separation
	        # callers in this file ask for 2): duplicate the single channel.
	        sig = sig.repeat(channels, 1)
	    elif sig.shape[0] > channels:
	        # Keep only the leading channels of surround material.
	        sig = sig[:channels]
	    return sig

	def save_track(path, wav, sample_rate):
	    """Write ``wav`` to ``path`` via ``torchaudio.save`` at ``sample_rate``.

	    ``path`` is normalised through ``pathlib.Path`` and handed over as a
	    string.
	    """
	    destination = str(Path(path))
	    torchaudio.save(destination, wav, sample_rate)

	def apply_vocal_isolation(audio_path):
	    """Separate the vocal stem of ``audio_path`` with the ``htdemucs`` model.

	    Fixes over the previous version:
	      * ``wav -= ref[:, None]`` tried to broadcast an ``(N, 1)`` tensor
	        against the ``(2, N)`` waveform and failed. The input is now
	        normalised with the scalar mean / std of the mono reference and the
	        model output is scaled back, as in Demucs' own separation script.
	      * The vocals stem is looked up by name instead of a hard-coded index.
	      * Each call writes to its own temp file instead of a shared
	        ``vocals.wav`` that concurrent requests would overwrite.

	    Args:
	        audio_path: Path of the audio file to process.

	    Returns:
	        Path of a newly written WAV file containing the vocals. The caller
	        owns the file and is responsible for deleting it.
	    """
	    model = pretrained.get_model(name='htdemucs')
	    model.eval()
	    wav = load_track_local(audio_path, model.samplerate, channels=2)

	    ref = wav.mean(0)
	    mean = ref.mean()
	    std = ref.std() + 1e-8  # epsilon avoids dividing by zero on silence
	    with torch.no_grad():
	        sources = apply_model(model, ((wav - mean) / std)[None])[0]
	    sources = sources * std + mean

	    vocal_index = list(model.sources).index("vocals")
	    vocal_track = sources[vocal_index].cpu()

	    with tempfile.NamedTemporaryFile(
	        prefix="vocals_", suffix=".wav", delete=False
	    ) as handle:
	        out_path = handle.name
	    save_track(out_path, vocal_track, model.samplerate)
	    return out_path

	# === Stem Splitting (Drums, Bass, Other, Vocals) ===
	def stem_split(audio_path):
	    """Split ``audio_path`` into individual stems with the ``htdemucs`` model.

	    Stem names are taken from ``model.sources`` rather than a hard-coded list,
	    so file names always match the order in which the model emits its
	    outputs. The input is normalised by the mean / std of its mono mix before
	    separation and the stems are scaled back afterwards, as in Demucs' own
	    separation script.

	    Args:
	        audio_path: Path of the audio file to split.

	    Returns:
	        List of ``(path, name)`` tuples, one per stem, pointing at WAV files
	        inside a fresh temporary directory.
	    """
	    model = pretrained.get_model(name='htdemucs')
	    model.eval()
	    wav = load_track_local(audio_path, model.samplerate, channels=2)

	    ref = wav.mean(0)
	    mean = ref.mean()
	    std = ref.std() + 1e-8  # epsilon avoids dividing by zero on silence
	    with torch.no_grad():
	        sources = apply_model(model, ((wav - mean) / std)[None])[0]
	    sources = sources * std + mean

	    output_dir = tempfile.mkdtemp()
	    stem_paths = []
	    for name, stem in zip(model.sources, sources):
	        path = os.path.join(output_dir, f"{name}.wav")
	        save_track(path, stem.cpu(), model.samplerate)
	        stem_paths.append((path, name))

	    return stem_paths

	# === Preset Loader with Fallback ===
	def load_presets():
	    """Load effect presets from ``*.json`` files in the ``presets`` folder.

	    A preset file must contain a JSON object with a string ``"name"`` and a
	    list ``"effects"``. Files that are unreadable, are not valid JSON, or do
	    not have that shape are skipped instead of aborting the whole load (a
	    JSON array used to raise ``TypeError`` here). Files are read in sorted
	    order so the resulting preset order is deterministic.

	    Returns:
	        Dict mapping preset name to its list of effects; empty when the
	        folder is missing or holds no usable preset.
	    """
	    presets = {}
	    try:
	        preset_files = sorted(
	            f for f in os.listdir("presets") if f.endswith(".json")
	        )
	    except (FileNotFoundError, NotADirectoryError):
	        print("Presets folder not found")
	        return presets

	    for filename in preset_files:
	        path = os.path.join("presets", filename)
	        try:
	            with open(path, "r", encoding="utf-8") as infile:
	                data = json.load(infile)
	        except ValueError:
	            # Covers json.JSONDecodeError and UnicodeDecodeError.
	            print(f"Invalid JSON: {filename}")
	            continue
	        except OSError as err:
	            print(f"Could not read preset {filename}: {err}")
	            continue

	        if (
	            isinstance(data, dict)
	            and isinstance(data.get("name"), str)
	            and isinstance(data.get("effects"), list)
	        ):
	            presets[data["name"]] = data["effects"]
	    return presets

	# Use the presets found on disk; when none were loaded, fall back to the
	# built-in defaults below.
	preset_choices = load_presets() or {
	    "Default": [],
	    "Clean Podcast": ["Noise Reduction", "Normalize"],
	    "Music Remix": ["Bass Boost", "Stereo Widening"],
	}

	# Preset names in insertion order, used to populate the preset dropdowns.
	preset_names = list(preset_choices)

	# === Waveform Generator ===
	def show_waveform(audio_file):
	    """Render a small waveform preview of ``audio_file`` as a PIL image.

	    Only the first 10000 samples are plotted. Rendering is best-effort: on
	    any failure the error is printed (it used to be swallowed silently) and
	    ``None`` is returned. The figure is always closed, even when plotting
	    fails, so failed previews no longer leak matplotlib figures.

	    Args:
	        audio_file: Path (or file object) accepted by
	            ``AudioSegment.from_file``.

	    Returns:
	        A ``PIL.Image`` with the plot, or ``None`` when it could not be made.
	    """
	    try:
	        audio = AudioSegment.from_file(audio_file)
	        samples = np.array(audio.get_array_of_samples())

	        # An explicit figure avoids relying on pyplot's global "current figure".
	        fig, ax = plt.subplots(figsize=(10, 2))
	        try:
	            ax.plot(samples[:10000], color="blue")
	            ax.axis("off")
	            buf = BytesIO()
	            fig.savefig(buf, format="png", bbox_inches="tight", dpi=100)
	        finally:
	            plt.close(fig)

	        buf.seek(0)
	        return Image.open(buf)
	    except Exception as e:
	        # Deliberately broad: a missing preview must never fail the request.
	        print(f"Waveform preview unavailable: {e}")
	        return None

	# === Session Info Export ===
	def generate_session_log(audio_path, effects, isolate_vocals, export_format):
	    """Describe one processing run as a JSON string indented by two spaces.

	    The record holds the current local timestamp, the base name of
	    ``audio_path``, and the ``effects``, ``isolate_vocals`` and
	    ``export_format`` values exactly as they were passed in.
	    """
	    entry = dict(
	        timestamp=str(datetime.datetime.now()),
	        filename=os.path.basename(audio_path),
	        effects_applied=effects,
	        isolate_vocals=isolate_vocals,
	        export_format=export_format,
	    )
	    return json.dumps(entry, indent=2)

	# === Main Processing Function ===
	def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
	    """Apply effects to one audio file and export the result.

	    Fixes over the previous version:
	      * The preset lookup always succeeded (the dropdown always carries a
	        preset name), so the effect checkboxes were ignored entirely. The
	        preset's chain now runs first, followed by any additionally ticked
	        effects that the preset does not already contain.
	      * The output file gets a suffix matching the export format instead of
	        always ``.wav``.
	      * The intermediate file for vocal isolation is a unique temp file (not
	        a shared ``input.wav``) and is removed afterwards.
	      * The export no longer writes to the temp file while its handle is
	        still open.

	    Args:
	        audio_file: Path of the uploaded audio file.
	        selected_effects: Effect names ticked in the UI (may be ``None``).
	        isolate_vocals: When true, run vocal isolation after the effects.
	        preset_name: Key into ``preset_choices``; unknown names add nothing.
	        export_format: Export format name such as ``"MP3"`` or ``"WAV"``.

	    Returns:
	        Tuple ``(output_path, waveform_image, session_log)``.

	    Raises:
	        gr.Error: When no audio file was provided.
	    """
	    if not audio_file:
	        raise gr.Error("Please upload an audio file first.")

	    audio = AudioSegment.from_file(audio_file)

	    effect_map = {
	        "Noise Reduction": apply_noise_reduction,
	        "Compress Dynamic Range": apply_compression,
	        "Add Reverb": apply_reverb,
	        "Pitch Shift": apply_pitch_shift,
	        "Echo": apply_echo,
	        "Stereo Widening": apply_stereo_widen,
	        "Bass Boost": apply_bass_boost,
	        "Treble Boost": apply_treble_boost,
	        "Normalize": apply_normalize,
	    }

	    preset_effects = list(preset_choices.get(preset_name) or [])
	    extra_effects = [
	        name for name in (selected_effects or []) if name not in preset_effects
	    ]
	    effects_to_apply = preset_effects + extra_effects
	    for effect_name in effects_to_apply:
	        effect = effect_map.get(effect_name)
	        if effect is not None:
	            audio = effect(audio)

	    final_audio = audio
	    if isolate_vocals:
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	            temp_input = tmp.name
	        vocal_path = None
	        try:
	            audio.export(temp_input, format="wav")
	            vocal_path = apply_vocal_isolation(temp_input)
	            final_audio = AudioSegment.from_wav(vocal_path)
	        finally:
	            # Both files are only needed until the vocals are loaded.
	            for leftover in (temp_input, vocal_path):
	                if leftover and os.path.exists(leftover):
	                    os.remove(leftover)

	    fmt = export_format.lower()
	    # Reserve a unique name, close the handle, then let pydub write to it.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}") as f:
	        output_path = f.name
	    final_audio.export(output_path, format=fmt)

	    waveform_image = show_waveform(output_path)
	    session_log = generate_session_log(audio_file, effects_to_apply, isolate_vocals, export_format)

	    return output_path, waveform_image, session_log

	# === Batch Processing Function ===
	def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
	    """Process several uploads and bundle the results into one ZIP archive.

	    Each file is run through ``process_audio`` with the same settings. The
	    archive contains ``processed_<i>.<ext>`` and ``session_info_<i>.json``
	    for every input, in upload order.

	    Compared with the previous version, a missing or empty upload is
	    reported instead of raising ``TypeError``, uploads may be plain path
	    strings as well as objects with a ``.name`` attribute, and each
	    intermediate output file is deleted once it has been added to the
	    archive.

	    Args:
	        files: Uploaded files (path strings or objects exposing ``.name``).
	        selected_effects: Effect names ticked in the UI.
	        isolate_vocals: Whether to isolate vocals after the effects.
	        preset_name: Name of the preset to apply.
	        export_format: Export format name such as ``"MP3"`` or ``"WAV"``.

	    Returns:
	        Path of the created ZIP file.

	    Raises:
	        gr.Error: When no files were uploaded.
	    """
	    if not files:
	        raise gr.Error("Please upload at least one audio file.")

	    output_dir = tempfile.mkdtemp()
	    extension = export_format.lower()
	    zip_path = os.path.join(output_dir, "batch_output.zip")

	    with zipfile.ZipFile(zip_path, 'w') as zipf:
	        for i, file in enumerate(files):
	            source_path = getattr(file, "name", file)
	            processed_path, _, log = process_audio(
	                source_path, selected_effects, isolate_vocals, preset_name, export_format
	            )
	            try:
	                zipf.write(processed_path, f"processed_{i}.{extension}")
	            finally:
	                # The archive now holds the data; drop the loose copy.
	                if os.path.exists(processed_path):
	                    os.remove(processed_path)
	            zipf.writestr(f"session_info_{i}.json", log)

	    return zip_path

	# === Custom Preset Upload Handler ===
	def upload_preset(preset_file):
	    """Register a user-supplied preset from an uploaded JSON file.

	    The file must contain a JSON object with a string ``"name"`` and a list
	    ``"effects"``. Anything else (including a JSON array, which used to
	    surface as a confusing error message) is reported as an invalid preset.
	    Valid presets are stored in the module-level ``preset_choices`` dict.

	    NOTE(review): the preset dropdowns are populated from ``preset_names``
	    when the UI is built, so a preset added here does not appear in them
	    without further UI wiring.

	    Args:
	        preset_file: Path string or object exposing ``.name``; ``None`` when
	            nothing was uploaded.

	    Returns:
	        A human-readable status message.
	    """
	    if preset_file is None:
	        return "❌ Invalid preset file"

	    preset_path = getattr(preset_file, "name", preset_file)
	    try:
	        with open(preset_path, "r", encoding="utf-8") as f:
	            data = json.load(f)
	    except (OSError, ValueError, TypeError) as e:
	        # ValueError covers both malformed JSON and undecodable bytes.
	        return f"⚠️ Error loading preset: {str(e)}"

	    is_valid = (
	        isinstance(data, dict)
	        and isinstance(data.get("name"), str)
	        and isinstance(data.get("effects"), list)
	    )
	    if not is_valid:
	        return "❌ Invalid preset file"

	    preset_choices[data["name"]] = data["effects"]
	    return f"✅ Loaded custom preset: {data['name']}"

	# === Gradio Interface ===
	# Effect names offered in the UI checkbox groups, in display order.
	effect_options = [
	    "Noise Reduction", "Compress Dynamic Range", "Add Reverb",
	    "Pitch Shift", "Echo", "Stereo Widening",
	    "Bass Boost", "Treble Boost", "Normalize",
	]

	# === Multi-Tab UI ===
	with gr.Blocks(title="AI Audio Studio") as demo:
	    gr.Markdown("## 🎧 AI Audio Studio\nUpload, edit, export — all powered by AI")

	    # ----- Single File Studio Tab -----
	    with gr.Tab("🎵 Single File Studio"):
	        gr.Interface(
	            fn=process_audio,
	            inputs=[
	                gr.Audio(label="Upload Audio", type="filepath"),
	                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
	                gr.Checkbox(label="Isolate Vocals After Effects"),
	                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
	                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
	            ],
	            outputs=[
	                gr.Audio(label="Processed Audio", type="filepath"),
	                gr.Image(label="Waveform Preview"),
	                gr.Textbox(label="Session Log (JSON)", lines=5)
	            ],
	            title="Edit One File at a Time",
	            description="Apply effects, preview waveform, and export as MP3 or WAV",
	            allow_flagging="never"
	        )

	    # ----- Batch Processing Tab -----
	    with gr.Tab("🔊 Batch Processing"):
	        gr.Interface(
	            fn=batch_process_audio,
	            inputs=[
	                gr.File(label="Upload Multiple Audio Files", file_count="multiple"),
	                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
	                gr.Checkbox(label="Isolate Vocals After Effects"),
	                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
	                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
	            ],
	            outputs=gr.File(label="Download ZIP of All Processed Files"),
	            title="Batch Audio Processor",
	            description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
	            allow_flagging="never",
	            submit_btn="Process All Files",
	            clear_btn=False
	        )

	    # ----- Remix Mode Tab -----
	    with gr.Tab("🎛 Remix Mode (Split Stems)"):
	        def remix_mode(audio_file):
	            """Split the uploaded track into stems.

	            Returns the list of stem file paths and a comma-separated string
	            of the stem names for the text box.
	            """
	            # The input below is gr.Audio(type="filepath"), so a plain path
	            # string arrives here; the old ``audio_file.name`` raised
	            # AttributeError. getattr keeps objects with ``.name`` working.
	            audio_path = getattr(audio_file, "name", audio_file)
	            stem_paths = stem_split(audio_path)
	            stem_files = [path for path, _ in stem_paths]
	            stem_names = ", ".join(name for _, name in stem_paths)
	            return stem_files, stem_names

	        gr.Interface(
	            fn=remix_mode,
	            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
	            outputs=[
	                gr.File(label="Stem Files (Vocals, Drums, etc.)"),
	                gr.Textbox(label="Stem Names")
	            ],
	            title="Split Into Drums, Bass, Vocals",
	            description="Use AI to separate musical elements like vocals, drums, and bass."
	        )

	    # ----- Preset Manager Tab -----
	    with gr.Tab("⚙️ Preset Manager"):
	        gr.Interface(
	            fn=upload_preset,
	            inputs=gr.File(label="Upload Your Own Preset (.json)"),
	            outputs=gr.Textbox(label="Preset Status"),
	            title="Load Custom Presets",
	            description="Upload your own `.json` preset to customize effect chains."
	        )

	# Start the Gradio server as soon as the script is executed.
	demo.launch()