import datetime
import json
import os
import tempfile
import zipfile
from io import BytesIO
from pathlib import Path

import gradio as gr
import matplotlib.pyplot as plt
import noisereduce as nr
import numpy as np
import torch
import torchaudio
from demucs import pretrained
from demucs.apply import apply_model
from PIL import Image
from pydub import AudioSegment


# === Helper Functions ===
def audiosegment_to_array(audio):
    """Return ``(samples, frame_rate)`` for a pydub segment.

    ``samples`` is the flat array produced by ``get_array_of_samples()``;
    for multi-channel audio the channels are interleaved frame by frame.
    """
    return np.array(audio.get_array_of_samples()), audio.frame_rate


def array_to_audiosegment(samples, frame_rate, channels=1):
    """Build an ``AudioSegment`` from a NumPy sample array.

    The sample width is taken from ``samples.dtype.itemsize``, so the array
    must already have the integer dtype that the segment should carry.  A
    2-D ``(frames, channels)`` array serialises to interleaved bytes.
    """
    samples = np.asarray(samples)
    return AudioSegment(
        samples.tobytes(),
        frame_rate=frame_rate,
        sample_width=samples.dtype.itemsize,
        channels=channels,
    )


def _frames_from_audio(audio):
    """Return the samples of ``audio`` as a ``(frames, channels)`` array."""
    samples = np.array(audio.get_array_of_samples())
    return samples.reshape(-1, audio.channels)


def _audio_from_frames(frames, dtype, frame_rate):
    """Round/clip a ``(frames, channels)`` array to ``dtype`` and wrap it."""
    limits = np.iinfo(dtype)
    clipped = np.clip(np.rint(frames), limits.min, limits.max).astype(dtype)
    return array_to_audiosegment(clipped, frame_rate, channels=clipped.shape[1])


# === Effect Functions ===
def apply_normalize(audio):
    """Normalize the segment's level with pydub's ``normalize`` effect."""
    return audio.normalize()


def apply_noise_reduction(audio):
    """Denoise ``audio`` with ``noisereduce``.

    Samples are de-interleaved first: multi-channel audio is handed to
    ``reduce_noise`` as ``(channels, frames)`` instead of one interleaved
    stream, and the result is cast back to the original integer dtype so
    the sample width of the returned segment matches the input.
    """
    frames = _frames_from_audio(audio)
    if frames.size == 0:
        return audio

    n_channels = frames.shape[1]
    signal = frames[:, 0] if n_channels == 1 else frames.T
    reduced = nr.reduce_noise(y=signal, sr=audio.frame_rate)
    reduced = np.asarray(reduced).reshape(n_channels, -1).T
    return _audio_from_frames(reduced, frames.dtype, audio.frame_rate)


def apply_compression(audio):
    """Apply pydub's dynamic range compressor with its default settings."""
    return audio.compress_dynamic_range()


def apply_reverb(audio):
    """Approximate reverb by mixing in one copy, 10 dB quieter, 1 s late."""
    reflection = audio - 10
    return audio.overlay(reflection, position=1000)


def apply_pitch_shift(audio, semitones=-2):
    """Shift pitch by ``semitones`` via linear-interpolation resampling.

    Each channel is resampled separately and the frame rate is left
    untouched, so the pitch moves by exactly ``semitones`` (the duration
    scales by the inverse ratio, as with changing tape speed).

    Args:
        audio: Source segment.
        semitones: Signed shift; negative values lower the pitch.

    Returns:
        A new segment with the same frame rate, sample width and channel
        count as ``audio``.
    """
    ratio = 2 ** (semitones / 12)
    frames = _frames_from_audio(audio)
    n_frames = frames.shape[0]
    if n_frames < 2 or ratio == 1:
        return audio

    source_index = np.arange(n_frames)
    read_positions = np.arange(0, n_frames, ratio)
    shifted = np.stack(
        [
            np.interp(read_positions, source_index, frames[:, ch])
            for ch in range(frames.shape[1])
        ],
        axis=1,
    )
    return _audio_from_frames(shifted, frames.dtype, audio.frame_rate)


def apply_echo(audio, delay_ms=500, decay=0.5):
    """Mix a single delayed, attenuated copy of ``audio`` into itself.

    Args:
        audio: Source segment.
        delay_ms: Offset of the echo in milliseconds.
        decay: Linear amplitude of the echo relative to the source
            (0.5 is roughly -6 dB).  Non-positive values disable the echo.

    Returns:
        A segment of the same length as ``audio``; ``overlay`` never
        extends the base segment, so the echo tail is cut at the end.
    """
    if decay <= 0:
        return audio
    gain_db = 20 * float(np.log10(decay))
    return audio.overlay(audio.apply_gain(gain_db), position=delay_ms)


def apply_stereo_widen(audio, pan_amount=0.3):
    """Widen the stereo image by boosting the side (L-R) signal.

    The audio is converted to stereo if needed, split into mid/side, the
    side component is scaled by ``1 + pan_amount`` and the channels are
    rebuilt.  Mono input has no side signal, so it comes back as stereo
    with two identical channels.  Audio with more than two channels is
    returned unchanged.
    """
    if audio.channels > 2:
        return audio
    stereo = audio if audio.channels == 2 else audio.set_channels(2)

    frames = _frames_from_audio(stereo)
    if frames.size == 0:
        return stereo

    left = frames[:, 0].astype(np.float64)
    right = frames[:, 1].astype(np.float64)
    mid = (left + right) / 2
    side = (left - right) / 2 * (1 + pan_amount)
    widened = np.stack([mid + side, mid - side], axis=1)
    return _audio_from_frames(widened, frames.dtype, stereo.frame_rate)


def apply_bass_boost(audio, gain=10, cutoff_hz=100):
    """Boost low frequencies by mixing an amplified low-passed copy back in.

    Args:
        audio: Source segment.
        gain: Gain in dB applied to the low band before mixing.
        cutoff_hz: Low-pass cutoff that defines the boosted band.
    """
    lows = audio.low_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(lows)


def apply_treble_boost(audio, gain=10, cutoff_hz=4000):
    """Boost high frequencies by mixing an amplified high-passed copy back in.

    Args:
        audio: Source segment.
        gain: Gain in dB applied to the high band before mixing.
        cutoff_hz: High-pass cutoff that defines the boosted band.
    """
    highs = audio.high_pass_filter(cutoff_hz).apply_gain(gain)
    return audio.overlay(highs)
# === Vocal Isolation Helpers ===
# Loaded Demucs models keyed by name, so each request does not reload weights.
_MODEL_CACHE = {}


def _get_separation_model(name="htdemucs"):
    """Return the cached Demucs model ``name``, loading it on first use."""
    model = _MODEL_CACHE.get(name)
    if model is None:
        model = pretrained.get_model(name=name)
        model.eval()
        _MODEL_CACHE[name] = model
    return model


def load_track_local(path, sample_rate, channels=2):
    """Load an audio file as a tensor at ``sample_rate``.

    Args:
        path: File to read with ``torchaudio.load``.
        sample_rate: Target rate; the signal is resampled when it differs.
        channels: Requested channel count.  ``1`` returns the channel mean
            as a 1-D tensor; otherwise mono input is duplicated and extra
            channels are dropped so the result has ``channels`` rows.

    Returns:
        The loaded (and possibly resampled / re-channelled) tensor.
    """
    sig, rate = torchaudio.load(path)
    if rate != sample_rate:
        sig = torchaudio.functional.resample(sig, rate, sample_rate)

    if channels == 1:
        return sig.mean(0)
    if sig.shape[0] == 1:
        # The separation model is fed `channels` rows; duplicate mono input.
        return sig.repeat(channels, 1)
    if sig.shape[0] > channels:
        return sig[:channels]
    return sig


def save_track(path, wav, sample_rate):
    """Write the tensor ``wav`` to ``path`` with ``torchaudio.save``."""
    path = Path(path)
    torchaudio.save(str(path), wav, sample_rate)


def _separate_sources(audio_path):
    """Run Demucs on ``audio_path`` and return ``(model, sources)``.

    The mix is normalised by the mean/std of its mono reference before
    separation and the stems are scaled back afterwards.
    """
    model = _get_separation_model()
    wav = load_track_local(audio_path, model.samplerate, channels=2)

    ref = wav.mean(0)
    mean = ref.mean()
    # Guard against division by zero for silent input.
    std = ref.std().clamp_min(1e-8)

    sources = apply_model(model, ((wav - mean) / std)[None])[0]
    sources = sources * std + mean
    return model, sources


def apply_vocal_isolation(audio_path):
    """Extract the vocal stem of ``audio_path`` and return its WAV path.

    The result is written to a uniquely named temporary file so concurrent
    requests cannot overwrite each other's output.
    """
    model, sources = _separate_sources(audio_path)
    vocal_track = sources[list(model.sources).index("vocals")].cpu()

    fd, out_path = tempfile.mkstemp(prefix="vocals_", suffix=".wav")
    os.close(fd)
    save_track(out_path, vocal_track, model.samplerate)
    return out_path


# === Stem Splitting (Drums, Bass, Other, Vocals) ===
def stem_split(audio_path):
    """Split ``audio_path`` into stems.

    Returns:
        A list of ``(wav_path, stem_name)`` tuples, one per model source,
        written into a fresh temporary directory.
    """
    model, sources = _separate_sources(audio_path)
    output_dir = tempfile.mkdtemp()

    stem_paths = []
    for name, stem in zip(model.sources, sources):
        path = os.path.join(output_dir, f"{name}.wav")
        save_track(path, stem.cpu(), model.samplerate)
        stem_paths.append((path, name))
    return stem_paths


# === Preset Loader with Fallback ===
def load_presets():
    """Load effect presets from ``presets/*.json``.

    Each file must hold a JSON object with a string ``"name"`` and a list
    ``"effects"``.  Files that are not valid JSON or do not have that shape
    are reported and skipped.

    Returns:
        Mapping of preset name to its list of effect names; empty when the
        ``presets`` folder does not exist.
    """
    try:
        preset_files = sorted(
            f for f in os.listdir("presets") if f.endswith(".json")
        )
    except (FileNotFoundError, NotADirectoryError):
        print("Presets folder not found")
        return {}

    presets = {}
    for f in preset_files:
        path = os.path.join("presets", f)
        try:
            with open(path, "r", encoding="utf-8") as infile:
                data = json.load(infile)
        except json.JSONDecodeError:
            print(f"Invalid JSON: {f}")
            continue

        if not isinstance(data, dict):
            print(f"Invalid preset (expected a JSON object): {f}")
            continue
        name = data.get("name")
        effects = data.get("effects")
        if isinstance(name, str) and isinstance(effects, list):
            presets[name] = effects
        elif "name" in data and "effects" in data:
            print(f"Invalid preset (bad name/effects types): {f}")
    return presets


preset_choices = load_presets()
if not preset_choices:
    # Built-in presets used when no preset files could be loaded.
    preset_choices = {
        "Default": [],
        "Clean Podcast": ["Noise Reduction", "Normalize"],
        "Music Remix": ["Bass Boost", "Stereo Widening"],
    }
preset_names = list(preset_choices.keys())
# === Waveform Generator ===
def show_waveform(audio_file):
    """Render a small waveform preview of ``audio_file``.

    Only the first 10,000 samples are plotted.  This is a best-effort
    preview: on any failure the error is printed and ``None`` is returned
    so the caller can still deliver the processed audio.

    Returns:
        A PIL image of the plot, or ``None`` when it could not be drawn.
    """
    try:
        audio = AudioSegment.from_file(audio_file)
        samples = np.array(audio.get_array_of_samples())

        fig, ax = plt.subplots(figsize=(10, 2))
        try:
            ax.plot(samples[:10000], color="blue")
            ax.axis("off")
            buf = BytesIO()
            fig.savefig(buf, format="png", bbox_inches="tight", dpi=100)
        finally:
            # Always release the figure, even when plotting/saving fails.
            plt.close(fig)

        buf.seek(0)
        return Image.open(buf)
    except Exception as e:
        print(f"Waveform preview failed: {e}")
        return None


# === Session Info Export ===
def generate_session_log(audio_path, effects, isolate_vocals, export_format):
    """Return a JSON string describing one processing run.

    The log records the current timestamp, the base name of ``audio_path``
    and the effect / isolation / export settings exactly as passed in.
    """
    log = {
        "timestamp": str(datetime.datetime.now()),
        "filename": os.path.basename(audio_path),
        "effects_applied": effects,
        "isolate_vocals": isolate_vocals,
        "export_format": export_format,
    }
    return json.dumps(log, indent=2)


# === Main Processing Function ===
def _effect_map():
    """Map UI effect names to the functions that implement them."""
    return {
        "Noise Reduction": apply_noise_reduction,
        "Compress Dynamic Range": apply_compression,
        "Add Reverb": apply_reverb,
        "Pitch Shift": apply_pitch_shift,
        "Echo": apply_echo,
        "Stereo Widening": apply_stereo_widen,
        "Bass Boost": apply_bass_boost,
        "Treble Boost": apply_treble_boost,
        "Normalize": apply_normalize,
    }


def _remove_quietly(path):
    """Delete ``path``, ignoring the error if it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


def _isolate_vocals(audio):
    """Run vocal isolation on ``audio`` through a private temporary WAV."""
    fd, temp_input = tempfile.mkstemp(prefix="input_", suffix=".wav")
    os.close(fd)
    try:
        # export() returns the open output handle; close it right away.
        audio.export(temp_input, format="wav").close()
        vocal_path = apply_vocal_isolation(temp_input)
        try:
            return AudioSegment.from_wav(vocal_path)
        finally:
            _remove_quietly(vocal_path)
    finally:
        _remove_quietly(temp_input)


def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
    """Apply effects to one file and export the result.

    Args:
        audio_file: Path of the uploaded audio file.
        selected_effects: Effect names ticked in the UI (may be ``None``).
        isolate_vocals: When true, keep only the vocal stem after effects.
        preset_name: Name of a preset in ``preset_choices``.  A preset with
            a non-empty effect list takes precedence; an empty or unknown
            preset falls back to ``selected_effects``.
        export_format: ``"MP3"`` or ``"WAV"`` (case-insensitive).

    Returns:
        ``(output_path, waveform_image, session_log_json)``.

    Raises:
        ValueError: If no audio file was provided.
    """
    if not audio_file:
        raise ValueError("No audio file provided")

    fmt = (export_format or "WAV").lower()
    audio = AudioSegment.from_file(audio_file)

    # A preset is always selected in the UI, so an empty preset must not
    # silently discard the effects the user ticked by hand.
    preset_effects = preset_choices.get(preset_name) or []
    effects_to_apply = list(preset_effects or selected_effects or [])

    effect_map = _effect_map()
    for effect_name in effects_to_apply:
        effect = effect_map.get(effect_name)
        if effect is not None:
            audio = effect(audio)

    final_audio = _isolate_vocals(audio) if isolate_vocals else audio

    # Name the file after the real export format and close the descriptor
    # before the encoder writes to the path.
    fd, output_path = tempfile.mkstemp(suffix=f".{fmt}")
    os.close(fd)
    final_audio.export(output_path, format=fmt).close()

    waveform_image = show_waveform(output_path)
    session_log = generate_session_log(audio_file, effects_to_apply, isolate_vocals, export_format)
    return output_path, waveform_image, session_log
# === Batch Processing Function ===
def _uploaded_path(upload):
    """Return the filesystem path of an upload given as a path or file object."""
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
    """Process several uploads and bundle the results into one ZIP.

    Every file goes through ``process_audio`` with the same settings.  The
    archive holds ``processed_<i>.<ext>`` and ``session_info_<i>.json`` for
    each input, in upload order.

    Returns:
        Path of the created ZIP file.

    Raises:
        ValueError: If no files were uploaded.
    """
    if not files:
        raise ValueError("No files uploaded")

    extension = export_format.lower()
    output_dir = tempfile.mkdtemp()
    zip_path = os.path.join(output_dir, "batch_output.zip")

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
        for i, file in enumerate(files):
            processed_path, _, log = process_audio(
                _uploaded_path(file), selected_effects, isolate_vocals, preset_name, export_format
            )
            try:
                zipf.write(processed_path, f"processed_{i}.{extension}")
                zipf.writestr(f"session_info_{i}.json", log)
            finally:
                # The archive now holds the data; drop the intermediate file.
                try:
                    os.remove(processed_path)
                except OSError:
                    pass
    return zip_path


# === Custom Preset Upload Handler ===
def upload_preset(preset_file):
    """Register a preset from an uploaded JSON file.

    The file must contain an object with a string ``"name"`` and a list
    ``"effects"``.  On success the preset is stored in ``preset_choices``.

    NOTE: the preset dropdowns are built once from ``preset_names`` when
    the UI is created, so a newly uploaded preset is not added to them.

    Returns:
        A human-readable status message.
    """
    if preset_file is None:
        return "❌ Invalid preset file"
    try:
        with open(_uploaded_path(preset_file), "r", encoding="utf-8") as f:
            data = json.load(f)
        valid = (
            isinstance(data, dict)
            and isinstance(data.get("name"), str)
            and isinstance(data.get("effects"), list)
        )
        if not valid:
            return "❌ Invalid preset file"
        preset_choices[data["name"]] = data["effects"]
        if data["name"] not in preset_names:
            preset_names.append(data["name"])
        return f"✅ Loaded custom preset: {data['name']}"
    except Exception as e:
        return f"⚠️ Error loading preset: {str(e)}"


def remix_mode(audio_file):
    """Split an uploaded track into stems for the Remix tab.

    ``audio_file`` is a filepath string because the input component uses
    ``type="filepath"``.

    Returns:
        ``(list_of_stem_paths, comma_separated_stem_names)``.
    """
    if not audio_file:
        raise ValueError("No audio file provided")
    stem_paths = stem_split(audio_file)
    paths = [path for path, _ in stem_paths]
    names = [name for _, name in stem_paths]
    return paths, ", ".join(names)


# === Gradio Interface ===
effect_options = [
    "Noise Reduction",
    "Compress Dynamic Range",
    "Add Reverb",
    "Pitch Shift",
    "Echo",
    "Stereo Widening",
    "Bass Boost",
    "Treble Boost",
    "Normalize",
]

# === Multi-Tab UI ===
with gr.Blocks(title="AI Audio Studio") as demo:
    gr.Markdown("## 🎧 AI Audio Studio\nUpload, edit, export — all powered by AI")

    # ----- Single File Studio Tab -----
    with gr.Tab("🎵 Single File Studio"):
        gr.Interface(
            fn=process_audio,
            inputs=[
                gr.Audio(label="Upload Audio", type="filepath"),
                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
                gr.Checkbox(label="Isolate Vocals After Effects"),
                gr.Dropdown(
                    choices=preset_names,
                    label="Select Preset",
                    value=preset_names[0] if preset_names else None,
                ),
                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3"),
            ],
            outputs=[
                gr.Audio(label="Processed Audio", type="filepath"),
                gr.Image(label="Waveform Preview"),
                gr.Textbox(label="Session Log (JSON)", lines=5),
            ],
            title="Edit One File at a Time",
            description="Apply effects, preview waveform, and export as MP3 or WAV",
            allow_flagging="never",
        )

    # ----- Batch Processing Tab -----
    with gr.Tab("🔊 Batch Processing"):
        gr.Interface(
            fn=batch_process_audio,
            inputs=[
                gr.File(label="Upload Multiple Audio Files", file_count="multiple"),
                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
                gr.Checkbox(label="Isolate Vocals After Effects"),
                gr.Dropdown(
                    choices=preset_names,
                    label="Select Preset",
                    value=preset_names[0] if preset_names else None,
                ),
                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3"),
            ],
            outputs=gr.File(label="Download ZIP of All Processed Files"),
            title="Batch Audio Processor",
            description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
            allow_flagging="never",
            submit_btn="Process All Files",
            clear_btn=False,
        )

    # ----- Remix Mode Tab -----
    with gr.Tab("🎛 Remix Mode (Split Stems)"):
        gr.Interface(
            fn=remix_mode,
            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
            outputs=[
                gr.File(label="Stem Files (Vocals, Drums, etc.)", file_count="multiple"),
                gr.Textbox(label="Stem Names"),
            ],
            title="Split Into Drums, Bass, Vocals",
            description="Use AI to separate musical elements like vocals, drums, and bass.",
        )

    # ----- Preset Manager Tab -----
    with gr.Tab("⚙️ Preset Manager"):
        gr.Interface(
            fn=upload_preset,
            inputs=gr.File(label="Upload Your Own Preset (.json)"),
            outputs=gr.Textbox(label="Preset Status"),
            title="Load Custom Presets",
            description="Upload your own `.json` preset to customize effect chains.",
        )

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()