import gradio as gr
from pydub import AudioSegment
import numpy as np
import tempfile
import os
import noisereduce as nr
import torch
from demucs import pretrained
from demucs.apply import apply_model
import torchaudio
from pathlib import Path
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image
import zipfile
import datetime
import librosa
import warnings
import json
import pickle
import soundfile as sf

warnings.filterwarnings("ignore")
### Helper Functions ###

def audiosegment_to_array(audio):
    return np.array(audio.get_array_of_samples()), audio.frame_rate


def array_to_audiosegment(samples, frame_rate, channels=1):
    return AudioSegment(
        samples.tobytes(),
        frame_rate=int(frame_rate),
        sample_width=samples.dtype.itemsize,
        channels=channels,
    )


def save_audiosegment_to_temp(audio: AudioSegment, suffix=".wav"):
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
        audio.export(f.name, format=suffix.lstrip("."))
        return f.name


def load_audiofile_to_numpy(path):
    samples, sr = sf.read(path, dtype="int16")
    if samples.ndim > 1 and samples.shape[1] > 2:
        samples = samples[:, :2]  # keep at most two channels
    return samples, sr
def show_waveform(audio_file):
    try:
        audio = AudioSegment.from_file(audio_file)
        samples = np.array(audio.get_array_of_samples())
        plt.figure(figsize=(10, 2))
        plt.plot(samples[:10000], color="skyblue")
        plt.axis("off")
        buf = BytesIO()
        plt.savefig(buf, format="png", bbox_inches="tight")
        plt.close()
        buf.seek(0)
        return Image.open(buf)
    except Exception:
        return None
### Effects ###

def apply_normalize(audio): return audio.normalize()
def apply_noise_reduction(audio):
    samples, sr = audiosegment_to_array(audio)
    reduced = nr.reduce_noise(y=samples.astype(np.float32), sr=sr)
    # noisereduce returns floats; convert back to 16-bit PCM before rebuilding the segment
    return array_to_audiosegment(np.clip(reduced, -32768, 32767).astype(np.int16), sr, audio.channels)
def apply_compression(audio): return audio.compress_dynamic_range()
def apply_reverb(audio):
    # Simple reverb approximation: overlay an attenuated copy delayed by 1 second
    reverb = audio - 10
    return audio.overlay(reverb, position=1000)


def apply_pitch_shift(audio, semitones=-2):
    new_fr = int(audio.frame_rate * (2 ** (semitones / 12)))
    return audio._spawn(audio.raw_data, overrides={"frame_rate": new_fr}).set_frame_rate(audio.frame_rate)


def apply_echo(audio, delay_ms=500, decay=0.5):
    # Attenuate the echo according to the decay factor (0.5 is roughly -6 dB)
    echo = audio + (20 * np.log10(decay))
    return audio.overlay(echo, position=delay_ms)
def apply_stereo_widen(audio, pan_amount=0.3):
    # pan() returns stereo segments, so collapse each side to mono before recombining
    left = audio.pan(-pan_amount).set_channels(1)
    right = audio.pan(pan_amount).set_channels(1)
    return AudioSegment.from_mono_audiosegments(left, right)
# Boost by mixing a gained filtered copy back into the original rather than replacing it
def apply_bass_boost(audio, gain=10): return audio.overlay(audio.low_pass_filter(100).apply_gain(gain))
def apply_treble_boost(audio, gain=10): return audio.overlay(audio.high_pass_filter(4000).apply_gain(gain))
def apply_limiter(audio, limit_dB=-1):
    # Crude peak limiter: pull the peak down to the ceiling when it exceeds limit_dB
    headroom = limit_dB - audio.max_dBFS
    return audio.apply_gain(min(headroom, 0))


def apply_auto_gain(audio, target_dB=-20):
    change = target_dB - audio.dBFS
    return audio.apply_gain(change)
def apply_vocal_distortion(audio, intensity=0.3):
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    # Add a sine-shaped waveshaping term scaled to the 16-bit range, then clip to avoid wrap-around
    distorted = samples + intensity * 32768 * np.sin(samples * 2 * np.pi / 32768)
    return array_to_audiosegment(np.clip(distorted, -32768, 32767).astype(np.int16), audio.frame_rate, audio.channels)
def apply_harmony(audio, shift_semitones=4):
    shifted_up = apply_pitch_shift(audio, shift_semitones)
    shifted_down = apply_pitch_shift(audio, -shift_semitones)
    return audio.overlay(shifted_up).overlay(shifted_down)


def apply_stage_mode(audio):
    processed = apply_reverb(audio)
    processed = apply_bass_boost(processed, gain=6)
    return apply_limiter(processed, limit_dB=-2)
def apply_bitcrush(audio, bit_depth=8):
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    # Quantize to the target bit depth, then scale back up so loudness is preserved
    step = 2 ** (16 - bit_depth)
    crushed = (np.round(samples / step) * step).clip(-32768, 32767).astype(np.int16)
    return array_to_audiosegment(crushed, audio.frame_rate, audio.channels)
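
# process_audio below calls apply_vocal_isolation, which is not defined in this excerpt.
# A minimal sketch using the Demucs imports already present; the "htdemucs" model name,
# CPU inference, and the stem layout are assumptions, not taken from the original code.
def apply_vocal_isolation(path):
    model = pretrained.get_model("htdemucs")  # assumed model; any Demucs model with a "vocals" stem works
    model.eval()
    wav, sr = torchaudio.load(path)
    if wav.shape[0] == 1:
        wav = wav.repeat(2, 1)  # Demucs expects stereo input
    if sr != model.samplerate:
        wav = torchaudio.functional.resample(wav, sr, model.samplerate)
    with torch.no_grad():
        # apply_model returns (batch, sources, channels, samples)
        sources = apply_model(model, wav[None], device="cpu")[0]
    vocals = sources[model.sources.index("vocals")]
    out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    torchaudio.save(out_path, vocals.cpu(), model.samplerate)
    return out_path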
### Presets ###

preset_choices = {
    "Default": [],
    "Clean Podcast": ["Noise Reduction", "Normalize"],
    "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
    "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
    "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
    "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
    "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
    "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
    "Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
    "🧪 Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
    "🎶 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
    "ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
    "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
}

preset_names = list(preset_choices.keys())
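
# The presets above also list effects with no handler in effect_map below
# ("Noise Gate", "Low-Pass Filter", "High Pass Filter (80Hz)", "Mono Downmix"),
# so those entries are currently skipped. Hedged sketches of what such helpers
# could look like follow; the threshold and cutoff values are assumptions, and
# each would still need to be registered in effect_map under the matching label.
def apply_noise_gate(audio, threshold_dB=-45):
    # Mute 50 ms chunks whose level falls below the threshold
    gated = AudioSegment.empty()
    for chunk in audio[::50]:
        if chunk.dBFS > threshold_dB:
            gated += chunk
        else:
            gated += AudioSegment.silent(duration=len(chunk), frame_rate=audio.frame_rate)
    return gated

def apply_low_pass(audio, cutoff_hz=3000):
    return audio.low_pass_filter(cutoff_hz)

def apply_high_pass(audio, cutoff_hz=80):
    return audio.high_pass_filter(cutoff_hz)

def apply_mono_downmix(audio):
    return audio.set_channels(1)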
### Main processing ###

def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
    try:
        audio = AudioSegment.from_file(audio_file)
        effect_map = {
            "Noise Reduction": apply_noise_reduction,
            "Compress Dynamic Range": apply_compression,
            "Reverb": apply_reverb,
            "Pitch Shift": apply_pitch_shift,
            "Echo": apply_echo,
            "Stereo Widening": apply_stereo_widen,
            "Bass Boost": apply_bass_boost,
            "Treble Boost": apply_treble_boost,
            "Normalize": apply_normalize,
            "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
            "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
            "Vocal Distortion": apply_vocal_distortion,
            "Stage Mode": apply_stage_mode,
            "Harmony": apply_harmony,
            "Bitcrusher": apply_bitcrush,
        }
        # Apply the selected effects in order; unknown effect names are skipped
        for effect in selected_effects:
            if effect in effect_map:
                audio = effect_map[effect](audio)
        if isolate_vocals:
            temp_path = save_audiosegment_to_temp(audio, suffix=".wav")
            vocal_path = apply_vocal_isolation(temp_path)
            audio = AudioSegment.from_file(vocal_path)
        output_path = save_audiosegment_to_temp(audio, suffix="." + export_format.lower())
        samples, sr = load_audiofile_to_numpy(output_path)
        waveform = show_waveform(output_path)
        session_log = json.dumps({
            "timestamp": str(datetime.datetime.now()),
            "filename": os.path.basename(audio_file),
            "effects_applied": selected_effects,
            "isolate_vocals": isolate_vocals,
            "export_format": export_format,
            "detected_genre": "Unknown",
        }, indent=2)
        # gr.Audio with type="numpy" expects a (sample_rate, samples) tuple
        return (sr, samples), waveform, session_log, "Unknown", "Done!"
    except Exception as e:
        return None, None, f"Error: {e}", "", f"Error: {e}"
### Other necessary functions (batch, AI remaster...) would follow similar patterns.

# ===================================================
# Now, the Gradio UI:
# Paste this after all function definitions above
# ===================================================
with gr.Blocks() as demo:
    gr.HTML('<h3 style="text-align:center;">Where Your Audio Meets Intelligence</h3>')
    gr.Markdown("### Upload, edit, export – powered by AI!")

    with gr.Tab("🎵 Single File Studio"):
        with gr.Row():
            with gr.Column():
                input_audio = gr.Audio(label="Upload Audio", type="filepath")
                effect_checkbox = gr.CheckboxGroup(
                    choices=sorted({e for effects in preset_choices.values() for e in effects}),
                    label="Apply Effects in Order",
                )
                preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset")
                export_format = gr.Dropdown(choices=["WAV", "MP3"], label="Export Format", value="WAV")
                isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
                process_btn = gr.Button("Process Audio")
            with gr.Column():
                processed_audio = gr.Audio(label="Processed Audio", type="numpy")
                waveform_image = gr.Image(label="Waveform Preview")
                session_log = gr.Textbox(label="Session Log", lines=6)
                detected_genre = gr.Textbox(label="Detected Genre")
                status = gr.Textbox(label="Status", lines=1, value="Ready")
        def update_effects(preset):
            return preset_choices.get(preset, [])

        preset_dropdown.change(update_effects, inputs=preset_dropdown, outputs=effect_checkbox)

        def run_processing(audio, effects, isolate, preset, fmt):
            # Prefer the effects ticked in the checkbox; fall back to the preset's defaults
            effs = effects if effects else preset_choices.get(preset, [])
            return process_audio(audio, effs, isolate, preset, fmt)

        process_btn.click(
            run_processing,
            inputs=[input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format],
            outputs=[processed_audio, waveform_image, session_log, detected_genre, status],
        )

demo.launch()