Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| from scipy.io.wavfile import read | |
| import matplotlib.pyplot as plt | |
| import torch | |
| import math | |
| import yaml | |
| import json | |
| import pyloudnorm as pyln | |
| from hydra.utils import instantiate | |
| from soxr import resample | |
| from functools import partial, reduce | |
| from itertools import accumulate | |
| from torchcomp import coef2ms, ms2coef | |
| from copy import deepcopy | |
| from modules.utils import vec2statedict, get_chunks | |
| from modules.fx import clip_delay_eq_Q | |
| from plot_utils import get_log_mags_from_eq | |
def chain_functions(*functions):
    """Compose ``functions`` left to right into a single callable.

    The returned callable passes its positional arguments to the first
    function. Every intermediate result is then handed to the next
    function: a ``tuple`` result is unpacked into positional arguments,
    while any other value (including a ``list``) is passed as one
    argument. With no functions at all, the callable simply returns its
    arguments as a tuple.
    """

    def pipeline(*initial_args):
        value = initial_args
        for step in functions:
            if isinstance(value, tuple):
                value = step(*value)
            else:
                value = step(value)
        return value

    return pipeline
# Markdown rendered at the top of the demo page.
title_md = "# Vocal Effects Generator"
description_md = """
This is a demo of the paper [DiffVox: A Differentiable Model for Capturing and Analysing Professional Effects Distributions](https://arxiv.org/abs/2504.14735), accepted at DAFx 2025.
In this demo, you can upload a raw vocal audio file (in mono) and use our model to apply professional-quality vocal processing by tweaking generated effects settings to enhance your vocals!
The effects consist of series of EQ, compressor, delay, and reverb.
The generator is a PCA model derived from 365 vocal effects presets fitted with the same effects chain.
This interface allows you to control the principal components (PCs) of the generator, randomise them, and render the audio.
To give you some idea, we empirically found that the first PC controls the amount of reverb and the second PC controls the amount of brightness.
Note that adding these PCs together does not necessarily mean that their effects are additive in the final audio.
We found sometimes the effects of least important PCs are more perceptible.
Try to play around with the sliders and buttons and see what you can come up with!
> **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
"""
# Range shared by every principal-component (PC) slider; random latents are
# clipped to the same range.
SLIDER_MAX = 3
SLIDER_MIN = -3
# Number of leading PCs that get a dedicated slider; the remaining PCs are
# reached through the "extra PC" dropdown.
NUMBER_OF_PCS = 4
# NOTE(review): not referenced anywhere in the visible code.
TEMPERATURE = 0.7
# Effect-chain configuration (its "model" entry is instantiated with hydra).
CONFIG_PATH = "presets/rt_config.yaml"
# Preset statistics and raw preset data loaded once at import time.
PCA_PARAM_FILE = "presets/internal/gaussian.npz"
INFO_PATH = "presets/internal/info.json"
MASK_PATH = "presets/internal/feature_mask.npy"
PRESET_PATH = "presets/internal/raw_params.npy"
TRAIN_INDEX_PATH = "presets/internal/train_index.npy"
# Audio file pre-loaded into the input player.
EXAMPLE_PATH = "eleanor_erased.wav"
# Build the effect chain described by the "model" section of the config.
with open(CONFIG_PATH) as fp:
    fx_config = yaml.safe_load(fp)["model"]
# Global effect
global_fx = instantiate(fx_config)
global_fx.eval()
# Presets: keep the rows listed in train_index and the columns selected by
# feature_mask.
# NOTE(review): the dtype of `presets` follows the saved .npy file, while
# `mean`, `U` and `eigsqrt` below are cast to float32 - confirm they match.
raw_params = torch.from_numpy(np.load(PRESET_PATH))
train_index = torch.from_numpy(np.load(TRAIN_INDEX_PATH))
feature_mask = torch.from_numpy(np.load(MASK_PATH))
presets = raw_params[train_index][:, feature_mask].contiguous()
# PCA basis from the stored mean / covariance.
pca_params = np.load(PCA_PARAM_FILE)
mean = pca_params["mean"]
cov = pca_params["cov"]
# eigh returns eigenvalues in ascending order; flip both outputs so the
# largest-variance component comes first.
eigvals, eigvecs = np.linalg.eigh(cov)
eigvals = np.flip(eigvals, axis=0)
eigvecs = np.flip(eigvecs, axis=1)
# .copy() materialises the flipped views before handing them to torch.
eigsqrt = torch.from_numpy(eigvals.copy()).float().sqrt()
U = torch.from_numpy(eigvecs.copy()).float()
mean = torch.from_numpy(mean).float()
# Global latent variable
# z = torch.zeros_like(mean)
with open(INFO_PATH) as f:
    info = json.load(f)
param_keys = info["params_keys"]
# Empty shape lists are replaced by [1].
original_shapes = list(
    map(lambda lst: lst if len(lst) else [1], info["params_original_shapes"])
)
# get_chunks supplies the remaining vec2statedict arguments; its last return
# value is discarded here.
*vec2dict_args, _ = get_chunks(param_keys, original_shapes)
vec2dict_args = [param_keys, original_shapes] + vec2dict_args
# vec2dict(x): parameter vector -> state dict, with the bookkeeping arguments
# bound by keyword.
vec2dict = partial(
    vec2statedict,
    **dict(
        zip(
            [
                "keys",
                "original_shapes",
                "selected_chunks",
                "position",
                "U_matrix_shape",
            ],
            vec2dict_args,
        )
    ),
)
# Start the global chain from the mean parameter vector.
global_fx.load_state_dict(vec2dict(mean), strict=False)
# Loudness meter configured for 44100 Hz audio.
meter = pyln.Meter(44100)
def z2x(z):
    """Map latent PC coordinates ``z`` to a parameter vector.

    Computes ``U @ (z * eigsqrt) + mean`` with the module-level PCA tensors.
    As a side effect every open matplotlib figure is closed so figures do
    not pile up across callbacks.
    """
    plt.close("all")
    scaled = z * eigsqrt
    return U @ scaled + mean
def fx2x(fx):
    """Flatten the state of ``fx`` into a masked parameter vector.

    The state-dict entries named in ``param_keys`` are flattened and
    concatenated in that order, then indexed with ``feature_mask``. All open
    matplotlib figures are closed first.
    """
    plt.close("all")
    params = fx.state_dict()
    pieces = [params[key].flatten() for key in param_keys]
    return torch.cat(pieces)[feature_mask]
def x2z(x):
    """Project parameter vector ``x`` onto the PCA basis.

    Returns ``U.T @ (x - mean)`` divided element-wise by ``eigsqrt``.
    """
    centred = x - mean
    return (U.T @ centred) / eigsqrt
def _to_int16_pcm(signal):
    """Scale float audio by 32768 and cast to int16, saturating at the limits.

    A plain ``astype(np.int16)`` wraps around for values outside the int16
    range (a sample of exactly 1.0 becomes -32768), which is audible as a
    loud click; clipping first avoids that.
    """
    return np.clip(signal * 32768, -32768, 32767).astype(np.int16)


def inference(audio, ratio, fx):
    """Render ``audio`` through ``fx`` and mix the direct and wet signals.

    Args:
        audio: ``(sample_rate, samples)`` pair as delivered by the audio
            input component; ``samples`` is a NumPy array.
        ratio: dry/wet ratio; 0 keeps only the direct signal, 1 only the wet
            signal, with a sin/cos crossfade in between.
        fx: callable effect chain returning ``(direct, wet)`` for the input
            tensor.

    Returns:
        Three ``(44100, int16 array)`` pairs: the mix, the direct signal and
        the wet signal.
    """
    sr, y = audio
    if sr != 44100:
        y = resample(y, sr, 44100)
    if y.dtype.kind == "i":
        # Scale by the full-scale value of the actual integer width so that
        # e.g. int32 input is not left ~65536 times too large.
        y = y / (float(np.iinfo(y.dtype).max) + 1.0)
    elif y.dtype.kind != "f":
        y = y / 32768.0
    if y.ndim == 1:
        y = y[:, None]
    loudness = meter.integrated_loudness(y)
    # Silent input has no finite loudness; normalising it would fill the
    # signal with NaN/inf, so leave it untouched in that case.
    if np.isfinite(loudness):
        y = pyln.normalize.loudness(y, loudness, -18.0)
    y = torch.from_numpy(y).float().T.unsqueeze(0)
    if y.shape[1] != 1:
        # Downmix to a single channel.
        y = y.mean(dim=1, keepdim=True)
    # No gradients are needed for rendering, and .numpy() below requires
    # tensors that are not part of an autograd graph.
    with torch.no_grad():
        direct, wet = fx(y)
    direct = direct.squeeze(0).T.numpy()
    wet = wet.squeeze(0).T.numpy()
    # Keep the equal-mix sum (direct + wet) within [-1, 1].
    peak = np.max(np.abs(direct + wet))
    if peak > 1:
        direct = direct / peak
        wet = wet / peak
    angle = ratio * math.pi * 0.5
    # At ratio 0.5 this equals direct + wet; at other ratios one branch is
    # scaled by up to sqrt(2), so the result may still exceed full scale and
    # is saturated by _to_int16_pcm.
    rendered = math.sqrt(2) * (math.cos(angle) * direct + math.sin(angle) * wet)
    return (
        (44100, _to_int16_pcm(rendered)),
        (44100, _to_int16_pcm(direct)),
        (44100, _to_int16_pcm(wet)),
    )
def get_important_pcs(n=10, **kwargs):
    """Create ``n`` PC sliders labelled "PC 1" .. "PC n".

    Every slider spans ``SLIDER_MIN``..``SLIDER_MAX``; extra keyword
    arguments are forwarded to ``gr.Slider``.
    """
    sliders = []
    for pc_number in range(1, n + 1):
        slider = gr.Slider(
            minimum=SLIDER_MIN,
            maximum=SLIDER_MAX,
            label=f"PC {pc_number}",
            **kwargs,
        )
        sliders.append(slider)
    return sliders
def model2json(fx):
    """Collect the settings of the effect chain into a nested dict.

    The first seven modules of ``fx`` are reported under "Direct" (together
    with the panner of ``fx[7]``); the delay, the reverb with its tone
    correction EQ, and the cross send level in dB are reported under
    "Sends". Each module contributes whatever its ``toJSON()`` returns.
    """
    direct_names = ["PK1", "PK2", "LS", "HS", "LP", "HP", "DRC"]

    direct = {}
    for label, module in zip(direct_names, fx):
        direct[label] = module.toJSON()
    spatial = fx[7]
    direct["Panner"] = spatial.pan.toJSON()

    delay = spatial.effects[0]
    delay_settings = {**delay.toJSON(), "LP": delay.eq.toJSON()}

    reverb = spatial.effects[1]
    reverb_settings = {
        **reverb.toJSON(),
        "Tone correction PEQ": {
            label: band.toJSON() for label, band in zip(direct_names[:4], reverb.eq)
        },
    }

    cross_send_db = spatial.params.sends_0.log10().mul(20).item()

    return {
        "Direct": direct,
        "Sends": {
            "DLY": delay_settings,
            "FDN": reverb_settings,
            "Cross Send (dB)": cross_send_db,
        },
    }
def plot_eq(fx):
    """Plot the magnitude responses of the first six modules of ``fx``.

    The summed response is drawn as a solid black line; each individual
    response is drawn faintly with a shaded area towards 0 dB. Returns the
    matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    freqs, band_mags = get_log_mags_from_eq(fx[:6])
    ax.plot(freqs, sum(band_mags), color="black", linestyle="-")
    for band_mag in band_mags:
        ax.plot(freqs, band_mag, "k-", alpha=0.3)
        ax.fill_between(
            freqs, band_mag, 0, facecolor="gray", edgecolor="none", alpha=0.1
        )
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Magnitude (dB)")
    ax.set_xlim(20, 20000)
    ax.set_ylim(-40, 20)
    ax.set_xscale("log")
    ax.grid()
    return fig
def plot_comp(fx):
    """Plot the static input/output curve of the compressor/expander ``fx[6]``.

    The black line is the output level for input levels from -80 to 0 dB,
    including make-up gain; the red line is the identity for reference.
    Returns the matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(6, 5), constrained_layout=True)
    params = fx[6].params
    cmp_th = params.cmp_th.item()
    exp_th = params.exp_th.item()
    cmp_ratio = params.cmp_ratio.item()
    # The expander ratio slider starts at 0; clamp to avoid dividing by zero.
    exp_ratio = max(params.exp_ratio.item(), 1e-6)
    make_up = params.make_up.item()

    # Gain in dB is slope * (threshold - level) with slope = 1 - 1 / ratio,
    # so a ratio of exactly 1 leaves the signal untouched.
    # NOTE(review): assumes the chain uses torchcomp's compressor/expander
    # gain definition - confirm against modules.fx.
    cmp_slope = 1 - 1 / cmp_ratio
    exp_slope = 1 - 1 / exp_ratio

    comp_in = np.linspace(-80, 0, 100)
    comp_curve = np.where(
        comp_in > cmp_th,
        comp_in + cmp_slope * (cmp_th - comp_in),
        comp_in,
    )
    # The previous expression, level - (exp_th - level) / exp_ratio, had a
    # slope of 1 + 1 / ratio below the threshold and therefore drew an
    # expansion even for exp_ratio == 1.
    comp_out = (
        np.where(
            comp_curve < exp_th,
            comp_curve + exp_slope * (exp_th - comp_curve),
            comp_curve,
        )
        + make_up
    )
    ax.plot(comp_in, comp_out, c="black", linestyle="-")
    ax.plot(comp_in, comp_in, c="r", alpha=0.5)
    ax.set_xlabel("Input Level (dB)")
    ax.set_ylabel("Output Level (dB)")
    ax.set_xlim(-80, 0)
    ax.set_ylim(-80, 0)
    ax.grid()
    return fig
def plot_delay(fx):
    """Plot the magnitude response of the delay ``fx[7].effects[0]``.

    The first curve is the delay EQ response plus the delay gain. Nine more
    curves show successive repeats: each adds one more pass through the EQ
    and one more application of the feedback gain, and is drawn fainter the
    longer the delay time is. Returns the matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    delay = fx[7].effects[0]
    w, eq_log_mags = get_log_mags_from_eq([delay.eq])
    log_gain = delay.params.gain.log10().item() * 20
    # Delay parameter divided by 1000 (the UI labels it in milliseconds).
    d = delay.params.delay.item() / 1000
    log_mag = sum(eq_log_mags)
    ax.plot(w, log_mag + log_gain, color="black", linestyle="-")
    log_feedback = delay.params.feedback.log10().item() * 20
    for repeat in range(1, 10):
        repeat_mag = log_mag * (repeat + 1) + log_feedback * repeat + log_gain
        opacity = max(0, (10 - repeat * d * 4) / 10)
        ax.plot(w, repeat_mag, c="black", alpha=opacity, linestyle="-")
    ax.set_xscale("log")
    ax.set_xlim(20, 20000)
    ax.set_ylim(-80, 0)
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Magnitude (dB)")
    ax.grid()
    return fig
def plot_reverb(fx):
    """Plot the tone-correction EQ response of the reverb ``fx[7].effects[1]``.

    The summed EQ response is offset by ``20 * log10(|c| * |b|)``, where
    ``b`` and ``c`` are parameters of the reverb module. Returns the
    matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    fdn = fx[7].effects[1]
    w, band_mags = get_log_mags_from_eq(fdn.eq)
    bc = fdn.params.c.norm() * fdn.params.b.norm()
    log_bc = torch.log10(bc).item() * 20
    total_mag = sum(band_mags) + log_bc
    ax.plot(w, total_mag, color="black", linestyle="-")
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Magnitude (dB)")
    ax.set_xlim(20, 20000)
    ax.set_ylim(-40, 20)
    ax.set_xscale("log")
    ax.grid()
    return fig
def plot_t60(fx):
    """Plot a T60 curve derived from the reverb's ``gamma`` parameter.

    ``gamma`` is converted to dB, divided by the shortest entry of
    ``fdn.delays``, and turned into the time needed for a 60 dB drop at
    44100 samples per second. The values are plotted over a linear frequency
    grid from 0 to 22050 Hz. Returns the matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    fdn = fx[7].effects[1]
    gamma = fdn.params.gamma.squeeze().numpy()
    delays = fdn.delays.numpy()
    w = np.linspace(0, 22050, gamma.size)
    # 1e-10 keeps log10 finite when gamma is zero.
    db_per_sample = 20 * np.log10(gamma + 1e-10) / np.min(delays)
    t60 = -60 / db_per_sample / 44100
    ax.plot(w, t60, color="black", linestyle="-")
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("T60 (s)")
    ax.set_xlim(20, 20000)
    ax.set_ylim(0, 9)
    ax.set_xscale("log")
    ax.grid()
    return fig
def update_param(m, attr_name, value):
    """Set ``m.<attr_name>`` to ``value``, keeping its dtype and rank.

    Args:
        m: object holding the attribute (module, parameter container, ...).
        attr_name: name of the attribute to overwrite.
        value: new scalar value; sliders may deliver it as ``int`` or
            ``float``.

    An ``nn.Parameter`` is updated in place. Any other attribute is replaced
    by a new tensor: 0-dim if the current value is 0-dim, otherwise of shape
    ``(1,)``.
    """
    current = getattr(m, attr_name)
    if isinstance(current, torch.nn.Parameter):
        # copy_ needs a tensor source; convert so plain Python numbers work.
        source = torch.as_tensor(value, dtype=current.dtype, device=current.device)
        current.data.copy_(source)
        return

    # torch.tensor(3) would create an int64 tensor; reuse the dtype/device of
    # the value being replaced so integer slider values do not silently
    # change the attribute's dtype.
    options = {}
    if isinstance(current, torch.Tensor):
        options = {"dtype": current.dtype, "device": current.device}
    if current.ndim == 0:
        replacement = torch.tensor(value, **options)
    else:
        replacement = torch.tensor([value], **options)
    setattr(m, attr_name, replacement)
def update_atrt(comp, attr_name, value):
    """Store a time given in milliseconds as a coefficient on ``comp``.

    ``value`` is converted with ``ms2coef`` at 44100 Hz and assigned to
    ``comp.<attr_name>``.
    """
    coefficient = ms2coef(torch.tensor(value), 44100)
    setattr(comp, attr_name, coefficient)
def vec2fx(x):
    """Build a fresh effect chain configured from parameter vector ``x``.

    A deep copy of ``global_fx`` receives the state dict produced by
    ``vec2dict(x)`` (non-strict), then ``clip_delay_eq_Q`` with ``Q=0.707``
    is applied to every sub-module. ``global_fx`` itself is not modified.
    """
    chain = deepcopy(global_fx)
    chain.load_state_dict(vec2dict(x), strict=False)
    clip_q = partial(clip_delay_eq_Q, Q=0.707)
    chain.apply(clip_q)
    return chain
def get_last_attribute(m, attr_name):
    """Resolve a dotted attribute path up to, but not including, its last part.

    For ``"a.b.c"`` this returns ``(m.a.b, "c")``; for a name without dots it
    returns ``(m, attr_name)``. The final attribute itself is not looked up,
    so the result can be fed to ``getattr``/``setattr``.
    """
    *parents, leaf = attr_name.split(".")
    owner = m
    for part in parents:
        owner = getattr(owner, part)
    return owner, leaf
| with gr.Blocks() as demo: | |
| z = gr.State(torch.zeros_like(mean)) | |
| fx_params = gr.State(mean) | |
| fx = vec2fx(fx_params.value) | |
| sr, y = read(EXAMPLE_PATH) | |
| default_pc_slider = partial( | |
| gr.Slider, minimum=SLIDER_MIN, maximum=SLIDER_MAX, interactive=True, value=0 | |
| ) | |
| default_audio_block = partial(gr.Audio, type="numpy", loop=True) | |
| default_freq_slider = partial(gr.Slider, label="Frequency (Hz)", interactive=True) | |
| default_gain_slider = partial(gr.Slider, label="Gain (dB)", interactive=True) | |
| default_q_slider = partial(gr.Slider, label="Q", interactive=True) | |
| gr.Markdown( | |
| title_md, | |
| elem_id="title", | |
| ) | |
| with gr.Row(): | |
| gr.Markdown( | |
| description_md, | |
| elem_id="description", | |
| ) | |
| gr.Image("diffvox_diagram.png", elem_id="diagram") | |
| with gr.Row(): | |
| with gr.Column(): | |
| audio_input = default_audio_block( | |
| sources="upload", label="Input Audio", value=(sr, y) | |
| ) | |
| with gr.Row(): | |
| random_button = gr.Button( | |
| f"Randomise PCs", | |
| elem_id="randomise-button", | |
| ) | |
| reset_button = gr.Button( | |
| "Reset", | |
| elem_id="reset-button", | |
| ) | |
| render_button = gr.Button( | |
| "Run", elem_id="render-button", variant="primary" | |
| ) | |
| with gr.Row(): | |
| s1 = default_pc_slider(label="PC 1") | |
| s2 = default_pc_slider(label="PC 2") | |
| with gr.Row(): | |
| s3 = default_pc_slider(label="PC 3") | |
| s4 = default_pc_slider(label="PC 4") | |
| sliders = [s1, s2, s3, s4] | |
| with gr.Row(): | |
| with gr.Column(): | |
| extra_pc_dropdown = gr.Dropdown( | |
| list(range(NUMBER_OF_PCS + 1, mean.numel() + 1)), | |
| label=f"PC > {NUMBER_OF_PCS}", | |
| info="Select which extra PC to adjust", | |
| interactive=True, | |
| ) | |
| extra_slider = default_pc_slider(label="Extra PC") | |
| preset_dropdown = gr.Dropdown( | |
| ["none"] + list(range(1, presets.shape[0] + 1)), | |
| value="none", | |
| label=f"Select Preset (1-{presets.shape[0]})", | |
| info="Select a preset to load (this will override the current settings)", | |
| interactive=True, | |
| ) | |
| with gr.Column(): | |
| audio_output = default_audio_block(label="Output Audio", interactive=False) | |
| dry_wet_ratio = gr.Slider( | |
| minimum=0, | |
| maximum=1, | |
| value=0.5, | |
| label="Dry/Wet Ratio", | |
| interactive=True, | |
| ) | |
| direct_output = default_audio_block(label="Direct Audio", interactive=False) | |
| wet_output = default_audio_block(label="Wet Audio", interactive=False) | |
| _ = gr.Markdown("## Parametric EQ") | |
| peq_plot = gr.Plot(plot_eq(fx), label="PEQ Frequency Response", elem_id="peq-plot") | |
| with gr.Row(): | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("High Pass") | |
| hp = fx[5] | |
| hp_freq = default_freq_slider( | |
| minimum=16, maximum=5300, value=hp.params.freq.item() | |
| ) | |
| hp_q = default_q_slider(minimum=0.5, maximum=10, value=hp.params.Q.item()) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("Low Shelf") | |
| ls = fx[2] | |
| ls_freq = default_freq_slider( | |
| minimum=30, maximum=200, value=ls.params.freq.item() | |
| ) | |
| ls_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=ls.params.gain.item() | |
| ) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("Peak filter 1") | |
| pk1 = fx[0] | |
| pk1_freq = default_freq_slider( | |
| minimum=33, maximum=5400, value=pk1.params.freq.item() | |
| ) | |
| pk1_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=pk1.params.gain.item() | |
| ) | |
| pk1_q = default_q_slider(minimum=0.2, maximum=20, value=pk1.params.Q.item()) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("Peak filter 2") | |
| pk2 = fx[1] | |
| pk2_freq = default_freq_slider( | |
| minimum=200, maximum=17500, value=pk2.params.freq.item() | |
| ) | |
| pk2_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=pk2.params.gain.item() | |
| ) | |
| pk2_q = default_q_slider(minimum=0.2, maximum=20, value=pk2.params.Q.item()) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("High Shelf") | |
| hs = fx[3] | |
| hs_freq = default_freq_slider( | |
| minimum=750, maximum=8300, value=hs.params.freq.item() | |
| ) | |
| hs_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=hs.params.gain.item() | |
| ) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("Low Pass") | |
| lp = fx[4] | |
| lp_freq = default_freq_slider( | |
| minimum=200, maximum=18000, value=lp.params.freq.item() | |
| ) | |
| lp_q = default_q_slider(minimum=0.5, maximum=10, value=lp.params.Q.item()) | |
| _ = gr.Markdown("## Compressor and Expander") | |
| with gr.Row(): | |
| with gr.Column(): | |
| comp = fx[6] | |
| cmp_th = gr.Slider( | |
| minimum=-60, | |
| maximum=0, | |
| value=comp.params.cmp_th.item(), | |
| interactive=True, | |
| label="Threshold (dB)", | |
| ) | |
| cmp_ratio = gr.Slider( | |
| minimum=1, | |
| maximum=20, | |
| value=comp.params.cmp_ratio.item(), | |
| interactive=True, | |
| label="Comp. Ratio", | |
| ) | |
| make_up = gr.Slider( | |
| minimum=-12, | |
| maximum=12, | |
| value=comp.params.make_up.item(), | |
| interactive=True, | |
| label="Make Up (dB)", | |
| ) | |
| attack_time = gr.Slider( | |
| minimum=0.1, | |
| maximum=100, | |
| value=coef2ms(comp.params.at, 44100).item(), | |
| interactive=True, | |
| label="Attack Time (ms)", | |
| ) | |
| release_time = gr.Slider( | |
| minimum=50, | |
| maximum=1000, | |
| value=coef2ms(comp.params.rt, 44100).item(), | |
| interactive=True, | |
| label="Release Time (ms)", | |
| ) | |
| exp_ratio = gr.Slider( | |
| minimum=0, | |
| maximum=1, | |
| value=comp.params.exp_ratio.item(), | |
| interactive=True, | |
| label="Exp. Ratio", | |
| ) | |
| exp_th = gr.Slider( | |
| minimum=-80, | |
| maximum=0, | |
| value=comp.params.exp_th.item(), | |
| interactive=True, | |
| label="Exp. Threshold (dB)", | |
| ) | |
| avg_coef = gr.Slider( | |
| minimum=0, | |
| maximum=1, | |
| value=comp.params.avg_coef.item(), | |
| interactive=True, | |
| label="RMS Averaging Coefficient", | |
| ) | |
| with gr.Column(): | |
| comp_plot = gr.Plot( | |
| plot_comp(fx), label="Compressor Curve", elem_id="comp-plot" | |
| ) | |
| _ = gr.Markdown("## Ping-Pong Delay") | |
| with gr.Row(): | |
| with gr.Column(): | |
| delay = fx[7].effects[0] | |
| delay_time = gr.Slider( | |
| minimum=100, | |
| maximum=1000, | |
| value=delay.params.delay.item(), | |
| interactive=True, | |
| label="Delay Time (ms)", | |
| ) | |
| feedback = gr.Slider( | |
| minimum=0, | |
| maximum=1, | |
| value=delay.params.feedback.item(), | |
| interactive=True, | |
| label="Feedback", | |
| ) | |
| delay_gain = gr.Slider( | |
| minimum=-80, | |
| maximum=0, | |
| value=delay.params.gain.log10().item() * 20, | |
| interactive=True, | |
| label="Gain (dB)", | |
| ) | |
| odd_pan = gr.Slider( | |
| minimum=-100, | |
| maximum=100, | |
| value=delay.odd_pan.params.pan.item() * 200 - 100, | |
| interactive=True, | |
| label="Odd Delay Pan", | |
| ) | |
| even_pan = gr.Slider( | |
| minimum=-100, | |
| maximum=100, | |
| value=delay.even_pan.params.pan.item() * 200 - 100, | |
| interactive=True, | |
| label="Even Delay Pan", | |
| ) | |
| delay_lp_freq = gr.Slider( | |
| minimum=200, | |
| maximum=16000, | |
| value=delay.eq.params.freq.item(), | |
| interactive=True, | |
| label="Low Pass Frequency (Hz)", | |
| ) | |
| reverb_send = gr.Slider( | |
| minimum=-80, | |
| maximum=0, | |
| value=fx[7].params.sends_0.log10().item() * 20, | |
| interactive=True, | |
| label="Reverb Send (dB)", | |
| ) | |
| with gr.Column(): | |
| delay_plot = gr.Plot( | |
| plot_delay(fx), label="Delay Frequency Response", elem_id="delay-plot" | |
| ) | |
| _ = gr.Markdown("## FDN Reverb") | |
| with gr.Row(): | |
| reverb_plot = gr.Plot( | |
| plot_reverb(fx), | |
| label="Reverb Tone Correction PEQ", | |
| elem_id="reverb-plot", | |
| min_width=160, | |
| ) | |
| t60_plot = gr.Plot( | |
| plot_t60(fx), label="Reverb T60", elem_id="t60-plot", min_width=160 | |
| ) | |
| with gr.Row(): | |
| fdn = fx[7].effects[1] | |
| tone_correct_peq = fdn.eq | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("Low Shelf") | |
| tc_ls = tone_correct_peq[2] | |
| tc_ls_freq = default_freq_slider( | |
| minimum=30, maximum=450, value=tc_ls.params.freq.item() | |
| ) | |
| tc_ls_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=tc_ls.params.gain.item() | |
| ) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("Peak filter 1") | |
| tc_pk1 = tone_correct_peq[0] | |
| tc_pk1_freq = default_freq_slider( | |
| minimum=200, maximum=2500, value=tc_pk1.params.freq.item() | |
| ) | |
| tc_pk1_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=tc_pk1.params.gain.item() | |
| ) | |
| tc_pk1_q = default_q_slider( | |
| minimum=0.1, maximum=3, value=tc_pk1.params.Q.item() | |
| ) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("Peak filter 2") | |
| tc_pk2 = tone_correct_peq[1] | |
| tc_pk2_freq = default_freq_slider( | |
| minimum=600, maximum=7000, value=tc_pk2.params.freq.item() | |
| ) | |
| tc_pk2_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=tc_pk2.params.gain.item() | |
| ) | |
| tc_pk2_q = default_q_slider( | |
| minimum=0.1, maximum=3, value=tc_pk2.params.Q.item() | |
| ) | |
| with gr.Column(min_width=160): | |
| _ = gr.Markdown("High Shelf") | |
| tc_hs = tone_correct_peq[3] | |
| tc_hs_freq = default_freq_slider( | |
| minimum=1500, maximum=16000, value=tc_hs.params.freq.item() | |
| ) | |
| tc_hs_gain = default_gain_slider( | |
| minimum=-12, maximum=12, value=tc_hs.params.gain.item() | |
| ) | |
| with gr.Row(): | |
| json_output = gr.JSON( | |
| model2json(fx), label="Effect Settings", max_height=800, open=True | |
| ) | |
# Values for the PC sliders: the first NUMBER_OF_PCS entries of z followed by
# the entry selected in the extra-PC dropdown (i is 1-based).
update_pc = lambda z, i: z[:NUMBER_OF_PCS].tolist() + [z[i - 1].item()]
update_pc_outputs = sliders + [extra_slider]
# The following groups are consumed position by position in
# assign_fx_params: entry k of a *_sliders list is written to the attribute
# named by entry k of the matching *_attr_names list, so the lists must stay
# index-aligned.
peq_sliders = [
    pk1_freq,
    pk1_gain,
    pk1_q,
    pk2_freq,
    pk2_gain,
    pk2_q,
    ls_freq,
    ls_gain,
    hs_freq,
    hs_gain,
    lp_freq,
    lp_q,
    hp_freq,
    hp_q,
]
peq_attr_names = (
    ["freq", "gain", "Q"] * 2 + ["freq", "gain"] * 2 + ["freq", "Q"] * 2
)
# Index into fx of the module each PEQ slider belongs to.
peq_indices = [0] * 3 + [1] * 3 + [2] * 2 + [3] * 2 + [4] * 2 + [5] * 2
cmp_sliders = [
    cmp_th,
    cmp_ratio,
    make_up,
    exp_ratio,
    exp_th,
    avg_coef,
    attack_time,
    release_time,
]
# Attack/release are entered in ms and converted by update_atrt.
cmp_update_funcs = [update_param] * 6 + [update_atrt] * 2
cmp_attr_names = [
    "cmp_th",
    "cmp_ratio",
    "make_up",
    "exp_ratio",
    "exp_th",
    "avg_coef",
    "at",
    "rt",
]
# Only the first five compressor sliders trigger a redraw of the curve.
cmp_update_plot_flag = [True] * 5 + [False] * 3
delay_sliders = [delay_time, feedback, delay_lp_freq, delay_gain, odd_pan, even_pan]
# Gain is entered in dB and converted to linear; pans are entered in
# -100..100 and mapped to 0..1.
delay_update_funcs = (
    [update_param] * 3
    + [lambda m, a, v: update_param(m, a, 10 ** (v / 20))]
    + [lambda m, a, v: update_param(m, a, (v + 100) / 200)] * 2
)
# Dotted paths relative to the delay module, resolved by get_last_attribute.
delay_attr_names = [
    "params.delay",
    "params.feedback",
    "eq.params.freq",
    "params.gain",
    "odd_pan.params.pan",
    "even_pan.params.pan",
]
# The pan sliders do not trigger a redraw of the delay plot.
delay_update_plot_flag = [True] * 4 + [False] * 2
tc_peq_sliders = [
    tc_pk1_freq,
    tc_pk1_gain,
    tc_pk1_q,
    tc_pk2_freq,
    tc_pk2_gain,
    tc_pk2_q,
    tc_ls_freq,
    tc_ls_gain,
    tc_hs_freq,
    tc_hs_gain,
]
tc_peq_attr_names = ["freq", "gain", "Q"] * 2 + ["freq", "gain"] * 2
tc_peq_indices = [0] * 3 + [1] * 3 + [2] * 2 + [3] * 2
# Every effect slider in one flat list; this is the argument order expected
# by assign_fx_params.
all_effect_sliders = (
    peq_sliders + cmp_sliders + delay_sliders + tc_peq_sliders + [reverb_send]
)
split_sizes = [
    len(peq_sliders),
    len(cmp_sliders),
    len(delay_sliders),
    len(tc_peq_sliders),
    1,
]
# Cumulative offsets used to cut the flat value list back into groups.
split_indexes = list(
    accumulate(split_sizes, initial=0)
)  # [0, len(peq_sliders), len(peq_sliders) + len(cmp_sliders), ...]
def assign_fx_params(fx, *args):
    """Write the current slider values into ``fx`` and return it.

    ``args`` holds one value per entry of ``all_effect_sliders``, in the
    same order: PEQ values, compressor values, delay values, tone-correction
    EQ values, and finally the reverb send level in dB.
    """
    # Cut the flat value list into the four slider groups; the last value is
    # the reverb send and is handled separately.
    group_bounds = split_indexes[:-1]
    peq_values, cmp_values, delay_values, tc_values = (
        args[lo:hi] for lo, hi in zip(group_bounds[:-1], group_bounds[1:])
    )
    send_db = args[-1]

    for module_idx, val, attr in zip(peq_indices, peq_values, peq_attr_names):
        update_param(fx[module_idx].params, attr, val)

    compressor_params = fx[6].params
    for setter, val, attr in zip(cmp_update_funcs, cmp_values, cmp_attr_names):
        setter(compressor_params, attr, val)

    for setter, val, path in zip(delay_update_funcs, delay_values, delay_attr_names):
        owner, leaf = get_last_attribute(fx[7].effects[0], path)
        setter(owner, leaf, val)

    for band_idx, val, attr in zip(tc_peq_indices, tc_values, tc_peq_attr_names):
        update_param(fx[7].effects[1].eq[band_idx].params, attr, val)

    # The send slider is in dB; the model stores a linear gain.
    update_param(fx[7].params, "sends_0", 10 ** (send_db / 20))
    return fx
# Apply fs one after another, each to the most recent result, and collect
# every result newest-first: accum_func_results(a, f, g) == (g(f(a)), f(a), a).
accum_func_results = lambda init, *fs: reduce(
    lambda x, f: (f(x[0]), *x), fs, (init,)
)
# (x, *slider values) -> (z, x_new, fx): rebuild the chain from x, overwrite
# it with the slider values, then re-derive the parameter vector and latent.
x2z_common_steps = chain_functions(
    lambda x, *all_s: assign_fx_params(vec2fx(x), *all_s),
    lambda fx: accum_func_results(fx, fx2x, x2z),
)
# Every effect slider follows the same pattern: recompute (z, x, fx) from the
# stored parameter vector plus all slider values, then refresh the states,
# the JSON view, the plot belonging to the slider's section (if any) and the
# PC sliders. The selected extra-PC index is threaded through as the last
# tuple element.
for s in peq_sliders:
    s.input(
        chain_functions(
            lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
            lambda z, x, fx, extra_pc_idx: [z, x]
            + [model2json(fx), plot_eq(fx)]
            + update_pc(z, extra_pc_idx),
        ),
        inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
        outputs=[z, fx_params, json_output, peq_plot] + update_pc_outputs,
    )
# update_plot is bound as a default argument so each handler keeps its own
# flag; the returned list and the outputs list are extended together.
for s, update_plot in zip(cmp_sliders, cmp_update_plot_flag):
    s.input(
        chain_functions(
            lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
            lambda z, x, fx, e_pc_i, update_plot=update_plot: [z, x]
            + [model2json(fx)]
            + ([plot_comp(fx)] if update_plot else [])
            + update_pc(z, e_pc_i),
        ),
        inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
        outputs=[z, fx_params, json_output]
        + ([comp_plot] if update_plot else [])
        + update_pc_outputs,
    )
for s, update_plot in zip(delay_sliders, delay_update_plot_flag):
    s.input(
        chain_functions(
            lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
            lambda z, x, fx, e_pc_i, update_plot=update_plot: (
                [z, x]
                + [model2json(fx)]
                + ([plot_delay(fx)] if update_plot else [])
                + update_pc(z, e_pc_i)
            ),
        ),
        inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
        outputs=[z, fx_params]
        + [json_output]
        + ([delay_plot] if update_plot else [])
        + update_pc_outputs,
    )
# The reverb send has no plot of its own.
reverb_send.input(
    chain_functions(
        lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
        lambda z, x, fx, e_pc_i: [z, x] + [model2json(fx)] + update_pc(z, e_pc_i),
    ),
    inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
    outputs=[z, fx_params, json_output] + update_pc_outputs,
)
for s in tc_peq_sliders:
    s.input(
        chain_functions(
            lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
            lambda z, x, fx, e_pc_i: [z, x]
            + [model2json(fx), plot_reverb(fx)]
            + update_pc(z, e_pc_i),
        ),
        inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
        outputs=[z, fx_params, json_output, reverb_plot] + update_pc_outputs,
    )
# "Run": rebuild the chain from the stored parameter vector, overwrite it
# with the current slider values, then render the input audio. inference
# returns the mix, the direct signal and the wet signal, in that order.
render_button.click(
    chain_functions(
        lambda audio, ratio, x, *all_s: (
            audio,
            ratio,
            assign_fx_params(vec2fx(x), *all_s),
        ),
        inference,
    ),
    inputs=[
        audio_input,
        dry_wet_ratio,
        fx_params,
    ]
    + all_effect_sliders,
    outputs=[
        audio_output,
        direct_output,
        wet_output,
    ],
)
# Slider values read back from an effect chain. The order must match
# update_fx_outputs below entry for entry.
#
# The RMS averaging coefficient is included so that its slider follows the
# model whenever the PCs, a preset, "Randomise" or "Reset" change the chain.
# Without it the slider kept its old value, and the next call to
# assign_fx_params (which reads every effect slider) wrote that stale value
# back into the model.
update_fx = lambda fx: [
    fx[0].params.freq.item(),
    fx[0].params.gain.item(),
    fx[0].params.Q.item(),
    fx[1].params.freq.item(),
    fx[1].params.gain.item(),
    fx[1].params.Q.item(),
    fx[2].params.freq.item(),
    fx[2].params.gain.item(),
    fx[3].params.freq.item(),
    fx[3].params.gain.item(),
    fx[4].params.freq.item(),
    fx[4].params.Q.item(),
    fx[5].params.freq.item(),
    fx[5].params.Q.item(),
    fx[6].params.cmp_th.item(),
    fx[6].params.cmp_ratio.item(),
    fx[6].params.make_up.item(),
    fx[6].params.exp_th.item(),
    fx[6].params.exp_ratio.item(),
    fx[6].params.avg_coef.item(),
    # Attack/release are stored as coefficients and shown in milliseconds.
    coef2ms(fx[6].params.at, 44100).item(),
    coef2ms(fx[6].params.rt, 44100).item(),
    fx[7].effects[0].params.delay.item(),
    fx[7].effects[0].params.feedback.item(),
    # Linear gain -> dB.
    fx[7].effects[0].params.gain.log10().item() * 20,
    fx[7].effects[0].eq.params.freq.item(),
    # Pan 0..1 -> -100..100.
    fx[7].effects[0].odd_pan.params.pan.item() * 200 - 100,
    fx[7].effects[0].even_pan.params.pan.item() * 200 - 100,
    fx[7].params.sends_0.log10().item() * 20,
    fx[7].effects[1].eq[0].params.freq.item(),
    fx[7].effects[1].eq[0].params.gain.item(),
    fx[7].effects[1].eq[0].params.Q.item(),
    fx[7].effects[1].eq[1].params.freq.item(),
    fx[7].effects[1].eq[1].params.gain.item(),
    fx[7].effects[1].eq[1].params.Q.item(),
    fx[7].effects[1].eq[2].params.freq.item(),
    fx[7].effects[1].eq[2].params.gain.item(),
    fx[7].effects[1].eq[3].params.freq.item(),
    fx[7].effects[1].eq[3].params.gain.item(),
]
update_fx_outputs = [
    pk1_freq,
    pk1_gain,
    pk1_q,
    pk2_freq,
    pk2_gain,
    pk2_q,
    ls_freq,
    ls_gain,
    hs_freq,
    hs_gain,
    lp_freq,
    lp_q,
    hp_freq,
    hp_q,
    cmp_th,
    cmp_ratio,
    make_up,
    exp_th,
    exp_ratio,
    avg_coef,
    attack_time,
    release_time,
    delay_time,
    feedback,
    delay_gain,
    delay_lp_freq,
    odd_pan,
    even_pan,
    reverb_send,
    tc_pk1_freq,
    tc_pk1_gain,
    tc_pk1_q,
    tc_pk2_freq,
    tc_pk2_gain,
    tc_pk2_q,
    tc_ls_freq,
    tc_ls_gain,
    tc_hs_freq,
    tc_hs_gain,
]
# All five figures for a chain; order matches update_plots_outputs.
update_plots = lambda fx: [
    plot_eq(fx),
    plot_comp(fx),
    plot_delay(fx),
    plot_reverb(fx),
    plot_t60(fx),
]
update_plots_outputs = [
    peq_plot,
    comp_plot,
    delay_plot,
    reverb_plot,
    t60_plot,
]
# Everything shown on the page except the two states: PC sliders, effect
# sliders, plots and the JSON view. i is the 1-based extra-PC index.
update_all = (
    lambda z, fx, i: update_pc(z, i)
    + update_fx(fx)
    + update_plots(fx)
    + [model2json(fx)]
)
update_all_outputs = (
    update_pc_outputs + update_fx_outputs + update_plots_outputs + [json_output]
)
# z -> (z, x, fx). accum_func_results yields (fx, x, z) newest-first; the
# second step reorders the tuple.
z2x_common_steps = chain_functions(
    lambda z: accum_func_results(z, z2x, vec2fx),
    lambda fx, x, z: (z, x, fx),
)
# "Randomise PCs": draw a standard-normal latent, clip it to the slider
# range, and refresh the whole page from it.
random_button.click(
    chain_functions(
        lambda i: (
            *z2x_common_steps(torch.randn_like(mean).clip(SLIDER_MIN, SLIDER_MAX)),
            i,
        ),
        lambda z, x, fx, i: [z, x] + update_all(z, fx, i),
    ),
    inputs=extra_pc_dropdown,
    outputs=[z, fx_params] + update_all_outputs,
)
# "Reset": return to the all-zero latent. The extra-PC slider is filled from
# entry NUMBER_OF_PCS of z, which is zero like every other entry.
reset_button.click(
    chain_functions(
        lambda: z2x_common_steps(torch.zeros_like(mean)),
        lambda z, x, fx: [z, x] + update_all(z, fx, NUMBER_OF_PCS),
    ),
    outputs=[z, fx_params] + update_all_outputs,
)
def update_z(z, s, i):
    """Store ``s`` at position ``i`` of ``z`` in place and return ``z``.

    ``i`` is used as given (0-based); callers holding a 1-based PC number
    subtract one before calling.
    """
    z[i] = s
    return z
# Dedicated PC sliders: write the slider value into z, then rebuild the
# parameter vector and chain and refresh effect sliders, plots and JSON.
# i is bound as a default argument so each handler keeps its own index.
for i, slider in enumerate(sliders):
    slider.input(
        chain_functions(
            lambda z, s, i=i: update_z(z, s, i),
            z2x_common_steps,
            lambda z, x, fx: [z, x, model2json(fx)]
            + update_fx(fx)
            + update_plots(fx),
        ),
        inputs=[z, slider],
        outputs=[z, fx_params, json_output]
        + update_fx_outputs
        + update_plots_outputs,
    )
# Extra PC slider: same flow; the dropdown holds a 1-based PC number.
extra_slider.input(
    chain_functions(
        lambda z, s, i: update_z(z, s, i - 1),
        z2x_common_steps,
        lambda z, x, fx: [z, x, model2json(fx)] + update_fx(fx) + update_plots(fx),
    ),
    inputs=[z, extra_slider, extra_pc_dropdown],
    outputs=[z, fx_params, json_output] + update_fx_outputs + update_plots_outputs,
)
# Selecting another extra PC shows that PC's current value on the slider.
extra_pc_dropdown.input(
    lambda z, i: z[i - 1].item(),
    inputs=[z, extra_pc_dropdown],
    outputs=extra_slider,
)
# Preset selection: "none" loads the mean vector, otherwise the chosen
# preset (1-based); the latent is re-derived with x2z and the whole page is
# refreshed. The second argument is the extra-PC index, passed through.
preset_dropdown.input(
    chain_functions(
        lambda i, _: (mean if i == "none" else presets[i - 1], _),
        lambda x, i: (x2z(x), x, vec2fx(x), i),
        lambda z, x, fx, i: [z, x] + update_all(z, fx, i),
    ),
    inputs=[preset_dropdown, extra_pc_dropdown],
    outputs=[z, fx_params] + update_all_outputs,
)
def remix_dry_wet(ratio, direct, wet):
    """Re-mix already rendered direct/wet audio for a new dry/wet ratio.

    Args:
        ratio: dry/wet ratio between 0 (direct only) and 1 (wet only).
        direct: ``(sample_rate, int16 samples)`` from the direct player, or
            ``None`` when nothing has been rendered yet.
        wet: same for the wet player.

    Returns:
        ``(44100, int16 samples)`` for the output player, or ``None`` when
        there is no rendered audio to mix.
    """
    # Moving the slider before the first render delivers None for both
    # players; indexing them would raise a TypeError.
    if direct is None or wet is None:
        return None
    angle = ratio * math.pi * 0.5
    dry_signal = direct[1] / 32768
    wet_signal = wet[1] / 32768
    mixed = math.sqrt(2) * (
        math.cos(angle) * dry_signal + math.sin(angle) * wet_signal
    )
    # Away from ratio 0.5 one branch is scaled by up to sqrt(2); saturate
    # instead of letting the int16 cast wrap around.
    pcm = np.clip(mixed * 32768, -32768, 32767).astype(np.int16)
    return 44100, pcm


dry_wet_ratio.input(
    remix_dry_wet,
    inputs=[dry_wet_ratio, direct_output, wet_output],
    outputs=[audio_output],
)
# Start serving the interface defined above.
demo.launch()