Commit
·
78cac08
1
Parent(s):
dfa1fc4
loudness matching improvement for single-shot generations in DAW
Browse files- one_shot_generation.py +89 -8
one_shot_generation.py
CHANGED
@@ -112,17 +112,21 @@ def generate_loop_continuation_with_mrt(
|
|
112 |
# Final exact-length trim to requested bars
|
113 |
out = hard_trim_seconds(stitched, total_secs)
|
114 |
|
115 |
-
#
|
116 |
-
out = out.peak_normalize(0.95)
|
117 |
apply_micro_fades(out, 5)
|
118 |
|
119 |
-
#
|
120 |
-
out, loud_stats =
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
123 |
)
|
124 |
|
125 |
-
|
|
|
126 |
|
127 |
|
128 |
def generate_style_only_with_mrt(
|
@@ -193,4 +197,81 @@ def generate_style_only_with_mrt(
|
|
193 |
out = out.peak_normalize(0.95)
|
194 |
apply_micro_fades(out, 5)
|
195 |
|
196 |
-
return out, None # loudness stats not applicable (no reference)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
# Final exact-length trim to requested bars
|
113 |
out = hard_trim_seconds(stitched, total_secs)
|
114 |
|
115 |
+
# (optional) keep micro fades
|
|
|
116 |
apply_micro_fades(out, 5)
|
117 |
|
118 |
+
# Bar-wise loudness match so bar 1 sits right even if the model ramps up
|
119 |
+
out, loud_stats = apply_barwise_loudness_match(
|
120 |
+
out,
|
121 |
+
ref_loop=loop, # same source the jam path tiles per chunk
|
122 |
+
bpm=bpm,
|
123 |
+
beats_per_bar=beats_per_bar,
|
124 |
+
method=loudness_mode,
|
125 |
+
headroom_db=loudness_headroom_db,
|
126 |
)
|
127 |
|
128 |
+
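# Finish with peak_normalize(0.95); 0.95 linear is about -0.45 dBFS (not -1 dBFS). NOTE(review): confirm this call does not rescale away the loudness match applied above.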
|
129 |
+
out = out.peak_normalize(0.95)
|
130 |
|
131 |
|
132 |
def generate_style_only_with_mrt(
|
|
|
197 |
out = out.peak_normalize(0.95)
|
198 |
apply_micro_fades(out, 5)
|
199 |
|
200 |
+
return out, None # loudness stats not applicable (no reference)
|
201 |
+
|
202 |
+
|
203 |
+
# loudness matching helper for /generate:
|
204 |
+
|
205 |
+
def apply_barwise_loudness_match(
    out: au.Waveform,
    ref_loop: au.Waveform,
    *,
    bpm: float,
    beats_per_bar: int,
    method: str = "auto",
    headroom_db: float = 1.0,
    smooth_ms: int = 50,  # small ramp between bars
) -> tuple[au.Waveform, dict]:
    """
    Bar-locked loudness matching of ``out`` against a tiled ``ref_loop``.

    The reference loop is resampled to ``out``'s sample rate if needed,
    converted to stereo and tiled to cover ``out``. Each bar of ``out`` is
    then passed, together with the aligned reference bar, to
    ``match_loudness_to_reference()``. At every bar boundary after the first,
    the first ``smooth_ms`` milliseconds are cross-ramped from the previous
    bar's average gain to the newly matched bar, so the gain does not step
    abruptly.

    Args:
        out: Waveform to adjust. Its ``samples`` attribute is overwritten
            with the adjusted audio (float32, 2-D).
        ref_loop: Reference waveform supplying the per-bar loudness target.
        bpm: Tempo in beats per minute; must be positive.
        beats_per_bar: Beats in one bar; must be positive.
        method: Forwarded unchanged to ``match_loudness_to_reference``.
        headroom_db: Forwarded unchanged to ``match_loudness_to_reference``.
        smooth_ms: Length of the ramp at each bar boundary, in milliseconds.
            Values <= 0 disable the ramp.

    Returns:
        ``(out, {"per_bar_gain_db": [...]})`` where the list holds one entry
        per bar: the average gain in dB, estimated from the RMS ratio of the
        matched bar to the unprocessed bar.

    Raises:
        ValueError: If ``bpm`` or ``beats_per_bar`` is not positive.
    """
    beats = int(beats_per_bar)
    if float(bpm) <= 0 or beats <= 0:
        raise ValueError(
            f"bpm and beats_per_bar must be positive (got bpm={bpm!r}, "
            f"beats_per_bar={beats_per_bar!r})"
        )

    sr = int(out.sample_rate)
    seconds_per_bar = (60.0 / float(bpm)) * beats
    # Clamp to one sample so the bar loop below always advances.
    bar_len = max(1, int(round(seconds_per_bar * sr)))

    y = out.samples.astype(np.float32, copy=False)
    if y.ndim == 1:
        y = y[:, None]
    need = y.shape[0]

    ref_wav = ref_loop if ref_loop.sample_rate == sr else ref_loop.resample(sr)
    ref = ref_wav.as_stereo().samples.astype(np.float32, copy=False)
    # Defensive: make sure the reference really is (samples, 2).
    if ref.ndim == 1:
        ref = ref[:, None]
    if ref.shape[1] == 1:
        ref = np.repeat(ref, 2, axis=1)

    # Nothing to adjust, or nothing to match against: leave `out` untouched.
    if need == 0 or ref.shape[0] == 0:
        return out, {"per_bar_gain_db": []}

    # Tile the reference so that bar i of `out` lines up with ref_tiled[s:e].
    reps = int(np.ceil(need / float(ref.shape[0])))
    ref_tiled = np.tile(ref, (max(1, reps), 1))[:need]

    from .utils import match_loudness_to_reference  # same module in your tree

    gains_db = []
    out_adj = y.copy()
    ramp = int(max(0, round(smooth_ms * sr / 1000.0)))
    eps = 1e-12  # keeps the RMS ratio finite on silent bars
    prev_gain = 1.0  # average linear gain of the previous bar

    for s in range(0, need, bar_len):
        e = min(need, s + bar_len)
        raw = y[s:e]  # unprocessed audio of this bar

        ref_bar = au.Waveform(ref_tiled[s:e], sr)
        tgt_bar = au.Waveform(out_adj[s:e], sr)

        matched_bar, _stats = match_loudness_to_reference(
            ref_bar, tgt_bar, method=method, headroom_db=headroom_db
        )
        g = np.asarray(matched_bar.samples, dtype=np.float32)
        if g.ndim == 1:
            g = g[:, None]

        # Infer the average linear gain that was applied over the bar.
        g_lin = float(
            np.sqrt((np.mean(g ** 2) + eps) / (np.mean(raw ** 2) + eps))
        )
        gains_db.append(float(20.0 * np.log10(max(g_lin, 1e-6))))

        # Ramp from the previous bar's gain into this bar's matched audio.
        # The ramp is clamped to the bar length so a short trailing bar
        # cannot index past its own end.
        n_ramp = min(ramp, e - s) if s > 0 else 0
        if n_ramp > 0:
            t = np.linspace(0.0, 1.0, n_ramp, dtype=np.float32)[:, None]
            carried = prev_gain * raw[:n_ramp]
            out_adj[s:s + n_ramp] = (1.0 - t) * carried + t * g[:n_ramp]
            out_adj[s + n_ramp:e] = g[n_ramp:]
        else:
            out_adj[s:e] = g

        prev_gain = g_lin

    out.samples = out_adj.astype(np.float32, copy=False)
    return out, {"per_bar_gain_db": gains_db}
|