thecollabagepatch committed on
Commit
78cac08
·
1 Parent(s): dfa1fc4

loudness matching improvement for single-shot generations in DAW

Browse files
Files changed (1) hide show
  1. one_shot_generation.py +89 -8
one_shot_generation.py CHANGED
@@ -112,17 +112,21 @@ def generate_loop_continuation_with_mrt(
112
  # Final exact-length trim to requested bars
113
  out = hard_trim_seconds(stitched, total_secs)
114
 
115
- # Final polish AFTER drop
116
- out = out.peak_normalize(0.95)
117
  apply_micro_fades(out, 5)
118
 
119
- # Loudness match to input (after drop) so bar 1 sits right
120
- out, loud_stats = match_loudness_to_reference(
121
- ref=loop, target=out,
122
- method=loudness_mode, headroom_db=loudness_headroom_db
 
 
 
 
123
  )
124
 
125
- return out, loud_stats
 
126
 
127
 
128
  def generate_style_only_with_mrt(
@@ -193,4 +197,81 @@ def generate_style_only_with_mrt(
193
  out = out.peak_normalize(0.95)
194
  apply_micro_fades(out, 5)
195
 
196
- return out, None # loudness stats not applicable (no reference)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  # Final exact-length trim to requested bars
113
  out = hard_trim_seconds(stitched, total_secs)
114
 
115
+ # (optional) keep micro fades
 
116
  apply_micro_fades(out, 5)
117
 
118
+ # Bar-wise loudness match so bar 1 sits right even if the model ramps up
119
+ out, loud_stats = apply_barwise_loudness_match(
120
+ out,
121
+ ref_loop=loop, # same source the jam path tiles per chunk
122
+ bpm=bpm,
123
+ beats_per_bar=beats_per_bar,
124
+ method=loudness_mode,
125
+ headroom_db=loudness_headroom_db,
126
  )
127
 
128
+ # Finish with peak normalization to 0.95 of full scale (about -0.45 dBFS). NOTE(review): this likely re-scales the bar-matched signal — confirm intended
129
+ out = out.peak_normalize(0.95)
130
 
131
 
132
  def generate_style_only_with_mrt(
 
197
  out = out.peak_normalize(0.95)
198
  apply_micro_fades(out, 5)
199
 
200
+ return out, None # loudness stats not applicable (no reference)
201
+
202
+
203
+ # loudness matching helper for /generate:
204
+
205
def apply_barwise_loudness_match(
    out: au.Waveform,
    ref_loop: au.Waveform,
    *,
    bpm: float,
    beats_per_bar: int,
    method: str = "auto",
    headroom_db: float = 1.0,
    smooth_ms: int = 50,  # small ramp between bars
) -> tuple[au.Waveform, dict]:
    """Match the loudness of ``out`` to ``ref_loop`` one bar at a time.

    ``ref_loop`` is resampled to ``out``'s sample rate if needed, converted
    to stereo, and tiled to the length of ``out``. Each bar of ``out`` is
    then passed to ``match_loudness_to_reference()`` together with the
    aligned slice of the tiled reference, and the matched audio is written
    back. At every bar line after the first, the first ``smooth_ms``
    milliseconds glide from the previous bar's gain to the current bar's
    matched audio so that gain changes do not land as an audible step.

    Args:
        out: Waveform to adjust. Its ``samples`` attribute is replaced in
            place with the matched audio.
        ref_loop: Loudness reference; repeated as needed to cover ``out``.
        bpm: Tempo in beats per minute. Must be positive.
        beats_per_bar: Beats per bar. Must be positive.
        method: Forwarded to ``match_loudness_to_reference()``.
        headroom_db: Forwarded to ``match_loudness_to_reference()``.
        smooth_ms: Length of the gain glide at each bar line, in
            milliseconds. Values <= 0 disable the glide.

    Returns:
        ``(out, stats)`` where ``out`` is the same object that was passed
        in and ``stats["per_bar_gain_db"]`` lists, per bar, the average
        gain in dB inferred from the RMS ratio of matched to input audio.

    Raises:
        ValueError: If ``bpm`` or ``beats_per_bar`` is not positive.
    """
    from .utils import match_loudness_to_reference  # same module in your tree

    if float(bpm) <= 0 or int(beats_per_bar) <= 0:
        raise ValueError(
            f"bpm and beats_per_bar must be positive (got bpm={bpm!r}, "
            f"beats_per_bar={beats_per_bar!r})"
        )

    sr = int(out.sample_rate)
    spb = (60.0 / float(bpm)) * int(beats_per_bar)  # seconds per bar
    # Never let rounding collapse a bar to zero samples (would divide by 0).
    bar_len = max(1, int(round(spb * sr)))

    y = out.samples.astype(np.float32, copy=False)
    if y.ndim == 1:
        y = y[:, None]

    if ref_loop.sample_rate != sr:
        ref = ref_loop.resample(sr).as_stereo().samples.astype(np.float32, copy=False)
    else:
        ref = ref_loop.as_stereo().samples.astype(np.float32, copy=False)
    if ref.ndim == 1:
        ref = ref[:, None]
    if ref.shape[1] == 1:
        ref = np.repeat(ref, 2, axis=1)

    # Tile the reference so every bar of `out` has an aligned reference slice.
    need = y.shape[0]
    reps = int(np.ceil(need / float(ref.shape[0]))) if ref.shape[0] else 1
    ref_tiled = np.tile(ref, (max(1, reps), 1))[:need]

    gains_db = []
    out_adj = y.copy()  # `y` stays untouched so it can serve as the glide source
    n_bars = max(1, int(np.ceil(need / float(bar_len))))
    ramp = int(max(0, round(smooth_ms * sr / 1000.0)))
    prev_gain = 1.0  # linear gain inferred for the previous bar

    for i in range(n_bars):
        s = i * bar_len
        e = min(need, s + bar_len)
        if e <= s:
            break

        ref_bar = au.Waveform(ref_tiled[s:e], sr)
        tgt_bar = au.Waveform(out_adj[s:e], sr)

        matched_bar, _ = match_loudness_to_reference(
            ref_bar, tgt_bar, method=method, headroom_db=headroom_db
        )
        g = matched_bar.samples.astype(np.float32, copy=False)

        # Infer the average linear gain that was applied over this bar.
        if tgt_bar.samples.size > 0:
            eps = 1e-12  # avoid divide-by-zero on silent bars
            g_lin = float(
                np.sqrt((np.mean(g**2) + eps) / (np.mean(tgt_bar.samples**2) + eps))
            )
        else:
            g_lin = 1.0
        gains_db.append(float(20.0 * np.log10(max(g_lin, 1e-6))))

        out_adj[s:e] = g

        # Glide from the previous bar's gain into this bar's matched audio.
        # The glide is capped at the bar length: the previous clamp
        # (max(s, s + ramp - (e - s))) evaluated to `s` for any bar longer
        # than the ramp, which disabled the glide, and overshot `e` on a
        # very short final bar, which raised a broadcasting error.
        fade = min(ramp, e - s)
        if i > 0 and fade > 0:
            t = np.linspace(0.0, 1.0, fade, dtype=np.float32)[:, None]
            out_adj[s:s + fade] = (1.0 - t) * (prev_gain * y[s:s + fade]) + t * g[:fade]

        prev_gain = g_lin

    out.samples = out_adj.astype(np.float32, copy=False)
    return out, {"per_bar_gain_db": gains_db}