thecollabagepatch committed on
Commit
db001c3
·
1 Parent(s): 169ed8c

yet another _append_model_chunk_and_spool rewrite to fix silence gaps

Browse files
Files changed (1) hide show
  1. jam_worker.py +138 -106
jam_worker.py CHANGED
@@ -445,24 +445,52 @@ class JamWorker(threading.Thread):
445
 
446
  def _append_model_chunk_and_spool(self, wav: au.Waveform) -> None:
447
  """
448
- Conservative boundary fix:
449
- - Emit body+tail immediately (target SR), unchanged from your original behavior.
450
- - On *next* call, compute the mixed overlap (prev tail ⨉ cos + new head ⨉ sin),
451
- resample it, and overwrite the last `_pending_tail_target_len` samples in the
452
- target-SR spool with that mixed overlap. Then emit THIS chunk's body+tail and
453
- remember THIS chunk's tail length at target SR for the next correction.
454
-
455
- This keeps external timing and bar alignment identical, but removes the audible
456
- fade-to-zero at chunk ends.
 
 
 
 
457
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
 
459
  # ---- unpack model-rate samples ----
460
  s = wav.samples.astype(np.float32, copy=False)
461
  if s.ndim == 1:
462
  s = s[:, None]
463
- n_samps, _ = s.shape
464
- if n_samps == 0:
465
- return
 
 
466
 
467
  # crossfade length in model samples
468
  try:
@@ -471,110 +499,114 @@ class JamWorker(threading.Thread):
471
  xfade_s = 0.0
472
  xfade_n = int(round(max(0.0, xfade_s) * float(self._model_sr)))
473
 
474
- # helper: resample to target SR via your streaming resampler
475
- def to_target(y: np.ndarray) -> np.ndarray:
476
- return y if self._rs is None else self._rs.process(y, final=False)
 
 
 
 
 
 
 
477
 
478
  # ------------------------------------------
479
- # (A) If we have a pending model tail, fix the last emitted tail at target SR
480
  # ------------------------------------------
481
- if self._pending_tail_model is not None and self._pending_tail_model.shape[0] == xfade_n and xfade_n > 0 and n_samps >= xfade_n:
482
- head = s[:xfade_n, :]
483
-
484
- print(f"[model] head len={head.shape[0]} rms={_dbg_rms_dbfs_model(head):+.1f} dBFS")
485
-
486
- t = np.linspace(0.0, np.pi/2.0, xfade_n, endpoint=False, dtype=np.float32)[:, None]
487
- cosw = np.cos(t, dtype=np.float32)
488
- sinw = np.sin(t, dtype=np.float32)
489
- mixed_model = (self._pending_tail_model * cosw) + (head * sinw) # [xfade_n, C] at model SR
490
-
491
- y_mixed = to_target(mixed_model.astype(np.float32))
492
- Lcorr = int(y_mixed.shape[0]) # exact target-SR samples to write
493
-
494
- # DEBUG: corrected overlap RMS (what we intend to hear at the boundary)
495
- if y_mixed.size:
496
- print(f"[append] mixedOverlap len={y_mixed.shape[0]} rms={_dbg_rms_dbfs(y_mixed):+.1f} dBFS")
497
-
498
- # Overwrite the last `_pending_tail_target_len` samples of the spool with `y_mixed`.
499
- # Use the *smaller* of the two lengths to be safe.
500
- Lpop = min(self._pending_tail_target_len, self._spool.shape[0], Lcorr)
501
- if Lpop > 0 and self._spool.size:
502
- # Trim last Lpop samples
503
- self._spool = self._spool[:-Lpop, :]
504
- self._spool_written -= Lpop
505
- # Append corrected overlap (trim/pad to Lpop to avoid drift)
506
- if Lcorr != Lpop:
507
- if Lcorr > Lpop:
508
- y_m = y_mixed[-Lpop:, :]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  else:
510
- pad = np.zeros((Lpop - Lcorr, y_mixed.shape[1]), dtype=np.float32)
511
- y_m = np.concatenate([y_mixed, pad], axis=0)
512
- else:
513
- y_m = y_mixed
514
- self._spool = np.concatenate([self._spool, y_m], axis=0) if self._spool.size else y_m
515
- self._spool_written += y_m.shape[0]
516
 
517
- # For internal continuity, update _model_stream like before
518
- if self._model_stream is None or self._model_stream.shape[0] < xfade_n:
519
- self._model_stream = s[xfade_n:].copy()
520
- else:
521
- self._model_stream = np.concatenate([self._model_stream[:-xfade_n], mixed_model, s[xfade_n:]], axis=0)
522
- else:
523
- # First-ever call or too-short to mix: maintain _model_stream minimally
524
- if xfade_n > 0 and n_samps > xfade_n:
525
- self._model_stream = s[xfade_n:].copy() if self._model_stream is None else np.concatenate([self._model_stream, s[xfade_n:]], axis=0)
526
- else:
527
- self._model_stream = s.copy() if self._model_stream is None else np.concatenate([self._model_stream, s], axis=0)
528
 
529
  # ------------------------------------------
530
- # (B) Emit THIS chunk's body and tail (same external behavior)
531
  # ------------------------------------------
532
- if xfade_n > 0 and n_samps >= (2 * xfade_n):
533
- body = s[xfade_n:-xfade_n, :]
534
- print(f"[model] body len={body.shape[0]} rms={_dbg_rms_dbfs_model(body):+.1f} dBFS")
535
- if body.size:
536
- y_body = to_target(body.astype(np.float32))
537
- if y_body.size:
538
- # DEBUG: body RMS we are actually appending
539
- print(f"[append] body len={y_body.shape[0]} rms={_dbg_rms_dbfs(y_body):+.1f} dBFS")
540
- self._spool = np.concatenate([self._spool, y_body], axis=0) if self._spool.size else y_body
541
- self._spool_written += y_body.shape[0]
542
- else:
543
- # If chunk too short for head+tail split, treat all (minus preroll) as body
544
- if xfade_n > 0 and n_samps > xfade_n:
545
- body = s[xfade_n:, :]
546
- print(f"[model] body(S) len={body.shape[0]} rms={_dbg_rms_dbfs_model(body):+.1f} dBFS")
547
- y_body = to_target(body.astype(np.float32))
548
- if y_body.size:
549
- # DEBUG: body RMS in short-chunk path
550
- print(f"[append] body(len=short) len={y_body.shape[0]} rms={_dbg_rms_dbfs(y_body):+.1f} dBFS")
551
- self._spool = np.concatenate([self._spool, y_body], axis=0) if self._spool.size else y_body
552
- self._spool_written += y_body.shape[0]
553
- # No tail to remember this round
554
- self._pending_tail_model = None
555
- self._pending_tail_target_len = 0
556
- return
557
-
558
- # Tail (always remember how many TARGET samples we append)
559
- if xfade_n > 0 and n_samps >= xfade_n:
560
- tail = s[-xfade_n:, :]
561
- print(f"[model] tail len={tail.shape[0]} rms={_dbg_rms_dbfs_model(tail):+.1f} dBFS")
562
- y_tail = to_target(tail.astype(np.float32))
563
- Ltail = int(y_tail.shape[0])
564
- if Ltail:
565
- # DEBUG: tail RMS we are appending now (to be corrected next call)
566
- print(f"[append] tail len={y_tail.shape[0]} rms={_dbg_rms_dbfs(y_tail):+.1f} dBFS")
567
- self._spool = np.concatenate([self._spool, y_tail], axis=0) if self._spool.size else y_tail
568
- self._spool_written += Ltail
569
- self._pending_tail_model = tail.copy()
570
- self._pending_tail_target_len = Ltail
571
- else:
572
- # Nothing appended (resampler returned nothing yet) — keep model tail but mark zero target len
573
- self._pending_tail_model = tail.copy()
574
- self._pending_tail_target_len = 0
575
  else:
576
- self._pending_tail_model = None
577
  self._pending_tail_target_len = 0
 
 
 
 
 
 
578
 
579
 
580
 
 
445
 
446
  def _append_model_chunk_and_spool(self, wav: au.Waveform) -> None:
447
  """
448
+ Append one MagentaRT chunk into the target-SR spool with an energy-aware,
449
+ deferred-overwrite crossfade to avoid writing near-silence at bar edges.
450
+
451
+ Key behavior:
452
+ - Append BODY and TAIL of *this* chunk right away (resampled to target SR).
453
+ - Keep THIS chunk's model-rate TAIL (+ its target-SR length if appended) to repair the
454
+ previous boundary on the *next* call by mixing (prev_tail*cos + new_head*sin).
455
+ - When the correction length Lpop would be 0 (e.g., tail produced no target samples last time),
456
+ we APPEND the mixed-overlap to bridge the gap instead of overwriting 0 samples.
457
+ - Before overwriting/appending the mixed-overlap, we guard against writing ultra-quiet audio
458
+ by normalizing it up (bounded) if it's >20 dB below the existing spool end.
459
+
460
+ This keeps your bar clock and external timing the same, but removes "bad starts" and fizzles.
461
  """
462
+ import math
463
+ import numpy as np
464
+
465
+ # ---- helpers ----
466
+ def _rms_dbfs(x: np.ndarray) -> float:
467
+ if x.size == 0:
468
+ return -120.0
469
+ if x.ndim == 2 and x.shape[1] > 1:
470
+ x_m = x.mean(axis=1, dtype=np.float32)
471
+ else:
472
+ x_m = x.astype(np.float32, copy=False).reshape(-1)
473
+ # guard for NaNs
474
+ x_m = np.nan_to_num(x_m, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32, copy=False)
475
+ r = float(np.sqrt(np.mean(x_m * x_m) + 1e-12))
476
+ return 20.0 * math.log10(max(r, 1e-12))
477
+
478
+ def _rms_dbfs_model(x: np.ndarray) -> float:
479
+ # same metric; named for clarity in logs
480
+ return _rms_dbfs(x)
481
+
482
+ def to_target(y: np.ndarray) -> np.ndarray:
483
+ return y if self._rs is None else self._rs.process(y, final=False)
484
 
485
  # ---- unpack model-rate samples ----
486
  s = wav.samples.astype(np.float32, copy=False)
487
  if s.ndim == 1:
488
  s = s[:, None]
489
+ if s.shape[1] == 1:
490
+ # ensure stereo shape for consistency with your spool (S,2)
491
+ s = np.repeat(s, 2, axis=1)
492
+
493
+ n_samps = int(s.shape[0])
494
 
495
  # crossfade length in model samples
496
  try:
 
499
  xfade_s = 0.0
500
  xfade_n = int(round(max(0.0, xfade_s) * float(self._model_sr)))
501
 
502
+ # carve head/body/tail in model domain
503
+ if xfade_n > 0 and n_samps >= (2 * xfade_n):
504
+ head_m = s[:xfade_n, :]
505
+ body_m = s[xfade_n:n_samps - xfade_n, :]
506
+ tail_m = s[n_samps - xfade_n:, :]
507
+ else:
508
+ # too short or no xfade configured — treat everything as body
509
+ head_m = np.zeros((0, 2), dtype=np.float32)
510
+ body_m = s
511
+ tail_m = np.zeros((0, 2), dtype=np.float32)
512
 
513
  # ------------------------------------------
514
+ # (A) Repair the PREVIOUS boundary if we have a pending model-tail
515
  # ------------------------------------------
516
+ did_boundary_mix = False
517
+ if (self._pending_tail_model is not None) and (xfade_n > 0) and (n_samps >= xfade_n):
518
+ # adaptive crossfade length when either side is very quiet
519
+ tail_prev_m = self._pending_tail_model
520
+ head_now_m = head_m
521
+
522
+ # safety: match shapes
523
+ if tail_prev_m.shape[1] != 2:
524
+ if tail_prev_m.ndim == 1:
525
+ tail_prev_m = tail_prev_m[:, None]
526
+ tail_prev_m = np.repeat(tail_prev_m[:, :1], 2, axis=1)
527
+ if head_now_m.shape[1] != 2:
528
+ if head_now_m.ndim == 1:
529
+ head_now_m = head_now_m[:, None]
530
+ head_now_m = np.repeat(head_now_m[:, :1], 2, axis=1)
531
+
532
+ # compute energy to decide whether to shorten xfade
533
+ tail_r = _rms_dbfs_model(tail_prev_m)
534
+ head_r = _rms_dbfs_model(head_now_m)
535
+ xfade_use = int(xfade_n)
536
+ if min(tail_r, head_r) < -45.0:
537
+ xfade_use = max(1, xfade_n // 4)
538
+
539
+ # windowed overlap (model domain)
540
+ Lm = min(xfade_use, tail_prev_m.shape[0], head_now_m.shape[0])
541
+ if Lm > 0:
542
+ t = np.linspace(0.0, math.pi / 2.0, Lm, endpoint=False, dtype=np.float32)[:, None]
543
+ cosw = np.cos(t, dtype=np.float32)
544
+ sinw = np.sin(t, dtype=np.float32)
545
+ mixed_m = tail_prev_m[-Lm:, :] * cosw + head_now_m[:Lm, :] * sinw
546
+
547
+ # resample to target and correct the end of the spool
548
+ y_mixed = to_target(mixed_m)
549
+ Lcorr = int(y_mixed.shape[0])
550
+
551
+ if Lcorr > 0:
552
+ # how many samples from last time's tail did we append?
553
+ # (may be zero if resampler yielded nothing then)
554
+ Lpop = int(min(self._pending_tail_target_len, self._spool.shape[0], Lcorr))
555
+
556
+ if Lpop > 0:
557
+ # energy-aware overwrite of last Lpop samples
558
+ prev_end = self._spool[-Lpop:, :]
559
+ new_seg = y_mixed[-Lpop:, :]
560
+
561
+ prev_r = _rms_dbfs(prev_end)
562
+ new_r = _rms_dbfs(new_seg)
563
+
564
+ # If the new overlap is >20 dB quieter than what's there, lift it (bounded)
565
+ if new_r < (prev_r - 20.0):
566
+ lift_db = max(0.0, min(20.0, (prev_r - 6.0) - new_r)) # cap boost; leave ~6 dB headroom
567
+ scale = 10.0 ** (lift_db / 20.0)
568
+ new_seg = np.clip(new_seg * scale, -1.0, 1.0).astype(np.float32, copy=False)
569
+
570
+ self._spool[-Lpop:, :] = new_seg
571
+ print(f"[append] mixedOverlap len={Lpop} rms={_rms_dbfs(new_seg):+.1f} dBFS")
572
  else:
573
+ # Nothing to overwrite (e.g., last tail produced 0 target samples).
574
+ # Bridge by APPENDING the mixed-overlap.
575
+ self._spool = np.concatenate([self._spool, y_mixed], axis=0)
576
+ self._spool_written += int(y_mixed.shape[0])
577
+ print(f"[append] mixedOverlap len={y_mixed.shape[0]} rms={_rms_dbfs(y_mixed):+.1f} dBFS")
 
578
 
579
+ did_boundary_mix = True
580
+
581
+ # clear pending once we attempted the repair
582
+ self._pending_tail_model = None
583
+ self._pending_tail_target_len = 0
 
 
 
 
 
 
584
 
585
  # ------------------------------------------
586
+ # (B) Append this chunk's BODY then TAIL (target SR)
587
  # ------------------------------------------
588
+ # BODY
589
+ y_body = to_target(body_m) if body_m.size else np.zeros((0, 2), dtype=np.float32)
590
+ if y_body.size:
591
+ self._spool = np.concatenate([self._spool, y_body], axis=0)
592
+ self._spool_written += int(y_body.shape[0])
593
+ print(f"[append] body len={y_body.shape[0] if y_body.size else 0} rms={_rms_dbfs(y_body):+.1f} dBFS")
594
+
595
+ # TAIL (we append now to keep continuity; on next call we'll correct the end)
596
+ y_tail = to_target(tail_m) if tail_m.size else np.zeros((0, 2), dtype=np.float32)
597
+ if y_tail.size:
598
+ self._spool = np.concatenate([self._spool, y_tail], axis=0)
599
+ self._spool_written += int(y_tail.shape[0])
600
+ self._pending_tail_target_len = int(y_tail.shape[0]) # how much we just added at target SR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  else:
602
+ # resampler returned nothing for the tail; mark 0 so next Lpop==0
603
  self._pending_tail_target_len = 0
604
+ print(f"[append] tail len={y_tail.shape[0] if y_tail.size else 0} rms={_rms_dbfs(y_tail):+.1f} dBFS")
605
+
606
+ # keep THIS chunk's model tail to mix with next chunk's head
607
+ # (even if y_tail had 0 target samples; in that case we'll bridge by appending mixed overlap)
608
+ self._pending_tail_model = tail_m if tail_m.size else None
609
+
610
 
611
 
612