Commit
·
db001c3
1
Parent(s):
169ed8c
yet another _append_model_chunk_and_spool rewrite to fix silence gaps
Browse files- jam_worker.py +138 -106
jam_worker.py
CHANGED
@@ -445,24 +445,52 @@ class JamWorker(threading.Thread):
|
|
445 |
|
446 |
def _append_model_chunk_and_spool(self, wav: au.Waveform) -> None:
|
447 |
"""
|
448 |
-
|
449 |
-
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
|
|
|
|
|
|
|
|
457 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
|
459 |
# ---- unpack model-rate samples ----
|
460 |
s = wav.samples.astype(np.float32, copy=False)
|
461 |
if s.ndim == 1:
|
462 |
s = s[:, None]
|
463 |
-
|
464 |
-
|
465 |
-
|
|
|
|
|
466 |
|
467 |
# crossfade length in model samples
|
468 |
try:
|
@@ -471,110 +499,114 @@ class JamWorker(threading.Thread):
|
|
471 |
xfade_s = 0.0
|
472 |
xfade_n = int(round(max(0.0, xfade_s) * float(self._model_sr)))
|
473 |
|
474 |
-
#
|
475 |
-
|
476 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
477 |
|
478 |
# ------------------------------------------
|
479 |
-
# (A)
|
480 |
# ------------------------------------------
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
if
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
509 |
else:
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
self._spool_written += y_m.shape[0]
|
516 |
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
else:
|
523 |
-
# First-ever call or too-short to mix: maintain _model_stream minimally
|
524 |
-
if xfade_n > 0 and n_samps > xfade_n:
|
525 |
-
self._model_stream = s[xfade_n:].copy() if self._model_stream is None else np.concatenate([self._model_stream, s[xfade_n:]], axis=0)
|
526 |
-
else:
|
527 |
-
self._model_stream = s.copy() if self._model_stream is None else np.concatenate([self._model_stream, s], axis=0)
|
528 |
|
529 |
# ------------------------------------------
|
530 |
-
# (B)
|
531 |
# ------------------------------------------
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
body = s[xfade_n:, :]
|
546 |
-
print(f"[model] body(S) len={body.shape[0]} rms={_dbg_rms_dbfs_model(body):+.1f} dBFS")
|
547 |
-
y_body = to_target(body.astype(np.float32))
|
548 |
-
if y_body.size:
|
549 |
-
# DEBUG: body RMS in short-chunk path
|
550 |
-
print(f"[append] body(len=short) len={y_body.shape[0]} rms={_dbg_rms_dbfs(y_body):+.1f} dBFS")
|
551 |
-
self._spool = np.concatenate([self._spool, y_body], axis=0) if self._spool.size else y_body
|
552 |
-
self._spool_written += y_body.shape[0]
|
553 |
-
# No tail to remember this round
|
554 |
-
self._pending_tail_model = None
|
555 |
-
self._pending_tail_target_len = 0
|
556 |
-
return
|
557 |
-
|
558 |
-
# Tail (always remember how many TARGET samples we append)
|
559 |
-
if xfade_n > 0 and n_samps >= xfade_n:
|
560 |
-
tail = s[-xfade_n:, :]
|
561 |
-
print(f"[model] tail len={tail.shape[0]} rms={_dbg_rms_dbfs_model(tail):+.1f} dBFS")
|
562 |
-
y_tail = to_target(tail.astype(np.float32))
|
563 |
-
Ltail = int(y_tail.shape[0])
|
564 |
-
if Ltail:
|
565 |
-
# DEBUG: tail RMS we are appending now (to be corrected next call)
|
566 |
-
print(f"[append] tail len={y_tail.shape[0]} rms={_dbg_rms_dbfs(y_tail):+.1f} dBFS")
|
567 |
-
self._spool = np.concatenate([self._spool, y_tail], axis=0) if self._spool.size else y_tail
|
568 |
-
self._spool_written += Ltail
|
569 |
-
self._pending_tail_model = tail.copy()
|
570 |
-
self._pending_tail_target_len = Ltail
|
571 |
-
else:
|
572 |
-
# Nothing appended (resampler returned nothing yet) — keep model tail but mark zero target len
|
573 |
-
self._pending_tail_model = tail.copy()
|
574 |
-
self._pending_tail_target_len = 0
|
575 |
else:
|
576 |
-
|
577 |
self._pending_tail_target_len = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
578 |
|
579 |
|
580 |
|
|
|
445 |
|
446 |
def _append_model_chunk_and_spool(self, wav: au.Waveform) -> None:
|
447 |
"""
|
448 |
+
Append one MagentaRT chunk into the target-SR spool with an energy-aware,
|
449 |
+
deferred-overwrite crossfade to avoid writing near-silence at bar edges.
|
450 |
+
|
451 |
+
Key behavior:
|
452 |
+
- Append BODY and TAIL of *this* chunk right away (resampled to target SR).
|
453 |
+
- Keep THIS chunk's model-rate TAIL (+ its target-SR length if appended) to repair the
|
454 |
+
previous boundary on the *next* call by mixing (prev_tail*cos + new_head*sin).
|
455 |
+
- When the correction length Lpop would be 0 (e.g., tail produced no target samples last time),
|
456 |
+
we APPEND the mixed-overlap to bridge the gap instead of overwriting 0 samples.
|
457 |
+
- Before overwriting with the mixed-overlap (the append/bridge case is written as-is), we guard against writing ultra-quiet audio
|
458 |
+
by normalizing it up (bounded) if it's >20 dB below the existing spool end.
|
459 |
+
|
460 |
+
This keeps your bar clock and external timing the same, but removes "bad starts" and fizzles.
|
461 |
"""
|
462 |
+
import math
|
463 |
+
import numpy as np
|
464 |
+
|
465 |
+
# ---- helpers ----
|
466 |
+
def _rms_dbfs(x: np.ndarray) -> float:
|
467 |
+
if x.size == 0:
|
468 |
+
return -120.0
|
469 |
+
if x.ndim == 2 and x.shape[1] > 1:
|
470 |
+
x_m = x.mean(axis=1, dtype=np.float32)
|
471 |
+
else:
|
472 |
+
x_m = x.astype(np.float32, copy=False).reshape(-1)
|
473 |
+
# guard for NaNs
|
474 |
+
x_m = np.nan_to_num(x_m, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32, copy=False)
|
475 |
+
r = float(np.sqrt(np.mean(x_m * x_m) + 1e-12))
|
476 |
+
return 20.0 * math.log10(max(r, 1e-12))
|
477 |
+
|
478 |
+
def _rms_dbfs_model(x: np.ndarray) -> float:
|
479 |
+
# same metric; named for clarity in logs
|
480 |
+
return _rms_dbfs(x)
|
481 |
+
|
482 |
+
def to_target(y: np.ndarray) -> np.ndarray:
|
483 |
+
return y if self._rs is None else self._rs.process(y, final=False)
|
484 |
|
485 |
# ---- unpack model-rate samples ----
|
486 |
s = wav.samples.astype(np.float32, copy=False)
|
487 |
if s.ndim == 1:
|
488 |
s = s[:, None]
|
489 |
+
if s.shape[1] == 1:
|
490 |
+
# ensure stereo shape for consistency with your spool (S,2)
|
491 |
+
s = np.repeat(s, 2, axis=1)
|
492 |
+
|
493 |
+
n_samps = int(s.shape[0])
|
494 |
|
495 |
# crossfade length in model samples
|
496 |
try:
|
|
|
499 |
xfade_s = 0.0
|
500 |
xfade_n = int(round(max(0.0, xfade_s) * float(self._model_sr)))
|
501 |
|
502 |
+
# carve head/body/tail in model domain
|
503 |
+
if xfade_n > 0 and n_samps >= (2 * xfade_n):
|
504 |
+
head_m = s[:xfade_n, :]
|
505 |
+
body_m = s[xfade_n:n_samps - xfade_n, :]
|
506 |
+
tail_m = s[n_samps - xfade_n:, :]
|
507 |
+
else:
|
508 |
+
# too short or no xfade configured — treat everything as body
|
509 |
+
head_m = np.zeros((0, 2), dtype=np.float32)
|
510 |
+
body_m = s
|
511 |
+
tail_m = np.zeros((0, 2), dtype=np.float32)
|
512 |
|
513 |
# ------------------------------------------
|
514 |
+
# (A) Repair the PREVIOUS boundary if we have a pending model-tail
|
515 |
# ------------------------------------------
|
516 |
+
did_boundary_mix = False
|
517 |
+
if (self._pending_tail_model is not None) and (xfade_n > 0) and (n_samps >= xfade_n):
|
518 |
+
# adaptive crossfade length when either side is very quiet
|
519 |
+
tail_prev_m = self._pending_tail_model
|
520 |
+
head_now_m = head_m
|
521 |
+
|
522 |
+
# safety: match shapes
|
523 |
+
if tail_prev_m.shape[1] != 2:
|
524 |
+
if tail_prev_m.ndim == 1:
|
525 |
+
tail_prev_m = tail_prev_m[:, None]
|
526 |
+
tail_prev_m = np.repeat(tail_prev_m[:, :1], 2, axis=1)
|
527 |
+
if head_now_m.shape[1] != 2:
|
528 |
+
if head_now_m.ndim == 1:
|
529 |
+
head_now_m = head_now_m[:, None]
|
530 |
+
head_now_m = np.repeat(head_now_m[:, :1], 2, axis=1)
|
531 |
+
|
532 |
+
# compute energy to decide whether to shorten xfade
|
533 |
+
tail_r = _rms_dbfs_model(tail_prev_m)
|
534 |
+
head_r = _rms_dbfs_model(head_now_m)
|
535 |
+
xfade_use = int(xfade_n)
|
536 |
+
if min(tail_r, head_r) < -45.0:
|
537 |
+
xfade_use = max(1, xfade_n // 4)
|
538 |
+
|
539 |
+
# windowed overlap (model domain)
|
540 |
+
Lm = min(xfade_use, tail_prev_m.shape[0], head_now_m.shape[0])
|
541 |
+
if Lm > 0:
|
542 |
+
t = np.linspace(0.0, math.pi / 2.0, Lm, endpoint=False, dtype=np.float32)[:, None]
|
543 |
+
cosw = np.cos(t, dtype=np.float32)
|
544 |
+
sinw = np.sin(t, dtype=np.float32)
|
545 |
+
mixed_m = tail_prev_m[-Lm:, :] * cosw + head_now_m[:Lm, :] * sinw
|
546 |
+
|
547 |
+
# resample to target and correct the end of the spool
|
548 |
+
y_mixed = to_target(mixed_m)
|
549 |
+
Lcorr = int(y_mixed.shape[0])
|
550 |
+
|
551 |
+
if Lcorr > 0:
|
552 |
+
# how many samples from last time's tail did we append?
|
553 |
+
# (may be zero if resampler yielded nothing then)
|
554 |
+
Lpop = int(min(self._pending_tail_target_len, self._spool.shape[0], Lcorr))
|
555 |
+
|
556 |
+
if Lpop > 0:
|
557 |
+
# energy-aware overwrite of last Lpop samples
|
558 |
+
prev_end = self._spool[-Lpop:, :]
|
559 |
+
new_seg = y_mixed[-Lpop:, :]
|
560 |
+
|
561 |
+
prev_r = _rms_dbfs(prev_end)
|
562 |
+
new_r = _rms_dbfs(new_seg)
|
563 |
+
|
564 |
+
# If the new overlap is >20 dB quieter than what's there, lift it (bounded)
|
565 |
+
if new_r < (prev_r - 20.0):
|
566 |
+
lift_db = max(0.0, min(20.0, (prev_r - 6.0) - new_r)) # cap boost; leave ~6 dB headroom
|
567 |
+
scale = 10.0 ** (lift_db / 20.0)
|
568 |
+
new_seg = np.clip(new_seg * scale, -1.0, 1.0).astype(np.float32, copy=False)
|
569 |
+
|
570 |
+
self._spool[-Lpop:, :] = new_seg
|
571 |
+
print(f"[append] mixedOverlap len={Lpop} rms={_rms_dbfs(new_seg):+.1f} dBFS")
|
572 |
else:
|
573 |
+
# Nothing to overwrite (e.g., last tail produced 0 target samples).
|
574 |
+
# Bridge by APPENDING the mixed-overlap.
|
575 |
+
self._spool = np.concatenate([self._spool, y_mixed], axis=0)
|
576 |
+
self._spool_written += int(y_mixed.shape[0])
|
577 |
+
print(f"[append] mixedOverlap len={y_mixed.shape[0]} rms={_rms_dbfs(y_mixed):+.1f} dBFS")
|
|
|
578 |
|
579 |
+
did_boundary_mix = True
|
580 |
+
|
581 |
+
# clear pending once we attempted the repair
|
582 |
+
self._pending_tail_model = None
|
583 |
+
self._pending_tail_target_len = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
584 |
|
585 |
# ------------------------------------------
|
586 |
+
# (B) Append this chunk's BODY then TAIL (target SR)
|
587 |
# ------------------------------------------
|
588 |
+
# BODY
|
589 |
+
y_body = to_target(body_m) if body_m.size else np.zeros((0, 2), dtype=np.float32)
|
590 |
+
if y_body.size:
|
591 |
+
self._spool = np.concatenate([self._spool, y_body], axis=0)
|
592 |
+
self._spool_written += int(y_body.shape[0])
|
593 |
+
print(f"[append] body len={y_body.shape[0] if y_body.size else 0} rms={_rms_dbfs(y_body):+.1f} dBFS")
|
594 |
+
|
595 |
+
# TAIL (we append now to keep continuity; on next call we'll correct the end)
|
596 |
+
y_tail = to_target(tail_m) if tail_m.size else np.zeros((0, 2), dtype=np.float32)
|
597 |
+
if y_tail.size:
|
598 |
+
self._spool = np.concatenate([self._spool, y_tail], axis=0)
|
599 |
+
self._spool_written += int(y_tail.shape[0])
|
600 |
+
self._pending_tail_target_len = int(y_tail.shape[0]) # how much we just added at target SR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
601 |
else:
|
602 |
+
# no tail samples were appended (empty tail, or resampler returned nothing); mark 0 so next Lpop==0
|
603 |
self._pending_tail_target_len = 0
|
604 |
+
print(f"[append] tail len={y_tail.shape[0] if y_tail.size else 0} rms={_rms_dbfs(y_tail):+.1f} dBFS")
|
605 |
+
|
606 |
+
# keep THIS chunk's model tail to mix with next chunk's head
|
607 |
+
# (even if y_tail had 0 target samples; in that case we'll bridge by appending mixed overlap)
|
608 |
+
self._pending_tail_model = tail_m if tail_m.size else None
|
609 |
+
|
610 |
|
611 |
|
612 |
|