Commit
·
86dd29c
1
Parent(s):
82c8e97
Add loudness-matching debug logs as a sanity check
Browse files- jam_worker.py +74 -10
jam_worker.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
# jam_worker.py - Bar-locked spool rewrite
|
2 |
from __future__ import annotations
|
3 |
|
|
|
|
|
4 |
import threading, time
|
5 |
from dataclasses import dataclass
|
6 |
from fractions import Fraction
|
@@ -435,7 +437,7 @@ class JamWorker(threading.Thread):
|
|
435 |
This keeps external timing and bar alignment identical, but removes the audible
|
436 |
fade-to-zero at chunk ends.
|
437 |
"""
|
438 |
-
|
439 |
|
440 |
# ---- unpack model-rate samples ----
|
441 |
s = wav.samples.astype(np.float32, copy=False)
|
@@ -550,20 +552,77 @@ class JamWorker(threading.Thread):
|
|
550 |
return self.idx <= (horizon_anchor + self._max_buffer_ahead)
|
551 |
|
552 |
def _emit_ready(self):
|
553 |
-
"""Emit next chunk(s) if the spool has enough samples."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
554 |
while True:
|
555 |
start, end = self._bar_clock.bounds_for_chunk(self.idx, self.params.bars_per_chunk)
|
556 |
if end > self._spool_written:
|
557 |
-
|
558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
559 |
|
560 |
# Loudness match to reference loop (optional)
|
|
|
561 |
if self.params.ref_loop is not None and self.params.loudness_mode != "none":
|
562 |
ref = self.params.ref_loop.as_stereo().resample(self.params.target_sr)
|
563 |
wav = au.Waveform(loop.copy(), int(self.params.target_sr))
|
564 |
-
|
565 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
566 |
|
|
|
567 |
audio_b64, total_samples, channels = wav_bytes_base64(loop, int(self.params.target_sr))
|
568 |
meta = {
|
569 |
"bpm": float(self.params.bpm),
|
@@ -580,27 +639,31 @@ class JamWorker(threading.Thread):
|
|
580 |
}
|
581 |
chunk = JamChunk(index=self.idx, audio_base64=audio_b64, metadata=meta)
|
582 |
|
|
|
583 |
with self._cv:
|
584 |
self._outbox[self.idx] = chunk
|
585 |
self._cv.notify_all()
|
|
|
|
|
|
|
|
|
586 |
self.idx += 1
|
587 |
|
588 |
# If a reseed is queued, install it *right after* we finish a chunk
|
589 |
with self._lock:
|
590 |
-
# Prefer seamless token splice when available
|
591 |
if self._pending_token_splice is not None:
|
592 |
spliced = self._coerce_tokens(self._pending_token_splice["tokens"])
|
593 |
try:
|
594 |
-
#
|
595 |
-
self.state.context_tokens = spliced
|
596 |
self._pending_token_splice = None
|
|
|
597 |
except Exception:
|
598 |
-
# fallback: full reseed using spliced tokens
|
599 |
new_state = self.mrt.init_state()
|
600 |
new_state.context_tokens = spliced
|
601 |
self.state = new_state
|
602 |
self._model_stream = None
|
603 |
self._pending_token_splice = None
|
|
|
604 |
elif self._pending_reseed is not None:
|
605 |
ctx = self._coerce_tokens(self._pending_reseed["ctx"])
|
606 |
new_state = self.mrt.init_state()
|
@@ -608,6 +671,7 @@ class JamWorker(threading.Thread):
|
|
608 |
self.state = new_state
|
609 |
self._model_stream = None
|
610 |
self._pending_reseed = None
|
|
|
611 |
|
612 |
# ---------- main loop ----------
|
613 |
|
|
|
1 |
# jam_worker.py - Bar-locked spool rewrite
|
2 |
from __future__ import annotations
|
3 |
|
4 |
+
import os
|
5 |
+
|
6 |
import threading, time
|
7 |
from dataclasses import dataclass
|
8 |
from fractions import Fraction
|
|
|
437 |
This keeps external timing and bar alignment identical, but removes the audible
|
438 |
fade-to-zero at chunk ends.
|
439 |
"""
|
440 |
+
|
441 |
|
442 |
# ---- unpack model-rate samples ----
|
443 |
s = wav.samples.astype(np.float32, copy=False)
|
|
|
552 |
return self.idx <= (horizon_anchor + self._max_buffer_ahead)
|
553 |
|
554 |
def _emit_ready(self):
|
555 |
+
"""Emit next chunk(s) if the spool has enough samples. With verbose RMS debug."""
|
556 |
+
|
557 |
+
|
558 |
+
QDB_SILENCE = -55.0 # quarter-bar segment considered "near silence" if RMS dBFS below this
|
559 |
+
EPS = 1e-12
|
560 |
+
|
561 |
+
def rms_dbfs(x: np.ndarray, eps: float = 1e-12) -> float:
    """Return the RMS level of ``x`` in dB relative to full scale.

    A 2-D input is averaged across axis 1 before the RMS is taken, so the
    result is the level of the down-mix: content that cancels between the
    columns measures quieter than either column on its own.

    NOTE(review): no call to this helper is visible in the surrounding
    method -- confirm it is still needed.

    Args:
        x: Sample array, 1-D or 2-D. Assumed to be float audio in [-1, 1]
            laid out as [samples, channels] -- TODO confirm against callers.
        eps: Positive power floor added under the square root so silence
            maps to a finite level instead of ``-inf``. The default equals
            the enclosing ``EPS`` constant (1e-12), i.e. a floor of about
            -120 dBFS.

    Returns:
        The level as a plain Python ``float``. Empty input returns the
        floor level rather than NaN.
    """
    if x.ndim == 2:
        x = x.mean(axis=1)
    # np.mean of an empty array is NaN (plus a RuntimeWarning); treat "no
    # samples" as silence so the log line stays readable.
    power = float(np.mean(np.square(x))) if x.size else 0.0
    # The additive floor already bounds the RMS away from zero, so no
    # further clamping is required before taking the logarithm.
    return float(20.0 * np.log10(np.sqrt(power + eps)))
|
567 |
+
|
568 |
+
def qbar_rms_dbfs(x: np.ndarray, seg_len: int, eps: float = 1e-12) -> list[float]:
    """Return the RMS level in dBFS of each consecutive ``seg_len`` slice of ``x``.

    A 2-D input is averaged across axis 1 before segmentation. The final
    segment may be shorter than ``seg_len`` when the length is not an exact
    multiple; it is still measured over the samples it contains.

    Args:
        x: Sample array, 1-D or 2-D. Assumed to be float audio laid out as
            [samples, channels] -- TODO confirm against callers.
        seg_len: Segment length in samples; must be at least 1.
        eps: Positive power floor added under the square root so a silent
            segment maps to a finite level. The default equals the
            enclosing ``EPS`` constant (1e-12), i.e. about -120 dBFS.

    Returns:
        One plain Python ``float`` per segment, in order. Empty input
        yields an empty list.

    Raises:
        ValueError: If ``seg_len`` is smaller than 1. Zero already failed
            inside ``range``; negative values used to return ``[]`` silently.
    """
    if seg_len < 1:
        raise ValueError(f"seg_len must be >= 1, got {seg_len}")

    mono = x.mean(axis=1) if x.ndim == 2 else x
    total = mono.shape[0]

    levels: list[float] = []
    # range() never yields an offset >= total, so every slice holds at least
    # one sample; slicing clamps the upper bound of the last segment itself.
    for begin in range(0, total, seg_len):
        seg = mono[begin:begin + seg_len]
        power = float(np.mean(seg * seg))
        levels.append(float(20.0 * np.log10(np.sqrt(power + eps))))
    return levels
|
582 |
+
|
583 |
while True:
|
584 |
start, end = self._bar_clock.bounds_for_chunk(self.idx, self.params.bars_per_chunk)
|
585 |
if end > self._spool_written:
|
586 |
+
# Not enough audio buffered for the next full chunk
|
587 |
+
# Debug the readiness gap once per idx
|
588 |
+
# print(f"[emit idx={self.idx}] need end={end}, have={self._spool_written} (Δ={end - self._spool_written})")
|
589 |
+
break
|
590 |
+
|
591 |
+
# Slice the emitted window (target SR)
|
592 |
+
loop = self._spool[start:end] # shape: [samples, channels] @ target_sr
|
593 |
+
|
594 |
+
# ---- DEBUG: pre-loudness quarter-bar RMS ----
|
595 |
+
spb = self._bar_clock.bar_samps # samples per bar @ target_sr
|
596 |
+
qlen = max(1, spb // 4) # quarter-bar segment length
|
597 |
+
q_rms_pre = qbar_rms_dbfs(loop, qlen)
|
598 |
+
# Mark segments that look like near-silence
|
599 |
+
silent_marks_pre = ["🟢" if v > QDB_SILENCE else "🟥" for v in q_rms_pre[:8]]
|
600 |
+
print(f"[emit idx={self.idx}] pre-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_pre[:8]]} {''.join(silent_marks_pre)}")
|
601 |
|
602 |
# Loudness match to reference loop (optional)
|
603 |
+
gain_db_applied = None
|
604 |
if self.params.ref_loop is not None and self.params.loudness_mode != "none":
|
605 |
ref = self.params.ref_loop.as_stereo().resample(self.params.target_sr)
|
606 |
wav = au.Waveform(loop.copy(), int(self.params.target_sr))
|
607 |
+
try:
|
608 |
+
matched, gain_db_applied = match_loudness_to_reference(
|
609 |
+
ref, wav,
|
610 |
+
method=self.params.loudness_mode,
|
611 |
+
headroom_db=self.params.headroom_db
|
612 |
+
)
|
613 |
+
loop = matched.samples
|
614 |
+
except Exception as e:
|
615 |
+
print(f"[emit idx={self.idx}] loudness-match ERROR: {e}; proceeding with un-matched audio")
|
616 |
+
|
617 |
+
# ---- DEBUG: post-loudness quarter-bar RMS ----
|
618 |
+
q_rms_post = qbar_rms_dbfs(loop, qlen)
|
619 |
+
silent_marks_post = ["🟢" if v > QDB_SILENCE else "🟥" for v in q_rms_post[:8]]
|
620 |
+
if gain_db_applied is None:
|
621 |
+
print(f"[emit idx={self.idx}] post-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_post[:8]]} {''.join(silent_marks_post)} (LM: none)")
|
622 |
+
else:
|
623 |
+
print(f"[emit idx={self.idx}] post-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_post[:8]]} {''.join(silent_marks_post)} (LM gain {gain_db_applied:+.2f} dB)")
|
624 |
|
625 |
+
# Encode & ship
|
626 |
audio_b64, total_samples, channels = wav_bytes_base64(loop, int(self.params.target_sr))
|
627 |
meta = {
|
628 |
"bpm": float(self.params.bpm),
|
|
|
639 |
}
|
640 |
chunk = JamChunk(index=self.idx, audio_base64=audio_b64, metadata=meta)
|
641 |
|
642 |
+
# Emit to outbox
|
643 |
with self._cv:
|
644 |
self._outbox[self.idx] = chunk
|
645 |
self._cv.notify_all()
|
646 |
+
|
647 |
+
# ---- DEBUG: boundary bookkeeping ----
|
648 |
+
print(f"[emit idx={self.idx}] slice [{start}:{end}] (len={end-start}), spool_written={self._spool_written}")
|
649 |
+
|
650 |
self.idx += 1
|
651 |
|
652 |
# If a reseed is queued, install it *right after* we finish a chunk
|
653 |
with self._lock:
|
|
|
654 |
if self._pending_token_splice is not None:
|
655 |
spliced = self._coerce_tokens(self._pending_token_splice["tokens"])
|
656 |
try:
|
657 |
+
self.state.context_tokens = spliced # in-place update
|
|
|
658 |
self._pending_token_splice = None
|
659 |
+
print(f"[emit idx={self.idx}] installed token splice (in-place)")
|
660 |
except Exception:
|
|
|
661 |
new_state = self.mrt.init_state()
|
662 |
new_state.context_tokens = spliced
|
663 |
self.state = new_state
|
664 |
self._model_stream = None
|
665 |
self._pending_token_splice = None
|
666 |
+
print(f"[emit idx={self.idx}] installed token splice (reinit state)")
|
667 |
elif self._pending_reseed is not None:
|
668 |
ctx = self._coerce_tokens(self._pending_reseed["ctx"])
|
669 |
new_state = self.mrt.init_state()
|
|
|
671 |
self.state = new_state
|
672 |
self._model_stream = None
|
673 |
self._pending_reseed = None
|
674 |
+
print(f"[emit idx={self.idx}] performed full reseed")
|
675 |
|
676 |
# ---------- main loop ----------
|
677 |
|