thecollabagepatch committed on
Commit
aa3931e
·
1 Parent(s): 2081536

append model chunk and spool revision

Browse files
Files changed (1) hide show
  1. jam_worker.py +72 -72
jam_worker.py CHANGED
@@ -422,107 +422,107 @@ class JamWorker(threading.Thread):
422
 
423
  # ---------- core streaming helpers ----------
424
 
425
def _append_model_chunk_and_spool(self, s: np.ndarray) -> None:
    """
    Append a newly generated model-rate chunk to the target-rate output spool,
    crossfading it with the end of the previous chunk.

    Strategy:
      - The last `xfade_n` samples of each chunk are held back in
        `self._pending_overlap_model` instead of being emitted.
      - On the next call they are mixed with the head of the new chunk
        (equal-power: prev_tail * cos + head * sin), and the mix is resampled
        and appended to `self._spool`, followed by the chunk's body.
      - When there is no pending tail of exactly `xfade_n` samples (first call,
        or only short chunks were seen so far), nothing is mixed: only the body
        is emitted and the head is dropped.

    Args:
        s: Model-rate audio shaped [samples] or [samples, channels].
           `None` or an empty array is ignored.

    Side effects:
        Updates `self._spool`, `self._spool_written` and
        `self._pending_overlap_model`. Nothing is returned.
    """
    if s is None or s.size == 0:
        return

    def _to_target_sr(y_model: np.ndarray) -> np.ndarray:
        # Project-local resampler; imported lazily so paths that emit nothing
        # never need it.
        from utils import resample_audio
        return resample_audio(y_model, self.mrt.sr, self.params.target_sr)

    def _emit(y_model: np.ndarray) -> None:
        # Single place that resamples and appends, so every call site gets the
        # same empty-spool guard and the same `_spool_written` bookkeeping.
        y = _to_target_sr(y_model)
        self._spool = np.concatenate([self._spool, y], axis=0) if self._spool.size else y
        self._spool_written += y.shape[0]

    # Crossfade length in model samples: prefer an explicit sample count,
    # otherwise derive it from a duration in seconds.
    cfg = self.mrt.config
    try:
        xfade_n = int(cfg.crossfade_samples)
    except (AttributeError, TypeError, ValueError):
        xfade_n = int(round(float(cfg.crossfade_length) * float(self.mrt.sr)))

    # Everything below indexes [samples, channels].
    s = s if s.ndim == 2 else s[:, None]

    if xfade_n <= 0:
        # No crossfade configured: pass the whole chunk through.
        _emit(s)
        return

    n_samps = s.shape[0]
    if n_samps <= xfade_n:
        # Too short to split into head/tail: fold it into the pending tail,
        # keeping at most the most recent `xfade_n` samples.
        pending = self._pending_overlap_model
        if pending is None:
            # Copy: `s` may be a view of the caller's buffer, and the pending
            # tail must survive until the next call (the main path below
            # copies for the same reason).
            self._pending_overlap_model = s.copy()
        else:
            self._pending_overlap_model = np.concatenate([pending, s], axis=0)[-xfade_n:]
        return

    # Split the chunk at model rate. For xfade_n < n_samps < 2 * xfade_n the
    # head and tail slices overlap and there is no body.
    head = s[:xfade_n, :]
    body = s[xfade_n:-xfade_n, :] if n_samps >= (2 * xfade_n) else None
    tail = s[-xfade_n:, :]

    prev_tail = self._pending_overlap_model
    if prev_tail is not None and prev_tail.shape[0] == xfade_n:
        # Equal-power crossfade over the overlap; weights have shape [xfade_n, 1]
        # and broadcast across channels.
        t = np.linspace(0.0, np.pi / 2.0, xfade_n, endpoint=False, dtype=np.float32)[:, None]
        mixed = (prev_tail * np.cos(t, dtype=np.float32)) + (head * np.sin(t, dtype=np.float32))
        # The mixed overlap goes out first, ahead of this chunk's body.
        _emit(mixed.astype(np.float32))

    # With or without a mix, the body is emitted and the tail is held back.
    if body is not None and body.size:
        _emit(body.astype(np.float32))

    self._pending_overlap_model = tail.copy()
525
 
 
526
  def _should_generate_next_chunk(self) -> bool:
527
  # Allow running ahead relative to whichever is larger: last *consumed*
528
  # (explicit ack from client) or last *delivered* (implicit ack).
 
422
 
423
  # ---------- core streaming helpers ----------
424
 
425
def _append_model_chunk_and_spool(self, wav: au.Waveform) -> None:
    """
    Emit a model-rate chunk into the target-rate output spool, crossfading it
    with the end of the previous chunk.

    The last `xfade_n` samples of each chunk are held back in
    `self._pending_overlap_model`. On the next call they are equal-power mixed
    with the head of the new chunk; the mixed overlap is appended to the spool
    first, then the chunk's body. The new tail is then held for the next call.

    Args:
        wav: Object whose `.samples` is an array shaped [samples] or
             [samples, channels] at model rate.

    Side effects:
        Updates `self._spool`, `self._spool_written`, `self._model_stream`
        and `self._pending_overlap_model`. Nothing is returned.

    Notes:
        - `self._rs`, when not None, is called as `process(x, final=False)` and
          may return an empty array; presumably a stateful streaming resampler
          -- confirm against its definition.
        - A pending tail whose length is not exactly `xfade_n` is not mixed; the
          chunk is then handled like a first chunk (head dropped, body emitted).
    """

    def _emit(block_model: np.ndarray) -> None:
        # Resample (if a resampler is configured) and append to the spool.
        # Empty outputs are skipped so the spool is never replaced by, or
        # concatenated with, a zero-length array.
        y = block_model if self._rs is None else self._rs.process(block_model, final=False)
        if y.size:
            self._spool = np.concatenate([self._spool, y], axis=0) if self._spool.size else y
            self._spool_written += y.shape[0]

    def _extend_model_stream(part: np.ndarray) -> None:
        # Copy on first assignment: `part` may be a view of `wav.samples`.
        if self._model_stream is None:
            self._model_stream = part.copy()
        else:
            self._model_stream = np.concatenate([self._model_stream, part], axis=0)

    # ---- unpack model-rate samples ----
    s = wav.samples.astype(np.float32, copy=False)
    if s.ndim == 1:
        s = s[:, None]  # (S,) -> (S, 1); everything below indexes [samples, channels]
    n_samps = s.shape[0]
    if n_samps == 0:
        return

    # ---- crossfade length in model samples (missing/invalid config -> none) ----
    try:
        xfade_s = float(self.mrt.config.crossfade_length)
    except (AttributeError, TypeError, ValueError):
        xfade_s = 0.0
    xfade_n = int(round(max(0.0, xfade_s) * float(self._model_sr)))

    if xfade_n <= 0:
        # No crossfade configured: pass the whole chunk through.
        _emit(s)
        _extend_model_stream(s)
        return

    if n_samps <= xfade_n:
        # Too short to hold a full overlap: fold into the pending tail, keeping
        # at most the most recent `xfade_n` samples, and wait for more audio.
        pending = self._pending_overlap_model
        if pending is None:
            # Copy: with copy=False above, `s` can share memory with
            # `wav.samples`, and the tail must survive until the next call.
            self._pending_overlap_model = s.copy()
        else:
            self._pending_overlap_model = np.concatenate([pending, s], axis=0)[-xfade_n:]
        _extend_model_stream(s)
        return

    # ---- split current chunk into head / body / tail at model rate ----
    # For xfade_n < n_samps < 2 * xfade_n the head and tail slices overlap and
    # there is no body.
    head = s[:xfade_n, :]
    body = s[xfade_n:-xfade_n, :] if n_samps >= (2 * xfade_n) else None
    tail = s[-xfade_n:, :]
    rest = s[xfade_n:]  # everything after the head (body + tail)

    prev_tail = self._pending_overlap_model
    if prev_tail is None or prev_tail.shape[0] != xfade_n:
        # === First-call path: no usable previous tail to mix ===
        # The head is dropped from both the model stream and the spool.
        _extend_model_stream(rest)
        if body is not None and body.size:
            _emit(body)
        self._pending_overlap_model = tail.copy()
        return

    # === Subsequent calls: mix the pending tail with this head ===
    # Equal-power window; weights have shape [xfade_n, 1] and broadcast over channels.
    t = np.linspace(0.0, np.pi / 2.0, xfade_n, endpoint=False, dtype=np.float32)[:, None]
    mixed = (prev_tail * np.cos(t, dtype=np.float32)) + (head * np.sin(t, dtype=np.float32))

    # Model-stream continuity: when the stream already ends with the previous
    # tail, replace that tail with the mixed overlap; otherwise just append.
    # In every case the remainder of this chunk follows the mix.
    stream = self._model_stream
    if stream is not None and stream.shape[0] >= xfade_n:
        self._model_stream = np.concatenate([stream[:-xfade_n], mixed, rest], axis=0)
    elif stream is None:
        self._model_stream = np.concatenate([mixed, rest], axis=0)
    else:
        self._model_stream = np.concatenate([stream, mixed, rest], axis=0)

    # ---- Emit to target-SR spool: mixed overlap FIRST, then body (if any) ----
    _emit(mixed)
    if body is not None and body.size:
        _emit(body)

    # Keep the new tail for next time
    self._pending_overlap_model = tail.copy()
524
 
525
+
526
  def _should_generate_next_chunk(self) -> bool:
527
  # Allow running ahead relative to whichever is larger: last *consumed*
528
  # (explicit ack from client) or last *delivered* (implicit ack).