Hugging Face Spaces (Running on Zero) — commit: "update streamer" (Browse files)
mgm/model/multimodal_generator/mgm_omni_streamer.py
CHANGED
@@ -18,8 +18,9 @@ class MGMOmniStreamer(TextIteratorStreamer):
|
|
18 |
self.cosyvoice = cosyvoice
|
19 |
self.cosyvoice.hift_cache_dict[self.uuid] = None
|
20 |
self.max_audio_token = max_audio_token - 2
|
21 |
-
self.hop_len =
|
22 |
-
self.this_hop_len =
|
|
|
23 |
self.lookahead_len = self.cosyvoice.flow.pre_lookahead_len
|
24 |
self.token_offset = 0
|
25 |
self.speech_tokens = None
|
@@ -77,7 +78,8 @@ class MGMOmniStreamer(TextIteratorStreamer):
|
|
77 |
try:
|
78 |
token_offset = self.token_offset
|
79 |
self.token_offset += self.this_hop_len
|
80 |
-
self.this_hop_len = self.hop_len *
|
|
|
81 |
cur_audio = self.cosyvoice.token2wav(
|
82 |
token=self.speech_tokens[:, :self.token_offset + self.lookahead_len],
|
83 |
prompt_token=self.ref_tokens,
|
|
|
18 |
self.cosyvoice = cosyvoice
|
19 |
self.cosyvoice.hift_cache_dict[self.uuid] = None
|
20 |
self.max_audio_token = max_audio_token - 2
|
21 |
+
self.hop_len = 200
|
22 |
+
self.this_hop_len = 200
|
23 |
+
self.block_rate = 2
|
24 |
self.lookahead_len = self.cosyvoice.flow.pre_lookahead_len
|
25 |
self.token_offset = 0
|
26 |
self.speech_tokens = None
|
|
|
78 |
try:
|
79 |
token_offset = self.token_offset
|
80 |
self.token_offset += self.this_hop_len
|
81 |
+
self.this_hop_len = min(self.hop_len * self.block_rate, 3200)
|
82 |
+
self.block_rate *= 2
|
83 |
cur_audio = self.cosyvoice.token2wav(
|
84 |
token=self.speech_tokens[:, :self.token_offset + self.lookahead_len],
|
85 |
prompt_token=self.ref_tokens,
|
third_party/cosyvoice/flow/flow_matching.py
CHANGED
@@ -156,7 +156,7 @@ class CausalConditionalCFM(ConditionalCFM):
|
|
156 |
def __init__(self, in_channels, cfm_params, n_spks=1, spk_emb_dim=64, estimator: torch.nn.Module = None):
|
157 |
super().__init__(in_channels, cfm_params, n_spks, spk_emb_dim, estimator)
|
158 |
set_all_random_seed(0)
|
159 |
-
self.rand_noise = torch.randn([1, 80, 50 * 300])
|
160 |
|
161 |
@torch.inference_mode()
|
162 |
def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None, streaming=False):
|
|
|
156 |
def __init__(self, in_channels, cfm_params, n_spks=1, spk_emb_dim=64, estimator: torch.nn.Module = None):
|
157 |
super().__init__(in_channels, cfm_params, n_spks, spk_emb_dim, estimator)
|
158 |
set_all_random_seed(0)
|
159 |
+
self.rand_noise = torch.randn([1, 80, 50 * 300 * 2])
|
160 |
|
161 |
@torch.inference_mode()
|
162 |
def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None, streaming=False):
|