wcy1122 committed on
Commit
8ac9028
·
1 Parent(s): 9f73e8a

update streamer

Browse files
mgm/model/multimodal_generator/mgm_omni_streamer.py CHANGED
@@ -18,8 +18,9 @@ class MGMOmniStreamer(TextIteratorStreamer):
18
  self.cosyvoice = cosyvoice
19
  self.cosyvoice.hift_cache_dict[self.uuid] = None
20
  self.max_audio_token = max_audio_token - 2
21
- self.hop_len = 100
22
- self.this_hop_len = 100
 
23
  self.lookahead_len = self.cosyvoice.flow.pre_lookahead_len
24
  self.token_offset = 0
25
  self.speech_tokens = None
@@ -77,7 +78,8 @@ class MGMOmniStreamer(TextIteratorStreamer):
77
  try:
78
  token_offset = self.token_offset
79
  self.token_offset += self.this_hop_len
80
- self.this_hop_len = self.hop_len * 2
 
81
  cur_audio = self.cosyvoice.token2wav(
82
  token=self.speech_tokens[:, :self.token_offset + self.lookahead_len],
83
  prompt_token=self.ref_tokens,
 
18
  self.cosyvoice = cosyvoice
19
  self.cosyvoice.hift_cache_dict[self.uuid] = None
20
  self.max_audio_token = max_audio_token - 2
21
+ self.hop_len = 200
22
+ self.this_hop_len = 200
23
+ self.block_rate = 2
24
  self.lookahead_len = self.cosyvoice.flow.pre_lookahead_len
25
  self.token_offset = 0
26
  self.speech_tokens = None
 
78
  try:
79
  token_offset = self.token_offset
80
  self.token_offset += self.this_hop_len
81
+ self.this_hop_len = min(self.hop_len * self.block_rate, 3200)
82
+ self.block_rate *= 2
83
  cur_audio = self.cosyvoice.token2wav(
84
  token=self.speech_tokens[:, :self.token_offset + self.lookahead_len],
85
  prompt_token=self.ref_tokens,
third_party/cosyvoice/flow/flow_matching.py CHANGED
@@ -156,7 +156,7 @@ class CausalConditionalCFM(ConditionalCFM):
156
  def __init__(self, in_channels, cfm_params, n_spks=1, spk_emb_dim=64, estimator: torch.nn.Module = None):
157
  super().__init__(in_channels, cfm_params, n_spks, spk_emb_dim, estimator)
158
  set_all_random_seed(0)
159
- self.rand_noise = torch.randn([1, 80, 50 * 300])
160
 
161
  @torch.inference_mode()
162
  def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None, streaming=False):
 
156
  def __init__(self, in_channels, cfm_params, n_spks=1, spk_emb_dim=64, estimator: torch.nn.Module = None):
157
  super().__init__(in_channels, cfm_params, n_spks, spk_emb_dim, estimator)
158
  set_all_random_seed(0)
159
+ self.rand_noise = torch.randn([1, 80, 50 * 300 * 2])
160
 
161
  @torch.inference_mode()
162
  def forward(self, mu, mask, n_timesteps, temperature=1.0, spks=None, cond=None, streaming=False):