Spaces: Running on A10G
fixed long-text generation
Browse files
app.py
CHANGED
@@ -403,7 +403,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
|
|
403 |
)
|
404 |
complete_tokens = torch.cat([complete_tokens, encoded_frames.transpose(2, 1)], dim=-1)
|
405 |
# Decode with Vocos
|
406 |
- frames = [removed expression not captured in this extract]
|
407 |
features = vocos.codes_to_features(frames)
|
408 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
409 |
|
@@ -451,7 +451,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
|
|
451 |
audio_prompts = original_audio_prompts
|
452 |
text_prompts = original_text_prompts
|
453 |
# Decode with Vocos
|
454 |
- frames = [removed expression not captured in this extract]
|
455 |
features = vocos.codes_to_features(frames)
|
456 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
457 |
|
|
|
403 |
)
|
404 |
complete_tokens = torch.cat([complete_tokens, encoded_frames.transpose(2, 1)], dim=-1)
|
405 |
# Decode with Vocos
|
406 |
+ frames = complete_tokens.permute(1, 0, 2)
|
407 |
features = vocos.codes_to_features(frames)
|
408 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
409 |
|
|
|
451 |
audio_prompts = original_audio_prompts
|
452 |
text_prompts = original_text_prompts
|
453 |
# Decode with Vocos
|
454 |
+ frames = complete_tokens.permute(1, 0, 2)
|
455 |
features = vocos.codes_to_features(frames)
|
456 |
samples = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
457 |
|