Spaces:
Running
Running
New TTS: LLaSA 1B; LLaSA 3B and 8B hit a runtime error; MaskGCT is too demanding on ZeroGPU
Browse files
- app/models.py +34 -14
app/models.py
CHANGED
|
@@ -68,7 +68,7 @@ AVAILABLE_MODELS = {
|
|
| 68 |
|
| 69 |
# MaskGCT (by Amphion)
|
| 70 |
# 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
|
| 71 |
-
'Svngoku/maskgct-audio-lab': 'Svngoku/maskgct-audio-lab', # DEMANDS 300 seconds of ZeroGPU!
|
| 72 |
|
| 73 |
# GPT-SoVITS
|
| 74 |
'lj1995/GPT-SoVITS-v2': 'lj1995/GPT-SoVITS-v2',
|
|
@@ -79,10 +79,12 @@ AVAILABLE_MODELS = {
|
|
| 79 |
# OuteTTS 1B
|
| 80 |
# 'OuteAI/OuteTTS-0.3-1B-Demo': 'OuteAI/OuteTTS-0.3-1B-Demo',
|
| 81 |
|
|
|
|
|
|
|
| 82 |
# llasa 3b TTS
|
| 83 |
-
'srinivasbilla/llasa-3b-tts': 'srinivasbilla/llasa-3b-tts',
|
| 84 |
# llasa 8b TTS
|
| 85 |
-
'srinivasbilla/llasa-8b-tts': 'srinivasbilla/llasa-8b-tts',
|
| 86 |
|
| 87 |
# Mars5
|
| 88 |
# 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
|
|
@@ -255,7 +257,8 @@ HF_SPACES = {
|
|
| 255 |
'text_param_index': 'gen_text_input',
|
| 256 |
'return_audio_index': 0,
|
| 257 |
'is_zero_gpu_space': True,
|
| 258 |
-
'series': '
|
|
|
|
| 259 |
},
|
| 260 |
|
| 261 |
# E2 TTS TODO: call switch model function
|
|
@@ -265,7 +268,8 @@ HF_SPACES = {
|
|
| 265 |
'text_param_index': 'gen_text_input',
|
| 266 |
'return_audio_index': 0,
|
| 267 |
'is_zero_gpu_space': True,
|
| 268 |
-
'series': '
|
|
|
|
| 269 |
},
|
| 270 |
|
| 271 |
# IMS-Toucan
|
|
@@ -338,7 +342,7 @@ HF_SPACES = {
|
|
| 338 |
'return_audio_index': 0,
|
| 339 |
'is_zero_gpu_space': True,
|
| 340 |
'series': 'MaskGCT',
|
| 341 |
-
|
| 342 |
},
|
| 343 |
'Svngoku/maskgct-audio-lab': {
|
| 344 |
'name': 'MaskGCT',
|
|
@@ -347,7 +351,7 @@ HF_SPACES = {
|
|
| 347 |
'return_audio_index': 0,
|
| 348 |
'is_zero_gpu_space': True,
|
| 349 |
'series': 'MaskGCT',
|
| 350 |
-
|
| 351 |
},
|
| 352 |
|
| 353 |
# GPT-SoVITS v2
|
|
@@ -362,7 +366,7 @@ HF_SPACES = {
|
|
| 362 |
|
| 363 |
# OuteTTS v0.2 500M
|
| 364 |
'ameerazam08/OuteTTS-0.2-500M-Demo': {
|
| 365 |
-
'name': 'OuteTTS
|
| 366 |
'function': '/generate_tts',
|
| 367 |
'text_param_index': 0,
|
| 368 |
'return_audio_index': 0,
|
|
@@ -372,7 +376,7 @@ HF_SPACES = {
|
|
| 372 |
},
|
| 373 |
# OuteTTS v0.3 1B
|
| 374 |
'OuteAI/OuteTTS-0.3-1B-Demo': {
|
| 375 |
-
'name': 'OuteTTS
|
| 376 |
'function': '/generate_tts',
|
| 377 |
'text_param_index': 'text',
|
| 378 |
'return_audio_index': 0,
|
|
@@ -381,6 +385,17 @@ HF_SPACES = {
|
|
| 381 |
'emoji': '🥵', # requires 300s reserved ZeroGPU!
|
| 382 |
},
|
| 383 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
# LlaSa 3B
|
| 385 |
'srinivasbilla/llasa-3b-tts': {
|
| 386 |
'name': 'LLaSA 3B',
|
|
@@ -388,7 +403,8 @@ HF_SPACES = {
|
|
| 388 |
'text_param_index': 'target_text',
|
| 389 |
'return_audio_index': 0,
|
| 390 |
'is_zero_gpu_space': True,
|
| 391 |
-
'series': '
|
|
|
|
| 392 |
},
|
| 393 |
|
| 394 |
# LlaSa 8B
|
|
@@ -398,7 +414,8 @@ HF_SPACES = {
|
|
| 398 |
'text_param_index': 'target_text',
|
| 399 |
'return_audio_index': 0,
|
| 400 |
'is_zero_gpu_space': True,
|
| 401 |
-
'series': '
|
|
|
|
| 402 |
},
|
| 403 |
|
| 404 |
# Mars5
|
|
@@ -648,11 +665,14 @@ OVERRIDE_INPUTS = {
|
|
| 648 |
'speaker_selection': "en_female_1",
|
| 649 |
'reference_audio': None,
|
| 650 |
},
|
|
|
|
|
|
|
|
|
|
| 651 |
'srinivasbilla/llasa-3b-tts': {
|
| 652 |
-
'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3')
|
| 653 |
},
|
| 654 |
'srinivasbilla/llasa-8b-tts': {
|
| 655 |
-
'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3')
|
| 656 |
},
|
| 657 |
|
| 658 |
# MARS 5
|
|
@@ -774,7 +794,7 @@ closed_source = [
|
|
| 774 |
]
|
| 775 |
|
| 776 |
# top five models in order to always have one of them picked and scrutinized
|
| 777 |
-
top_five = ['
|
| 778 |
|
| 779 |
# prioritize low vote models
|
| 780 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|
|
|
|
| 68 |
|
| 69 |
# MaskGCT (by Amphion)
|
| 70 |
# 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
|
| 71 |
+
# 'Svngoku/maskgct-audio-lab': 'Svngoku/maskgct-audio-lab', # DEMANDS 300 seconds of ZeroGPU!
|
| 72 |
|
| 73 |
# GPT-SoVITS
|
| 74 |
'lj1995/GPT-SoVITS-v2': 'lj1995/GPT-SoVITS-v2',
|
|
|
|
| 79 |
# OuteTTS 1B
|
| 80 |
# 'OuteAI/OuteTTS-0.3-1B-Demo': 'OuteAI/OuteTTS-0.3-1B-Demo',
|
| 81 |
|
| 82 |
+
# llasa 1b TTS
|
| 83 |
+
'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers': 'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers',
|
| 84 |
# llasa 3b TTS
|
| 85 |
+
# 'srinivasbilla/llasa-3b-tts': 'srinivasbilla/llasa-3b-tts', # ZeroGPU Pro account expired
|
| 86 |
# llasa 8b TTS
|
| 87 |
+
# 'srinivasbilla/llasa-8b-tts': 'srinivasbilla/llasa-8b-tts', # ZeroGPU Pro account expired
|
| 88 |
|
| 89 |
# Mars5
|
| 90 |
# 'CAMB-AI/mars5_space': 'CAMB-AI/mars5_space', # slow inference; Unstable
|
|
|
|
| 257 |
'text_param_index': 'gen_text_input',
|
| 258 |
'return_audio_index': 0,
|
| 259 |
'is_zero_gpu_space': True,
|
| 260 |
+
# 'series': 'E2 TTS',
|
| 261 |
+
'series': 'E2/F5 TTS',
|
| 262 |
},
|
| 263 |
|
| 264 |
# E2 TTS TODO: call switch model function
|
|
|
|
| 268 |
'text_param_index': 'gen_text_input',
|
| 269 |
'return_audio_index': 0,
|
| 270 |
'is_zero_gpu_space': True,
|
| 271 |
+
# 'series': 'F5 TTS',
|
| 272 |
+
'series': 'E2/F5 TTS',
|
| 273 |
},
|
| 274 |
|
| 275 |
# IMS-Toucan
|
|
|
|
| 342 |
'return_audio_index': 0,
|
| 343 |
'is_zero_gpu_space': True,
|
| 344 |
'series': 'MaskGCT',
|
| 345 |
+
'emoji': '🥵', # requires 300s reserved ZeroGPU!
|
| 346 |
},
|
| 347 |
'Svngoku/maskgct-audio-lab': {
|
| 348 |
'name': 'MaskGCT',
|
|
|
|
| 351 |
'return_audio_index': 0,
|
| 352 |
'is_zero_gpu_space': True,
|
| 353 |
'series': 'MaskGCT',
|
| 354 |
+
'emoji': '🥵', # requires 300s reserved ZeroGPU!
|
| 355 |
},
|
| 356 |
|
| 357 |
# GPT-SoVITS v2
|
|
|
|
| 366 |
|
| 367 |
# OuteTTS v0.2 500M
|
| 368 |
'ameerazam08/OuteTTS-0.2-500M-Demo': {
|
| 369 |
+
'name': 'OuteTTS v0.2 500M',
|
| 370 |
'function': '/generate_tts',
|
| 371 |
'text_param_index': 0,
|
| 372 |
'return_audio_index': 0,
|
|
|
|
| 376 |
},
|
| 377 |
# OuteTTS v0.3 1B
|
| 378 |
'OuteAI/OuteTTS-0.3-1B-Demo': {
|
| 379 |
+
'name': 'OuteTTS v0.3 1B',
|
| 380 |
'function': '/generate_tts',
|
| 381 |
'text_param_index': 'text',
|
| 382 |
'return_audio_index': 0,
|
|
|
|
| 385 |
'emoji': '🥵', # requires 300s reserved ZeroGPU!
|
| 386 |
},
|
| 387 |
|
| 388 |
+
# LlaSa 1B
|
| 389 |
+
'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers': {
|
| 390 |
+
'name': 'LLaSA 1B',
|
| 391 |
+
'function': '/predict',
|
| 392 |
+
'text_param_index': 'input_text',
|
| 393 |
+
'return_audio_index': 0,
|
| 394 |
+
'is_zero_gpu_space': True,
|
| 395 |
+
'series': 'LLaSA',
|
| 396 |
+
# 'emoji': '😷', # broken space
|
| 397 |
+
},
|
| 398 |
+
|
| 399 |
# LlaSa 3B
|
| 400 |
'srinivasbilla/llasa-3b-tts': {
|
| 401 |
'name': 'LLaSA 3B',
|
|
|
|
| 403 |
'text_param_index': 'target_text',
|
| 404 |
'return_audio_index': 0,
|
| 405 |
'is_zero_gpu_space': True,
|
| 406 |
+
'series': 'LLaSA',
|
| 407 |
+
'emoji': '😷', # broken space
|
| 408 |
},
|
| 409 |
|
| 410 |
# LlaSa 8B
|
|
|
|
| 414 |
'text_param_index': 'target_text',
|
| 415 |
'return_audio_index': 0,
|
| 416 |
'is_zero_gpu_space': True,
|
| 417 |
+
'series': 'LLaSA',
|
| 418 |
+
'emoji': '😷', # broken space
|
| 419 |
},
|
| 420 |
|
| 421 |
# Mars5
|
|
|
|
| 665 |
'speaker_selection': "en_female_1",
|
| 666 |
'reference_audio': None,
|
| 667 |
},
|
| 668 |
+
'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers': {
|
| 669 |
+
'speaker_choice': 'kore',
|
| 670 |
+
},
|
| 671 |
'srinivasbilla/llasa-3b-tts': {
|
| 672 |
+
'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
|
| 673 |
},
|
| 674 |
'srinivasbilla/llasa-8b-tts': {
|
| 675 |
+
'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
|
| 676 |
},
|
| 677 |
|
| 678 |
# MARS 5
|
|
|
|
| 794 |
]
|
| 795 |
|
| 796 |
# top five models in order to always have one of them picked and scrutinized
|
| 797 |
+
top_five = ['HKUST-Audio/Llasa-1B-finetuned-for-two-speakers']
|
| 798 |
|
| 799 |
# prioritize low vote models
|
| 800 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|