Spaces:
Running
Running
New TTS: IMS-Toucan
Browse files
- app.py +30 -7
- test_tts_ims-toucan.py +16 -0
app.py
CHANGED
|
@@ -74,29 +74,32 @@ AVAILABLE_MODELS = {
|
|
| 74 |
|
| 75 |
# HF Gradio Spaces: # <works with gradio version #>
|
| 76 |
# gradio version that works with most spaces: 4.29
|
| 77 |
-
'coqui/xtts': 'coqui/xtts', # 4.29 4.32
|
| 78 |
-
'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 79 |
# 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # 4.29
|
| 80 |
# 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
|
| 81 |
-
'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
| 82 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 83 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 84 |
-
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
| 85 |
-
'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
|
| 86 |
|
| 87 |
# E2 & F5 TTS
|
| 88 |
# F5 model
|
| 89 |
-
'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
| 90 |
|
| 91 |
# # Parler
|
| 92 |
# Parler Large model
|
| 93 |
-
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 94 |
# Parler Mini model
|
| 95 |
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 96 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
| 97 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
| 98 |
|
| 99 |
# # Microsoft Edge TTS
|
|
|
|
|
|
|
|
|
|
| 100 |
'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
|
| 101 |
|
| 102 |
# HF TTS w issues
|
|
@@ -241,6 +244,15 @@ HF_SPACES = {
|
|
| 241 |
'series': 'E2/F5 TTS',
|
| 242 |
},
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
# TTS w issues
|
| 245 |
# 'PolyAI/pheme': '/predict#0', #sleepy HF Space
|
| 246 |
# 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
|
|
@@ -352,6 +364,17 @@ OVERRIDE_INPUTS = {
|
|
| 352 |
3: "F5-TTS", # model
|
| 353 |
4: False, # cleanup silence
|
| 354 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
}
|
| 356 |
|
| 357 |
hf_clients: Tuple[Client] = {}
|
|
|
|
| 74 |
|
| 75 |
# HF Gradio Spaces: # <works with gradio version #>
|
| 76 |
# gradio version that works with most spaces: 4.29
|
| 77 |
+
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32
|
| 78 |
+
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 79 |
# 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # 4.29
|
| 80 |
# 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
|
| 81 |
+
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
| 82 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 83 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 84 |
+
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
| 85 |
+
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
|
| 86 |
|
| 87 |
# E2 & F5 TTS
|
| 88 |
# F5 model
|
| 89 |
+
# 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
| 90 |
|
| 91 |
# # Parler
|
| 92 |
# Parler Large model
|
| 93 |
+
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 94 |
# Parler Mini model
|
| 95 |
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 96 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
| 97 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
| 98 |
|
| 99 |
# # Microsoft Edge TTS
|
| 100 |
+
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
|
| 101 |
+
|
| 102 |
+
# IMS-Toucan
|
| 103 |
'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
|
| 104 |
|
| 105 |
# HF TTS w issues
|
|
|
|
| 244 |
'series': 'E2/F5 TTS',
|
| 245 |
},
|
| 246 |
|
| 247 |
+
# IMS-Toucan
|
| 248 |
+
'Flux9665/MassivelyMultilingualTTS': {
|
| 249 |
+
'name': 'IMS-Toucan',
|
| 250 |
+
'function': "/predict",
|
| 251 |
+
'text_param_index': 0,
|
| 252 |
+
'return_audio_index': 0,
|
| 253 |
+
'series': 'IMS-Toucan',
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
# TTS w issues
|
| 257 |
# 'PolyAI/pheme': '/predict#0', #sleepy HF Space
|
| 258 |
# 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
|
|
|
|
| 364 |
3: "F5-TTS", # model
|
| 365 |
4: False, # cleanup silence
|
| 366 |
},
|
| 367 |
+
|
| 368 |
+
# IMS-Toucan
|
| 369 |
+
'Flux9665/MassivelyMultilingualTTS': {
|
| 370 |
+
1: "English (eng)", #language
|
| 371 |
+
2: 0.6, #prosody_creativity
|
| 372 |
+
3: 1, #duration_scaling_factor
|
| 373 |
+
4: 41, #voice_seed
|
| 374 |
+
5: -7.5, #emb1
|
| 375 |
+
6: None, #reference_audio
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
}
|
| 379 |
|
| 380 |
hf_clients: Tuple[Client] = {}
|
test_tts_ims-toucan.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from gradio_client import Client
|
| 3 |
+
|
| 4 |
+
client = Client("Flux9665/MassivelyMultilingualTTS", hf_token=os.getenv('HF_TOKEN'))
|
| 5 |
+
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
| 6 |
+
# print(endpoints)
|
| 7 |
+
result = client.predict(
|
| 8 |
+
prompt="What I cannot create, I do not understand.",
|
| 9 |
+
language="English (eng)",
|
| 10 |
+
prosody_creativity=0.5,
|
| 11 |
+
duration_scaling_factor=1,
|
| 12 |
+
voice_seed=27,
|
| 13 |
+
emb1=-7.5,
|
| 14 |
+
reference_audio=None,
|
| 15 |
+
api_name="/predict"
|
| 16 |
+
)
|