TTS-Spaces-Arena

Running

App Files Community

Pendrokar committed on Feb 2

Commit

b45c900

1 Parent(s): bc6dc80

Kokoro v1

Browse files

Files changed (5) hide show

app/models.py +46 -7
app/sample_caching.py +1 -1
app/synth.py +9 -1
app/ui_vote.py +1 -1
test_tts_styletts_kokoro_v1.py +21 -0

app/models.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 from gradio_client import handle_file
-# Models to include in the leaderboard, only include models that users can vote on
 AVAILABLE_MODELS = {
     # 'XTTSv2': 'xtts',
     # 'WhisperSpeech': 'whisperspeech',
@@ -52,10 +52,15 @@ AVAILABLE_MODELS = {
     # IMS-Toucan
     # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
     # StyleTTS v2
-    'Pendrokar/style-tts-2': 'Pendrokar/style-tts-2', #  more votes in OG arena; emotionless
-    # StyleTTS kokoro
-    'hexgrad/kokoro': 'hexgrad/kokoro',
     # MaskGCT (by Amphion)
     # 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
@@ -92,6 +97,7 @@ HF_SPACES = {
         'return_audio_index': 1,
         'series': 'XTTS',
     },
     # WhisperSpeech
     'collabora/WhisperSpeech': {
         'name': 'WhisperSpeech',
@@ -101,6 +107,7 @@ HF_SPACES = {
         'series': 'WhisperSpeech',
         'emoji': '😷', # broken space
     },
     # OpenVoice (MyShell.ai)
     'myshell-ai/OpenVoice': {
         'name':'OpenVoice',
@@ -117,6 +124,7 @@ HF_SPACES = {
         'return_audio_index': 1,
         'series': 'OpenVoice',
     },
     # MetaVoice
     'mrfakename/MetaVoice-1B-v0.1': {
         'name':'MetaVoice',
@@ -126,6 +134,7 @@ HF_SPACES = {
         'series': 'MetaVoice-1B',
         'emoji': '😷', # broken space
     },
     # xVASynth (CPU)
     'Pendrokar/xVASynth-TTS': {
         'name': 'xVASynth v3',
@@ -134,6 +143,7 @@ HF_SPACES = {
         'return_audio_index': 0,
         'series': 'xVASynth',
     },
     # CoquiTTS (CPU)
     'coqui/CoquiTTS': {
         'name': 'CoquiTTS',
@@ -142,6 +152,7 @@ HF_SPACES = {
         'return_audio_index': 0,
         'series': 'CoquiTTS',
     },
     # HierSpeech_TTS
     'LeeSangHoon/HierSpeech_TTS': {
         'name': 'HierSpeech++',
@@ -151,6 +162,7 @@ HF_SPACES = {
         'series': 'HierSpeech++',
         'emoji': '😒', # unemotional
     },
     # MeloTTS (MyShell.ai)
     'mrfakename/MeloTTS': {
         'name': 'MeloTTS',
@@ -279,6 +291,17 @@ HF_SPACES = {
         'series': 'Kokoro',
     },
     # MaskGCT (by Amphion)
     'amphion/maskgct': {
         'name': 'MaskGCT',
@@ -287,7 +310,7 @@ HF_SPACES = {
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'MaskGCT',
-        # 'emoji': '🥵', # 300s minimum ZeroGPU!
     },
     'Svngoku/maskgct-audio-lab': {
         'name': 'MaskGCT',
@@ -296,8 +319,10 @@ HF_SPACES = {
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'MaskGCT',
-        # 'emoji': '🥵', # 300s minimum ZeroGPU!
     },
     'lj1995/GPT-SoVITS-v2': {
         'name': 'GPT-SoVITS v2',
         'function': '/get_tts_wav',
@@ -306,6 +331,8 @@ HF_SPACES = {
         'is_zero_gpu_space': True,
         'series': 'GPT-SoVITS',
     },
     'ameerazam08/OuteTTS-0.2-500M-Demo': {
         'name': 'OuteTTS v2 500M',
         'function': '/generate_tts',
@@ -313,7 +340,9 @@ HF_SPACES = {
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'OuteTTS',
     },
     'OuteAI/OuteTTS-0.3-1B-Demo': {
         'name': 'OuteTTS v3 1B',
         'function': '/generate_tts',
@@ -321,14 +350,18 @@ HF_SPACES = {
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'OuteTTS',
     },
     'srinivasbilla/llasa-3b-tts': {
-        'name': 'llasa 3b',
         'function': '/infer',
         'text_param_index': 'target_text',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'llasa 3b',
     },
 }
@@ -487,6 +520,12 @@ OVERRIDE_INPUTS = {
         'sk': os.getenv('KOKORO'),
     },
     # maskGCT (by amphion)
     'amphion/maskgct': {
         0: DEFAULT_VOICE_SAMPLE, #prompt_wav

 import os
 from gradio_client import handle_file
+# Models to enable; only include models that users can vote on
 AVAILABLE_MODELS = {
     # 'XTTSv2': 'xtts',
     # 'WhisperSpeech': 'whisperspeech',
     # IMS-Toucan
     # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
     # StyleTTS v2
+    # 'Pendrokar/style-tts-2': 'Pendrokar/style-tts-2', #  more votes in OG arena; emotionless
+    # StyleTTS Kokoro v0.19
+    # 'hexgrad/kokoro': 'hexgrad/Kokoro-TTS',
+    # StyleTTS Kokoro v0.23
+    # 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
+    # StyleTTS Kokoro v1.0
+    'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
     # MaskGCT (by Amphion)
     # 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
         'return_audio_index': 1,
         'series': 'XTTS',
     },
     # WhisperSpeech
     'collabora/WhisperSpeech': {
         'name': 'WhisperSpeech',
         'series': 'WhisperSpeech',
         'emoji': '😷', # broken space
     },
     # OpenVoice (MyShell.ai)
     'myshell-ai/OpenVoice': {
         'name':'OpenVoice',
         'return_audio_index': 1,
         'series': 'OpenVoice',
     },
     # MetaVoice
     'mrfakename/MetaVoice-1B-v0.1': {
         'name':'MetaVoice',
         'series': 'MetaVoice-1B',
         'emoji': '😷', # broken space
     },
     # xVASynth (CPU)
     'Pendrokar/xVASynth-TTS': {
         'name': 'xVASynth v3',
         'return_audio_index': 0,
         'series': 'xVASynth',
     },
     # CoquiTTS (CPU)
     'coqui/CoquiTTS': {
         'name': 'CoquiTTS',
         'return_audio_index': 0,
         'series': 'CoquiTTS',
     },
     # HierSpeech_TTS
     'LeeSangHoon/HierSpeech_TTS': {
         'name': 'HierSpeech++',
         'series': 'HierSpeech++',
         'emoji': '😒', # unemotional
     },
     # MeloTTS (MyShell.ai)
     'mrfakename/MeloTTS': {
         'name': 'MeloTTS',
         'series': 'Kokoro',
     },
+    # StyleTTS Kokoro v1.0
+    'hexgrad/Kokoro-API': {
+        'name': 'Kokoro v1.0',
+        'function': '/predict',
+        'text_param_index': 'text',
+        'return_audio_index': 0,
+        'is_zero_gpu_space': False,
+        'series': 'Kokoro',
+        'hf_token': os.getenv('KOKORO'), #special
+    },
     # MaskGCT (by Amphion)
     'amphion/maskgct': {
         'name': 'MaskGCT',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'MaskGCT',
+        # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
     },
     'Svngoku/maskgct-audio-lab': {
         'name': 'MaskGCT',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'MaskGCT',
+        # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
     },
+    # GPT-SoVITS v2
     'lj1995/GPT-SoVITS-v2': {
         'name': 'GPT-SoVITS v2',
         'function': '/get_tts_wav',
         'is_zero_gpu_space': True,
         'series': 'GPT-SoVITS',
     },
+    # OuteTTS v0.2 500M
     'ameerazam08/OuteTTS-0.2-500M-Demo': {
         'name': 'OuteTTS v2 500M',
         'function': '/generate_tts',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'OuteTTS',
+        'emoji': '🥵', # requires 300s reserved ZeroGPU!
     },
+    # OuteTTS v0.3 1B
     'OuteAI/OuteTTS-0.3-1B-Demo': {
         'name': 'OuteTTS v3 1B',
         'function': '/generate_tts',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'OuteTTS',
+        'emoji': '🥵', # requires 300s reserved ZeroGPU!
     },
+    # LLaSA 3B
     'srinivasbilla/llasa-3b-tts': {
+        'name': 'LLaSA 3B',
         'function': '/infer',
         'text_param_index': 'target_text',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'llasa 3b',
+        # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
     },
 }
         'sk': os.getenv('KOKORO'),
     },
+    # StyleTTS 2 Kokoro v1.0
+    'hexgrad/Kokoro-API': {
+		'voice': "af_heart",
+		'speed': 1,
+    },
     # maskGCT (by amphion)
     'amphion/maskgct': {
         0: DEFAULT_VOICE_SAMPLE, #prompt_wav

app/sample_caching.py CHANGED Viewed

@@ -144,7 +144,7 @@ def give_cached_sample(session_hash: str, autoplay: bool, request: gr.Request):
     return (
         gr.update(visible=True, value=pair[0].transcript, elem_classes=['blurred-text']),
-        "Synthesize",
         gr.update(visible=True), # r2
         pair[0].modelName, # model1
         pair[1].modelName, # model2

     return (
         gr.update(visible=True, value=pair[0].transcript, elem_classes=['blurred-text']),
+        "Synthesize 🐢",
         gr.update(visible=True), # r2
         pair[0].modelName, # model1
         pair[1].modelName, # model2

app/synth.py CHANGED Viewed

@@ -100,8 +100,16 @@ def synthandreturn(text, autoplay, request: gr.Request):
                     if '/' in model:
                         # Use public HF Space
                         # if (model not in hf_clients):
                         #     hf_clients[model] = Client(model, hf_token=hf_token, headers=hf_headers)
-                        mdl_space = Client(AVAILABLE_MODELS[model], hf_token=hf_token, headers=hf_headers)
                         # print(f"{model}: Fetching endpoints of HF Space")
                         # assume the index is one of the first 9 return params

                     if '/' in model:
                         # Use public HF Space
                         # if (model not in hf_clients):
+                        #     #save client to local variable; can timeout
                         #     hf_clients[model] = Client(model, hf_token=hf_token, headers=hf_headers)
+                        try:
+                            # use TTS host's token
+                            client_token = HF_SPACES[model]['hf_token']
+                        except:
+                            # use arena host's token
+                            client_token = hf_token
+                        # even this may cause 429 Too Many Requests
+                        mdl_space = Client(AVAILABLE_MODELS[model], hf_token=client_token, headers=hf_headers)
                         # print(f"{model}: Fetching endpoints of HF Space")
                         # assume the index is one of the first 9 return params

app/ui_vote.py CHANGED Viewed

@@ -120,7 +120,7 @@ with gr.Blocks() as vote:
     ]
     """
     text,
-        "Synthesize",
         gr.update(visible=True), # r2
         mdl1, # model1
         mdl2, # model2

     ]
     """
     text,
+        "Synthesize 🐢",
         gr.update(visible=True), # r2
         mdl1, # model1
         mdl2, # model2

test_tts_styletts_kokoro_v1.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+from gradio_client import Client, file
+client = Client("hexgrad/Kokoro-API", hf_token=os.getenv('KOKORO'))
+# endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
+# print(endpoints)
+result = client.predict(
+    text='Hello there, you.',
+    voice='af_heart',
+    speed=1,
+    api_name='/predict'
+)
+print(result)
+# text="Oh, hello there!!",
+# voice="af",
+# ps=None,
+# speed=1,
+# trim=3000,
+# use_gpu=False,