ZabanZad_PoC

Sleeping

App Files Community

barghavani committed on Dec 8, 2023

Commit

678aab1

1 Parent(s): 79393d9

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -97

app.py CHANGED Viewed

@@ -1,112 +1,65 @@
-import os
 import tempfile
 import gradio as gr
-from TTS.api import TTS
 from TTS.utils.synthesizer import Synthesizer
-from huggingface_hub import hf_hub_download
-import json
-os.environ["COQUI_TOS_AGREED"] = "1"
-# Define constants
-MODEL_INFO = [
-    ["Xtts-Farsi", "best_model.pth", "config.json", "saillab/xtts_v2_fa_revision1","speakers.pth"],
-]
-MODEL_NAMES = [info[0] for info in MODEL_INFO]
-MAX_TXT_LEN = 400
-TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
-model_files = {}
-config_files = {}
-speaker_files = {}
-synthesizers = {}
-def update_config_speakers_file_recursive(config_dict, speakers_path):
-    if "speakers_file" in config_dict:
-        config_dict["speakers_file"] = speakers_path
-    for key, value in config_dict.items():
-        if isinstance(value, dict):
-            update_config_speakers_file_recursive(value, speakers_path)
-def update_config_speakers_file(config_path, speakers_path):
-    with open(config_path, 'r') as f:
-        config = json.load(f)
-    update_config_speakers_file_recursive(config, speakers_path)
-    with open(config_path, 'w') as f:
-        json.dump(config, f, indent=4)
-for info in MODEL_INFO:
-    model_name, model_file, config_file, repo_name = info[:4]
-    speaker_file = info[4] if len(info) == 5 else None
-    print(f"|> Downloading: {model_name}")
-    model_files[model_name] = hf_hub_download(repo_id=repo_name, filename=model_file, use_auth_token=TOKEN)
-    config_files[model_name] = hf_hub_download(repo_id=repo_name, filename=config_file, use_auth_token=TOKEN)
-    if speaker_file:
-        speaker_files[model_name] = hf_hub_download(repo_id=repo_name, filename=speaker_file, use_auth_token=TOKEN)
-        update_config_speakers_file(config_files[model_name], speaker_files[model_name])
-        print(speaker_files[model_name])
-        synthesizer = Synthesizer(
-            tts_checkpoint=model_files[model_name],
-            tts_config_path=config_files[model_name],
-            tts_speakers_file=speaker_files[model_name],
-            use_cuda=False
-            )
-    elif speaker_file is None:
-        synthesizer = Synthesizer(
-            tts_checkpoint=model_files[model_name],
-            tts_config_path=config_files[model_name],
-            use_cuda=False
-        )
-    synthesizers[model_name] = synthesizer
-def synthesize(text: str, model_name: str, speaker_name=None) -> str:
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
-        print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")
-    synthesizer = synthesizers[model_name]
     if synthesizer is None:
-        raise NameError("Model not found")
-    if not synthesizer.tts_speakers_file:
-        wavs = synthesizer.tts(text)
-    elif synthesizer.tts_speakers_file:
-        if not speaker_name:
-            wavs = synthesizer.tts(text, speaker_name=None)
-        else:
-            wavs = synthesizer.tts(text, speaker_name=speaker_name)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         synthesizer.save_wav(wavs, fp)
         return fp.name
-def update_options(model_name):
-    synthesizer = synthesizers[model_name]
-    if model_name is MODEL_NAMES[1]:
-        speakers = synthesizer.tts_model.speaker_manager.speaker_names
-        return speakers
-    else:
-        return []
-iface = gr.Interface(
-    fn=synthesize,
-    inputs=[
-        gr.Textbox(label="Enter Text to Synthesize:", value="زین همرهان سست عناصر، دلم گرفت."),
-        gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=MODEL_NAMES[0], type="value"),
-        gr.Dropdown(label="Select Speaker", choices=update_options(MODEL_NAMES[1]), type="value", default=None)
-    ],
-    outputs=gr.Audio(label="Output", type='filepath'),
-    examples=[["زین همرهان سست عناصر، دلم گرفت.", MODEL_NAMES[0], ""]],
-    title='Persian TTS Playground',
-    description="""
-    ### Persian text to speech model demo.
-    #### Pick a speaker for MultiSpeaker models. (for single speaker go for speaker-0)
-    """,
-    article="",
-    live=False
-)
-iface.launch()

 import tempfile
+from typing import Optional
+from TTS.config import load_config
 import gradio as gr
+import numpy as np
+from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
+MODELS = {}
+SPEAKERS = {}
+MAX_TXT_LEN = 100
+manager = ModelManager()
+MODEL_NAMES = ["saillab/xtts_v2_fa_revision1"]
+def tts(text: str):
+    model_name = "saillab/xtts_v2_fa_revision1"
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
+        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
+    print(text, model_name)
+    model_path, config_path, model_item = manager.download_model(model_name)
+    vocoder_name: Optional[str] = model_item["default_vocoder"]
+    vocoder_path = None
+    vocoder_config_path = None
+    if vocoder_name is not None:
+        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+    synthesizer = Synthesizer(model_path, config_path, None, None, vocoder_path, vocoder_config_path,)
     if synthesizer is None:
+        raise NameError("model not found")
+    wavs = synthesizer.tts(text, None)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         synthesizer.save_wav(wavs, fp)
         return fp.name
+title = """<h1 align="center">🐸💬 CoquiTTS Playground </h1>"""
+with gr.Blocks(analytics_enabled=False) as demo:
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("GitHub Markdown Details")
+        with gr.Column():
+            gr.Markdown("GitHub Markdown Details")
+    with gr.Row():
+        gr.Markdown("GitHub Markdown Details")
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.inputs.Textbox(
+                label="Input Text",
+                default="This sentence has been generated by a speech synthesis system.",
+            )
+            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
+        with gr.Column():
+            output_audio = gr.outputs.Audio(label="Output", type="filepath")
+    tts_button.click(
+        tts,
+        inputs=[input_text],
+        outputs=[output_audio],
+    )
+demo.queue(concurrency_count=16).launch(debug=True)