Spaces:

robinhad
/

qirimtatar-tts

Running

App Files Files Community

Yurii Paniv committed on Nov 4, 2022

Commit

01e1229

1 Parent(s): b812929

Add initial model

Browse files

Files changed (6) hide show

.gitignore +6 -0
README.md +4 -1
app.py +61 -14
crh_tts/__init__.py +0 -0
crh_tts/tts.py +84 -0
requirements.txt +3 -1

.gitignore CHANGED Viewed

@@ -127,3 +127,9 @@ dmypy.json
 # Pyre type checker
 .pyre/

 # Pyre type checker
 .pyre/
+# Model files
+config.json
+speakers.pth
+model.pth

README.md CHANGED Viewed

@@ -19,4 +19,7 @@ Online demo: https://huggingface.co/spaces/robinhad/qirimli-tts
 # Attribution
-Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)

 # Attribution
+- Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)
+- Crimean Tatar dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
+- Huge thanks to Nuri, Arslan, and Kemal for lending their voices
+- Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)

app.py CHANGED Viewed

@@ -1,41 +1,88 @@
-from unittest import result
 import gradio as gr
 from crh_transliterator.transliterator import transliterate
 from crh_preprocessor.preprocessor import preprocess
 from datetime import datetime
-def tts(text: str) -> str:
-    result = transliterate(text)
-    text = preprocess(result)
-    print("============================")
-    print("Original text:", text)
-    print("Time:", datetime.utcnow())
-    return text
 badge = (
     "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"
 )
 with open("README.md") as file:
     article = file.read()
     article = article[article.find("---\n", 4) + 5 : :]
 iface = gr.Interface(
     fn=tts,
     inputs=[
         gr.components.Textbox(
             label="Input",
-            value="Please input your sentence.",
         ),
     ],
-    outputs="text",
-    examples=[
-        ["Selâm! İşler nasıl?"],
-        ["Sağlıqnen qalıñız! Sağlıqnen barıñız! "],
-        ["Селям! Ишлер насыл?"],
     ],
     article=article + f'\n  <center><img src="{badge}" alt="visitors badge"/></center>',
 )
 iface.launch()

 import gradio as gr
 from crh_transliterator.transliterator import transliterate
 from crh_preprocessor.preprocessor import preprocess
 from datetime import datetime
+import tempfile
+import gradio as gr
+from datetime import datetime
+from enum import Enum
+from crh_tts.tts import TTS, Voices
+from torch.cuda import is_available
+class VoiceOption(Enum):
+    """User-facing voice labels (in Ukrainian) offered as radio choices in the UI.
+    `tts` maps each label to the matching `Voices` speaker name."""
+    Nuri = "Нурі (жіночий) 👩"  # "Nuri (female)"
+    Arslan = "Арслан (чоловічий) 👨"  # "Arslan (male)"
+    Kemal = "Кемаль (чоловічий) 👨"  # "Kemal (male)"
+# Report at startup whether a CUDA device is available.
+print(f"CUDA available? {is_available()}")
+# Image URL of the visitor badge appended to the article below.
 badge = (
     "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"
 )
+# Build the engine once at import time; `tts` reuses this single instance.
+crh_tts = TTS(use_cuda=is_available())
+def tts(text: str, voice: str):
+    print("============================")
+    print("Original text:", text)
+    print("Voice", voice)
+    print("Time:", datetime.utcnow())
+    voice_mapping = {
+        VoiceOption.Nuri.value: Voices.Nuri.value,
+        VoiceOption.Arslan.value: Voices.Arslan.value,
+        VoiceOption.Kemal.value: Voices.Kemal.value,
+    }
+    speaker_name = voice_mapping[voice]
+    text_limit = 7200
+    text = (
+        text if len(text) < text_limit else text[0:text_limit]
+    )  # mitigate crashes on hf space
+    result = transliterate(text)
+    text = preprocess(result)
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        _, text = crh_tts.tts(text, speaker_name, fp)
+        return fp.name, text
+# Use the README as the page article, keeping only the text that follows the
+# first "---" line found after offset 4 (presumably the end of the front-matter
+# header — confirm against README.md).
 with open("README.md") as file:
     article = file.read()
     article = article[article.find("---\n", 4) + 5 : :]
+# Gradio UI: text box + voice selector in, synthesized audio + processed text out.
 iface = gr.Interface(
     fn=tts,
     inputs=[
         gr.components.Textbox(
             label="Input",
+            value="Qırımtatarlar! Селям! Ишлер насыл?",
+        ),
+        gr.components.Radio(
+            label="Голос",  # "Voice"
+            choices=[option.value for option in VoiceOption],
+            value=VoiceOption.Nuri.value,
         ),
     ],
+    outputs=[
+        gr.components.Audio(label="Output"),
+        gr.components.Textbox(label="Наголошений текст"),  # "Stressed text"
     ],
+    title="Кримськотатарський синтез мовлення",  # "Crimean Tatar speech synthesis"
+    description="Кримськотатарський Text-to-Speech за допомогою Coqui TTS",  # "Crimean Tatar Text-to-Speech using Coqui TTS"
     article=article + f'\n  <center><img src="{badge}" alt="visitors badge"/></center>',
+    # Each example is a [text, voice label] pair matching the two inputs above.
+    examples=[
+        ["Selâm! İşler nasıl?", VoiceOption.Kemal.value],
+        [
+            "Qırımtatarlar üç subetnik gruppasından er birisiniñ (tatlar, noğaylar ve yalıboylular) öz şivesi bar.",
+            VoiceOption.Arslan.value,
+        ],
+        ["Селям! Ишлер насыл?", VoiceOption.Nuri.value],
+    ],
 )
+# Start the Gradio app.
 iface.launch()

crh_tts/__init__.py ADDED Viewed

File without changes

crh_tts/tts.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from io import BytesIO
+import requests
+from os.path import exists, join
+from TTS.utils.synthesizer import Synthesizer
+from enum import Enum
+from crh_preprocessor.preprocessor import preprocess
+from torch import no_grad
+class Voices(Enum):
+    """List of available voices for the model.
+    Each value is the speaker name that `TTS.tts` validates against and passes
+    to the synthesizer as `speaker_name`."""
+    Arslan = "arslan"
+    Nuri = "nuri"
+    Kemal = "kemal"
+class TTS:
+    """ """
+    def __init__(self, use_cuda=False) -> None:
+        """
+        Class to setup a text-to-speech engine, from download to model creation.  \n
+        Downloads or uses files from `cache_folder` directory.  \n
+        By default stores in current directory."""
+        self.__setup_cache(use_cuda=use_cuda)
+    def tts(self, text: str, voice: str, output_fp=BytesIO()):
+        """
+        Run a Text-to-Speech engine and output to `output_fp` BytesIO-like object.
+        - `text` - your model input text.
+        - `voice` - one of predefined voices from `Voices` enum.
+        - `output_fp` - file-like object output. Stores in RAM by default.
+        """
+        if voice not in [option.value for option in Voices]:
+            raise ValueError(
+                f"Invalid value for voice selected! Please use one of the following values: {', '.join([option.value for option in Voices])}."
+            )
+        text = preprocess(text)
+        with no_grad():
+            wavs = self.synthesizer.tts(text, speaker_name=voice)
+            self.synthesizer.save_wav(wavs, output_fp)
+        output_fp.seek(0)
+        return output_fp, text
+    def __setup_cache(self, use_cuda=False):
+        """Downloads models and stores them into `cache_folder`. By default stores in current directory."""
+        print("downloading uk/crh/vits-tts")
+        release_number = "v0.0.1"
+        model_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/model.pth"
+        config_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/config.json"
+        speakers_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/speakers.pth"
+        cache_folder = "."
+        model_path = join(cache_folder, "model.pth")
+        config_path = join(cache_folder, "config.json")
+        speakers_path = join(cache_folder, "speakers.pth")
+        self.__download(model_link, model_path)
+        self.__download(config_link, config_path)
+        self.__download(speakers_link, speakers_path)
+        self.synthesizer = Synthesizer(
+            model_path, config_path, speakers_path, None, None, use_cuda=use_cuda
+        )
+        if self.synthesizer is None:
+            raise NameError("Model not found")
+    def __download(self, url, file_name):
+        """Downloads file from `url` into local `file_name` file."""
+        if not exists(file_name):
+            print(f"Downloading {file_name}")
+            r = requests.get(url, allow_redirects=True)
+            with open(file_name, "wb") as file:
+                file.write(r.content)
+        else:
+            print(f"Found {file_name}. Skipping download...")

requirements.txt CHANGED Viewed

	@@ -1 +1,3 @@
1	- gradio==3.6

+gradio==3.6
+torch>=1.13
+TTS==0.8.0