Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
01e1229
1
Parent(s):
b812929
Add initial model
Browse files- .gitignore +6 -0
- README.md +4 -1
- app.py +61 -14
- crh_tts/__init__.py +0 -0
- crh_tts/tts.py +84 -0
- requirements.txt +3 -1
.gitignore
CHANGED
|
@@ -127,3 +127,9 @@ dmypy.json
|
|
| 127 |
|
| 128 |
# Pyre type checker
|
| 129 |
.pyre/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
# Pyre type checker
|
| 129 |
.pyre/
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# Model files
|
| 133 |
+
config.json
|
| 134 |
+
speakers.pth
|
| 135 |
+
model.pth
|
README.md
CHANGED
|
@@ -19,4 +19,7 @@ Online demo: https://huggingface.co/spaces/robinhad/qirimli-tts
|
|
| 19 |
|
| 20 |
# Attribution
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# Attribution
|
| 21 |
|
| 22 |
+
- Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)
|
| 23 |
+
- Crimean Tatar dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
|
| 24 |
+
- Huge thanks for their voices to: Nuri, Arslan, Kemal
|
| 25 |
+
- Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)
|
app.py
CHANGED
|
@@ -1,41 +1,88 @@
|
|
| 1 |
-
from unittest import result
|
| 2 |
import gradio as gr
|
| 3 |
from crh_transliterator.transliterator import transliterate
|
| 4 |
from crh_preprocessor.preprocessor import preprocess
|
| 5 |
from datetime import datetime
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
badge = (
|
| 18 |
"https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"
|
| 19 |
)
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
with open("README.md") as file:
|
| 22 |
article = file.read()
|
| 23 |
article = article[article.find("---\n", 4) + 5 : :]
|
| 24 |
|
|
|
|
| 25 |
iface = gr.Interface(
|
| 26 |
fn=tts,
|
| 27 |
inputs=[
|
| 28 |
gr.components.Textbox(
|
| 29 |
label="Input",
|
| 30 |
-
value="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
),
|
| 32 |
],
|
| 33 |
-
outputs=
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
["Sağlıqnen qalıñız! Sağlıqnen barıñız! "],
|
| 37 |
-
["Селям! Ишлер насыл?"],
|
| 38 |
],
|
|
|
|
|
|
|
| 39 |
article=article + f'\n <center><img src="{badge}" alt="visitors badge"/></center>',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
)
|
| 41 |
iface.launch()
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from crh_transliterator.transliterator import transliterate
|
| 3 |
from crh_preprocessor.preprocessor import preprocess
|
| 4 |
from datetime import datetime
|
| 5 |
|
| 6 |
+
import tempfile
|
| 7 |
+
import gradio as gr
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from enum import Enum
|
| 10 |
+
from crh_tts.tts import TTS, Voices
|
| 11 |
+
from torch.cuda import is_available
|
| 12 |
|
| 13 |
+
|
| 14 |
+
class VoiceOption(Enum):
    """Voice labels offered in the demo's voice selector.

    Each member value is the exact label string shown to the user; the
    member names match the speaker names used by `Voices`.
    """

    # Labels are runtime UI strings and must stay exactly as written.
    Nuri = "Нурі (жіночий) 👩"
    Arslan = "Арслан (чоловічий) 👨"
    Kemal = "Кемаль (чоловічий) 👨"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Report once at startup whether CUDA is usable for inference.
print(f"CUDA available? {is_available()}")


# Image URL of the visitor counter rendered at the bottom of the page.
badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"

# Single engine instance created at import time and reused by every request.
crh_tts = TTS(use_cuda=is_available())
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def tts(text: str, voice: str):
    """Synthesize `text` with the chosen `voice`; callback for the Gradio UI.

    - `text` - raw user input; only the first 7200 characters are used.
    - `voice` - one of the `VoiceOption` label strings.

    Returns a tuple of the path to a temporary ``.wav`` file holding the
    audio and the text string returned by the TTS engine.
    Raises `KeyError` when `voice` is not a known `VoiceOption` label.
    """
    print("============================")
    print("Original text:", text)
    print("Voice", voice)
    print("Time:", datetime.utcnow())

    # Map each UI label to the model's speaker name; both enums use the
    # same member names, so the lookup is by name.
    speaker_by_label = {
        option.value: Voices[option.name].value for option in VoiceOption
    }
    speaker_name = speaker_by_label[voice]

    text_limit = 7200
    text = text[:text_limit]  # mitigate crashes on hf space

    transliterated = transliterate(text)
    text = preprocess(transliterated)

    # delete=False keeps the file on disk after it is closed, so the
    # returned path is still valid for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        _, text = crh_tts.tts(text, speaker_name, fp)
    return fp.name, text
|
| 52 |
+
|
| 53 |
+
|
| 54 |
# The README doubles as the article shown below the demo. Read it as UTF-8
# explicitly so decoding does not depend on the platform's locale default.
with open("README.md", encoding="utf-8") as file:
    article = file.read()
    # Drop the leading front-matter block: search for the next "---\n"
    # starting at offset 4 (past the opening delimiter), then skip that
    # 4-character delimiter plus one more character.
    # NOTE(review): the extra character is presumably the newline that
    # follows the front matter - confirm against README.md.
    article = article[article.find("---\n", 4) + 5 :]


# Demo UI: free-text input plus a voice selector in, audio plus the text
# returned by the engine out.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.components.Textbox(
            label="Input",
            value="Qırımtatarlar! Селям! Ишлер насыл?",
        ),
        gr.components.Radio(
            label="Голос",
            choices=[option.value for option in VoiceOption],
            value=VoiceOption.Nuri.value,
        ),
    ],
    outputs=[
        gr.components.Audio(label="Output"),
        gr.components.Textbox(label="Наголошений текст"),
    ],
    title="Кримськотатарський синтез мовлення",
    description="Кримськотатарський Text-to-Speech за допомогою Coqui TTS",
    article=article + f'\n <center><img src="{badge}" alt="visitors badge"/></center>',
    examples=[
        ["Selâm! İşler nasıl?", VoiceOption.Kemal.value],
        [
            "Qırımtatarlar üç subetnik gruppasından er birisiniñ (tatlar, noğaylar ve yalıboylular) öz şivesi bar.",
            VoiceOption.Arslan.value,
        ],
        ["Селям! Ишлер насыл?", VoiceOption.Nuri.value],
    ],
)
iface.launch()
|
crh_tts/__init__.py
ADDED
|
File without changes
|
crh_tts/tts.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from io import BytesIO
|
| 2 |
+
import requests
|
| 3 |
+
from os.path import exists, join
|
| 4 |
+
from TTS.utils.synthesizer import Synthesizer
|
| 5 |
+
from enum import Enum
|
| 6 |
+
from crh_preprocessor.preprocessor import preprocess
|
| 7 |
+
from torch import no_grad
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Voices(Enum):
    """Speaker names accepted by `TTS.tts` as its `voice` argument."""

    # Values are the strings handed to the synthesizer as `speaker_name`.
    Arslan = "arslan"
    Nuri = "nuri"
    Kemal = "kemal"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class TTS:
    """Text-to-speech engine wrapping a Coqui TTS `Synthesizer`.

    Creating an instance makes sure the model files exist in the cache
    folder (downloading the missing ones) and loads the synthesizer.
    """

    def __init__(self, use_cuda=False) -> None:
        """
        Class to setup a text-to-speech engine, from download to model creation. \n
        Downloads or uses files from `cache_folder` directory. \n
        By default stores in current directory.

        - `use_cuda` - passed through to the `Synthesizer` constructor.
        """
        self.__setup_cache(use_cuda=use_cuda)

    def tts(self, text: str, voice: str, output_fp=None):
        """
        Run a Text-to-Speech engine and output to `output_fp` BytesIO-like object.
        - `text` - your model input text.
        - `voice` - one of predefined voices from `Voices` enum.
        - `output_fp` - file-like object output. Stores in RAM by default.

        Returns a tuple `(output_fp, text)`: the output object rewound to
        position 0 and the preprocessed text that was synthesized.
        Raises `ValueError` when `voice` is not a value of the `Voices` enum.
        """
        valid_voices = [option.value for option in Voices]
        if voice not in valid_voices:
            raise ValueError(
                f"Invalid value for voice selected! Please use one of the following values: {', '.join(valid_voices)}."
            )

        # A `BytesIO()` default in the signature would be created once and
        # shared by every call; allocate a fresh buffer per call instead.
        if output_fp is None:
            output_fp = BytesIO()

        text = preprocess(text)

        # Gradients are not needed for inference.
        with no_grad():
            wavs = self.synthesizer.tts(text, speaker_name=voice)
            self.synthesizer.save_wav(wavs, output_fp)

        # Rewind so the caller can read the audio from the start.
        output_fp.seek(0)

        return output_fp, text

    def __setup_cache(self, use_cuda=False):
        """Downloads models and stores them into `cache_folder`. By default stores in current directory.

        After the files are in place, builds `self.synthesizer` from them.
        """
        print("downloading uk/crh/vits-tts")
        release_number = "v0.0.1"
        release_url = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}"

        cache_folder = "."

        # Fetch every model file that is not already cached, in a fixed order.
        local_paths = {}
        for asset in ("model.pth", "config.json", "speakers.pth"):
            local_paths[asset] = join(cache_folder, asset)
            self.__download(f"{release_url}/{asset}", local_paths[asset])

        self.synthesizer = Synthesizer(
            local_paths["model.pth"],
            local_paths["config.json"],
            local_paths["speakers.pth"],
            None,
            None,
            use_cuda=use_cuda,
        )

    def __download(self, url, file_name):
        """Downloads file from `url` into local `file_name` file.

        Does nothing when `file_name` already exists. Raises
        `requests.HTTPError` when the server answers with an error status,
        in which case no file is written.
        """
        if exists(file_name):
            print(f"Found {file_name}. Skipping download...")
            return

        print(f"Downloading {file_name}")
        r = requests.get(url, allow_redirects=True)
        # Without this check an error page would be saved under the model's
        # name and then treated as a valid cached file on every later run.
        r.raise_for_status()
        with open(file_name, "wb") as file:
            file.write(r.content)
|
requirements.txt
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
-
gradio==3.6
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==3.6
|
| 2 |
+
torch>=1.13
|
| 3 |
+
TTS==0.8.0
|