# Source captured from a Hugging Face Spaces file view.
# Viewer metadata at capture time: Space status "Sleeping"; file size 4,057 bytes;
# revision hashes shown by the viewer: c82c126, 9b50a09.
# (The viewer's line-number gutter, 1-113, is not part of the program.)
import os
from TTS.utils.download import download_url
from TTS.utils.synthesizer import Synthesizer
import gradio as gr
import tempfile
import torch
import json
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import Vits, VitsCharacters
from TTS.tts.utils.text.tokenizer import TTSTokenizer
import numpy as np
from TTS.utils.audio.numpy_transforms import save_wav
# Maximum number of input characters synthesized per request.
MAX_TXT_LEN = 800

# Template for the local per-voice model directory; "{}" receives the gender.
BASE_DIR = "kbd-vits-tts-{}"

# Download roots of the two per-voice model repositories.
_MALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/"
_FEMALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main/"

# NOTE(review): the male checkpoint (step 56000) is paired with a config
# named for step 35000 -- confirm they are meant to be used together.
MALE_MODEL_URL = _MALE_REPO + "checkpoint_56000.pth"
MALE_CONFIG_URL = _MALE_REPO + "config_35000.json"
MALE_ONNX_MODEL_URL = _MALE_REPO + "onnx/kbd_vits_male.onnx"

FEMALE_MODEL_URL = _FEMALE_REPO + "best_model_56351.pth"
FEMALE_CONFIG_URL = _FEMALE_REPO + "config.json"
FEMALE_ONNX_MODEL_URL = _FEMALE_REPO + "onnx/kbd_vits_female.onnx"
def download_model_and_config(gender):
    """Make sure the checkpoint, config and ONNX export for one voice are on disk.

    The files are stored in ``BASE_DIR.format(gender)`` under the fixed names
    ``model.pth``, ``config.json`` and ``model.onnx``.  A file that is already
    present is not downloaded again; delete it (or the whole directory) to
    force a fresh download.

    Args:
        gender: ``"male"`` selects the male URLs; any other value selects the
            female URLs.  The value is used verbatim in the directory name.

    Returns:
        Path of the directory holding the three files.
    """
    dir_path = BASE_DIR.format(gender)
    # exist_ok=True replaces the race-prone "check, then create" sequence.
    os.makedirs(dir_path, exist_ok=True)

    if gender == "male":
        sources = (
            ("model.pth", MALE_MODEL_URL),
            ("config.json", MALE_CONFIG_URL),
            ("model.onnx", MALE_ONNX_MODEL_URL),
        )
    else:
        sources = (
            ("model.pth", FEMALE_MODEL_URL),
            ("config.json", FEMALE_CONFIG_URL),
            ("model.onnx", FEMALE_ONNX_MODEL_URL),
        )

    for filename, url in sources:
        target = os.path.join(dir_path, filename)
        if os.path.exists(target):
            continue
        # Download under a scratch name and rename once complete, so an
        # interrupted transfer never leaves a truncated file under the final
        # name (which the existence check above would then trust forever).
        # Relies on download_url writing to <dir_path>/<name>, the same
        # assumption the rest of this file makes when it reads the files back.
        scratch_name = filename + ".part"
        download_url(url, dir_path, scratch_name)
        os.replace(os.path.join(dir_path, scratch_name), target)

    return dir_path
# Fetch the files for both voices at import time; tts() reads them from the
# per-voice directories built from BASE_DIR.
for _voice_gender in ("male", "female"):
    download_model_and_config(_voice_gender)
def _reserve_wav_path() -> str:
    """Create an empty temporary ``.wav`` file and return its path."""
    # delete=False keeps the file after the handle is closed, so the audio can
    # be written by path afterwards and the path handed back to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        return temp_file.name


def tts(text: str, voice: str = "Male", use_onnx: bool = True) -> str:
    """Synthesize ``text`` and return the path of the resulting WAV file.

    The input is truncated to ``MAX_TXT_LEN`` characters, every capital Latin
    "I" is replaced with the palochka letter, and the text is lower-cased
    before synthesis.

    Args:
        text: Text to speak.
        voice: ``"Male"`` uses the male model directory; any other value uses
            the female one.
        use_onnx: When true, run ``model.onnx`` through ``Vits.inference_onnx``;
            otherwise run ``model.pth`` through ``Synthesizer``.

    Returns:
        Path to a temporary ``.wav`` file.  The file is not deleted by this
        function.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    model_dir = BASE_DIR.format("male" if voice == "Male" else "female")
    config_file = f"{model_dir}/config.json"

    text = text.replace("I", "ӏ")  # Replace capital "I" with "Palochka" symbol
    text = text.lower()

    if use_onnx:
        config = VitsConfig()
        config.load_json(config_file)

        # The model built from the config carries its own tokenizer, which is
        # the one used below; no separate TTSTokenizer is needed.
        vits = Vits.init_from_config(config)
        vits.load_onnx(f"{model_dir}/model.onnx")

        # [None, :] adds a leading axis of size 1 to the 1-D id array.
        text_inputs = np.asarray(
            vits.tokenizer.text_to_ids(text),
            dtype=np.int64,
        )[None, :]
        audio = vits.inference_onnx(text_inputs)

        # Reserve the output path only after inference succeeded, and write
        # once the temporary handle is closed.
        out_path = _reserve_wav_path()
        # NOTE(review): the sample rate is hard-coded here, whereas the
        # Synthesizer branch picks its own -- confirm 24000 matches the
        # config's audio settings for both voices.
        save_wav(wav=audio[0], path=out_path, sample_rate=24000)
    else:
        synthesizer = Synthesizer(f"{model_dir}/model.pth", config_file)
        wavs = synthesizer.tts(text)

        out_path = _reserve_wav_path()
        synthesizer.save_wav(wavs, out_path)

    return out_path
# Gradio front end: a text box, a voice selector and an ONNX toggle feed
# tts(), whose returned file path is played back by the audio component.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.Textbox(
            label="Text",
            value="Дауэ ущыт?",
        ),
        gr.Radio(
            choices=["Male", "Female"],
            value="Male",
            label="Voice",
        ),
        gr.Checkbox(
            label="Use ONNX",
            value=True,
        ),
    ],
    outputs=gr.Audio(label="Output", type="filepath"),
    title="KBD TTS",
    article="<p style='text-align: center'>The original model belongs to <a href='https://huggingface.co/anzorq' target='_blank'>anzorq</a></p>",
    live=False,
)

# Start the web server without requesting a public share link.
iface.launch(share=False)