# tts-server / app.py
# shiveshnavin's picture
# Fixes
# a412d05
import gradio as gr
from TTS.api import TTS
import tempfile
import os
import soundfile as sf
import numpy as np
# Identifier of the model handed to the TTS constructor below.
model_name = "tts_models/en/vctk/vits"

# Hand-picked speaker ids for this model, kept in two groups.
# NOTE(review): the M/F suffixes presumably mean male/female voices — confirm.
promisingM = [
    "p282",
    "p301",
    "p234",
    "p232",
    "p256",
    "p267",
    "p272",
]
promisingF = [
    "p311",
    "p361",
    "p263",
    "p306",
    "p259",
]

# All hand-picked speaker ids, first group followed by the second.
speakers = [*promisingM, *promisingF]

# Single shared TTS instance, created once at import time.
tts = TTS(model_name)
def text_to_speech(sentence, speaker_name):
    """Synthesize *sentence* with the given speaker and return the audio.

    The module-level ``tts`` instance writes the speech to a temporary WAV
    file, which is read back and converted to 16-bit samples. The temporary
    file is always removed, even when synthesis or reading fails.

    Args:
        sentence: Text to speak.
        speaker_name: Speaker id forwarded to ``tts.tts_to_file`` as
            ``speaker`` (e.g. ``"p282"``).

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy
        ``int16`` array, as used by the "audio" output of the interface.
    """
    # Only a unique path is needed: close our own handle right away so the
    # TTS engine can open the file by name (reopening a still-open
    # NamedTemporaryFile is not portable across platforms).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name

    try:
        # Use the path we created rather than the call's return value, so
        # this does not depend on what tts_to_file happens to return.
        tts.tts_to_file(
            text=sentence,
            speaker=speaker_name,
            file_path=wav_path,
            verbose=False,
        )
        audio, sample_rate = sf.read(wav_path, dtype="float32")
    finally:
        # delete=False means cleanup is our job; do it on every exit path.
        try:
            os.remove(wav_path)
        except FileNotFoundError:
            pass

    # Clip before scaling so out-of-range samples saturate instead of
    # wrapping around when cast to int16.
    audio_bytes = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
    return sample_rate, audio_bytes
# Web UI: two text inputs (sentence, speaker id) mapped to one audio output.
# Labels are set on the components themselves; `inputs_label` and
# `outputs_label` are not documented gr.Interface parameters, so passing them
# there does not label anything.
iface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(label="Enter Sentence"),
        gr.Textbox(label="Enter Speaker Name"),
    ],
    outputs=gr.Audio(label="Audio"),
    # Each example row is [sentence, speaker id], matching the input order.
    examples=[
        ["Hello, this is a sample sentence.", "p282"],
        ["How are you doing?", "p301"],
    ],
)
if __name__ == "__main__":
    # Turn on request queuing, then serve on all network interfaces.
    queued_iface = iface.queue()
    queued_iface.launch(server_name="0.0.0.0")