Spaces:
Sleeping
Sleeping
File size: 1,247 Bytes
2c04641 617d161 8c33263 2c04641 617d161 2c04641 617d161 2c04641 617d161 8c33263 2c04641 617d161 2c04641 617d161 2c04641 a4736d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from TTS.api import TTS
import tempfile
import os
import soundfile as sf
import numpy as np
# Identifier of the pretrained model handed to the TTS constructor below.
model_name = "tts_models/en/vctk/vits"

# Hand-picked speaker IDs, kept in two groups.
# NOTE(review): the M/F suffixes presumably mean male/female voices -- confirm.
promisingM = [
    "p282",
    "p301",
    "p234",
    "p232",
    "p256",
    "p267",
    "p272",
]
promisingF = [
    "p311",
    "p361",
    "p263",
    "p306",
    "p259",
]

# Combined list: the first group followed by the second.
speakers = [*promisingM, *promisingF]

# Created once at import time; text_to_speech() reuses this instance.
tts = TTS(model_name)
def text_to_speech(sentence, speaker_name):
    """Synthesize ``sentence`` with the voice ``speaker_name``.

    The module-level ``tts`` object renders the speech into a temporary
    file, which is read back with ``soundfile`` and converted to 16-bit
    integer samples.

    Args:
        sentence: Text to be spoken.
        speaker_name: Speaker identifier forwarded to ``tts.tts_to_file``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy
        ``int16`` array.

    Raises:
        Whatever ``tts.tts_to_file`` or ``soundfile.read`` raise; the
        temporary file is removed in that case as well.
    """
    # Reserve a unique path and close the handle right away so the TTS
    # library can open the file by name itself. delete=False keeps the file
    # on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name

    try:
        tts.tts_to_file(
            text=sentence,
            speaker=speaker_name,
            file_path=wav_path,
            verbose=False,
        )
        # Read from the path we created rather than from the return value of
        # tts_to_file, so this does not depend on what that call returns.
        audio, sample_rate = sf.read(wav_path, dtype="float32")
    finally:
        # Always clean up, even when synthesis or decoding fails.
        os.remove(wav_path)

    # Clip before scaling so out-of-range samples saturate instead of
    # wrapping around when cast to int16.
    audio_bytes = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
    return sample_rate, audio_bytes
# Web UI: two text inputs (sentence, speaker ID) mapped to one audio output.
# Labels are set on the components themselves because gr.Interface has no
# `inputs_label` / `outputs_label` parameters.
iface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(label="Enter Sentence"),
        gr.Textbox(label="Enter Speaker Name"),
    ],
    outputs=gr.Audio(label="Audio"),
    # Each example row supplies one value per input, in order.
    examples=[
        ["Hello, this is a sample sentence.", "p282"],
        ["How are you doing?", "p301"],
    ],
)
if __name__ == "__main__":
    # Turn on request queuing, then serve on all network interfaces.
    queued_app = iface.queue()
    queued_app.launch(server_name="0.0.0.0")
|