Spaces:
Running
Running
File size: 3,096 Bytes
7e9eac8 d853661 7e9eac8 c499d81 7e9eac8 b6e1649 7e9eac8 b6e1649 7e9eac8 c499d81 ee5fc59 7e9eac8 fa5f226 ee5fc59 7e9eac8 c499d81 7e9eac8 ebb01fc 7e9eac8 c499d81 7e9eac8 c499d81 7e9eac8 c499d81 ee5fc59 7e9eac8 c499d81 7e9eac8 c499d81 fa5f226 c499d81 ee5fc59 c499d81 ee5fc59 c499d81 fa5f226 7e9eac8 c499d81 dd6a80b 6a19fc4 dd6a80b c499d81 dd6a80b 7e9eac8 dd6a80b c499d81 7e9eac8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import logging
import os
import uuid
import time
import gradio as gr
import soundfile as sf
from model import get_pretrained_model, language_to_models
# Function to update model dropdown based on language selection
#def update_model_dropdown(language):
# if language in language_to_models:
# choices = language_to_models[language]
# return gr.Dropdown.update(choices=choices, value=choices[0])
# else:
# raise ValueError(f"Unsupported language: {language}")
def update_model_dropdown(language: str):
    """Build the model dropdown for the newly selected language.

    Looks up ``language`` in ``language_to_models`` and returns an
    interactive ``gr.Dropdown`` listing that language's models, with the
    first model pre-selected.

    Raises:
        ValueError: If ``language`` is not a key of ``language_to_models``.
    """
    # Guard clause: reject unknown languages before touching the mapping.
    if language not in language_to_models:
        raise ValueError(f"Unsupported language: {language}")

    models = language_to_models[language]
    default_model = models[0]
    return gr.Dropdown(choices=models, value=default_model, interactive=True)
# Function to process text to speech conversion
def process(language, repo_id, text, sid, speed):
    """Synthesize ``text`` to a WAV file and return the file's name.

    Args:
        language: Language selected in the UI. Not used by the synthesis
            itself; accepted because the click handler passes it.
        repo_id: Model identifier forwarded to ``get_pretrained_model``.
        text: Text to convert to speech.
        sid: Speaker ID; converted with ``int()`` (the UI passes a string).
        speed: Speed setting forwarded to ``get_pretrained_model``.

    Returns:
        Name of a newly written 16-bit PCM WAV file (``<uuid4>.wav``),
        relative to the current working directory.

    Raises:
        ValueError: If ``sid`` cannot be converted to ``int`` or if the
            model produced no audio samples.
    """
    # Lazy %-style arguments: same message, formatted only if emitted.
    logging.info("Input text: %s, SID: %s, Speed: %s", text, sid, speed)

    sid = int(sid)
    tts = get_pretrained_model(repo_id, speed)

    start = time.time()
    audio = tts.generate(text, sid=sid)
    elapsed_seconds = time.time() - start

    # An empty result would make the duration zero and the RTF computation
    # below divide by zero; fail with a clear message instead.
    # len() is used (not truthiness) so array-like sample buffers work too.
    if len(audio.samples) == 0:
        raise ValueError(
            "No audio was generated. Check the input text and earlier log messages."
        )

    duration = len(audio.samples) / audio.sample_rate
    rtf = elapsed_seconds / duration  # real-time factor: processing time / audio time

    info = f"""
Wave duration: {duration:.3f} s<br/>
Processing time: {elapsed_seconds:.3f} s<br/>
RTF: {rtf:.3f}<br/>
"""
    logging.info(info)

    # A random UUID keeps output files of different requests from colliding.
    filename = f"{uuid.uuid4()}.wav"
    sf.write(filename, audio.samples, samplerate=audio.sample_rate, subtype="PCM_16")
    return filename
# Interface layout
# Build the Gradio UI: language/model pickers, text and synthesis inputs,
# and an audio player for the result.
demo = gr.Blocks()
with demo:
    gr.Markdown("# Text to Voice")
    gr.Markdown(
        "High Fidelity TTS. Visit <a href='https://ruslanmv.com/' target='_blank'>ruslanmv.com</a> for more information."
    )

    language_choices = list(language_to_models.keys())
    initial_models = language_to_models[language_choices[0]]

    language_radio = gr.Radio(
        label="Language",
        choices=language_choices,
        value=language_choices[0],
    )
    # Pre-select the first model so that pressing "Submit" before touching
    # the dropdown still sends a valid model, and so the initial state matches
    # what update_model_dropdown() produces after a language change.
    model_dropdown = gr.Dropdown(
        label="Select a model",
        choices=initial_models,
        value=initial_models[0] if initial_models else None,
    )
    # Refresh the model list whenever a different language is chosen.
    language_radio.change(
        update_model_dropdown,
        inputs=language_radio,
        outputs=model_dropdown,
    )

    input_text = gr.Textbox(lines=10, label="Enter text to convert to speech")
    input_sid = gr.Textbox(
        label="Speaker ID",
        value="0",
        placeholder="Valid only for multi-speaker model",
    )
    input_speed = gr.Slider(
        minimum=0.1,
        maximum=10,
        value=1,
        step=0.1,
        label="Speed (larger->faster; smaller->slower)",
    )

    output_audio = gr.Audio(label="Generated audio")
    #output_info = gr.HTML(label="Info")

    input_button = gr.Button("Submit")
    # process() returns the path of the generated WAV, shown in output_audio.
    input_button.click(
        process,
        inputs=[language_radio, model_dropdown, input_text, input_sid, input_speed],
        outputs=[output_audio],
    )
# Download necessary data
def download_espeak_ng_data():
    """Download the espeak-ng data archive into /tmp and unpack it there.

    Runs ``wget`` and then ``tar`` with /tmp as the working directory. The
    operation is best-effort: a failing or missing tool is logged and the
    function returns without raising, but ``tar`` is no longer run on an
    archive that failed to download.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    work_dir = "/tmp"
    archive = "espeak-ng-data.tar.bz2"
    url = "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2"

    commands = (
        # -O overwrites any earlier download; without it a re-run saves to
        # "<archive>.1" and tar would unpack the stale original.
        ["wget", "-qq", "-O", archive, url],
        ["tar", "xf", archive],
    )
    for command in commands:
        try:
            # Argument lists (no shell) and check=True so failures surface.
            subprocess.run(command, cwd=work_dir, check=True)
        except (OSError, subprocess.CalledProcessError) as err:
            # Use a named logger: the module-level logging.error() would
            # implicitly configure the root logger and turn a later
            # logging.basicConfig() call into a no-op.
            logging.getLogger(__name__).error(
                "Failed to run %s in %s: %s", command[0], work_dir, err
            )
            return
if __name__ == "__main__":
    # Script entry point: set up INFO-level logging, fetch the espeak-ng
    # data, then start the Gradio app.
    logging.basicConfig(level=logging.INFO)
    download_espeak_ng_data()
    demo.launch()
|