# Hugging Face Spaces status: Running
import gradio as gr | |
import soundfile as sf | |
import numpy as np | |
from kittentts import KittenTTS | |
# Instantiate the KittenTTS model once at module import; generate_speech()
# reuses this single instance for every request.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")
# Voice identifiers offered in the UI dropdown and forwarded to
# model.generate(). Order matters: the dropdown default is picked by index.
AVAILABLE_VOICES = [
    'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',
    'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f',
]
# Sample rate (Hz) paired with the waveform handed to gr.Audio.
# NOTE(review): 24000 follows the KittenTTS README example
# (sf.write(..., audio, 24000)) - confirm if the model version changes.
KITTEN_TTS_SAMPLE_RATE = 24000


def generate_speech(text, voice):
    """Synthesize ``text`` with KittenTTS and return it for the Gradio UI.

    Args:
        text: Text to speak. ``None``, empty and whitespace-only input are
            rejected without calling the model.
        voice: Voice identifier forwarded to ``model.generate``.

    Returns:
        An ``(audio, status)`` pair. On success ``audio`` is a
        ``(sample_rate, samples)`` tuple (the in-memory form accepted by
        ``gr.Audio``) where ``samples`` is a peak-normalized float32 array.
        On empty input or failure ``audio`` is ``None``. ``status`` is always
        a human-readable message for the status box.
    """
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        samples = np.asarray(model.generate(text, voice=voice), dtype=np.float32)

        # Collapse multi-channel output to mono.
        # NOTE(review): assumes a (samples, channels) layout - confirm.
        if samples.ndim > 1:
            samples = samples.mean(axis=1)

        if samples.size == 0:
            return None, "❌ Error generating speech: model returned no audio"

        # Peak-normalize; an all-zero (silent) signal is left untouched to
        # avoid dividing by zero.
        peak = np.max(np.abs(samples))
        if peak > 0:
            samples = samples / peak
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of
        # letting the request crash.
        return None, f"❌ Error generating speech: {e}"

    return (
        (KITTEN_TTS_SAMPLE_RATE, samples),
        f"✅ Successfully generated speech with voice: {voice}",
    )
# Build the Gradio UI: text and voice inputs on the left, a voice reference
# panel on the right, and the audio/status outputs.
# NOTE(review): nesting was reconstructed from flattened source - confirm
# that the outputs belong below the row rather than inside a column.
with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo:
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 2rem;">
        <h1>🎤 KittenTTS</h1>
        <p><em>High Quality Text-to-Speech Generation</em></p>
    </div>
    """)

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type or paste your text here...",
                lines=4,
            )
            voice_dropdown = gr.Dropdown(
                choices=AVAILABLE_VOICES,
                value=AVAILABLE_VOICES[1],
                label="Select Voice",
            )
            generate_btn = gr.Button("🎵 Generate Speech", variant="primary")

        with gr.Column():
            # Derive the reference panel from AVAILABLE_VOICES so it cannot
            # drift from the dropdown choices; the "-m"/"-f" suffix of each
            # identifier selects the group it is listed under.
            male_voices = ", ".join(v for v in AVAILABLE_VOICES if v.endswith("-m"))
            female_voices = ", ".join(v for v in AVAILABLE_VOICES if v.endswith("-f"))
            gr.HTML(f"""
            <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                <h3>Available Voices:</h3>
                <ul>
                    <li><strong>Male:</strong> {male_voices}</li>
                    <li><strong>Female:</strong> {female_voices}</li>
                </ul>
            </div>
            """)

    audio_output = gr.Audio(label="Generated Audio")
    status_output = gr.Textbox(label="Status", interactive=False)

    # Run synthesis both when the button is clicked and when Enter is
    # pressed in the text box; the two triggers share the same wiring.
    for trigger in (generate_btn.click, text_input.submit):
        trigger(
            fn=generate_speech,
            inputs=[text_input, voice_dropdown],
            outputs=[audio_output, status_output],
        )
# Start the web server only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind to all network interfaces
        server_port=7860,
        # NOTE(review): Gradio is documented to ignore share=True (with a
        # warning) when running on Hugging Face Spaces - confirm whether this
        # flag is still wanted for local runs.
        share=True,
    )