# KittenTTSDemo / app_simple.py
# Source: Hugging Face Space file view, uploaded by Vishwas1
# ("Upload 5 files", commit e327671, verified), 3.17 kB.
import gradio as gr
import soundfile as sf
import numpy as np
from kittentts import KittenTTS
# Load the KittenTTS nano checkpoint once, at import time.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")

# Voice identifiers offered in the UI: voices 2 through 5, each listed as the
# "m" variant followed by the "f" variant.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{variant}"
    for number in range(2, 6)
    for variant in ("m", "f")
]
# Sample rate (Hz) reported to Gradio alongside the waveform.
# NOTE(review): 24000 is the rate the KittenTTS README uses when saving
# generated audio - confirm if the model checkpoint changes.
_SAMPLE_RATE_HZ = 24000


def generate_speech(text, voice):
    """Generate speech from text using KittenTTS.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is rejected with a status message instead of being synthesized.
        voice: Voice identifier forwarded to ``model.generate``.

    Returns:
        An ``(audio, status)`` pair matching the two Gradio outputs.
        ``audio`` is a ``(sample_rate, samples)`` tuple on success and
        ``None`` when there is nothing to play; ``status`` is a
        human-readable message.
    """
    # Guard against None as well as blank strings so a missing value cannot
    # raise AttributeError on .strip().
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        audio = np.asarray(model.generate(text, voice=voice))

        if audio.size == 0:
            # Fail with a readable message instead of numpy's reduction error.
            raise ValueError("model returned no audio samples")

        # Collapse multi-channel output to mono.
        # NOTE(review): assumes a (samples, channels) layout - confirm.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Peak-normalize; the peak is computed once and silence is left as-is
        # to avoid dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak
    except Exception as e:
        # UI boundary: report the failure in the status box rather than crash.
        return None, f"❌ Error generating speech: {str(e)}"

    # gr.Audio needs the sample rate together with the samples; a bare array
    # is not a valid output value for it.
    return (_SAMPLE_RATE_HZ, audio), f"✅ Successfully generated speech with voice: {voice}"
# Static HTML fragments shown in the page, kept apart from the layout code.
_HEADER_HTML = """
<div style="text-align: center; margin-bottom: 2rem;">
<h1>🎀 KittenTTS</h1>
<p><em>High Quality Text-to-Speech Generation</em></p>
</div>
"""

_VOICE_GUIDE_HTML = """
<div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
<h3>Available Voices:</h3>
<ul>
<li><strong>Male:</strong> expr-voice-2-m, expr-voice-3-m, expr-voice-4-m, expr-voice-5-m</li>
<li><strong>Female:</strong> expr-voice-2-f, expr-voice-3-f, expr-voice-4-f, expr-voice-5-f</li>
</ul>
</div>
"""

# Build the Gradio interface
with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Row():
        # Left column: text entry, voice selection and the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                lines=4,
                label="Enter your text",
                placeholder="Type or paste your text here...",
            )
            voice_dropdown = gr.Dropdown(
                label="Select Voice",
                choices=AVAILABLE_VOICES,
                value=AVAILABLE_VOICES[1],
            )
            generate_btn = gr.Button("🎡 Generate Speech", variant="primary")

        # Right column: static list of the available voices.
        with gr.Column():
            gr.HTML(_VOICE_GUIDE_HTML)

    # NOTE(review): the source lost its indentation, so placing the two
    # output widgets below the row (rather than inside the right-hand
    # column) is a reconstruction - confirm against the deployed layout.
    audio_output = gr.Audio(label="Generated Audio")
    status_output = gr.Textbox(label="Status", interactive=False)

    # The button click and the textbox submit event run the same handler
    # with the same inputs and outputs.
    synthesis_event = dict(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status_output],
    )
    generate_btn.click(**synthesis_event)
    text_input.submit(**synthesis_event)
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)