File size: 3,171 Bytes
e327671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import gradio as gr
import soundfile as sf
import numpy as np
from kittentts import KittenTTS

# Build the KittenTTS model once at import time; generate_speech() below reuses
# this single module-level instance for every request.
# NOTE(review): the constructor presumably fetches/loads the
# "KittenML/kitten-tts-nano-0.1" weights on first use — confirm against kittentts.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")

# Voice identifiers offered in the UI: speakers 2 through 5, each in a male
# ("m") and a female ("f") variant, ordered speaker-by-speaker with male first.
AVAILABLE_VOICES = [
    f"expr-voice-{speaker}-{variant}"
    for speaker in range(2, 6)
    for variant in ("m", "f")
]

# Sample rate (Hz) reported to Gradio alongside the waveform. KittenTTS's
# published usage example writes its output at 24 kHz.
# NOTE(review): confirm this matches the loaded checkpoint.
KITTEN_TTS_SAMPLE_RATE = 24000


def generate_speech(text, voice):
    """Generate speech from text using KittenTTS.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input is rejected with a prompt message.
        voice: Voice identifier forwarded to ``model.generate``.

    Returns:
        A ``(audio, status)`` pair for the ``gr.Audio`` and status outputs.
        On success ``audio`` is a ``(sample_rate, samples)`` tuple, which is
        the NumPy form ``gr.Audio`` accepts (a bare array is rejected); on
        failure or empty input it is ``None``. ``status`` is always a
        human-readable message.
    """
    # Gradio can hand over None for a cleared textbox, so guard before .strip().
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        # Generate audio
        audio = model.generate(text, voice=voice)

        # Collapse multi-channel output to mono.
        # NOTE(review): assumes a (samples, channels) layout — confirm.
        if len(audio.shape) > 1:
            audio = audio.mean(axis=1)

        # Peak-normalize to [-1, 1]; leave all-zero (silent) output untouched
        # to avoid dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        return (
            (KITTEN_TTS_SAMPLE_RATE, audio),
            f"✅ Successfully generated speech with voice: {voice}",
        )

    except Exception as e:
        # Surface any synthesis failure in the status box instead of crashing
        # the Gradio event handler.
        return None, f"❌ Error generating speech: {str(e)}"

# Create the interface: a text box, voice selector and button on the left, a
# voice reference panel on the right, and the audio/status outputs underneath.
with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo:

    gr.HTML("""
    <div style="text-align: center; margin-bottom: 2rem;">
        <h1>🎤 KittenTTS</h1>
        <p><em>High Quality Text-to-Speech Generation</em></p>
    </div>
    """)

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type or paste your text here...",
                lines=4
            )

            voice_dropdown = gr.Dropdown(
                choices=AVAILABLE_VOICES,
                value=AVAILABLE_VOICES[1],
                label="Select Voice"
            )

            generate_btn = gr.Button("🎵 Generate Speech", variant="primary")

        with gr.Column():
            # Build the help panel from AVAILABLE_VOICES so it cannot drift out
            # of sync with the dropdown; the "-m" / "-f" suffix marks the
            # male / female variant of each voice.
            male_voices = ", ".join(v for v in AVAILABLE_VOICES if v.endswith("-m"))
            female_voices = ", ".join(v for v in AVAILABLE_VOICES if v.endswith("-f"))
            gr.HTML(f"""
            <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                <h3>Available Voices:</h3>
                <ul>
                    <li><strong>Male:</strong> {male_voices}</li>
                    <li><strong>Female:</strong> {female_voices}</li>
                </ul>
            </div>
            """)

    audio_output = gr.Audio(label="Generated Audio")
    status_output = gr.Textbox(label="Status", interactive=False)

    # Both triggers run the same handler with the same wiring: clicking the
    # button, and submitting the textbox (pressing Enter).
    for register_event in (generate_btn.click, text_input.submit):
        register_event(
            fn=generate_speech,
            inputs=[text_input, voice_dropdown],
            outputs=[audio_output, status_output]
        )

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # bind on every network interface
        "server_port": 7860,
        "share": True,  # additionally request a public share link
    }
    demo.launch(**launch_options)