KingNish committed on
Commit
fd8eada
·
verified ·
1 Parent(s): 256fa87
Files changed (1) hide show
  1. app.py +151 -0
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tempfile
3
+ import uuid
4
+ import os
5
+ from kittentts import KittenTTS
6
+ import soundfile as sf
7
+
8
# Load the KittenTTS model once at import time; generate_speech() and
# get_available_voices() both use this module-level instance.
MODEL_REPO_ID = "KittenML/kitten-tts-nano-0.1"
model = KittenTTS(MODEL_REPO_ID)
10
+
11
def generate_speech(text, voice, speed):
    """
    Generate speech from text using KittenTTS and save it as a WAV file.

    Args:
        text (str): Text to convert to speech. ``None`` or whitespace-only
            input is rejected with a status message instead of raising.
        voice (str): Voice to use for generation
        speed (float): Speed of speech generation

    Returns:
        tuple: ``(audio_path, status_message)``. ``audio_path`` is the path
        of the written WAV file, or ``None`` when no audio was produced
        (empty input or a generation/write error).
    """
    # Check for None as well as blank strings: calling .strip() on None would
    # raise AttributeError here, outside the try block below.
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        # Generate audio
        audio = model.generate(text, voice=voice, speed=speed)

        # A UUID in the filename keeps output files from colliding
        unique_filename = f"kitten_tts_{uuid.uuid4()}.wav"
        output_path = os.path.join(tempfile.gettempdir(), unique_filename)

        # Save audio file at a 24000 Hz sample rate
        sf.write(output_path, audio, 24000)
    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the exception escape the event handler.
        return None, f"Error generating speech: {str(e)}"

    return output_path, f"Speech generated successfully! File saved as: {unique_filename}"
42
+
43
def get_available_voices():
    """Get list of available voices from the model.

    Returns:
        list: Voice names reported by the model, or ``["expr-voice-5-m"]``
        when the model reports none or the lookup fails.
    """
    fallback = ["expr-voice-5-m"]  # Default voice as fallback
    try:
        voices = model.available_voices
        # NOTE(review): KittenTTS appears to expose `available_voices` as a
        # property rather than a method; calling it unconditionally would
        # then raise TypeError and always trigger the fallback. Accept both.
        if callable(voices):
            voices = voices()
        # Normalise to a plain list so the truthiness check below is safe
        # for any iterable the model may return.
        voices = list(voices) if voices is not None else []
    except Exception:
        # Best-effort lookup: any failure falls back to the default voice,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return fallback
    return voices or fallback
50
+
51
# Resolve the voice list once; its first entry is the default voice used by
# the dropdown and by every example row below.
available_voices = get_available_voices()
default_voice = available_voices[0] if available_voices else "expr-voice-5-m"

# Build the Gradio UI
with gr.Blocks(title="KittenTTS - Text to Speech", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🐱 KittenTTS - Text to Speech Generator")
    gr.Markdown("Convert your text to high-quality speech using KittenTTS nano model!")

    with gr.Row():
        # Left column: inputs and the trigger button
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech...",
                lines=4,
                max_lines=10,
            )

            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=default_voice,
                    label="Voice Selection",
                    info="Choose the voice for speech generation",
                )

                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.1,
                    value=1.0,
                    label="Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)",
                )

            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        # Right column: generated audio and status message
        with gr.Column(scale=1):
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
            )

            status_output = gr.Textbox(
                label="Status",
                interactive=False,
                lines=3,
            )

    # The same three inputs and two outputs are used by the examples and by
    # both event handlers.
    tts_inputs = [text_input, voice_dropdown, speed_slider]
    tts_outputs = [audio_output, status_output]

    # Example rows: (text, voice, speed)
    gr.Markdown("## 📝 Example Texts")
    example_rows = [
        ["Hello! This is a test of the KittenTTS model.", default_voice, 1.0],
        ["The quick brown fox jumps over the lazy dog.", default_voice, 1.2],
        ["Welcome to the world of high-quality text-to-speech synthesis!", default_voice, 0.9],
    ]
    examples = gr.Examples(
        examples=example_rows,
        inputs=tts_inputs,
        label="Click on an example to try it out",
    )

    # Collapsible model information panel
    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown("""
        **Model:** KittenML/kitten-tts-nano-0.1

        **Features:**
        - High-quality text-to-speech synthesis
        - Works without GPU acceleration
        - Multiple voice options
        - Adjustable speech speed
        - 24kHz audio output

        **Usage:**
        1. Enter your text in the text box
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech" to create audio

        Generated files are saved in temporary directory with unique UUID filenames.
        """)

    # Clicking the button and pressing Enter in the text box both run
    # generate_speech with identical wiring.
    for register_event in (generate_btn.click, text_input.submit):
        register_event(
            fn=generate_speech,
            inputs=tts_inputs,
            outputs=tts_outputs,
        )

# Start the server only when the file is executed as a script
if __name__ == "__main__":
    app.launch()