Spaces:

KingNish
/

Kitten-TTS

Running

App Files Files Community

KingNish committed on Aug 5

Commit

fd8eada

verified ·

1 Parent(s): 256fa87

By claude

Browse files

Files changed (1) hide show

app.py +151 -0

app.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import gradio as gr
+import tempfile
+import uuid
+import os
+from kittentts import KittenTTS
+import soundfile as sf
+# Initialize the TTS model once at import time; generate_speech() and
+# get_available_voices() below both use this shared module-level instance.
+model = KittenTTS("KittenML/kitten-tts-nano-0.1")
+def generate_speech(text, voice, speed):
+    """
+    Generate speech from text using KittenTTS
+    Args:
+        text (str): Text to convert to speech
+        voice (str): Voice to use for generation
+        speed (float): Speed of speech generation
+    Returns:
+        str: Path to generated audio file
+    """
+    if not text.strip():
+        return None, "Please enter some text to generate speech."
+    try:
+        # Generate audio
+        audio = model.generate(text, voice=voice, speed=speed)
+        # Create temporary file with UUID
+        temp_dir = tempfile.gettempdir()
+        unique_filename = f"kitten_tts_{uuid.uuid4()}.wav"
+        output_path = os.path.join(temp_dir, unique_filename)
+        # Save audio file
+        sf.write(output_path, audio, 24000)
+        return output_path, f"Speech generated successfully! File saved as: {unique_filename}"
+    except Exception as e:
+        return None, f"Error generating speech: {str(e)}"
+def get_available_voices():
+    """Get list of available voices from the model"""
+    try:
+        voices = model.available_voices()
+        return voices if voices else ["expr-voice-5-m"]  # Default voice as fallback
+    except:
+        return ["expr-voice-5-m"]  # Default voice as fallback
+# Get available voices
+available_voices = get_available_voices()
+# Create Gradio interface
+with gr.Blocks(title="KittenTTS - Text to Speech", theme=gr.themes.Soft()) as app:
+    gr.Markdown("# 🐱 KittenTTS - Text to Speech Generator")
+    gr.Markdown("Convert your text to high-quality speech using KittenTTS nano model!")
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Input components
+            text_input = gr.Textbox(
+                label="Text to Convert",
+                placeholder="Enter the text you want to convert to speech...",
+                lines=4,
+                max_lines=10
+            )
+            with gr.Row():
+                voice_dropdown = gr.Dropdown(
+                    choices=available_voices,
+                    value=available_voices[0] if available_voices else "expr-voice-5-m",
+                    label="Voice Selection",
+                    info="Choose the voice for speech generation"
+                )
+                speed_slider = gr.Slider(
+                    minimum=0.5,
+                    maximum=2.0,
+                    step=0.1,
+                    value=1.0,
+                    label="Speech Speed",
+                    info="Adjust the speed of speech (0.5x to 2.0x)"
+                )
+            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
+        with gr.Column(scale=1):
+            # Output components
+            audio_output = gr.Audio(
+                label="Generated Speech",
+                type="filepath",
+                interactive=False
+            )
+            status_output = gr.Textbox(
+                label="Status",
+                interactive=False,
+                lines=3
+            )
+    # Example inputs
+    gr.Markdown("## 📝 Example Texts")
+    examples = gr.Examples(
+        examples=[
+            ["Hello! This is a test of the KittenTTS model.", available_voices[0] if available_voices else "expr-voice-5-m", 1.0],
+            ["The quick brown fox jumps over the lazy dog.", available_voices[0] if available_voices else "expr-voice-5-m", 1.2],
+            ["Welcome to the world of high-quality text-to-speech synthesis!", available_voices[0] if available_voices else "expr-voice-5-m", 0.9],
+        ],
+        inputs=[text_input, voice_dropdown, speed_slider],
+        label="Click on an example to try it out"
+    )
+    # Model information
+    with gr.Accordion("ℹ️ Model Information", open=False):
+        gr.Markdown("""
+        **Model:** KittenML/kitten-tts-nano-0.1
+        **Features:**
+        - High-quality text-to-speech synthesis
+        - Works without GPU acceleration
+        - Multiple voice options
+        - Adjustable speech speed
+        - 24kHz audio output
+        **Usage:**
+        1. Enter your text in the text box
+        2. Select a voice from the dropdown
+        3. Adjust the speech speed if needed
+        4. Click "Generate Speech" to create audio
+        Generated files are saved in temporary directory with unique UUID filenames.
+        """)
+    # Event handlers
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=[text_input, voice_dropdown, speed_slider],
+        outputs=[audio_output, status_output]
+    )
+    # Auto-generate on Enter key (optional)
+    text_input.submit(
+        fn=generate_speech,
+        inputs=[text_input, voice_dropdown, speed_slider],
+        outputs=[audio_output, status_output]
+    )
+# Launch the app
+if __name__ == "__main__":
+    app.launch()