#!/usr/bin/env python3 """ Chatterbox-TTS Apple Silicon Gradio Interface Full web interface for local usage with Apple Silicon compatibility Install gradio first: pip install gradio Then run: python app_gradio.py """ import gradio as gr from app import ( get_or_load_model, generate_audio, DEVICE, split_text_into_chunks, logger ) import torch import tempfile import os def gradio_generate_audio( text_input: str, audio_prompt_input, exaggeration_input: float, temperature_input: float, seed_input: int, cfg_weight_input: float, chunk_size_input: int = 250 ): """Gradio wrapper for audio generation""" try: # Handle audio prompt audio_prompt_path = None if audio_prompt_input is not None: if isinstance(audio_prompt_input, tuple): # Gradio audio format: (sample_rate, audio_data) audio_prompt_path = audio_prompt_input elif isinstance(audio_prompt_input, str): # File path audio_prompt_path = audio_prompt_input # Generate audio using our main function with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file: output_path = generate_audio( text=text_input, audio_prompt_path=audio_prompt_path, exaggeration=exaggeration_input, temperature=temperature_input, seed=seed_input if seed_input != 0 else None, cfg_weight=cfg_weight_input, chunk_size=chunk_size_input, output_path=tmp_file.name ) return output_path except Exception as e: raise gr.Error(f"Generation failed: {str(e)}") # Create Gradio interface with gr.Blocks( title="🎙️ Chatterbox-TTS (Apple Silicon)", theme=gr.themes.Soft(), css=""" .gradio-container { max-width: 1200px; margin: auto; } .gr-button { background: linear-gradient(45deg, #FF6B6B, #4ECDC4); color: white; } .info-box { padding: 15px; border-radius: 10px; margin-top: 20px; border: 1px solid #ddd; box-shadow: 0 2px 4px rgba(0,0,0,0.1); } .info-box h4 { margin-top: 0; color: #333; font-weight: bold; } .info-box p { margin: 8px 0; color: #555; line-height: 1.4; } .chunking-info { background: linear-gradient(135deg, #e8f5e8, #f0f8f0); } .system-info { background: linear-gradient(135deg, #f0f4f8, #e6f2ff); } """ ) as demo: gr.HTML("""
Generate high-quality speech from text with voice cloning
Optimized for Apple Silicon compatibility!
Based on official ResembleAI implementation
✨ Enhanced with smart text chunking and Apple Silicon support!
Smart Chunking: Long text is automatically split at sentence boundaries
Chunk Processing: Each chunk generates separate audio, then concatenated
Silence Gaps: 0.3s silence added between chunks for natural flow
Device: {DEVICE.upper()} {'🍎' if torch.backends.mps.is_available() else '💻'}
PyTorch: {torch.__version__}
MPS Available: {'✅ Yes' if torch.backends.mps.is_available() else '❌ No'}
Compatibility: CPU mode for stability