# File size: 7,668 Bytes | 6231313
#!/usr/bin/env python3
"""
Chatterbox-TTS Apple Silicon Gradio Interface
Full web interface for local usage with Apple Silicon compatibility
Install gradio first: pip install gradio
Then run: python app_gradio.py
"""
import gradio as gr
from app import (
get_or_load_model,
generate_audio,
DEVICE,
split_text_into_chunks,
logger
)
import torch
import tempfile
import os
def gradio_generate_audio(
    text_input: str,
    audio_prompt_input,
    exaggeration_input: float,
    temperature_input: float,
    seed_input: int,
    cfg_weight_input: float,
    chunk_size_input: int = 250
):
    """Gradio wrapper for audio generation.

    Forwards the UI values to ``generate_audio`` and returns whatever it
    returns (presumably the path of the generated WAV file -- confirm
    against ``app.generate_audio``).

    Args:
        text_input: Text to synthesize. Blank text is rejected up front.
        audio_prompt_input: Optional reference audio. A ``str`` or ``tuple``
            is forwarded unchanged as ``audio_prompt_path``; anything else
            (including ``None``) is forwarded as ``None``.
        exaggeration_input: Forwarded as ``exaggeration``.
        temperature_input: Forwarded as ``temperature``.
        seed_input: Random seed. ``0`` or ``None`` is forwarded as ``None``;
            any other value is converted to ``int``.
        cfg_weight_input: Forwarded as ``cfg_weight``.
        chunk_size_input: Forwarded as ``chunk_size``.

    Raises:
        gr.Error: If the text is blank or generation fails. The original
            exception is chained as ``__cause__``.
    """
    if not text_input or not text_input.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # NOTE(review): a tuple would be Gradio's (sample_rate, audio_data) format,
    # not a path. It is passed through unchanged as before; verify that
    # generate_audio accepts it, or keep the Audio component on type="filepath".
    if isinstance(audio_prompt_input, (str, tuple)):
        audio_prompt_path = audio_prompt_input
    else:
        audio_prompt_path = None

    # gr.Number may deliver a float or None; 0 means "no fixed seed".
    seed = int(seed_input) if seed_input else None

    tmp_path = None
    try:
        # Reserve a unique output path, then close the handle before generation
        # so generate_audio can open the path itself.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            tmp_path = tmp_file.name

        return generate_audio(
            text=text_input,
            audio_prompt_path=audio_prompt_path,
            exaggeration=exaggeration_input,
            temperature=temperature_input,
            seed=seed,
            cfg_weight=cfg_weight_input,
            chunk_size=chunk_size_input,
            output_path=tmp_path
        )
    except Exception as e:
        # delete=False means nothing else cleans up the reserved file.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        logger.exception("Audio generation failed")
        raise gr.Error(f"Generation failed: {str(e)}") from e
# Create Gradio interface.
# The Blocks layout is built at import time and bound to ``demo``.
with gr.Blocks(
    title="🎙️ Chatterbox-TTS (Apple Silicon)",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { max-width: 1200px; margin: auto; }
    .gr-button { background: linear-gradient(45deg, #FF6B6B, #4ECDC4); color: white; }
    .info-box {
        padding: 15px;
        border-radius: 10px;
        margin-top: 20px;
        border: 1px solid #ddd;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .info-box h4 {
        margin-top: 0;
        color: #333;
        font-weight: bold;
    }
    .info-box p {
        margin: 8px 0;
        color: #555;
        line-height: 1.4;
    }
    .chunking-info { background: linear-gradient(135deg, #e8f5e8, #f0f8f0); }
    .system-info { background: linear-gradient(135deg, #f0f4f8, #e6f2ff); }
    """
) as demo:
    # Query MPS support once; the system status panel reports it twice.
    mps_available = torch.backends.mps.is_available()

    gr.HTML("""
    <div style="text-align: center; padding: 20px;">
        <h1>🎙️ Chatterbox-TTS Apple Silicon</h1>
        <p style="font-size: 18px; color: #666;">
            Generate high-quality speech from text with voice cloning<br>
            <strong>Optimized for Apple Silicon compatibility!</strong>
        </p>
        <p style="font-size: 14px; color: #888;">
            Based on <a href="https://huggingface.co/spaces/ResembleAI/Chatterbox">official ResembleAI implementation</a><br>
            ✨ <strong>Enhanced with smart text chunking and Apple Silicon support!</strong>
        </p>
    </div>
    """)

    with gr.Row():
        # Left column: inputs and generation controls.
        with gr.Column():
            text = gr.Textbox(
                value="Hello! This is a test of the Chatterbox-TTS voice cloning system running locally on Apple Silicon.",
                label="Text to synthesize (supports long text with automatic chunking)",
                max_lines=10,
                lines=5
            )
            # type="filepath" makes the callback receive a path string.
            ref_wav = gr.Audio(
                type="filepath",
                label="Reference Audio File (Optional - 6+ seconds recommended)",
                sources=["upload", "microphone"]
            )
            with gr.Row():
                exaggeration = gr.Slider(
                    0.25, 2, step=0.05,
                    label="Exaggeration (Neutral = 0.5)",
                    value=0.5
                )
                cfg_weight = gr.Slider(
                    0.2, 1, step=0.05,
                    label="CFG/Pace",
                    value=0.5
                )
            with gr.Accordion("⚙️ Advanced Options", open=False):
                chunk_size = gr.Slider(
                    100, 400, step=25,
                    label="Chunk Size (characters per chunk for long text)",
                    value=250
                )
                seed_num = gr.Number(
                    value=0,
                    label="Random seed (0 for random)",
                    precision=0
                )
                temp = gr.Slider(
                    0.05, 5, step=0.05,
                    label="Temperature",
                    value=0.8
                )
            run_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        # Right column: generated audio plus informational panels.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")
            gr.HTML("""
            <div class="info-box chunking-info">
                <h4>📄 Text Chunking Info</h4>
                <p><strong>Smart Chunking:</strong> Long text is automatically split at sentence boundaries</p>
                <p><strong>Chunk Processing:</strong> Each chunk generates separate audio, then concatenated</p>
                <p><strong>Silence Gaps:</strong> 0.3s silence added between chunks for natural flow</p>
            </div>
            """)
            # System info
            gr.HTML(f"""
            <div class="info-box system-info">
                <h4>💻 System Status</h4>
                <p><strong>Device:</strong> {DEVICE.upper()} {'🚀' if mps_available else '💻'}</p>
                <p><strong>PyTorch:</strong> {torch.__version__}</p>
                <p><strong>MPS Available:</strong> {'✅ Yes' if mps_available else '❌ No'}</p>
                <p><strong>Compatibility:</strong> CPU mode for stability</p>
            </div>
            """)

    # Connect the interface. The input order must match the positional
    # parameters of gradio_generate_audio.
    run_btn.click(
        fn=gradio_generate_audio,
        inputs=[
            text,
            ref_wav,
            exaggeration,
            temp,
            seed_num,
            cfg_weight,
            chunk_size,
        ],
        outputs=[audio_output],
        show_progress=True
    )

    # Example texts
    gr.Examples(
        examples=[
            ["Hello! This is a test of voice cloning running on Apple Silicon."],
            ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet."],
            ["Welcome to the future of voice synthesis! With Chatterbox, you can clone any voice in seconds."],
        ],
        inputs=[text],
        label="📝 Example Texts"
    )
def main():
    """Launch the Gradio interface.

    Prints the active device, loads the model via ``get_or_load_model()``
    before the server starts, then serves ``demo`` on 127.0.0.1:7861.
    Failures are reported on stdout instead of being raised.
    """
    try:
        print("🚀 Starting Chatterbox-TTS Gradio Interface")
        print(f"Device: {DEVICE}")

        # Pre-load model
        print("Loading model...")
        get_or_load_model()
        print("✅ Model loaded!")

        # Launch interface
        demo.launch(
            server_name="127.0.0.1",
            server_port=7861,
            share=False,
            debug=True,
            show_error=True
        )
    except ImportError as e:
        # Report the actual import failure; gradio is not the only module
        # that can be missing at this point.
        print(f"❌ Missing dependency: {e}")
        print("Install with: pip install gradio")
        print("Then run: python app_gradio.py")
    except Exception as e:
        print(f"❌ Error: {e}")
# Run the interface only when executed as a script, not when imported.
if __name__ == "__main__":
    main()