Abhijit Bhattacharya committed on
Commit
6231313
·
1 Parent(s): 3836582

Update with latest code structure - Simple script (app.py) + optional Gradio interface (app_gradio.py) - Updated README with accurate compatibility info - Fixed MPS issues with CPU mode approach - Ready for download and local usage

Browse files
Files changed (1) hide show
  1. app_gradio.py +228 -0
app_gradio.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Chatterbox-TTS Apple Silicon Gradio Interface
4
+ Full web interface for local usage with Apple Silicon compatibility
5
+
6
+ Install gradio first: pip install gradio
7
+ Then run: python app_gradio.py
8
+ """
9
+
10
+ import gradio as gr
11
+ from app import (
12
+ get_or_load_model,
13
+ generate_audio,
14
+ DEVICE,
15
+ split_text_into_chunks,
16
+ logger
17
+ )
18
+ import torch
19
+ import tempfile
20
+ import os
21
+
22
def _remove_quietly(path):
    """Delete *path* if it exists; cleanup must never mask the real error."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def _write_prompt_wav(sample_rate, samples):
    """Write an in-memory ``(sample_rate, samples)`` clip to a temporary WAV file.

    Args:
        sample_rate: Sampling rate in Hz.
        samples: Array-like audio, mono ``(n,)`` or ``(n, channels)``
            (assumes Gradio's numpy audio layout - TODO confirm for >2-D input).

    Returns:
        Path of the 16-bit PCM WAV file that was written. The caller owns the
        file and is responsible for deleting it.

    Raises:
        ValueError: If the sample dtype is neither floating point nor int16.
    """
    # Local imports keep this block self-contained: the module does not
    # import ``wave`` or ``numpy`` at the top of the file.
    import wave

    import numpy as np

    pcm = np.asarray(samples)
    if np.issubdtype(pcm.dtype, np.floating):
        # Assumes float audio is normalised to [-1.0, 1.0].
        pcm = np.clip(pcm, -1.0, 1.0) * 32767.0
    elif pcm.dtype != np.int16:
        raise ValueError(f"Unsupported audio prompt dtype: {pcm.dtype}")
    channels = 1 if pcm.ndim == 1 else pcm.shape[1]

    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        with wave.open(wav_path, "wb") as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sample_rate))
            # WAV PCM data is little-endian; C-order bytes of an
            # (n, channels) array are already channel-interleaved.
            wav_file.writeframes(pcm.astype("<i2").tobytes())
    except Exception:
        _remove_quietly(wav_path)
        raise
    return wav_path


def gradio_generate_audio(
    text_input: str,
    audio_prompt_input,
    exaggeration_input: float,
    temperature_input: float,
    seed_input: int,
    cfg_weight_input: float,
    chunk_size_input: int = 250
):
    """Gradio callback: synthesize speech for *text_input* and return a WAV path.

    Args:
        text_input: Text to synthesize; must contain non-whitespace characters.
        audio_prompt_input: Optional reference voice. Either a file path
            (``str``) or a ``(sample_rate, samples)`` tuple, which is written
            to a temporary WAV file before being handed to ``generate_audio``.
        exaggeration_input: Forwarded as ``exaggeration``.
        temperature_input: Forwarded as ``temperature``.
        seed_input: Random seed; ``0`` (or an empty field) means "no fixed seed".
        cfg_weight_input: Forwarded as ``cfg_weight``.
        chunk_size_input: Forwarded as ``chunk_size`` (characters per chunk).

    Returns:
        Whatever ``generate_audio`` returns (the original code treats it as
        the path of the generated audio file).

    Raises:
        gr.Error: If the text is empty or generation fails for any reason.
    """
    if not text_input or not text_input.strip():
        raise gr.Error("Please enter some text to synthesize.")

    output_target = None
    prompt_tmp_path = None
    try:
        # Resolve the reference audio to a real file path.
        audio_prompt_path = None
        if isinstance(audio_prompt_input, str) and audio_prompt_input:
            audio_prompt_path = audio_prompt_input
        elif isinstance(audio_prompt_input, tuple):
            prompt_tmp_path = _write_prompt_wav(*audio_prompt_input)
            audio_prompt_path = prompt_tmp_path

        # Reserve the output name and close the handle *before* generation so
        # generate_audio is free to (re)open and write the file itself.
        fd, output_target = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        return generate_audio(
            text=text_input,
            audio_prompt_path=audio_prompt_path,
            exaggeration=exaggeration_input,
            temperature=temperature_input,
            seed=int(seed_input) if seed_input else None,
            cfg_weight=cfg_weight_input,
            chunk_size=int(chunk_size_input),
            output_path=output_target,
        )
    except Exception as e:
        # Do not leave a half-written/empty output file behind on failure.
        _remove_quietly(output_target)
        logger.exception("Audio generation failed")
        raise gr.Error(f"Generation failed: {str(e)}") from e
    finally:
        # The converted prompt is only needed while generation runs.
        _remove_quietly(prompt_tmp_path)
60
+
61
# Create Gradio interface
#
# NOTE(review): the component nesting below (two columns inside one row,
# info panels in the right-hand column) is reconstructed from statement
# order - confirm against the intended layout.

# Query MPS availability once; the status panel shows it in two places.
_mps_available = torch.backends.mps.is_available()

with gr.Blocks(
    title="🎙️ Chatterbox-TTS (Apple Silicon)",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { max-width: 1200px; margin: auto; }
    .gr-button { background: linear-gradient(45deg, #FF6B6B, #4ECDC4); color: white; }
    .info-box {
        padding: 15px;
        border-radius: 10px;
        margin-top: 20px;
        border: 1px solid #ddd;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .info-box h4 {
        margin-top: 0;
        color: #333;
        font-weight: bold;
    }
    .info-box p {
        margin: 8px 0;
        color: #555;
        line-height: 1.4;
    }
    .chunking-info { background: linear-gradient(135deg, #e8f5e8, #f0f8f0); }
    .system-info { background: linear-gradient(135deg, #f0f4f8, #e6f2ff); }
    """
) as demo:

    # Page header
    gr.HTML("""
    <div style="text-align: center; padding: 20px;">
        <h1>🎙️ Chatterbox-TTS Apple Silicon</h1>
        <p style="font-size: 18px; color: #666;">
            Generate high-quality speech from text with voice cloning<br>
            <strong>Optimized for Apple Silicon compatibility!</strong>
        </p>
        <p style="font-size: 14px; color: #888;">
            Based on <a href="https://huggingface.co/spaces/ResembleAI/Chatterbox">official ResembleAI implementation</a><br>
            ✨ <strong>Enhanced with smart text chunking and Apple Silicon support!</strong>
        </p>
    </div>
    """)

    with gr.Row():
        # Left column: inputs and generation controls
        with gr.Column():
            text = gr.Textbox(
                value="Hello! This is a test of the Chatterbox-TTS voice cloning system running locally on Apple Silicon.",
                label="Text to synthesize (supports long text with automatic chunking)",
                max_lines=10,
                lines=5
            )

            # type="filepath" means the callback receives a path string.
            ref_wav = gr.Audio(
                type="filepath",
                label="Reference Audio File (Optional - 6+ seconds recommended)",
                sources=["upload", "microphone"]
            )

            with gr.Row():
                exaggeration = gr.Slider(
                    0.25, 2, step=0.05,
                    label="Exaggeration (Neutral = 0.5)",
                    value=0.5
                )
                cfg_weight = gr.Slider(
                    0.2, 1, step=0.05,
                    label="CFG/Pace",
                    value=0.5
                )

            with gr.Accordion("⚙️ Advanced Options", open=False):
                chunk_size = gr.Slider(
                    100, 400, step=25,
                    label="Chunk Size (characters per chunk for long text)",
                    value=250
                )
                seed_num = gr.Number(
                    value=0,
                    label="Random seed (0 for random)",
                    precision=0
                )
                temp = gr.Slider(
                    0.05, 5, step=0.05,
                    label="Temperature",
                    value=0.8
                )

            run_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        # Right column: result and informational panels
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")

            gr.HTML("""
            <div class="info-box chunking-info">
                <h4>📝 Text Chunking Info</h4>
                <p><strong>Smart Chunking:</strong> Long text is automatically split at sentence boundaries</p>
                <p><strong>Chunk Processing:</strong> Each chunk generates separate audio, then concatenated</p>
                <p><strong>Silence Gaps:</strong> 0.3s silence added between chunks for natural flow</p>
            </div>
            """)

            # System info (rendered once, when the UI is built)
            gr.HTML(f"""
            <div class="info-box system-info">
                <h4>💻 System Status</h4>
                <p><strong>Device:</strong> {DEVICE.upper()} {'🍎' if _mps_available else '💻'}</p>
                <p><strong>PyTorch:</strong> {torch.__version__}</p>
                <p><strong>MPS Available:</strong> {'✅ Yes' if _mps_available else '❌ No'}</p>
                <p><strong>Compatibility:</strong> CPU mode for stability</p>
            </div>
            """)

    # Connect the interface. The order of `inputs` must match the positional
    # parameters of gradio_generate_audio.
    run_btn.click(
        fn=gradio_generate_audio,
        inputs=[
            text,
            ref_wav,
            exaggeration,
            temp,
            seed_num,
            cfg_weight,
            chunk_size,
        ],
        outputs=[audio_output],
        show_progress=True
    )

    # Example texts (clicking one fills the text box)
    gr.Examples(
        examples=[
            ["Hello! This is a test of voice cloning running on Apple Silicon."],
            ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet."],
            ["Welcome to the future of voice synthesis! With Chatterbox, you can clone any voice in seconds."],
        ],
        inputs=[text],
        label="📝 Example Texts"
    )
199
+
200
def main():
    """Pre-load the model and launch the Gradio interface.

    Returns:
        ``0`` once the server has shut down normally, ``1`` if start-up or
        serving failed (the error is printed and logged).
    """
    print("🍎 Starting Chatterbox-TTS Gradio Interface")
    print(f"Device: {DEVICE}")

    try:
        # Pre-load model so the first request does not pay the load cost.
        print("Loading model...")
        get_or_load_model()
        print("✅ Model loaded!")

        # Launch interface (bound to localhost only, no public share link).
        demo.launch(
            server_name="127.0.0.1",
            server_port=7861,
            share=False,
            debug=True,
            show_error=True
        )
    except ImportError as e:
        # gradio itself is imported at module load, so an ImportError caught
        # here comes from something else - report what is actually missing.
        print(f"❌ Missing dependency: {e}")
        print("Install the missing package (for the UI itself: pip install gradio)")
        print("Then run: python app_gradio.py")
        return 1
    except Exception as e:
        logger.exception("Gradio interface failed")
        print(f"❌ Error: {e}")
        return 1
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so shells/scripts can detect a failed start.
    raise SystemExit(main())