hari7261 committed on
Commit a3d1b01 · verified · 1 parent: 9fec753

Upload 2 files

Files changed (2)
  1. app.py +656 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,656 @@
+ import gradio as gr
+ import google.generativeai as genai
+ from gtts import gTTS
+ import pyttsx3
+ from pathlib import Path
+ import tempfile
+ import os
+ from uuid import uuid4
+ import time
+ import asyncio
+ import edge_tts
+ import numpy as np
+ import soundfile as sf
+ import re
+
+ # Voice configurations for different speakers
+ VOICE_CONFIGS = {
+     "2_speakers": [
+         {"name": "Alex", "voice": "en-US-AriaNeural", "gender": "female"},
+         {"name": "Brian", "voice": "en-US-GuyNeural", "gender": "male"}
+     ],
+     "3_speakers": [
+         {"name": "Sarah", "voice": "en-US-JennyNeural", "gender": "female"},
+         {"name": "Mike", "voice": "en-US-BrandonNeural", "gender": "male"},
+         {"name": "Emma", "voice": "en-US-AriaNeural", "gender": "female"}
+     ],
+     "4_speakers": [
+         {"name": "Sarah", "voice": "en-US-JennyNeural", "gender": "female"},
+         {"name": "Mike", "voice": "en-US-BrandonNeural", "gender": "male"},
+         {"name": "Emma", "voice": "en-US-AriaNeural", "gender": "female"},
+         {"name": "David", "voice": "en-US-GuyNeural", "gender": "male"}
+     ]
+ }
+
+ # Initialize Gemini client
+ client = None
+
+ def init_gemini(api_key):
+     """Initialize Gemini client with API key"""
+     global client
+     if api_key:
+         try:
+             genai.configure(api_key=api_key)
+             client = genai.GenerativeModel('gemini-2.0-flash')
+             return "✅ Gemini API connected successfully!"
+         except Exception as e:
+             return f"❌ Gemini API error: {str(e)}"
+     return "ℹ️ Add Gemini API key for better summaries"
+
+ def generate_with_gtts(text, filename):
+     """Generate speech using Google's gTTS"""
+     try:
+         tts = gTTS(text=text, lang='en', slow=False)
+         tts.save(filename)
+         return filename, None
+     except Exception as e:
+         return None, f"gTTS Error: {str(e)}"
+
+ async def generate_with_edge_tts(text, voice, filename):
+     """Generate speech using Microsoft Edge TTS with specific voice"""
+     try:
+         communicate = edge_tts.Communicate(text, voice)
+         await communicate.save(filename)
+         return filename, None
+     except Exception as e:
+         return None, f"Edge TTS Error: {str(e)}"
+
+ def combine_audio_files(audio_files, output_filename):
+     """Combine multiple audio files into one"""
+     try:
+         from scipy.signal import resample
+         combined_audio = []
+         sample_rate = None
+
+         for audio_file in audio_files:
+             if os.path.exists(audio_file):
+                 data, sr = sf.read(audio_file)
+                 if sample_rate is None:
+                     sample_rate = sr
+                 elif sr != sample_rate:
+                     # Resample if needed
+                     data = resample(data, int(len(data) * sample_rate / sr))
+
+                 combined_audio.append(data)
+                 # Add small pause between speakers
+                 pause = np.zeros(int(sample_rate * 0.5))  # 0.5 second pause
+                 combined_audio.append(pause)
+
+         if combined_audio:
+             final_audio = np.concatenate(combined_audio)
+             sf.write(output_filename, final_audio, sample_rate)
+             return output_filename, None
+         else:
+             return None, "No audio files to combine"
+     except Exception as e:
+         return None, f"Audio combination error: {str(e)}"
+
+ async def generate_multi_speaker_audio(script_parts, speaker_count, output_filename):
+     """Generate multi-speaker podcast audio"""
+     try:
+         voice_config = VOICE_CONFIGS[f"{speaker_count}_speakers"]
+         audio_files = []
+
+         for i, (speaker_text, speaker_idx) in enumerate(script_parts):
+             voice = voice_config[speaker_idx]["voice"]
+             temp_filename = f"temp_speaker_{i}_{uuid4().hex[:8]}.wav"
+
+             result, error = await generate_with_edge_tts(speaker_text, voice, temp_filename)
+             if result:
+                 audio_files.append(temp_filename)
+             else:
+                 # Cleanup and return error
+                 for f in audio_files:
+                     try:
+                         os.unlink(f)
+                     except:
+                         pass
+                 return None, f"Error generating voice {i+1}: {error}"
+
+         # Combine all audio files
+         final_file, error = combine_audio_files(audio_files, output_filename)
+
+         # Cleanup temp files
+         for f in audio_files:
+             try:
+                 os.unlink(f)
+             except:
+                 pass
+
+         return final_file, error
+     except Exception as e:
+         return None, f"Multi-speaker generation error: {str(e)}"
+
+ def generate_with_pyttsx3(text, filename):
+     """Generate speech using system's TTS engine"""
+     try:
+         engine = pyttsx3.init()
+
+         # Set properties for better audio quality
+         engine.setProperty('rate', 180)
+         engine.setProperty('volume', 0.9)
+
+         # Try to find a good voice
+         voices = engine.getProperty('voices')
+         for voice in voices:
+             if 'female' in voice.name.lower() or 'zira' in voice.name.lower():
+                 engine.setProperty('voice', voice.id)
+                 break
+
+         engine.save_to_file(text, filename)
+         engine.runAndWait()
+         return filename, None
+     except Exception as e:
+         return None, f"pyttsx3 Error: {str(e)}"
+
+ def generate_podcast_script(text, speaker_count, use_gemini):
+     """Generate a podcast script with multiple speakers"""
+     if use_gemini and client:
+         try:
+             voice_config = VOICE_CONFIGS[f"{speaker_count}_speakers"]
+             speaker_names = [config["name"] for config in voice_config]
+
+             prompt = f"""Create an engaging podcast conversation between {speaker_count} hosts: {', '.join(speaker_names)}.
+
+ Transform this text into a natural conversation where each speaker contributes meaningfully.
+
+ Guidelines:
+ - Make it sound like a real podcast discussion
+ - Each speaker should have distinct perspectives and speaking styles
+ - Include natural transitions and interactions
+ - Keep it under 2500 characters total
+ - Use speaker names clearly (e.g., "Sarah: Hello everyone...")
+ - Make it conversational and engaging
+
+ Original text: {text[:3000]}
+
+ Format the output with clear speaker labels like:
+ {speaker_names[0]}: [text]
+ {speaker_names[1]}: [text]
+ etc."""
+
+             response = client.generate_content(prompt)
+             return response.text
+         except Exception:
+             # Fall back to a simple truncated script instead of returning the error text
+             return text[:2000] + ("..." if len(text) > 2000 else "")
+     else:
+         # Simple fallback for single speaker
+         return text[:2000] + ("..." if len(text) > 2000 else "")
+
+ def parse_script_for_speakers(script, speaker_count):
+     """Parse the script to extract speaker parts"""
+     try:
+         voice_config = VOICE_CONFIGS[f"{speaker_count}_speakers"]
+         speaker_names = [config["name"] for config in voice_config]
+
+         # Split script by speaker patterns
+         parts = []
+         lines = script.split('\n')
+         current_speaker = 0
+         current_text = ""
+
+         for line in lines:
+             line = line.strip()
+             if not line:
+                 continue
+
+             # Check if line starts with a speaker name
+             speaker_found = False
+             for i, name in enumerate(speaker_names):
+                 if line.lower().startswith(f"{name.lower()}:"):
+                     # Save previous speaker's text
+                     if current_text.strip():
+                         parts.append((current_text.strip(), current_speaker))
+                     # Start new speaker
+                     current_speaker = i
+                     current_text = line[len(name)+1:].strip()
+                     speaker_found = True
+                     break
+
+             if not speaker_found:
+                 current_text += " " + line
+
+         # Add final speaker text
+         if current_text.strip():
+             parts.append((current_text.strip(), current_speaker))
+
+         # If no speakers were found, distribute text evenly
+         if not parts and script.strip():
+             sentences = script.split('. ')
+             sentences_per_speaker = max(1, len(sentences) // speaker_count)
+
+             for i in range(speaker_count):
+                 start_idx = i * sentences_per_speaker
+                 if i == speaker_count - 1:  # Last speaker gets remaining sentences
+                     speaker_sentences = sentences[start_idx:]
+                 else:
+                     speaker_sentences = sentences[start_idx:start_idx + sentences_per_speaker]
+
+                 if speaker_sentences:
+                     speaker_text = '. '.join(speaker_sentences)
+                     if not speaker_text.endswith('.'):
+                         speaker_text += '.'
+                     parts.append((speaker_text, i))
+
+         return parts
+     except Exception as e:
+         # Fallback: single speaker
+         return [(script, 0)]
+
+ def create_podcast(text, use_gemini, tts_engine, speaker_count, progress=gr.Progress()):
+     """Main function to create podcast from text with multiple speakers"""
+     progress(0.1, "Starting processing...")
+
+     if not text.strip():
+         return None, "❌ Please enter some text first!", ""
+
+     # Step 1: Generate script using Gemini or use raw text
+     progress(0.3, "Generating podcast script...")
+
+     podcast_script = generate_podcast_script(text, speaker_count, use_gemini)
+
+     progress(0.5, "Parsing script for speakers...")
+
+     # Parse script for multiple speakers
+     script_parts = parse_script_for_speakers(podcast_script, speaker_count)
+
+     progress(0.6, "Generating audio with multiple voices...")
+
+     # Step 2: Generate audio
+     try:
+         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+             temp_filename = tmp_file.name
+
+         if tts_engine == "Multi-Speaker (Edge TTS - Best Quality)" and speaker_count > 1:
+             # Use Edge TTS for multi-speaker
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+             try:
+                 audio_file, error = loop.run_until_complete(
+                     generate_multi_speaker_audio(script_parts, speaker_count, temp_filename)
+                 )
+             finally:
+                 loop.close()
+         elif tts_engine == "gTTS (Online - Single Voice)":
+             # Use gTTS for single voice
+             full_text = " ".join([part[0] for part in script_parts])
+             audio_file, error = generate_with_gtts(full_text, temp_filename)
+         else:
+             # Use pyttsx3 for offline single voice
+             full_text = " ".join([part[0] for part in script_parts])
+             audio_file, error = generate_with_pyttsx3(full_text, temp_filename)
+
+         if error:
+             return None, f"❌ {error}", ""
+
+         progress(0.9, "Finalizing...")
+
+         # Read the generated audio file
+         with open(audio_file, 'rb') as f:
+             audio_data = f.read()
+
+         # Clean up
+         try:
+             os.unlink(audio_file)
+         except:
+             pass
+
+         progress(1.0, "Complete!")
+
+         return audio_data, "✅ Podcast generated successfully!", podcast_script
+
+     except Exception as e:
+         return None, f"❌ Audio generation failed: {str(e)}", ""
+
+ # Custom CSS for better styling
+ css = """
+ .gradio-container {
+     max-width: 900px !important;
+     margin: 0 auto !important;
+ }
+ .container {
+     padding: 20px;
+ }
+ .header {
+     text-align: center;
+     margin-bottom: 30px;
+ }
+ .header h1 {
+     color: #2563eb;
+     font-size: 2.5em;
+     margin-bottom: 10px;
+ }
+ .header p {
+     color: #6b7280;
+     font-size: 1.1em;
+ }
+ .section {
+     background: white;
+     padding: 25px;
+     border-radius: 12px;
+     margin-bottom: 20px;
+     box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
+ }
+ .section h2 {
+     color: #374151;
+     margin-bottom: 15px;
+     font-size: 1.4em;
+ }
+ .input-text {
+     min-height: 200px;
+     resize: vertical;
+ }
+ .output-audio {
+     text-align: center;
+ }
+ .output-script {
+     background: #f8fafc;
+     padding: 20px;
+     border-radius: 8px;
+     border-left: 4px solid #2563eb;
+     max-height: 300px;
+     overflow-y: auto;
+ }
+
+ .speaker-info {
+     background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%);
+     padding: 15px;
+     border-radius: 8px;
+     margin: 10px 0;
+     border: 1px solid #fdcb6e;
+     font-weight: bold;
+ }
+
+ .status-message {
+     padding: 15px;
+     border-radius: 8px;
+     font-weight: bold;
+     margin: 10px 0;
+ }
+
+ .status-success {
+     background-color: #d4edda;
+     color: #155724;
+     border: 1px solid #c3e6cb;
+ }
+
+ .status-error {
+     background-color: #f8d7da;
+     color: #721c24;
+     border: 1px solid #f5c6cb;
+ }
+
+ .status-info {
+     background-color: #cce7ff;
+     color: #004085;
+     border: 1px solid #99d3ff;
+ }
+ .instructions {
+     background: #f0f9ff;
+     padding: 20px;
+     border-radius: 8px;
+     border-left: 4px solid #0ea5e9;
+ }
+ .instructions h3 {
+     color: #0369a1;
+     margin-bottom: 10px;
+ }
+ .btn-generate {
+     background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
+     color: white !important;
+     font-weight: bold !important;
+     padding: 12px 24px !important;
+     border-radius: 8px !important;
+ }
+ .btn-generate:hover {
+     background: linear-gradient(135deg, #1d4ed8, #1e40af) !important;
+ }
+ .status-message {
+     padding: 15px;
+     border-radius: 8px;
+     margin: 10px 0;
+ }
+ .status-success {
+     background: #dcfce7;
+     color: #166534;
+     border-left: 4px solid #22c55e;
+ }
+ .status-error {
+     background: #fee2e2;
+     color: #991b1b;
+     border-left: 4px solid #ef4444;
+ }
+ .status-info {
+     background: #dbeafe;
+     color: #1e40af;
+     border-left: 4px solid #3b82f6;
+ }
+ """
+
+ # Create the Gradio interface
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
+     with gr.Column(elem_classes="container"):
+         # Header
+         with gr.Column(elem_classes="header"):
+             gr.Markdown("# 🎙️ Blog to Podcast Converter")
+             gr.Markdown("Transform your text into engaging podcast audio using AI")
+
+         # API Section
+         with gr.Column(elem_classes="section"):
+             gr.Markdown("## 🔑 API Configuration")
+             api_key = gr.Textbox(
+                 label="Gemini API Key (Optional)",
+                 type="password",
+                 placeholder="Enter your Google Gemini API key for better summaries...",
+                 info="Get a free key from https://aistudio.google.com/"
+             )
+             api_status = gr.Textbox(
+                 label="API Status",
+                 interactive=False,
+                 value="ℹ️ Add Gemini API key for AI-powered summaries"
+             )
+             api_key.change(init_gemini, inputs=api_key, outputs=api_status)
+
+         # Input Section
+         with gr.Column(elem_classes="section"):
+             gr.Markdown("## 📝 Input Text")
+             input_text = gr.Textbox(
+                 label="Paste your blog post or article text",
+                 placeholder="Enter your text here... (2000+ characters works best)",
+                 lines=8,
+                 elem_classes="input-text"
+             )
+
+         # Configuration Section
+         with gr.Column(elem_classes="section"):
+             gr.Markdown("## ⚙️ Podcast Configuration")
+
+             with gr.Row():
+                 speaker_count = gr.Radio(
+                     label="Number of Speakers",
+                     choices=[1, 2, 3, 4],
+                     value=2,
+                     info="Choose how many voices/speakers for your podcast"
+                 )
+
+             use_gemini = gr.Checkbox(
+                 label="Use AI for better summaries",
+                 value=True,
+                 info="Requires valid Gemini API key above"
+             )
+
+             tts_engine = gr.Radio(
+                 label="Voice Engine",
+                 choices=[
+                     "Multi-Speaker (Edge TTS - Best Quality)",
+                     "gTTS (Online - Single Voice)",
+                     "pyttsx3 (Offline - Single Voice)"
+                 ],
+                 value="Multi-Speaker (Edge TTS - Best Quality)",
+                 info="Edge TTS provides realistic multi-speaker conversations"
+             )
+
+         # Generate Button
+         generate_btn = gr.Button(
+             "🎙️ Generate Podcast",
+             elem_classes="btn-generate",
+             size="lg"
+         )
+
+         # Output Section
+         with gr.Column(elem_classes="section"):
+             gr.Markdown("## 🎧 Generated Podcast")
+
+             # Status message
+             status_msg = gr.HTML(
+                 value="<div class='status-message status-info'>Ready to generate podcast...</div>"
+             )
+
+             # Audio output with download
+             with gr.Row():
+                 audio_output = gr.Audio(
+                     label="Generated Podcast",
+                     type="filepath",
+                     visible=False
+                 )
+                 download_btn = gr.DownloadButton(
+                     "⬇️ Download Podcast",
+                     visible=False,
+                     variant="secondary"
+                 )
+
+             # Speaker info display
+             speaker_info = gr.HTML(
+                 value="",
+                 visible=False
+             )
+
+             # Script output
+             script_output = gr.Textbox(
+                 label="Podcast Script",
+                 visible=False,
+                 lines=8,
+                 elem_classes="output-script"
+             )
+
+         # Instructions
+         with gr.Column(elem_classes="instructions"):
+             gr.Markdown("### ℹ️ How to Use")
+             gr.Markdown("""
+ 1. **Optional**: Enter your Gemini API key for AI-powered conversation generation
+ 2. **Paste your text** in the input box (articles, blogs, etc.)
+ 3. **Choose number of speakers** (1-4) for different conversation styles
+ 4. **Select voice engine**:
+    - Multi-Speaker Edge TTS (best quality, realistic voices)
+    - gTTS (single voice, good quality)
+    - pyttsx3 (offline, system voice)
+ 5. **Click Generate Podcast** and wait for processing
+ 6. **Listen and download** your podcast!
+
+ **Speaker Configurations**:
+ - **1 Speaker**: Solo narration
+ - **2 Speakers**: Host conversation (Alex & Brian)
+ - **3 Speakers**: Panel discussion (Sarah, Mike & Emma)
+ - **4 Speakers**: Full roundtable (Sarah, Mike, Emma & David)
+
+ **Tips**:
+ - For best results, use 500-3000 characters of text
+ - Multi-speaker works best with Gemini AI enabled
+ - Edge TTS provides the most realistic conversations
+ """)
+
+     def get_speaker_info(speaker_count):
+         """Get speaker information for display"""
+         if speaker_count == 1:
+             return "<div class='speaker-info'><b>Single Speaker Mode</b><br/>Solo narration with one voice</div>"
+
+         voice_config = VOICE_CONFIGS[f"{speaker_count}_speakers"]
+         speakers_html = "<div class='speaker-info'><b>Speakers in this podcast:</b><br/>"
+
+         for i, config in enumerate(voice_config):
+             speakers_html += f"🎤 <b>{config['name']}</b> ({config['gender']} voice)<br/>"
+
+         speakers_html += "</div>"
+         return speakers_html
+
+     # Event handlers
+     def update_status(message, success=True):
+         """Update status message with appropriate styling"""
+         status_class = "status-success" if success else "status-error"
+         if "Ready" in message or "ℹ️" in message:
+             status_class = "status-info"
+         return f"<div class='status-message {status_class}'>{message}</div>"
+
+     def generate_podcast_wrapper(text, use_gemini, tts_engine, speaker_count, progress=gr.Progress()):
+         """Wrapper function for podcast generation"""
+         audio_data, message, script = create_podcast(text, use_gemini, tts_engine, speaker_count, progress)
+
+         status_html = update_status(message, success=audio_data is not None)
+
+         outputs = [status_html]
+         if audio_data:
+             # Save audio to temporary file for playback and download
+             filename = f"podcast_{speaker_count}speakers_{uuid4().hex[:8]}.wav"
+             filepath = os.path.join(tempfile.gettempdir(), filename)
+
+             with open(filepath, 'wb') as f:
+                 f.write(audio_data)
+
+             # Get speaker info
+             speaker_info_html = get_speaker_info(speaker_count)
+
+             outputs.extend([filepath, filepath, speaker_info_html, script])
+         else:
+             outputs.extend([None, None, "", script])
+
+         return outputs
+
+     # Connect the button click event
+     generate_btn.click(
+         fn=generate_podcast_wrapper,
+         inputs=[input_text, use_gemini, tts_engine, speaker_count],
+         outputs=[status_msg, audio_output, download_btn, speaker_info, script_output]
+     )
+
+     # Update speaker info when speaker count changes
+     speaker_count.change(
+         fn=get_speaker_info,
+         inputs=speaker_count,
+         outputs=speaker_info
+     )
+
+     # Show/hide outputs based on results
+     def toggle_visibility(audio_data):
+         has_audio = audio_data is not None
+         return (
+             gr.Audio(visible=has_audio),
+             gr.DownloadButton(visible=has_audio),
+             gr.HTML(visible=has_audio),
+             gr.Textbox(visible=has_audio)
+         )
+
+     audio_output.change(
+         fn=toggle_visibility,
+         inputs=audio_output,
+         outputs=[audio_output, download_btn, speaker_info, script_output]
+     )
+
+ # Launch the application
+ if __name__ == "__main__":
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+         show_error=True
+     )
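As a quick sanity check of the multi-speaker path, edge-tts can be called directly with the same voice names app.py assigns to the 2-speaker configuration. This is a minimal standalone sketch, not part of the commit; the demo text and output filenames are illustrative:

import asyncio
import edge_tts

async def demo():
    # Same voices as VOICE_CONFIGS["2_speakers"] above
    lines = [
        ("Alex", "en-US-AriaNeural", "Welcome to the show!"),
        ("Brian", "en-US-GuyNeural", "Thanks, Alex. Let's dive into today's article."),
    ]
    for name, voice, text in lines:
        out = f"{name.lower()}_demo.mp3"  # illustrative output path
        await edge_tts.Communicate(text, voice).save(out)
        print("wrote", out)

asyncio.run(demo())

Note that edge-tts emits MP3-encoded audio by default regardless of the file extension it is saved under, so combine_audio_files appears to rely on soundfile/libsndfile being able to decode the temporary files it reads back.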
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio
+ gtts
+ pyttsx3
+ requests
+ uuid
+ google-generativeai
+ edge-tts
+ soundfile
+ numpy
+ scipy
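With these dependencies installed (for example via pip install -r requirements.txt), running python app.py starts the Gradio UI on port 7860, as configured in demo.launch above. Note that uuid ships with the Python standard library, so that entry is redundant on Python 3.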