Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -38,12 +38,6 @@ OUTPUT_DIR = "outputs"
|
|
38 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
39 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
40 |
|
41 |
-
# Initialize TTS model
|
42 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
43 |
-
|
44 |
-
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
45 |
-
logger.info("TTS model initialized on %s", device)
|
46 |
-
|
47 |
# Define Pydantic model for slide data
|
48 |
class Slide(BaseModel):
|
49 |
title: str
|
@@ -180,7 +174,7 @@ async def validate_and_convert_speaker_audio(speaker_audio):
|
|
180 |
return None
|
181 |
|
182 |
# Helper function to generate audio using Coqui TTS API
|
183 |
-
def generate_xtts_audio(text, speaker_wav, output_path):
|
184 |
if not tts:
|
185 |
logger.error("TTS model not initialized")
|
186 |
return False
|
@@ -275,22 +269,13 @@ def extract_json_from_message(message):
|
|
275 |
logger.warning("Unsupported message type for JSON extraction: %s", type(message))
|
276 |
return None
|
277 |
|
278 |
-
# Function to generate Markdown and convert to PDF (
|
279 |
def generate_slides_pdf(slides):
|
280 |
pdf = MarkdownPdf()
|
281 |
-
# Add LaTeX preamble for landscape orientation
|
282 |
-
preamble = r"""
|
283 |
-
\usepackage{pdflscape}
|
284 |
-
\newcommand{\blandscape}{\begin{landscape}}
|
285 |
-
\newcommand{\elandscape}{\end{landscape}}
|
286 |
-
"""
|
287 |
-
pdf.set_preamble(preamble)
|
288 |
|
289 |
for slide in slides:
|
290 |
content_lines = slide['content'].replace('\n', '\n\n')
|
291 |
markdown_content = f"""
|
292 |
-
\\blandscape
|
293 |
-
|
294 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
|
295 |
# {slide['title']}
|
296 |
|
@@ -300,8 +285,6 @@ def generate_slides_pdf(slides):
|
|
300 |
{content_lines}
|
301 |
</div>
|
302 |
|
303 |
-
\\elandscape
|
304 |
-
|
305 |
---
|
306 |
"""
|
307 |
pdf.add_section(Section(markdown_content, toc=False))
|
@@ -309,7 +292,7 @@ def generate_slides_pdf(slides):
|
|
309 |
pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
|
310 |
pdf.save(pdf_file)
|
311 |
|
312 |
-
logger.info("Generated PDF slides (
|
313 |
return pdf_file
|
314 |
|
315 |
# Async function to update audio preview
|
@@ -330,11 +313,19 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
|
|
330 |
"""
|
331 |
return
|
332 |
|
333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
yield f"""
|
335 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
336 |
-
<h2 style="color: #d9534f;">TTS model
|
337 |
-
<p style="margin-top: 20px;">
|
|
|
338 |
</div>
|
339 |
"""
|
340 |
return
|
@@ -619,7 +610,7 @@ Example for 1 content slide:
|
|
619 |
current_text = ". ".join(sentences) + "."
|
620 |
logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
|
621 |
|
622 |
-
success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
|
623 |
if not success:
|
624 |
raise RuntimeError("TTS generation failed")
|
625 |
|
|
|
38 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
39 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
# Define Pydantic model for slide data
|
42 |
class Slide(BaseModel):
|
43 |
title: str
|
|
|
174 |
return None
|
175 |
|
176 |
# Helper function to generate audio using Coqui TTS API
|
177 |
+
def generate_xtts_audio(tts, text, speaker_wav, output_path):
|
178 |
if not tts:
|
179 |
logger.error("TTS model not initialized")
|
180 |
return False
|
|
|
269 |
logger.warning("Unsupported message type for JSON extraction: %s", type(message))
|
270 |
return None
|
271 |
|
272 |
+
# Function to generate Markdown and convert to PDF (portrait, centered)
|
273 |
def generate_slides_pdf(slides):
|
274 |
pdf = MarkdownPdf()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
|
276 |
for slide in slides:
|
277 |
content_lines = slide['content'].replace('\n', '\n\n')
|
278 |
markdown_content = f"""
|
|
|
|
|
279 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
|
280 |
# {slide['title']}
|
281 |
|
|
|
285 |
{content_lines}
|
286 |
</div>
|
287 |
|
|
|
|
|
288 |
---
|
289 |
"""
|
290 |
pdf.add_section(Section(markdown_content, toc=False))
|
|
|
292 |
pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
|
293 |
pdf.save(pdf_file)
|
294 |
|
295 |
+
logger.info("Generated PDF slides (portrait): %s", pdf_file)
|
296 |
return pdf_file
|
297 |
|
298 |
# Async function to update audio preview
|
|
|
313 |
"""
|
314 |
return
|
315 |
|
316 |
+
# Initialize TTS model
|
317 |
+
tts = None
|
318 |
+
try:
|
319 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
320 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
321 |
+
logger.info("TTS model initialized on %s", device)
|
322 |
+
except Exception as e:
|
323 |
+
logger.error("Failed to initialize TTS model: %s", str(e))
|
324 |
yield f"""
|
325 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
326 |
+
<h2 style="color: #d9534f;">TTS model initialization failed</h2>
|
327 |
+
<p style="margin-top: 20px;">Error: {str(e)}</p>
|
328 |
+
<p>Please ensure the Coqui TTS model is properly installed and try again.</p>
|
329 |
</div>
|
330 |
"""
|
331 |
return
|
|
|
610 |
current_text = ". ".join(sentences) + "."
|
611 |
logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
|
612 |
|
613 |
+
success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
|
614 |
if not success:
|
615 |
raise RuntimeError("TTS generation failed")
|
616 |
|