Update app.py
app.py CHANGED
@@ -5,7 +5,6 @@ import gradio as gr
 import asyncio
 import logging
 import torch
-import pypandoc
 from serpapi import GoogleSearch
 from pydantic import BaseModel
 from autogen_agentchat.agents import AssistantAgent
@@ -16,6 +15,7 @@ from autogen_agentchat.messages import TextMessage, HandoffMessage, StructuredMe
 from autogen_ext.models.anthropic import AnthropicChatCompletionClient
 from autogen_ext.models.openai import OpenAIChatCompletionClient
 from autogen_ext.models.ollama import OllamaChatCompletionClient
+from markdown_pdf import MarkdownPdf, Section
 import traceback
 import soundfile as sf
 import tempfile
@@ -38,6 +38,12 @@ OUTPUT_DIR = "outputs"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 os.environ["COQUI_TOS_AGREED"] = "1"
 
+# Initialize TTS model
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+logger.info("TTS model initialized on %s", device)
+
 # Define Pydantic model for slide data
 class Slide(BaseModel):
     title: str
@@ -174,7 +180,7 @@ async def validate_and_convert_speaker_audio(speaker_audio):
     return None
 
 # Helper function to generate audio using Coqui TTS API
-def generate_xtts_audio(
+def generate_xtts_audio(text, speaker_wav, output_path):
     if not tts:
         logger.error("TTS model not initialized")
         return False
@@ -271,70 +277,39 @@ def extract_json_from_message(message):
 
 # Function to generate Markdown and convert to PDF (landscape, centered)
 def generate_slides_pdf(slides):
-
-
+    pdf = MarkdownPdf()
+    # Add LaTeX preamble for landscape orientation
+    preamble = r"""
+\usepackage{pdflscape}
+\newcommand{\blandscape}{\begin{landscape}}
+\newcommand{\elandscape}{\end{landscape}}
+"""
+    pdf.set_preamble(preamble)
+
     for slide in slides:
         content_lines = slide['content'].replace('\n', '\n\n')
-
+        markdown_content = f"""
 \\blandscape
 
+<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
 # {slide['title']}
 
 *Prof. AI Feynman*
 *Princeton University, April 26th, 2025*
 
 {content_lines}
+</div>
 
 \\elandscape
 
 ---
 """
-        markdown_content
-
-        # Write Markdown to a temporary file
-        md_file = os.path.join(OUTPUT_DIR, "slides.md")
-        with open(md_file, "w", encoding="utf-8") as f:
-            f.write(markdown_content)
+        pdf.add_section(Section(markdown_content, toc=False))
 
-    # Define LaTeX preamble
-    preamble = r"""
-\documentclass{article}
-\usepackage{pdflscape}
-\newcommand{\blandscape}{\begin{landscape}}
-\newcommand{\elandscape}{\end{landscape}}
-\usepackage{geometry}
-\geometry{a4paper, margin=1in}
-\begin{document}
-"""
-    # Write preamble to a temporary LaTeX file
-    preamble_file = os.path.join(OUTPUT_DIR, "preamble.tex")
-    with open(preamble_file, "w", encoding="utf-8") as f:
-        f.write(preamble)
-
-    # Convert Markdown to PDF using pypandoc
     pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
-
-    pypandoc.convert_file(
-        md_file,
-        to='pdf',
-        outputfile=pdf_file,
-        extra_args=[
-            '--include-in-header', preamble_file,
-            '--pdf-engine=pdflatex',
-            '-V', 'geometry:a4paper,margin=1in',
-            '--variable', 'documentclass:article'
-        ]
-    )
-    logger.info("Generated PDF slides (landscape): %s", pdf_file)
-    except Exception as e:
-        logger.error("Failed to generate PDF: %s", str(e))
-        raise
-
-    # Clean up temporary files
-    for temp_file in [md_file, preamble_file]:
-        if os.path.exists(temp_file):
-            os.remove(temp_file)
+    pdf.save(pdf_file)
 
+    logger.info("Generated PDF slides (landscape): %s", pdf_file)
     return pdf_file
 
 # Async function to update audio preview
@@ -355,19 +330,11 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
         """
         return
 
-
-    tts = None
-    try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
-        logger.info("TTS model initialized on %s", device)
-    except Exception as e:
-        logger.error("Failed to initialize TTS model: %s", str(e))
+    if not tts:
         yield f"""
         <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
-            <h2 style="color: #d9534f;">TTS model
-            <p style="margin-top: 20px;">
-            <p>Please ensure the Coqui TTS model is properly installed and try again.</p>
+            <h2 style="color: #d9534f;">TTS model not initialized</h2>
+            <p style="margin-top: 20px;">Please ensure the Coqui TTS model is properly installed and try again.</p>
         </div>
         """
         return
@@ -652,7 +619,7 @@ Example for 1 content slide:
                     current_text = ". ".join(sentences) + "."
                     logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
 
-                success = generate_xtts_audio(
+                success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
                 if not success:
                     raise RuntimeError("TTS generation failed")
 
@@ -668,7 +635,7 @@ Example for 1 content slide:
             except Exception as e:
                 logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
                 if attempt == max_retries:
-                    logger.error("Max retries
+                    logger.error("Max retries reached for slide %d, skipping", i + 1)
                     audio_files.append(None)
                     break
 
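Note on the new PDF path: this commit drops the pypandoc/LaTeX pipeline and builds the deck with the markdown-pdf package, one Section per slide, saved directly to slides.pdf. Below is a minimal standalone sketch of that flow; it only uses the MarkdownPdf, Section, add_section, and save calls that appear in the diff, omits the landscape preamble handling, and the sample_slides data and slides_preview.pdf path are illustrative, not taken from the commit.

    # Minimal sketch of the markdown-pdf flow used by the new generate_slides_pdf.
    # sample_slides below is made-up example data; app.py builds slides from agent output.
    import os
    from markdown_pdf import MarkdownPdf, Section

    OUTPUT_DIR = "outputs"
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    sample_slides = [
        {"title": "Introduction", "content": "Welcome to the lecture.\nToday: agents and XTTS."},
        {"title": "Wrap-up", "content": "Questions?"},
    ]

    pdf = MarkdownPdf()
    for slide in sample_slides:
        content_lines = slide["content"].replace("\n", "\n\n")
        markdown_content = f"# {slide['title']}\n\n*Prof. AI Feynman*\n\n{content_lines}\n"
        # toc=False keeps individual slides out of the PDF table of contents
        pdf.add_section(Section(markdown_content, toc=False))

    pdf_file = os.path.join(OUTPUT_DIR, "slides_preview.pdf")
    pdf.save(pdf_file)
    print("wrote", pdf_file)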
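The diff also changes the generate_xtts_audio signature to (text, speaker_wav, output_path) but does not show the body. A plausible sketch, based on Coqui TTS's public tts_to_file API and the module-level XTTS client the commit now creates at import time, is below; the language code, the error handling, and the from TTS.api import TTS line are assumptions, not confirmed by the commit.

    # Hypothetical body for generate_xtts_audio; the commit only shows the new signature.
    import logging

    import torch
    from TTS.api import TTS  # assumed import; app.py's full import list is not shown in the diff

    logger = logging.getLogger(__name__)

    # Same module-level initialization the commit adds near the top of app.py
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

    def generate_xtts_audio(text, speaker_wav, output_path):
        if not tts:
            logger.error("TTS model not initialized")
            return False
        try:
            # Voice-clone from the reference WAV and write the synthesized audio to output_path
            tts.tts_to_file(text=text, speaker_wav=speaker_wav, language="en", file_path=output_path)
            return True
        except Exception as e:
            logger.error("XTTS generation failed: %s", str(e))
            return False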