Update app.py
app.py
CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
 import asyncio
 import logging
 import torch
+import pypandoc
 from serpapi import GoogleSearch
 from pydantic import BaseModel
 from autogen_agentchat.agents import AssistantAgent

@@ -15,7 +16,6 @@ from autogen_agentchat.messages import TextMessage, HandoffMessage, StructuredMe
 from autogen_ext.models.anthropic import AnthropicChatCompletionClient
 from autogen_ext.models.openai import OpenAIChatCompletionClient
 from autogen_ext.models.ollama import OllamaChatCompletionClient
-from markdown_pdf import MarkdownPdf, Section
 import traceback
 import soundfile as sf
 import tempfile
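Note: the new pypandoc import replaces markdown_pdf for PDF generation. pypandoc is a thin wrapper around the external pandoc binary, and with --pdf-engine=pdflatex it also needs a LaTeX installation that provides the pdflscape and geometry packages. A minimal startup check is sketched below; get_pandoc_version() and download_pandoc() are real pypandoc helpers, but wiring them into app.py like this is an assumption, not part of this commit (download_pandoc() fetches only pandoc, not a TeX distribution).

# Sketch only (not in this commit): verify the pandoc binary is available at startup.
import logging
import pypandoc

logger = logging.getLogger(__name__)

def ensure_pandoc():
    try:
        logger.info("pandoc %s found", pypandoc.get_pandoc_version())
    except OSError:
        # pypandoc raises OSError when no pandoc binary is found on PATH.
        logger.warning("pandoc not found, downloading a local copy")
        pypandoc.download_pandoc()  # pdflatex still has to be installed separately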
@@ -38,11 +38,6 @@ OUTPUT_DIR = "outputs"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 os.environ["COQUI_TOS_AGREED"] = "1"
 
-# Initialize TTS model
-device = "cuda" if torch.cuda.is_available() else "cpu"
-tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
-logger.info("TTS model initialized on %s", device)
-
 # Define Pydantic model for slide data
 class Slide(BaseModel):
     title: str
@@ -179,7 +174,7 @@ async def validate_and_convert_speaker_audio(speaker_audio):
         return None
 
 # Helper function to generate audio using Coqui TTS API
-def generate_xtts_audio(text, speaker_wav, output_path):
+def generate_xtts_audio(tts, text, speaker_wav, output_path):
     if not tts:
         logger.error("TTS model not initialized")
         return False
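The hunk above only shows the new signature (the TTS instance is now passed in rather than read from a module-level global) and the guard; the rest of the body is unchanged and not visible here. For context, a minimal sketch of how such a helper typically drives Coqui XTTS — tts_to_file and its keyword arguments are the standard Coqui TTS API, but this body is an assumption, not the app's actual code:

# Sketch only (assumed body, not shown in the diff): synthesize speech with Coqui XTTS.
import logging

logger = logging.getLogger(__name__)

def generate_xtts_audio(tts, text, speaker_wav, output_path):
    if not tts:
        logger.error("TTS model not initialized")
        return False
    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,  # reference voice sample used for cloning
            language="en",            # assumed default language
            file_path=output_path,
        )
        return True
    except Exception as e:
        logger.error("TTS generation failed: %s", str(e))
        return False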
@@ -276,39 +271,70 @@ def extract_json_from_message(message):
 
 # Function to generate Markdown and convert to PDF (landscape, centered)
 def generate_slides_pdf(slides):
-
-
-    preamble = r"""
-\usepackage{pdflscape}
-\newcommand{\blandscape}{\begin{landscape}}
-\newcommand{\elandscape}{\end{landscape}}
-"""
-    pdf.set_preamble(preamble)
-
+    # Create Markdown content
+    markdown_content = ""
     for slide in slides:
         content_lines = slide['content'].replace('\n', '\n\n')
-
+        slide_content = f"""
 \\blandscape
 
-<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
 # {slide['title']}
 
 *Prof. AI Feynman*
 *Princeton University, April 26th, 2025*
 
 {content_lines}
-</div>
 
 \\elandscape
 
 ---
 """
-
+        markdown_content += slide_content
+
+    # Write Markdown to a temporary file
+    md_file = os.path.join(OUTPUT_DIR, "slides.md")
+    with open(md_file, "w", encoding="utf-8") as f:
+        f.write(markdown_content)
+
+    # Define LaTeX preamble
+    preamble = r"""
+\documentclass{article}
+\usepackage{pdflscape}
+\newcommand{\blandscape}{\begin{landscape}}
+\newcommand{\elandscape}{\end{landscape}}
+\usepackage{geometry}
+\geometry{a4paper, margin=1in}
+\begin{document}
+"""
+    # Write preamble to a temporary LaTeX file
+    preamble_file = os.path.join(OUTPUT_DIR, "preamble.tex")
+    with open(preamble_file, "w", encoding="utf-8") as f:
+        f.write(preamble)
 
+    # Convert Markdown to PDF using pypandoc
     pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
-
+    try:
+        pypandoc.convert_file(
+            md_file,
+            to='pdf',
+            outputfile=pdf_file,
+            extra_args=[
+                '--include-in-header', preamble_file,
+                '--pdf-engine=pdflatex',
+                '-V', 'geometry:a4paper,margin=1in',
+                '--variable', 'documentclass:article'
+            ]
+        )
+        logger.info("Generated PDF slides (landscape): %s", pdf_file)
+    except Exception as e:
+        logger.error("Failed to generate PDF: %s", str(e))
+        raise
+
+    # Clean up temporary files
+    for temp_file in [md_file, preamble_file]:
+        if os.path.exists(temp_file):
+            os.remove(temp_file)
 
-    logger.info("Generated PDF slides (landscape): %s", pdf_file)
     return pdf_file
 
 # Async function to update audio preview
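For reference, generate_slides_pdf expects a list of dicts with 'title' and 'content' keys, as used inside the loop above. A minimal usage sketch (the slide data is invented for illustration):

# Sketch only: illustrative call; slide titles and content are made up.
slides = [
    {"title": "Introduction", "content": "Why this topic matters\nOutline of the lecture"},
    {"title": "Conclusions", "content": "Key takeaways\nQuestions welcome"},
]
pdf_path = generate_slides_pdf(slides)  # writes outputs/slides.pdf via pandoc/pdflatex
print(pdf_path)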
@@ -329,11 +355,19 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
         """
         return
 
-
+    # Initialize TTS model
+    tts = None
+    try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+        logger.info("TTS model initialized on %s", device)
+    except Exception as e:
+        logger.error("Failed to initialize TTS model: %s", str(e))
         yield f"""
         <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
-            <h2 style="color: #d9534f;">TTS model
-            <p style="margin-top: 20px;">
+            <h2 style="color: #d9534f;">TTS model initialization failed</h2>
+            <p style="margin-top: 20px;">Error: {str(e)}</p>
+            <p>Please ensure the Coqui TTS model is properly installed and try again.</p>
         </div>
         """
         return
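With this change the XTTS model is loaded inside on_generate, so an import-time failure no longer crashes the Space, but the large model is reloaded on every request. One possible refinement, not part of this commit, is to cache the model after the first successful load and pass the cached instance to generate_xtts_audio:

# Sketch only (hypothetical helper, not in this commit): load XTTS at most once per process.
import torch
from TTS.api import TTS

_tts_cache = None

def get_tts():
    global _tts_cache
    if _tts_cache is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        _tts_cache = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
    return _tts_cache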
@@ -618,7 +652,7 @@ Example for 1 content slide:
                 current_text = ". ".join(sentences) + "."
                 logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
 
-                success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
+                success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
                 if not success:
                     raise RuntimeError("TTS generation failed")
 
@@ -634,7 +668,7 @@ Example for 1 content slide:
             except Exception as e:
                 logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
                 if attempt == max_retries:
-                    logger.error("Max retries
+                    logger.error("Max retries reached for slide %d, skipping", i + 1)
                     audio_files.append(None)
                     break
 