Jaward committed on
Commit
585136c
·
verified ·
1 Parent(s): 1c2cb75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -27
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
5
  import asyncio
6
  import logging
7
  import torch
 
8
  from serpapi import GoogleSearch
9
  from pydantic import BaseModel
10
  from autogen_agentchat.agents import AssistantAgent
@@ -15,7 +16,6 @@ from autogen_agentchat.messages import TextMessage, HandoffMessage, StructuredMe
15
  from autogen_ext.models.anthropic import AnthropicChatCompletionClient
16
  from autogen_ext.models.openai import OpenAIChatCompletionClient
17
  from autogen_ext.models.ollama import OllamaChatCompletionClient
18
- from markdown_pdf import MarkdownPdf, Section
19
  import traceback
20
  import soundfile as sf
21
  import tempfile
@@ -38,11 +38,6 @@ OUTPUT_DIR = "outputs"
38
  os.makedirs(OUTPUT_DIR, exist_ok=True)
39
  os.environ["COQUI_TOS_AGREED"] = "1"
40
 
41
- # Initialize TTS model
42
- device = "cuda" if torch.cuda.is_available() else "cpu"
43
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
44
- logger.info("TTS model initialized on %s", device)
45
-
46
  # Define Pydantic model for slide data
47
  class Slide(BaseModel):
48
  title: str
@@ -179,7 +174,7 @@ async def validate_and_convert_speaker_audio(speaker_audio):
179
  return None
180
 
181
  # Helper function to generate audio using Coqui TTS API
182
- def generate_xtts_audio(text, speaker_wav, output_path):
183
  if not tts:
184
  logger.error("TTS model not initialized")
185
  return False
@@ -276,39 +271,70 @@ def extract_json_from_message(message):
276
 
277
  # Function to generate Markdown and convert to PDF (landscape, centered)
278
  def generate_slides_pdf(slides):
279
- pdf = MarkdownPdf()
280
- # Add LaTeX preamble for landscape orientation
281
- preamble = r"""
282
- \usepackage{pdflscape}
283
- \newcommand{\blandscape}{\begin{landscape}}
284
- \newcommand{\elandscape}{\end{landscape}}
285
- """
286
- pdf.set_preamble(preamble)
287
-
288
  for slide in slides:
289
  content_lines = slide['content'].replace('\n', '\n\n')
290
- markdown_content = f"""
291
  \\blandscape
292
 
293
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
294
  # {slide['title']}
295
 
296
  *Prof. AI Feynman*
297
  *Princeton University, April 26th, 2025*
298
 
299
  {content_lines}
300
- </div>
301
 
302
  \\elandscape
303
 
304
  ---
305
  """
306
- pdf.add_section(Section(markdown_content, toc=False))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
 
308
  pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
309
- pdf.save(pdf_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
- logger.info("Generated PDF slides (landscape): %s", pdf_file)
312
  return pdf_file
313
 
314
  # Async function to update audio preview
@@ -329,11 +355,19 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
329
  """
330
  return
331
 
332
- if not tts:
 
 
 
 
 
 
 
333
  yield f"""
334
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
335
- <h2 style="color: #d9534f;">TTS model not initialized</h2>
336
- <p style="margin-top: 20px;">Please ensure the Coqui TTS model is properly installed and try again.</p>
 
337
  </div>
338
  """
339
  return
@@ -618,7 +652,7 @@ Example for 1 content slide:
618
  current_text = ". ".join(sentences) + "."
619
  logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
620
 
621
- success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
622
  if not success:
623
  raise RuntimeError("TTS generation failed")
624
 
@@ -634,7 +668,7 @@ Example for 1 content slide:
634
  except Exception as e:
635
  logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
636
  if attempt == max_retries:
637
- logger.error("Max retries reached for slide %d, skipping", i + 1)
638
  audio_files.append(None)
639
  break
640
 
 
5
  import asyncio
6
  import logging
7
  import torch
8
+ import pypandoc
9
  from serpapi import GoogleSearch
10
  from pydantic import BaseModel
11
  from autogen_agentchat.agents import AssistantAgent
 
16
  from autogen_ext.models.anthropic import AnthropicChatCompletionClient
17
  from autogen_ext.models.openai import OpenAIChatCompletionClient
18
  from autogen_ext.models.ollama import OllamaChatCompletionClient
 
19
  import traceback
20
  import soundfile as sf
21
  import tempfile
 
38
  os.makedirs(OUTPUT_DIR, exist_ok=True)
39
  os.environ["COQUI_TOS_AGREED"] = "1"
40
 
 
 
 
 
 
41
  # Define Pydantic model for slide data
42
  class Slide(BaseModel):
43
  title: str
 
174
  return None
175
 
176
  # Helper function to generate audio using Coqui TTS API
177
+ def generate_xtts_audio(tts, text, speaker_wav, output_path):
178
  if not tts:
179
  logger.error("TTS model not initialized")
180
  return False
 
271
 
272
  # Function to generate Markdown and convert to PDF (landscape, centered)
273
  def generate_slides_pdf(slides):
274
+ # Create Markdown content
275
+ markdown_content = ""
 
 
 
 
 
 
 
276
  for slide in slides:
277
  content_lines = slide['content'].replace('\n', '\n\n')
278
+ slide_content = f"""
279
  \\blandscape
280
 
 
281
  # {slide['title']}
282
 
283
  *Prof. AI Feynman*
284
  *Princeton University, April 26th, 2025*
285
 
286
  {content_lines}
 
287
 
288
  \\elandscape
289
 
290
  ---
291
  """
292
+ markdown_content += slide_content
293
+
294
+ # Write Markdown to a temporary file
295
+ md_file = os.path.join(OUTPUT_DIR, "slides.md")
296
+ with open(md_file, "w", encoding="utf-8") as f:
297
+ f.write(markdown_content)
298
+
299
+ # Define LaTeX preamble
300
+ preamble = r"""
301
+ \documentclass{article}
302
+ \usepackage{pdflscape}
303
+ \newcommand{\blandscape}{\begin{landscape}}
304
+ \newcommand{\elandscape}{\end{landscape}}
305
+ \usepackage{geometry}
306
+ \geometry{a4paper, margin=1in}
307
+ \begin{document}
308
+ """
309
+ # Write preamble to a temporary LaTeX file
310
+ preamble_file = os.path.join(OUTPUT_DIR, "preamble.tex")
311
+ with open(preamble_file, "w", encoding="utf-8") as f:
312
+ f.write(preamble)
313
 
314
+ # Convert Markdown to PDF using pypandoc
315
  pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
316
+ try:
317
+ pypandoc.convert_file(
318
+ md_file,
319
+ to='pdf',
320
+ outputfile=pdf_file,
321
+ extra_args=[
322
+ '--include-in-header', preamble_file,
323
+ '--pdf-engine=pdflatex',
324
+ '-V', 'geometry:a4paper,margin=1in',
325
+ '--variable', 'documentclass:article'
326
+ ]
327
+ )
328
+ logger.info("Generated PDF slides (landscape): %s", pdf_file)
329
+ except Exception as e:
330
+ logger.error("Failed to generate PDF: %s", str(e))
331
+ raise
332
+
333
+ # Clean up temporary files
334
+ for temp_file in [md_file, preamble_file]:
335
+ if os.path.exists(temp_file):
336
+ os.remove(temp_file)
337
 
 
338
  return pdf_file
339
 
340
  # Async function to update audio preview
 
355
  """
356
  return
357
 
358
+ # Initialize TTS model
359
+ tts = None
360
+ try:
361
+ device = "cuda" if torch.cuda.is_available() else "cpu"
362
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
363
+ logger.info("TTS model initialized on %s", device)
364
+ except Exception as e:
365
+ logger.error("Failed to initialize TTS model: %s", str(e))
366
  yield f"""
367
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
368
+ <h2 style="color: #d9534f;">TTS model initialization failed</h2>
369
+ <p style="margin-top: 20px;">Error: {str(e)}</p>
370
+ <p>Please ensure the Coqui TTS model is properly installed and try again.</p>
371
  </div>
372
  """
373
  return
 
652
  current_text = ". ".join(sentences) + "."
653
  logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
654
 
655
+ success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
656
  if not success:
657
  raise RuntimeError("TTS generation failed")
658
 
 
668
  except Exception as e:
669
  logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
670
  if attempt == max_retries:
671
+ logger.error("Max retries raggiunto per slide %d, salto", i + 1)
672
  audio_files.append(None)
673
  break
674