Jaward committed on
Commit
cd7a21b
·
verified ·
1 Parent(s): 585136c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -61
app.py CHANGED
@@ -5,7 +5,6 @@ import gradio as gr
5
  import asyncio
6
  import logging
7
  import torch
8
- import pypandoc
9
  from serpapi import GoogleSearch
10
  from pydantic import BaseModel
11
  from autogen_agentchat.agents import AssistantAgent
@@ -16,6 +15,7 @@ from autogen_agentchat.messages import TextMessage, HandoffMessage, StructuredMe
16
  from autogen_ext.models.anthropic import AnthropicChatCompletionClient
17
  from autogen_ext.models.openai import OpenAIChatCompletionClient
18
  from autogen_ext.models.ollama import OllamaChatCompletionClient
 
19
  import traceback
20
  import soundfile as sf
21
  import tempfile
@@ -38,6 +38,12 @@ OUTPUT_DIR = "outputs"
38
  os.makedirs(OUTPUT_DIR, exist_ok=True)
39
  os.environ["COQUI_TOS_AGREED"] = "1"
40
 
 
 
 
 
 
 
41
  # Define Pydantic model for slide data
42
  class Slide(BaseModel):
43
  title: str
@@ -174,7 +180,7 @@ async def validate_and_convert_speaker_audio(speaker_audio):
174
  return None
175
 
176
  # Helper function to generate audio using Coqui TTS API
177
- def generate_xtts_audio(tts, text, speaker_wav, output_path):
178
  if not tts:
179
  logger.error("TTS model not initialized")
180
  return False
@@ -271,70 +277,39 @@ def extract_json_from_message(message):
271
 
272
  # Function to generate Markdown and convert to PDF (landscape, centered)
273
  def generate_slides_pdf(slides):
274
- # Create Markdown content
275
- markdown_content = ""
 
 
 
 
 
 
 
276
  for slide in slides:
277
  content_lines = slide['content'].replace('\n', '\n\n')
278
- slide_content = f"""
279
  \\blandscape
280
 
 
281
  # {slide['title']}
282
 
283
  *Prof. AI Feynman*
284
  *Princeton University, April 26th, 2025*
285
 
286
  {content_lines}
 
287
 
288
  \\elandscape
289
 
290
  ---
291
  """
292
- markdown_content += slide_content
293
-
294
- # Write Markdown to a temporary file
295
- md_file = os.path.join(OUTPUT_DIR, "slides.md")
296
- with open(md_file, "w", encoding="utf-8") as f:
297
- f.write(markdown_content)
298
 
299
- # Define LaTeX preamble
300
- preamble = r"""
301
- \documentclass{article}
302
- \usepackage{pdflscape}
303
- \newcommand{\blandscape}{\begin{landscape}}
304
- \newcommand{\elandscape}{\end{landscape}}
305
- \usepackage{geometry}
306
- \geometry{a4paper, margin=1in}
307
- \begin{document}
308
- """
309
- # Write preamble to a temporary LaTeX file
310
- preamble_file = os.path.join(OUTPUT_DIR, "preamble.tex")
311
- with open(preamble_file, "w", encoding="utf-8") as f:
312
- f.write(preamble)
313
-
314
- # Convert Markdown to PDF using pypandoc
315
  pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
316
- try:
317
- pypandoc.convert_file(
318
- md_file,
319
- to='pdf',
320
- outputfile=pdf_file,
321
- extra_args=[
322
- '--include-in-header', preamble_file,
323
- '--pdf-engine=pdflatex',
324
- '-V', 'geometry:a4paper,margin=1in',
325
- '--variable', 'documentclass:article'
326
- ]
327
- )
328
- logger.info("Generated PDF slides (landscape): %s", pdf_file)
329
- except Exception as e:
330
- logger.error("Failed to generate PDF: %s", str(e))
331
- raise
332
-
333
- # Clean up temporary files
334
- for temp_file in [md_file, preamble_file]:
335
- if os.path.exists(temp_file):
336
- os.remove(temp_file)
337
 
 
338
  return pdf_file
339
 
340
  # Async function to update audio preview
@@ -355,19 +330,11 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
355
  """
356
  return
357
 
358
- # Initialize TTS model
359
- tts = None
360
- try:
361
- device = "cuda" if torch.cuda.is_available() else "cpu"
362
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
363
- logger.info("TTS model initialized on %s", device)
364
- except Exception as e:
365
- logger.error("Failed to initialize TTS model: %s", str(e))
366
  yield f"""
367
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
368
- <h2 style="color: #d9534f;">TTS model initialization failed</h2>
369
- <p style="margin-top: 20px;">Error: {str(e)}</p>
370
- <p>Please ensure the Coqui TTS model is properly installed and try again.</p>
371
  </div>
372
  """
373
  return
@@ -652,7 +619,7 @@ Example for 1 content slide:
652
  current_text = ". ".join(sentences) + "."
653
  logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
654
 
655
- success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
656
  if not success:
657
  raise RuntimeError("TTS generation failed")
658
 
@@ -668,7 +635,7 @@ Example for 1 content slide:
668
  except Exception as e:
669
  logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
670
  if attempt == max_retries:
671
- logger.error("Max retries raggiunto per slide %d, salto", i + 1)
672
  audio_files.append(None)
673
  break
674
 
 
5
  import asyncio
6
  import logging
7
  import torch
 
8
  from serpapi import GoogleSearch
9
  from pydantic import BaseModel
10
  from autogen_agentchat.agents import AssistantAgent
 
15
  from autogen_ext.models.anthropic import AnthropicChatCompletionClient
16
  from autogen_ext.models.openai import OpenAIChatCompletionClient
17
  from autogen_ext.models.ollama import OllamaChatCompletionClient
18
+ from markdown_pdf import MarkdownPdf, Section
19
  import traceback
20
  import soundfile as sf
21
  import tempfile
 
38
  os.makedirs(OUTPUT_DIR, exist_ok=True)
39
  os.environ["COQUI_TOS_AGREED"] = "1"
40
 
41
+ # Initialize TTS model
42
+ device = "cuda" if torch.cuda.is_available() else "cpu"
43
+
44
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
45
+ logger.info("TTS model initialized on %s", device)
46
+
47
  # Define Pydantic model for slide data
48
  class Slide(BaseModel):
49
  title: str
 
180
  return None
181
 
182
  # Helper function to generate audio using Coqui TTS API
183
+ def generate_xtts_audio(text, speaker_wav, output_path):
184
  if not tts:
185
  logger.error("TTS model not initialized")
186
  return False
 
277
 
278
  # Function to generate Markdown and convert to PDF (landscape, centered)
279
  def generate_slides_pdf(slides):
280
+ pdf = MarkdownPdf()
281
+ # Add LaTeX preamble for landscape orientation
282
+ preamble = r"""
283
+ \usepackage{pdflscape}
284
+ \newcommand{\blandscape}{\begin{landscape}}
285
+ \newcommand{\elandscape}{\end{landscape}}
286
+ """
287
+ pdf.set_preamble(preamble)
288
+
289
  for slide in slides:
290
  content_lines = slide['content'].replace('\n', '\n\n')
291
+ markdown_content = f"""
292
  \\blandscape
293
 
294
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
295
  # {slide['title']}
296
 
297
  *Prof. AI Feynman*
298
  *Princeton University, April 26th, 2025*
299
 
300
  {content_lines}
301
+ </div>
302
 
303
  \\elandscape
304
 
305
  ---
306
  """
307
+ pdf.add_section(Section(markdown_content, toc=False))
 
 
 
 
 
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
310
+ pdf.save(pdf_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
+ logger.info("Generated PDF slides (landscape): %s", pdf_file)
313
  return pdf_file
314
 
315
  # Async function to update audio preview
 
330
  """
331
  return
332
 
333
+ if not tts:
 
 
 
 
 
 
 
334
  yield f"""
335
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
336
+ <h2 style="color: #d9534f;">TTS model not initialized</h2>
337
+ <p style="margin-top: 20px;">Please ensure the Coqui TTS model is properly installed and try again.</p>
 
338
  </div>
339
  """
340
  return
 
619
  current_text = ". ".join(sentences) + "."
620
  logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
621
 
622
+ success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
623
  if not success:
624
  raise RuntimeError("TTS generation failed")
625
 
 
635
  except Exception as e:
636
  logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
637
  if attempt == max_retries:
638
+ logger.error("Max retries reached for slide %d, skipping", i + 1)
639
  audio_files.append(None)
640
  break
641