Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,15 +33,22 @@ logging.basicConfig(
|
|
| 33 |
logger = logging.getLogger(__name__)
|
| 34 |
|
| 35 |
# Set up environment
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
| 37 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 38 |
logger.info(f"Using output directory: {OUTPUT_DIR}")
|
| 39 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Define Pydantic model for slide data
|
| 47 |
class Slide(BaseModel):
|
|
@@ -394,6 +401,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 394 |
label = "Research: in progress..."
|
| 395 |
yield (
|
| 396 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 397 |
[]
|
| 398 |
)
|
| 399 |
await asyncio.sleep(0.1)
|
|
@@ -440,6 +449,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 440 |
label = "Slides: generating..."
|
| 441 |
yield (
|
| 442 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 443 |
[]
|
| 444 |
)
|
| 445 |
await asyncio.sleep(0.1)
|
|
@@ -465,6 +476,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 465 |
label = "Scripts: generating..."
|
| 466 |
yield (
|
| 467 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 468 |
[]
|
| 469 |
)
|
| 470 |
await asyncio.sleep(0.1)
|
|
@@ -479,6 +492,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 479 |
label = "Review: in progress..."
|
| 480 |
yield (
|
| 481 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 482 |
[]
|
| 483 |
)
|
| 484 |
await asyncio.sleep(0.1)
|
|
@@ -489,6 +504,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 489 |
label = "Slides: generating..."
|
| 490 |
yield (
|
| 491 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 492 |
[]
|
| 493 |
)
|
| 494 |
await asyncio.sleep(0.1)
|
|
@@ -522,6 +539,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 522 |
label = "Scripts: generating..."
|
| 523 |
yield (
|
| 524 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 525 |
[]
|
| 526 |
)
|
| 527 |
await asyncio.sleep(0.1)
|
|
@@ -556,6 +575,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 556 |
label = "Scripts generated and saved. Reviewing..."
|
| 557 |
yield (
|
| 558 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 559 |
[]
|
| 560 |
)
|
| 561 |
await asyncio.sleep(0.1)
|
|
@@ -578,6 +599,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 578 |
label = "Lecture materials ready. Generating audio..."
|
| 579 |
yield (
|
| 580 |
html_with_progress(label, progress),
|
|
|
|
|
|
|
| 581 |
[]
|
| 582 |
)
|
| 583 |
await asyncio.sleep(0.1)
|
|
@@ -593,12 +616,9 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 593 |
source = getattr(msg, 'source', getattr(msg, 'sender', None))
|
| 594 |
logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
|
| 595 |
yield (
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
<p style="margin-top: 20px;">Please try again with a different model or adjust your inputs.</p>
|
| 600 |
-
</div>
|
| 601 |
-
""",
|
| 602 |
[]
|
| 603 |
)
|
| 604 |
return
|
|
@@ -612,6 +632,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 612 |
<p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
|
| 613 |
</div>
|
| 614 |
""",
|
|
|
|
|
|
|
| 615 |
[]
|
| 616 |
)
|
| 617 |
return
|
|
@@ -625,6 +647,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 625 |
<p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
|
| 626 |
</div>
|
| 627 |
""",
|
|
|
|
|
|
|
| 628 |
[]
|
| 629 |
)
|
| 630 |
return
|
|
@@ -638,6 +662,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 638 |
<p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
|
| 639 |
</div>
|
| 640 |
""",
|
|
|
|
|
|
|
| 641 |
[]
|
| 642 |
)
|
| 643 |
return
|
|
@@ -652,127 +678,41 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 652 |
<p style="margin-top: 20px;">Please try again.</p>
|
| 653 |
</div>
|
| 654 |
""",
|
|
|
|
|
|
|
| 655 |
[]
|
| 656 |
)
|
| 657 |
return
|
| 658 |
|
| 659 |
-
audio_files = []
|
| 660 |
-
audio_urls = []
|
| 661 |
-
validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
|
| 662 |
-
if not validated_speaker_wav:
|
| 663 |
-
logger.error("Invalid speaker audio after conversion, skipping TTS")
|
| 664 |
-
yield (
|
| 665 |
-
f"""
|
| 666 |
-
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
| 667 |
-
<h2 style="color: #d9534f;">Invalid speaker audio</h2>
|
| 668 |
-
<p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
|
| 669 |
-
</div>
|
| 670 |
-
""",
|
| 671 |
-
[]
|
| 672 |
-
)
|
| 673 |
-
return
|
| 674 |
-
|
| 675 |
-
for i, script in enumerate(scripts):
|
| 676 |
-
cleaned_script = clean_script_text(script)
|
| 677 |
-
audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.mp3")
|
| 678 |
-
script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
|
| 679 |
-
|
| 680 |
-
try:
|
| 681 |
-
with open(script_file, "w", encoding="utf-8") as f:
|
| 682 |
-
f.write(cleaned_script or "")
|
| 683 |
-
logger.info("Saved script to %s: %s", script_file, cleaned_script)
|
| 684 |
-
except Exception as e:
|
| 685 |
-
logger.error("Error saving script to %s: %s", script_file, str(e))
|
| 686 |
-
|
| 687 |
-
if not cleaned_script:
|
| 688 |
-
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
| 689 |
-
audio_files.append(None)
|
| 690 |
-
audio_urls.append(None)
|
| 691 |
-
progress = 90 + ((i + 1) / len(scripts)) * 10
|
| 692 |
-
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 693 |
-
yield (
|
| 694 |
-
html_with_progress(label, progress),
|
| 695 |
-
[]
|
| 696 |
-
)
|
| 697 |
-
await asyncio.sleep(0.1)
|
| 698 |
-
continue
|
| 699 |
-
|
| 700 |
-
max_audio_retries = 2
|
| 701 |
-
for attempt in range(max_audio_retries + 1):
|
| 702 |
-
try:
|
| 703 |
-
current_text = cleaned_script
|
| 704 |
-
if attempt > 0:
|
| 705 |
-
sentences = re.split(r"[.!?]+", cleaned_script)
|
| 706 |
-
sentences = [s.strip() for s in sentences if s.strip()][:2]
|
| 707 |
-
current_text = ". ".join(sentences) + "."
|
| 708 |
-
logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
|
| 709 |
-
|
| 710 |
-
success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
|
| 711 |
-
if not success:
|
| 712 |
-
raise RuntimeError("TTS generation failed")
|
| 713 |
-
|
| 714 |
-
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
| 715 |
-
audio_files.append(audio_file)
|
| 716 |
-
# Use Gradio's file serving URL
|
| 717 |
-
audio_urls.append(f"/gradio_api/file={audio_file}")
|
| 718 |
-
progress = 90 + ((i + 1) / len(scripts)) * 10
|
| 719 |
-
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 720 |
-
yield (
|
| 721 |
-
html_with_progress(label, progress),
|
| 722 |
-
[]
|
| 723 |
-
)
|
| 724 |
-
await asyncio.sleep(0.1)
|
| 725 |
-
break
|
| 726 |
-
except Exception as e:
|
| 727 |
-
logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
|
| 728 |
-
if attempt == max_audio_retries:
|
| 729 |
-
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
| 730 |
-
audio_files.append(None)
|
| 731 |
-
audio_urls.append(None)
|
| 732 |
-
progress = 90 + ((i + 1) / len(scripts)) * 10
|
| 733 |
-
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 734 |
-
yield (
|
| 735 |
-
html_with_progress(label, progress),
|
| 736 |
-
[]
|
| 737 |
-
)
|
| 738 |
-
await asyncio.sleep(0.1)
|
| 739 |
-
break
|
| 740 |
-
|
| 741 |
# Collect .txt files for download
|
| 742 |
txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
|
| 743 |
txt_files.sort() # Sort for consistent display
|
| 744 |
txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
|
| 745 |
|
| 746 |
-
#
|
|
|
|
| 747 |
audio_timeline = ""
|
| 748 |
-
for i
|
| 749 |
-
|
| 750 |
-
audio_timeline += f'<audio id="audio-{i+1}" controls src="{audio_url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
|
| 751 |
-
else:
|
| 752 |
-
audio_timeline += f'<span id="audio-{i+1}" style="display: inline-block; margin: 0 10px;">slide_{i+1}.mp3 (not generated)</span>'
|
| 753 |
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
<script src="https://cdn.jsdelivr.net/npm/[email protected]/marked.min.js"></script>
|
| 758 |
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
| 759 |
-
<div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff; color: #333;">
|
| 760 |
-
<!-- Slides will be rendered here -->
|
| 761 |
-
</div>
|
| 762 |
<div style="padding: 20px; text-align: center;">
|
| 763 |
-
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
| 764 |
{audio_timeline}
|
| 765 |
</div>
|
| 766 |
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
| 767 |
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
| 768 |
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
| 769 |
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
| 770 |
-
<button style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"
|
| 771 |
</div>
|
| 772 |
</div>
|
| 773 |
</div>
|
| 774 |
<script>
|
| 775 |
-
const lectureData = {
|
| 776 |
let currentSlide = 0;
|
| 777 |
const totalSlides = lectureData.slides.length;
|
| 778 |
let audioElements = [];
|
|
@@ -783,26 +723,15 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 783 |
audioElements.push(audio);
|
| 784 |
}}
|
| 785 |
|
| 786 |
-
function
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
const markdownText = lectureData.slides[currentSlide];
|
| 790 |
-
const htmlContent = marked.parse(markdownText);
|
| 791 |
-
slideContent.innerHTML = htmlContent;
|
| 792 |
-
console.log("Rendering slide:", markdownText);
|
| 793 |
-
console.log("Rendered HTML:", htmlContent);
|
| 794 |
-
}} else {{
|
| 795 |
-
slideContent.innerHTML = '<h2>No slide content available</h2>';
|
| 796 |
-
console.log("No slide content for index:", currentSlide);
|
| 797 |
-
}}
|
| 798 |
-
}}
|
| 799 |
-
|
| 800 |
-
function updateSlide() {{
|
| 801 |
-
renderSlide();
|
| 802 |
-
audioElements.forEach(audio => {{
|
| 803 |
if (audio && audio.pause) {{
|
| 804 |
audio.pause();
|
| 805 |
audio.currentTime = 0;
|
|
|
|
|
|
|
|
|
|
| 806 |
}}
|
| 807 |
}});
|
| 808 |
}}
|
|
@@ -810,23 +739,25 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 810 |
function prevSlide() {{
|
| 811 |
if (currentSlide > 0) {{
|
| 812 |
currentSlide--;
|
| 813 |
-
|
| 814 |
}}
|
| 815 |
}}
|
| 816 |
|
| 817 |
function nextSlide() {{
|
| 818 |
if (currentSlide < totalSlides - 1) {{
|
| 819 |
currentSlide++;
|
| 820 |
-
|
| 821 |
}}
|
| 822 |
}}
|
| 823 |
|
| 824 |
function playAll() {{
|
| 825 |
-
let index =
|
| 826 |
function playNext() {{
|
| 827 |
if (index >= totalSlides) return;
|
|
|
|
|
|
|
| 828 |
const audio = audioElements[index];
|
| 829 |
-
if (audio && audio.
|
| 830 |
audio.play().then(() => {{
|
| 831 |
audio.addEventListener('ended', () => {{
|
| 832 |
index++;
|
|
@@ -845,20 +776,226 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 845 |
playNext();
|
| 846 |
}}
|
| 847 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 848 |
// Attach event listeners
|
| 849 |
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
| 850 |
document.getElementById('play-btn').addEventListener('click', playAll);
|
| 851 |
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
|
|
|
| 852 |
|
| 853 |
// Initialize first slide
|
| 854 |
-
|
| 855 |
</script>
|
| 856 |
"""
|
| 857 |
-
logger.info("Lecture generation completed successfully")
|
| 858 |
yield (
|
| 859 |
-
|
| 860 |
-
txt_file_paths
|
|
|
|
|
|
|
| 861 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 862 |
|
| 863 |
except Exception as e:
|
| 864 |
logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
|
|
@@ -870,6 +1007,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
| 870 |
<p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
|
| 871 |
</div>
|
| 872 |
""",
|
|
|
|
|
|
|
| 873 |
[]
|
| 874 |
)
|
| 875 |
return
|
|
@@ -905,7 +1044,8 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
| 905 |
<p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
|
| 906 |
</div>
|
| 907 |
"""
|
| 908 |
-
slide_display = gr.
|
|
|
|
| 909 |
file_output = gr.File(label="Download Generated Files")
|
| 910 |
|
| 911 |
speaker_audio.change(
|
|
@@ -914,11 +1054,26 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
| 914 |
outputs=speaker_audio
|
| 915 |
)
|
| 916 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 917 |
generate_btn.click(
|
| 918 |
fn=on_generate,
|
| 919 |
inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
|
| 920 |
-
outputs=[slide_display,
|
| 921 |
)
|
| 922 |
|
| 923 |
if __name__ == "__main__":
|
| 924 |
-
demo.launch(allowed_paths=[OUTPUT_DIR]
|
|
|
|
| 33 |
logger = logging.getLogger(__name__)
|
| 34 |
|
| 35 |
# Set up environment
|
| 36 |
+
if os.path.exists("/tmp"):
|
| 37 |
+
OUTPUT_DIR = "/tmp/outputs" # Use /tmp for Huggingface Spaces
|
| 38 |
+
else:
|
| 39 |
+
OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") # Fallback for local dev
|
| 40 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 41 |
logger.info(f"Using output directory: {OUTPUT_DIR}")
|
| 42 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 43 |
|
| 44 |
+
# Initialize TTS model at the top
|
| 45 |
+
try:
|
| 46 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 47 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
| 48 |
+
logger.info("TTS model initialized on %s", device)
|
| 49 |
+
except Exception as e:
|
| 50 |
+
logger.error("Failed to initialize TTS model: %s", str(e))
|
| 51 |
+
tts = None
|
| 52 |
|
| 53 |
# Define Pydantic model for slide data
|
| 54 |
class Slide(BaseModel):
|
|
|
|
| 401 |
label = "Research: in progress..."
|
| 402 |
yield (
|
| 403 |
html_with_progress(label, progress),
|
| 404 |
+
[],
|
| 405 |
+
"",
|
| 406 |
[]
|
| 407 |
)
|
| 408 |
await asyncio.sleep(0.1)
|
|
|
|
| 449 |
label = "Slides: generating..."
|
| 450 |
yield (
|
| 451 |
html_with_progress(label, progress),
|
| 452 |
+
[],
|
| 453 |
+
"",
|
| 454 |
[]
|
| 455 |
)
|
| 456 |
await asyncio.sleep(0.1)
|
|
|
|
| 476 |
label = "Scripts: generating..."
|
| 477 |
yield (
|
| 478 |
html_with_progress(label, progress),
|
| 479 |
+
[],
|
| 480 |
+
"",
|
| 481 |
[]
|
| 482 |
)
|
| 483 |
await asyncio.sleep(0.1)
|
|
|
|
| 492 |
label = "Review: in progress..."
|
| 493 |
yield (
|
| 494 |
html_with_progress(label, progress),
|
| 495 |
+
[],
|
| 496 |
+
"",
|
| 497 |
[]
|
| 498 |
)
|
| 499 |
await asyncio.sleep(0.1)
|
|
|
|
| 504 |
label = "Slides: generating..."
|
| 505 |
yield (
|
| 506 |
html_with_progress(label, progress),
|
| 507 |
+
[],
|
| 508 |
+
"",
|
| 509 |
[]
|
| 510 |
)
|
| 511 |
await asyncio.sleep(0.1)
|
|
|
|
| 539 |
label = "Scripts: generating..."
|
| 540 |
yield (
|
| 541 |
html_with_progress(label, progress),
|
| 542 |
+
[],
|
| 543 |
+
"",
|
| 544 |
[]
|
| 545 |
)
|
| 546 |
await asyncio.sleep(0.1)
|
|
|
|
| 575 |
label = "Scripts generated and saved. Reviewing..."
|
| 576 |
yield (
|
| 577 |
html_with_progress(label, progress),
|
| 578 |
+
[],
|
| 579 |
+
"",
|
| 580 |
[]
|
| 581 |
)
|
| 582 |
await asyncio.sleep(0.1)
|
|
|
|
| 599 |
label = "Lecture materials ready. Generating audio..."
|
| 600 |
yield (
|
| 601 |
html_with_progress(label, progress),
|
| 602 |
+
[],
|
| 603 |
+
"",
|
| 604 |
[]
|
| 605 |
)
|
| 606 |
await asyncio.sleep(0.1)
|
|
|
|
| 616 |
source = getattr(msg, 'source', getattr(msg, 'sender', None))
|
| 617 |
logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
|
| 618 |
yield (
|
| 619 |
+
error_html,
|
| 620 |
+
[],
|
| 621 |
+
"",
|
|
|
|
|
|
|
|
|
|
| 622 |
[]
|
| 623 |
)
|
| 624 |
return
|
|
|
|
| 632 |
<p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
|
| 633 |
</div>
|
| 634 |
""",
|
| 635 |
+
[],
|
| 636 |
+
"",
|
| 637 |
[]
|
| 638 |
)
|
| 639 |
return
|
|
|
|
| 647 |
<p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
|
| 648 |
</div>
|
| 649 |
""",
|
| 650 |
+
[],
|
| 651 |
+
"",
|
| 652 |
[]
|
| 653 |
)
|
| 654 |
return
|
|
|
|
| 662 |
<p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
|
| 663 |
</div>
|
| 664 |
""",
|
| 665 |
+
[],
|
| 666 |
+
"",
|
| 667 |
[]
|
| 668 |
)
|
| 669 |
return
|
|
|
|
| 678 |
<p style="margin-top: 20px;">Please try again.</p>
|
| 679 |
</div>
|
| 680 |
""",
|
| 681 |
+
[],
|
| 682 |
+
"",
|
| 683 |
[]
|
| 684 |
)
|
| 685 |
return
|
| 686 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
# Collect .txt files for download
|
| 688 |
txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
|
| 689 |
txt_files.sort() # Sort for consistent display
|
| 690 |
txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
|
| 691 |
|
| 692 |
+
# Initialize audio timeline placeholders
|
| 693 |
+
audio_urls = [None] * len(scripts)
|
| 694 |
audio_timeline = ""
|
| 695 |
+
for i in range(len(scripts)):
|
| 696 |
+
audio_timeline += f'<audio id="audio-{i+1}" controls style="display: inline-block; margin: 0 10px; width: 200px;"><source src="" type="audio/mpeg"></audio>'
|
|
|
|
|
|
|
|
|
|
| 697 |
|
| 698 |
+
# Display lecture materials immediately
|
| 699 |
+
slides_json = json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})
|
| 700 |
+
html_controls = f"""
|
|
|
|
| 701 |
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
|
|
|
|
|
|
|
|
|
| 702 |
<div style="padding: 20px; text-align: center;">
|
| 703 |
+
<div id="audio-timeline" style="display: flex; justify-content: center; margin-bottom: 10px;">
|
| 704 |
{audio_timeline}
|
| 705 |
</div>
|
| 706 |
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
| 707 |
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
| 708 |
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
| 709 |
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
| 710 |
+
<button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">🖥️</button>
|
| 711 |
</div>
|
| 712 |
</div>
|
| 713 |
</div>
|
| 714 |
<script>
|
| 715 |
+
const lectureData = {slides_json};
|
| 716 |
let currentSlide = 0;
|
| 717 |
const totalSlides = lectureData.slides.length;
|
| 718 |
let audioElements = [];
|
|
|
|
| 723 |
audioElements.push(audio);
|
| 724 |
}}
|
| 725 |
|
| 726 |
+
function updateSlideDisplay() {{
|
| 727 |
+
window.updateSlideContent(lectureData.slides[currentSlide]);
|
| 728 |
+
audioElements.forEach((audio, index) => {{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
if (audio && audio.pause) {{
|
| 730 |
audio.pause();
|
| 731 |
audio.currentTime = 0;
|
| 732 |
+
if (index === currentSlide && audio.src) {{
|
| 733 |
+
audio.play().catch(e => console.error('Audio play failed:', e));
|
| 734 |
+
}}
|
| 735 |
}}
|
| 736 |
}});
|
| 737 |
}}
|
|
|
|
| 739 |
function prevSlide() {{
|
| 740 |
if (currentSlide > 0) {{
|
| 741 |
currentSlide--;
|
| 742 |
+
updateSlideDisplay();
|
| 743 |
}}
|
| 744 |
}}
|
| 745 |
|
| 746 |
function nextSlide() {{
|
| 747 |
if (currentSlide < totalSlides - 1) {{
|
| 748 |
currentSlide++;
|
| 749 |
+
updateSlideDisplay();
|
| 750 |
}}
|
| 751 |
}}
|
| 752 |
|
| 753 |
function playAll() {{
|
| 754 |
+
let index = currentSlide;
|
| 755 |
function playNext() {{
|
| 756 |
if (index >= totalSlides) return;
|
| 757 |
+
currentSlide = index;
|
| 758 |
+
updateSlideDisplay();
|
| 759 |
const audio = audioElements[index];
|
| 760 |
+
if (audio && audio.src) {{
|
| 761 |
audio.play().then(() => {{
|
| 762 |
audio.addEventListener('ended', () => {{
|
| 763 |
index++;
|
|
|
|
| 776 |
playNext();
|
| 777 |
}}
|
| 778 |
|
| 779 |
+
function toggleFullScreen() {{
|
| 780 |
+
const container = document.getElementById('lecture-container');
|
| 781 |
+
if (!document.fullscreenElement) {{
|
| 782 |
+
container.requestFullscreen().catch(err => {{
|
| 783 |
+
console.error(`Error attempting to enable full-screen mode: ${{err.message}}`);
|
| 784 |
+
}});
|
| 785 |
+
}} else {{
|
| 786 |
+
document.exitFullscreen();
|
| 787 |
+
}}
|
| 788 |
+
}}
|
| 789 |
+
|
| 790 |
// Attach event listeners
|
| 791 |
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
| 792 |
document.getElementById('play-btn').addEventListener('click', playAll);
|
| 793 |
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
| 794 |
+
document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
|
| 795 |
|
| 796 |
// Initialize first slide
|
| 797 |
+
updateSlideDisplay();
|
| 798 |
</script>
|
| 799 |
"""
|
|
|
|
| 800 |
yield (
|
| 801 |
+
html_controls,
|
| 802 |
+
txt_file_paths,
|
| 803 |
+
markdown_slides[0],
|
| 804 |
+
[]
|
| 805 |
)
|
| 806 |
+
|
| 807 |
+
# Audio generation
|
| 808 |
+
audio_files = []
|
| 809 |
+
validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
|
| 810 |
+
if not validated_speaker_wav:
|
| 811 |
+
logger.error("Invalid speaker audio after conversion, skipping TTS")
|
| 812 |
+
yield (
|
| 813 |
+
html_controls,
|
| 814 |
+
txt_file_paths,
|
| 815 |
+
markdown_slides[0],
|
| 816 |
+
[]
|
| 817 |
+
)
|
| 818 |
+
return
|
| 819 |
+
|
| 820 |
+
for i, script in enumerate(scripts):
|
| 821 |
+
cleaned_script = clean_script_text(script)
|
| 822 |
+
audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.mp3")
|
| 823 |
+
script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
|
| 824 |
+
|
| 825 |
+
try:
|
| 826 |
+
with open(script_file, "w", encoding="utf-8") as f:
|
| 827 |
+
f.write(cleaned_script or "")
|
| 828 |
+
logger.info("Saved script to %s: %s", script_file, cleaned_script)
|
| 829 |
+
except Exception as e:
|
| 830 |
+
logger.error("Error saving script to %s: %s", script_file, str(e))
|
| 831 |
+
|
| 832 |
+
if not cleaned_script:
|
| 833 |
+
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
| 834 |
+
audio_files.append(None)
|
| 835 |
+
audio_urls[i] = None
|
| 836 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
| 837 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 838 |
+
yield (
|
| 839 |
+
html_controls,
|
| 840 |
+
txt_file_paths,
|
| 841 |
+
markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
|
| 842 |
+
[]
|
| 843 |
+
)
|
| 844 |
+
await asyncio.sleep(0.1)
|
| 845 |
+
continue
|
| 846 |
+
|
| 847 |
+
max_audio_retries = 2
|
| 848 |
+
for attempt in range(max_audio_retries + 1):
|
| 849 |
+
try:
|
| 850 |
+
current_text = cleaned_script
|
| 851 |
+
if attempt > 0:
|
| 852 |
+
sentences = re.split(r"[.!?]+", cleaned_script)
|
| 853 |
+
sentences = [s.strip() for s in sentences if s.strip()][:2]
|
| 854 |
+
current_text = ". ".join(sentences) + "."
|
| 855 |
+
logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
|
| 856 |
+
|
| 857 |
+
success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
|
| 858 |
+
if not success:
|
| 859 |
+
raise RuntimeError("TTS generation failed")
|
| 860 |
+
|
| 861 |
+
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
| 862 |
+
audio_files.append(audio_file)
|
| 863 |
+
audio_urls[i] = f"/gradio_api/file={audio_file}"
|
| 864 |
+
# Update the audio element's src
|
| 865 |
+
audio_timeline = ""
|
| 866 |
+
for j, url in enumerate(audio_urls):
|
| 867 |
+
if url:
|
| 868 |
+
audio_timeline += f'<audio id="audio-{j+1}" controls src="{url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
|
| 869 |
+
else:
|
| 870 |
+
audio_timeline += f'<audio id="audio-{j+1}" controls style="display: inline-block; margin: 0 10px; width: 200px;"><source src="" type="audio/mpeg"></audio>'
|
| 871 |
+
html_controls = f"""
|
| 872 |
+
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
| 873 |
+
<div style="padding: 20px; text-align: center;">
|
| 874 |
+
<div id="audio-timeline" style="display: flex; justify-content: center; margin-bottom: 10px;">
|
| 875 |
+
{audio_timeline}
|
| 876 |
+
</div>
|
| 877 |
+
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
| 878 |
+
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
| 879 |
+
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
| 880 |
+
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
| 881 |
+
<button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">🖥️</button>
|
| 882 |
+
</div>
|
| 883 |
+
</div>
|
| 884 |
+
</div>
|
| 885 |
+
<script>
|
| 886 |
+
const lectureData = {json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})};
|
| 887 |
+
let currentSlide = {currentSlide if 'currentSlide' in locals() else 0};
|
| 888 |
+
const totalSlides = lectureData.slides.length;
|
| 889 |
+
let audioElements = [];
|
| 890 |
+
|
| 891 |
+
// Populate audio elements
|
| 892 |
+
for (let i = 0; i < totalSlides; i++) {{
|
| 893 |
+
const audio = document.getElementById(`audio-${{i+1}}`);
|
| 894 |
+
audioElements.push(audio);
|
| 895 |
+
}}
|
| 896 |
+
|
| 897 |
+
function updateSlideDisplay() {{
|
| 898 |
+
window.updateSlideContent(lectureData.slides[currentSlide]);
|
| 899 |
+
audioElements.forEach((audio, index) => {{
|
| 900 |
+
if (audio && audio.pause) {{
|
| 901 |
+
audio.pause();
|
| 902 |
+
audio.currentTime = 0;
|
| 903 |
+
if (index === currentSlide && audio.src) {{
|
| 904 |
+
audio.play().catch(e => console.error('Audio play failed:', e));
|
| 905 |
+
}}
|
| 906 |
+
}}
|
| 907 |
+
}});
|
| 908 |
+
}}
|
| 909 |
+
|
| 910 |
+
function prevSlide() {{
|
| 911 |
+
if (currentSlide > 0) {{
|
| 912 |
+
currentSlide--;
|
| 913 |
+
updateSlideDisplay();
|
| 914 |
+
}}
|
| 915 |
+
}}
|
| 916 |
+
|
| 917 |
+
function nextSlide() {{
|
| 918 |
+
if (currentSlide < totalSlides - 1) {{
|
| 919 |
+
currentSlide++;
|
| 920 |
+
updateSlideDisplay();
|
| 921 |
+
}}
|
| 922 |
+
}}
|
| 923 |
+
|
| 924 |
+
function playAll() {{
|
| 925 |
+
let index = currentSlide;
|
| 926 |
+
function playNext() {{
|
| 927 |
+
if (index >= totalSlides) return;
|
| 928 |
+
currentSlide = index;
|
| 929 |
+
updateSlideDisplay();
|
| 930 |
+
const audio = audioElements[index];
|
| 931 |
+
if (audio && audio.src) {{
|
| 932 |
+
audio.play().then(() => {{
|
| 933 |
+
audio.addEventListener('ended', () => {{
|
| 934 |
+
index++;
|
| 935 |
+
playNext();
|
| 936 |
+
}}, {{ once: true }});
|
| 937 |
+
}}).catch(e => {{
|
| 938 |
+
console.error('Audio play failed:', e);
|
| 939 |
+
index++;
|
| 940 |
+
playNext();
|
| 941 |
+
}});
|
| 942 |
+
}} else {{
|
| 943 |
+
index++;
|
| 944 |
+
playNext();
|
| 945 |
+
}}
|
| 946 |
+
}}
|
| 947 |
+
playNext();
|
| 948 |
+
}}
|
| 949 |
+
|
| 950 |
+
function toggleFullScreen() {{
|
| 951 |
+
const container = document.getElementById('lecture-container');
|
| 952 |
+
if (!document.fullscreenElement) {{
|
| 953 |
+
container.requestFullscreen().catch(err => {{
|
| 954 |
+
console.error(`Error attempting to enable full-screen mode: ${{err.message}}`);
|
| 955 |
+
}});
|
| 956 |
+
}} else {{
|
| 957 |
+
document.exitFullscreen();
|
| 958 |
+
}}
|
| 959 |
+
}}
|
| 960 |
+
|
| 961 |
+
// Attach event listeners
|
| 962 |
+
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
| 963 |
+
document.getElementById('play-btn').addEventListener('click', playAll);
|
| 964 |
+
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
| 965 |
+
document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
|
| 966 |
+
|
| 967 |
+
// Initialize first slide
|
| 968 |
+
updateSlideDisplay();
|
| 969 |
+
</script>
|
| 970 |
+
"""
|
| 971 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
| 972 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 973 |
+
yield (
|
| 974 |
+
html_controls,
|
| 975 |
+
txt_file_paths,
|
| 976 |
+
markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
|
| 977 |
+
[]
|
| 978 |
+
)
|
| 979 |
+
await asyncio.sleep(0.1)
|
| 980 |
+
break
|
| 981 |
+
except Exception as e:
|
| 982 |
+
logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
|
| 983 |
+
if attempt == max_audio_retries:
|
| 984 |
+
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
| 985 |
+
audio_files.append(None)
|
| 986 |
+
audio_urls[i] = None
|
| 987 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
| 988 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 989 |
+
yield (
|
| 990 |
+
html_controls,
|
| 991 |
+
txt_file_paths,
|
| 992 |
+
markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
|
| 993 |
+
[]
|
| 994 |
+
)
|
| 995 |
+
await asyncio.sleep(0.1)
|
| 996 |
+
break
|
| 997 |
+
|
| 998 |
+
logger.info("Lecture generation completed successfully")
|
| 999 |
|
| 1000 |
except Exception as e:
|
| 1001 |
logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
|
|
|
|
| 1007 |
<p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
|
| 1008 |
</div>
|
| 1009 |
""",
|
| 1010 |
+
[],
|
| 1011 |
+
"",
|
| 1012 |
[]
|
| 1013 |
)
|
| 1014 |
return
|
|
|
|
| 1044 |
<p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
|
| 1045 |
</div>
|
| 1046 |
"""
|
| 1047 |
+
slide_display = gr.Markdown(label="Lecture Slides", value="Waiting for lecture content...")
|
| 1048 |
+
controls_display = gr.HTML(label="Controls", value=default_slide_html)
|
| 1049 |
file_output = gr.File(label="Download Generated Files")
|
| 1050 |
|
| 1051 |
speaker_audio.change(
|
|
|
|
| 1054 |
outputs=speaker_audio
|
| 1055 |
)
|
| 1056 |
|
| 1057 |
+
# JavaScript to update slide content dynamically
|
| 1058 |
+
demo.load(
|
| 1059 |
+
fn=None,
|
| 1060 |
+
inputs=None,
|
| 1061 |
+
outputs=None,
|
| 1062 |
+
_js="""
|
| 1063 |
+
() => {
|
| 1064 |
+
window.updateSlideContent = (content) => {
|
| 1065 |
+
document.querySelector('#slide-display textarea').value = content;
|
| 1066 |
+
document.querySelector('#slide-display').dispatchEvent(new Event('input'));
|
| 1067 |
+
};
|
| 1068 |
+
}
|
| 1069 |
+
"""
|
| 1070 |
+
)
|
| 1071 |
+
|
| 1072 |
generate_btn.click(
|
| 1073 |
fn=on_generate,
|
| 1074 |
inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
|
| 1075 |
+
outputs=[controls_display, file_output, slide_display, gr.State()]
|
| 1076 |
)
|
| 1077 |
|
| 1078 |
if __name__ == "__main__":
|
| 1079 |
+
demo.launch(allowed_paths=[OUTPUT_DIR])
|