Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import gradio as gr
|
|
| 5 |
import asyncio
|
| 6 |
import logging
|
| 7 |
import torch
|
|
|
|
| 8 |
from serpapi import GoogleSearch
|
| 9 |
from pydantic import BaseModel
|
| 10 |
from autogen_agentchat.agents import AssistantAgent
|
|
@@ -105,27 +106,36 @@ def get_model_client(service, api_key):
|
|
| 105 |
else:
|
| 106 |
raise ValueError("Invalid service")
|
| 107 |
|
| 108 |
-
# Helper function to clean script text
|
| 109 |
def clean_script_text(script):
|
| 110 |
if not script or not isinstance(script, str):
|
| 111 |
logger.error("Invalid script input: %s", script)
|
| 112 |
return None
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
script = re.sub(r"\
|
| 116 |
-
script = re.sub(r"
|
| 117 |
-
script = re.sub(r"
|
| 118 |
-
script = script.replace("humanlike", "human
|
| 119 |
-
script = re.sub(r"\s+", " ", script).strip()
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
return None
|
|
|
|
|
|
|
| 129 |
return script
|
| 130 |
|
| 131 |
# Helper function to validate and convert speaker audio (MP3 or WAV)
|
|
@@ -344,11 +354,12 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
|
|
| 344 |
model_client=model_client,
|
| 345 |
handoffs=["script_agent"],
|
| 346 |
system_message=f"""
|
| 347 |
-
You are a Slide Agent. Using the research from the conversation history and the number of content slides ({num_slides})
|
| 348 |
-
Example output:
|
| 349 |
```json
|
| 350 |
[
|
| 351 |
{{"title": "Slide 1", "content": "Content for slide 1"}},
|
|
|
|
| 352 |
{{"title": "Quiz", "content": "Quiz questions"}},
|
| 353 |
{{"title": "Assignment", "content": "Assignment details"}},
|
| 354 |
{{"title": "Thank You", "content": "Thank you message"}}
|
|
@@ -362,14 +373,14 @@ Example output:
|
|
| 362 |
model_client=model_client,
|
| 363 |
handoffs=["feynman_agent"],
|
| 364 |
system_message=f"""
|
| 365 |
-
You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each
|
| 366 |
Example for 1 content slide:
|
| 367 |
```json
|
| 368 |
[
|
| 369 |
-
"
|
| 370 |
-
"
|
| 371 |
-
"
|
| 372 |
-
"
|
| 373 |
]
|
| 374 |
```""",
|
| 375 |
output_content_type=None,
|
|
@@ -379,8 +390,10 @@ Example for 1 content slide:
|
|
| 379 |
name="feynman_agent",
|
| 380 |
model_client=model_client,
|
| 381 |
handoffs=[],
|
| 382 |
-
system_message="
|
| 383 |
-
|
|
|
|
|
|
|
| 384 |
|
| 385 |
swarm = Swarm(
|
| 386 |
participants=[research_agent, slide_agent, script_agent, feynman_agent],
|
|
@@ -521,7 +534,7 @@ Example for 1 content slide:
|
|
| 521 |
|
| 522 |
elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
|
| 523 |
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
| 524 |
-
progress =
|
| 525 |
label = "Lecture materials ready. Generating audio..."
|
| 526 |
yield html_with_progress(label, progress)
|
| 527 |
await asyncio.sleep(0.1)
|
|
@@ -544,6 +557,17 @@ Example for 1 content slide:
|
|
| 544 |
"""
|
| 545 |
return
|
| 546 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
|
| 548 |
logger.error("Scripts are not a list of strings: %s", scripts)
|
| 549 |
yield f"""
|
|
@@ -554,7 +578,7 @@ Example for 1 content slide:
|
|
| 554 |
"""
|
| 555 |
return
|
| 556 |
|
| 557 |
-
if len(
|
| 558 |
logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
|
| 559 |
yield f"""
|
| 560 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
|
@@ -598,6 +622,11 @@ Example for 1 content slide:
|
|
| 598 |
if not cleaned_script:
|
| 599 |
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
| 600 |
audio_files.append(None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
continue
|
| 602 |
|
| 603 |
max_retries = 2
|
|
@@ -616,11 +645,9 @@ Example for 1 content slide:
|
|
| 616 |
|
| 617 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
| 618 |
audio_files.append(audio_file)
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
</div>
|
| 623 |
-
"""
|
| 624 |
await asyncio.sleep(0.1)
|
| 625 |
break
|
| 626 |
except Exception as e:
|
|
@@ -628,6 +655,10 @@ Example for 1 content slide:
|
|
| 628 |
if attempt == max_retries:
|
| 629 |
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
| 630 |
audio_files.append(None)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
break
|
| 632 |
|
| 633 |
audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]
|
|
|
|
| 5 |
import asyncio
|
| 6 |
import logging
|
| 7 |
import torch
|
| 8 |
+
import random
|
| 9 |
from serpapi import GoogleSearch
|
| 10 |
from pydantic import BaseModel
|
| 11 |
from autogen_agentchat.agents import AssistantAgent
|
|
|
|
| 106 |
else:
|
| 107 |
raise ValueError("Invalid service")
|
| 108 |
|
| 109 |
+
# Helper function to clean script text and make it natural
|
| 110 |
def clean_script_text(script):
    """Clean a narration script and make it sound conversational.

    Strips ``**Slide N: ...**`` headers, bracketed content and
    ``Title:``/``Content:`` metadata lines, turns leading ``-`` bullets into
    a spoken "So, " cue, collapses whitespace, and finally sprinkles filler
    words ("um,", "you know,", "like,") before roughly 10% of the words.

    Args:
        script: Raw narration text for one slide.

    Returns:
        The cleaned script, or ``None`` when ``script`` is empty, is not a
        string, or is shorter than 10 characters after cleaning.
    """
    if not script or not isinstance(script, str):
        logger.error("Invalid script input: %s", script)
        return None

    # Minimal cleaning to preserve natural language
    script = re.sub(r"\*\*Slide \d+:.*?\*\*", "", script)  # Remove slide headers
    script = re.sub(r"\[.*?\]", "", script)  # Remove bracketed content
    script = re.sub(r"Title:.*?\n|Content:.*?\n", "", script)  # Remove metadata
    script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")

    # Convert bullet points to spoken cues. This has to happen while the
    # line breaks still exist: once whitespace is normalized there is only
    # one line left and the MULTILINE anchor can no longer find the bullets.
    script = re.sub(r"^\s*-\s*", "So, ", script, flags=re.MULTILINE)

    script = re.sub(r"\s+", " ", script).strip()  # Normalize whitespace

    # Basic validation, done before fillers are added so that random filler
    # words cannot push an otherwise too-short script over the threshold.
    if len(script) < 10:
        logger.error("Cleaned script too short (%d characters): %s", len(script), script)
        return None

    # Add non-verbal words randomly (e.g., "um," "you know," "like").
    # Fillers carry no trailing space; the join below supplies the separator,
    # which avoids double spaces in the output.
    non_verbal = ("um,", "you know,", "like,")
    spoken = []
    # Walk the words back to front and reverse at the end: each filler lands
    # in front of its word without repeated list.insert calls.
    for word in reversed(script.split()):
        spoken.append(word)
        if random.random() < 0.1:  # 10% chance per word
            spoken.append(random.choice(non_verbal))
    spoken.reverse()
    script = " ".join(spoken)

    logger.info("Cleaned and naturalized script: %s", script)
    return script
|
| 140 |
|
| 141 |
# Helper function to validate and convert speaker audio (MP3 or WAV)
|
|
|
|
| 354 |
model_client=model_client,
|
| 355 |
handoffs=["script_agent"],
|
| 356 |
system_message=f"""
|
| 357 |
+
You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({num_slides}), generate exactly {num_slides} content slides, plus one quiz slide, one assignment slide, and one thank-you slide, for a total of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {num_slides + 3} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
|
| 358 |
+
Example output for 2 content slides:
|
| 359 |
```json
|
| 360 |
[
|
| 361 |
{{"title": "Slide 1", "content": "Content for slide 1"}},
|
| 362 |
+
{{"title": "Slide 2", "content": "Content for slide 2"}},
|
| 363 |
{{"title": "Quiz", "content": "Quiz questions"}},
|
| 364 |
{{"title": "Assignment", "content": "Assignment details"}},
|
| 365 |
{{"title": "Thank You", "content": "Thank you message"}}
|
|
|
|
| 373 |
model_client=model_client,
|
| 374 |
handoffs=["feynman_agent"],
|
| 375 |
system_message=f"""
|
| 376 |
+
You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
|
| 377 |
Example for 1 content slide:
|
| 378 |
```json
|
| 379 |
[
|
| 380 |
+
"So, this slide, um, covers the main topic in a fun way.",
|
| 381 |
+
"Alright, you know, answer these quiz questions.",
|
| 382 |
+
"Here's your, like, assignment to complete.",
|
| 383 |
+
"Thanks for, um, attending today!"
|
| 384 |
]
|
| 385 |
```""",
|
| 386 |
output_content_type=None,
|
|
|
|
| 390 |
name="feynman_agent",
|
| 391 |
model_client=model_client,
|
| 392 |
handoffs=[],
|
| 393 |
+
system_message=f"""
|
| 394 |
+
You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
|
| 395 |
+
Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
|
| 396 |
+
""")
|
| 397 |
|
| 398 |
swarm = Swarm(
|
| 399 |
participants=[research_agent, slide_agent, script_agent, feynman_agent],
|
|
|
|
| 534 |
|
| 535 |
elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
|
| 536 |
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
| 537 |
+
progress = 90 # Set to 90% before audio generation
|
| 538 |
label = "Lecture materials ready. Generating audio..."
|
| 539 |
yield html_with_progress(label, progress)
|
| 540 |
await asyncio.sleep(0.1)
|
|
|
|
| 557 |
"""
|
| 558 |
return
|
| 559 |
|
| 560 |
+
expected_slide_count = num_slides + 3
|
| 561 |
+
if len(slides) != expected_slide_count:
|
| 562 |
+
logger.error("Expected %d slides (including %d content slides + 3), but received %d", expected_slide_count, num_slides, len(slides))
|
| 563 |
+
yield f"""
|
| 564 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
| 565 |
+
<h2 style="color: #d9534f;">Incorrect number of slides</h2>
|
| 566 |
+
<p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides)}. Please try again.</p>
|
| 567 |
+
</div>
|
| 568 |
+
"""
|
| 569 |
+
return
|
| 570 |
+
|
| 571 |
if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
|
| 572 |
logger.error("Scripts are not a list of strings: %s", scripts)
|
| 573 |
yield f"""
|
|
|
|
| 578 |
"""
|
| 579 |
return
|
| 580 |
|
| 581 |
+
if len(scripts) != expected_slide_count:
|
| 582 |
logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
|
| 583 |
yield f"""
|
| 584 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
|
|
|
| 622 |
if not cleaned_script:
|
| 623 |
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
| 624 |
audio_files.append(None)
|
| 625 |
+
# Update progress (even for skipped slides)
|
| 626 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
|
| 627 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 628 |
+
yield html_with_progress(label, progress)
|
| 629 |
+
await asyncio.sleep(0.1)
|
| 630 |
continue
|
| 631 |
|
| 632 |
max_retries = 2
|
|
|
|
| 645 |
|
| 646 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
| 647 |
audio_files.append(audio_file)
|
| 648 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
|
| 649 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 650 |
+
yield html_with_progress(label, progress)
|
|
|
|
|
|
|
| 651 |
await asyncio.sleep(0.1)
|
| 652 |
break
|
| 653 |
except Exception as e:
|
|
|
|
| 655 |
if attempt == max_retries:
|
| 656 |
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
| 657 |
audio_files.append(None)
|
| 658 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
|
| 659 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
| 660 |
+
yield html_with_progress(label, progress)
|
| 661 |
+
await asyncio.sleep(0.1)
|
| 662 |
break
|
| 663 |
|
| 664 |
audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]
|