Spaces:

Jaward
/

Professor-AI-Feynman

Running

App Files Files Community

Jaward committed on May 1

Commit

e368dbe

verified ·

1 Parent(s): 6de00c6

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -32

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import gradio as gr
 import asyncio
 import logging
 import torch
 from serpapi import GoogleSearch
 from pydantic import BaseModel
 from autogen_agentchat.agents import AssistantAgent
@@ -105,27 +106,36 @@ def get_model_client(service, api_key):
     else:
         raise ValueError("Invalid service")
-# Helper function to clean script text
 def clean_script_text(script):
     if not script or not isinstance(script, str):
         logger.error("Invalid script input: %s", script)
         return None
-    script = re.sub(r"\*\*Slide \d+:.*?\*\*", "", script)
-    script = re.sub(r"\*\*|\*|\s*-\s*", "", script)
-    script = re.sub(r"\[.*?\]", "", script)
-    script = re.sub(r"Title:.*?\n|Content:.*?\n", "", script)
-    script = re.sub(r"-", " ", script)
-    script = script.replace("humanlike", "human like").replace("problemsolving", "problem solving")
-    script = re.sub(r"\s+", " ", script).strip()
-    script = re.sub(r"[^\w\s.,!?']", "", script)
-    if len(script) < 10 or len(script) > 500:
-        logger.error("Cleaned script length invalid (%d characters): %s", len(script), script)
-        return None
-    sentences = re.split(r"[.!?]+", script)
-    sentences = [s.strip() for s in sentences if s.strip()]
-    if len(sentences) < 1 or len(sentences) > 5:
-        logger.error("Cleaned script has invalid sentence count (%d): %s", len(sentences), script)
         return None
     return script
 # Helper function to validate and convert speaker audio (MP3 or WAV)
@@ -344,11 +354,12 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
         model_client=model_client,
         handoffs=["script_agent"],
         system_message=f"""
-You are a Slide Agent. Using the research from the conversation history and the number of content slides ({num_slides}) specified in the initial message, generate exactly {num_slides} content slides, then add a quiz slide, an assignment slide, and a thank-you slide. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and complete before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
-Example output:
 ```json
 [
     {{"title": "Slide 1", "content": "Content for slide 1"}},
     {{"title": "Quiz", "content": "Quiz questions"}},
     {{"title": "Assignment", "content": "Assignment details"}},
     {{"title": "Thank You", "content": "Thank you message"}}
@@ -362,14 +373,14 @@ Example output:
         model_client=model_client,
         handoffs=["feynman_agent"],
         system_message=f"""
-You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each slide, summarizing its content. Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
 Example for 1 content slide:
 ```json
 [
-    "This slide covers the main topic.",
-    "Answer these quiz questions.",
-    "Complete this assignment.",
-    "Thank you for attending."
 ]
 ```""",
         output_content_type=None,
@@ -379,8 +390,10 @@ Example for 1 content slide:
         name="feynman_agent",
         model_client=model_client,
         handoffs=[],
-        system_message="You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence and completeness. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing or invalid, report the issue clearly. Use 'TERMINATE' to signal completion. Example: 'Received 5 slides and 5 scripts. Lecture is coherent. TERMINATE'"
-    )
     swarm = Swarm(
         participants=[research_agent, slide_agent, script_agent, feynman_agent],
@@ -521,7 +534,7 @@ Example for 1 content slide:
             elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
                 logger.info("Feynman Agent completed lecture review: %s", message.content)
-                progress = 100
                 label = "Lecture materials ready. Generating audio..."
                 yield html_with_progress(label, progress)
                 await asyncio.sleep(0.1)
@@ -544,6 +557,17 @@ Example for 1 content slide:
             """
             return
         if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
             logger.error("Scripts are not a list of strings: %s", scripts)
             yield f"""
@@ -554,7 +578,7 @@ Example for 1 content slide:
             """
             return
-        if len(slides) != len(scripts):
             logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
             yield f"""
             <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
@@ -598,6 +622,11 @@ Example for 1 content slide:
             if not cleaned_script:
                 logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
                 audio_files.append(None)
                 continue
             max_retries = 2
@@ -616,11 +645,9 @@ Example for 1 content slide:
                     logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
                     audio_files.append(audio_file)
-                    yield f"""
-                    <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
-                        <h2 style="font-style: italic; color: #555;">Generated audio for slide {i + 1}/{len(scripts)}...</h2>
-                    </div>
-                    """
                     await asyncio.sleep(0.1)
                     break
                 except Exception as e:
@@ -628,6 +655,10 @@ Example for 1 content slide:
                     if attempt == max_retries:
                         logger.error("Max retries reached for slide %d, skipping", i + 1)
                         audio_files.append(None)
                         break
         audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]

 import asyncio
 import logging
 import torch
+import random
 from serpapi import GoogleSearch
 from pydantic import BaseModel
 from autogen_agentchat.agents import AssistantAgent
     else:
         raise ValueError("Invalid service")
+# Helper function to clean script text and make it natural
 def clean_script_text(script):
     """Clean a narration script and sprinkle in spoken-style filler words.

     The text is stripped of slide headers, bracketed asides and
     ``Title:``/``Content:`` metadata lines, leading bullets are turned into
     the spoken cue "So, ", whitespace is collapsed, and finally filler
     words ("um,", "you know,", "like,") are inserted before roughly 10% of
     the words.

     Args:
         script: Raw narration text for a single slide.

     Returns:
         The cleaned, naturalized script, or ``None`` when the input is not
         a non-empty string or the cleaned text is shorter than 10
         characters.
     """
     if not script or not isinstance(script, str):
         logger.error("Invalid script input: %s", script)
         return None
     # Minimal cleaning to preserve natural language
     script = re.sub(r"\*\*Slide \d+:.*?\*\*", "", script)  # Remove slide headers
     script = re.sub(r"\[.*?\]", "", script)  # Remove bracketed content
     script = re.sub(r"Title:.*?\n|Content:.*?\n", "", script)  # Remove metadata
     script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")
     # Convert bullet points to spoken cues. This must happen while the line
     # breaks are still present: once whitespace is collapsed, "^" can only
     # match at the very start of the text and later bullets would be missed.
     script = re.sub(r"^\s*-\s*", "So, ", script, flags=re.MULTILINE)
     script = re.sub(r"\s+", " ", script).strip()  # Normalize whitespace
     # Validate before adding fillers so that acceptance depends only on the
     # real content, not on how many random fillers happened to be inserted.
     if len(script) < 10:
         logger.error("Cleaned script too short (%d characters): %s", len(script), script)
         return None
     # Add non-verbal words randomly (10% chance before each word). The
     # fillers carry no trailing space because " ".join supplies the
     # separator; a trailing space here would produce double spaces.
     fillers = ("um,", "you know,", "like,")
     spoken = []
     for word in script.split():
         if random.random() < 0.1:
             spoken.append(random.choice(fillers))
         spoken.append(word)
     script = " ".join(spoken)
     logger.info("Cleaned and naturalized script: %s", script)
     return script
 # Helper function to validate and convert speaker audio (MP3 or WAV)
         model_client=model_client,
         handoffs=["script_agent"],
         system_message=f"""
+You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({num_slides}), generate exactly {num_slides} content slides, plus one quiz slide, one assignment slide, and one thank-you slide, for a total of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {num_slides + 3} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
+Example output for 2 content slides:
 ```json
 [
     {{"title": "Slide 1", "content": "Content for slide 1"}},
+    {{"title": "Slide 2", "content": "Content for slide 2"}},
     {{"title": "Quiz", "content": "Quiz questions"}},
     {{"title": "Assignment", "content": "Assignment details"}},
     {{"title": "Thank You", "content": "Thank you message"}}
         model_client=model_client,
         handoffs=["feynman_agent"],
         system_message=f"""
+You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
 Example for 1 content slide:
 ```json
 [
+    "So, this slide, um, covers the main topic in a fun way.",
+    "Alright, you know, answer these quiz questions.",
+    "Here's your, like, assignment to complete.",
+    "Thanks for, um, attending today!"
 ]
 ```""",
         output_content_type=None,
         name="feynman_agent",
         model_client=model_client,
         handoffs=[],
+        system_message=f"""
+You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
+Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
+""")
     swarm = Swarm(
         participants=[research_agent, slide_agent, script_agent, feynman_agent],
             elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
                 logger.info("Feynman Agent completed lecture review: %s", message.content)
+                progress = 90  # Set to 90% before audio generation
                 label = "Lecture materials ready. Generating audio..."
                 yield html_with_progress(label, progress)
                 await asyncio.sleep(0.1)
             """
             return
+        expected_slide_count = num_slides + 3
+        if len(slides) != expected_slide_count:
+            logger.error("Expected %d slides (including %d content slides + 3), but received %d", expected_slide_count, num_slides, len(slides))
+            yield f"""
+            <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
+                <h2 style="color: #d9534f;">Incorrect number of slides</h2>
+                <p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides)}. Please try again.</p>
+            </div>
+            """
+            return
         if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
             logger.error("Scripts are not a list of strings: %s", scripts)
             yield f"""
             """
             return
+        if len(scripts) != expected_slide_count:
             logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
             yield f"""
             <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
             if not cleaned_script:
                 logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
                 audio_files.append(None)
+                # Update progress (even for skipped slides)
+                progress = 90 + ((i + 1) / len(scripts)) * 10  # Progress from 90% to 100%
+                label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
+                yield html_with_progress(label, progress)
+                await asyncio.sleep(0.1)
                 continue
             max_retries = 2
                     logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
                     audio_files.append(audio_file)
+                    progress = 90 + ((i + 1) / len(scripts)) * 10  # Progress from 90% to 100%
+                    label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
+                    yield html_with_progress(label, progress)
                     await asyncio.sleep(0.1)
                     break
                 except Exception as e:
                     if attempt == max_retries:
                         logger.error("Max retries reached for slide %d, skipping", i + 1)
                         audio_files.append(None)
+                        progress = 90 + ((i + 1) / len(scripts)) * 10  # Progress from 90% to 100%
+                        label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
+                        yield html_with_progress(label, progress)
+                        await asyncio.sleep(0.1)
                         break
         audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]