Jaward committed on
Commit
4401cfd
·
verified ·
1 Parent(s): eacbde1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1046 -107
app.py CHANGED
@@ -1,5 +1,6 @@
1
- # Professor AI Feynman: A Multi-Agent Tool for Learning Anything the Feynman way
2
- # Jaward Sesay - Microsoft AI Agent Hackathon Submission April 2025
 
3
  import os
4
  import json
5
  import re
@@ -28,6 +29,19 @@ import tempfile
28
  from pydub import AudioSegment
29
  from TTS.api import TTS
30
  import markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  # Set up logging
33
  logging.basicConfig(
@@ -42,8 +56,11 @@ logger = logging.getLogger(__name__)
42
 
43
  # Set up environment
44
  OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
 
45
  os.makedirs(OUTPUT_DIR, exist_ok=True)
 
46
  logger.info(f"Using output directory: {OUTPUT_DIR}")
 
47
  os.environ["COQUI_TOS_AGREED"] = "1"
48
 
49
  # Initialize TTS model
@@ -104,7 +121,7 @@ def render_md_to_html(md_content: str) -> str:
104
  return "<div>Error rendering content</div>"
105
 
106
  # Slide tool for generating HTML slides used by slide_agent
107
- def create_slides(slides: list[dict], title: str, output_dir: str = OUTPUT_DIR) -> list[str]:
108
  try:
109
  html_files = []
110
  template_file = os.path.join(os.getcwd(), "slide_template.html")
@@ -122,7 +139,7 @@ def create_slides(slides: list[dict], title: str, output_dir: str = OUTPUT_DIR)
122
  slide_html = slide_html.replace("section title", f"{slide['title']}")
123
  slide_html = slide_html.replace("Lecture title", title)
124
  slide_html = slide_html.replace("<!--CONTENT-->", html_content)
125
- slide_html = slide_html.replace("speaker name", "Prof. AI Feynman")
126
  slide_html = slide_html.replace("date", date)
127
 
128
  html_file = os.path.join(output_dir, f"slide_{slide_number}.html")
@@ -205,7 +222,7 @@ def clean_script_text(script):
205
  async def validate_and_convert_speaker_audio(speaker_audio):
206
  if not speaker_audio or not os.path.exists(speaker_audio):
207
  logger.warning("Speaker audio file does not exist: %s. Using default voice.", speaker_audio)
208
- default_voice = os.path.join(os.path.dirname(__file__), "feynman.mp3")
209
  if os.path.exists(default_voice):
210
  speaker_audio = default_voice
211
  else:
@@ -390,6 +407,13 @@ def get_gradio_file_url(local_path):
390
  # Async generate lecture materials and audio
391
  async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, lecture_style, speaker_audio, num_slides):
392
  model_client = get_model_client(api_service, api_key)
 
 
 
 
 
 
 
393
 
394
  if os.path.exists(OUTPUT_DIR):
395
  try:
@@ -425,7 +449,7 @@ async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_
425
  system_message=f"""
426
  You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({content_slides}), generate exactly {content_slides} content slides, plus an Introduction slide as the first slide and a Closing slide as the last slide, making a total of {total_slides} slides.
427
 
428
- - The Introduction slide (first slide) should have the title "{title}" and content containing only the lecture title, speaker name (Prof. AI Feynman), and date {date}, centered, in plain text.
429
  - The Closing slide (last slide) should have the title "Closing" and content containing only "The End\nThank you", centered, in plain text.
430
  - The remaining {content_slides} slides should be content slides based on the lecture description, audience type, and lecture style ({lecture_style}), with meaningful titles and content in valid Markdown format. Adapt the content to the lecture style to suit diverse learners:
431
  - Feynman: Explains complex ideas with simplicity, clarity, and enthusiasm, emulating Richard Feynman's teaching style.
@@ -439,7 +463,7 @@ Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each
439
  Example output for 1 content slide (total 3 slides):
440
  ```json
441
  [
442
- {{"title": "Introduction to AI Basics", "content": "AI Basics\nProf. AI Feynman\nMay 2nd, 2025"}},
443
  {{"title": "What is AI?", "content": "# What is AI?\n- Definition: Systems that mimic human intelligence\n- Key areas: ML, NLP, Robotics"}},
444
  {{"title": "Closing", "content": "The End\nThank you"}}
445
  ]
@@ -451,22 +475,16 @@ Example output for 1 content slide (total 3 slides):
451
  script_agent = AssistantAgent(
452
  name="script_agent",
453
  model_client=model_client,
454
- handoffs=["feynman_agent"],
455
  system_message=f"""
456
- You are a Script Agent modeled after Richard Feynman. Access the JSON array of {total_slides} slides from the conversation history, which includes an Introduction slide, {content_slides} content slides, and a Closing slide. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone, with humor as Professor Feynman would deliver it. Ensure the lecture is engaging, covers the fundamental requirements of the topic, and aligns with the lecture style ({lecture_style}) to suit diverse learners:
457
- - Feynman: Explains complex ideas with simplicity, clarity, and enthusiasm, emulating Richard Feynman's teaching style.
458
- - Socratic: Poses thought-provoking questions to guide learners to insights without requiring direct interaction.
459
- - Narrative: Use storytelling or analogies to explain concepts.
460
- - Analytical: Focus on data, equations, or logical breakdowns.
461
- - Humorous: Infuses wit and light-hearted anecdotes to make content engaging and memorable.
462
- - Reflective: Encourages introspection with a calm, contemplative tone to deepen understanding.
463
 
464
- Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
465
 
466
  Example for 3 slides (1 content slide):
467
  ```json
468
  [
469
- "Welcome to the lecture on AI Basics. I am Professor AI Feynman, and today we will explore the fundamentals of artificial intelligence.",
470
  "Let us begin by defining artificial intelligence: it refers to systems that mimic human intelligence, spanning key areas such as machine learning, natural language processing, and robotics.",
471
  "That concludes our lecture on AI Basics. Thank you for your attention, and I hope you found this session insightful."
472
  ]
@@ -474,17 +492,72 @@ Example for 3 slides (1 content slide):
474
  output_content_type=None,
475
  reflect_on_tool_use=False
476
  )
477
- feynman_agent = AssistantAgent(
478
- name="feynman_agent",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
  model_client=model_client,
480
  handoffs=[],
481
- system_message=f"""
482
- You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received, including the Introduction and Closing slides. Verify that HTML slide files exist in the outputs directory and align with the lecture style ({lecture_style}). Output a confirmation message summarizing the number of slides, scripts, and HTML files status. If slides, scripts, or HTML files are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
483
- Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files. Lecture is coherent and aligns with {lecture_style} style. TERMINATE'
484
- """)
485
 
486
  swarm = Swarm(
487
- participants=[research_agent, slide_agent, script_agent, feynman_agent],
488
  termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
489
  )
490
 
@@ -568,7 +641,7 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
568
  []
569
  )
570
  await asyncio.sleep(0.1)
571
- elif source == "script_agent" and message.target == "feynman_agent":
572
  if scripts is None:
573
  logger.warning("Script Agent handoff without scripts JSON")
574
  extracted_json = extract_json_from_message(message)
@@ -610,8 +683,8 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
610
  )
611
  task_result.messages.append(retry_message)
612
  continue
613
- # Generate HTML slides
614
- html_files = create_slides(slides, title)
615
  if not html_files:
616
  logger.error("Failed to generate HTML slides")
617
  progress = 50
@@ -668,8 +741,8 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
668
  task_result.messages.append(retry_message)
669
  continue
670
 
671
- elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
672
- logger.info("Feynman Agent completed lecture review: %s", message.content)
673
  progress = 90
674
  label = "Lecture materials ready. Generating lecture speech..."
675
  file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
@@ -754,12 +827,13 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
754
  logger.error("Invalid speaker audio after conversion, skipping TTS")
755
  yield (
756
  f"""
757
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
758
- <h2 style="color: #d9534f;">Invalid speaker audio</h2>
759
- <p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
760
  </div>
761
  """,
762
- []
 
763
  )
764
  return
765
 
@@ -784,7 +858,8 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
784
  label = f"Generating lecture speech for slide {i + 1}/{len(scripts)}..."
785
  yield (
786
  html_with_progress(label, progress),
787
- file_paths
 
788
  )
789
  await asyncio.sleep(0.1)
790
  continue
@@ -811,7 +886,8 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
811
  file_paths.append(audio_file)
812
  yield (
813
  html_with_progress(label, progress),
814
- file_paths
 
815
  )
816
  await asyncio.sleep(0.1)
817
  break
@@ -825,7 +901,8 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
825
  label = f"Generating speech for slide {i + 1}/{len(scripts)}..."
826
  yield (
827
  html_with_progress(label, progress),
828
- file_paths
 
829
  )
830
  await asyncio.sleep(0.1)
831
  break
@@ -854,18 +931,29 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
854
  {audio_timeline}
855
  </div>
856
  <div style="display: center; justify-content: center; margin-bottom: 10px;">
857
- <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i class="fas fa-step-backward" style="color: #000"></i></button>
858
- <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i class="fas fa-play" style="color: #000"></i></button>
859
- <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i class="fas fa-step-forward" style="color: #000"></i></button>
860
- <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i style="color: #000" class="fas fa-expand"></i></button>
 
861
  </div>
862
  </div>
863
  </div>
864
  """
865
  logger.info("Yielding final lecture materials after audio generation")
 
 
 
 
 
 
 
 
 
866
  yield (
867
  html_output,
868
- file_paths
 
869
  )
870
 
871
  logger.info("Lecture generation completed successfully")
@@ -880,13 +968,15 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
880
  <p style="margin-top: 20px;">Please try again</p>
881
  </div>
882
  """,
883
- []
 
884
  )
885
  return
886
 
887
  # custom js for lecture container features
888
  js_code = """
889
  () => {
 
890
  function waitForElement(selector, callback, maxAttempts = 50, interval = 100) {
891
  let attempts = 0;
892
  const intervalId = setInterval(() => {
@@ -903,11 +993,11 @@ js_code = """
903
  }, interval);
904
  }
905
 
906
- // Main initit func
907
  function initializeSlides() {
908
  console.log("Initializing slides...");
909
 
910
- // await lecture-data to load JSON data
911
  waitForElement('#lecture-data', (dataElement) => {
912
  if (!dataElement.textContent) {
913
  console.error("Lecture data element is empty");
@@ -931,7 +1021,8 @@ js_code = """
931
  const totalSlides = lectureData.htmlFiles.length;
932
  let audioElements = [];
933
  let isPlaying = false;
934
- let hasNavigated = false; // Track if user has used prev/next buttons
 
935
 
936
  // Wait for slide-content element
937
  waitForElement('#slide-content', (slideContent) => {
@@ -963,15 +1054,38 @@ js_code = """
963
  if (body) {
964
  const textLength = body.textContent.length;
965
  const screenWidth = window.innerWidth;
966
- // Base font size: 12px max on large screens, scale down to 8px on small screens
967
- let baseFontSize = Math.min(12, Math.max(12, 16 * (screenWidth / 1920))); // Scale with screen width (1920px as reference)
968
- // Adjust inversely with content length
969
- const adjustedFontSize = Math.max(12, baseFontSize * (1000 / (textLength + 100))); // Minimum 8px, scale down with length
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
970
  const elements = body.getElementsByTagName('*');
971
  for (let elem of elements) {
972
  elem.style.fontSize = `${adjustedFontSize}px`;
973
  }
974
- console.log(`Adjusted font size to ${adjustedFontSize}px for ${textLength} characters on ${screenWidth}px width`);
 
975
  }
976
  };
977
  });
@@ -995,7 +1109,7 @@ js_code = """
995
  if (audio && audio.pause) {
996
  audio.pause();
997
  audio.currentTime = 0;
998
- audio.style.border = 'none'; // Reset border
999
  console.log("Paused and reset audio:", audio.id);
1000
  }
1001
  });
@@ -1021,7 +1135,7 @@ js_code = """
1021
 
1022
  function prevSlide() {
1023
  console.log("Previous button clicked, current slide:", currentSlide);
1024
- hasNavigated = true; // User has navigated
1025
  if (currentSlide > 0) {
1026
  currentSlide--;
1027
  updateSlide(() => {
@@ -1039,7 +1153,7 @@ js_code = """
1039
 
1040
  function nextSlide() {
1041
  console.log("Next button clicked, current slide:", currentSlide);
1042
- hasNavigated = true; // User has navigated
1043
  if (currentSlide < totalSlides - 1) {
1044
  currentSlide++;
1045
  updateSlide(() => {
@@ -1063,13 +1177,13 @@ js_code = """
1063
  return;
1064
  }
1065
  const playIcon = playBtn.querySelector('i');
1066
- if (playIcon.className.includes('fa-pause')) {
 
1067
  // Pause playback
1068
  isPlaying = false;
1069
  audioElements.forEach(audio => {
1070
  if (audio && audio.pause) {
1071
  audio.pause();
1072
- audio.currentTime = 0;
1073
  audio.style.border = 'none';
1074
  console.log("Paused audio:", audio.id);
1075
  }
@@ -1077,14 +1191,16 @@ js_code = """
1077
  playIcon.className = 'fas fa-play';
1078
  return;
1079
  }
 
1080
  // Start playback
1081
- currentSlide = 0;
1082
- let index = 0;
1083
  isPlaying = true;
1084
  playIcon.className = 'fas fa-pause';
 
 
 
1085
  updateSlide(() => {
1086
  function playNext() {
1087
- if (index >= totalSlides || !isPlaying) {
1088
  isPlaying = false;
1089
  playIcon.className = 'fas fa-play';
1090
  audioElements.forEach(audio => {
@@ -1093,72 +1209,64 @@ js_code = """
1093
  console.log("Finished playing all slides or paused");
1094
  return;
1095
  }
1096
- currentSlide = index;
 
1097
  updateSlide(() => {
1098
- const audio = audioElements[index];
1099
  if (audio && audio.play) {
1100
- // Highlight the current audio element
1101
  audioElements.forEach(a => a.style.border = 'none');
1102
  audio.style.border = '5px solid #16cd16';
1103
  audio.style.borderRadius = '30px';
1104
- console.log(`Attempting to play audio for slide ${index + 1}`);
 
1105
  audio.play().then(() => {
1106
- console.log(`Playing audio for slide ${index + 1}`);
1107
- // Remove any existing ended listeners to prevent duplicates
1108
  audio.onended = null;
1109
  audio.addEventListener('ended', () => {
1110
- console.log(`Audio ended for slide ${index + 1}`);
1111
- index++;
1112
- playNext();
 
 
1113
  }, { once: true });
1114
- // Fallback: Check if audio is stuck (e.g., duration not advancing)
1115
  const checkDuration = setInterval(() => {
1116
  if (!isPlaying) {
1117
  clearInterval(checkDuration);
1118
  return;
1119
  }
1120
  if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1121
- console.log(`Fallback: Audio for slide ${index + 1} considered ended`);
1122
  clearInterval(checkDuration);
1123
- audio.onended = null; // Prevent duplicate triggers
1124
- index++;
1125
  playNext();
1126
  }
1127
  }, 1000);
1128
  }).catch(e => {
1129
- console.error(`Audio play failed for slide ${index + 1}:`, e);
1130
- // Retry playing the same slide after a short delay
1131
  setTimeout(() => {
1132
- audio.play().then(() => {
1133
- console.log(`Retry succeeded for slide ${index + 1}`);
1134
- audio.onended = null;
1135
- audio.addEventListener('ended', () => {
1136
- console.log(`Audio ended for slide ${index + 1}`);
1137
- index++;
 
 
 
 
 
 
 
 
1138
  playNext();
1139
- }, { once: true });
1140
- const checkDuration = setInterval(() => {
1141
- if (!isPlaying) {
1142
- clearInterval(checkDuration);
1143
- return;
1144
- }
1145
- if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1146
- console.log(`Fallback: Audio for slide ${index + 1} considered ended`);
1147
- clearInterval(checkDuration);
1148
- audio.onended = null;
1149
- index++;
1150
- playNext();
1151
- }
1152
- }, 1000);
1153
- }).catch(e => {
1154
- console.error(`Retry failed for slide ${index + 1}:`, e);
1155
- index++; // Move to next slide if retry fails
1156
- playNext();
1157
- });
1158
  }, 500);
1159
  });
1160
  } else {
1161
- index++;
1162
  playNext();
1163
  }
1164
  });
@@ -1220,7 +1328,7 @@ js_code = """
1220
  const lectureContainer = document.getElementById('lecture-container');
1221
  if (lectureContainer) {
1222
  console.log("Lecture container detected in DOM");
1223
- observer.disconnect(); // Stop observing once found
1224
  initializeSlides();
1225
  }
1226
  }
@@ -1231,27 +1339,183 @@ js_code = """
1231
  }
1232
  """
1233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1234
  # Gradio interface
1235
  with gr.Blocks(
1236
- title="Agent Feynman",
1237
  css="""
1238
- h1 {text-align: center; color: white !important;}
 
1239
  #lecture-container {font-family: 'Times New Roman', Times, serif;}
1240
  #slide-content {font-size: 48px; line-height: 1.2;}
1241
  #form-group {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; color: #000; background-color: white;}
1242
  #download {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
 
1243
  #slide-display {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white;}
1244
- .gradio-container { background: linear-gradient(135deg, #2a2a2a, #6a6a6a); box-shadow: 0 0 2rem rgba(255, 255, 255, 0.14);padding-top: 30px;}
1245
- .gradio-container-5-29-0 .prose :last-child {color: #fff !important; }
1246
  button {transition: background-color 0.3s;}
1247
  button:hover {background-color: #e0e0e0;}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1248
  """,
1249
  js=js_code,
1250
  head='<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">'
1251
  ) as demo:
1252
  gr.Markdown("""
1253
- # <center>Professor AI Feynman: A Multi-Agent Tool for Learning Anything the Feynman way.</center>""")
 
 
1254
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
1255
  with gr.Column(scale=1):
1256
  with gr.Group(elem_id="form-group"):
1257
  title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
@@ -1276,17 +1540,451 @@ with gr.Blocks(
1276
  api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama or Azure AI Foundry (use GITHUB_TOKEN env var)")
1277
  serpapi_key = gr.Textbox(label="SerpApi Key (For Research Agent)", type="password", placeholder="Enter your SerpApi key (optional)")
1278
  num_slides = gr.Slider(1, 20, step=1, label="Number of Lecture Slides (will add intro and closing slides)", value=3)
1279
- speaker_audio = gr.Audio(value="feynman.mp3", label="Speaker sample speech (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
 
 
 
 
 
 
1280
  generate_btn = gr.Button("Generate Lecture")
 
 
1281
  with gr.Column(scale=2):
1282
  default_slide_html = """
1283
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
1284
  <h2 style="font-style: italic; color: #000 !important;">Waiting for lecture content...</h2>
1285
- <p style="margin-top: 10px; font-size: 16px;color: #000">Please Generate lecture content via the form on the left first before lecture begins</p>
 
 
 
 
 
 
 
 
1286
  </div>
1287
  """
1288
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html, elem_id="slide-display")
 
1289
  file_output = gr.File(label="Download Lecture Materials", elem_id="download")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1290
 
1291
  speaker_audio.change(
1292
  fn=update_audio_preview,
@@ -1300,5 +1998,246 @@ with gr.Blocks(
1300
  outputs=[slide_display, file_output]
1301
  )
1302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1303
  if __name__ == "__main__":
1304
  demo.launch(allowed_paths=[OUTPUT_DIR])
 
1
+ # Lectūra Research Demo: A Multi-Agent Tool for Self-taught Mastery.
2
+ # Author: Jaward Sesay
3
+ # License: All rights reserved.
4
  import os
5
  import json
6
  import re
 
29
  from pydub import AudioSegment
30
  from TTS.api import TTS
31
  import markdown
32
+ import PyPDF2
33
+ import io
34
+ import copy
35
+
36
+ def get_instructor_name(speaker):
37
+ instructor_names = {
38
+ "feynman.mp3": "Professor Richard Feynman",
39
+ "einstein.mp3": "Professor Albert Einstein",
40
+ "samantha.mp3": "Professor Samantha",
41
+ "socrates.mp3": "Professor Socrates",
42
+ "professor_lectura_male.mp3": "Professor Lectūra"
43
+ }
44
+ return instructor_names.get(speaker, "Professor Lectūra")
45
 
46
  # Set up logging
47
  logging.basicConfig(
 
56
 
57
  # Set up environment
58
  OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
59
+ UPLOAD_DIR = os.path.join(os.getcwd(), "uploads")
60
  os.makedirs(OUTPUT_DIR, exist_ok=True)
61
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
62
  logger.info(f"Using output directory: {OUTPUT_DIR}")
63
+ logger.info(f"Using upload directory: {UPLOAD_DIR}")
64
  os.environ["COQUI_TOS_AGREED"] = "1"
65
 
66
  # Initialize TTS model
 
121
  return "<div>Error rendering content</div>"
122
 
123
  # Slide tool for generating HTML slides used by slide_agent
124
+ def create_slides(slides: list[dict], title: str, instructor_name: str, output_dir: str = OUTPUT_DIR) -> list[str]:
125
  try:
126
  html_files = []
127
  template_file = os.path.join(os.getcwd(), "slide_template.html")
 
139
  slide_html = slide_html.replace("section title", f"{slide['title']}")
140
  slide_html = slide_html.replace("Lecture title", title)
141
  slide_html = slide_html.replace("<!--CONTENT-->", html_content)
142
+ slide_html = slide_html.replace("speaker name", instructor_name)
143
  slide_html = slide_html.replace("date", date)
144
 
145
  html_file = os.path.join(output_dir, f"slide_{slide_number}.html")
 
222
  async def validate_and_convert_speaker_audio(speaker_audio):
223
  if not speaker_audio or not os.path.exists(speaker_audio):
224
  logger.warning("Speaker audio file does not exist: %s. Using default voice.", speaker_audio)
225
+ default_voice = os.path.join(os.path.dirname(__file__), "professor_lectura_male.mp3")
226
  if os.path.exists(default_voice):
227
  speaker_audio = default_voice
228
  else:
 
407
  # Async generate lecture materials and audio
408
  async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, lecture_style, speaker_audio, num_slides):
409
  model_client = get_model_client(api_service, api_key)
410
+
411
+ # Get the speaker from the speaker_audio path
412
+ speaker = os.path.basename(speaker_audio) if speaker_audio else "professor_lectura_male.mp3"
413
+ logger.info(f"Selected speaker file: {speaker}")
414
+
415
+ instructor_name = get_instructor_name(speaker)
416
+ logger.info(f"Using instructor: {instructor_name}")
417
 
418
  if os.path.exists(OUTPUT_DIR):
419
  try:
 
449
  system_message=f"""
450
  You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({content_slides}), generate exactly {content_slides} content slides, plus an Introduction slide as the first slide and a Closing slide as the last slide, making a total of {total_slides} slides.
451
 
452
+ - The Introduction slide (first slide) should have the title "{title}" and content containing only the lecture title, speaker name ({get_instructor_name(speaker_audio)}), and date {date}, centered, in plain text.
453
  - The Closing slide (last slide) should have the title "Closing" and content containing only "The End\nThank you", centered, in plain text.
454
  - The remaining {content_slides} slides should be content slides based on the lecture description, audience type, and lecture style ({lecture_style}), with meaningful titles and content in valid Markdown format. Adapt the content to the lecture style to suit diverse learners:
455
  - Feynman: Explains complex ideas with simplicity, clarity, and enthusiasm, emulating Richard Feynman's teaching style.
 
463
  Example output for 1 content slide (total 3 slides):
464
  ```json
465
  [
466
+ {{"title": "Introduction to AI Basics", "content": "AI Basics\n{get_instructor_name(speaker_audio)}\n{date}"}},
467
  {{"title": "What is AI?", "content": "# What is AI?\n- Definition: Systems that mimic human intelligence\n- Key areas: ML, NLP, Robotics"}},
468
  {{"title": "Closing", "content": "The End\nThank you"}}
469
  ]
 
475
  script_agent = AssistantAgent(
476
  name="script_agent",
477
  model_client=model_client,
478
+ handoffs=["instructor_agent"],
479
  system_message=f"""
480
+ You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history, which includes an Introduction slide, {content_slides} content slides, and a Closing slide. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone. Ensure the lecture is engaging, covers the fundamental requirements of the topic, and aligns with the lecture style ({lecture_style}) to suit diverse learners. The lecture will be delivered by {instructor_name}.
 
 
 
 
 
 
481
 
482
+ Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_instructor_agent tool. If scripts cannot be generated, retry once.
483
 
484
  Example for 3 slides (1 content slide):
485
  ```json
486
  [
487
+ "Welcome to the lecture on AI Basics. I am {instructor_name}, and today we will explore the fundamentals of artificial intelligence.",
488
  "Let us begin by defining artificial intelligence: it refers to systems that mimic human intelligence, spanning key areas such as machine learning, natural language processing, and robotics.",
489
  "That concludes our lecture on AI Basics. Thank you for your attention, and I hope you found this session insightful."
490
  ]
 
492
  output_content_type=None,
493
  reflect_on_tool_use=False
494
  )
495
+
496
+ def get_instructor_prompt(speaker, lecture_style):
497
+ base_prompts = {
498
+ "feynman.mp3": f"You are {instructor_name}, known for your ability to explain complex concepts with remarkable clarity and enthusiasm. Your teaching style is characterized by:",
499
+ "einstein.mp3": f"You are {instructor_name}, known for your profound insights and ability to connect abstract concepts to the physical world. Your teaching style is characterized by:",
500
+ "samantha.mp3": f"You are {instructor_name}, known for your engaging and accessible approach to teaching. Your teaching style is characterized by:",
501
+ "socrates.mp3": f"You are {instructor_name}, known for your method of questioning and guiding students to discover knowledge themselves. Your teaching style is characterized by:",
502
+ "professor_lectura_male.mp3": f"You are {instructor_name}, known for your clear and authoritative teaching style. Your teaching style is characterized by:"
503
+ }
504
+
505
+ style_characteristics = {
506
+ "Feynman - Simplifies complex ideas with enthusiasm": """
507
+ - Breaking down complex ideas into simple, understandable parts
508
+ - Using analogies and real-world examples
509
+ - Maintaining enthusiasm and curiosity throughout
510
+ - Encouraging critical thinking and questioning
511
+ - Making abstract concepts tangible and relatable""",
512
+
513
+ "Socratic - Guides insights with probing questions": """
514
+ - Using thought-provoking questions to guide understanding
515
+ - Encouraging self-discovery and critical thinking
516
+ - Challenging assumptions and exploring implications
517
+ - Building knowledge through dialogue and inquiry
518
+ - Fostering intellectual curiosity and reflection""",
519
+
520
+ "Inspirational - Sparks enthusiasm with visionary ideas": """
521
+ - Connecting concepts to broader implications and possibilities
522
+ - Using motivational language and visionary thinking
523
+ - Inspiring curiosity and wonder about the subject
524
+ - Highlighting the transformative potential of knowledge
525
+ - Encouraging students to think beyond conventional boundaries""",
526
+
527
+ "Reflective - Promotes introspection with a calm tone": """
528
+ - Creating a contemplative learning environment
529
+ - Encouraging deep thinking and personal connection
530
+ - Using a calm, measured delivery
531
+ - Promoting self-reflection and understanding
532
+ - Building connections between concepts and personal experience""",
533
+
534
+ "Humorous - Uses wit and anecdotes for engaging content": """
535
+ - Incorporating relevant humor and anecdotes
536
+ - Making learning enjoyable and memorable
537
+ - Using wit to highlight key concepts
538
+ - Creating an engaging and relaxed atmosphere
539
+ - Balancing entertainment with educational value"""
540
+ }
541
+
542
+ base_prompt = base_prompts.get(speaker, base_prompts["feynman.mp3"])
543
+ style_prompt = style_characteristics.get(lecture_style, style_characteristics["Feynman - Simplifies complex ideas with enthusiasm"])
544
+
545
+ return f"""{base_prompt}
546
+ {style_prompt}
547
+
548
+ Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received, including the Introduction and Closing slides. Verify that HTML slide files exist in the outputs directory and align with the lecture style ({lecture_style}). Output a confirmation message summarizing the number of slides, scripts, and HTML files status. If slides, scripts, or HTML files are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
549
+ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files. Lecture is coherent and aligns with {lecture_style} style. TERMINATE'
550
+ """
551
+
552
+ instructor_agent = AssistantAgent(
553
+ name="instructor_agent",
554
  model_client=model_client,
555
  handoffs=[],
556
+ system_message=get_instructor_prompt(speaker_audio, lecture_style)
557
+ )
 
 
558
 
559
  swarm = Swarm(
560
+ participants=[research_agent, slide_agent, script_agent, instructor_agent],
561
  termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
562
  )
563
 
 
641
  []
642
  )
643
  await asyncio.sleep(0.1)
644
+ elif source == "script_agent" and message.target == "instructor_agent":
645
  if scripts is None:
646
  logger.warning("Script Agent handoff without scripts JSON")
647
  extracted_json = extract_json_from_message(message)
 
683
  )
684
  task_result.messages.append(retry_message)
685
  continue
686
+ # Generate HTML slides with instructor name
687
+ html_files = create_slides(slides, title, instructor_name)
688
  if not html_files:
689
  logger.error("Failed to generate HTML slides")
690
  progress = 50
 
741
  task_result.messages.append(retry_message)
742
  continue
743
 
744
+ elif source == "instructor_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
745
+ logger.info("Instructor Agent completed lecture review: %s", message.content)
746
  progress = 90
747
  label = "Lecture materials ready. Generating lecture speech..."
748
  file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
 
827
  logger.error("Invalid speaker audio after conversion, skipping TTS")
828
  yield (
829
  f"""
830
+ <div style=\"display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;\">
831
+ <h2 style=\"color: #d9534f;\">Invalid speaker audio</h2>
832
+ <p style=\"margin-top: 20px;\">Please upload a valid MP3 or WAV audio file and try again.</p>
833
  </div>
834
  """,
835
+ [],
836
+ None
837
  )
838
  return
839
 
 
858
  label = f"Generating lecture speech for slide {i + 1}/{len(scripts)}..."
859
  yield (
860
  html_with_progress(label, progress),
861
+ file_paths,
862
+ None
863
  )
864
  await asyncio.sleep(0.1)
865
  continue
 
886
  file_paths.append(audio_file)
887
  yield (
888
  html_with_progress(label, progress),
889
+ file_paths,
890
+ None
891
  )
892
  await asyncio.sleep(0.1)
893
  break
 
901
  label = f"Generating speech for slide {i + 1}/{len(scripts)}..."
902
  yield (
903
  html_with_progress(label, progress),
904
+ file_paths,
905
+ None
906
  )
907
  await asyncio.sleep(0.1)
908
  break
 
931
  {audio_timeline}
932
  </div>
933
  <div style="display: center; justify-content: center; margin-bottom: 10px;">
934
+ <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i class="fas fa-step-backward" style="color: #fff !important"></i></button>
935
+ <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i class="fas fa-play" style="color: #fff !important"></i></button>
936
+ <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i class="fas fa-step-forward" style="color: #fff !important"></i></button>
937
+ <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i style="color: #fff !important" class="fas fa-expand"></i></button>
938
+ <button id="clear-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i style="color: #fff !important" class="fas fa-paint-brush"></i></button>
939
  </div>
940
  </div>
941
  </div>
942
  """
943
  logger.info("Yielding final lecture materials after audio generation")
944
+ # --- YIELD LECTURE CONTEXT FOR AGENTS ---
945
+ lecture_context = {
946
+ "slides": slides,
947
+ "scripts": scripts,
948
+ "title": title,
949
+ "description": lecture_content_description,
950
+ "style": lecture_style,
951
+ "audience": lecture_type
952
+ }
953
  yield (
954
  html_output,
955
+ file_paths,
956
+ lecture_context
957
  )
958
 
959
  logger.info("Lecture generation completed successfully")
 
968
  <p style="margin-top: 20px;">Please try again</p>
969
  </div>
970
  """,
971
+ [],
972
+ None
973
  )
974
  return
975
 
976
  # custom js for lecture container features
977
  js_code = """
978
  () => {
979
+ // Function to wait for an element to appear in the DOM
980
  function waitForElement(selector, callback, maxAttempts = 50, interval = 100) {
981
  let attempts = 0;
982
  const intervalId = setInterval(() => {
 
993
  }, interval);
994
  }
995
 
996
+ // Main initialization function
997
  function initializeSlides() {
998
  console.log("Initializing slides...");
999
 
1000
+ // Wait for lecture-data to load the JSON data
1001
  waitForElement('#lecture-data', (dataElement) => {
1002
  if (!dataElement.textContent) {
1003
  console.error("Lecture data element is empty");
 
1021
  const totalSlides = lectureData.htmlFiles.length;
1022
  let audioElements = [];
1023
  let isPlaying = false;
1024
+ let hasNavigated = false;
1025
+ let currentAudioIndex = 0;
1026
 
1027
  // Wait for slide-content element
1028
  waitForElement('#slide-content', (slideContent) => {
 
1054
  if (body) {
1055
  const textLength = body.textContent.length;
1056
  const screenWidth = window.innerWidth;
1057
+ const screenHeight = window.innerHeight;
1058
+
1059
+ // Base font size calculation
1060
+ let baseFontSize;
1061
+ if (screenWidth >= 1920) {
1062
+ baseFontSize = 20; // Large screens
1063
+ } else if (screenWidth >= 1366) {
1064
+ baseFontSize = 18; // Medium screens
1065
+ } else {
1066
+ baseFontSize = 16; // Small screens
1067
+ }
1068
+
1069
+ // Adjust based on content length
1070
+ let adjustedFontSize;
1071
+ if (textLength > 1000) {
1072
+ adjustedFontSize = baseFontSize * 0.8; // Reduce for long content
1073
+ } else if (textLength > 500) {
1074
+ adjustedFontSize = baseFontSize * 0.9; // Slightly reduce for medium content
1075
+ } else {
1076
+ adjustedFontSize = baseFontSize; // Keep base size for short content
1077
+ }
1078
+
1079
+ // Ensure minimum and maximum sizes
1080
+ adjustedFontSize = Math.max(14, Math.min(24, adjustedFontSize));
1081
+
1082
+ // Apply to all elements
1083
  const elements = body.getElementsByTagName('*');
1084
  for (let elem of elements) {
1085
  elem.style.fontSize = `${adjustedFontSize}px`;
1086
  }
1087
+
1088
+ console.log(`Adjusted font size to ${adjustedFontSize}px for ${textLength} characters on ${screenWidth}x${screenHeight} screen`);
1089
  }
1090
  };
1091
  });
 
1109
  if (audio && audio.pause) {
1110
  audio.pause();
1111
  audio.currentTime = 0;
1112
+ audio.style.border = 'none';
1113
  console.log("Paused and reset audio:", audio.id);
1114
  }
1115
  });
 
1135
 
1136
  function prevSlide() {
1137
  console.log("Previous button clicked, current slide:", currentSlide);
1138
+ hasNavigated = true;
1139
  if (currentSlide > 0) {
1140
  currentSlide--;
1141
  updateSlide(() => {
 
1153
 
1154
  function nextSlide() {
1155
  console.log("Next button clicked, current slide:", currentSlide);
1156
+ hasNavigated = true;
1157
  if (currentSlide < totalSlides - 1) {
1158
  currentSlide++;
1159
  updateSlide(() => {
 
1177
  return;
1178
  }
1179
  const playIcon = playBtn.querySelector('i');
1180
+
1181
+ if (isPlaying) {
1182
  // Pause playback
1183
  isPlaying = false;
1184
  audioElements.forEach(audio => {
1185
  if (audio && audio.pause) {
1186
  audio.pause();
 
1187
  audio.style.border = 'none';
1188
  console.log("Paused audio:", audio.id);
1189
  }
 
1191
  playIcon.className = 'fas fa-play';
1192
  return;
1193
  }
1194
+
1195
  // Start playback
 
 
1196
  isPlaying = true;
1197
  playIcon.className = 'fas fa-pause';
1198
+ currentSlide = 0;
1199
+ currentAudioIndex = 0;
1200
+
1201
  updateSlide(() => {
1202
  function playNext() {
1203
+ if (currentAudioIndex >= totalSlides || !isPlaying) {
1204
  isPlaying = false;
1205
  playIcon.className = 'fas fa-play';
1206
  audioElements.forEach(audio => {
 
1209
  console.log("Finished playing all slides or paused");
1210
  return;
1211
  }
1212
+
1213
+ currentSlide = currentAudioIndex;
1214
  updateSlide(() => {
1215
+ const audio = audioElements[currentAudioIndex];
1216
  if (audio && audio.play) {
 
1217
  audioElements.forEach(a => a.style.border = 'none');
1218
  audio.style.border = '5px solid #16cd16';
1219
  audio.style.borderRadius = '30px';
1220
+ console.log(`Attempting to play audio for slide ${currentAudioIndex + 1}`);
1221
+
1222
  audio.play().then(() => {
1223
+ console.log(`Playing audio for slide ${currentAudioIndex + 1}`);
 
1224
  audio.onended = null;
1225
  audio.addEventListener('ended', () => {
1226
+ if (isPlaying) {
1227
+ console.log(`Audio ended for slide ${currentAudioIndex + 1}`);
1228
+ currentAudioIndex++;
1229
+ playNext();
1230
+ }
1231
  }, { once: true });
1232
+
1233
  const checkDuration = setInterval(() => {
1234
  if (!isPlaying) {
1235
  clearInterval(checkDuration);
1236
  return;
1237
  }
1238
  if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1239
+ console.log(`Fallback: Audio for slide ${currentAudioIndex + 1} considered ended`);
1240
  clearInterval(checkDuration);
1241
+ audio.onended = null;
1242
+ currentAudioIndex++;
1243
  playNext();
1244
  }
1245
  }, 1000);
1246
  }).catch(e => {
1247
+ console.error(`Audio play failed for slide ${currentAudioIndex + 1}:`, e);
 
1248
  setTimeout(() => {
1249
+ if (isPlaying) {
1250
+ audio.play().then(() => {
1251
+ console.log(`Retry succeeded for slide ${currentAudioIndex + 1}`);
1252
+ audio.onended = null;
1253
+ audio.addEventListener('ended', () => {
1254
+ if (isPlaying) {
1255
+ console.log(`Audio ended for slide ${currentAudioIndex + 1}`);
1256
+ currentAudioIndex++;
1257
+ playNext();
1258
+ }
1259
+ }, { once: true });
1260
+ }).catch(e => {
1261
+ console.error(`Retry failed for slide ${currentAudioIndex + 1}:`, e);
1262
+ currentAudioIndex++;
1263
  playNext();
1264
+ });
1265
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1266
  }, 500);
1267
  });
1268
  } else {
1269
+ currentAudioIndex++;
1270
  playNext();
1271
  }
1272
  });
 
1328
  const lectureContainer = document.getElementById('lecture-container');
1329
  if (lectureContainer) {
1330
  console.log("Lecture container detected in DOM");
1331
+ observer.disconnect();
1332
  initializeSlides();
1333
  }
1334
  }
 
1339
  }
1340
  """
1341
 
1342
def process_uploaded_file(file):
    """Extract text content from an uploaded lecture file.

    Accepts a Gradio upload, which may be a file-like object (has ``.read``)
    or a NamedString / path-like string. The file is first persisted into
    ``UPLOAD_DIR``, then its text is extracted:

    - ``.pdf``          -> concatenated text of every page via PyPDF2
    - ``.txt`` / ``.md`` -> raw text, passed through ``clean_script_text()``

    Returns:
        str: the extracted (and, for text files, cleaned) content.

    Raises:
        ValueError: for unsupported file extensions.
        Exception: re-raised from any underlying I/O or parse failure.
    """
    # Defined before the try so the except block can always reference it;
    # previously an early failure made the error log itself raise NameError.
    file_path = None
    try:
        # Gradio may hand us a file-like object or a plain path string.
        file_name = os.path.basename(file.name if hasattr(file, 'name') else str(file))
        file_path = os.path.join(UPLOAD_DIR, file_name)

        # Get file extension
        _, ext = os.path.splitext(file_path)
        ext = ext.lower()

        if ext == '.pdf':
            # PDFs are binary: copy the raw bytes, then parse with PyPDF2.
            if hasattr(file, 'read'):
                with open(file_path, 'wb') as f:
                    f.write(file.read())
            else:
                # If it's a file path, copy the file
                shutil.copy2(str(file), file_path)

            pdf_reader = PyPDF2.PdfReader(file_path)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text() + "\n"
            logger.info("Extracted text from PDF: %s", file_path)
            return text

        elif ext in ('.txt', '.md'):
            # Text files: normalize to str, persist a copy, then clean.
            if hasattr(file, 'read'):  # file-like object
                content = file.read()
                if isinstance(content, bytes):
                    content = content.decode('utf-8', errors='replace')
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)
            else:  # NamedString or string-like
                # If it's a file path, read the file
                if os.path.exists(str(file)):
                    with open(str(file), 'r', encoding='utf-8') as f:
                        content = f.read()
                else:
                    content = str(file)
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)

            # Clean and return content
            cleaned_content = clean_script_text(content)
            logger.info("Cleaned content for %s: %s", file_path, cleaned_content[:100] + "..." if len(cleaned_content) > 100 else cleaned_content)
            return cleaned_content
        else:
            raise ValueError(f"Unsupported file format: {ext}")
    except Exception as e:
        logger.error(f"Error processing file {file_path}: {str(e)}")
        raise
1399
+
1400
async def study_mode_process(file, api_service, api_key):
    """Analyze an uploaded document and derive lecture-form inputs.

    Runs the uploaded file through ``process_uploaded_file()``, then asks a
    one-shot "study agent" LLM to summarize it into a lecture title and a
    short content description. The whole pipeline is retried once on failure.

    Args:
        file: Gradio upload (file-like object or path string).
        api_service: model provider name understood by ``get_model_client()``.
        api_key: credential for that provider.

    Returns:
        dict: ``{"title": str, "content_description": str}``

    Raises:
        Exception: when every attempt fails; chained from the last error.
    """
    max_retries = 1
    for attempt in range(max_retries + 1):
        try:
            # Extract text from file
            content = process_uploaded_file(file)
            logger.info("Successfully extracted content from file: %s", file)

            # Create study agent
            logger.info("Initializing model client for service: %s", api_service)
            model_client = get_model_client(api_service, api_key)
            logger.info("Model client initialized successfully")

            study_agent = AssistantAgent(
                name="study_agent",
                model_client=model_client,
                system_message="""You are a Study Agent that analyzes lecture materials and generates appropriate inputs for the lecture generation system.
                Analyze the provided content and generate:
                1. A concise title (max 10 words)
                2. A brief content description (max 20 words)

                Output the results in JSON format:
                {
                    "title": "string",
                    "content_description": "string"
                }"""
            )

            # Process content with study agent
            logger.info("Running study agent with content length: %d", len(content))
            task_result = await Console(study_agent.run_stream(task=content))
            logger.info("Study agent execution completed")

            # Scan the agent's messages for the first complete JSON payload.
            for message in task_result.messages:
                extracted_json = extract_json_from_message(message)
                if extracted_json and isinstance(extracted_json, dict):
                    if "title" in extracted_json and "content_description" in extracted_json:
                        logger.info("Valid JSON output: %s", extracted_json)
                        return extracted_json
                    else:
                        logger.warning("Incomplete JSON output: %s", extracted_json)

            raise ValueError("No valid JSON output with title and content_description from study agent")

        except Exception as e:
            logger.error("Attempt %d/%d failed: %s\n%s", attempt + 1, max_retries + 1, str(e), traceback.format_exc())
            if attempt == max_retries:
                # Chain the original error so the root cause is preserved
                # (previously the traceback context was discarded).
                raise Exception(f"Failed to process file after {max_retries + 1} attempts: {str(e)}") from e
            logger.info("Retrying study mode processing...")
            await asyncio.sleep(1)  # Brief delay before retry
1451
+
1452
  # Gradio interface
1453
  with gr.Blocks(
1454
+ title="Lectūra AI",
1455
  css="""
1456
+ h1 {text-align: center; color: #fff !important;}
1457
+ .gradio-container-5-29-0 .prose :last-child {color: #fff !important; }
1458
  #lecture-container {font-family: 'Times New Roman', Times, serif;}
1459
  #slide-content {font-size: 48px; line-height: 1.2;}
1460
  #form-group {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; color: #000; background-color: white;}
1461
  #download {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
1462
+ #uploaded-file {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
1463
  #slide-display {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white;}
1464
+ .gradio-container { background: #fff !important; box-shadow: 0 0 2rem rgba(255, 255, 255, 0.14);padding-top: 30px;}
 
1465
  button {transition: background-color 0.3s;}
1466
  button:hover {background-color: #e0e0e0;}
1467
+ .upload-area {border: 2px dashed #ccc; border-radius: 20px; padding: 40px; text-align: center; cursor: pointer; height: 100%; min-height: 700px; display: flex; flex-direction: column; justify-content: center; align-items: center;}
1468
+ .upload-area:hover {border-color: #16cd16;}
1469
+ .upload-area.dragover {border-color: #16cd16; background-color: rgba(22, 205, 22, 0.1);}
1470
+ .wrap.svelte-1kzox3m {justify-content: center;}
1471
+ #mode-tabs {border-radius: 30px !important;}
1472
+ #component-2 {border-radius: 30px; box-shadow: rgba(0, 0, 0, 0.14) 0px 0px 2rem !important; width: 290px;}
1473
+ #component-0 {align-items: center;justify-content: center;}
1474
+ #component-26 {box-shadow: rgba(0, 0, 0, 0.14) 0px 0px 2rem !important; border-radius: 30px; height: 970px !important; overflow: auto !important;}
1475
+ #right-column {padding: 10px !important; height: 100% !important; display: flex !important; flex-direction: column !important; gap: 20px !important;}
1476
+ #notes-section {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white; padding: 20px; flex: 0 0 auto; display: flex; flex-direction: column; overflow: hidden;}
1477
+ #chat-section {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white; padding: 20px; flex: 1; display: flex; flex-direction: column; overflow: hidden; min-height: 760px;}
1478
+ .note-button {width: 100%; border-radius: 15px; margin-bottom: 10px; padding: 10px; background-color: #f0f0f0; border: none; cursor: pointer; color: #000 !important}
1479
+ .note-button:hover {background-color: #e0e0e0;}
1480
+ .notes-list {flex: 1; overflow-y: auto; margin-top: 0px; min-height: 0;}
1481
+ .chat-input-container {display: flex; gap: 10px; margin-top: auto; padding-top: 20px;}
1482
+ .chat-input {flex-grow: 1; border-radius: 20px; padding: 10px 20px; border: 1px solid #ddd;background-color: rgb(240, 240, 240)}
1483
+ .send-button {border-radius: 20px; padding: 10px 25px; background-color: #16cd16; color: white; border: none; cursor: pointer;}
1484
+ .send-button:hover {background-color: #14b814;}
1485
+ .back-button {border-radius: 50%; width: 40px; height: 40px; background-color: #f0f0f0; border: none; cursor: pointer; display: flex; align-items: center; justify-content: center;}
1486
+ .back-button:hover {background-color: #e0e0e0;}
1487
+ .note-editor {display: none; width: 100%; height: 100%; min-height: 0;}
1488
+ .note-editor.active {display: flex; flex-direction: column;}
1489
+ .notes-view {display: flex; flex-direction: column; height: 100%; min-height: 0;}
1490
+ .notes-view.hidden {display: none;}
1491
+ .chat-messages {flex: 1; overflow-y: auto; margin-bottom: 20px; min-height: 0;}
1492
+ #study-guide-btn {margin-bottom: 0px !important}
1493
+ #component-26 {padding: 20px}
1494
+ .gradio-container-5-29-0 .prose :last-child {color: black !important;}
1495
+ #add-note-btn, #study-guide-btn, #quiz-btn, #send-btn{border-radius: 30px !important;}
1496
+ #chatbot {border-radius: 20px !important;}
1497
+ #chat-input-row {align-items: center !important;}
1498
  """,
1499
  js=js_code,
1500
  head='<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">'
1501
  ) as demo:
1502
  gr.Markdown("""
1503
+ # <center>Lectūra: Your AI Genie for Self-taught Mastery.</center>""")
1504
+
1505
+ # Add mode tabs
1506
  with gr.Row():
1507
+ with gr.Column(scale=1):
1508
+ with gr.Group(elem_id="mode-tabs"):
1509
+ mode_tabs = gr.Radio(
1510
+ choices=["Learn Mode", "Study Mode"],
1511
+ value="Learn Mode",
1512
+ label="Mode",
1513
+ elem_id="mode-tabs",
1514
+ show_label=False
1515
+ )
1516
+
1517
+ with gr.Row():
1518
+ # Left column (existing form)
1519
  with gr.Column(scale=1):
1520
  with gr.Group(elem_id="form-group"):
1521
  title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
 
1540
  api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama or Azure AI Foundry (use GITHUB_TOKEN env var)")
1541
  serpapi_key = gr.Textbox(label="SerpApi Key (For Research Agent)", type="password", placeholder="Enter your SerpApi key (optional)")
1542
  num_slides = gr.Slider(1, 20, step=1, label="Number of Lecture Slides (will add intro and closing slides)", value=3)
1543
+ speaker_select = gr.Dropdown(
1544
+ choices=["feynman.mp3", "einstein.mp3", "samantha.mp3", "socrates.mp3", "professor_lectura_male.mp3"],
1545
+ value="professor_lectura_male.mp3",
1546
+ label="Select Instructor",
1547
+ elem_id="speaker-select"
1548
+ )
1549
+ speaker_audio = gr.Audio(value="professor_lectura_male.mp3", label="Speaker sample speech (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
1550
  generate_btn = gr.Button("Generate Lecture")
1551
+
1552
+ # Middle column (existing slide display)
1553
  with gr.Column(scale=2):
1554
  default_slide_html = """
1555
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
1556
  <h2 style="font-style: italic; color: #000 !important;">Waiting for lecture content...</h2>
1557
+ <p style="margin-top: 10px; font-size: 16px;color: #000 !important">Please Generate lecture content via the form on the left first before lecture begins</p>
1558
+ </div>
1559
+ """
1560
+
1561
+ # Study mode upload area
1562
+ study_mode_html = """
1563
+ <div class="upload-area" id="upload-area">
1564
+ <h2 style="margin-top: 20px; color: #000;">Please upload lecture material by clicking the upload button below</h2>
1565
+ <p style="color: #666;">(only supports .pdf, .txt and .md)</p>
1566
  </div>
1567
  """
1568
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html, elem_id="slide-display")
1569
+ uploaded_file = gr.File(label="Upload Lecture Material", visible=False, elem_id="uploaded-file")
1570
  file_output = gr.File(label="Download Lecture Materials", elem_id="download")
1571
+
1572
+ # --- RIGHT COLUMN SPLIT: NOTES (TOP) AND CHAT (BOTTOM) ---
1573
+ with gr.Column(scale=1, elem_id="right-column"): # Add elem_id for CSS targeting
1574
+ # State for notes and lecture context
1575
+ notes_state = gr.State([]) # List of notes: [{"title": ..., "content": ...}]
1576
+ lecture_context_state = gr.State({}) # Dict with latest lecture slides/scripts
1577
+ chat_history_state = gr.State([]) # List of {user, assistant}
1578
+
1579
+ with gr.Row():
1580
+ with gr.Column(scale=1, elem_id="notes-section"): # NOTES SECTION (TOP)
1581
+ with gr.Row():
1582
+ add_note_btn = gr.Button("+ Add note", elem_id="add-note-btn")
1583
+ study_guide_btn = gr.Button("Study Guide", elem_id="study-guide-btn")
1584
+ quiz_btn = gr.Button("Quiz Yourself", elem_id="quiz-btn")
1585
+ note_response = gr.Textbox(label="Note Draft", visible=False)
1586
+ study_guide_response = gr.Textbox(label="Study Guide", visible=False)
1587
+ quiz_response = gr.Textbox(label="Quiz", visible=False)
1588
+ notes_list = gr.Dataframe(headers=["Title"], interactive=False, label="Your Notes", elem_id="notes-list")
1589
+ with gr.Column(visible=False) as note_editor:
1590
+ note_title = gr.Textbox(label="Note Title", elem_id="note-title")
1591
+ note_content = gr.Textbox(label="Note Content", lines=10, elem_id="note-content")
1592
+ with gr.Row():
1593
+ save_note_btn = gr.Button("Save Note", elem_id="save-note-btn")
1594
+ back_btn = gr.Button("Back", elem_id="back-btn")
1595
+
1596
+ with gr.Column(scale=1, elem_id="chat-section"): # CHAT SECTION (BOTTOM)
1597
+ with gr.Column():
1598
+ chatbot = gr.Chatbot(label="Chat", elem_id="chatbot", height=220, show_copy_button=True, type="messages")
1599
+ with gr.Row(elem_id="chat-input-row"):
1600
+ chat_input = gr.Textbox(show_label=False, placeholder="Type your message...", lines=1, elem_id="chat-input", scale=9)
1601
+ send_btn = gr.Button("Send", elem_id="send-btn", scale=1)
1602
+
1603
+ # --- UI LOGIC FOR SHOWING/HIDING RESPONSE COMPONENTS ---
1604
def show_only(component):
    """Return visibility updates so only the named response box is shown.

    ``component`` is one of ``"note"``, ``"study"``, ``"quiz"``; the matching
    textbox becomes visible and the other two are hidden.
    """
    visibility = [component == key for key in ("note", "study", "quiz")]
    return tuple(gr.update(visible=flag) for flag in visibility)
1611
+
1612
+ # Add Note button: generate note draft and show editor/response
1613
async def add_note_fn(notes, lecture_context, api_service, api_key, title_val, desc_val, style_val, audience_val):
    """Draft a note from the current lecture context via the note agent.

    Shows the note-draft textbox (hiding the study-guide and quiz boxes)
    and pre-fills the note editor's title/content fields.
    """
    ctx = get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val)
    draft = await run_note_agent(api_service, api_key, ctx, "", "")
    draft_title = draft.get("title", "")
    draft_body = draft.get("content", "")
    combined = f"{draft_title}\n{draft_body}".strip()
    return (
        gr.update(visible=True, value=combined),
        gr.update(visible=False, value=""),
        gr.update(visible=False, value=""),
        draft_title,
        draft_body,
    )
1624
+ add_note_btn.click(
1625
+ fn=add_note_fn,
1626
+ inputs=[notes_state, lecture_context_state, api_service, api_key, title, lecture_content_description, lecture_style, lecture_type],
1627
+ outputs=[note_response, study_guide_response, quiz_response, note_title, note_content]
1628
+ )
1629
+
1630
+ # Study Guide button: generate study guide and show response
1631
async def study_guide_btn_fn(notes, lecture_context, api_service, api_key, title_val, desc_val, style_val, audience_val):
    """Generate a study guide for the current lecture and show it.

    Makes the study-guide textbox visible while clearing/hiding the
    note-draft and quiz boxes.
    """
    ctx = get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val)
    guide_text = await run_study_agent(api_service, api_key, ctx)
    hidden_note = gr.update(visible=False, value="")
    hidden_quiz = gr.update(visible=False, value="")
    shown_guide = gr.update(visible=True, value=guide_text)
    return (hidden_note, shown_guide, hidden_quiz)
1639
+ study_guide_btn.click(
1640
+ fn=study_guide_btn_fn,
1641
+ inputs=[notes_state, lecture_context_state, api_service, api_key, title, lecture_content_description, lecture_style, lecture_type],
1642
+ outputs=[note_response, study_guide_response, quiz_response]
1643
+ )
1644
+
1645
+ # Quiz button: generate quiz and show response
1646
async def quiz_btn_fn(notes, lecture_context, api_service, api_key, title_val, desc_val, style_val, audience_val):
    """Generate a quiz from the current lecture context and show it.

    Makes the quiz textbox visible while clearing/hiding the note-draft
    and study-guide boxes.
    """
    ctx = get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val)
    quiz_text = await run_quiz_agent(api_service, api_key, ctx)
    hidden_note = gr.update(visible=False, value="")
    hidden_guide = gr.update(visible=False, value="")
    shown_quiz = gr.update(visible=True, value=quiz_text)
    return (hidden_note, hidden_guide, shown_quiz)
1654
+ quiz_btn.click(
1655
+ fn=quiz_btn_fn,
1656
+ inputs=[notes_state, lecture_context_state, api_service, api_key, title, lecture_content_description, lecture_style, lecture_type],
1657
+ outputs=[note_response, study_guide_response, quiz_response]
1658
+ )
1659
+
1660
+ # Back button: hide note editor and all responses
1661
+ back_btn.click(
1662
+ fn=lambda: (
1663
+ gr.update(visible=False, value=""),
1664
+ gr.update(visible=False, value=""),
1665
+ gr.update(visible=False, value="")
1666
+ ),
1667
+ inputs=[],
1668
+ outputs=[note_response, study_guide_response, quiz_response]
1669
+ )
1670
+
1671
+ # Save Note button: add note to state and update list, hide responses
1672
async def save_note(note_title_val, note_content_val, notes, lecture_context, api_service, api_key, note_type=None):
    """Finalize a note via the note agent, persist it, and refresh the UI.

    Args:
        note_title_val: title typed in the note editor.
        note_content_val: content typed in the note editor.
        notes: current notes state (list of {"title", "content"} dicts).
        lecture_context: latest lecture context dict (may be empty).
        api_service / api_key: model provider settings.
        note_type: optional label prefixed onto the note title.

    Returns:
        (notes-list update, new notes state, and three hidden-response updates).
    """
    note = await run_note_agent(api_service, api_key, get_fallback_lecture_context(lecture_context, note_title_val, note_content_val, "", ""), note_title_val, note_content_val)
    # Prefix title with note type if provided
    if note_type:
        note["title"] = note_type_prefix(note_type, note.get("title", ""))
    new_notes = copy.deepcopy(notes)
    new_notes.append(note)
    # Sanitize the title before using it as a filename: an agent-produced
    # title may contain path separators or other reserved characters that
    # would previously make the open() below fail (or escape OUTPUT_DIR).
    safe_title = re.sub(r'[^\w\- .]', '_', note['title']).strip() or "note"
    note_file = os.path.join(OUTPUT_DIR, f"{safe_title}.txt")
    with open(note_file, "w", encoding="utf-8") as f:
        f.write(note['content'])
    return (
        update_notes_list(new_notes),
        new_notes,
        gr.update(visible=False, value=""),
        gr.update(visible=False, value=""),
        gr.update(visible=False, value="")
    )
1690
+ save_note_btn.click(
1691
+ fn=save_note,
1692
+ inputs=[note_title, note_content, notes_state, lecture_context_state, api_service, api_key],
1693
+ outputs=[notes_list, notes_state, note_response, study_guide_response, quiz_response]
1694
+ )
1695
+
1696
+ # --- CHAT AGENT LOGIC ---
1697
async def chat_fn(user_message, chat_history, lecture_context, api_service, api_key, title_val, desc_val):
    """Handle one chat turn: run the chat agent and update history/form.

    Returns a 5-tuple: (chatbot messages, cleared input box, new history
    state, title form update, description form update). When the agent
    proposes lecture-form changes, the title/description textboxes are
    updated and the change is echoed into the chat.
    """
    if not user_message.strip():
        # Ignore empty submissions; leave everything unchanged.
        return chat_history, "", chat_history, gr.update(), gr.update()
    form_update, response = await run_chat_agent(api_service, api_key, lecture_context, chat_history, user_message)
    new_history = chat_history.copy()
    # Append user message (non-empty: guaranteed by the guard above).
    new_history.append({"role": "user", "content": user_message})
    # Append assistant response
    if response:
        new_history.append({"role": "assistant", "content": response})
    title_update = gr.update()
    desc_update = gr.update()
    if form_update:
        proposed_title = form_update.get("title")
        proposed_desc = form_update.get("content_description")
        msg_parts = []
        if proposed_title:
            msg_parts.append(f"Lecture Title: {proposed_title}")
            title_update = gr.update(value=proposed_title)
        if proposed_desc:
            msg_parts.append(f"Lecture Content Description: {proposed_desc}")
            desc_update = gr.update(value=proposed_desc)
        if msg_parts:
            # Only echo when something changed; previously an empty
            # assistant message was appended when neither field was set.
            new_history.append({"role": "assistant", "content": "\n".join(msg_parts)})
    return new_history, "", new_history, title_update, desc_update
1723
# Wire the chat Send button: route the message through the chat agent and
# allow it to update the lecture title/description form fields.
send_btn.click(
    fn=chat_fn,
    inputs=[chat_input, chat_history_state, lecture_context_state, api_service, api_key, title, lecture_content_description],
    outputs=[chatbot, chat_input, chat_history_state, title, lecture_content_description]
)
1728
+
1729
# Extend the shared JS bundle with client-side behavior for Study Mode:
# drag-and-drop/click file upload, the Clear button reset, and mirroring the
# speaker <select> into Gradio's hidden file input. The string content is
# JavaScript executed in the browser — it must not be edited as Python.
js_code = js_code + """
// Add file upload handling
function initializeFileUpload() {
    const uploadArea = document.getElementById('upload-area');
    if (!uploadArea) return;

    // Create hidden file input
    const fileInput = document.createElement('input');
    fileInput.type = 'file';
    fileInput.accept = '.pdf,.txt,.md';
    fileInput.style.display = 'none';
    uploadArea.appendChild(fileInput);

    // Handle click on the entire upload area
    uploadArea.addEventListener('click', (e) => {
        if (e.target !== fileInput) {
            fileInput.click();
        }
    });

    fileInput.addEventListener('change', (e) => {
        const file = e.target.files[0];
        if (file) {
            const dataTransfer = new DataTransfer();
            dataTransfer.items.add(file);
            const gradioFileInput = document.querySelector('input[type="file"]');
            if (gradioFileInput) {
                gradioFileInput.files = dataTransfer.files;
                const event = new Event('change', { bubbles: true });
                gradioFileInput.dispatchEvent(event);
            }
        }
    });

    // Handle drag and drop
    ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
        uploadArea.addEventListener(eventName, preventDefaults, false);
    });

    function preventDefaults(e) {
        e.preventDefault();
        e.stopPropagation();
    }

    ['dragenter', 'dragover'].forEach(eventName => {
        uploadArea.addEventListener(eventName, highlight, false);
    });

    ['dragleave', 'drop'].forEach(eventName => {
        uploadArea.addEventListener(eventName, unhighlight, false);
    });

    function highlight(e) {
        uploadArea.classList.add('dragover');
    }

    function unhighlight(e) {
        uploadArea.classList.remove('dragover');
    }

    uploadArea.addEventListener('drop', handleDrop, false);

    function handleDrop(e) {
        const dt = e.dataTransfer;
        const file = dt.files[0];
        if (file) {
            const dataTransfer = new DataTransfer();
            dataTransfer.items.add(file);
            const gradioFileInput = document.querySelector('input[type="file"]');
            if (gradioFileInput) {
                gradioFileInput.files = dataTransfer.files;
                const event = new Event('change', { bubbles: true });
                gradioFileInput.dispatchEvent(event);
            }
        }
    }
}

// Initialize clear button functionality
function initializeClearButton() {
    const clearButton = document.getElementById('clear-btn');
    if (clearButton) {
        clearButton.addEventListener('click', () => {
            const modeTabs = document.querySelector('.mode-tabs input[type="radio"]:checked');
            const isStudyMode = modeTabs && modeTabs.value === 'Study Mode';

            // Reset all audio elements
            const audioElements = document.querySelectorAll('audio');
            audioElements.forEach(audio => {
                audio.pause();
                audio.currentTime = 0;
                audio.style.border = 'none';
            });

            // Reset play button
            const playBtn = document.getElementById('play-btn');
            if (playBtn) {
                const playIcon = playBtn.querySelector('i');
                if (playIcon) {
                    playIcon.className = 'fas fa-play';
                }
            }

            const slideContent = document.getElementById('slide-content');
            if (slideContent) {
                if (isStudyMode) {
                    slideContent.innerHTML = `
                        <div class="upload-area" id="upload-area">
                            <h2 style="margin-top: 20px; color: #000;">Please upload lecture material by clicking the upload button below</h2>
                            <p style="color: #666;">(only supports .pdf, .txt and .md)</p>
                        </div>
                    `;
                    initializeFileUpload();
                } else {
                    slideContent.innerHTML = `
                        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
                            <h2 style="font-style: italic; color: #000 !important;">Waiting for lecture content...</h2>
                            <p style="margin-top: 10px; font-size: 16px;color: #000">Please Generate lecture content via the form on the left first before lecture begins</p>
                        </div>
                    `;
                }
            }
        });
    }
}

// Initialize speaker selection
function initializeSpeakerSelect() {
    const speakerSelect = document.getElementById('speaker-select');
    const speakerAudio = document.querySelector('#speaker-audio input[type="file"]');

    if (speakerSelect && speakerAudio) {
        speakerSelect.addEventListener('change', (e) => {
            const selectedSpeaker = e.target.value;
            // Create a new File object from the selected speaker
            fetch(selectedSpeaker)
                .then(response => response.blob())
                .then(blob => {
                    const file = new File([blob], selectedSpeaker, { type: 'audio/mpeg' });
                    const dataTransfer = new DataTransfer();
                    dataTransfer.items.add(file);
                    speakerAudio.files = dataTransfer.files;
                    const event = new Event('change', { bubbles: true });
                    speakerAudio.dispatchEvent(event);
                });
        });
    }
}

// Initialize file upload when study mode is active
function checkAndInitializeUpload() {
    const uploadArea = document.getElementById('upload-area');
    if (uploadArea) {
        console.log('Initializing file upload...');
        initializeFileUpload();
    }
    initializeClearButton();
    initializeSpeakerSelect();
}

// Check immediately and also set up an observer
checkAndInitializeUpload();

const modeObserver = new MutationObserver((mutations) => {
    mutations.forEach((mutation) => {
        if (mutation.addedNodes.length) {
            checkAndInitializeUpload();
        }
    });
});
modeObserver.observe(document.body, { childList: true, subtree: true });
"""
1901
+
1902
# Handle mode switching
def switch_mode(mode):
    """Swap the main pane and form controls between Learn and Study mode.

    Returns the HTML for the slide pane, plus visibility updates for the
    generate button (always shown) and the upload widget (Study Mode only).
    """
    learning = mode == "Learn Mode"
    pane_html = default_slide_html if learning else study_mode_html
    return pane_html, gr.update(visible=True), gr.update(visible=not learning)
1908
+
1909
# Wire the Learn/Study tab switch to swap the main pane and toggle the
# upload widget visibility.
mode_tabs.change(
    fn=switch_mode,
    inputs=[mode_tabs],
    outputs=[slide_display, generate_btn, uploaded_file]
)
1914
+
1915
# Handle file upload in study mode
async def handle_file_upload(file, api_service, api_key):
    """Handle file upload in study mode and validate API key.

    Async generator wired to ``uploaded_file.change``: each ``yield`` streams
    a ``(slide_display_html, title, content_description)`` tuple to the UI,
    so progress states render before the final result. ``None`` title /
    description values leave those form fields unchanged.
    """
    if not file:
        # Upload cleared: restore the default placeholder pane.
        yield default_slide_html, None, None
        return

    # Validate API key or GITHUB_TOKEN for Azure AI Foundry
    if not api_key and api_service != "Azure AI Foundry":
        error_html = """
        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
            <h2 style="color: #d9534f;">Please input api key first</h2>
            <p style="margin-top: 20px;">An API key is required to process uploaded files in Study mode. Please provide a valid API key and try again.</p>
        </div>
        """
        logger.warning("API key is empty, terminating file upload")
        yield error_html, None, None
        return
    elif api_service == "Azure AI Foundry" and not os.environ.get("GITHUB_TOKEN"):
        error_html = """
        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
            <h2 style="color: #d9534f;">GITHUB_TOKEN not set</h2>
            <p style="margin-top: 20px;">Azure AI Foundry requires a GITHUB_TOKEN environment variable. Please set it and try again.</p>
        </div>
        """
        logger.warning("GITHUB_TOKEN is missing for Azure AI Foundry, terminating file upload")
        yield error_html, None, None
        return

    try:
        # Show uploading progress
        yield html_with_progress("Uploading Lecture Material...", 25), None, None
        # Brief sleep yields control so the UI can paint each progress frame.
        await asyncio.sleep(0.1)

        # Show processing progress
        yield html_with_progress("Processing file...", 50), None, None
        await asyncio.sleep(0.1)

        # Process file and generate inputs
        yield html_with_progress("Generating inputs", 75), None, None
        await asyncio.sleep(0.1)

        # study_mode_process extracts a lecture title and content description
        # from the uploaded material.
        result = await study_mode_process(file, api_service, api_key)

        # Show success message with updated inputs
        success_html = """
        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
            <h2 style="font-style: italic; color: #000 !important;">Study material processed, you can now generate lecture</h2>
            <p style="margin-top: 10px; font-size: 16px;color: #000">The form has been updated with the extracted information. Click Generate Lecture to proceed.</p>
        </div>
        """

        # Update only title and description
        yield (
            success_html,
            result["title"],
            result["content_description"]
        )
    except Exception as e:
        # Surface any processing failure in the slide pane instead of crashing.
        error_html = f"""
        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
            <h2 style="color: #d9534f;">Error processing file</h2>
            <p style="margin-top: 20px;">{str(e)}</p>
        </div>
        """
        logger.error(f"Error processing file: {str(e)}")
        yield error_html, None, None
1982
+
1983
# Stream upload progress into the slide pane and fill the lecture form
# from the processed study material.
uploaded_file.change(
    fn=handle_file_upload,
    inputs=[uploaded_file, api_service, api_key],
    outputs=[slide_display, title, lecture_content_description]
)
1988
 
1989
  speaker_audio.change(
1990
  fn=update_audio_preview,
 
1998
  outputs=[slide_display, file_output]
1999
  )
2000
 
2001
# Handle speaker selection
def update_speaker_audio(speaker):
    # Mirror the dropdown's reference-voice value into the speaker_audio
    # component (pass-through; presumably a file path — TODO confirm the
    # dropdown's value type against the component definition).
    logger.info(f"Speaker selection changed to: {speaker}")
    return speaker
2005
+
2006
# Propagate the chosen reference voice into the speaker_audio component.
speaker_select.change(
    fn=update_speaker_audio,
    inputs=[speaker_select],
    outputs=[speaker_audio]
)
2011
+
2012
# Extend the JS bundle again with the note-editor view toggle and a single
# initializer that wires all dynamic widgets; a MutationObserver re-runs it
# so late-rendered Gradio nodes get hooked up too. The string content is
# browser JavaScript — do not edit it as Python.
js_code = js_code + """
// Add note editor functionality
function initializeNoteEditor() {
    const addNoteBtn = document.getElementById('add-note-btn');
    const backBtn = document.getElementById('back-btn');
    const notesView = document.getElementById('notes-view');
    const noteEditor = document.getElementById('note-editor');

    if (addNoteBtn && backBtn && notesView && noteEditor) {
        addNoteBtn.addEventListener('click', () => {
            notesView.style.display = 'none';
            noteEditor.style.display = 'block';
        });

        backBtn.addEventListener('click', () => {
            noteEditor.style.display = 'none';
            notesView.style.display = 'block';
        });
    }
}

// Initialize all components
function initializeComponents() {
    initializeFileUpload();
    initializeClearButton();
    initializeSpeakerSelect();
    initializeNoteEditor();
}

// Check immediately and also set up an observer
initializeComponents();

const observer = new MutationObserver((mutations) => {
    mutations.forEach((mutation) => {
        if (mutation.addedNodes.length) {
            initializeComponents();
        }
    });
});
observer.observe(document.body, { childList: true, subtree: true });
"""
2053
+
2054
# --- AGENT LOGIC FOR NOTES, STUDY GUIDE, QUIZ, AND CHAT ---

async def run_note_agent(api_service, api_key, lecture_context, note_title, note_content):
    """Draft or improve a note with an LLM agent.

    Returns a dict ``{"title": ..., "content": ...}``; falls back to the
    caller-supplied title/content when no JSON reply can be extracted.
    """
    model_client = get_model_client(api_service, api_key)
    system_message = (
        "You are a Note Agent. Given the current lecture slides and scripts, help the user draft a note. "
        "If a title or content is provided, improve or complete the note. If not, suggest a new note based on the lecture. "
        "Always use the lecture context. Output a JSON object: {\"title\": ..., \"content\": ...}."
    )
    note_agent = AssistantAgent(
        name="note_agent",
        model_client=model_client,
        system_message=system_message
    )
    context_str = json.dumps(lecture_context)
    user_input = f"Lecture Context: {context_str}\nNote Title: {note_title}\nNote Content: {note_content}"
    result = await Console(note_agent.run_stream(task=user_input))
    # Return only the agent's reply
    # First pass: newest-first, only messages attributed to the note agent.
    for msg in reversed(result.messages):
        if getattr(msg, 'source', None) == 'note_agent' and hasattr(msg, 'content') and isinstance(msg.content, str):
            try:
                extracted = extract_json_from_message(msg)
                if extracted and isinstance(extracted, dict):
                    return extracted
            except Exception:
                # Unparseable candidate — keep scanning older messages.
                continue
    # fallback: any non-user message with content
    for msg in reversed(result.messages):
        if hasattr(msg, 'content') and isinstance(msg.content, str):
            try:
                extracted = extract_json_from_message(msg)
                if extracted and isinstance(extracted, dict):
                    return extracted
            except Exception:
                continue
    # Nothing parseable anywhere: echo the caller's draft unchanged.
    return {"title": note_title, "content": note_content}
2090
+
2091
async def run_study_agent(api_service, api_key, lecture_context):
    """Generate a concise plain-text study guide from the lecture context.

    Prefers the study agent's own reply; falls back to any message carrying
    string content, then to a fixed placeholder.
    """
    model_client = get_model_client(api_service, api_key)
    study_agent = AssistantAgent(
        name="study_agent",
        model_client=model_client,
        system_message=(
            "You are a Study Guide Agent. Given the current lecture slides and scripts, generate a concise study guide (max 200 words) summarizing the key points and actionable steps for the student. Output plain text only."
        ),
    )
    task = f"Lecture Context: {json.dumps(lecture_context)}"
    result = await Console(study_agent.run_stream(task=task))

    def _latest_text(require_agent_source):
        # Scan newest-first for a message with string content, optionally
        # restricted to the study agent's own messages.
        for message in reversed(result.messages):
            if require_agent_source and getattr(message, 'source', None) != 'study_agent':
                continue
            content = getattr(message, 'content', None)
            if isinstance(content, str):
                return content.strip()
        return None

    reply = _latest_text(True)
    if reply is None:
        reply = _latest_text(False)
    return "No study guide generated." if reply is None else reply
2112
+
2113
async def run_quiz_agent(api_service, api_key, lecture_context):
    """Generate a short plain-text quiz (3-5 questions) from the lecture context.

    Prefers the quiz agent's own reply; falls back to any message carrying
    string content, then to a fixed placeholder.
    """
    model_client = get_model_client(api_service, api_key)
    quiz_agent = AssistantAgent(
        name="quiz_agent",
        model_client=model_client,
        system_message=(
            "You are a Quiz Agent. Given the current lecture slides and scripts, generate a short quiz (3-5 questions) to test understanding. Output plain text only."
        ),
    )
    task = f"Lecture Context: {json.dumps(lecture_context)}"
    result = await Console(quiz_agent.run_stream(task=task))

    def _latest_text(require_agent_source):
        # Scan newest-first for a message with string content, optionally
        # restricted to the quiz agent's own messages.
        for message in reversed(result.messages):
            if require_agent_source and getattr(message, 'source', None) != 'quiz_agent':
                continue
            content = getattr(message, 'content', None)
            if isinstance(content, str):
                return content.strip()
        return None

    reply = _latest_text(True)
    if reply is None:
        reply = _latest_text(False)
    return "No quiz generated." if reply is None else reply
2134
+
2135
async def run_chat_agent(api_service, api_key, lecture_context, chat_history, user_message):
    """Answer a chat message about the lecture.

    Returns ``(form_update, reply)``: ``form_update`` is a dict of suggested
    ``title``/``content_description`` values when the agent wants to update
    the lecture form (``reply`` is then ``None``); otherwise ``form_update``
    is ``None`` and ``reply`` is the plain-text answer.
    """
    model_client = get_model_client(api_service, api_key)
    system_message = (
        "You are a helpful Chat Agent. Answer questions about the lecture, and if the user asks for a lecture title or content description, suggest appropriate values. "
        "If you want to update the form, output a JSON object: {\"title\": ..., \"content_description\": ...}. Otherwise, just reply as normal."
    )
    chat_agent = AssistantAgent(
        name="chat_agent",
        model_client=model_client,
        system_message=system_message
    )
    context_str = json.dumps(lecture_context)
    # Flatten prior turns into a plain-text transcript for the prompt.
    chat_str = "\n".join([f"User: {m['content']}" if m['role']=='user' else f"Assistant: {m['content']}" for m in chat_history])
    user_input = f"Lecture Context: {context_str}\nChat History: {chat_str}\nUser: {user_message}"
    result = await Console(chat_agent.run_stream(task=user_input))
    # Return only the chat_agent's reply
    # First pass: newest-first, only messages attributed to the chat agent.
    for msg in reversed(result.messages):
        if getattr(msg, 'source', None) == 'chat_agent' and hasattr(msg, 'content') and isinstance(msg.content, str):
            extracted = extract_json_from_message(msg)
            if extracted and isinstance(extracted, dict):
                # JSON payload means the agent wants to update the form.
                return extracted, None
            return None, msg.content.strip()
    # Fallback: accept any message carrying string content.
    for msg in reversed(result.messages):
        if hasattr(msg, 'content') and isinstance(msg.content, str):
            extracted = extract_json_from_message(msg)
            if extracted and isinstance(extracted, dict):
                return extracted, None
            return None, msg.content.strip()
    return None, "No response."
2164
+
2165
# --- UI WIRING FOR NOTES, STUDY GUIDE, QUIZ, AND CHAT ---

def update_notes_list(notes):
    """Build the rows shown in the Gradio Dataframe: one single-cell row per note title."""
    rows = []
    for note in notes:
        rows.append([note["title"]])
    return rows
2170
+
2171
def show_note_editor_with_content(title, content):
    """Open the note editor pre-filled with *title*/*content*, hiding the
    notes list, study-guide and quiz panes."""
    editor = gr.update(visible=True)       # note_editor
    listing = gr.update(visible=False)     # notes_list
    guide = gr.update(visible=False)       # study_guide_output
    quiz = gr.update(visible=False)        # quiz_output
    return editor, listing, guide, quiz, gr.update(value=title), gr.update(value=content)
2180
+
2181
def hide_note_editor():
    """Close the note editor and show the notes list again (guide/quiz stay hidden)."""
    return (
        gr.update(visible=False),  # note_editor
        gr.update(visible=True),   # notes_list
        gr.update(visible=False),  # study_guide_output
        gr.update(visible=False),  # quiz_output
    )
2183
+
2184
def show_study_guide(guide):
    """Display *guide* in the study-guide pane, hiding the editor and quiz."""
    return (
        gr.update(visible=False),              # note_editor
        gr.update(visible=True),               # notes_list
        gr.update(value=guide, visible=True),  # study_guide_output
        gr.update(visible=False),              # quiz_output
    )
2186
+
2187
def show_quiz(quiz):
    """Display *quiz* in the quiz pane, hiding the editor and study guide."""
    return (
        gr.update(visible=False),             # note_editor
        gr.update(visible=True),              # notes_list
        gr.update(visible=False),             # study_guide_output
        gr.update(value=quiz, visible=True),  # quiz_output
    )
2189
+
2190
# Helper to get fallback lecture context from form fields

def get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val):
    """Return *lecture_context* when it already carries slides or scripts;
    otherwise synthesize a minimal context from the form-field values,
    substituting defaults for any empty field."""
    has_material = bool(lecture_context) and bool(
        lecture_context.get("slides") or lecture_context.get("scripts")
    )
    if has_material:
        return lecture_context
    return {
        "slides": [],
        "scripts": [],
        "title": title_val or "Untitled Lecture",
        "description": desc_val or "No description provided.",
        "style": style_val or "Feynman - Simplifies complex ideas with enthusiasm",
        "audience": audience_val or "University",
    }
2204
+
2205
# Show note content when a note title is clicked

def show_note_content(evt, notes):
    """Load the clicked note's saved text from disk and show it.

    ``evt`` may be a Gradio ``SelectData`` event — whose ``.index`` for a
    Dataframe selection is ``(row, col)`` — or a plain dict with an
    ``index`` key. The previous implementation called ``evt.get(...)``,
    which fails on ``SelectData`` and mis-indexed ``(row, col)`` pairs.
    Returns a ``gr.update`` for the note_response pane.
    """
    idx = getattr(evt, "index", None)
    if idx is None and isinstance(evt, dict):
        idx = evt.get("index", 0)
    # Dataframe selections report (row, column); keep only the row.
    if isinstance(idx, (list, tuple)):
        idx = idx[0] if idx else 0
    if not isinstance(idx, int):
        idx = 0
    if 0 <= idx < len(notes):
        note = notes[idx]
        note_file = os.path.join(OUTPUT_DIR, f"{note['title']}.txt")
        if os.path.exists(note_file):
            with open(note_file, "r", encoding="utf-8") as f:
                note_text = f.read()
            return gr.update(visible=True, value=note_text)
    # No matching note on disk: keep the pane hidden and empty.
    return gr.update(visible=False, value="")
2218
# Show the stored note text when a row in the notes table is selected
# (the selection event is passed implicitly as the first argument).
notes_list.select(
    fn=show_note_content,
    inputs=[notes_state],
    outputs=note_response
)
2223
+
2224
+ # Make right column scrollable (wrap in a scrollable gr.Column if needed)
2225
+ # (Assume CSS already allows #right-column to scroll)
2226
+
2227
# --- NOTES LOGIC ---
def note_type_prefix(note_type, title):
    """Prefix *title* with ``"<note_type> - "`` unless no type was given or
    the title already starts with it."""
    needs_prefix = bool(note_type) and not title.startswith(note_type)
    return f"{note_type} - {title}" if needs_prefix else title
2232
+
2233
# Layout tweaks for the right-hand column: notes and chat share the height
# as two scrollable panes, with a sticky chat-input row at the bottom.
custom_css = """
#right-column {height: 100% !important; display: flex !important; flex-direction: column !important; gap: 20px !important;}
#notes-section, #chat-section {flex: 1 1 0; min-height: 0; max-height: 50vh; overflow-y: auto;}
#chat-section {display: flex; flex-direction: column; position: relative;}
#chatbot {flex: 1 1 auto; min-height: 0; max-height: calc(50vh - 60px); overflow-y: auto;}
#chat-input-row {position: sticky; bottom: 0; background: white; z-index: 2; padding-top: 8px;}
"""
# Blocks.css is None when the app was built without a css= argument, so a
# bare `demo.css += custom_css` would raise TypeError; coalesce first.
demo.css = (demo.css or "") + custom_css
2241
+
2242
if __name__ == "__main__":
    # allowed_paths lets Gradio serve generated slides/audio/notes from disk.
    demo.launch(allowed_paths=[OUTPUT_DIR])