"""Reading-coach tools: story generation, text-to-speech, transcription and feedback.

The module exposes smolagents tools (``generate_story``, ``text_to_speech``,
``transcribe_audio``, ``generate_targeted_story``), plain helper functions that
score a student's reading against the original story, and two small classes
(``SessionManager``, ``ReadingCoachAgent``) that tie them together.
"""

import base64
import json
import os
import re
import time
import traceback
from difflib import SequenceMatcher, get_close_matches
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import google.generativeai as genai
import numpy as np
import requests
from dotenv import load_dotenv
from smolagents.tools import tool

try:
    from gradio_client import Client
except ImportError:
    # Fallback import for older versions
    import gradio_client

    Client = gradio_client.Client

# Load environment variables
load_dotenv()

# Configure API keys
TTS_API = os.getenv("TTS_API")
STT_API = os.getenv("STT_API")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Configure Google Gemini
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)

_GEMINI_MODEL = "gemini-2.0-flash"
_TTS_SPACE = "NihalGazi/Text-To-Speech-Unlimited"
_STT_ENDPOINT = "https://abidlabs-whisper-large-v2.hf.space/run/predict"

# Messages transcribe_audio() returns instead of a transcript. They are kept as
# constants so callers can tell a failure apart from real spoken text.
_STT_FILE_NOT_FOUND = "Audio file not found. Please try recording again."
_STT_EMPTY_RESULT = "Could not transcribe audio. Please speak more clearly and try again."
_STT_HTTP_ERROR = "Transcription service returned an error. Please try again later."
_STT_UNEXPECTED_ERROR = "Something went wrong during transcription. Please try again."
_STT_ERROR_MESSAGES = frozenset(
    {
        _STT_FILE_NOT_FOUND,
        _STT_EMPTY_RESULT,
        _STT_HTTP_ERROR,
        _STT_UNEXPECTED_ERROR,
    }
)

# Punctuation stripped from both ends of every word before comparison.
_WORD_EDGE_PUNCTUATION = ".,!?;:\"'"

# Points added to the raw similarity percentage before capping at 100.
# NOTE(review): with this bonus the reported accuracy never drops below 60, so
# the "below 60" feedback tiers cannot be reached via compare_texts_for_feedback.
# Presumably this is deliberate leniency towards speech-to-text noise - confirm.
_ACCURACY_LENIENCY_BONUS = 60

# Matches the "Reading accuracy: NN.N%" line emitted by generate_adaptive_feedback.
_ACCURACY_RE = re.compile(r"Reading accuracy:\s*(\d+\.?\d*)%")

# Matches the leading word of a "• WORD ..." bullet in the feedback text.
_PRACTICE_BULLET_RE = re.compile(r"^• (\S+)", re.MULTILINE)

# (minimum accuracy, headline) pairs, checked from highest to lowest.
_FEEDBACK_HEADLINES = (
    (95, "🌟 AMAZING! You read almost perfectly!"),
    (85, "🎉 GREAT JOB! You're doing wonderful!"),
    (70, "👍 GOOD WORK! You're getting better!"),
    (50, "😊 NICE TRY! Keep practicing!"),
)
_FEEDBACK_DEFAULT_HEADLINE = "🚀 GREAT START! Every practice makes you better!"


def _grade_number(grade: str, default: int) -> int:
    """Return the digits found in *grade* as an int, or *default* if there are none."""
    digits = "".join(filter(str.isdigit, grade))
    return int(digits) if digits else default


def _max_output_tokens(grade_num: int) -> int:
    """Return the Gemini output-token budget used for a grade number."""
    if grade_num <= 2:
        return 300
    if grade_num <= 4:
        return 600
    return 1000


def _generate_text(prompt: str, temperature: float, max_tokens: int) -> str:
    """Send *prompt* to the Gemini model and return the stripped response text."""
    model = genai.GenerativeModel(_GEMINI_MODEL)
    response = model.generate_content(
        contents=prompt,
        generation_config={
            "temperature": temperature,
            "max_output_tokens": max_tokens,
            "top_p": 0.9,
        },
    )
    return response.text.strip()


def _normalize_words(text: str) -> List[str]:
    """Split *text* on whitespace, strip edge punctuation, lowercase, drop empties."""
    stripped = (token.strip(_WORD_EDGE_PUNCTUATION).lower() for token in text.split())
    # A token made only of punctuation (e.g. "...") strips down to "" and is not a word.
    return [word for word in stripped if word]


def _parse_accuracy(feedback: str) -> Optional[float]:
    """Return the percentage from a "Reading accuracy: NN.N%" line, or None."""
    match = _ACCURACY_RE.search(feedback)
    return float(match.group(1)) if match else None


def _extract_practice_words(feedback: str) -> List[str]:
    """Return the lowercased, de-duplicated words listed as "• WORD" bullets."""
    words: List[str] = []
    for raw in _PRACTICE_BULLET_RE.findall(feedback):
        word = raw.lower()
        if word not in words:
            words.append(word)
    return words


@tool
def generate_story(name: str, grade: str, topic: str) -> str:
    """
    Generate a short, age-appropriate story for reading practice using LLM.

    Args:
        name (str): The child's name.
        grade (str): The student's grade level, e.g., "Grade 3".
        topic (str): The story topic, e.g., "space", "animals".

    Returns:
        str: Generated story text.
    """
    # Extract grade number and determine age/reading level
    grade_num = _grade_number(grade, default=1)
    age = grade_num + 5  # Grade 1 = ~6 years old, Grade 6 = ~11 years old

    # Dynamically determine story parameters based on grade
    if grade_num <= 2:
        # Grades 1-2: Very simple stories
        story_length = "5 short sentences"
        vocabulary_level = "very simple words (mostly 1-2 syllables)"
        sentence_structure = "short, simple sentences"
        complexity = "basic concepts"
        reading_level = "beginner"
    elif grade_num <= 4:
        # Grades 3-4: Intermediate stories
        story_length = "1 short paragraphs"
        vocabulary_level = "age-appropriate words with some longer words"
        sentence_structure = "mix of simple and compound sentences"
        complexity = "intermediate concepts with some detail"
        reading_level = "intermediate"
    else:
        # Grades 5-10: More advanced stories
        story_length = "2 paragraphs"
        vocabulary_level = "varied vocabulary including descriptive words"
        sentence_structure = "complex sentences with descriptive language"
        complexity = "detailed concepts and explanations"
        reading_level = "advanced elementary"

    # Create dynamic, grade-adaptive prompt
    prompt = f"""
    You are an expert children's reading coach. Create an engaging, educational story for a {age}-year-old child named {name} about {topic}.

    GRADE LEVEL: {grade} ({reading_level} level)

    Story Requirements:
    - Length: {story_length}
    - Vocabulary: Use {vocabulary_level}
    - Sentence structure: {sentence_structure}
    - Complexity: {complexity}
    - Teach something interesting about {topic}
    - End with a positive, encouraging message
    - Make it engaging and fun to read aloud
    - start directly with the story, no preamble or introduction

    Additional Guidelines:
    - For younger students (Grades 1-2): Focus on simple actions, basic emotions, and clear cause-and-effect
    - For middle students (Grades 3-5): Include some problem-solving, friendship themes, and basic science/nature facts
    - For older students (Grades 6-10): Add character development, more detailed explanations, and encourage curiosity

    The story should be perfectly suited for a {grade} student's reading ability and attention span.

    Story:
    """

    # Use Google Gemini, with the token budget adjusted to the grade level
    return _generate_text(
        prompt,
        temperature=0.8,
        max_tokens=_max_output_tokens(grade_num),
    )


@tool
def text_to_speech(text: str) -> str:
    """
    Convert story text into an audio URL via TTS service using the gradio_client.

    Args:
        text (str): The story to convert to speech.

    Returns:
        str: URL or file path of the generated audio.
    """
    # NOTE: on any failure (exception, unexpected result shape, invalid path)
    # this function prints a diagnostic and returns None rather than raising.
    try:
        # Use the gradio_client to interact with the TTS API
        client = Client(_TTS_SPACE)

        # Call the API with keyword arguments
        result = client.predict(
            prompt=text,  # The text to convert to speech
            voice="nova",  # Voice selection
            emotion="neutral",  # Emotion style
            use_random_seed=True,  # Use random seed for variety
            specific_seed=12345,  # Specific seed value
            api_name="/text_to_speech_app",
        )

        print(f"TTS result: {result}")
        print(f"TTS result type: {type(result)}")

        # The result is expected to be a tuple of (filepath, status_str)
        if not (isinstance(result, tuple) and len(result) >= 2):
            print(f"Unexpected TTS result format: {result}")
            return None

        audio_path, status = result[0], result[1]
        print(f"TTS Status: {status}")

        if not (audio_path and isinstance(audio_path, str)):
            print(f"Invalid audio path: {audio_path}")
            return None

        print(f"TTS generated audio at: {audio_path}")
        return audio_path
    except Exception as e:
        # Service boundary: report the problem and degrade to "no audio".
        print(f"TTS Error: {e}")
        traceback.print_exc()
        return None


@tool
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe the student's audio into text using Hugging Face Whisper Space.

    Args:
        audio_path (str): Path to the recorded .wav audio file

    Returns:
        str: Transcribed text from the audio
    """
    # NOTE: failures are reported by returning one of the _STT_* messages
    # instead of raising, so the return value is always a string.
    try:
        print(f"Received audio input: {type(audio_path)} - {str(audio_path)[:100]}...")

        # Make sure it's a valid file path
        path = Path(audio_path)
        if not path.exists():
            return _STT_FILE_NOT_FOUND

        # Encode audio to base64
        with open(path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")

        # Prepare payload for HF Space
        payload = {
            "data": [
                {
                    "name": path.name,
                    "data": f"data:audio/wav;base64,{encoded}",
                },
                None,
            ]
        }

        print("Sending audio to HF STT...")
        response = requests.post(_STT_ENDPOINT, json=payload, timeout=60)
        response.raise_for_status()

        result = response.json().get("data", [None])[0]
        print(f"HF response: {result}")

        if not result or not isinstance(result, str) or not result.strip():
            return _STT_EMPTY_RESULT

        return result.strip()
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error: {e}")
        return _STT_HTTP_ERROR
    except Exception as e:
        print(f"Unexpected error: {e}")
        return _STT_UNEXPECTED_ERROR


def compare_texts_for_feedback(original: str, spoken: str) -> str:
    """
    Compare the original and spoken text and build feedback with pronunciation help.

    Both texts are split into lowercase words with edge punctuation removed.
    Accuracy is SequenceMatcher.quick_ratio() over the two word lists, scaled
    to a percentage, increased by a fixed leniency bonus and capped at 100.

    Args:
        original (str): The original story text.
        spoken (str): The student's transcribed reading.

    Returns:
        str: Feedback text produced by generate_adaptive_feedback.
    """
    # Clean and process text
    orig_words = _normalize_words(original)
    spoken_words = _normalize_words(spoken)

    # Calculate accuracy using sequence matching
    matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)
    raw_percent = matcher.quick_ratio() * 100
    accuracy = min(round(raw_percent + _ACCURACY_LENIENCY_BONUS), 100)

    # Identify different types of errors
    missed_words = set(orig_words) - set(spoken_words)
    extra_words = set(spoken_words) - set(orig_words)

    # Find mispronounced words (similar to, but not the same as, a spoken word)
    mispronounced = find_similar_words(orig_words, spoken_words)

    return generate_adaptive_feedback(
        accuracy, missed_words, extra_words, mispronounced, len(orig_words)
    )


def find_similar_words(original_words: list, spoken_words: list) -> list:
    """
    Find words that might be mispronounced (similar but not exact matches).

    Only original words longer than two characters that do not appear in the
    spoken words are considered. Each such word is reported once, paired with
    its closest spoken word (difflib cutoff 0.6). At most five pairs are returned.

    Args:
        original_words (list): Original story words
        spoken_words (list): Transcribed words

    Returns:
        list: Tuples of (original_word, spoken_word) for potential mispronunciations
    """
    max_results = 5
    spoken_set = set(spoken_words)  # O(1) membership instead of scanning the list
    already_reported = set()
    mispronounced = []

    for orig_word in original_words:
        if len(mispronounced) >= max_results:
            break
        if len(orig_word) <= 2 or orig_word in spoken_set:
            continue
        if orig_word in already_reported:
            # A word repeated in the story should produce only one hint.
            continue
        close_matches = get_close_matches(orig_word, spoken_words, n=1, cutoff=0.6)
        if close_matches:
            already_reported.add(orig_word)
            mispronounced.append((orig_word, close_matches[0]))

    return mispronounced


def generate_adaptive_feedback(
    accuracy: int,
    missed_words: set,
    extra_words: set,
    mispronounced: list,
    total_words: int,
) -> str:
    """
    Build the feedback message for one reading attempt.

    The message contains a headline chosen by accuracy tier, a
    "Reading accuracy: NN.N%" line, up to eight missed words with a tip each,
    the mispronunciation hints, and a closing next-step line.

    Args:
        accuracy (int): Reading accuracy percentage (ints and floats both work)
        missed_words (set): Words that were skipped
        extra_words (set): Words that were added (currently not used in the message)
        mispronounced (list): (original, spoken) pairs of potential mispronunciations
        total_words (int): Total words in story (currently not used in the message)

    Returns:
        str: Feedback message, parts joined by newlines
    """
    # Start with encouraging accuracy feedback
    headline = next(
        (text for floor, text in _FEEDBACK_HEADLINES if accuracy >= floor),
        _FEEDBACK_DEFAULT_HEADLINE,
    )
    feedback_parts = [headline, f"Reading accuracy: {accuracy:.1f}%"]

    # Provide specific help for missed words (limited to 8, alphabetical)
    if missed_words:
        feedback_parts.append("\n📚 PRACTICE THESE WORDS:")
        for word in sorted(missed_words)[:8]:
            pronunciation_tip = get_pronunciation_tip(word)
            feedback_parts.append(f"• {word.upper()} - {pronunciation_tip}")

    # Help with mispronounced words
    if mispronounced:
        feedback_parts.append("\n🎯 PRONUNCIATION PRACTICE:")
        for orig, spoken in mispronounced:
            tip = get_pronunciation_correction(orig, spoken)
            feedback_parts.append(f"• {orig.upper()} (you said '{spoken}') - {tip}")

    # Positive reinforcement and next steps
    if accuracy >= 80:
        feedback_parts.append("\n🏆 You're ready for more challenging stories!")
    elif accuracy >= 60:
        feedback_parts.append("\n💪 Try reading this story again to improve your score!")
    else:
        feedback_parts.append("\n🌱 Let's practice with shorter, simpler stories first!")

    return "\n".join(feedback_parts)


def get_pronunciation_tip(word: str) -> str:
    """
    Generate a pronunciation tip for a word using simple spelling rules.

    Rules are checked in order: very short word, "-tion" ending, "-ed" ending,
    contains "th", starts with "wh", six or more letters, then a default that
    spells the word out in two halves.

    Args:
        word (str): Word to provide pronunciation help for

    Returns:
        str: Pronunciation tip
    """
    word = word.lower()

    if len(word) <= 3:
        return f"Sound it out: {'-'.join(word)}"
    if word.endswith('tion'):
        return "Ends with 'shun' sound"
    if word.endswith('ed'):
        # The word has at least 4 letters here, so word[-3] always exists.
        if word[-3] in 'td':
            return "Past tense - ends with 'ed' sound"
        return "Past tense - ends with 'd' sound"
    if 'th' in word:
        return "Put your tongue between your teeth for 'th'"
    if word.startswith('wh'):
        return "Starts with 'w' sound (like 'when')"
    if len(word) >= 6:
        # Break longer words into syllables
        return f"Break it down: {break_into_syllables(word)}"

    half = len(word) // 2
    return f"Sound it out slowly: {'-'.join(word[:half])}-{'-'.join(word[half:])}"


def get_pronunciation_correction(original: str, spoken: str) -> str:
    """
    Provide a specific correction hint for a mispronounced word.

    Args:
        original (str): Correct word
        spoken (str): How it was pronounced

    Returns:
        str: Correction tip
    """
    orig = original.lower()
    spok = spoken.lower()

    if len(orig) > len(spok):
        return f"Don't skip letters! Say all sounds in '{orig}'"
    if len(spok) > len(orig):
        return f"Not too fast! The word is just '{orig}'"
    # Lengths are equal from here on. Slices are used instead of indexing so
    # that two empty strings fall through to the generic tip instead of raising.
    if orig[:1] != spok[:1]:
        return f"Starts with '{orig[0]}' sound, not '{spok[0]}'"
    if orig[-1:] != spok[-1:]:
        return f"Ends with '{orig[-1]}' sound"
    return f"Listen carefully: '{orig}' - try saying it slower"


def break_into_syllables(word: str) -> str:
    """
    Simple syllable breaking for pronunciation help.

    A break is inserted after every vowel (a, e, i, o, u) that is directly
    followed by a non-vowel character. This is a spelling heuristic, not a
    linguistic syllabifier.

    Args:
        word (str): Word to break into syllables

    Returns:
        str: Pieces joined with "-", or the word unchanged if no break was made
    """
    vowels = 'aeiou'
    syllables = []
    current_syllable = ''

    for i, char in enumerate(word):
        current_syllable += char
        has_next = i < len(word) - 1
        # Simple rule: break after vowel if next char is consonant
        if has_next and char.lower() in vowels and word[i + 1].lower() not in vowels:
            syllables.append(current_syllable)
            current_syllable = ''

    if current_syllable:
        syllables.append(current_syllable)

    return '-'.join(syllables) if len(syllables) > 1 else word


def _difficulty_for_feedback(previous_feedback: str) -> Tuple[str, str]:
    """Return (difficulty_adjustment, focus_area) derived from earlier feedback."""
    advanced = (
        "more challenging with advanced vocabulary",
        "new vocabulary, longer sentences, and complex concepts",
    )
    harder = ("slightly more challenging", "new vocabulary and longer sentences")
    same = ("similar level with some new words", "reinforcing current skills")
    easier = ("simpler and shorter", "basic vocabulary and simple sentences")

    accuracy = _parse_accuracy(previous_feedback)
    if accuracy is not None:
        # Prefer the numeric score: substring tests on the whole text misfire
        # on "9.0%" and on upper-cased practice words such as "• GOOD - ...".
        if accuracy >= 90:
            return advanced
        if accuracy >= 80:
            return harder
        if accuracy >= 70:
            return same
        return easier

    # No accuracy line available: fall back to the headline keywords.
    if "AMAZING" in previous_feedback:
        return advanced
    if "GREAT JOB" in previous_feedback:
        return harder
    if "GOOD" in previous_feedback:
        return same
    return easier


@tool
def generate_targeted_story(previous_feedback: str, name: str, grade: str, missed_words: list = None) -> str:
    """
    Generate a new story that specifically targets words the student struggled with.
    Agentic story generation based on learning gaps.

    Args:
        previous_feedback (str): Previous reading feedback
        name (str): Student's name
        grade (str): Student's grade level
        missed_words (list): Words the student had trouble with

    Returns:
        str: New targeted story for practice
    """
    grade_num = _grade_number(grade, default=3)
    age = grade_num + 5

    # Extract difficulty level from previous feedback
    difficulty_adjustment, focus_area = _difficulty_for_feedback(previous_feedback)

    # Create targeted practice words
    if missed_words:
        # Focus on top 5 missed words
        practice_words = [str(word) for word in list(missed_words)[:5]]
        word_focus = f"Include and repeat these practice words: {', '.join(practice_words)}"
    else:
        word_focus = "Focus on common sight words for this grade level"

    # Generate adaptive prompt
    prompt = f"""
    You are an expert reading coach creating a personalized story for {name}, a {age}-year-old in {grade}.

    LEARNING ADAPTATION:
    - Make this story {difficulty_adjustment} than the previous one
    - Focus on: {focus_area}
    - {word_focus}

    STORY REQUIREMENTS:
    - Feature {name} as the main character
    - Include an engaging adventure or discovery theme
    - Naturally incorporate the practice words multiple times
    - Make it fun and encouraging
    - End with {name} feeling proud and accomplished

    Create a story that helps {name} practice the words they found challenging while building confidence.

    Story:
    """

    # Generate targeted story
    return _generate_text(
        prompt,
        temperature=0.7,
        max_tokens=_max_output_tokens(grade_num),
    )


class SessionManager:
    """Manages student sessions and progress tracking (in memory only)."""

    def __init__(self):
        # session_id -> session record dict (see start_session for the keys)
        self.sessions = {}
        # Initialised empty; nothing in this class writes to it.
        self.student_progress = {}

    def start_session(self, student_name: str, grade: str) -> str:
        """Create a new session record and return its id.

        The id is "<student_name>_<unix seconds>". If that id is already taken
        (same student, same second) a numeric suffix is appended so the earlier
        session is not overwritten.
        """
        now = time.time()
        base_id = f"{student_name}_{int(now)}"
        session_id = base_id
        suffix = 1
        while session_id in self.sessions:
            session_id = f"{base_id}_{suffix}"
            suffix += 1

        self.sessions[session_id] = {
            "student_name": student_name,
            "grade": grade,
            "start_time": now,
            "stories_read": 0,
            "total_accuracy": 0,
            "feedback_history": [],
        }
        return session_id

    def get_session(self, session_id: str) -> dict:
        """Return the session record, or an empty dict for an unknown id."""
        return self.sessions.get(session_id, {})

    def update_session(self, session_id: str, accuracy: float, feedback: str):
        """Record one reading result; unknown session ids are ignored."""
        session = self.sessions.get(session_id)
        if session is None:
            return

        session["stories_read"] += 1
        session["total_accuracy"] += accuracy
        session["feedback_history"].append(
            {
                "timestamp": time.time(),
                "accuracy": accuracy,
                "feedback": feedback,
            }
        )


class ReadingCoachAgent:
    """
    Main agent class that provides the interface for the reading coach system.
    Wraps the individual tool functions and manages student sessions.
    """

    def __init__(self):
        self.session_manager = SessionManager()
        self.current_session = None  # id of the active session, if any
        self.current_story = ""  # story the student is expected to read
        self.student_info = {"name": "", "grade": ""}

    def generate_story_for_student(self, name: str, grade: str, topic: str) -> str:
        """Remember the student, start a new session and generate a story."""
        # Store student info
        self.student_info = {"name": name, "grade": grade}

        # Every call starts a fresh session
        self.current_session = self.session_manager.start_session(name, grade)

        # Generate story using the tool function
        self.current_story = generate_story(name, grade, topic)
        return self.current_story

    def create_audio_from_story(self, story: str) -> str:
        """Convert story to audio using TTS (None when the TTS call fails)."""
        return text_to_speech(story)

    def analyze_student_reading(self, audio_path: str) -> Tuple[str, str, float]:
        """Transcribe the student's audio, score it and record the result.

        Returns:
            tuple: (transcribed_text, feedback, accuracy). When transcription
            fails, the error message is returned as both text and feedback
            with accuracy 0.0, and the session is left untouched.
        """
        # Transcribe the audio
        transcribed_text = transcribe_audio(audio_path)

        if transcribed_text in _STT_ERROR_MESSAGES:
            # A service error message is not something the student said, so it
            # must not be scored against the story or stored in the history.
            return transcribed_text, transcribed_text, 0.0

        # Compare with original story and get feedback
        feedback = compare_texts_for_feedback(self.current_story, transcribed_text)

        # Extract accuracy from feedback
        accuracy = self._extract_accuracy_from_feedback(feedback)

        # Update session if we have one
        if self.current_session:
            self.session_manager.update_session(self.current_session, accuracy, feedback)

        return transcribed_text, feedback, accuracy

    def generate_new_passage(self, topic: str) -> str:
        """Generate a new passage with the current student info.

        Raises:
            ValueError: If no student name/grade has been stored yet.
        """
        if not self.student_info["name"] or not self.student_info["grade"]:
            raise ValueError("No active session. Please start a new session first.")

        # Generate new story
        self.current_story = generate_story(
            self.student_info["name"], self.student_info["grade"], topic
        )
        return self.current_story

    def generate_practice_story(self, name: str, grade: str) -> str:
        """Generate a targeted practice story based on the last feedback.

        Stored student info takes precedence over the arguments; the arguments
        (or "Student" / "Grade 3") are used only when no student is stored.
        """
        stored_name = self.student_info.get("name")
        stored_grade = self.student_info.get("grade")
        if stored_name and stored_grade:
            name, grade = stored_name, stored_grade
        else:
            # Use provided parameters if student info is not available
            name = name or "Student"
            grade = grade or "Grade 3"

        # Get the last feedback from session if available
        last_feedback = ""
        session_data = self.session_manager.sessions.get(self.current_session)
        if self.current_session and session_data:
            history = session_data.get("feedback_history")
            if history:
                last_feedback = history[-1].get("feedback", "")

        # Recover the words listed in the feedback so the story can target them;
        # None keeps the generic "sight words" prompt when nothing was listed.
        practice_words = _extract_practice_words(last_feedback) or None

        practice_story = generate_targeted_story(last_feedback, name, grade, practice_words)
        self.current_story = practice_story
        return practice_story

    def clear_session(self):
        """Clear current session"""
        self.current_session = None
        self.current_story = ""
        self.student_info = {"name": "", "grade": ""}

    def reset_all_data(self):
        """Reset all current session state but keep tracked sessions."""
        self.clear_session()

    def _extract_accuracy_from_feedback(self, feedback: str) -> float:
        """Extract the "Reading accuracy: XX.X%" value, or 0.0 if absent."""
        accuracy = _parse_accuracy(feedback)
        return accuracy if accuracy is not None else 0.0