Spaces:

Agents-MCP-Hackathon
/

ReadRight

Sleeping

App Files Files Community

ParulPandey committed on Jun 10

Commit

2d79f41

verified ·

1 Parent(s): b3566fd

Update agent.py

Browse files

Files changed (1) hide show

agent.py +39 -280

agent.py CHANGED Viewed

@@ -2,15 +2,13 @@ import os
 import requests
 from smolagents.tools import tool
 from difflib import SequenceMatcher
 try:
     from gradio_client import Client
 except ImportError:
     # Fallback import for older versions
     import gradio_client
     Client = gradio_client.Client
-from google import genai
-from google.genai import types
 import json
 import time
 import numpy as np
@@ -26,9 +24,9 @@ TTS_API = os.getenv("TTS_API")
 STT_API = os.getenv("STT_API")
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-# Configure Google Gemini client
 if GOOGLE_API_KEY:
-    gemini_client = genai.Client(api_key=GOOGLE_API_KEY)
 @tool
 def generate_story(name: str, grade: str, topic: str) -> str:
@@ -97,19 +95,20 @@ def generate_story(name: str, grade: str, topic: str) -> str:
     """
     # Use Google Gemini
     # Adjust generation parameters based on grade level
     max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
-    generation_config = types.GenerateContentConfig(
-        temperature=0.8,
-        max_output_tokens=max_tokens,
-        top_p=0.9,
-    )
-    response = gemini_client.models.generate_content(
-        model="gemini-2.0-flash",
-        contents=[prompt],
-        config=generation_config
     )
     return response.text.strip()
@@ -322,54 +321,23 @@ def compare_texts_for_feedback(original: str, spoken: str) -> str:
     Returns:
         str: Comprehensive, age-appropriate feedback with learning suggestions.
     """
-    # Check if the spoken text is too short to be meaningful
-    if not spoken or len(spoken.split()) < 3:
-        return "⚠️ Your reading was too short. Please try reading the complete story."
     # Clean and process text
     orig_words = [w.strip(".,!?;:\"'").lower() for w in original.split() if w.strip()]
     spoken_words = [w.strip(".,!?;:\"'").lower() for w in spoken.split() if w.strip()]
-    # Set minimum threshold for overall matching - if nothing matches at all,
-    # it's likely the student read something completely different
-    common_words = set(orig_words).intersection(set(spoken_words))
-    if len(common_words) < max(2, len(orig_words) * 0.1):  # At least 2 words or 10% must match
-        return "⚠️ I couldn't recognize enough words from the story. Please try reading the story text shown on the screen.\n\nReading accuracy: 0.0%"
     # Calculate accuracy using sequence matching
     matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)
     accuracy = matcher.ratio() * 100
-    # Identify different types of errors using context-aware approach
-    # Use difflib to get a more accurate understanding of missed words in context
-    import difflib
-    d = difflib.Differ()
-    diff = list(d.compare([w.lower() for w in original.split() if w.strip()],
-                         [w.lower() for w in spoken.split() if w.strip()]))
-    missed_words = []
-    for word in diff:
-        if word.startswith('- '):  # Words in original but not in spoken
-            clean_word = word[2:].strip(".,!?;:\"'").lower()
-            if clean_word and len(clean_word) > 1:  # Avoid punctuation
-                missed_words.append(clean_word)
-    # Convert to set to remove duplicates but preserve order for important words
-    missed_words_set = set(missed_words)
-    # Extra words (might be mispronunciations or additions)
     extra_words = set(spoken_words) - set(orig_words)
     # Find mispronounced words (words that sound similar but are different)
     mispronounced = find_similar_words(orig_words, spoken_words)
-    # Prioritize important words (like nouns, longer words) if available
-    important_missed = [w for w in missed_words if len(w) > 4]
-    if important_missed:
-        missed_words_set = set(important_missed) | set([w for w in missed_words if w not in important_missed][:3])
     # Generate age-appropriate feedback
-    return generate_adaptive_feedback(accuracy, missed_words_set, extra_words, mispronounced, len(orig_words))
 def find_similar_words(original_words: list, spoken_words: list) -> list:
     """
@@ -467,8 +435,6 @@ def get_pronunciation_tip(word: str) -> str:
         return f"Sound it out: {'-'.join(word)}"
     elif word.endswith('tion'):
         return "Ends with 'shun' sound"
-    elif word.endswith('sion'):
-        return "Ends with 'zhun' or 'shun' sound"
     elif word.endswith('ed'):
         if word[-3] in 'td':
             return "Past tense - ends with 'ed' sound"
@@ -476,24 +442,8 @@ def get_pronunciation_tip(word: str) -> str:
             return "Past tense - ends with 'd' sound"
     elif 'th' in word:
         return "Put your tongue between your teeth for 'th'"
-    elif 'ch' in word:
-        return "Make the 'ch' sound like in 'cheese'"
-    elif 'sh' in word:
-        return "Make the 'sh' sound like in 'ship'"
-    elif word.startswith('kn'):
-        return "The 'k' is silent, start with the 'n' sound"
-    elif word.startswith('ph'):
-        return "The 'ph' makes an 'f' sound"
     elif word.startswith('wh'):
         return "Starts with 'w' sound (like 'when')"
-    elif word.endswith('igh'):
-        return "The 'igh' makes a long 'i' sound like in 'night'"
-    elif 'ou' in word:
-        return "The 'ou' often sounds like 'ow' in 'cow'"
-    elif 'ai' in word:
-        return "The 'ai' makes the long 'a' sound"
-    elif 'ea' in word:
-        return "The 'ea' usually makes the long 'e' sound"
     elif len(word) >= 6:
         # Break longer words into syllables
         return f"Break it down: {break_into_syllables(word)}"
@@ -523,32 +473,12 @@ def get_pronunciation_correction(original: str, spoken: str) -> str:
         return f"Starts with '{orig[0]}' sound, not '{spok[0]}'"
     elif orig[-1] != spok[-1]:
         return f"Ends with '{orig[-1]}' sound"
-    # Check for vowel confusion
-    orig_vowels = [c for c in orig if c in 'aeiou']
-    spok_vowels = [c for c in spok if c in 'aeiou']
-    if orig_vowels != spok_vowels:
-        # Find the first different vowel
-        for i in range(min(len(orig_vowels), len(spok_vowels))):
-            if orig_vowels[i] != spok_vowels[i]:
-                vowel_map = {
-                    'a': "ah (like in 'cat')",
-                    'e': "eh (like in 'bed')",
-                    'i': "ih (like in 'sit')",
-                    'o': "oh (like in 'hot')",
-                    'u': "uh (like in 'cup')"
-                }
-                correct_sound = vowel_map.get(orig_vowels[i], f"'{orig_vowels[i]}'")
-                wrong_sound = vowel_map.get(spok_vowels[i], f"'{spok_vowels[i]}'")
-                return f"Say the vowel sound as {correct_sound}, not {wrong_sound}"
-    # Default case
-    return f"Listen carefully: '{orig}' - try saying it slower"
 def break_into_syllables(word: str) -> str:
     """
-    Improved syllable breaking for pronunciation help.
     Args:
         word (str): Word to break into syllables
@@ -556,97 +486,22 @@ def break_into_syllables(word: str) -> str:
     Returns:
         str: Word broken into syllables
     """
-    vowels = 'aeiouy'
-    word = word.lower()
     syllables = []
     current_syllable = ''
-    consonant_cluster = ''
-    # Handle common prefixes
-    common_prefixes = ['re', 'pre', 'un', 'in', 'im', 'dis', 'mis', 'non', 'sub', 'inter', 'ex']
-    for prefix in common_prefixes:
-        if word.startswith(prefix) and len(word) > len(prefix) + 1:
-            syllables.append(prefix)
-            word = word[len(prefix):]
-            break
-    # Handle common suffixes
-    common_suffixes = ['ing', 'ed', 'er', 'est', 'ly', 'ful', 'ness', 'less', 'ment', 'able', 'ible']
-    for suffix in common_suffixes:
-        if word.endswith(suffix) and len(word) > len(suffix) + 1:
-            suffix_syllable = suffix
-            word = word[:-len(suffix)]
-            syllables.append(word)
-            syllables.append(suffix_syllable)
-            return '-'.join(syllables)
-    # Process the word character by character
-    i = 0
-    while i < len(word):
-        char = word[i]
-        # If we encounter a vowel
-        if char in vowels:
-            # Start or add to a syllable
-            if consonant_cluster:
-                # For consonant clusters, we generally add one consonant to the current syllable
-                # and move the rest to the next syllable
-                if len(consonant_cluster) > 1:
-                    if current_syllable:  # If we already have a syllable started
-                        current_syllable += consonant_cluster[0]
-                        syllables.append(current_syllable)
-                        current_syllable = consonant_cluster[1:] + char
-                    else:  # For starting consonant clusters
-                        current_syllable = consonant_cluster + char
-                else:  # Single consonant
-                    current_syllable += consonant_cluster + char
-                consonant_cluster = ''
-            else:
-                current_syllable += char
-            # Check for vowel pairs that should stay together
-            if i < len(word) - 1 and word[i+1] in vowels:
-                vowel_pairs = ['ea', 'ee', 'oo', 'ou', 'ie', 'ai', 'oa']
-                if word[i:i+2] in vowel_pairs:
-                    current_syllable += word[i+1]
-                    i += 1  # Skip the next vowel since we've added it
-        else:  # Consonant
-            if current_syllable:  # If we have an open syllable
-                if i < len(word) - 1 and word[i+1] not in vowels:  # Consonant cluster
-                    consonant_cluster += char
-                else:  # Single consonant followed by vowel
-                    current_syllable += char
-            else:  # Starting with consonant or building consonant cluster
-                consonant_cluster += char
-        # Handle end of word or ready to break syllable
-        if i == len(word) - 1 or (char in vowels and i < len(word) - 1 and word[i+1] not in vowels):
-            if current_syllable:
                 syllables.append(current_syllable)
                 current_syllable = ''
-        i += 1
-    # Add any remaining parts
-    if consonant_cluster:
-        if syllables:
-            syllables[-1] += consonant_cluster
-        else:
-            syllables.append(consonant_cluster)
     if current_syllable:
         syllables.append(current_syllable)
-    # Special case handling
-    result = '-'.join(syllables) if syllables else word
-    # If we ended up with no breaks, provide a simpler approach
-    if result == word and len(word) > 3:
-        # Simple fallback: break after every other letter
-        syllables = [word[i:i+2] for i in range(0, len(word), 2)]
-        result = '-'.join(syllables)
-    return result
 @tool
 def generate_targeted_story(previous_feedback: str, name: str, grade: str, missed_words: list = None) -> str:
@@ -666,38 +521,15 @@ def generate_targeted_story(previous_feedback: str, name: str, grade: str, misse
     grade_num = int(''.join(filter(str.isdigit, grade)) or "3")
     age = grade_num + 5
-    # Dynamically determine story parameters based on grade - match the same criteria as main story generation
-    if grade_num <= 2:
-        # Grades 1-2: Very simple stories
-        story_length = "2-3 short sentences"
-        vocabulary_level = "very simple words (mostly 1-2 syllables)"
-        sentence_structure = "short, simple sentences"
-        complexity = "basic concepts"
-        reading_level = "beginner"
-    elif grade_num <= 4:
-        # Grades 3-4: Intermediate stories
-        story_length = "1-2 short paragraphs"
-        vocabulary_level = "age-appropriate words with some longer words"
-        sentence_structure = "mix of simple and compound sentences"
-        complexity = "intermediate concepts with some detail"
-        reading_level = "intermediate"
-    else:
-        # Grades 5-6: More advanced stories
-        story_length = "2-3 paragraphs"
-        vocabulary_level = "varied vocabulary including descriptive words"
-        sentence_structure = "complex sentences with descriptive language"
-        complexity = "detailed concepts and explanations"
-        reading_level = "advanced elementary"
     # Extract difficulty level from previous feedback
     if "AMAZING" in previous_feedback or "accuracy: 9" in previous_feedback:
-        difficulty_adjustment = "slightly more challenging but still within grade level"
         focus_area = "new vocabulary and longer sentences"
     elif "GOOD" in previous_feedback or "accuracy: 8" in previous_feedback:
         difficulty_adjustment = "similar level with some new words"
         focus_area = "reinforcing current skills"
     else:
-        difficulty_adjustment = "slightly simpler but still grade-appropriate"
         focus_area = "basic vocabulary and simple sentences"
     # Create targeted practice words
@@ -711,16 +543,8 @@ def generate_targeted_story(previous_feedback: str, name: str, grade: str, misse
     prompt = f"""
     You are an expert reading coach creating a personalized story for {name}, a {age}-year-old in {grade}.
-    GRADE LEVEL: {grade} ({reading_level} level)
-    STORY SPECIFICATIONS:
-    - Length: {story_length}
-    - Vocabulary: {vocabulary_level}
-    - Sentence structure: {sentence_structure}
-    - Complexity: {complexity}
     LEARNING ADAPTATION:
-    - Make this story {difficulty_adjustment}
     - Focus on: {focus_area}
     - {word_focus}
@@ -737,17 +561,17 @@ def generate_targeted_story(previous_feedback: str, name: str, grade: str, misse
     """
     # Generate targeted story
     max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
-    generation_config = genai.GenerationConfig(
-        temperature=0.7,
-        max_output_tokens=max_tokens,
-        top_p=0.9,
-    )
-    response = gemini_client.models.generate_content(
-        model="gemini-2.5-flash",
-        contents=[prompt],
         generation_config=generation_config
     )
@@ -826,18 +650,6 @@ class ReadingCoachAgent:
         # Transcribe the audio
         transcribed_text = transcribe_audio(audio_path)
-        # Check if the transcribed text is an error message or empty
-        if transcribed_text.startswith("Error:") or transcribed_text.startswith("I couldn't hear") or len(transcribed_text.strip()) < 3:
-            # Return a helpful message instead of giving feedback with accuracy points
-            error_feedback = "⚠️ I couldn't hear your reading clearly. Please try again and make sure to:\n"
-            error_feedback += "• Speak clearly and at a normal pace\n"
-            error_feedback += "• Make sure your microphone is working properly\n"
-            error_feedback += "• Try reading in a quieter environment\n"
-            error_feedback += "• Read the complete story from beginning to end\n\n"
-            error_feedback += "Reading accuracy: 0.0%"
-            return transcribed_text, error_feedback, 0.0
         # Compare with original story and get feedback
         feedback = compare_texts_for_feedback(self.current_story, transcribed_text)
@@ -871,25 +683,8 @@ class ReadingCoachAgent:
             name = self.student_info["name"]
             grade = self.student_info["grade"]
-        # Get the last feedback to personalize the practice story
-        last_feedback = ""
-        missed_words_list = []
-        # Extract missed words from feedback if available
-        if self.current_session:
-            session_data = self.session_manager.get_session(self.current_session)
-            if session_data and "feedback_history" in session_data and session_data["feedback_history"]:
-                last_feedback = session_data["feedback_history"][-1]["feedback"]
-                # Extract missed words from the feedback
-                import re
-                if "PRACTICE THESE WORDS:" in last_feedback:
-                    # Find all words that appear after bullet points
-                    matches = re.findall(r'• ([A-Z]+)', last_feedback)
-                    missed_words_list = [word.lower() for word in matches]
         # Generate a new practice story using the targeted story function
-        practice_story = generate_targeted_story(last_feedback, name, grade, missed_words_list)
         self.current_story = practice_story
         return practice_story
@@ -912,39 +707,3 @@ class ReadingCoachAgent:
         if match:
             return float(match.group(1))
         return 0.0
-    def _extract_missed_words_from_feedback(feedback: str) -> list:
-        """
-        Extract missed words from feedback text.
-        Args:
-            feedback (str): Feedback text containing missed words
-        Returns:
-            list: List of missed words
-        """
-        import re
-        missed_words = []
-        # Check if feedback contains practice words section
-        if "PRACTICE THESE WORDS:" in feedback:
-            # Extract the section with practice words
-            practice_section = feedback.split("PRACTICE THESE WORDS:")[1].split("\n")[1:]
-            # Extract words that appear after bullet points
-            for line in practice_section:
-                if "•" in line and "-" in line:
-                    # Extract word before the dash
-                    match = re.search(r'• ([A-Z]+) -', line)
-                    if match:
-                        missed_words.append(match.group(1).lower())
-        # If we also have mispronounced words, add them too
-        if "PRONUNCIATION PRACTICE:" in feedback:
-            pronun_section = feedback.split("PRONUNCIATION PRACTICE:")[1].split("\n")[1:]
-            for line in pronun_section:
-                if "•" in line and "(you said" in line:
-                    match = re.search(r'• ([A-Z]+) \(you said', line)
-                    if match:
-                        missed_words.append(match.group(1).lower())
-        return missed_words

 import requests
 from smolagents.tools import tool
 from difflib import SequenceMatcher
 try:
     from gradio_client import Client
 except ImportError:
     # Fallback import for older versions
     import gradio_client
     Client = gradio_client.Client
+import google.generativeai as genai
 import json
 import time
 import numpy as np
 STT_API = os.getenv("STT_API")
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+# Configure Google Gemini
 if GOOGLE_API_KEY:
+    genai.configure(api_key=GOOGLE_API_KEY)
 @tool
 def generate_story(name: str, grade: str, topic: str) -> str:
     """
     # Use Google Gemini
+    model = genai.GenerativeModel('gemini-1.5-flash')
     # Adjust generation parameters based on grade level
     max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
+    generation_config = {
+        "temperature": 0.8,
+        "max_output_tokens": max_tokens,
+        "top_p": 0.9,
+    }
+    response = model.generate_content(
+        contents=prompt,
+        generation_config=generation_config
     )
     return response.text.strip()
     Returns:
         str: Comprehensive, age-appropriate feedback with learning suggestions.
     """
     # Clean and process text
     orig_words = [w.strip(".,!?;:\"'").lower() for w in original.split() if w.strip()]
     spoken_words = [w.strip(".,!?;:\"'").lower() for w in spoken.split() if w.strip()]
     # Calculate accuracy using sequence matching
     matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)
     accuracy = matcher.ratio() * 100
+    # Identify different types of errors
+    missed_words = set(orig_words) - set(spoken_words)
     extra_words = set(spoken_words) - set(orig_words)
     # Find mispronounced words (words that sound similar but are different)
     mispronounced = find_similar_words(orig_words, spoken_words)
     # Generate age-appropriate feedback
+    return generate_adaptive_feedback(accuracy, missed_words, extra_words, mispronounced, len(orig_words))
 def find_similar_words(original_words: list, spoken_words: list) -> list:
     """
         return f"Sound it out: {'-'.join(word)}"
     elif word.endswith('tion'):
         return "Ends with 'shun' sound"
     elif word.endswith('ed'):
         if word[-3] in 'td':
             return "Past tense - ends with 'ed' sound"
             return "Past tense - ends with 'd' sound"
     elif 'th' in word:
         return "Put your tongue between your teeth for 'th'"
     elif word.startswith('wh'):
         return "Starts with 'w' sound (like 'when')"
     elif len(word) >= 6:
         # Break longer words into syllables
         return f"Break it down: {break_into_syllables(word)}"
         return f"Starts with '{orig[0]}' sound, not '{spok[0]}'"
     elif orig[-1] != spok[-1]:
         return f"Ends with '{orig[-1]}' sound"
+    else:
+        return f"Listen carefully: '{orig}' - try saying it slower"
 def break_into_syllables(word: str) -> str:
     """
+    Simple syllable breaking for pronunciation help.
     Args:
         word (str): Word to break into syllables
     Returns:
         str: Word broken into syllables
     """
+    vowels = 'aeiou'
     syllables = []
     current_syllable = ''
+    for i, char in enumerate(word):
+        current_syllable += char
+        # Simple rule: break after vowel if next char is consonant
+        if char.lower() in vowels and i < len(word) - 1:
+            if word[i + 1].lower() not in vowels:
                 syllables.append(current_syllable)
                 current_syllable = ''
     if current_syllable:
         syllables.append(current_syllable)
+    return '-'.join(syllables) if len(syllables) > 1 else word
 @tool
 def generate_targeted_story(previous_feedback: str, name: str, grade: str, missed_words: list = None) -> str:
     grade_num = int(''.join(filter(str.isdigit, grade)) or "3")
     age = grade_num + 5
     # Extract difficulty level from previous feedback
     if "AMAZING" in previous_feedback or "accuracy: 9" in previous_feedback:
+        difficulty_adjustment = "slightly more challenging"
         focus_area = "new vocabulary and longer sentences"
     elif "GOOD" in previous_feedback or "accuracy: 8" in previous_feedback:
         difficulty_adjustment = "similar level with some new words"
         focus_area = "reinforcing current skills"
     else:
+        difficulty_adjustment = "simpler and shorter"
         focus_area = "basic vocabulary and simple sentences"
     # Create targeted practice words
     prompt = f"""
     You are an expert reading coach creating a personalized story for {name}, a {age}-year-old in {grade}.
     LEARNING ADAPTATION:
+    - Make this story {difficulty_adjustment} than the previous one
     - Focus on: {focus_area}
     - {word_focus}
     """
     # Generate targeted story
+    model = genai.GenerativeModel('gemini-1.5-flash')
     max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
+    generation_config = {
+        "temperature": 0.7,
+        "max_output_tokens": max_tokens,
+        "top_p": 0.9,
+    }
+    response = model.generate_content(
+        contents=prompt,
         generation_config=generation_config
     )
         # Transcribe the audio
         transcribed_text = transcribe_audio(audio_path)
         # Compare with original story and get feedback
         feedback = compare_texts_for_feedback(self.current_story, transcribed_text)
             name = self.student_info["name"]
             grade = self.student_info["grade"]
         # Generate a new practice story using the targeted story function
+        practice_story = generate_targeted_story("", name, grade)
         self.current_story = practice_story
         return practice_story
         if match:
             return float(match.group(1))
         return 0.0