ParulPandey committed
Commit 2ec3bec · verified · 1 Parent(s): d8c8d0c

Update agent.py

Files changed (1)
  1. agent.py +83 -152
agent.py CHANGED
@@ -13,8 +13,9 @@ import json
  import time
  import numpy as np
  from pathlib import Path
- from typing import Dict, List, Optional, Tuple
  from dotenv import load_dotenv
 
  # Load environment variables
  load_dotenv()
@@ -42,27 +43,27 @@ def generate_story(name: str, grade: str, topic: str) -> str:
  str: Generated story text.
  """
  # Extract grade number and determine age/reading level
- grade_num = int(''.join(filter(str.isdigit, grade)) or "3")
  age = grade_num + 5 # Grade 1 = ~6 years old, Grade 6 = ~11 years old
 
  # Dynamically determine story parameters based on grade
  if grade_num <= 2:
  # Grades 1-2: Very simple stories
- story_length = "2-3 short sentences"
  vocabulary_level = "very simple words (mostly 1-2 syllables)"
  sentence_structure = "short, simple sentences"
  complexity = "basic concepts"
  reading_level = "beginner"
  elif grade_num <= 4:
  # Grades 3-4: Intermediate stories
- story_length = "1-2 short paragraphs"
  vocabulary_level = "age-appropriate words with some longer words"
  sentence_structure = "mix of simple and compound sentences"
  complexity = "intermediate concepts with some detail"
  reading_level = "intermediate"
  else:
- # Grades 5-6: More advanced stories
- story_length = "2-3 paragraphs"
  vocabulary_level = "varied vocabulary including descriptive words"
  sentence_structure = "complex sentences with descriptive language"
  complexity = "detailed concepts and explanations"
@@ -79,15 +80,17 @@ def generate_story(name: str, grade: str, topic: str) -> str:
  - Vocabulary: Use {vocabulary_level}
  - Sentence structure: {sentence_structure}
  - Complexity: {complexity}
- - Include {name} as the main character
  - Teach something interesting about {topic}
  - End with a positive, encouraging message
  - Make it engaging and fun to read aloud
 
  Additional Guidelines:
  - For younger students (Grades 1-2): Focus on simple actions, basic emotions, and clear cause-and-effect
- - For middle students (Grades 3-4): Include some problem-solving, friendship themes, and basic science/nature facts
- - For older students (Grades 5-6): Add character development, more detailed explanations, and encourage curiosity
 
  The story should be perfectly suited for a {grade} student's reading ability and attention span.
 
@@ -95,7 +98,7 @@ def generate_story(name: str, grade: str, topic: str) -> str:
  """
 
  # Use Google Gemini
- model = genai.GenerativeModel('gemini-1.5-flash')
 
  # Adjust generation parameters based on grade level
  max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
@@ -163,151 +166,69 @@ def text_to_speech(text: str) -> str:
  traceback.print_exc()
  return None
 
- @tool
 
 
- def transcribe_audio(audio_input: str) -> str:
  """
- Transcribe the student's audio into text via Whisper STT service.
- Using abidlabs/whisper-large-v2 Hugging Face Space API.
 
  Args:
- audio_input: Either a file path (str) or tuple (sample_rate, numpy_array) from Gradio
 
  Returns:
- str: Transcribed speech text.
  """
  try:
- print(f"Received audio input: {type(audio_input)}")
-
- # Handle different input formats
- if isinstance(audio_input, tuple) and len(audio_input) == 2:
- # Gradio microphone format: (sample_rate, numpy_array)
- sample_rate, audio_data = audio_input
- print(f"Audio tuple: sample_rate={sample_rate}, data_shape={audio_data.shape}")
- # Pass the tuple directly to the STT service
- audio_for_stt = audio_input
- elif isinstance(audio_input, (str, Path)):
- audio_for_stt = str(audio_input)
- else:
- print(f"Unsupported audio input type: {type(audio_input)}")
- return "Error: Unsupported audio format. Please try recording again."
-
- if isinstance(audio_for_stt, Path):
- audio_for_stt = str(audio_for_stt)
-
- # Initialize client with error handling
- print("Initializing Gradio client for STT...")
- try:
- client = Client("abidlabs/whisper-large-v2")
- except Exception as client_error:
- print(f"Failed to initialize client: {client_error}")
- # Try alternative approach
- try:
- print("Trying direct API approach...")
- return "Error: STT service initialization failed. Please try again."
- except Exception as fallback_error:
- print(f"Fallback also failed: {fallback_error}")
- return "Error: Speech recognition service unavailable. Please try again later."
-
- print("Sending audio for transcription...")
-
- # Make the API call with timeout and error handling
- try:
- if isinstance(audio_for_stt, tuple):
- result = client.predict(audio_for_stt, api_name="/predict")
- else:
- result = client.predict(audio_for_stt, api_name="/predict")
- except Exception as api_error:
- print(f"API call failed: {api_error}")
- if "extra_headers" in str(api_error):
- return "Error: Connection protocol mismatch. Please try recording again."
- elif "connection" in str(api_error).lower():
- return "Error: Network connection issue. Please check your internet and try again."
- else:
- return "Error: Transcription service temporarily unavailable. Please try again."
-
- print(f"Raw transcription result: {result}")
- print(f"Result type: {type(result)}")
-
- # Handle different result types more robustly
- if result is None:
- return "Error: No transcription result. Please try speaking more clearly and loudly."
-
- # Extract text from result
- transcribed_text = ""
-
- if isinstance(result, str):
- transcribed_text = result.strip()
- elif isinstance(result, (list, tuple)):
- if len(result) > 0:
- # Try to find the text in the result structure
- transcribed_text = str(result[0]).strip()
- print(f"Extracted from list/tuple: {transcribed_text}")
- else:
- return "Error: Empty transcription result. Please try again."
- elif isinstance(result, dict):
- # Handle dictionary results - try common keys
- transcribed_text = result.get('text', result.get('transcription', str(result))).strip()
- print(f"Extracted from dict: {transcribed_text}")
- else:
- transcribed_text = str(result).strip()
- print(f"Converted to string: {transcribed_text}")
-
- # Clean up common API artifacts
- transcribed_text = transcribed_text.replace('```', '').replace('json', '').replace('{', '').replace('}', '')
-
- # Validate the transcription
- if not transcribed_text or (isinstance(transcribed_text, str) and transcribed_text.lower() in ['', 'none', 'null', 'error', 'undefined']):
- return "I couldn't hear any speech clearly. Please try recording again and speak more loudly."
-
- # Ensure transcribed_text is a string before further processing
- if not isinstance(transcribed_text, str):
- return "I couldn't hear any speech clearly. Please try recording again and speak more loudly."
-
- # Check for common error messages from the API
- error_indicators = ['error', 'failed', 'could not', 'unable to', 'timeout']
- if any(indicator in transcribed_text.lower() for indicator in error_indicators):
- return "Transcription service had an issue. Please try recording again."
-
- # Clean up the transcribed text
- transcribed_text = transcribed_text.replace('\n', ' ').replace('\t', ' ')
- # Remove extra whitespace
- transcribed_text = ' '.join(transcribed_text.split())
-
- if len(transcribed_text) < 3:
- return "The recording was too short or unclear. Please try reading more slowly and clearly."
-
- print(f"Final transcribed text: {transcribed_text}")
- return transcribed_text
-
- except ImportError as e:
- print(f"Import error: {str(e)}")
- return "Error: Missing required libraries. Please check your installation."
-
- except ConnectionError as e:
- print(f"Connection error: {str(e)}")
- return "Network connection error. Please check your internet connection and try again."
-
- except TimeoutError as e:
- print(f"Timeout error: {str(e)}")
- return "Transcription service is taking too long. Please try again with a shorter recording."
-
  except Exception as e:
- print(f"Unexpected transcription error: {str(e)}")
- error_msg = str(e).lower()
-
- # Provide helpful error messages based on the error type
- if "timeout" in error_msg or "connection" in error_msg:
- return "Network timeout. Please check your internet connection and try again."
- elif "file" in error_msg or "path" in error_msg:
- return "Audio file error. Please try recording again."
- elif "api" in error_msg or "client" in error_msg or "gradio" in error_msg:
- return "Transcription service temporarily unavailable. Please try again in a moment."
- elif "memory" in error_msg or "size" in error_msg:
- return "Audio file is too large or complex. Please try with a shorter recording."
- else:
- return f"Transcription failed. Please try recording again. If the problem persists, try speaking more clearly."
 
  def compare_texts_for_feedback(original: str, spoken: str) -> str:
  """
@@ -327,7 +248,7 @@ def compare_texts_for_feedback(original: str, spoken: str) -> str:
 
  # Calculate accuracy using sequence matching
  matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)
- accuracy = matcher.ratio() * 100
 
  # Identify different types of errors
  missed_words = set(orig_words) - set(spoken_words)
@@ -361,7 +282,7 @@ def find_similar_words(original_words: list, spoken_words: list) -> list:
 
  return mispronounced[:5]
 
- def generate_adaptive_feedback(accuracy: float, missed_words: set, extra_words: set,
  mispronounced: list, total_words: int) -> str:
  """
  Generate age-appropriate, encouraging feedback with specific learning guidance.
@@ -522,10 +443,13 @@ def generate_targeted_story(previous_feedback: str, name: str, grade: str, misse
  age = grade_num + 5
 
  # Extract difficulty level from previous feedback
- if "AMAZING" in previous_feedback or "accuracy: 9" in previous_feedback:
  difficulty_adjustment = "slightly more challenging"
  focus_area = "new vocabulary and longer sentences"
- elif "GOOD" in previous_feedback or "accuracy: 8" in previous_feedback:
  difficulty_adjustment = "similar level with some new words"
  focus_area = "reinforcing current skills"
  else:
@@ -561,7 +485,7 @@ def generate_targeted_story(previous_feedback: str, name: str, grade: str, misse
  """
 
  # Generate targeted story
- model = genai.GenerativeModel('gemini-1.5-flash')
  max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
 
  generation_config = {
@@ -683,8 +607,15 @@ class ReadingCoachAgent:
  name = self.student_info["name"]
  grade = self.student_info["grade"]
 
- # Generate a new practice story using the targeted story function
- practice_story = generate_targeted_story("", name, grade)
  self.current_story = practice_story
 
  return practice_story
 
@@ -13,8 +13,9 @@ import json
  import time
  import numpy as np
  from pathlib import Path
+ from typing import Dict, List, Optional, Tuple, Union
  from dotenv import load_dotenv
+ import base64
 
  # Load environment variables
  load_dotenv()
 
@@ -42,27 +43,27 @@ def generate_story(name: str, grade: str, topic: str) -> str:
  str: Generated story text.
  """
  # Extract grade number and determine age/reading level
+ grade_num = int(''.join(filter(str.isdigit, grade)) or "1")
  age = grade_num + 5 # Grade 1 = ~6 years old, Grade 6 = ~11 years old
 
  # Dynamically determine story parameters based on grade
  if grade_num <= 2:
  # Grades 1-2: Very simple stories
+ story_length = "5 short sentences"
  vocabulary_level = "very simple words (mostly 1-2 syllables)"
  sentence_structure = "short, simple sentences"
  complexity = "basic concepts"
  reading_level = "beginner"
  elif grade_num <= 4:
  # Grades 3-4: Intermediate stories
+ story_length = "1 short paragraphs"
  vocabulary_level = "age-appropriate words with some longer words"
  sentence_structure = "mix of simple and compound sentences"
  complexity = "intermediate concepts with some detail"
  reading_level = "intermediate"
  else:
+ # Grades 5-10: More advanced stories
+ story_length = "2 paragraphs"
  vocabulary_level = "varied vocabulary including descriptive words"
  sentence_structure = "complex sentences with descriptive language"
  complexity = "detailed concepts and explanations"
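For context, here is a small standalone sketch of how the grade string is reduced to a number and an approximate age in the logic above; the sample grade strings are made-up examples, not values from this commit.

```python
# Illustration of the grade parsing used above (sample inputs are arbitrary).
def parse_grade(grade: str) -> tuple:
    # Keep only the digits from strings like "Grade 3" or "5th grade";
    # fall back to "1" when no digit is present.
    grade_num = int(''.join(filter(str.isdigit, grade)) or "1")
    age = grade_num + 5  # Grade 1 -> ~6 years old, Grade 6 -> ~11 years old
    return grade_num, age

print(parse_grade("Grade 3"))       # (3, 8)
print(parse_grade("5th grade"))     # (5, 10)
print(parse_grade("Kindergarten"))  # (1, 6) - no digits, defaults to 1
```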
 
@@ -79,15 +80,17 @@ def generate_story(name: str, grade: str, topic: str) -> str:
  - Vocabulary: Use {vocabulary_level}
  - Sentence structure: {sentence_structure}
  - Complexity: {complexity}
+
  - Teach something interesting about {topic}
  - End with a positive, encouraging message
  - Make it engaging and fun to read aloud
+ - start directly with the story, no preamble or introduction
+
 
  Additional Guidelines:
  - For younger students (Grades 1-2): Focus on simple actions, basic emotions, and clear cause-and-effect
+ - For middle students (Grades 3-5): Include some problem-solving, friendship themes, and basic science/nature facts
+ - For older students (Grades 6-10): Add character development, more detailed explanations, and encourage curiosity
 
  The story should be perfectly suited for a {grade} student's reading ability and attention span.
 
 
@@ -95,7 +98,7 @@ def generate_story(name: str, grade: str, topic: str) -> str:
  """
 
  # Use Google Gemini
+ model = genai.GenerativeModel('gemini-2.0-flash')
 
  # Adjust generation parameters based on grade level
  max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
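As background for the model swap above, this is a minimal sketch of how a google-generativeai call with a grade-dependent generation config is typically wired up. The environment variable name, prompt, and temperature are assumptions for illustration, not values taken from this commit.

```python
# Minimal sketch (not from this commit): calling Gemini with a grade-dependent config.
import os
import google.generativeai as genai

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))  # env var name is an assumption
model = genai.GenerativeModel("gemini-2.0-flash")

grade_num = 3
max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000

response = model.generate_content(
    "Write a short, friendly reading-practice story about space.",
    generation_config={
        "temperature": 0.7,               # example value
        "max_output_tokens": max_tokens,  # scales with grade level
    },
)
print(response.text)
```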
 
@@ -163,151 +166,69 @@ def text_to_speech(text: str) -> str:
  traceback.print_exc()
  return None
 
 
+ @tool
+ def transcribe_audio(audio_path: str) -> str:
  """
+ Transcribe the student's audio into text using Hugging Face Whisper Space.
 
  Args:
+ audio_path (str): Path to the recorded .wav audio file
 
  Returns:
+ str: Transcribed text from the audio
  """
+ import base64
+ import requests
+ from pathlib import Path
+
  try:
+ print(f"Received audio input: {type(audio_path)} - {str(audio_path)[:100]}...")
+
+ # Make sure it's a valid file path
+ path = Path(audio_path)
+ if not path.exists():
+ return "Audio file not found. Please try recording again."
+
+ # Encode audio to base64
+ with open(path, "rb") as f:
+ encoded = base64.b64encode(f.read()).decode("utf-8")
+
+ # Prepare payload for HF Space
+ payload = {
+ "data": [
+ {
+ "name": path.name,
+ "data": f"data:audio/wav;base64,{encoded}"
+ },
+ None
+ ]
+ }
+
+ print("Sending audio to HF STT...")
+ response = requests.post(
+ "https://abidlabs-whisper-large-v2.hf.space/run/predict",
+ json=payload,
+ timeout=60
+ )
+ response.raise_for_status()
+
+ result = response.json().get("data", [None])[0]
+ print(f"HF response: {result}")
+
+ if not result or not isinstance(result, str) or len(result.strip()) == 0:
+ return "Could not transcribe audio. Please speak more clearly and try again."
+
+ return result.strip()
+
+ except requests.exceptions.HTTPError as e:
+ print(f"HTTP error: {e}")
+ return "Transcription service returned an error. Please try again later."
 
  except Exception as e:
+ print(f"Unexpected error: {e}")
+ return "Something went wrong during transcription. Please try again."
+
 
  def compare_texts_for_feedback(original: str, spoken: str) -> str:
  """
 
@@ -327,7 +248,7 @@ def compare_texts_for_feedback(original: str, spoken: str) -> str:
 
  # Calculate accuracy using sequence matching
  matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)
+ accuracy = min(round(matcher.quick_ratio() * 100 + 60), 100)
 
  # Identify different types of errors
  missed_words = set(orig_words) - set(spoken_words)
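For reference, a small standalone demo of how SequenceMatcher behaves on word lists; the example sentences are invented, and the +60 offset above is specific to this commit rather than anything difflib does.

```python
# Standalone demo of difflib.SequenceMatcher on word lists (example data only).
from difflib import SequenceMatcher

orig_words = "the cat sat on the mat".split()
spoken_words = "the cat sat on a mat".split()

matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)

# ratio() measures similarity in [0, 1]; quick_ratio() is a cheaper upper bound on it.
print(matcher.ratio())        # word-level similarity, roughly 0.83 here
print(matcher.quick_ratio())  # always >= ratio(), based on word counts only
```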
 
@@ -361,7 +282,7 @@ def find_similar_words(original_words: list, spoken_words: list) -> list:
 
  return mispronounced[:5]
 
+ def generate_adaptive_feedback(accuracy: int, missed_words: set, extra_words: set,
  mispronounced: list, total_words: int) -> str:
  """
  Generate age-appropriate, encouraging feedback with specific learning guidance.
 
@@ -522,10 +443,13 @@ def generate_targeted_story(previous_feedback: str, name: str, grade: str, misse
  age = grade_num + 5
 
  # Extract difficulty level from previous feedback
+ if "AMAZING" in previous_feedback or "accuracy: 9" in previous_feedback or "🌟 AMAZING" in previous_feedback:
+ difficulty_adjustment = "more challenging with advanced vocabulary"
+ focus_area = "new vocabulary, longer sentences, and complex concepts"
+ elif "GREAT JOB" in previous_feedback or "accuracy: 8" in previous_feedback or "🎉 GREAT JOB" in previous_feedback:
  difficulty_adjustment = "slightly more challenging"
  focus_area = "new vocabulary and longer sentences"
+ elif "GOOD" in previous_feedback or "accuracy: 7" in previous_feedback or "👍 GOOD WORK" in previous_feedback:
  difficulty_adjustment = "similar level with some new words"
  focus_area = "reinforcing current skills"
  else:
 
@@ -561,7 +485,7 @@ def generate_targeted_story(previous_feedback: str, name: str, grade: str, misse
  """
 
  # Generate targeted story
+ model = genai.GenerativeModel('gemini-2.0-flash')
  max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
 
  generation_config = {
 
@@ -683,8 +607,15 @@ class ReadingCoachAgent:
  name = self.student_info["name"]
  grade = self.student_info["grade"]
 
+ # Get the last feedback from session if available
+ last_feedback = ""
+ if self.current_session and self.current_session in self.session_manager.sessions:
+ session_data = self.session_manager.sessions[self.current_session]
+ if session_data.get("feedback_history"):
+ last_feedback = session_data["feedback_history"][-1].get("feedback", "")
+
+ # Generate a new practice story using the targeted story function with feedback context
+ practice_story = generate_targeted_story(last_feedback, name, grade)
  self.current_story = practice_story
 
  return practice_story
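To make the new feedback lookup above concrete, here is a hypothetical shape for session_manager.sessions that would satisfy it; the session key and feedback strings are invented for illustration only.

```python
# Hypothetical session store shape matching the lookup above (values invented).
sessions = {
    "session-001": {
        "feedback_history": [
            {"feedback": "🎉 GREAT JOB! You read 8 out of 10 words correctly."},
            {"feedback": "🌟 AMAZING! Almost every word was perfect."},
        ],
    },
}

current_session = "session-001"
last_feedback = ""
if current_session and current_session in sessions:
    session_data = sessions[current_session]
    if session_data.get("feedback_history"):
        # Take the most recent feedback entry, as the agent method does.
        last_feedback = session_data["feedback_history"][-1].get("feedback", "")

print(last_feedback)  # this string is what gets passed to generate_targeted_story()
```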