# Spaces: Sleeping  (page-status text captured from the Hugging Face Spaces web view; not part of the program)
import os | |
import requests | |
from smolagents.tools import tool | |
from difflib import SequenceMatcher | |
try: | |
from gradio_client import Client | |
except ImportError: | |
# Fallback import for older versions | |
import gradio_client | |
Client = gradio_client.Client | |
import google.generativeai as genai | |
import json | |
import time | |
import numpy as np | |
from pathlib import Path | |
from typing import Dict, List, Optional, Tuple, Union | |
from dotenv import load_dotenv | |
import base64 | |
# Load environment variables
# (presumably from a local .env file via python-dotenv -- confirm deployment setup)
load_dotenv()

# Configure API keys
# Each value is None when the corresponding environment variable is unset.
# NOTE(review): TTS_API and STT_API are not referenced by any function visible
# in this file; they may be used elsewhere.
TTS_API = os.getenv("TTS_API")
STT_API = os.getenv("STT_API")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Configure Google Gemini
# Skipped when no key is set; the story generators below use genai regardless,
# so they rely on this configuration having happened.
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
def generate_story(name: str, grade: str, topic: str) -> str:
    """
    Generate a short, age-appropriate story for reading practice using LLM.

    The grade label is reduced to a number, mapped to one of three reading
    tiers (grades 1-2, grades 3-4, grade 5 and above), and the tier's
    settings are interpolated into a prompt sent to Gemini.

    Args:
        name (str): The child's name.
        grade (str): The student's grade level, e.g., "Grade 3". All digits in
            the label are concatenated; a label with no digits is treated as
            grade 1.
        topic (str): The story topic, e.g., "space", "animals".

    Returns:
        str: Generated story text with surrounding whitespace removed.

    Note:
        No exception handling is done here; errors raised by the Gemini client
        propagate to the caller.
    """
    # Extract grade number and determine age/reading level
    grade_num = int(''.join(filter(str.isdigit, grade)) or "1")
    age = grade_num + 5  # Grade 1 = ~6 years old, Grade 6 = ~11 years old

    # One row per reading tier: (highest grade covered, settings). Keeping the
    # token budget next to the prose settings stops the two drifting apart.
    tiers = (
        (2, {
            # Grades 1-2: Very simple stories
            "story_length": "5 short sentences",
            "vocabulary_level": "very simple words (mostly 1-2 syllables)",
            "sentence_structure": "short, simple sentences",
            "complexity": "basic concepts",
            "reading_level": "beginner",
            "max_tokens": 300,
        }),
        (4, {
            # Grades 3-4: Intermediate stories
            # (was "1 short paragraphs" -- grammar fixed in the prompt text)
            "story_length": "1 short paragraph",
            "vocabulary_level": "age-appropriate words with some longer words",
            "sentence_structure": "mix of simple and compound sentences",
            "complexity": "intermediate concepts with some detail",
            "reading_level": "intermediate",
            "max_tokens": 600,
        }),
    )
    # Grades 5-10: More advanced stories
    advanced = {
        "story_length": "2 paragraphs",
        "vocabulary_level": "varied vocabulary including descriptive words",
        "sentence_structure": "complex sentences with descriptive language",
        "complexity": "detailed concepts and explanations",
        "reading_level": "advanced elementary",
        "max_tokens": 1000,
    }
    profile = next(
        (settings for top_grade, settings in tiers if grade_num <= top_grade),
        advanced,
    )
    story_length = profile["story_length"]
    vocabulary_level = profile["vocabulary_level"]
    sentence_structure = profile["sentence_structure"]
    complexity = profile["complexity"]
    reading_level = profile["reading_level"]

    # Create dynamic, grade-adaptive prompt. The leading and trailing empty
    # items reproduce the newline that opened and closed the original
    # triple-quoted prompt.
    prompt = "\n".join([
        "",
        f"You are an expert children's reading coach. Create an engaging, educational story for a {age}-year-old child named {name} about {topic}.",
        f"GRADE LEVEL: {grade} ({reading_level} level)",
        "Story Requirements:",
        f"- Length: {story_length}",
        f"- Vocabulary: Use {vocabulary_level}",
        f"- Sentence structure: {sentence_structure}",
        f"- Complexity: {complexity}",
        f"- Teach something interesting about {topic}",
        "- End with a positive, encouraging message",
        "- Make it engaging and fun to read aloud",
        "- start directly with the story, no preamble or introduction",
        "Additional Guidelines:",
        "- For younger students (Grades 1-2): Focus on simple actions, basic emotions, and clear cause-and-effect",
        "- For middle students (Grades 3-5): Include some problem-solving, friendship themes, and basic science/nature facts",
        "- For older students (Grades 6-10): Add character development, more detailed explanations, and encourage curiosity",
        f"The story should be perfectly suited for a {grade} student's reading ability and attention span.",
        "Story:",
        "",
    ])

    # Use Google Gemini
    model = genai.GenerativeModel('gemini-2.0-flash')
    generation_config = {
        "temperature": 0.8,
        "max_output_tokens": profile["max_tokens"],
        "top_p": 0.9,
    }
    response = model.generate_content(
        contents=prompt,
        generation_config=generation_config
    )
    return response.text.strip()
def text_to_speech(text: str) -> Optional[str]:
    """
    Convert story text into an audio file via the TTS service using gradio_client.

    Args:
        text (str): The story to convert to speech.

    Returns:
        Optional[str]: The audio path reported by the TTS Space, or None when
        the text is empty, the service call fails, or the response does not
        have the expected (filepath, status) shape.
    """
    # Nothing to synthesise: skip the remote call entirely.
    if not text or not text.strip():
        print("TTS Error: no text provided")
        return None

    try:
        # Use the gradio_client to interact with the TTS API with correct parameters based on API docs
        client = Client("NihalGazi/Text-To-Speech-Unlimited")

        # Call the API with proper keyword arguments as per documentation
        result = client.predict(
            prompt=text,            # Required: The text to convert to speech
            voice="nova",           # Voice selection from available options
            emotion="neutral",      # Required: Emotion style
            use_random_seed=True,   # Use random seed for variety
            specific_seed=12345,    # Specific seed value
            api_name="/text_to_speech_app"
        )
        print(f"TTS result: {result}")
        print(f"TTS result type: {type(result)}")

        # According to API docs, returns tuple of (filepath, status_str)
        if not (isinstance(result, tuple) and len(result) >= 2):
            print(f"Unexpected TTS result format: {result}")
            return None

        audio_path, status = result[0], result[1]
        print(f"TTS Status: {status}")

        if not (audio_path and isinstance(audio_path, str)):
            print(f"Invalid audio path: {audio_path}")
            return None

        print(f"TTS generated audio at: {audio_path}")
        return audio_path
    except Exception as e:
        # Best-effort: any failure is logged and reported to the caller as None.
        print(f"TTS Error: {e}")
        import traceback
        traceback.print_exc()
        return None
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe the student's audio into text using Hugging Face Whisper Space.

    Args:
        audio_path (str): Path to the recorded .wav audio file. A missing,
            empty or None path is reported as "not found".

    Returns:
        str: Transcribed text from the audio. On any failure a user-facing
        error sentence is returned instead; this function does not raise.
    """
    not_found_message = "Audio file not found. Please try recording again."

    print(f"Received audio input: {type(audio_path)} - {str(audio_path)[:100]}...")

    # No recording was supplied at all (None or empty string).
    if not audio_path:
        return not_found_message

    try:
        # Make sure it's a regular file (a directory would fail later on read).
        path = Path(audio_path)
        if not path.is_file():
            return not_found_message

        # Encode audio to base64
        encoded = base64.b64encode(path.read_bytes()).decode("utf-8")

        # Prepare payload for HF Space
        payload = {
            "data": [
                {
                    "name": path.name,
                    "data": f"data:audio/wav;base64,{encoded}"
                },
                None
            ]
        }

        print("Sending audio to HF STT...")
        response = requests.post(
            "https://abidlabs-whisper-large-v2.hf.space/run/predict",
            json=payload,
            timeout=60
        )
        response.raise_for_status()

        # A missing or empty "data" list is treated as "no transcript".
        outputs = response.json().get("data") or [None]
        result = outputs[0]
        print(f"HF response: {result}")

        if not isinstance(result, str) or not result.strip():
            return "Could not transcribe audio. Please speak more clearly and try again."
        return result.strip()
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error: {e}")
        return "Transcription service returned an error. Please try again later."
    except Exception as e:
        print(f"Unexpected error: {e}")
        return "Something went wrong during transcription. Please try again."
def _tokenize_for_comparison(text: str) -> list:
    """Split text into lower-cased words with surrounding punctuation removed."""
    stripped = (token.strip(".,!?;:\"'").lower() for token in text.split())
    # Drop tokens that were punctuation only (e.g. "..." or a lone quote); they
    # would otherwise show up as empty "words" in the feedback.
    return [word for word in stripped if word]


def compare_texts_for_feedback(original: str, spoken: str) -> str:
    """
    Compare the original and spoken text, provide age-appropriate feedback with pronunciation help.

    Args:
        original (str): The original story text.
        spoken (str): The student's transcribed reading.

    Returns:
        str: Feedback text produced by generate_adaptive_feedback, including a
        "Reading accuracy: NN.N%" line.
    """
    # Clean and process text
    orig_words = _tokenize_for_comparison(original)
    spoken_words = _tokenize_for_comparison(spoken)

    # Calculate accuracy using sequence matching.
    # ratio() is used instead of quick_ratio(): the latter is only an upper
    # bound that ignores word order. The previous flat +60 bonus is removed
    # because it made every score at least 60, so the feedback tiers below
    # that threshold could never be reached.
    matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)
    accuracy = min(round(matcher.ratio() * 100), 100)

    # Identify different types of errors
    missed_words = set(orig_words) - set(spoken_words)
    extra_words = set(spoken_words) - set(orig_words)

    # Find mispronounced words (words that sound similar but are different)
    mispronounced = find_similar_words(orig_words, spoken_words)

    # Generate age-appropriate feedback
    return generate_adaptive_feedback(accuracy, missed_words, extra_words, mispronounced, len(orig_words))
def find_similar_words(original_words: list, spoken_words: list) -> list:
    """
    Find words that might be mispronounced (similar but not exact matches).

    An original word is considered when it is longer than two characters and
    does not appear among the spoken words; its closest spoken word (difflib
    similarity of at least 0.6) is reported. Each original word is reported at
    most once, in order of first appearance.

    Args:
        original_words (list): Original story words
        spoken_words (list): Transcribed words

    Returns:
        list: Up to 5 tuples of (original_word, spoken_word) for potential
        mispronunciations
    """
    from difflib import get_close_matches

    max_results = 5
    spoken_lookup = set(spoken_words)  # O(1) membership instead of scanning the list
    already_checked = set()
    mispronounced = []

    for orig_word in original_words:
        if len(orig_word) <= 2 or orig_word in spoken_lookup:
            continue
        # A word repeated in the story must not fill the result with copies.
        if orig_word in already_checked:
            continue
        already_checked.add(orig_word)

        close_matches = get_close_matches(orig_word, spoken_words, n=1, cutoff=0.6)
        if close_matches:
            mispronounced.append((orig_word, close_matches[0]))
            if len(mispronounced) == max_results:
                break

    return mispronounced
def generate_adaptive_feedback(accuracy: int, missed_words: set, extra_words: set,
                               mispronounced: list, total_words: int) -> str:
    """
    Build the encouraging feedback message shown to the student.

    The message has a headline chosen by accuracy, a "Reading accuracy" line,
    an optional list of missed words with tips, an optional list of
    mispronunciations with corrections, and a closing next-step suggestion.

    Args:
        accuracy (float): Reading accuracy percentage
        missed_words (set): Words that were skipped
        extra_words (set): Words that were added (not used in the message)
        mispronounced (list): (original, spoken) pairs of likely mispronunciations
        total_words (int): Total words in story (not used in the message)

    Returns:
        str: Feedback lines joined with newlines
    """
    # Headline tiers, highest threshold first; the first one reached wins.
    headline_tiers = (
        (95, "🌟 AMAZING! You read almost perfectly!"),
        (85, "🎉 GREAT JOB! You're doing wonderful!"),
        (70, "👍 GOOD WORK! You're getting better!"),
        (50, "😊 NICE TRY! Keep practicing!"),
    )
    headline = next(
        (text for floor, text in headline_tiers if accuracy >= floor),
        "🚀 GREAT START! Every practice makes you better!",
    )
    lines = [headline, f"Reading accuracy: {accuracy:.1f}%"]

    # Missed words: alphabetical, capped at 8, each with a pronunciation tip.
    if missed_words:
        lines.append("\n📚 PRACTICE THESE WORDS:")
        lines.extend(
            f"• {entry.upper()} - {get_pronunciation_tip(entry)}"
            for entry in sorted(missed_words)[:8]
        )

    # Likely mispronunciations, each with a targeted correction.
    if mispronounced:
        lines.append("\n🎯 PRONUNCIATION PRACTICE:")
        lines.extend(
            f"• {expected.upper()} (you said '{heard}') - {get_pronunciation_correction(expected, heard)}"
            for expected, heard in mispronounced
        )

    # Closing suggestion for what to do next.
    if accuracy >= 80:
        closing = "\n🏆 You're ready for more challenging stories!"
    elif accuracy >= 60:
        closing = "\n💪 Try reading this story again to improve your score!"
    else:
        closing = "\n🌱 Let's practice with shorter, simpler stories first!"
    lines.append(closing)

    return "\n".join(lines)
def get_pronunciation_tip(word: str) -> str:
    """
    Generate pronunciation tips for difficult words.

    Rules are checked in order and the first match wins: very short words,
    "-tion" endings, "-ed" endings, words containing "th", words starting with
    "wh", long words (split by break_into_syllables), then everything else.

    Args:
        word (str): Word to provide pronunciation help for

    Returns:
        str: Pronunciation tip
    """
    word = word.lower()

    # Common pronunciation patterns and tips
    if len(word) <= 3:
        return f"Sound it out: {'-'.join(word)}"

    if word.endswith('tion'):
        return "Ends with 'shun' sound"

    if word.endswith('ed'):
        stem = word[:-2]
        if stem.endswith(('t', 'd')):
            # wanted, needed: the ending is its own syllable
            return "Past tense - ends with 'ed' sound"
        if stem.endswith(('p', 'k', 'f', 'x', 'c', 'sh', 'ch', 'ss')):
            # jumped, walked, fixed, danced, washed, watched, passed: after
            # these spellings the ending is pronounced 't', not 'd'
            return "Past tense - ends with 't' sound"
        return "Past tense - ends with 'd' sound"

    if 'th' in word:
        return "Put your tongue between your teeth for 'th'"

    if word.startswith('wh'):
        return "Starts with 'w' sound (like 'when')"

    if len(word) >= 6:
        # Break longer words into syllables
        return f"Break it down: {break_into_syllables(word)}"

    # 4-5 letter words: spell every letter out, separated by dashes.
    return f"Sound it out slowly: {'-'.join(word)}"
def get_pronunciation_correction(original: str, spoken: str) -> str:
    """
    Suggest how to fix a word that was read incorrectly.

    Both words are lower-cased, then compared by length, first letter and last
    letter, in that order; the first difference found decides the tip.

    Args:
        original (str): Correct word
        spoken (str): How it was pronounced

    Returns:
        str: Correction tip
    """
    target = original.lower()
    heard = spoken.lower()
    length_gap = len(target) - len(heard)

    # The student dropped part of the word.
    if length_gap > 0:
        return f"Don't skip letters! Say all sounds in '{target}'"

    # The student added sounds that are not there.
    if length_gap < 0:
        return f"Not too fast! The word is just '{target}'"

    # Same length from here on: look at the edges of the word.
    if target[0] != heard[0]:
        return f"Starts with '{target[0]}' sound, not '{heard[0]}'"

    if target[-1] != heard[-1]:
        return f"Ends with '{target[-1]}' sound"

    return f"Listen carefully: '{target}' - try saying it slower"
def break_into_syllables(word: str) -> str:
    """
    Split a word into rough, dash-separated chunks for reading help.

    A cut is placed after every vowel (a, e, i, o, u, any case) that is
    immediately followed by a non-vowel character. This is a spelling
    heuristic, not true syllabification.

    Args:
        word (str): Word to break into syllables

    Returns:
        str: The chunks joined with '-', or the word unchanged when no cut
        was made
    """
    vowel_letters = 'aeiou'
    chunks = []
    chunk_start = 0

    # Walk adjacent character pairs; the final character has no successor and
    # therefore never triggers a cut.
    for position, (here, following) in enumerate(zip(word, word[1:])):
        vowel_here = here.lower() in vowel_letters
        vowel_next = following.lower() in vowel_letters
        if vowel_here and not vowel_next:
            chunks.append(word[chunk_start:position + 1])
            chunk_start = position + 1

    # Whatever follows the last cut forms the closing chunk.
    chunks.append(word[chunk_start:])

    if len(chunks) > 1:
        return '-'.join(chunks)
    return word
def _difficulty_from_feedback(previous_feedback: str) -> tuple:
    """Map previous feedback to a (difficulty_adjustment, focus_area) pair."""
    import re

    harder = ("more challenging with advanced vocabulary",
              "new vocabulary, longer sentences, and complex concepts")
    slightly_harder = ("slightly more challenging",
                       "new vocabulary and longer sentences")
    same_level = ("similar level with some new words",
                  "reinforcing current skills")
    easier = ("simpler and shorter",
              "basic vocabulary and simple sentences")

    # Prefer the numeric score. Keyword search over the whole text is fooled
    # by upper-cased practice words ("• GOOD - ...", "• AMAZING - ..."), and a
    # prefix test such as "accuracy: 9" also matches 9.0%.
    score = re.search(r'Reading accuracy:\s*(\d+(?:\.\d+)?)%', previous_feedback)
    if score:
        accuracy = float(score.group(1))
        if accuracy >= 90:
            return harder
        if accuracy >= 80:
            return slightly_harder
        if accuracy >= 70:
            return same_level
        return easier

    # No parsable score: fall back to the original keyword heuristics.
    if "AMAZING" in previous_feedback or "accuracy: 9" in previous_feedback:
        return harder
    if "GREAT JOB" in previous_feedback or "accuracy: 8" in previous_feedback:
        return slightly_harder
    if "GOOD" in previous_feedback or "accuracy: 7" in previous_feedback:
        return same_level
    return easier


def generate_targeted_story(previous_feedback: str, name: str, grade: str, missed_words: Optional[list] = None) -> str:
    """
    Generate a new story that specifically targets words the student struggled with.

    Args:
        previous_feedback (str): Previous reading feedback; its
            "Reading accuracy: NN.N%" line (or, failing that, its headline
            keywords) decides whether the new story is harder or easier.
        name (str): Student's name
        grade (str): Student's grade level. All digits in the label are
            concatenated; a label with no digits is treated as grade 3.
        missed_words (list): Words the student had trouble with. Any iterable
            of strings is accepted; only the first five are used. When empty
            or None the prompt asks for common sight words instead.

    Returns:
        str: New targeted story for practice, with surrounding whitespace
        removed.

    Note:
        Errors raised by the Gemini client propagate to the caller.
    """
    grade_num = int(''.join(filter(str.isdigit, grade)) or "3")
    age = grade_num + 5

    # Extract difficulty level from previous feedback
    difficulty_adjustment, focus_area = _difficulty_from_feedback(previous_feedback)

    # Create targeted practice words
    if missed_words:
        # list() first so a set (or other non-sliceable iterable) also works.
        practice_words = list(missed_words)[:5]  # Focus on top 5 missed words
        word_focus = f"Include and repeat these practice words: {', '.join(practice_words)}"
    else:
        word_focus = "Focus on common sight words for this grade level"

    # Generate adaptive prompt. The leading and trailing empty items reproduce
    # the newline that opened and closed the original triple-quoted prompt.
    prompt = "\n".join([
        "",
        f"You are an expert reading coach creating a personalized story for {name}, a {age}-year-old in {grade}.",
        "LEARNING ADAPTATION:",
        f"- Make this story {difficulty_adjustment} than the previous one",
        f"- Focus on: {focus_area}",
        f"- {word_focus}",
        "STORY REQUIREMENTS:",
        f"- Feature {name} as the main character",
        "- Include an engaging adventure or discovery theme",
        "- Naturally incorporate the practice words multiple times",
        "- Make it fun and encouraging",
        f"- End with {name} feeling proud and accomplished",
        f"Create a story that helps {name} practice the words they found challenging while building confidence.",
        "Story:",
        "",
    ])

    # Generate targeted story
    model = genai.GenerativeModel('gemini-2.0-flash')
    max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
    generation_config = {
        "temperature": 0.7,
        "max_output_tokens": max_tokens,
        "top_p": 0.9,
    }
    response = model.generate_content(
        contents=prompt,
        generation_config=generation_config
    )
    return response.text.strip()
class SessionManager:
    """Manages student sessions and progress tracking"""

    def __init__(self):
        # session_id -> session record (see start_session for the record layout)
        self.sessions = {}
        # NOTE(review): never read or written by this class; kept for
        # compatibility with any external users.
        self.student_progress = {}

    def start_session(self, student_name: str, grade: str) -> str:
        """
        Start a new reading session for a student.

        Args:
            student_name (str): Student's name; used as the id prefix.
            grade (str): Student's grade label, stored as given.

        Returns:
            str: The new session id, "<name>_<unix seconds>". A numeric suffix
            is appended only if that id is already taken, so a second session
            started within the same second cannot overwrite the first.
        """
        started_at = time.time()
        base_id = f"{student_name}_{int(started_at)}"

        session_id = base_id
        attempt = 1
        while session_id in self.sessions:
            attempt += 1
            session_id = f"{base_id}_{attempt}"

        self.sessions[session_id] = {
            "student_name": student_name,
            "grade": grade,
            "start_time": started_at,
            "stories_read": 0,
            "total_accuracy": 0,
            "feedback_history": []
        }
        return session_id

    def get_session(self, session_id: str) -> dict:
        """Return the session record, or an empty dict for an unknown id."""
        return self.sessions.get(session_id, {})

    def update_session(self, session_id: str, accuracy: float, feedback: str):
        """
        Record one reading result on a session.

        Increments the story count, adds to the accuracy total and appends a
        timestamped history entry. Unknown session ids are ignored.

        Args:
            session_id (str): Id returned by start_session.
            accuracy (float): Accuracy percentage for this reading.
            feedback (str): Feedback text shown to the student.
        """
        session = self.sessions.get(session_id)
        if session is None:
            return

        session["stories_read"] += 1
        session["total_accuracy"] += accuracy
        session["feedback_history"].append({
            "timestamp": time.time(),
            "accuracy": accuracy,
            "feedback": feedback
        })
class ReadingCoachAgent:
    """
    Main agent class that provides the interface for the reading coach system.
    Wraps the individual tool functions and manages student sessions.
    """

    def __init__(self):
        self.session_manager = SessionManager()
        self.current_session = None   # id of the active session, if any
        self.current_story = ""       # text the student is currently reading
        self.student_info = {"name": "", "grade": ""}

    def generate_story_for_student(self, name: str, grade: str, topic: str) -> str:
        """
        Generate a story for a student and start a new session.

        Args:
            name (str): Student's name.
            grade (str): Student's grade label, e.g. "Grade 3".
            topic (str): Story topic.

        Returns:
            str: The generated story, also stored as the current story.
        """
        # Store student info
        self.student_info = {"name": name, "grade": grade}

        # Every call starts a fresh session and makes it the current one.
        self.current_session = self.session_manager.start_session(name, grade)

        # Generate story using the tool function
        story = generate_story(name, grade, topic)
        self.current_story = story
        return story

    def create_audio_from_story(self, story: str) -> str:
        """Convert story to audio using TTS; returns whatever text_to_speech returns."""
        return text_to_speech(story)

    def analyze_student_reading(self, audio_path: str) -> tuple:
        """
        Analyze student's reading and provide feedback.

        Args:
            audio_path (str): Path to the student's recording.

        Returns:
            tuple: (transcribed_text, feedback, accuracy)
        """
        # Transcribe the audio
        # NOTE(review): transcribe_audio reports failures as ordinary sentences,
        # which are then scored below as if the student had read them aloud.
        transcribed_text = transcribe_audio(audio_path)

        # Compare with original story and get feedback
        feedback = compare_texts_for_feedback(self.current_story, transcribed_text)

        # Extract accuracy from feedback
        accuracy = self._extract_accuracy_from_feedback(feedback)

        # Update session if we have one
        if self.current_session:
            self.session_manager.update_session(self.current_session, accuracy, feedback)

        return transcribed_text, feedback, accuracy

    def generate_new_passage(self, topic: str) -> str:
        """
        Generate a new passage with the current student info.

        Args:
            topic (str): Story topic.

        Returns:
            str: The generated story, also stored as the current story.

        Raises:
            ValueError: If no student name or grade has been stored yet.
        """
        if not self.student_info["name"] or not self.student_info["grade"]:
            raise ValueError("No active session. Please start a new session first.")

        # Generate new story
        story = generate_story(self.student_info["name"], self.student_info["grade"], topic)
        self.current_story = story
        return story

    def generate_practice_story(self, name: str, grade: str) -> str:
        """
        Generate a new targeted practice story based on previous feedback.

        Stored student info takes precedence over the arguments; the arguments
        (or "Student" / "Grade 3") are used only when nothing is stored.

        Args:
            name (str): Fallback student name.
            grade (str): Fallback grade label.

        Returns:
            str: The practice story, also stored as the current story.
        """
        if not self.student_info.get("name") or not self.student_info.get("grade"):
            # Use provided parameters if student info is not available
            name = name or "Student"
            grade = grade or "Grade 3"
        else:
            name = self.student_info["name"]
            grade = self.student_info["grade"]

        # Get the last feedback from session if available
        last_feedback = ""
        session_data = self.session_manager.sessions.get(self.current_session) if self.current_session else None
        if session_data and session_data.get("feedback_history"):
            last_feedback = session_data["feedback_history"][-1].get("feedback", "")

        # Recover the words the student struggled with, so the story can
        # actually target them. Previously none were passed, and the prompt
        # always fell back to generic sight words.
        practice_words = self._extract_practice_words_from_feedback(last_feedback)

        # Generate a new practice story using the targeted story function with feedback context
        practice_story = generate_targeted_story(
            last_feedback, name, grade, missed_words=practice_words or None
        )
        self.current_story = practice_story
        return practice_story

    def clear_session(self):
        """Clear current session"""
        self.current_session = None
        self.current_story = ""
        self.student_info = {"name": "", "grade": ""}

    def reset_all_data(self):
        """Reset all current session state but keep tracked sessions."""
        self.clear_session()

    def _extract_accuracy_from_feedback(self, feedback: str) -> float:
        """Extract accuracy percentage from feedback text; 0.0 when absent."""
        import re

        # Look for "Reading accuracy: XX.X%" pattern in feedback
        match = re.search(r'Reading accuracy:\s*(\d+\.?\d*)%', feedback)
        if match:
            return float(match.group(1))
        return 0.0

    def _extract_practice_words_from_feedback(self, feedback: str) -> list:
        """
        Collect the practice words listed in a feedback message.

        Reads the bullet lines written by generate_adaptive_feedback
        ("• WORD - tip" and "• WORD (you said '...') - tip").

        Returns:
            list: Lower-cased words in order of first appearance, without
            duplicates; empty when the feedback lists none.
        """
        import re

        bullets = re.findall(r'^•\s+(\S+)\s+(?:-|\(you said)', feedback or "", flags=re.MULTILINE)
        # dict.fromkeys keeps first-seen order while dropping repeats.
        return list(dict.fromkeys(word.lower() for word in bullets))