ENGLISH-Speaking-Scoring

Running

App Files Files Community

aiqcamp committed on May 5

Commit

66afa9e

verified ·

1 Parent(s): 92535df

Update app.py

Browse files

Files changed (1) hide show

app.py +272 -148

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import numpy as np
 import matplotlib.pyplot as plt
 import time
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 import pandas as pd
 from sklearn.feature_extraction.text import CountVectorizer
@@ -11,30 +12,47 @@ import re
 # Download necessary NLTK data
 try:
-    nltk.data.find('tokenizers/punkt')
-except LookupError:
     nltk.download('punkt')
-try:
-    nltk.data.find('taggers/averaged_perceptron_tagger')
-except LookupError:
     nltk.download('averaged_perceptron_tagger')
-# Load Whisper for ASR
-asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
-# Load Grammar Scoring Model (CoLA)
-cola_model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-CoLA")
-cola_tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")
-grammar_pipeline = pipeline("text-classification", model=cola_model, tokenizer=cola_tokenizer)
-# Load Grammar Correction Model (T5)
-correction_pipeline = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
-# Add sentiment analysis
-sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
-# Add fluency analysis (using BERT)
-fluency_pipeline = pipeline("text-classification", model="textattack/bert-base-uncased-CoLA")
 # Common English filler words to detect
 FILLER_WORDS = ["um", "uh", "like", "you know", "actually", "basically", "literally",
@@ -57,38 +75,56 @@ def calculate_speaking_rate(text, duration):
 def analyze_vocabulary_richness(text):
     """Analyze vocabulary richness"""
-    words = word_tokenize(text.lower())
     if not words:
-        return 0, 0
     # Vocabulary richness (unique words / total words)
     unique_words = set(words)
     richness = len(unique_words) / len(words)
-    # POS tagging to see variety of word types used
-    pos_tags = nltk.pos_tag(words)
-    pos_counts = {}
-    for _, tag in pos_tags:
-        pos_counts[tag] = pos_counts.get(tag, 0) + 1
     return richness, pos_counts
 def analyze_sentence_complexity(text):
-    """Analyze sentence complexity"""
-    sentences = re.split(r'[.!?]+', text)
-    sentences = [s.strip() for s in sentences if s.strip()]
-    if not sentences:
-        return 0, 0
-    # Average words per sentence
-    words_per_sentence = [len(s.split()) for s in sentences]
-    avg_words = sum(words_per_sentence) / len(sentences)
-    # Sentence length variation (standard deviation)
-    sentence_length_variation = np.std(words_per_sentence) if len(sentences) > 1 else 0
-    return avg_words, sentence_length_variation
 def create_detailed_feedback(transcription, grammar_score, corrected_text,
                             sentiment, fluency, filler_ratio, speaking_rate,
@@ -152,120 +188,208 @@ def process_audio(audio):
     start_time = time.time()
-    # Get audio duration (assuming audio[1] contains the sample rate)
-    sample_rate = 16000  # Default if we can't determine
-    if isinstance(audio, tuple) and len(audio) > 1:
-        sample_rate = audio[1]
-    # For file uploads, we need to handle differently
-    if isinstance(audio, str):
-        # This is a file path
-        import librosa
-        y, sr = librosa.load(audio, sr=None)
-        duration = librosa.get_duration(y=y, sr=sr)
-    else:
-        # Assuming a tuple with (samples, sample_rate)
         try:
-            duration = len(audio[0]) / sample_rate if sample_rate > 0 else 0
         except:
-            duration = 0
-    # Step 1: Transcription
-    transcription_result = asr_pipeline(audio)
-    transcription = transcription_result["text"]
-    # Step 2: Grammar Scoring
-    score_output = grammar_pipeline(transcription)[0]
-    label = score_output["label"]
-    confidence = score_output["score"]
-    grammar_score = f"{label} ({confidence:.2f})"
-    # Step 3: Grammar Correction
-    corrected = correction_pipeline(transcription, max_length=128)[0]["generated_text"]
-    # Step 4: Sentiment Analysis
-    sentiment_result = sentiment_pipeline(transcription)[0]
-    sentiment = sentiment_result["label"]
-    sentiment_score = sentiment_result["score"]
-    # Step 5: Fluency Analysis
-    fluency_result = fluency_pipeline(transcription)[0]
-    fluency_score = fluency_result["score"] if fluency_result["label"] == "acceptable" else 1 - fluency_result["score"]
-    # Step 6: Filler Words Analysis
-    filler_count, filler_ratio = count_filler_words(transcription)
-    # Step 7: Speaking Rate
-    speaking_rate = calculate_speaking_rate(transcription, duration)
-    # Step 8: Vocabulary Richness
-    vocab_richness, pos_counts = analyze_vocabulary_richness(transcription)
-    # Step 9: Sentence Complexity
-    avg_words, sentence_variation = analyze_sentence_complexity(transcription)
-    # Create feedback
-    feedback = create_detailed_feedback(
-        transcription, grammar_score, corrected, sentiment,
-        fluency_score, filler_ratio, speaking_rate, vocab_richness, avg_words
-    )
-    # Create metrics visualization
-    fig, ax = plt.subplots(figsize=(10, 6))
-    # Define metrics for radar chart
-    categories = ['Grammar', 'Fluency', 'Vocabulary', 'Speaking Rate', 'Clarity']
-    # Normalize scores between 0 and 1
-    grammar_norm = confidence if label == "acceptable" else 1 - confidence
-    speaking_rate_norm = max(0, min(1, 1 - abs((speaking_rate - 140) / 100)))  # Optimal around 140 wpm
-    values = [
-        grammar_norm,
-        fluency_score,
-        vocab_richness,
-        speaking_rate_norm,
-        1 - filler_ratio  # Lower filler ratio is better
-    ]
-    # Complete the loop for the radar chart
-    values += values[:1]
-    categories += categories[:1]
-    # Convert to radians and plot
-    angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist()
-    angles += angles[:1]
-    ax.plot(angles, values, linewidth=2, linestyle='solid')
-    ax.fill(angles, values, alpha=0.25)
-    ax.set_yticklabels([])
-    ax.set_xticks(angles[:-1])
-    ax.set_xticklabels(categories[:-1])
-    ax.grid(True)
-    plt.title('Speaking Performance Metrics', size=15, color='navy', y=1.1)
-    # Create detailed analysis text
-    processing_time = time.time() - start_time
-    detailed_analysis = f"""
-    ## Detailed Speech Analysis
-    **Processing Time:** {processing_time:.2f} seconds
-    **Audio Duration:** {duration:.2f} seconds
-    ### Metrics:
-    - **Grammar Score:** {confidence:.2f} ({label})
-    - **Fluency Score:** {fluency_score:.2f}
-    - **Speaking Rate:** {speaking_rate:.1f} words per minute
-    - **Vocabulary Richness:** {vocab_richness:.2f} (higher is better)
-    - **Filler Words:** {filler_count} occurrences ({filler_ratio:.1%} of speech)
-    - **Avg Words Per Sentence:** {avg_words:.1f}
-    - **Sentiment:** {sentiment} ({sentiment_score:.2f})
-    ### Word Types Used:
-    {', '.join([f"{k}: {v}" for k, v in sorted(pos_counts.items(), key=lambda x: x[1], reverse=True)[:5]])}
-    """
-    return transcription, grammar_score, corrected, feedback, fig, detailed_analysis
 # Create theme
 theme = gr.themes.Soft(

 import numpy as np
 import matplotlib.pyplot as plt
 import time
+import os
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 import pandas as pd
 from sklearn.feature_extraction.text import CountVectorizer
 # Fetch the NLTK resources 'punkt' and 'averaged_perceptron_tagger' when this module-level code runs
 try:
+    # Download into an explicit directory, created here if missing, instead of NLTK's default location
+    nltk_data_dir = '/home/user/nltk_data'
+    os.makedirs(nltk_data_dir, exist_ok=True)
+    # Both resources are requested unconditionally; there is no prior nltk.data.find() check
+    nltk.download('punkt', download_dir=nltk_data_dir)
+    nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
+    # Put the custom directory first on NLTK's search path (reached only after both download calls return)
+    nltk.data.path.insert(0, nltk_data_dir)
+except Exception as e:
+    print(f"NLTK download issue: {e}")
+    # Fallback: retry without download_dir. These calls are unguarded, so an exception raised here propagates
     nltk.download('punkt')
     nltk.download('averaged_perceptron_tagger')
+# Load every model inside one try block; any Exception is printed and recorded as MODELS_LOADED = False instead of propagating
+try:
+    # Load Whisper for ASR
+    asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
+    # Load Grammar Scoring Model (CoLA)
+    cola_model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-CoLA")
+    cola_tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")
+    grammar_pipeline = pipeline("text-classification", model=cola_model, tokenizer=cola_tokenizer)
+    # Load Grammar Correction Model (T5)
+    correction_pipeline = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
+    # Add sentiment analysis
+    sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+    # Add fluency analysis (using BERT)
+    fluency_pipeline = pipeline("text-classification", model="textattack/bert-base-uncased-CoLA")
+    # Reached only when every load above succeeded
+    MODELS_LOADED = True
+except Exception as e:
+    print(f"Error loading models: {e}")
+    # Names assigned after the failing line stay undefined, so code using the pipelines must check this flag first
+    MODELS_LOADED = False
 # Common English filler words to detect
 FILLER_WORDS = ["um", "uh", "like", "you know", "actually", "basically", "literally",
 def analyze_vocabulary_richness(text):
     """Measure lexical diversity of *text* and tally its part-of-speech tags.

     Returns:
         A ``(richness, pos_counts)`` tuple. ``richness`` is the number of
         distinct lower-cased tokens divided by the total token count.
         ``pos_counts`` maps each tag from ``nltk.pos_tag`` to how often it
         occurred, or ``{"WORD": total, "UNIQUE": distinct}`` when tagging
         raises. Text with no tokens yields ``(0, {})``.
     """
     lowered = text.lower()
     try:
         tokens = word_tokenize(lowered)
     except LookupError:
         # Tokenizer data is unavailable: fall back to a plain regex word split.
         tokens = re.findall(r'\b\w+\b', lowered)
     if not tokens:
         return 0, {}
     distinct = set(tokens)
     richness = len(distinct) / len(tokens)
     try:
         tagged = nltk.pos_tag(tokens)
         tag_totals = {}
         for _, tag in tagged:
             tag_totals[tag] = tag_totals.get(tag, 0) + 1
     except Exception:
         # Tagging failed: report coarse totals in place of per-tag counts.
         tag_totals = {"WORD": len(tokens), "UNIQUE": len(distinct)}
     return richness, tag_totals
 def analyze_sentence_complexity(text):
     """Measure average sentence length and its spread for *text*.

     Sentences are delimited by runs of '.', '!' or '?'; words within a
     sentence are whitespace-separated.

     Args:
         text: Text to analyze. ``None`` or an empty string is treated as
             containing no sentences.

     Returns:
         A ``(avg_words, variation)`` tuple: the mean word count per sentence
         and the standard deviation of those counts as computed by
         ``np.std``. ``variation`` is 0 for a single sentence, and ``(0, 0)``
         is returned when there are no sentences at all.
     """
     # Guard falsy input up front: a catch-all fallback that re-parses `text`
     # cannot recover from a missing or non-string argument.
     if not text:
         return 0, 0
     # Simple sentence splitting by punctuation; drop empty fragments
     sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
     if not sentences:
         return 0, 0
     # Average words per sentence
     words_per_sentence = [len(s.split()) for s in sentences]
     avg_words = sum(words_per_sentence) / len(sentences)
     # Sentence length variation (standard deviation)
     sentence_length_variation = np.std(words_per_sentence) if len(sentences) > 1 else 0
     return avg_words, sentence_length_variation
 def create_detailed_feedback(transcription, grammar_score, corrected_text,
                             sentiment, fluency, filler_ratio, speaking_rate,
     start_time = time.time()
+    # Check if models loaded properly
+    if 'MODELS_LOADED' in globals() and not MODELS_LOADED:
+        return ("Models failed to load. Please check the logs for details.",
+                "Error", "Error", "Unable to process audio due to model loading issues.",
+                None, "## Error\nThe required models couldn't be loaded. Please check the system configuration.")
+    try:
+        # Get audio duration (assuming audio[1] contains the sample rate)
+        sample_rate = 16000  # Default if we can't determine
+        if isinstance(audio, tuple) and len(audio) > 1:
+            sample_rate = audio[1]
+        # For file uploads, we need to handle differently
+        duration = 0
+        if isinstance(audio, str):
+            # This is a file path
+            try:
+                import librosa
+                y, sr = librosa.load(audio, sr=None)
+                duration = librosa.get_duration(y=y, sr=sr)
+            except Exception as e:
+                print(f"Error getting duration: {e}")
+                # Estimate duration based on file size
+                try:
+                    file_size = os.path.getsize(audio)
+                    # Rough estimate: 16kHz, 16-bit audio is about 32KB per second
+                    duration = file_size / 32000
+                except:
+                    duration = 10  # Default to 10 seconds if we can't determine
+        else:
+            # Assuming a tuple with (samples, sample_rate)
+            try:
+                duration = len(audio[0]) / sample_rate if sample_rate > 0 else 0
+            except:
+                duration = 10  # Default duration
+        # Step 1: Transcription
+        try:
+            transcription_result = asr_pipeline(audio)
+            transcription = transcription_result["text"]
+        except Exception as e:
+            print(f"Transcription error: {e}")
+            return ("Error in speech recognition. Please try again.",
+                    "Error", "Error", "There was an error processing your audio.",
+                    None, f"## Error\nError in speech recognition: {str(e)[:100]}...")
+        if not transcription or transcription.strip() == "":
+            return ("No speech detected. Please speak louder or check your microphone.",
+                    "N/A", "N/A", "No speech detected in the audio.",
+                    None, "## No Speech Detected\nPlease try recording again with clearer speech.")
+        # Step 2: Grammar Scoring
         try:
+            score_output = grammar_pipeline(transcription)[0]
+            label = score_output["label"]
+            confidence = score_output["score"]
+            grammar_score = f"{label} ({confidence:.2f})"
+        except Exception as e:
+            print(f"Grammar scoring error: {e}")
+            label = "UNKNOWN"
+            confidence = 0.5
+            grammar_score = "Could not analyze grammar"
+        # Step 3: Grammar Correction
+        try:
+            corrected = correction_pipeline(transcription, max_length=128)[0]["generated_text"]
+        except Exception as e:
+            print(f"Grammar correction error: {e}")
+            corrected = transcription
+        # Step 4: Sentiment Analysis
+        try:
+            sentiment_result = sentiment_pipeline(transcription)[0]
+            sentiment = sentiment_result["label"]
+            sentiment_score = sentiment_result["score"]
+        except Exception as e:
+            print(f"Sentiment analysis error: {e}")
+            sentiment = "NEUTRAL"
+            sentiment_score = 0.5
+        # Step 5: Fluency Analysis
+        try:
+            fluency_result = fluency_pipeline(transcription)[0]
+            fluency_score = fluency_result["score"] if fluency_result["label"] == "acceptable" else 1 - fluency_result["score"]
+        except Exception as e:
+            print(f"Fluency analysis error: {e}")
+            fluency_score = 0.5
+        # Step 6: Filler Words Analysis
+        try:
+            filler_count, filler_ratio = count_filler_words(transcription)
+        except Exception as e:
+            print(f"Filler word analysis error: {e}")
+            filler_count, filler_ratio = 0, 0
+        # Step 7: Speaking Rate
+        try:
+            speaking_rate = calculate_speaking_rate(transcription, duration)
+        except Exception as e:
+            print(f"Speaking rate calculation error: {e}")
+            speaking_rate = 0
+        # Step 8: Vocabulary Richness
+        try:
+            vocab_richness, pos_counts = analyze_vocabulary_richness(transcription)
+        except Exception as e:
+            print(f"Vocabulary analysis error: {e}")
+            vocab_richness, pos_counts = 0.5, {"N/A": 1}
+        # Step 9: Sentence Complexity
+        try:
+            avg_words, sentence_variation = analyze_sentence_complexity(transcription)
+        except Exception as e:
+            print(f"Sentence complexity analysis error: {e}")
+            avg_words, sentence_variation = 0, 0
+        # Create feedback
+        try:
+            feedback = create_detailed_feedback(
+                transcription, grammar_score, corrected, sentiment,
+                fluency_score, filler_ratio, speaking_rate, vocab_richness, avg_words
+            )
+        except Exception as e:
+            print(f"Feedback creation error: {e}")
+            feedback = "Error generating detailed feedback."
+        # Create metrics visualization
+        try:
+            fig, ax = plt.subplots(figsize=(10, 6))
+            # Define metrics for radar chart
+            categories = ['Grammar', 'Fluency', 'Vocabulary', 'Speaking Rate', 'Clarity']
+            # Normalize scores between 0 and 1
+            grammar_norm = confidence if label == "acceptable" else 1 - confidence
+            speaking_rate_norm = max(0, min(1, 1 - abs((speaking_rate - 140) / 100)))  # Optimal around 140 wpm
+            values = [
+                grammar_norm,
+                fluency_score,
+                vocab_richness,
+                speaking_rate_norm,
+                1 - filler_ratio  # Lower filler ratio is better
+            ]
+            # Complete the loop for the radar chart
+            values += values[:1]
+            categories += categories[:1]
+            # Convert to radians and plot
+            angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist()
+            angles += angles[:1]
+            ax.plot(angles, values, linewidth=2, linestyle='solid')
+            ax.fill(angles, values, alpha=0.25)
+            ax.set_yticklabels([])
+            ax.set_xticks(angles[:-1])
+            ax.set_xticklabels(categories[:-1])
+            ax.grid(True)
+            plt.title('Speaking Performance Metrics', size=15, color='navy', y=1.1)
+        except Exception as e:
+            print(f"Visualization error: {e}")
+            # Create a simple error figure
+            fig, ax = plt.subplots(figsize=(6, 3))
+            ax.text(0.5, 0.5, "Error creating visualization",
+                    horizontalalignment='center', verticalalignment='center')
+            ax.axis('off')
+        # Create detailed analysis text
+        processing_time = time.time() - start_time
+        try:
+            pos_counts_str = ', '.join([f"{k}: {v}" for k, v in sorted(pos_counts.items(), key=lambda x: x[1], reverse=True)[:5]])
         except:
+            pos_counts_str = "N/A"
+        detailed_analysis = f"""
+        ## Detailed Speech Analysis
+        **Processing Time:** {processing_time:.2f} seconds
+        **Audio Duration:** {duration:.2f} seconds
+        ### Metrics:
+        - **Grammar Score:** {confidence:.2f} ({label})
+        - **Fluency Score:** {fluency_score:.2f}
+        - **Speaking Rate:** {speaking_rate:.1f} words per minute
+        - **Vocabulary Richness:** {vocab_richness:.2f} (higher is better)
+        - **Filler Words:** {filler_count} occurrences ({filler_ratio:.1%} of speech)
+        - **Avg Words Per Sentence:** {avg_words:.1f}
+        - **Sentiment:** {sentiment} ({sentiment_score:.2f})
+        ### Word Types Used:
+        {pos_counts_str}
+        """
+        return transcription, grammar_score, corrected, feedback, fig, detailed_analysis
+    except Exception as e:
+        print(f"Unexpected error in process_audio: {e}")
+        return ("An unexpected error occurred during processing.",
+                "Error", "Error", "There was an unexpected error processing your audio.",
+                None, f"## Unexpected Error\n\nAn error occurred: {str(e)[:200]}...")
 # Create theme
 theme = gr.themes.Soft(