Spaces:

invincible-jha
/

MentalHealthVocalBiomarkers

Runtime error

App Files Files Community

invincible-jha committed on Nov 18, 2024

Commit

1eb4ae1

verified ·

1 Parent(s): dd56228

Upload 3 files

Browse files

Files changed (3) hide show

src/models/analyzer.py +61 -0
src/models/audio-processor.py +55 -0
src/models/model-manager.py +79 -0

src/models/analyzer.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from .model_manager import ModelManager
+from .audio_processor import AudioProcessor
+from typing import Dict
class Analyzer:
    """Run the audio -> transcription -> emotion / mental-health pipeline.

    The analyzer owns no models itself: it delegates audio loading and
    feature extraction to ``audio_processor`` and every model call to
    ``model_manager``, then blends the text-derived risk scores with two
    audio cues (energy and pitch variability).
    """

    # Relative weight of the text-derived score and of the audio-derived cue
    # when the two are blended into one risk score.
    TEXT_WEIGHT = 0.7
    AUDIO_WEIGHT = 0.3

    # Annotations are quoted so they are not evaluated when the class is defined.
    def __init__(self, model_manager: "ModelManager", audio_processor: "AudioProcessor"):
        """Store the collaborators and load the models immediately.

        Args:
            model_manager: Object providing ``load_models``, ``transcribe``,
                ``analyze_emotions`` and ``analyze_mental_health``.
            audio_processor: Object providing ``process_audio``.
        """
        self.model_manager = model_manager
        self.audio_processor = audio_processor
        self.model_manager.load_models()

    def analyze(self, audio_path: str) -> Dict:
        """Analyze one audio file and return every intermediate result.

        Args:
            audio_path: Path of the audio file handed to the audio processor.

        Returns:
            A dict with the keys ``transcription``, ``emotions`` (``scores``
            plus ``dominant_emotion``), ``mental_health_indicators`` and
            ``audio_features``. ``dominant_emotion`` is ``None`` when the
            emotion model returned no scores.
        """
        waveform, features = self.audio_processor.process_audio(audio_path)
        transcription = self.model_manager.transcribe(waveform)
        emotions = self.model_manager.analyze_emotions(transcription)
        text_scores = self.model_manager.analyze_mental_health(transcription)
        mental_health = self._combine_analysis(text_scores, features)

        # max() raises on an empty mapping, so guard the degenerate case.
        dominant_emotion = max(emotions, key=emotions.get) if emotions else None

        return {
            'transcription': transcription,
            'emotions': {
                'scores': emotions,
                'dominant_emotion': dominant_emotion
            },
            'mental_health_indicators': mental_health,
            'audio_features': features
        }

    @classmethod
    def _blend(cls, text_score: float, audio_cue: float) -> float:
        """Blend a text score with an audio cue, keeping the result in [0, 1].

        A non-finite cue (NaN or infinity) means the audio carried no usable
        information, so the text score is returned unchanged.
        """
        import math

        if not math.isfinite(audio_cue):
            return text_score
        audio_cue = min(1.0, max(0.0, audio_cue))
        blended = text_score * cls.TEXT_WEIGHT + audio_cue * cls.AUDIO_WEIGHT
        return min(1.0, max(0.0, blended))

    def _combine_analysis(self, mental_health: Dict, features: Dict) -> Dict:
        """Combine mental health analysis with audio features.

        Args:
            mental_health: Text-derived scores; must contain
                ``depression_risk`` and ``anxiety_risk``. It is not modified.
            features: Audio features; ``features['energy']['mean']`` and
                ``features['pitch']['std']`` / ``['mean']`` are read.

        Returns:
            A new dict with the adjusted ``depression_risk`` and
            ``anxiety_risk`` plus a ``confidence`` entry; every other key of
            ``mental_health`` is carried over untouched.
        """
        import math

        combined = dict(mental_health)  # never mutate the caller's dict

        # Lower energy may indicate depression.
        energy_level = features['energy']['mean']
        low_energy = 1 - energy_level

        # Higher pitch variability may indicate anxiety. The raw standard
        # deviation is in Hz and would swamp a 0-1 score, so it is expressed
        # relative to the mean pitch (coefficient of variation).
        pitch = features['pitch']
        pitch_mean = pitch.get('mean', float('nan'))
        if math.isfinite(pitch_mean) and pitch_mean > 0:
            pitch_variability = pitch['std'] / pitch_mean
        else:
            # No usable pitch track (e.g. no voiced frames were detected).
            pitch_variability = float('nan')

        combined['depression_risk'] = self._blend(
            mental_health['depression_risk'], low_energy
        )
        combined['anxiety_risk'] = self._blend(
            mental_health['anxiety_risk'], pitch_variability
        )

        # Fixed example values, not computed from the models or the audio.
        combined['confidence'] = {
            'depression': 0.8,
            'anxiety': 0.8,
            'stress': 0.7
        }
        return combined

src/models/audio-processor.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import librosa
+import numpy as np
+from typing import Dict, Tuple
class AudioProcessor:
    """Load an audio file and extract MFCC, pitch and energy features."""

    def __init__(self, sample_rate: int = 16000, n_mfcc: int = 13, n_mels: int = 128):
        """Configure the extractor.

        Args:
            sample_rate: Rate (Hz) passed to ``librosa.load`` and to the
                feature extractors.
            n_mfcc: Number of MFCC coefficients to compute.
            n_mels: Stored on the instance; no method of this class reads it.
        """
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc
        self.n_mels = n_mels

    def process_audio(self, audio_path: str) -> Tuple[np.ndarray, Dict]:
        """Load ``audio_path`` and compute its features.

        Args:
            audio_path: Path of the audio file to load.

        Returns:
            ``(waveform, features)`` where ``features`` has the keys
            ``mfcc``, ``pitch`` and ``energy``.

        Raises:
            ValueError: If the file yields no audio samples.
        """
        waveform, _ = librosa.load(audio_path, sr=self.sample_rate)
        if waveform.size == 0:
            # Fail with a clear message instead of an obscure error raised
            # from inside one of the feature extractors.
            raise ValueError(f"No audio samples could be read from {audio_path!r}")

        features = {
            'mfcc': self._extract_mfcc(waveform),
            'pitch': self._extract_pitch(waveform),
            'energy': self._extract_energy(waveform)
        }
        return waveform, features

    @staticmethod
    def _summarize(values: np.ndarray) -> Dict:
        """Return mean/std/max/min of the finite entries of ``values``.

        Non-finite entries (NaN, infinity) are ignored. When no finite entry
        remains, every statistic is ``0.0`` so the result stays free of NaN
        and no NumPy "empty slice" warning is emitted.
        """
        values = np.asarray(values)
        finite = values[np.isfinite(values)]
        if finite.size == 0:
            return {'mean': 0.0, 'std': 0.0, 'max': 0.0, 'min': 0.0}
        return {
            'mean': float(np.mean(finite)),
            'std': float(np.std(finite)),
            'max': float(np.max(finite)),
            'min': float(np.min(finite))
        }

    def _extract_mfcc(self, waveform: np.ndarray) -> np.ndarray:
        """Return the MFCC matrix averaged along axis 1."""
        mfccs = librosa.feature.mfcc(
            y=waveform,
            sr=self.sample_rate,
            n_mfcc=self.n_mfcc
        )
        return mfccs.mean(axis=1)

    def _extract_pitch(self, waveform: np.ndarray) -> Dict:
        """Return summary statistics of the pYIN fundamental-frequency track.

        pYIN reports NaN for unvoiced frames; those frames are skipped, and a
        recording without any voiced frame yields all-zero statistics.
        """
        f0, _voiced_flag, _voiced_probs = librosa.pyin(
            waveform,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=self.sample_rate
        )
        return self._summarize(f0)

    def _extract_energy(self, waveform: np.ndarray) -> Dict:
        """Return summary statistics of the frame-wise RMS energy."""
        rms = librosa.feature.rms(y=waveform)[0]
        return self._summarize(rms)

src/models/model-manager.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from transformers import (
+    WhisperProcessor, WhisperForConditionalGeneration,
+    AutoModelForSequenceClassification, AutoTokenizer
+)
+import torch
class ModelManager:
    """Load and run the speech, emotion and clinical-text models."""

    WHISPER_MODEL = "openai/whisper-base"
    EMOTION_MODEL = "arpanghoshal/EmoRoBERTa"
    CLINICAL_MODEL = "emilyalsentzer/Bio_ClinicalBERT"

    # Output keys of analyze_mental_health, in logit order. The clinical
    # model is loaded with exactly this many labels.
    MENTAL_HEALTH_LABELS = ('depression_risk', 'anxiety_risk', 'stress_level')

    def __init__(self):
        """Pick the device (CUDA when available) and create empty registries."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.models = {}
        self.tokenizers = {}
        self.processors = {}

    def load_models(self):
        """Load all three models onto ``self.device``.

        Calling this again once every model is registered is a no-op, so the
        weights are not loaded a second time.
        """
        if {'whisper', 'emotion', 'clinical'} <= self.models.keys():
            return

        # Load Whisper for speech recognition
        self.processors['whisper'] = WhisperProcessor.from_pretrained(self.WHISPER_MODEL)
        self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained(
            self.WHISPER_MODEL
        ).to(self.device)

        # Load EmoRoBERTa for emotion detection
        # NOTE(review): this checkpoint may ship TensorFlow weights only, in
        # which case loading needs from_tf=True - confirm against the repo.
        self.tokenizers['emotion'] = AutoTokenizer.from_pretrained(self.EMOTION_MODEL)
        self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained(
            self.EMOTION_MODEL
        ).to(self.device)

        # Load ClinicalBERT for analysis. num_labels must match the number of
        # scores read in analyze_mental_health; with the default head size the
        # third score does not exist and indexing it raises IndexError.
        # NOTE(review): the checkpoint is presumably a base model, so this
        # classification head would be untrained - confirm before relying on
        # the scores.
        self.tokenizers['clinical'] = AutoTokenizer.from_pretrained(self.CLINICAL_MODEL)
        self.models['clinical'] = AutoModelForSequenceClassification.from_pretrained(
            self.CLINICAL_MODEL,
            num_labels=len(self.MENTAL_HEALTH_LABELS)
        ).to(self.device)

    def transcribe(self, audio_input, sampling_rate: int = 16000):
        """Transcribe a waveform with Whisper.

        Args:
            audio_input: Raw audio samples.
            sampling_rate: Sampling rate of ``audio_input`` in Hz, forwarded
                to the processor so a rate mismatch is reported instead of
                being silently assumed away.

        Returns:
            The decoded text of the first (only) sequence.
        """
        processor = self.processors['whisper']
        inputs = processor(
            audio_input,
            sampling_rate=sampling_rate,
            return_tensors="pt"
        ).input_features.to(self.device)
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            generated_ids = self.models['whisper'].generate(inputs)
        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    def _tokenize(self, name, text):
        """Tokenize ``text`` with the tokenizer registered under ``name``."""
        return self.tokenizers[name](
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)

    @staticmethod
    def _label_scores(probabilities, id2label):
        """Map a 1-D sequence of probabilities to ``{label: probability}``.

        Labels come from ``id2label`` (index -> name); an index without an
        entry falls back to ``LABEL_<index>`` so no score is dropped.
        """
        id2label = id2label or {}
        return {
            str(id2label.get(index, f"LABEL_{index}")): float(probability)
            for index, probability in enumerate(probabilities)
        }

    def analyze_emotions(self, text):
        """Return ``{emotion: probability}`` for ``text``.

        The emotion names are taken from the model's own ``id2label`` mapping
        rather than a hard-coded list, so each probability is paired with the
        label the model assigns to that index.
        """
        model = self.models['emotion']
        inputs = self._tokenize('emotion', text)
        with torch.no_grad():
            outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        return self._label_scores(probs[0], getattr(model.config, 'id2label', None))

    def analyze_mental_health(self, text):
        """Return sigmoid scores for depression, anxiety and stress.

        Returns:
            A dict with the keys ``depression_risk``, ``anxiety_risk`` and
            ``stress_level``, each the sigmoid of one output logit.
        """
        inputs = self._tokenize('clinical', text)
        with torch.no_grad():
            outputs = self.models['clinical'](**inputs)
        scores = torch.sigmoid(outputs.logits)[0]
        return {
            label: float(scores[index])
            for index, label in enumerate(self.MENTAL_HEALTH_LABELS)
        }