HealthVoiceAnalyzeroneline

Sleeping

File size: 5,860 Bytes

410fd66
 
 
 
a6dea81
410fd66
573cc21
 
 
68390a5
573cc21
 
 
 
 
 
 
 
45a579f
573cc21
 
 
 
 
 
 
a6dea81
 
 
 
 
 
 
 
 
 
573cc21
 
 
a6dea81
573cc21
 
 
 
 
 
 
 
d2ad93f
573cc21
 
 
 
 
 
 
 
432d77e
dd19451
a6dea81
 
573cc21
a6dea81
 
573cc21
 
 
a6dea81
432d77e
 
573cc21
a6dea81
573cc21
 
a6dea81
 
 
 
573cc21
410fd66
 
 
 
432d77e
 
 
 
 
a6dea81
 
 
410fd66
a6dea81
 
 
410fd66
a6dea81
 
 
 
410fd66
a6dea81
 
 
 
410fd66
a6dea81
 
410fd66
a6dea81
 
410fd66
a6dea81
 
410fd66
a6dea81
 
 
 
 
 
410fd66
 
 
 
 
a937006
432d77e
 
 
 
 
 
 
 
 
a937006
410fd66
 
 
 
 
 
573cc21
410fd66
 
 
a6dea81
410fd66

import gradio as gr
import librosa
import numpy as np
import os
import hashlib
from datetime import datetime
from transformers import pipeline
import soundfile as sf
import torch

# Load both local models once at import time. A model that cannot be loaded is
# left as None so the functions that use it can report the problem instead of
# the whole module failing to import.
whisper = None
try:
    # English-only Whisper checkpoint for speech-to-text.
    # device=-1 runs on CPU; device=0 would select the first GPU.
    whisper = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        device=-1,
    )
    print("Whisper model loaded successfully.")
except Exception as whisper_error:
    print(f"Failed to load Whisper model: {str(whisper_error)}")
    whisper = None

symptom_classifier = None
try:
    # Text classifier that maps a symptom description to a condition label (CPU).
    symptom_classifier = pipeline(
        "text-classification",
        model="abhirajeshbhai/symptom-2-disease-net",
        device=-1,
    )
    print("Symptom-2-Disease model loaded successfully.")
except Exception as classifier_error:
    print(f"Failed to load Symptom-2-Disease model: {str(classifier_error)}")
    symptom_classifier = None

def compute_file_hash(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed in 4096-byte reads, so the
    whole file never has to be held in memory. The digest serves as a
    fingerprint for telling files apart.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()

def transcribe_audio(audio_file):
    """Transcribe an audio file with the local Whisper pipeline.

    The audio is loaded and resampled to 16 kHz with librosa, written to a
    temporary WAV file, and that file is handed to the module-level
    ``whisper`` pipeline.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The stripped transcription text on success. Otherwise a status
        message string, one of:
        - "Error: Whisper model not loaded. ..." when the model is missing,
        - "Transcription empty. ..." when no text was produced,
        - "Error transcribing audio: <details>" when any step raised.

    No exception is propagated to the caller; failures are reported through
    the returned string.
    """
    if not whisper:
        return "Error: Whisper model not loaded. Check logs for details."

    # Imported locally so this function stays self-contained.
    import tempfile

    temp_wav = None
    try:
        # Load and resample audio to 16,000 Hz
        audio, sr = librosa.load(audio_file, sr=16000)

        # Use a uniquely named temp file: a path derived from the input's
        # basename would collide when two requests share a file name.
        fd, temp_wav = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        sf.write(temp_wav, audio, sr)

        # Transcribe
        result = whisper(temp_wav)
        transcription = result.get("text", "").strip()
        print(f"Transcription: {transcription}")

        if not transcription:
            return "Transcription empty. Please provide clear audio describing symptoms in English."
        return transcription
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
    finally:
        # Runs on every path (success, empty result, exception) so the
        # temporary WAV never outlives the call.
        if temp_wav is not None:
            try:
                os.remove(temp_wav)
            except OSError as cleanup_error:
                # Best effort: report the failure but keep the real result.
                print(f"Failed to delete temporary WAV file: {cleanup_error}")

def analyze_symptoms(text):
    """Classify a symptom description with the local Symptom-2-Disease model.

    Args:
        text: Transcribed description of symptoms.

    Returns:
        A ``(message, score)`` tuple. On success ``message`` is the label of
        the first prediction and ``score`` its confidence. In every other
        case ``score`` is ``0.0`` and ``message`` explains why: the model is
        not loaded, the text is empty or carries a transcription error, the
        classifier returned nothing, or the classifier raised.
    """
    if not symptom_classifier:
        return "Error: Symptom-2-Disease model not loaded. Check logs for details.", 0.0
    try:
        # Reject empty input and transcription failures before classifying.
        unusable = not text or "Error transcribing" in text
        if unusable:
            return "No valid transcription for analysis.", 0.0

        outputs = symptom_classifier(text)
        has_prediction = outputs and isinstance(outputs, list) and len(outputs) > 0
        if not has_prediction:
            return "No health condition predicted", 0.0

        # Only the first entry of the classifier output is reported.
        label = outputs[0]["label"]
        confidence = outputs[0]["score"]
        print(f"Health Prediction: {label}, Score: {confidence:.4f}")
        return label, confidence
    except Exception as exc:
        return f"Error analyzing symptoms: {str(exc)}", 0.0

def analyze_voice(audio_file):
    """Analyze a voice recording for health indicators.

    Pipeline: copy the input to a private temporary file, log its hash and
    basic signal statistics, transcribe it, classify the transcription, and
    build a feedback message.

    The caller's file is never moved or deleted; only the private working
    copy is removed, and that happens on every exit path.

    Args:
        audio_file: Path of the recorded or uploaded audio file. ``None`` or
            an empty string (nothing submitted) yields an error message.

    Returns:
        A feedback string. On success it contains the assessment, debug
        information and a disclaimer. When transcription or symptom analysis
        produced a status message instead of a real result, that message is
        returned unchanged. Any unexpected failure yields
        "Error processing audio: <details>".
    """
    # Imported locally so this function stays self-contained.
    import shutil
    import tempfile

    if not audio_file:
        return "Error processing audio: no audio file was provided."

    # Status messages returned by transcribe_audio / analyze_symptoms that
    # must be passed straight back to the user rather than treated as a
    # transcription or as a predicted condition.
    transcription_failures = (
        "Error transcribing",
        "Error: Whisper model not loaded",
        "Transcription empty.",
    )
    analysis_failures = (
        "Error analyzing",
        "Error: Symptom-2-Disease model not loaded",
        "No valid transcription",
    )

    working_copy = None
    try:
        # Work on a uniquely named copy. Copying (rather than renaming) keeps
        # the caller's file intact, needs no pre-existing directory, and
        # works across filesystems. The suffix is kept as a format hint.
        suffix = os.path.splitext(audio_file)[1]
        fd, working_copy = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        shutil.copyfile(audio_file, working_copy)

        # Log audio file info
        file_hash = compute_file_hash(working_copy)
        print(f"Processing audio file: {working_copy}, Hash: {file_hash}")

        # Load audio to verify format
        audio, sr = librosa.load(working_copy, sr=16000)
        print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s, Mean: {np.mean(audio):.4f}, Std: {np.std(audio):.4f}")

        # Transcribe audio
        transcription = transcribe_audio(working_copy)
        if transcription.startswith(transcription_failures):
            return transcription

        # Analyze symptoms
        prediction, score = analyze_symptoms(transcription)
        if prediction.startswith(analysis_failures):
            return prediction

        # Generate feedback
        if prediction == "No health condition predicted":
            feedback = "No significant health indicators detected."
        else:
            feedback = f"Possible health condition: {prediction} (confidence: {score:.4f}). Consult a doctor."

        feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', Prediction = {prediction}, Confidence = {score:.4f}, File Hash = {file_hash}"
        feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."

        return feedback
    except Exception as e:
        return f"Error processing audio: {str(e)}"
    finally:
        # Clean up the working copy on every path, including early returns
        # and exceptions.
        if working_copy is not None:
            try:
                os.remove(working_copy)
                print(f"Deleted temporary audio file: {working_copy}")
            except OSError as cleanup_error:
                print(f"Failed to delete audio file: {str(cleanup_error)}")

def test_with_sample_audio():
    """Run the analyzer over the bundled sample recordings.

    Each sample that exists on disk is passed to ``analyze_voice``; a missing
    sample contributes a "Sample not found" line instead. The per-sample
    results are returned as a single newline-joined string.
    """
    sample_paths = ("audio_samples/sample.wav", "audio_samples/common_voice_en.wav")
    reports = [
        analyze_voice(path) if os.path.exists(path) else f"Sample not found: {path}"
        for path in sample_paths
    ]
    return "\n".join(reports)

# Gradio interface: one audio input (delivered to the handler as a file path)
# and one text output showing the feedback produced by analyze_voice.
voice_input = gr.Audio(type="filepath", label="Record or Upload Voice")
feedback_output = gr.Textbox(label="Health Assessment Feedback")

iface = gr.Interface(
    fn=analyze_voice,
    inputs=voice_input,
    outputs=feedback_output,
    title="Health Voice Analyzer",
    description="Record or upload a voice sample describing symptoms for preliminary health assessment. Supports English (transcription), with symptom analysis in English.",
)

if __name__ == "__main__":
    # Run the sample-audio check first and print its report, then start the
    # web server listening on all interfaces, port 7860.
    sample_report = test_with_sample_audio()
    print(sample_report)
    iface.launch(server_name="0.0.0.0", server_port=7860)