# Source: Hugging Face Space "RITISHREE", file app.py (3.91 kB).
# Last commit: 7084f59 "Update app.py" by sancho10 (verified).
import gradio as gr
import numpy as np
import tensorflow as tf
import librosa
import librosa.util
import pickle
from sklearn.preprocessing import LabelEncoder
# Feature Extraction Function
def extract_features(file_path):
    """Build the stacked spectral feature matrix for one audio file.

    The audio is loaded at 8 kHz. 13 MFCCs and a 13-band mel spectrogram in
    dB (called "SFCC" in this file) are computed, each together with its
    first- and second-order deltas. Every block is padded or truncated to
    100 frames and the six blocks are stacked row-wise.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        A dict with two keys:
        ``"features"`` - the stacked matrix (6 blocks x 13 rows = 78 rows,
        100 columns);
        ``"hnr"`` - a scalar, the mean of the harmonic component of the
        signal.

    Raises:
        ValueError: If any step fails. The original exception is chained as
            ``__cause__`` so the root cause stays visible in tracebacks.
    """
    try:
        # Load audio, resampled to 8 kHz.
        y, sr = librosa.load(file_path, sr=8000)

        # MFCCs and their first/second-order deltas.
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)

        # "SFCC": 13-band mel spectrogram converted to dB, plus deltas.
        sfcc = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=13)
        sfcc_db = librosa.power_to_db(sfcc)
        sfcc_delta = librosa.feature.delta(sfcc_db)
        sfcc_double_delta = librosa.feature.delta(sfcc_db, order=2)

        # NOTE(review): this is only a stand-in for a harmonics-to-noise
        # ratio (mean of the harmonic waveform), kept for simplicity.
        hnr = np.mean(librosa.effects.harmonic(y))

        # Deltas are computed on the unpadded data above; only afterwards is
        # each block forced to exactly 100 frames so the stack has a fixed
        # shape regardless of clip length.
        blocks = (
            mfcc, mfcc_delta, mfcc_double_delta,
            sfcc_db, sfcc_delta, sfcc_double_delta,
        )
        features = np.vstack(
            [librosa.util.fix_length(block, size=100, axis=1) for block in blocks]
        )

        return {"features": features, "hnr": hnr}
    except Exception as e:
        # Chain explicitly so callers can inspect the underlying failure.
        raise ValueError(f"Error in feature extraction: {str(e)}") from e
# Prepare Input Function
def prepare_input(features):
    """Normalize the feature matrix and reshape it for the model.

    Args:
        features: Dict with a ``"features"`` entry (2-D feature matrix) and
            an ``"hnr"`` entry (scalar), as returned by the feature
            extraction step.

    Returns:
        A ``(feature_matrix, hnr)`` tuple. ``feature_matrix`` is the input
        standardized with its global mean and standard deviation, with a
        leading batch axis and a trailing channel axis added, e.g.
        ``(78, 100)`` becomes ``(1, 78, 100, 1)``. ``hnr`` is passed through
        unchanged.
    """
    feature_matrix = features["features"]
    hnr = features["hnr"]

    mean = np.mean(feature_matrix)
    std = np.std(feature_matrix)
    # A constant matrix has zero spread; dividing by it would turn the whole
    # model input into NaN. Fall back to a unit scale so the result is zeros.
    if std == 0:
        std = 1.0
    feature_matrix = (feature_matrix - mean) / std

    # Add batch and channel dimensions for model compatibility.
    feature_matrix = feature_matrix[np.newaxis, ..., np.newaxis]
    return feature_matrix, hnr
# Prediction Function
def predict_class(file_path, model, label_encoder):
    """Classify one audio file and return a human-readable result string.

    Args:
        file_path: Path of the audio file to classify.
        model: Object whose ``predict`` method is called on the prepared
            feature tensor.
        label_encoder: Object whose ``inverse_transform`` maps the winning
            index back to a class label.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in prediction: <message>"`` if anything raises. Failures
        are reported in the returned string rather than propagated.
    """
    try:
        # The HNR value returned alongside the tensor is not used here.
        model_input, _hnr = prepare_input(extract_features(file_path))

        scores = model.predict(model_input)
        best_index = np.argmax(scores)

        # inverse_transform takes and returns a sequence; unwrap the one item.
        label = label_encoder.inverse_transform([best_index])[0]
        return f"Predicted Class: {label}"
    except Exception as exc:
        return f"Error in prediction: {exc}"
# Load Pre-trained Model
# The classifier is read from disk once, when this module is imported.
model = tf.keras.models.load_model("voice_classification_modelm.h5")

# Class names used to decode the model's output index.
# Note: replace these labels with the actual classes used during training.
# NOTE(review): LabelEncoder assigns indices in sorted label order - confirm
# this matches the encoding used at training time.
labels = [
    "all_vowels_healthy",
    "allvowels_functional",
    "allvowels_laryngitis",
    "allvowels_leukoplakia",
    "allvowels_psychogenic",
    "allvowels_rlnp",
    "allvowels_sd",
]

# fit() returns the encoder itself, so construction and fitting are chained.
label_encoder = LabelEncoder().fit(labels)
# Gradio Interface Function
def classify_audio(audio_file):
    """Gradio callback: classify ``audio_file`` with the module-level model.

    Args:
        audio_file: Value supplied by the audio input component (configured
            with ``type="filepath"``).

    Returns:
        The result string produced by ``predict_class``.
    """
    result = predict_class(audio_file, model=model, label_encoder=label_encoder)
    return result
# Gradio Interface
# Single-input, single-output UI: an audio file in, the predicted label out.
interface = gr.Interface(
    title="Voice Disorder Classification",
    description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
    fn=classify_audio,
    # type="filepath" hands the callback a path rather than raw samples.
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=gr.Textbox(label="Predicted Class"),
    # Replace with paths to example audio files.
    examples=["example_audio.wav"],
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()