|
import gradio as gr |
|
import numpy as np |
|
import tensorflow as tf |
|
import librosa |
|
import librosa.util |
|
import pickle |
|
from sklearn.preprocessing import LabelEncoder |
|
|
|
|
|
|
|
def _fixed_width_with_deltas(base, n_frames):
    """Return ``base`` plus its first- and second-order deltas, each forced to ``n_frames`` columns."""
    # Deltas are computed on the un-padded matrix first, then every block is
    # padded/truncated along the time axis so all recordings share one width.
    first_order = librosa.feature.delta(base)
    second_order = librosa.feature.delta(base, order=2)
    return [
        librosa.util.fix_length(block, size=n_frames, axis=1)
        for block in (base, first_order, second_order)
    ]


def extract_features(file_path):
    """Load an audio file and build the stacked feature matrix used by the classifier.

    The audio is loaded at 8 kHz. Two 13-row representations are computed
    (MFCCs and a 13-band mel spectrogram in dB), each extended with its
    first- and second-order deltas. All six blocks are padded or truncated to
    100 frames and stacked vertically in the order: MFCC, MFCC delta, MFCC
    delta-delta, mel dB, mel delta, mel delta-delta.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A dict with two keys:
            "features": the stacked 2-D feature matrix.
            "hnr": the mean of the harmonic component of the waveform.
                NOTE(review): despite the key name this is not a
                harmonics-to-noise ratio; confirm what the training
                pipeline expected.

    Raises:
        ValueError: If loading or any feature computation fails. The original
            exception is attached as ``__cause__``.
    """
    n_coeffs = 13   # rows per feature block
    n_frames = 100  # fixed number of time frames per block

    try:
        y, sr = librosa.load(file_path, sr=8000)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_coeffs)
        mfcc_blocks = _fixed_width_with_deltas(mfcc, n_frames)

        mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_coeffs)
        mel_db = librosa.power_to_db(mel_power)
        mel_blocks = _fixed_width_with_deltas(mel_db, n_frames)

        hnr = np.mean(librosa.effects.harmonic(y))

        features = np.vstack(mfcc_blocks + mel_blocks)
        return {"features": features, "hnr": hnr}
    except Exception as e:
        # Chain explicitly so the underlying failure stays visible in tracebacks.
        raise ValueError(f"Error in feature extraction: {e}") from e
|
|
|
|
|
|
|
def prepare_input(features):
    """Standardise the feature matrix and reshape it for the model.

    Args:
        features: Dict with a 2-D array under "features" and a scalar under
            "hnr" (the structure returned by ``extract_features``).

    Returns:
        A ``(feature_matrix, hnr)`` tuple. ``feature_matrix`` is normalised
        with a single global mean and standard deviation, then given a
        leading and a trailing singleton axis. ``hnr`` is passed through
        unchanged.
    """
    feature_matrix = features["features"]
    hnr = features["hnr"]

    centered = feature_matrix - np.mean(feature_matrix)
    spread = np.std(feature_matrix)
    # A constant matrix (e.g. silent audio) has zero spread; dividing by it
    # would fill the model input with NaNs. Leave the centred zeros as they are.
    if spread == 0:
        spread = 1.0
    feature_matrix = centered / spread

    # Add singleton axes at the front and back of the 2-D matrix.
    feature_matrix = feature_matrix[np.newaxis, ..., np.newaxis]
    return feature_matrix, hnr
|
|
|
|
|
|
|
def predict_class(file_path, model, label_encoder):
    """Classify one audio file and return a human-readable result string.

    Args:
        file_path: Path of the audio file to classify.
        model: Object exposing ``predict(array)`` that returns class scores.
        label_encoder: Object exposing ``inverse_transform`` to map a class
            index back to its name.

    Returns:
        ``"Predicted Class: <name>"`` on success, or
        ``"Error in prediction: <message>"`` if any step raises. Exceptions
        are never propagated, so the result can be shown directly in the UI.
    """
    try:
        extracted = extract_features(file_path)
        # The second element (hnr) is not used for prediction.
        model_input, _ = prepare_input(extracted)

        scores = model.predict(model_input)
        best_index = np.argmax(scores)

        class_names = label_encoder.inverse_transform([best_index])
        return f"Predicted Class: {class_names[0]}"
    except Exception as e:
        return f"Error in prediction: {e}"
|
|
|
|
|
|
|
# Trained Keras classifier, loaded once when the module is imported.
model = tf.keras.models.load_model("voice_classification_modelm.h5")

# Class names the model can output.
# NOTE(review): "all_vowels_healthy" is spelled differently from the other
# "allvowels_*" entries; confirm it matches the label used during training.
labels = [
    "all_vowels_healthy",
    "allvowels_functional",
    "allvowels_laryngitis",
    "allvowels_leukoplakia",
    "allvowels_psychogenic",
    "allvowels_rlnp",
    "allvowels_sd",
]

# LabelEncoder assigns indices by sorted class name, not by list position.
# fit() returns the fitted encoder, so construction and fitting are chained.
label_encoder = LabelEncoder().fit(labels)
|
|
|
|
|
|
|
def classify_audio(audio_file):
    """Gradio callback: classify the uploaded audio file.

    Args:
        audio_file: Path of the uploaded file, or ``None``/empty when the
            user submitted without providing audio.

    Returns:
        The result string produced by ``predict_class``, or an error string
        when no file was supplied.
    """
    # Without this guard an empty submission reaches the audio loader and
    # comes back as an opaque nested loader error.
    if not audio_file:
        return "Error in prediction: no audio file provided"
    return predict_class(audio_file, model, label_encoder)
|
|
|
|
|
|
|
# Input/output widgets for the web UI. The audio widget hands the callback a
# file path rather than raw samples.
_audio_input = gr.Audio(type="filepath", label="Upload an Audio File")
_result_box = gr.Textbox(label="Predicted Class")

# Single-function Gradio app wired to classify_audio.
interface = gr.Interface(
    fn=classify_audio,
    inputs=_audio_input,
    outputs=_result_box,
    title="Voice Disorder Classification",
    description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
    examples=["example_audio.wav"],
)


# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|
|
|
|