import gradio as gr
import numpy as np
import tensorflow as tf
import librosa
import librosa.util
import pickle
from sklearn.preprocessing import LabelEncoder  # scikit-learn must be installed so the pickled encoder can load
def extract_features(file_path):
    """Load an audio file and return a fixed-size MFCC feature matrix."""
    try:
        # Resample to 8 kHz and compute 13 MFCC coefficients per frame
        y, sr = librosa.load(file_path, sr=8000)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        # Pad or truncate to exactly 100 frames so every clip matches the model input
        mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)

        # Guard against an unexpected number of coefficients
        if mfcc.shape[0] != 13:
            mfcc = librosa.util.fix_length(mfcc, size=13, axis=0)

        return {"mfcc": mfcc}
    except Exception as e:
        raise ValueError(f"Error in feature extraction: {e}") from e
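# Quick sanity check (hypothetical file name, not part of the app):
#   feats = extract_features("sample.wav")
#   print(feats["mfcc"].shape)  # expected: (13, 100)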
def predict_class(file_path, model, label_encoder):
    """Run the classifier on a single audio file and return a readable label."""
    try:
        features = extract_features(file_path)
        mfcc = features["mfcc"]

        # Add batch and channel dimensions: (13, 100) -> (1, 13, 100, 1)
        mfcc = mfcc[np.newaxis, ..., np.newaxis]

        # Take the class with the highest predicted probability and map it back to its label
        prediction = model.predict(mfcc)
        predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
        return f"Predicted Class: {predicted_class[0]}"
    except Exception as e:
        return f"Error in prediction: {e}"
# Load the trained Keras model and the label encoder fitted during training
model = tf.keras.models.load_model("voice_classification_modelm.h5")

with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)
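# Both artifacts are assumed to have been saved at training time, roughly like
# (hypothetical training-side code, not part of this app):
#   encoder = LabelEncoder()
#   encoded_labels = encoder.fit_transform(train_labels)
#   ...train the model on the MFCC features...
#   model.save("voice_classification_modelm.h5")
#   with open("label_encoder.pkl", "wb") as f:
#       pickle.dump(encoder, f)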
def classify_audio(audio_file):
    """Gradio callback: receives an uploaded file path and returns the prediction string."""
    return predict_class(audio_file, model, label_encoder)
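# The callback can also be tested outside Gradio (hypothetical path):
#   print(classify_audio("sample.wav"))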
interface = gr.Interface(
    fn=classify_audio,
    # Note: Gradio 4+ renamed this parameter to sources=["upload"]
    inputs=gr.Audio(source="upload", type="filepath", label="Upload an Audio File"),
    outputs=gr.Textbox(label="Predicted Class"),
    title="Voice Disorder Classification",
    description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
    examples=["example_audio.wav"],
)

interface.launch()
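# Passing share=True to launch() would additionally expose a temporary public URL.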