Spaces:

sancho10
/

RITISHREE

Sleeping

App Files Files Community

RITISHREE / app.py

sancho10

Update app.py

7084f59 verified 6 months ago

raw

history blame

3.91 kB

	import gradio as gr
	import numpy as np
	import tensorflow as tf
	import librosa
	import librosa.util
	import pickle
	from sklearn.preprocessing import LabelEncoder


	# Feature Extraction Function
	def extract_features(file_path, *, sr=8000, n_mfcc=13, n_mels=13, n_frames=100):
	    """Extract a fixed-size spectral feature matrix from an audio file.

	    Six feature groups are stacked row-wise, in this order: MFCCs, their
	    first and second deltas, a log-scaled mel spectrogram, and its first
	    and second deltas. Each group is zero-padded or truncated along the
	    time axis to ``n_frames`` columns, so with the defaults the matrix has
	    3 * 13 + 3 * 13 = 78 rows and 100 columns.

	    Args:
	        file_path: Path to the audio file to load.
	        sr: Sample rate the audio is resampled to on load.
	        n_mfcc: Number of MFCC coefficients.
	        n_mels: Number of mel bands in the mel spectrogram.
	        n_frames: Number of time frames every feature group is fixed to.

	    Returns:
	        A dict with ``"features"`` (the stacked matrix) and ``"hnr"``
	        (a scalar, see the note in the body).

	    Raises:
	        ValueError: If loading or any feature computation fails. The
	            original exception is attached as ``__cause__``.
	    """
	    try:
	        # Resample on load so every clip is analysed at the same rate.
	        y, sr = librosa.load(file_path, sr=sr)
	        if y.size == 0:
	            # Fail with a clear message instead of an obscure error from
	            # deep inside the spectral routines.
	            raise ValueError("audio file contains no samples")

	        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

	        # Mel power spectrogram converted to dB. The original comments
	        # called this "SFCC"; what is computed is a log-mel spectrogram.
	        mel_db = librosa.power_to_db(
	            librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
	        )

	        # NOTE: this is the mean sample value of the harmonic component,
	        # kept as a rough stand-in for HNR; it is not an actual ratio.
	        hnr = np.mean(librosa.effects.harmonic(y))

	        # Deltas are taken on the unpadded features; padding/truncation to
	        # n_frames happens afterwards, once per group.
	        groups = []
	        for base in (mfcc, mel_db):
	            groups.append(base)
	            groups.append(librosa.feature.delta(base))
	            groups.append(librosa.feature.delta(base, order=2))

	        features = np.vstack([
	            librosa.util.fix_length(group, size=n_frames, axis=1)
	            for group in groups
	        ])

	        return {"features": features, "hnr": hnr}
	    except Exception as e:
	        # Single failure type for callers; chain the cause for debugging.
	        raise ValueError(f"Error in feature extraction: {str(e)}") from e


	# Prepare Input Function
	def prepare_input(features):
	    """Normalise the feature matrix and add batch/channel axes.

	    The matrix is standardised with its own global mean and standard
	    deviation. When the standard deviation is zero (a constant matrix,
	    e.g. from a silent clip) the division is skipped in effect, so the
	    result is all zeros rather than NaN.

	    Args:
	        features: Dict with a 2-D array under ``"features"`` and a scalar
	            under ``"hnr"``.

	    Returns:
	        A tuple ``(feature_matrix, hnr)`` where ``feature_matrix`` has a
	        leading batch axis and a trailing channel axis added, i.e. shape
	        ``(1, rows, cols, 1)``, and ``hnr`` is passed through unchanged.
	    """
	    feature_matrix = features["features"]
	    hnr = features["hnr"]

	    scale = np.std(feature_matrix)
	    if scale == 0:
	        # Constant input: avoid 0/0 -> NaN being fed to the model.
	        scale = 1.0
	    feature_matrix = (feature_matrix - np.mean(feature_matrix)) / scale

	    # Add batch (front) and channel (back) dimensions.
	    feature_matrix = feature_matrix[np.newaxis, ..., np.newaxis]
	    return feature_matrix, hnr


	# Prediction Function
	def predict_class(file_path, model, label_encoder):
	    """Classify one audio file and return a human-readable result string.

	    Args:
	        file_path: Path to the audio file to classify.
	        model: Object whose ``predict`` method accepts the prepared
	            feature tensor and returns class scores.
	        label_encoder: Object whose ``inverse_transform`` maps a list of
	            class indices back to label names.

	    Returns:
	        ``"Predicted Class: <label>"`` on success, or
	        ``"Error in prediction: <message>"`` if any step raises. This
	        function never propagates an exception to the caller.
	    """
	    try:
	        # The HNR value returned alongside the tensor is not used here.
	        model_input, _ = prepare_input(extract_features(file_path))

	        scores = model.predict(model_input)
	        best_index = np.argmax(scores)

	        label = label_encoder.inverse_transform([best_index])[0]
	        outcome = f"Predicted Class: {label}"
	    except Exception as err:
	        # UI boundary: report the failure as text instead of raising.
	        outcome = f"Error in prediction: {err}"
	    return outcome


	# Load the pre-trained Keras model from an HDF5 file (relative path).
	model = tf.keras.models.load_model("voice_classification_modelm.h5")

	# Class names the model can report.
	# Note: replace these labels with the actual classes used during training.
	labels = [
	    "all_vowels_healthy",
	    "allvowels_functional",
	    "allvowels_laryngitis",
	    "allvowels_leukoplakia",
	    "allvowels_psychogenic",
	    "allvowels_rlnp",
	    "allvowels_sd",
	]

	# Encoder used to turn a predicted class index back into its label name.
	label_encoder = LabelEncoder().fit(labels)


	# Gradio Interface Function
	def classify_audio(audio_file):
	    """Gradio callback: classify the uploaded audio file.

	    Args:
	        audio_file: Path of the uploaded file, or a falsy value
	            (``None`` / empty string) when nothing was uploaded.

	    Returns:
	        The result string from ``predict_class``, or an error message in
	        the same ``"Error in prediction: ..."`` format when no file was
	        provided.
	    """
	    if not audio_file:
	        # Nothing to analyse; answer directly rather than letting the
	        # audio loader fail on a missing path.
	        return "Error in prediction: no audio file was provided."
	    return predict_class(audio_file, model, label_encoder)


	# Gradio UI: one audio upload (handed to the callback as a file path)
	# in, one text box with the prediction out.
	interface = gr.Interface(
	    classify_audio,
	    gr.Audio(type="filepath", label="Upload an Audio File"),
	    gr.Textbox(label="Predicted Class"),
	    title="Voice Disorder Classification",
	    description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
	    examples=["example_audio.wav"],  # Replace with paths to example audio files
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    interface.launch()