import os
import json
import librosa
import cv2
import numpy as np
import tensorflow as tf
import streamlit as st
from streamlit_extras.add_vertical_space import add_vertical_space
from warnings import filterwarnings

filterwarnings('ignore')


def streamlit_config():

    # page configuration
    st.set_page_config(page_title='Classification', layout='centered')

    # page header transparent color
    page_background_color = """
    <style>
    [data-testid="stHeader"] {
        background: rgba(0, 0, 0, 0);
    }
    </style>
    """
    st.markdown(page_background_color, unsafe_allow_html=True)

    # title and position
    st.markdown(f'<h1 style="text-align: center;">Bird Sound Classification</h1>',
                unsafe_allow_html=True)
    add_vertical_space(4)


# Streamlit Configuration Setup
streamlit_config()


def prediction(audio_file):

    # Load the Prediction JSON File to Predict Target_Label
    with open('prediction.json', mode='r') as f:
        prediction_dict = json.load(f)

    # Extract the Audio_Signal and Sample_Rate from Input Audio
    audio, sample_rate = librosa.load(audio_file)

    # Extract the MFCC Features and Aggregate them (mean over time frames)
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_features = np.mean(mfccs_features, axis=1)

    # Reshape MFCC Features to match the expected Conv1D input shape (add both batch & feature dimensions)
    mfccs_features = np.expand_dims(mfccs_features, axis=0)
    mfccs_features = np.expand_dims(mfccs_features, axis=2)

    # Convert into Tensors
    mfccs_tensors = tf.convert_to_tensor(mfccs_features, dtype=tf.float32)

    # Load the Model and Make the Prediction
    model = tf.keras.models.load_model('model.h5')
    prediction = model.predict(mfccs_tensors)

    # Find the Index of the Maximum Probability Value
    target_label = np.argmax(prediction)

    # Find the Target_Label Name using Prediction_dict
    predicted_class = prediction_dict[str(target_label)]
    confidence = round(np.max(prediction) * 100, 2)

    add_vertical_space(1)
    st.markdown(f'<h4 style="text-align: center;">{confidence}% Match Found</h4>',
                unsafe_allow_html=True)
    # Display the Image of the Predicted Bird
    image_path = os.path.join('Inference_Images', f'{predicted_class}.jpg')
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (350, 300))

    _, col2, _ = st.columns([0.1, 0.8, 0.1])
    with col2:
        st.image(img)

    st.markdown(f'<h3 style="text-align: center;">{predicted_class}</h3>',
                unsafe_allow_html=True)
_, col2, _ = st.columns([0.1, 0.9, 0.1])
with col2:
    input_audio = st.file_uploader(label='Upload the Audio', type=['mp3', 'wav'])

if input_audio is not None:

    _, col2, _ = st.columns([0.2, 0.8, 0.2])
    with col2:
        prediction(input_audio)
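
# Usage sketch (illustrative notes only, inferred from what the script above expects):
#   * 'model.h5'          - a trained Keras model taking inputs of shape (batch, 40, 1),
#                           i.e. 40 mean-aggregated MFCC values per audio clip.
#   * 'prediction.json'   - a mapping from class index (as a string) to bird name,
#                           e.g. {"0": "<bird_name>", "1": "<bird_name>", ...}.
#   * 'Inference_Images/' - one '<predicted_class>.jpg' image per class.
# The app is launched with the Streamlit CLI, e.g.:
#   streamlit run app.py   # assuming this script is saved as app.py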