Spaces:
Running
Running
File size: 3,768 Bytes
3b84715 942bf87 51a3749 3a814dc 51a3749 3b84715 76a754e 51159d5 942bf87 dc9275e 3b84715 dc9275e 51159d5 dc9275e 51159d5 dc9275e 51159d5 dc9275e 51159d5 3b84715 14f4c95 d5efa2c 3b84715 cf1d474 2ef6d0c 3b84715 942bf87 3b84715 cf1d474 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import gradio as gr
import joblib
import numpy as np
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler
# Restore the two fitted artifacts that ship next to this script, in order:
# the SVM classifier first, then the scaler applied to its input features.
model, scaler = (joblib.load(path) for path in ("SVM.joblib", "norm.joblib"))
# Names of the composition descriptors that extract_features() keeps.
# The first row holds the 20 single-letter amino-acid names; the remaining rows
# hold a subset of two-letter (dipeptide) names grouped by their first residue.
# Presumably this is the feature subset the SVM was trained on -- TODO confirm
# against the training pipeline; the order here fixes the output order.
selected_features = [
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
"RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
"NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
"DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
"CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
"EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
"QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
"GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
"HC", "HG", "HL", "HK", "HP",
"IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
"LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
"KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
"MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
"FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
"PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
"SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
"TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
"WC",
"YR", "YD", "YC", "YG", "YL", "YS", "YV",
"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]
def extract_features(sequence):
    """Build the scaled, model-ready feature column for one sequence.

    The sequence is stripped of whitespace and upper-cased, its composition
    descriptors are computed with propy, the first 420 descriptors are scaled
    with the module-level ``scaler``, and the entries listed in
    ``selected_features`` are then picked out *by name*.

    Args:
        sequence: Amino-acid sequence given as one-letter codes.

    Returns:
        A NumPy array of shape ``(len(selected_features), 1)`` holding the
        scaled values in the same order as ``selected_features``.

    Raises:
        ValueError: If the sequence is empty after whitespace removal, or if
            a name in ``selected_features`` is absent from the descriptors.
    """
    # Descriptor names are upper-case one-letter codes, so normalise the input
    # the same way; also drop spaces/newlines that come from pasted text.
    cleaned = "".join((sequence or "").split()).upper()
    if not cleaned:
        raise ValueError("sequence must contain at least one residue")

    all_features = AAComposition.CalculateAADipeptideComposition(cleaned)

    # Only the leading 420 descriptors are scaled (20 single-residue names plus
    # 20 * 20 dipeptide names -- assumes the scaler was fitted on exactly these
    # columns in this order; TODO confirm against the training pipeline).
    scaled_width = 420
    names = list(all_features)[:scaled_width]
    raw_row = np.array([all_features[name] for name in names], dtype=float)
    normalized = np.asarray(scaler.transform(raw_row.reshape(1, -1))).ravel()

    # Look each selected descriptor up by name. NOTE(review): the earlier code
    # took the first len(selected_features) scaled values by position, which
    # does not correspond to the names listed; verify the model expects the
    # name-selected columns.
    position = {name: idx for idx, name in enumerate(names)}
    missing = [name for name in selected_features if name not in position]
    if missing:
        raise ValueError(f"descriptors missing from propy output: {missing}")

    picked = normalized[[position[name] for name in selected_features]]
    # Column vector, matching what callers transpose before predicting.
    return picked.reshape(-1, 1)
def predict(sequence):
    """Classify one peptide sequence as "AMP" or "Non-AMP".

    Args:
        sequence: Amino-acid sequence text as entered by the user.

    Returns:
        ``"AMP"`` when the model predicts class 0, otherwise ``"Non-AMP"``.

    Raises:
        gr.Error: If the input is missing or contains only whitespace, so the
            UI shows a readable message instead of an internal failure.
    """
    if sequence is None or not sequence.strip():
        raise gr.Error("Please enter an amino acid sequence.")

    features = extract_features(sequence)
    # extract_features returns a column; transpose it into a single row and
    # take the prediction for that one row.
    prediction = model.predict(features.T)[0]
    return "AMP" if prediction == 0 else "Non-AMP"
# Web front end: a single text box feeds predict() and its answer is shown
# in a label component.
sequence_input = gr.Textbox(label="Enter Protein Sequence")
prediction_output = gr.Label(label="Prediction")
iface = gr.Interface(
    fn=predict,
    inputs=sequence_input,
    outputs=prediction_output,
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not.",
)

# Start serving the interface; share=True is handed to Gradio unchanged.
iface.launch(share=True)
|