File size: 3,768 Bytes
3b84715
942bf87
51a3749
3a814dc
51a3749
 
3b84715
76a754e
51159d5
942bf87
dc9275e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b84715
dc9275e
 
51159d5
 
 
 
 
 
 
 
dc9275e
51159d5
 
dc9275e
51159d5
 
 
dc9275e
51159d5
 
 
 
 
 
 
3b84715
14f4c95
d5efa2c
3b84715
 
 
cf1d474
2ef6d0c
3b84715
 
 
 
 
 
 
 
 
942bf87
3b84715
cf1d474
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
import joblib
import numpy as np
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler

# Load the trained SVM classifier and the fitted scaler, in that order.
# Both artifact files must be present next to this script in the Space.
# NOTE(review): joblib artifacts are pickle-based, so loading one can run
# arbitrary code — only ship files produced by a trusted training run.
model, scaler = (
    joblib.load(artifact_path)
    for artifact_path in ("SVM.joblib", "norm.joblib")
)


# Names of the composition features that are passed on to the model.
# Entries are one-letter (single amino acid) and two-letter (dipeptide) names;
# extract_features() iterates this list in order to build the model input, so
# the order here presumably has to match the column order used at training
# time — verify against the training pipeline before editing.
selected_features = [
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
    "AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
    "RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
    "NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
    "DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
    "CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
    "EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
    "QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
    "GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
    "HC", "HG", "HL", "HK", "HP",
    "IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
    "LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
    "KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
    "MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
    "FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
    "PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
    "SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
    "TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
    "WC",
    "YR", "YD", "YC", "YG", "YL", "YS", "YV",
    "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]

def extract_features(sequence):
    """Build the normalized, model-ready feature column for one sequence.

    Steps:
      1. Clean the raw text (drop whitespace, upper-case it).
      2. Compute the composition descriptors with propy and keep the first
         420 of them (presumably the 20 single-residue plus 400 dipeptide
         compositions — the scaler is applied to exactly this slice).
      3. Normalize that 420-wide row with the module-level ``scaler``.
      4. Pick the entries named in ``selected_features`` *by name*, in the
         order of that list.

    Args:
        sequence: Amino-acid sequence as one-letter codes. ``None`` is
            treated like an empty string.

    Returns:
        A float NumPy array of shape ``(n_selected, 1)`` (one column); callers
        transpose it to get the single row passed to the model.

    Raises:
        ValueError: If fewer than two residues remain after cleaning, since a
            dipeptide composition is undefined for such input.
    """
    # Text pasted into a UI box often carries spaces/newlines or lowercase
    # letters. NOTE(review): upper-casing assumes propy matches uppercase
    # one-letter codes only — confirm against the propy documentation.
    cleaned = "".join(str(sequence or "").split()).upper()
    if len(cleaned) < 2:
        raise ValueError(
            "Sequence must contain at least two amino acids to compute "
            "dipeptide composition."
        )

    all_features = AAComposition.CalculateAADipeptideComposition(cleaned)

    # Keep names and values side by side so that the selection below can be
    # done by feature name instead of by list position.
    feature_names = list(all_features)[:420]
    raw_row = np.array(
        [all_features[name] for name in feature_names], dtype=float
    ).reshape(1, -1)

    # The scaler expects one sample per row, hence the (1, n_features) shape.
    normalized = scaler.transform(raw_row).ravel()

    # Map each descriptor name to its column in the normalized row. Indexing
    # with the position inside selected_features (as a plain enumerate would)
    # pairs names with the wrong values as soon as selected_features skips a
    # descriptor.
    column_of = {name: idx for idx, name in enumerate(feature_names)}
    selected_values = [
        normalized[column_of[name]]
        for name in selected_features
        if name in column_of  # unknown names are skipped, not invented
    ]

    # Column vector, no pandas round-trip needed.
    return np.array(selected_values, dtype=float).reshape(-1, 1)



def predict(sequence):
    """Classify a sequence and return the result as a text label.

    The feature column produced by ``extract_features`` is transposed into a
    single row and passed to the loaded model. A predicted class equal to
    ``0`` is reported as "AMP"; any other value is reported as "Non-AMP".
    """
    feature_row = extract_features(sequence).T
    predicted_class = model.predict(feature_row)[0]
    if predicted_class == 0:
        return "AMP"
    return "Non-AMP"

# Build the web UI: a single text box feeds predict(), whose returned label
# is shown in a Label component.
iface = gr.Interface(
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not.",
    fn=predict,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
)

# Start serving the interface with sharing enabled.
iface.launch(share=True)