Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,10 +5,11 @@ import pandas as pd
|
|
| 5 |
from propy import AAComposition
|
| 6 |
from sklearn.preprocessing import MinMaxScaler
|
| 7 |
|
|
|
|
| 8 |
model = joblib.load("SVM.joblib")
|
| 9 |
scaler = joblib.load("norm.joblib")
|
| 10 |
|
| 11 |
-
|
| 12 |
selected_features = [
|
| 13 |
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
|
| 14 |
"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
|
|
@@ -34,34 +35,33 @@ selected_features = [
|
|
| 34 |
]
|
| 35 |
|
| 36 |
def extract_features(sequence):
|
| 37 |
-
"""Extract
|
| 38 |
all_features = AAComposition.CalculateAADipeptideComposition(sequence)
|
| 39 |
feature_values = list(all_features.values())
|
| 40 |
feature_array = np.array(feature_values).reshape(-1, 1)
|
| 41 |
-
feature_array = feature_array[: 420]
|
| 42 |
normalized_features = scaler.transform(feature_array.T)
|
| 43 |
-
|
| 44 |
normalized_features = normalized_features.flatten()
|
| 45 |
|
|
|
|
| 46 |
selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
selected_feature_df = pd.DataFrame([selected_feature_dict])
|
| 50 |
-
|
| 51 |
selected_feature_array = selected_feature_df.T.to_numpy()
|
| 52 |
|
| 53 |
return selected_feature_array
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
def predict(sequence):
|
| 58 |
-
"""Predict AMP
|
| 59 |
features = extract_features(sequence)
|
| 60 |
prediction = model.predict(features.T)[0]
|
| 61 |
-
|
| 62 |
-
|
|
|
|
| 63 |
|
|
|
|
| 64 |
|
|
|
|
| 65 |
iface = gr.Interface(
|
| 66 |
fn=predict,
|
| 67 |
inputs=gr.Textbox(label="Enter Protein Sequence"),
|
|
@@ -70,4 +70,4 @@ iface = gr.Interface(
|
|
| 70 |
description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
|
| 71 |
)
|
| 72 |
|
| 73 |
-
iface.launch(share=True)
|
|
|
|
| 5 |
from propy import AAComposition
|
| 6 |
from sklearn.preprocessing import MinMaxScaler
|
| 7 |
|
| 8 |
+
# Load trained model and scaler
|
| 9 |
model = joblib.load("SVM.joblib")
|
| 10 |
scaler = joblib.load("norm.joblib")
|
| 11 |
|
| 12 |
+
# Selected features used in training
|
| 13 |
selected_features = [
|
| 14 |
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
|
| 15 |
"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
|
|
|
|
| 35 |
]
|
| 36 |
|
| 37 |
def extract_features(sequence):
    """Build the normalized, model-ready feature column for one sequence.

    Descriptors are computed with
    ``AAComposition.CalculateAADipeptideComposition``. Only the first 420 of
    them, in the order the call returns them, are passed to the module-level
    ``scaler``. The entries named in ``selected_features`` are then picked out
    of the scaled vector by descriptor name, so each name is always paired
    with its own column regardless of where it sits in ``selected_features``.

    Args:
        sequence: Amino-acid sequence in one-letter code.

    Returns:
        A NumPy array of shape ``(len(selected_features), 1)`` holding the
        scaled values in ``selected_features`` order. Callers transpose it
        to obtain a single sample row.

    Raises:
        ValueError: If a name in ``selected_features`` is not among the
            descriptors that were scaled.
    """
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Keep names and values paired so that a column can be found by name.
    # NOTE(review): 420 presumably covers the 20 single-residue and 400
    # dipeptide descriptors the scaler was fitted on -- confirm.
    feature_names = list(all_features)[:420]
    raw_values = np.array(
        [all_features[name] for name in feature_names], dtype=float
    )

    # The scaler takes one row per sample: shape (1, n_features).
    normalized_features = scaler.transform(raw_values.reshape(1, -1)).flatten()

    column_of = {name: idx for idx, name in enumerate(feature_names)}

    # Dropping an unknown name would silently change the width of the model
    # input, so fail loudly with the offending names instead.
    missing = [name for name in selected_features if name not in column_of]
    if missing:
        raise ValueError(
            f"Selected features not produced by the descriptor step: {missing}"
        )

    selected_values = [
        normalized_features[column_of[name]] for name in selected_features
    ]
    return np.array(selected_values, dtype=float).reshape(-1, 1)
|
| 53 |
|
|
|
|
|
|
|
| 54 |
def predict(sequence):
    """Classify a peptide sequence and report the result as display text.

    The input is cleaned first (all whitespace removed, letters upper-cased).
    If nothing is left, a prompt message is returned without calling the
    model. Otherwise the features from ``extract_features`` are passed to the
    module-level ``model`` for both the class label and the class
    probabilities.

    Args:
        sequence: Text entered by the user; may be empty or ``None``.

    Returns:
        A human-readable string. When the predicted label is ``0`` the
        sequence is reported as an AMP together with the first probability
        column; otherwise it is reported as Non-AMP together with the second
        probability column.
    """
    # Pasted sequences often carry line breaks, spaces or lower-case letters.
    # NOTE(review): the descriptor library presumably matches upper-case
    # one-letter codes and scales by sequence length -- confirm.
    cleaned = "".join((sequence or "").split()).upper()
    if not cleaned:
        return "⚠️ Please enter a protein sequence."

    # extract_features returns a column; the model expects one row per sample.
    sample = extract_features(cleaned).T

    prediction = model.predict(sample)[0]
    probabilities = model.predict_proba(sample)[0]
    prob_amp = probabilities[0]
    prob_non_amp = probabilities[1]

    # NOTE(review): for some estimators the label from predict() and the
    # arg-max of predict_proba() can disagree; the label decides the wording.
    if prediction == 0:
        return f"⚡ {prob_amp * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)"
    return f"❌ {prob_non_amp * 100:.2f}% chance of being Non-AMP"
|
| 63 |
|
| 64 |
+
# Gradio interface
|
| 65 |
iface = gr.Interface(
|
| 66 |
fn=predict,
|
| 67 |
inputs=gr.Textbox(label="Enter Protein Sequence"),
|
|
|
|
| 70 |
description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
|
| 71 |
)
|
| 72 |
|
| 73 |
+
# Start the Gradio server for the interface defined above.
# NOTE(review): the page header shows this app running as a hosted Space;
# share=True requests an extra public tunnel link, which is presumably
# unnecessary there -- confirm whether it is still needed.
iface.launch(share=True)
|