Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,18 @@
|
|
1 |
-
import
|
|
|
2 |
import joblib
|
3 |
import numpy as np
|
4 |
import pandas as pd
|
5 |
from propy import AAComposition
|
6 |
from sklearn.preprocessing import MinMaxScaler
|
7 |
|
8 |
-
#
|
|
|
|
|
|
|
9 |
model = joblib.load("SVM.joblib")
|
10 |
scaler = joblib.load("norm.joblib")
|
11 |
|
12 |
-
|
13 |
# List of features used in your model
|
14 |
selected_features = [
|
15 |
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
|
@@ -35,49 +38,48 @@ selected_features = [
|
|
35 |
"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
|
36 |
]
|
37 |
|
|
|
|
|
|
|
|
|
|
|
38 |
def extract_features(sequence):
|
39 |
"""Extract only the required features and normalize them."""
|
40 |
# Compute all possible features
|
41 |
-
all_features = AAComposition.CalculateAADipeptideComposition(sequence)
|
42 |
# Extract the values from the dictionary
|
43 |
-
feature_values = list(all_features.values())
|
44 |
# Convert to NumPy array for normalization
|
45 |
feature_array = np.array(feature_values).reshape(-1, 1)
|
46 |
-
feature_array = feature_array[:
|
47 |
# Min-Max Normalization
|
48 |
normalized_features = scaler.transform(feature_array.T)
|
49 |
-
|
50 |
# Reshape normalized_features back to a single dimension
|
51 |
-
normalized_features = normalized_features.flatten()
|
52 |
-
|
53 |
# Create a dictionary with selected features
|
54 |
selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features)
|
55 |
if feature in all_features}
|
56 |
-
|
57 |
# Convert dictionary to dataframe
|
58 |
selected_feature_df = pd.DataFrame([selected_feature_dict])
|
59 |
-
|
60 |
# Convert dataframe to numpy array
|
61 |
selected_feature_array = selected_feature_df.T.to_numpy()
|
62 |
-
|
63 |
return selected_feature_array
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
def predict(
|
68 |
"""Predict AMP vs Non-AMP"""
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
title="AMP Classifier",
|
79 |
-
description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
|
80 |
-
)
|
81 |
|
82 |
-
#
|
83 |
-
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException
|
2 |
+
from pydantic import BaseModel
|
3 |
import joblib
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
from propy import AAComposition
|
7 |
from sklearn.preprocessing import MinMaxScaler
|
8 |
|
9 |
+
# Initialize FastAPI app
|
10 |
+
app = FastAPI()
|
11 |
+
|
12 |
+
# Load trained SVM model and scaler
|
13 |
model = joblib.load("SVM.joblib")
|
14 |
scaler = joblib.load("norm.joblib")
|
15 |
|
|
|
16 |
# List of features used in your model
|
17 |
selected_features = [
|
18 |
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
|
|
|
38 |
"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
|
39 |
]
|
40 |
|
41 |
+
# Define request model
|
42 |
+
class SequenceRequest(BaseModel):
    """Request body accepted by the prediction endpoint."""

    # Amino-acid sequence to classify, supplied as a plain string.
    sequence: str
|
44 |
+
|
45 |
+
# Feature extraction function
|
46 |
def extract_features(sequence):
    """Build the normalized feature column used by the classifier.

    The composition descriptors of ``sequence`` are computed with propy, the
    leading 420 values are scaled with the pre-fitted ``scaler``, and the
    entries listed in ``selected_features`` are then picked out by descriptor
    name.

    Args:
        sequence: Amino-acid sequence as a string. Surrounding whitespace is
            stripped and letters are upper-cased before the descriptors are
            computed.

    Returns:
        A NumPy array of shape ``(n_selected, 1)`` holding the scaled values
        in ``selected_features`` order.

    Raises:
        ValueError: If ``sequence`` is not a string or is empty after
            stripping.
    """
    # Number of leading descriptors the scaler is applied to. Presumably the
    # 20 amino-acid plus 400 dipeptide compositions -- TODO confirm against
    # the data the scaler was fitted on.
    n_scaled = 420

    if not isinstance(sequence, str) or not sequence.strip():
        raise ValueError("sequence must be a non-empty string")
    cleaned = sequence.strip().upper()

    # Compute all available composition descriptors (name -> value).
    all_features = AAComposition.CalculateAADipeptideComposition(cleaned)

    # Keep names and values aligned so every scaled value can be traced back
    # to the descriptor it belongs to.
    names = list(all_features)[:n_scaled]
    raw_row = np.array(
        [all_features[name] for name in names], dtype=float
    ).reshape(1, -1)

    # Min-max normalization; the scaler takes a single row of descriptors.
    normalized = scaler.transform(raw_row).flatten()

    # Select by descriptor name. Pairing selected_features with the scaled
    # values by enumerate position is only correct while the selection is an
    # unbroken prefix of the descriptor list; once a descriptor is omitted
    # from selected_features, every later entry would get a neighbour's value.
    position = {name: idx for idx, name in enumerate(names)}
    picked = [
        normalized[position[name]]
        for name in selected_features
        if name in position
    ]

    # Column vector, one row per selected feature.
    return np.array(picked, dtype=float).reshape(-1, 1)
|
67 |
|
68 |
+
# Prediction endpoint
|
69 |
+
@app.post("/predict")
def predict(request: SequenceRequest):
    """Classify the submitted sequence as "AMP" or "Non-AMP".

    Features are extracted from ``request.sequence``, transposed, and passed
    to the loaded model; the first predicted label decides the answer
    (label ``0`` is reported as "AMP", anything else as "Non-AMP").

    Raises:
        HTTPException: With status 400 and the error text as detail when
            feature extraction or prediction raises any exception.
    """
    try:
        feature_column = extract_features(request.sequence)
        # Only the first element of the model output is used.
        label = model.predict(feature_column.T)[0]
        if label == 0:
            verdict = "AMP"
        else:
            verdict = "Non-AMP"
        return {"prediction": verdict}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
|
81 |
|
82 |
+
# Root endpoint for health check
|
83 |
+
@app.get("/")
def read_root():
    """Health-check endpoint: return a fixed status payload."""
    health = {"status": "OK"}
    return health
|