AMP-Classifier / app.py
nonzeroexit's picture
Update app.py
741f7a3 verified
raw
history blame
3.9 kB
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler
# Initialize FastAPI app
# Module-level application object; the @app.post / @app.get decorators further
# down in this file register their handlers on it.
app = FastAPI()
# Load trained SVM model and scaler
# Both artifacts are loaded once, at import time, from relative paths, so they
# are resolved against the process working directory.
# `model` is used by predict() via model.predict(); `scaler` is used by
# extract_features() via scaler.transform().
# NOTE(review): joblib.load deserializes arbitrary Python objects -- these
# files should only ever come from a trusted source.
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")
# Names of the descriptors fed to the model, in the order the model receives
# them: the single-letter entries come first, followed by two-letter entries.
# extract_features() looks these names up in the descriptor dictionary returned
# by propy, so every entry must be a key of that dictionary.
# NOTE(review): presumably this order must match the column order used when the
# model was trained -- confirm against the training pipeline before editing.
selected_features = [
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
"RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
"NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
"DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
"CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
"EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
"QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
"GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
"HC", "HG", "HL", "HK", "HP",
"IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
"LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
"KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
"MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
"FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
"PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
"SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
"TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
"WC",
"YR", "YD", "YC", "YG", "YL", "YS", "YV",
"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]
# Define request model
# Request body accepted by the POST /predict handler: a single string field.
# (Documented with comments rather than a docstring so the generated schema
# stays exactly as it was.)
class SequenceRequest(BaseModel):
    # Sequence text; predict() passes it unchanged to extract_features().
    sequence: str
# Feature extraction function
def extract_features(sequence):
    """Return the normalized model input for one sequence as a column vector.

    The sequence is turned into composition descriptors with propy, the first
    420 descriptors are normalized with the pre-fitted ``scaler``, and the
    entries named in ``selected_features`` are picked out *by name*.

    The previous implementation indexed the normalized vector with the
    position of each name inside ``selected_features``. That position differs
    from the descriptor's real column as soon as ``selected_features`` skips a
    descriptor, so most features received another descriptor's value.

    Args:
        sequence: Sequence string to featurize.

    Returns:
        ``numpy.ndarray`` of shape ``(len(selected_features), 1)``, ordered
        like ``selected_features``. Callers transpose it into a single row
        before handing it to the model.

    Raises:
        ValueError: If ``sequence`` has fewer than two characters.
        KeyError: If a name in ``selected_features`` is not among the
            normalized descriptors.
    """
    # propy normalizes dipeptide counts by (length - 1); without this guard a
    # too-short input surfaces as an opaque ZeroDivisionError.
    if len(sequence) < 2:
        raise ValueError("sequence must contain at least 2 residues")

    # Compute all descriptors propy offers for this sequence (name -> value).
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Only the leading 420 descriptors are passed through the scaler.
    # NOTE(review): presumably the 20 amino-acid plus 400 dipeptide
    # compositions the scaler was fitted on -- confirm against training code.
    scaled_width = 420
    scaled_names = list(all_features)[:scaled_width]
    raw_row = np.array([all_features[name] for name in scaled_names]).reshape(1, -1)

    # Min-Max normalization of the single-row matrix, flattened back to 1-D.
    normalized = scaler.transform(raw_row).flatten()

    # Map every descriptor name to its column so selection is done by name.
    column_of = {name: column for column, name in enumerate(scaled_names)}
    selected_values = [normalized[column_of[name]] for name in selected_features]

    # Column vector, matching the shape callers already expect.
    return np.array(selected_values).reshape(-1, 1)
# Prediction endpoint
# Handles POST /predict: featurizes the submitted sequence, runs the loaded
# model on it and reports the resulting class label.
@app.post("/predict")
def predict(request: SequenceRequest):
    """Predict AMP vs Non-AMP"""
    try:
        # extract_features() yields a column vector; the model is given it
        # transposed into a single row.
        feature_column = extract_features(request.sequence)
        model_output = model.predict(feature_column.T)
        predicted_class = model_output[0]

        # Class 0 is reported as "AMP"; any other value as "Non-AMP".
        if predicted_class == 0:
            verdict = "AMP"
        else:
            verdict = "Non-AMP"
        return {"prediction": verdict}
    except Exception as e:
        # Every failure in featurization or prediction is reported to the
        # client as HTTP 400 carrying the exception text.
        raise HTTPException(status_code=400, detail=str(e))
# Root endpoint for health check
# Handles GET /: takes no input and always answers with the same fixed payload.
@app.get("/")
def read_root():
    # Constant body; it does not touch the model or the scaler.
    health_payload = {"status": "OK"}
    return health_payload