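"""FastAPI service that labels a protein sequence as AMP or Non-AMP.

Sequence descriptors are computed with propy, scaled with a saved scaler,
and passed to a saved SVM model for prediction.
"""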
from fastapi import FastAPI, HTTPException | |
from pydantic import BaseModel | |
import joblib | |
import numpy as np | |
import pandas as pd | |
from propy import AAComposition | |
from sklearn.preprocessing import MinMaxScaler | |
# Initialize FastAPI app
# Module-level application object; an ASGI server imports this name to serve
# the API.
app = FastAPI()
# Load trained SVM model and scaler
# Both artifacts are read once at import time from paths relative to the
# current working directory, so the process must be started from the directory
# that contains them.
# NOTE(review): joblib files are pickle-based; only load artifacts from a
# trusted source.
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")
# Descriptor names kept for the classifier, in the order the feature vector is
# assembled: the 20 single-residue entries first, then the retained dipeptides
# grouped by their leading residue.
selected_features = [
    # Single residues
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
    "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
    # A-
    "AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL",
    "AK", "AF", "AP", "AS", "AT", "AY", "AV",
    # R-
    "RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI",
    "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
    # N-
    "NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
    # D-
    "DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL",
    "DK", "DP", "DS", "DT", "DV",
    # C-
    "CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL",
    "CK", "CF", "CP", "CS", "CT", "CY", "CV",
    # E-
    "EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL",
    "EK", "EP", "ES", "ET", "EV",
    # Q-
    "QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
    # G-
    "GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK",
    "GF", "GP", "GS", "GW", "GY", "GV",
    # H-
    "HC", "HG", "HL", "HK", "HP",
    # I-
    "IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP",
    "IS", "IT", "IV",
    # L-
    "LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL",
    "LK", "LM", "LF", "LP", "LS", "LT", "LV",
    # K-
    "KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI",
    "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
    # M-
    "MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
    # F-
    "FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY",
    "FV",
    # P-
    "PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
    # S-
    "SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK",
    "SF", "SP", "SS", "ST", "SY", "SV",
    # T-
    "TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF",
    "TP", "TS", "TT", "TV",
    # W-
    "WC",
    # Y-
    "YR", "YD", "YC", "YG", "YL", "YS", "YV",
    # V-
    "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK",
    "VP", "VS", "VT", "VY", "VV",
]
# Define request model
# Request body schema for the prediction handler: a JSON object with a single
# string field. Documented with comments rather than a docstring so the
# generated schema description is left unchanged.
class SequenceRequest(BaseModel):
    # Raw sequence text; it is handed unmodified to the feature extractor.
    sequence: str
# Feature extraction function
def extract_features(sequence):
    """Build the scaled, feature-selected input vector for one sequence.

    The descriptor dictionary returned by
    ``AAComposition.CalculateAADipeptideComposition`` is cut to its first 420
    entries, scaled with the module-level ``scaler``, and reduced to the
    names listed in ``selected_features``.

    Args:
        sequence: Sequence string, passed unmodified to propy.

    Returns:
        A 2-D NumPy array of shape ``(n_selected, 1)`` with the scaled values
        in ``selected_features`` order. The caller transposes it into the
        single-row matrix given to ``model.predict``.
    """
    # Compute all descriptors propy offers for this sequence.
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Only the first 420 descriptors (in the dictionary's own order) are fed
    # to the scaler; keep their names so columns can be addressed by name.
    feature_names = list(all_features)[:420]
    raw_row = np.array(
        [all_features[name] for name in feature_names]
    ).reshape(1, -1)

    # The scaler expects one row per sample, hence the (1, n) layout above.
    normalized = scaler.transform(raw_row).flatten()

    # Look each selected feature up by its column in the scaled vector.
    # Indexing by position within ``selected_features`` (as was done before)
    # pairs names with the wrong columns as soon as the selection skips a
    # descriptor, e.g. "AI" would receive the value scaled for "AH".
    # NOTE(review): assumes the model was trained on columns selected by
    # name -- confirm against the training pipeline.
    column_of = {name: idx for idx, name in enumerate(feature_names)}
    selected_values = [
        normalized[column_of[name]]
        for name in selected_features
        if name in column_of  # unknown names are skipped, as before
    ]

    # Column vector, matching the shape callers already transpose.
    return np.array(selected_values).reshape(-1, 1)
# Prediction endpoint
# The handler must be registered on ``app`` to be reachable; without a route
# decorator FastAPI never exposes it.
# NOTE(review): the "/predict" path is an assumption -- confirm it against
# existing clients before deploying.
@app.post("/predict")
def predict(request: SequenceRequest):
    """Predict AMP vs Non-AMP"""
    try:
        # Extract features (column vector) and present them as a single row.
        features = extract_features(request.sequence)
        # ``predict`` returns one label per row; there is exactly one row.
        prediction = model.predict(features.T)[0]
    except Exception as e:
        # Any failure while featurizing or predicting is reported to the
        # client as a 400 carrying the underlying message; the cause is
        # chained so server-side tracebacks stay complete.
        raise HTTPException(status_code=400, detail=str(e)) from e
    # Label 0 is reported as "AMP"; every other label as "Non-AMP".
    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}
# Root endpoint for health check
# Registered on "/" so a plain GET can confirm the service is up; without the
# decorator the function is never exposed by the app.
@app.get("/")
def read_root():
    """Return a fixed status payload indicating the service is running."""
    return {"status": "OK"}