Spaces:

nonzeroexit
/

AMP-Classifier

Running

App Files Files Community

AMP-Classifier / app.py

nonzeroexit

Update app.py

741f7a3 verified 4 months ago

raw

history blame

3.9 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	import joblib
	import numpy as np
	import pandas as pd
	from propy import AAComposition
	from sklearn.preprocessing import MinMaxScaler

	# Application object on which the routes in this file are registered
	app = FastAPI()

	# Serialized artifacts read once at import time: the trained SVM model and
	# the scaler applied to the raw features before prediction.
	_MODEL_FILE = "SVM.joblib"
	_SCALER_FILE = "norm.joblib"
	model = joblib.load(_MODEL_FILE)
	scaler = joblib.load(_SCALER_FILE)

	# Names of the features used by the model, in the order the model receives
	# them: the single-letter entries first, then the two-letter entries grouped
	# by their first letter. Kept as whitespace-separated text and split once at
	# import time into a list of strings.
	selected_features = (
	    "A R N D C E Q G H I L K M F P S T W Y V "
	    "AA AR AN AD AC AE AQ AG AI AL AK AF AP AS AT AY AV "
	    "RA RR RN RD RC RE RQ RG RH RI RL RK RM RF RS RT RY RV "
	    "NA NR ND NC NE NG NI NL NK NP "
	    "DA DR DN DD DC DE DQ DG DI DL DK DP DS DT DV "
	    "CA CR CN CD CC CE CG CH CI CL CK CF CP CS CT CY CV "
	    "EA ER EN ED EC EE EQ EG EI EL EK EP ES ET EV "
	    "QA QR QC QG QL QK QP QT QV "
	    "GA GR GD GC GE GQ GG GI GL GK GF GP GS GW GY GV "
	    "HC HG HL HK HP "
	    "IA IR ID IC IE II IL IK IF IP IS IT IV "
	    "LA LR LN LD LC LE LQ LG LI LL LK LM LF LP LS LT LV "
	    "KA KR KN KD KC KE KQ KG KH KI KL KK KM KF KP KS KT KV "
	    "MA ME MI ML MK MF MP MS MT MV "
	    "FR FC FQ FG FI FL FF FS FT FY FV "
	    "PA PR PD PC PE PG PL PK PS PV "
	    "SA SR SD SC SE SG SH SI SL SK SF SP SS ST SY SV "
	    "TA TR TN TC TE TG TI TL TK TF TP TS TT TV "
	    "WC "
	    "YR YD YC YG YL YS YV "
	    "VA VR VD VC VE VQ VG VI VL VK VP VS VT VY VV"
	).split()

	# Request body accepted by the prediction endpoint.
	# (Documented with comments rather than a class docstring so the generated
	# schema description stays exactly as it was.)
	class SequenceRequest(BaseModel):
	    # The sequence to classify, sent as a plain string.
	    sequence: str

	# Feature extraction function
	def extract_features(sequence):
	    """Build the normalized feature column the model expects for one sequence.

	    The composition descriptors are computed for ``sequence``, the leading
	    420 values are normalized with the pre-fitted ``scaler``, and the
	    entries listed in ``selected_features`` are then picked out by name.

	    Args:
	        sequence: The sequence to describe, as a string.

	    Returns:
	        A NumPy array of shape ``(len(selected_features), 1)`` holding the
	        normalized value of each selected feature, in ``selected_features``
	        order. Callers transpose it to get a single-row sample.

	    Raises:
	        KeyError: If a name in ``selected_features`` is not among the
	            descriptors that were passed to the scaler.
	    """
	    # Number of leading descriptors handed to the scaler.
	    # NOTE(review): assumes these are exactly the columns the scaler was
	    # fitted on, in the same order — confirm against the training pipeline.
	    n_scaled = 420

	    # Compute all available descriptors as a name -> value mapping.
	    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

	    # Keep the descriptor names alongside the values so that each normalized
	    # value can be traced back to the feature it belongs to.
	    scaled_names = list(all_features)[:n_scaled]
	    raw_row = np.array([all_features[name] for name in scaled_names]).reshape(1, -1)

	    # Min-Max normalization of the single-row sample, flattened back to 1-D.
	    normalized = scaler.transform(raw_row).flatten()

	    # Select by NAME. Indexing the normalized vector with a feature's
	    # position inside ``selected_features`` (a shorter list than the
	    # descriptor vector) would pair most names with the value of a
	    # different descriptor.
	    # NOTE(review): assumes the model was trained on columns selected by
	    # name — confirm against the training pipeline.
	    position_of = {name: idx for idx, name in enumerate(scaled_names)}
	    missing = [name for name in selected_features if name not in position_of]
	    if missing:
	        # Fail loudly: silently dropping features would hand the model a
	        # sample with the wrong number of columns.
	        raise KeyError(f"Selected features missing from descriptors: {missing}")
	    columns = [position_of[name] for name in selected_features]

	    # Column vector, one row per selected feature.
	    return normalized[columns].reshape(-1, 1)

	# Prediction endpoint
	@app.post("/predict")
	def predict(request: SequenceRequest):
	    """Predict AMP vs Non-AMP for the submitted sequence.

	    Returns a JSON object ``{"prediction": "AMP"}`` or
	    ``{"prediction": "Non-AMP"}``. Any failure is reported as HTTP 400 with
	    the error message in ``detail``.
	    """
	    # Normalize the raw input before feature extraction: drop every
	    # whitespace character (spaces, line breaks from pasted text) and
	    # upper-case it, since all feature names in this file are upper-case.
	    # NOTE(review): presumably the descriptor library counts letters
	    # case-sensitively — confirm.
	    sequence = "".join(request.sequence.split()).upper()

	    # Reject empty input up front with a clear message instead of letting
	    # feature extraction fail with an unrelated error. Done outside the
	    # try block so this HTTPException is not re-wrapped below.
	    if not sequence:
	        raise HTTPException(status_code=400, detail="Sequence must not be empty.")

	    try:
	        # extract_features returns a column vector; the model wants one row.
	        features = extract_features(sequence)
	        prediction = model.predict(features.T)[0]
	    except Exception as e:
	        # Top-level boundary: surface the failure to the client, keeping
	        # the original exception chained for server-side debugging.
	        raise HTTPException(status_code=400, detail=str(e)) from e

	    # Class label 0 is reported as "AMP", every other label as "Non-AMP".
	    # NOTE(review): confirm this mapping against the training labels.
	    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}

	# Health check served at the application root: always answers with a fixed
	# status payload. (Documented with comments rather than a docstring so the
	# generated API description stays exactly as it was.)
	@app.get("/")
	def read_root():
	    health = {"status": "OK"}
	    return health