Spaces:

Anupam202224
/

GeneticDiseasePredictionSystem

Sleeping

App Files Files Community

GeneticDiseasePredictionSystem / app.py

Anupam202224

Create app.py

5f741a4 verified 11 months ago

raw

history blame contribute delete

5.98 kB

	import numpy as np
	import pandas as pd
	import gradio as gr
	from sklearn.preprocessing import StandardScaler
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import accuracy_score, classification_report
	import joblib

	# Generate more realistic synthetic genetic data
	def generate_realistic_genetic_data(n_samples=1000, seed=42):
	    """Build a synthetic table of genetic/biological markers with a disease label.

	    Ten marker columns are sampled independently from fixed distributions
	    and a binary ``disease`` column is derived from a weighted sum of
	    threshold indicators over eight of them (``oxidative_stress`` and
	    ``immune_response`` are generated but do not enter the label rule).

	    Args:
	        n_samples: Number of rows to generate.
	        seed: Seed for the private random generator. The default (42)
	            reproduces the same data as seeding NumPy's global generator
	            with 42 and drawing the columns in the same order. Passing
	            ``None`` lets NumPy pick its own seed, so results will vary.

	    Returns:
	        pandas.DataFrame with the ten marker columns followed by an integer
	        ``disease`` column containing 0 or 1.
	    """
	    # A private RandomState yields the same stream as
	    # ``np.random.seed(seed)`` followed by ``np.random.*`` calls, but does
	    # not reseed the process-wide generator behind the caller's back.
	    rng = np.random.RandomState(seed)

	    # The draw order below is part of the contract: every call consumes the
	    # random stream, so reordering entries would change the generated data.
	    markers = {
	        'BRCA1_mutation': rng.choice([0, 1], size=n_samples, p=[0.95, 0.05]),  # BRCA1 mutation flag
	        'P53_variation': rng.normal(0.5, 0.1, n_samples),  # P53 tumor suppressor variation
	        'APOE_allele': rng.choice([2, 3, 4], size=n_samples, p=[0.1, 0.7, 0.2]),  # APOE allele type
	        'DNA_methylation': rng.beta(2, 5, n_samples),  # DNA methylation level
	        'telomere_length': rng.normal(6000, 1000, n_samples),  # Telomere length
	        'CYP2D6_activity': rng.gamma(2, 2, n_samples),  # CYP2D6 enzyme activity
	        'inflammatory_markers': rng.exponential(2, n_samples),  # Inflammatory markers
	        'glucose_metabolism': rng.normal(100, 15, n_samples),  # Glucose metabolism
	        'oxidative_stress': rng.gamma(3, 1, n_samples),  # Oxidative stress level
	        'immune_response': rng.normal(0.7, 0.1, n_samples),  # Immune response strength
	    }

	    df = pd.DataFrame(markers)

	    # Weighted sum of boolean risk indicators; the weights add up to 1.0.
	    # NOTE(review): some weight combinations equal exactly 0.5 on paper, so
	    # the strict ``> 0.5`` cut-off is sensitive to floating-point rounding
	    # for those rows -- confirm whether that is intended.
	    risk_score = (
	        0.3 * markers['BRCA1_mutation'] +
	        0.2 * (markers['P53_variation'] > 0.7) +
	        0.15 * (markers['APOE_allele'] == 4) +
	        0.1 * (markers['DNA_methylation'] > 0.6) +
	        0.05 * (markers['telomere_length'] < 5000) +
	        0.1 * (markers['CYP2D6_activity'] > 5) +
	        0.05 * (markers['inflammatory_markers'] > 3) +
	        0.05 * (markers['glucose_metabolism'] > 120)
	    )

	    df['disease'] = (risk_score > 0.5).astype(int)

	    return df

	# Data preprocessing
	def preprocess_data(data):
	    """Separate the label from the features and standardise the features.

	    Args:
	        data: DataFrame containing a ``disease`` column plus feature columns.

	    Returns:
	        Tuple ``(X_scaled, y, scaler)``: the features after being passed
	        through a ``StandardScaler`` fitted on them, the ``disease`` column,
	        and the fitted scaler so callers can transform new rows the same way.
	    """
	    feature_frame = data.drop(columns='disease')
	    target = data['disease']

	    # Fit first, then transform with the same object that is handed back.
	    scaler = StandardScaler()
	    scaler.fit(feature_frame)
	    standardized = scaler.transform(feature_frame)

	    return standardized, target, scaler

	# Train and evaluate model
	def train_and_evaluate_model():
	    """Generate synthetic data, train a random forest and report its quality.

	    Prints a data sample, summary statistics, hold-out accuracy, a
	    classification report and per-feature importances to stdout.

	    Returns:
	        Tuple ``(model, scaler)``: the fitted ``RandomForestClassifier`` and
	        the ``StandardScaler`` that must be applied to any input before it
	        is passed to the model.
	    """
	    # Generate data and show what it looks like
	    print("Generating synthetic genetic data...")
	    data = generate_realistic_genetic_data(1500)
	    print("\nData Sample:")
	    print(data.head())
	    print("\nData Statistics:")
	    print(data.describe())

	    # Split BEFORE scaling: fitting the scaler on the full dataset would let
	    # statistics of the held-out rows leak into preprocessing. Splitting the
	    # DataFrame with the same test_size/random_state keeps the same row
	    # partition as splitting the already-scaled arrays.
	    train_rows, test_rows = train_test_split(
	        data, test_size=0.2, random_state=42
	    )

	    # Scaler is fitted on the training rows only, then reused for the test rows
	    X_train, y_train, scaler = preprocess_data(train_rows)
	    X_test = scaler.transform(test_rows.drop('disease', axis=1))
	    y_test = test_rows['disease']

	    # Train model
	    print("\nTraining Random Forest model...")
	    model = RandomForestClassifier(
	        n_estimators=100,
	        max_depth=5,
	        random_state=42
	    )
	    model.fit(X_train, y_train)

	    # Evaluate on the held-out rows
	    y_pred = model.predict(X_test)
	    accuracy = accuracy_score(y_test, y_pred)
	    print("\nModel Evaluation:")
	    print(f"Accuracy: {accuracy:.2f}")
	    print("\nClassification Report:")
	    print(classification_report(y_test, y_pred))

	    # Feature importance, paired with the feature column names in data order
	    feature_importance = pd.DataFrame({
	        'feature': data.drop('disease', axis=1).columns,
	        'importance': model.feature_importances_
	    })
	    print("\nFeature Importance:")
	    print(feature_importance.sort_values('importance', ascending=False))

	    return model, scaler

	# Prediction function
	def predict_disease(
	    brca1_mutation, p53_variation, apoe_allele, dna_methylation,
	    telomere_length, cyp2d6_activity, inflammatory_markers,
	    glucose_metabolism, oxidative_stress, immune_response
	):
	    """Return class probabilities for one set of marker values.

	    Relies on the module-level ``model`` and ``scaler`` produced by
	    ``train_and_evaluate_model``. The arguments must be given in the same
	    order as the feature columns the scaler and model were fitted on.

	    Returns:
	        Dict with keys ``"No Disease"`` and ``"Disease"`` mapping to float
	        probabilities.

	    Raises:
	        gr.Error: If any input is missing (``None``).
	    """
	    raw_values = [
	        brca1_mutation, p53_variation, apoe_allele, dna_methylation,
	        telomere_length, cyp2d6_activity, inflammatory_markers,
	        glucose_metabolism, oxidative_stress, immune_response
	    ]

	    # A None would turn the array into dtype=object and fail deep inside
	    # the scaler; report missing values clearly instead.
	    # NOTE(review): assumes a cleared numeric field reaches this function
	    # as None -- confirm against the Gradio Number documentation.
	    if any(value is None for value in raw_values):
	        raise gr.Error("Please provide a value for every input field.")

	    # Single-row, explicitly numeric feature matrix
	    input_data = np.asarray(raw_values, dtype=float).reshape(1, -1)

	    # If the scaler was fitted on a DataFrame it remembers the column names;
	    # pass them back so the transform sees the same labelled layout rather
	    # than an anonymous array.
	    feature_names = getattr(scaler, "feature_names_in_", None)
	    if feature_names is not None:
	        input_data = pd.DataFrame(input_data, columns=feature_names)

	    # Scale input data
	    scaled_data = scaler.transform(input_data)

	    # Make prediction; look probabilities up by class label instead of by
	    # position so a model that only saw one class cannot cause an IndexError.
	    prediction = model.predict_proba(scaled_data)[0]
	    probability_by_class = dict(zip(model.classes_, prediction))

	    return {
	        "No Disease": float(probability_by_class.get(0, 0.0)),
	        "Disease": float(probability_by_class.get(1, 0.0))
	    }

	# Fit the classifier once at start-up; predict_disease reads `model` and
	# `scaler` as module-level globals.
	print("Training model and preparing interface...")
	model, scaler = train_and_evaluate_model()

	# (label, default value) for each numeric input, listed in the positional
	# order that predict_disease expects its arguments.
	_number_fields = [
	    ("BRCA1 Mutation (0 or 1)", 0),
	    ("P53 Variation (typically 0.3-0.7)", 0.5),
	    ("APOE Allele (2, 3, or 4)", 3),
	    ("DNA Methylation (0-1)", 0.4),
	    ("Telomere Length (typically 4000-8000)", 6000),
	    ("CYP2D6 Activity (typically 0-10)", 4),
	    ("Inflammatory Markers (typically 0-10)", 2),
	    ("Glucose Metabolism (typically 70-130)", 100),
	    ("Oxidative Stress (typically 0-10)", 3),
	    ("Immune Response (typically 0.5-0.9)", 0.7),
	]

	# Pre-filled example rows offered in the UI, one value per input field.
	_example_rows = [
	    [1, 0.8, 4, 0.7, 4800, 6, 4, 125, 5, 0.6],  # High-risk example
	    [0, 0.4, 3, 0.3, 6500, 3, 1, 95, 2, 0.8],  # Low-risk example
	    [0, 0.6, 3, 0.5, 5500, 4, 2, 110, 3, 0.7],  # Moderate-risk example
	]

	_description = """This system predicts genetic disease risk based on various genetic markers and biological indicators.
	Please input values within the suggested ranges for accurate predictions."""

	# Assemble the Gradio interface around predict_disease
	iface = gr.Interface(
	    fn=predict_disease,
	    inputs=[
	        gr.Number(label=field_label, value=field_default)
	        for field_label, field_default in _number_fields
	    ],
	    outputs=gr.Label(label="Disease Prediction"),
	    title="Genetic Disease Prediction System",
	    description=_description,
	    examples=_example_rows
	)

	# Start serving the interface.
	# NOTE(review): share=True asks Gradio for a public share link; confirm
	# that this is wanted (and supported) where the app is hosted.
	iface.launch(share=True)