Spaces:

Roberta2024
/

heart_predict

Sleeping

App Files Files Community

Roberta2024 committed on Jul 30, 2024

Commit

79de571

verified ·

1 Parent(s): 6fb6c47

Create app.py

Browse files

Files changed (1) hide show

app.py +82 -0

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+# Location of the input CSV that the script loads below with pd.read_csv.
+# NOTE(review): this is a hard-coded absolute path — confirm the file actually
+# exists at this location in the environment where the app runs.
+file_path = '/mnt/data/heart.csv'
+# Import necessary libraries
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix, roc_curve, auc
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+import xgboost as xgb
+import joblib
+# Step 1: Data Cleaning and Encoding
+# Load data
+data = pd.read_csv(file_path)
+# Handle missing values (example: filling with median)
+data = data.fillna(data.median())
+# Encode categorical variables
+label_encoders = {}
+for column in data.select_dtypes(include=['object']).columns:
+    le = LabelEncoder()
+    data[column] = le.fit_transform(data[column])
+    label_encoders[column] = le
+# Step 2: Plotting the Dependency Matrix
+plt.figure(figsize=(12, 8))
+correlation_matrix = data.corr()
+sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
+plt.title('Correlation Matrix')
+plt.show()
+# Step 3: Supervised Learning Model for Prediction using XGBoost
+# Define features and target
+X = data.drop('target', axis=1)  # Assuming 'target' is the target variable
+y = data['target']
+# Split the data
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Standardize the data
+scaler = StandardScaler()
+X_train = scaler.fit_transform(X_train)
+X_test = scaler.transform(X_test)
+# Train the model
+model = xgb.XGBClassifier()
+model.fit(X_train, y_train)
+# Make predictions
+y_pred = model.predict(X_test)
+y_pred_prob = model.predict_proba(X_test)[:, 1]
+# Step 4: Evaluation Using Confusion Matrix and Plotting ROC Curve
+# Confusion Matrix
+conf_matrix = confusion_matrix(y_test, y_pred)
+sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
+plt.title('Confusion Matrix')
+plt.xlabel('Predicted')
+plt.ylabel('Actual')
+plt.show()
+# ROC Curve
+fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
+roc_auc = auc(fpr, tpr)
+plt.figure()
+plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:0.2f})')
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlim([0.0, 1.0])
+plt.ylim([0.0, 1.05])
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver Operating Characteristic (ROC) Curve')
+plt.legend(loc='lower right')
+plt.show()
+# Save the model using joblib
+model_filename = '/mnt/data/xgboost_model.joblib'
+joblib.dump(model, model_filename)
+print(f"Model saved to {model_filename}")