Spaces:
Sleeping
Sleeping
File size: 5,479 Bytes
067ea44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import gradio as gr
import pickle
import numpy as np
import pandas as pd
from explainer import get_shap_values
import shap
import matplotlib.pyplot as plt
import io
import os
from PIL import Image
from huggingface_hub import HfApi
# Load model
with open("model.pkl", "rb") as f:
model = pickle.load(f)
# List of columns used during training
columns = ['Age', 'Saving accounts', 'Checking account', 'Credit amount', 'Duration',
'Purpose_business', 'Purpose_car', 'Purpose_domestic appliances', 'Purpose_education', 'Purpose_furniture/equipment',
'Purpose_radio/TV', 'Purpose_repairs', 'Purpose_vacation/others', 'Sex_female', 'Sex_male',
'Housing_free', 'Housing_own', 'Housing_rent', 'Job_0', 'Job_1', 'Job_2', 'Job_3']
# Define prediction function
# All categorical mappings must match the encoding used during training
saving_account_map = {'little': 0, 'moderate': 1, 'quite rich': 2, 'rich': 3, 'No Savings': 0}
checking_account_map = {'little': 0, 'moderate': 1, 'rich': 2, 'No Checking': 0}
purpose_options = ['business', 'car', 'domestic appliances', 'education', 'furniture/equipment', 'radio/TV', 'repairs', 'vacation/others']
sex_options = ['female', 'male']
housing_options = ['free', 'own', 'rent']
job_options = [0, 1, 2, 3]
def to_scalar(val):
# If val is a numpy array, get the first element; else, return as float
if isinstance(val, np.ndarray):
return float(val.flatten()[0])
return float(val)
def predict_and_explain(age, saving_account, checking_account, credit_amount, duration, purpose, sex, housing, job):
# Build a single-row DataFrame with all columns set to 0
input_data = pd.DataFrame([[0]*len(columns)], columns=columns)
input_data['Age'] = age
input_data['Saving accounts'] = saving_account_map.get(saving_account, 0)
input_data['Checking account'] = checking_account_map.get(checking_account, 0)
input_data['Credit amount'] = credit_amount
input_data['Duration'] = duration
# One-hot encoding for purpose
for p in purpose_options:
input_data[f'Purpose_{p}'] = 1 if purpose == p else 0
# One-hot encoding for sex
for s in sex_options:
input_data[f'Sex_{s}'] = 1 if sex == s else 0
# One-hot encoding for housing
for h in housing_options:
input_data[f'Housing_{h}'] = 1 if housing == h else 0
# One-hot encoding for job
for j in job_options:
input_data[f'Job_{j}'] = 1 if job == j else 0
# Predict
pred = model.predict(input_data)[0]
prob = model.predict_proba(input_data)[0][int(pred)]
risk = "Low Risk" if pred == 0 else "High Risk"
prediction_text = f"{risk} (Confidence: {prob:.2f})"
# Save flagged (high risk) input
if risk == "High Risk":
flagged_file = "flagged_inputs.csv"
# Save the raw user input, not the one-hot encoded row
user_row = {
"Age": age,
"Saving accounts": saving_account,
"Checking account": checking_account,
"Credit amount": credit_amount,
"Duration": duration,
"Purpose": purpose,
"Sex": sex,
"Housing": housing,
"Job": job
}
df_flag = pd.DataFrame([user_row])
if not os.path.exists(flagged_file):
df_flag.to_csv(flagged_file, index=False)
else:
df_flag.to_csv(flagged_file, mode='a', header=False, index=False)
# Get SHAP values
shap_values = get_shap_values(model, input_data)
# If get_shap_values returns (shap_values, explainer), use: shap_values, explainer = get_shap_values(input_data)
# Get feature importances for this prediction
shap_vals = shap_values[0] if hasattr(shap_values, '__getitem__') else shap_values
feature_importance = sorted(
zip(input_data.columns, shap_vals),
key=lambda x: abs(x[1]),
reverse=True
)
# Get top 5 important features
top_features = feature_importance[:5]
importance_text = "Top features for this prediction:\n" + "\n".join(
[f"{feat}: {to_scalar(val):.3f}" for feat, val in top_features]
)
return prediction_text, importance_text
# Gradio UI
demo = gr.Interface(
fn=predict_and_explain,
inputs=[
gr.Number(label="Age", info="Customer's Age"),
gr.Radio(list(saving_account_map.keys()), label="Saving accounts", info="Saving accounts (Amount of Saving)"),
gr.Radio(list(checking_account_map.keys()), label="Checking account", info="Checking account (in USD)"),
gr.Number(label="Credit amount", info="Credit amount (numeric, in USD)"),
gr.Number(label="Duration", info="Duration (numeric, in month)"),
gr.Radio(purpose_options, label="Purpose", info="Purpose (car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others)"),
gr.Radio(sex_options, label="Sex", info="Sex (Male, Female)"),
gr.Radio(housing_options, label="Housing", info="Housing (Own, Rent, or Free)"),
gr.Radio(job_options, label="Job", info="Job (0 - Unskilled and non-resident, 1 - Unskilled and resident, 2 - Skilled, 3 - Highly skilled)")
],
outputs=[
gr.Text(label="Credit Risk Prediction"),
gr.Text(label="Top Features for This Prediction")
],
title="Credit Risk Predictor",
description="Input applicant info to assess credit risk level and see SHAP explanation"
)
if __name__ == "__main__":
demo.launch()
|