Spaces:
Runtime error
Runtime error
Nityanand Mathur
committed on
Commit
·
e4b4b59
1
Parent(s):
f8c9b15
Added CF
Browse files- app.py +107 -4
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,5 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
|
|
3 |
|
4 |
|
5 |
def read_csv(dataset):
|
@@ -8,11 +14,9 @@ def read_csv(dataset):
|
|
8 |
return data.head(10)
|
9 |
|
10 |
def train_model(input_df, target, test_size, model_name, features_to_drop):
|
11 |
-
import pickle
|
12 |
|
13 |
from sklearn.compose import ColumnTransformer
|
14 |
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
|
15 |
-
from sklearn.model_selection import train_test_split
|
16 |
from sklearn.pipeline import Pipeline
|
17 |
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
18 |
|
@@ -49,9 +53,51 @@ def train_model(input_df, target, test_size, model_name, features_to_drop):
|
|
49 |
regr = Pipeline(steps=[('preprocessor', transformations),
|
50 |
('regressor', RandomForestRegressor())])
|
51 |
model = regr.fit(X_train, y_train)
|
52 |
-
pickle.dump(model, open('./' + model_name + '.pkl', 'wb'))
|
53 |
return 'Model Saved'
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
with gr.Blocks() as demo:
|
56 |
with gr.Tab('Dataset'):
|
57 |
gr.Markdown('Visualize the dataset to apply CFML')
|
@@ -60,6 +106,7 @@ with gr.Blocks() as demo:
|
|
60 |
'Summer','TotalClothingValue','TotalClothingValue+3Binary'], label='Dataset')
|
61 |
output_vis = gr.DataFrame()
|
62 |
button_vis = gr.Button(label="Run")
|
|
|
63 |
with gr.Tab('Model'):
|
64 |
gr.Markdown('Choose the features to apply CFML')
|
65 |
input_df = gr.Dropdown(['IndividualClothingBinary','IndividualClothingBinary+3Binary',
|
@@ -73,8 +120,64 @@ with gr.Blocks() as demo:
|
|
73 |
model_output = gr.Textbox(label='Status')
|
74 |
button_model = gr.Button(label="Train Model")
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
button_vis.click(read_csv, df_vis, outputs=output_vis)
|
77 |
button_model.click(train_model, [input_df, target, test_size, model_name, features_to_drop], outputs=model_output)
|
78 |
-
|
|
|
|
|
79 |
|
80 |
demo.launch()
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import random
|
4 |
+
|
5 |
+
import dice_ml
|
6 |
import gradio as gr
|
7 |
import pandas as pd
|
8 |
+
from sklearn.model_selection import train_test_split
|
9 |
|
10 |
|
11 |
def read_csv(dataset):
|
|
|
14 |
return data.head(10)
|
15 |
|
16 |
def train_model(input_df, target, test_size, model_name, features_to_drop):
|
|
|
17 |
|
18 |
from sklearn.compose import ColumnTransformer
|
19 |
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
|
|
|
20 |
from sklearn.pipeline import Pipeline
|
21 |
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
22 |
|
|
|
53 |
regr = Pipeline(steps=[('preprocessor', transformations),
|
54 |
('regressor', RandomForestRegressor())])
|
55 |
model = regr.fit(X_train, y_train)
|
56 |
+
pickle.dump(model, open('./' + model_name + '.pkl', 'wb'))
|
57 |
return 'Model Saved'
|
58 |
|
59 |
+
def generate_cfs_total(input_df_T, target_T, radio_T, predefined_T, custom_T, dropped_features_T, freeze_features_T, model_T):
    """Generate DiCE counterfactuals for one of the Total-CLO datasets.

    Args:
        input_df_T: Dataset name; ``./data/<name>.csv`` is read.
        target_T: Selected target metric(s), or a single metric name. Only
            the first entry is used.
        radio_T: ``'Predefined'`` to explain rows sampled from the 20% test
            split, or ``'Custom'`` to explain the rows given in ``custom_T``.
        predefined_T: Number of consecutive test rows to explain
            (``'Predefined'`` mode only).
        custom_T: Query instances supplied by the user (``'Custom'`` mode).
        dropped_features_T: Columns removed from the dataset before use.
        freeze_features_T: Extra columns that must not be varied.
        model_T: Model name; ``./<name>.pkl`` is unpickled.

    Returns:
        A DataFrame holding the counterfactual rows generated for every
        query instance (empty when none were found).

    Raises:
        ValueError: If no target metric is selected, ``radio_T`` is not a
            known input type, or ``predefined_T`` is not a positive number.
    """
    # Validate the UI inputs up front so mistakes surface as clear messages
    # before any file is read or any model is loaded.
    if not target_T:
        raise ValueError('Please select a target metric')
    target_name = target_T if isinstance(target_T, str) else target_T[0]

    if radio_T not in ('Predefined', 'Custom'):
        raise ValueError("Type of Input must be 'Predefined' or 'Custom'")

    n_queries = 0
    if radio_T == 'Predefined':
        try:
            n_queries = int(predefined_T)
        except (TypeError, ValueError):
            raise ValueError('Number of inputs to provide must be a number')
        if n_queries < 1:
            raise ValueError('Number of inputs to provide must be at least 1')

    data = pd.read_csv(f'./data/{input_df_T}.csv')

    # NOTE(security): pickle executes arbitrary code on load; only model files
    # written by this app should ever be placed next to it.
    with open(f'./{model_T}.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    data = data.drop(list(dropped_features_T or []), axis=1)

    # Keep only the chosen comfort metric; the other three are not features.
    other_metrics = [m for m in ('TSV', 'TPV', 'TCV', 'TSL') if m != target_name]
    data = data.drop(other_metrics, axis=1)

    # Split off the selected target (not a hard-coded 'TSV', which has already
    # been dropped whenever another metric is the target).
    feature_frame = data.drop(target_name, axis=1)
    features = feature_frame.columns.tolist()
    _, x_test, _, _ = train_test_split(feature_frame,
                                       data[target_name],
                                       test_size=0.2,
                                       random_state=0)

    always_immutable = ['AvgMaxDailyTemp', 'AvgMinDailyTemp', 'School', 'DAY', 'StartTime']
    frozen = always_immutable + list(freeze_features_T or []) + [target_name]
    features_to_vary = data.columns.difference(frozen).to_list()

    d = dice_ml.Data(dataframe=data, continuous_features=features, outcome_name=target_name)
    m = dice_ml.Model(model=model, backend='sklearn', model_type='regressor')
    exp = dice_ml.Dice(d, m, method='genetic')

    if radio_T == 'Predefined':
        # Choose a start position that leaves room for n_queries rows inside
        # the test split (randint is inclusive on both ends).
        start = random.randint(0, max(len(x_test) - n_queries, 0))
        query_instances = x_test.iloc[start:start + n_queries]
    else:
        query_instances = custom_T

    dice_exp = exp.generate_counterfactuals(query_instances, total_CFs=4, desired_range=[0.0, 2.0], features_to_vary=features_to_vary)

    # visualize_as_dataframe() only renders the tables and returns None, so
    # collect the underlying frames to give the Gradio output a real table.
    frames = [example.final_cfs_df for example in dice_exp.cf_examples_list
              if example.final_cfs_df is not None]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
|
96 |
+
|
97 |
+
|
98 |
+
def generate_cfs_individual(input_df_I, target_I, radio_I, predefined_I, custom_I, dropped_features_I, freeze_features_I, model_I):
    """Placeholder for counterfactual generation on the Individual-Clothing datasets.

    Not implemented yet: it accepts the same eight inputs as
    ``generate_cfs_total`` (dataset, target, input type, row count, custom
    rows, dropped features, frozen features, model), ignores them, and
    returns ``None``.
    """
    return None
|
100 |
+
|
101 |
with gr.Blocks() as demo:
|
102 |
with gr.Tab('Dataset'):
|
103 |
gr.Markdown('Visualize the dataset to apply CFML')
|
|
|
106 |
'Summer','TotalClothingValue','TotalClothingValue+3Binary'], label='Dataset')
|
107 |
output_vis = gr.DataFrame()
|
108 |
button_vis = gr.Button(label="Run")
|
109 |
+
|
110 |
with gr.Tab('Model'):
|
111 |
gr.Markdown('Choose the features to apply CFML')
|
112 |
input_df = gr.Dropdown(['IndividualClothingBinary','IndividualClothingBinary+3Binary',
|
|
|
120 |
model_output = gr.Textbox(label='Status')
|
121 |
button_model = gr.Button(label="Train Model")
|
122 |
|
123 |
+
# List all .pkl model files found in the current directory
|
124 |
+
models = []
|
125 |
+
for file in os.listdir('./'):
|
126 |
+
if file.endswith('.pkl'):
|
127 |
+
models.append(file.split('.')[0])
|
128 |
+
|
129 |
+
with gr.Tab('Counterfactuals-Total'):
|
130 |
+
gr.Markdown('Generate Counterfactuals for Total CLO Dataset')
|
131 |
+
input_df_T = gr.Dropdown(['Multi_TotalCLO_w_Chair','Summer',
|
132 |
+
'TotalClothingValue','TotalClothingValue+3Binary'], label='Dataset')
|
133 |
+
target_T = gr.CheckboxGroup(['TSV','TPV','TCV','TSL'], label='Target Metric', info='Please select only one')
|
134 |
+
#target_T_range = gr.Textbox(label='Target Range', placeholder='Enter the target range [start,end]')
|
135 |
+
radio_T = gr.Radio(['Predefined', 'Custom'], label='Type of Input')
|
136 |
+
predefined_T = gr.Number(default=0, label='Number of inputs to provide')
|
137 |
+
custom_T = gr.Dataframe(
|
138 |
+
headers=['DAY','School','SchoolType','StartTime','AvgMaxDailyTemp','AvgMinDailyTemp','AvgIndoorRelativeHumidity',
|
139 |
+
'IndoorTempDuringSurvey','Grade','Age','Gender','FormalClothing','TotalCLOwithChair'],
|
140 |
+
row_count=(2, 'dynamic')
|
141 |
+
)
|
142 |
+
|
143 |
+
dropped_features_T = gr.CheckboxGroup(['SwC', 'MC', 'Grade', 'Age', 'Gender'],
|
144 |
+
label='Features to Drop', info='Select the features that are dropped from feature set')
|
145 |
+
|
146 |
+
freeze_features_T = gr.CheckboxGroup(['SchoolType','StartTime','AvgIndoorRelativeHumidity',
|
147 |
+
'IndoorTempDuringSurvey','Grade','Age','Gender','FormalClothing','TotalCLOwithChair'],
|
148 |
+
info = 'Select the features to be freezed to generate CFs')
|
149 |
+
model_T = gr.Dropdown(models, label='Model', info='Select the model to generate CFs')
|
150 |
+
button_cf_T = gr.Button(label="Generate CFs")
|
151 |
+
|
152 |
+
with gr.Tab('Counterfactuals-Individual'):
|
153 |
+
gr.Markdown('Generate Counterfactuals for Individual Clothing Dataset')
|
154 |
+
input_df_I = gr.Dropdown(['IndividualClothingBinary','IndividualClothingBinary+3Binary',
|
155 |
+
'IndividualClothingValue','IndividualClothingValue+3Binary'], label='Dataset')
|
156 |
+
target_I = gr.CheckboxGroup(['TSV','TPV','TCV','TSL'], label='Target Metric', info='Please select only one')
|
157 |
+
radio_I = gr.Radio(['Predefined', 'Custom'], label='Type of Input')
|
158 |
+
predefined_I = gr.Number(default=0, label='Number of inputs to provide')
|
159 |
+
custom_I = gr.Dataframe(
|
160 |
+
headers=['DAY','School','SchoolType','StartTime','AvgMaxDailyTemp','AvgMinDailyTemp','AvgIndoorRelativeHumidity','IndoorTempDuringSurvey',
|
161 |
+
'Grade','Age','Gender','FormalClothing','Pant','Trackpant','Halfshirt','Blazer','Jacket','Skirt',
|
162 |
+
'FullShirt','HalfSweater','Tshirt','Socks','Thermal','Vest','FullSweater','SwC','MC'],
|
163 |
+
row_count=(2, 'dynamic')
|
164 |
+
)
|
165 |
+
dropped_features_I = gr.CheckboxGroup(['SwC', 'MC', 'Grade', 'Age', 'Gender'],
|
166 |
+
label='Features to Drop', info='Select the features that are dropped from feature set')
|
167 |
+
|
168 |
+
freeze_features_I = gr.CheckboxGroup(['SchoolType','StartTime','AvgIndoorRelativeHumidity','IndoorTempDuringSurvey',
|
169 |
+
'Grade','Age','Gender', 'FormalClothing','Pant','Trackpant','Halfshirt','Blazer','Jacket','Skirt',
|
170 |
+
'FullShirt','HalfSweater','Tshirt','Socks','Thermal','Vest','FullSweater','SwC','MC'],
|
171 |
+
info='Select the features to be freezed to generate CFs')
|
172 |
+
|
173 |
+
|
174 |
+
model_I = gr.Dropdown(models, label='Model', info='Select the model to generate CFs')
|
175 |
+
button_cf_I = gr.Button(label="Generate CFs")
|
176 |
+
|
177 |
button_vis.click(read_csv, df_vis, outputs=output_vis)
|
178 |
button_model.click(train_model, [input_df, target, test_size, model_name, features_to_drop], outputs=model_output)
|
179 |
+
button_cf_T.click(generate_cfs_total, [input_df_T, target_T, radio_T, predefined_T,
|
180 |
+
custom_T, dropped_features_T, freeze_features_T, model_T],
|
181 |
+
outputs=output_vis)
|
182 |
|
183 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
numpy
|
2 |
pandas
|
3 |
-
scikit-learn
|
|
|
|
1 |
numpy
|
2 |
pandas
|
3 |
+
scikit-learn
|
4 |
+
dice-ml
|