Nityanand Mathur committed on
Commit
e4b4b59
·
1 Parent(s): f8c9b15
Files changed (2) hide show
  1. app.py +107 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,5 +1,11 @@
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
 
5
  def read_csv(dataset):
@@ -8,11 +14,9 @@ def read_csv(dataset):
8
  return data.head(10)
9
 
10
  def train_model(input_df, target, test_size, model_name, features_to_drop):
11
- import pickle
12
 
13
  from sklearn.compose import ColumnTransformer
14
  from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
15
- from sklearn.model_selection import train_test_split
16
  from sklearn.pipeline import Pipeline
17
  from sklearn.preprocessing import OneHotEncoder, StandardScaler
18
 
@@ -49,9 +53,51 @@ def train_model(input_df, target, test_size, model_name, features_to_drop):
49
  regr = Pipeline(steps=[('preprocessor', transformations),
50
  ('regressor', RandomForestRegressor())])
51
  model = regr.fit(X_train, y_train)
52
- pickle.dump(model, open('./' + model_name + '.pkl', 'wb'))
53
  return 'Model Saved'
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  with gr.Blocks() as demo:
56
  with gr.Tab('Dataset'):
57
  gr.Markdown('Visualize the dataset to apply CFML')
@@ -60,6 +106,7 @@ with gr.Blocks() as demo:
60
  'Summer','TotalClothingValue','TotalClothingValue+3Binary'], label='Dataset')
61
  output_vis = gr.DataFrame()
62
  button_vis = gr.Button(label="Run")
 
63
  with gr.Tab('Model'):
64
  gr.Markdown('Choose the features to apply CFML')
65
  input_df = gr.Dropdown(['IndividualClothingBinary','IndividualClothingBinary+3Binary',
@@ -73,8 +120,64 @@ with gr.Blocks() as demo:
73
  model_output = gr.Textbox(label='Status')
74
  button_model = gr.Button(label="Train Model")
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  button_vis.click(read_csv, df_vis, outputs=output_vis)
77
  button_model.click(train_model, [input_df, target, test_size, model_name, features_to_drop], outputs=model_output)
78
-
 
 
79
 
80
  demo.launch()
 
1
+ import os
2
+ import pickle
3
+ import random
4
+
5
+ import dice_ml
6
  import gradio as gr
7
  import pandas as pd
8
+ from sklearn.model_selection import train_test_split
9
 
10
 
11
  def read_csv(dataset):
 
14
  return data.head(10)
15
 
16
  def train_model(input_df, target, test_size, model_name, features_to_drop):
 
17
 
18
  from sklearn.compose import ColumnTransformer
19
  from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
20
  from sklearn.pipeline import Pipeline
21
  from sklearn.preprocessing import OneHotEncoder, StandardScaler
22
 
 
53
  regr = Pipeline(steps=[('preprocessor', transformations),
54
  ('regressor', RandomForestRegressor())])
55
  model = regr.fit(X_train, y_train)
56
+ pickle.dump(model, open('./' + model_name + '.pkl', 'wb'))
57
  return 'Model Saved'
58
 
59
def generate_cfs_total(input_df_T, target_T, radio_T, predefined_T, custom_T,
                       dropped_features_T, freeze_features_T, model_T):
    """Generate counterfactuals for a Total-CLO dataset using a saved model.

    Args:
        input_df_T: Dataset name; the CSV is read from ``./data/<name>.csv``.
        target_T: List of selected target metrics; only the first entry is
            used. Expected to be one of 'TSV', 'TPV', 'TCV', 'TSL'.
        radio_T: 'Predefined' to sample query rows from the held-out split,
            or 'Custom' to use ``custom_T`` as the query instances.
        predefined_T: Number of consecutive held-out rows to use as query
            instances when ``radio_T == 'Predefined'``.
        custom_T: Query instances passed to DiCE unchanged when
            ``radio_T == 'Custom'``.
        dropped_features_T: Columns removed from the dataset before use.
        freeze_features_T: Additional columns DiCE must not vary.
        model_T: Model name; the pickle is read from ``./<name>.pkl``.

    Returns:
        Whatever ``visualize_as_dataframe(show_only_changes=True)`` returns
        for the generated counterfactual explanations.

    Raises:
        ValueError: If no target is selected, ``radio_T`` is not one of the
            two supported options, or fewer than one predefined input is
            requested.
    """
    # Validate the UI inputs before touching the disk so the user gets a clear
    # message instead of an IndexError/UnboundLocalError further down.
    if not target_T:
        raise ValueError('Select one target metric')
    if radio_T not in ('Predefined', 'Custom'):
        raise ValueError("Type of Input must be 'Predefined' or 'Custom'")
    n_queries = 0
    if radio_T == 'Predefined':
        if predefined_T is None or int(predefined_T) < 1:
            raise ValueError('Number of inputs to provide must be at least 1')
        n_queries = int(predefined_T)

    target_name = target_T[0]

    data = pd.read_csv('./data/' + input_df_T + '.csv')

    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that were produced by this application.
    with open('./' + model_T + '.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    data = data.drop(dropped_features_T or [], axis=1)

    # Keep only the selected target among the four outcome metrics.
    other_metrics = [m for m in ['TSV', 'TPV', 'TCV', 'TSL'] if m != target_name]
    data = data.drop(other_metrics, axis=1)

    features = data.drop(target_name, axis=1).columns.tolist()
    target = data[target_name]
    # Drop the *selected* target; a hard-coded 'TSV' fails for every other
    # target because 'TSV' has already been removed with the other metrics.
    datasetX = data.drop(target_name, axis=1)
    _, x_test, _, _ = train_test_split(datasetX,
                                       target,
                                       test_size=0.2,
                                       random_state=0)

    always_immutable = ['AvgMaxDailyTemp', 'AvgMinDailyTemp', 'School', 'DAY', 'StartTime']
    freezed = always_immutable + list(freeze_features_T or []) + [target_name]
    features_to_vary = data.columns.difference(freezed).to_list()

    d = dice_ml.Data(dataframe=data, continuous_features=features, outcome_name=target_name)
    m = dice_ml.Model(model=model, backend='sklearn', model_type='regressor')
    exp = dice_ml.Dice(d, m, method='genetic')

    if radio_T == 'Predefined':
        # random.randint is inclusive on both ends, so bound the start index
        # by the held-out split (the frame actually sliced) and leave room
        # for n_queries rows.
        last_start = max(len(x_test) - n_queries, 0)
        start = random.randint(0, last_start)
        query_instances = x_test.iloc[start:start + n_queries]
    else:
        # NOTE(review): custom_T is forwarded as received from the UI; confirm
        # its columns and dtypes match what the model was trained on.
        query_instances = custom_T

    dice_exp = exp.generate_counterfactuals(query_instances, total_CFs=4,
                                            desired_range=[0.0, 2.0],
                                            features_to_vary=features_to_vary)
    # NOTE(review): confirm visualize_as_dataframe returns a DataFrame rather
    # than only rendering it; if it returns None the output table stays empty.
    return dice_exp.visualize_as_dataframe(show_only_changes=True)
96
+
97
+
98
def generate_cfs_individual(input_df_I, target_I, radio_I, predefined_I, custom_I, dropped_features_I, freeze_features_I, model_I):
    """Placeholder for the Individual-Clothing counterfactual generator.

    Not implemented yet: all arguments are ignored and None is returned.
    """
    return None
100
+
101
  with gr.Blocks() as demo:
102
  with gr.Tab('Dataset'):
103
  gr.Markdown('Visualize the dataset to apply CFML')
 
106
  'Summer','TotalClothingValue','TotalClothingValue+3Binary'], label='Dataset')
107
  output_vis = gr.DataFrame()
108
  button_vis = gr.Button(label="Run")
109
+
110
  with gr.Tab('Model'):
111
  gr.Markdown('Choose the features to apply CFML')
112
  input_df = gr.Dropdown(['IndividualClothingBinary','IndividualClothingBinary+3Binary',
 
120
  model_output = gr.Textbox(label='Status')
121
  button_model = gr.Button(label="Train Model")
122
 
123
+ # list all .pkl model files saved in the current directory
124
+ models = []
125
+ for file in os.listdir('./'):
126
+ if file.endswith('.pkl'):
127
+ models.append(file.split('.')[0])
128
+
129
+ with gr.Tab('Counterfactuals-Total'):
130
+ gr.Markdown('Generate Counterfactuals for Total CLO Dataset')
131
+ input_df_T = gr.Dropdown(['Multi_TotalCLO_w_Chair','Summer',
132
+ 'TotalClothingValue','TotalClothingValue+3Binary'], label='Dataset')
133
+ target_T = gr.CheckboxGroup(['TSV','TPV','TCV','TSL'], label='Target Metric', info='Please select only one')
134
+ #target_T_range = gr.Textbox(label='Target Range', placeholder='Enter the target range [start,end]')
135
+ radio_T = gr.Radio(['Predefined', 'Custom'], label='Type of Input')
136
+ predefined_T = gr.Number(default=0, label='Number of inputs to provide')
137
+ custom_T = gr.Dataframe(
138
+ headers=['DAY','School','SchoolType','StartTime','AvgMaxDailyTemp','AvgMinDailyTemp','AvgIndoorRelativeHumidity',
139
+ 'IndoorTempDuringSurvey','Grade','Age','Gender','FormalClothing','TotalCLOwithChair'],
140
+ row_count=(2, 'dynamic')
141
+ )
142
+
143
+ dropped_features_T = gr.CheckboxGroup(['SwC', 'MC', 'Grade', 'Age', 'Gender'],
144
+ label='Features to Drop', info='Select the features that are dropped from feature set')
145
+
146
+ freeze_features_T = gr.CheckboxGroup(['SchoolType','StartTime','AvgIndoorRelativeHumidity',
147
+ 'IndoorTempDuringSurvey','Grade','Age','Gender','FormalClothing','TotalCLOwithChair'],
148
+ info = 'Select the features to be freezed to generate CFs')
149
+ model_T = gr.Dropdown(models, label='Model', info='Select the model to generate CFs')
150
+ button_cf_T = gr.Button(label="Generate CFs")
151
+
152
+ with gr.Tab('Counterfactuals-Individual'):
153
+ gr.Markdown('Generate Counterfactuals for Individual Clothing Dataset')
154
+ input_df_I = gr.Dropdown(['IndividualClothingBinary','IndividualClothingBinary+3Binary',
155
+ 'IndividualClothingValue','IndividualClothingValue+3Binary'], label='Dataset')
156
+ target_I = gr.CheckboxGroup(['TSV','TPV','TCV','TSL'], label='Target Metric', info='Please select only one')
157
+ radio_I = gr.Radio(['Predefined', 'Custom'], label='Type of Input')
158
+ predefined_I = gr.Number(default=0, label='Number of inputs to provide')
159
+ custom_I = gr.Dataframe(
160
+ headers=['DAY','School','SchoolType','StartTime','AvgMaxDailyTemp','AvgMinDailyTemp','AvgIndoorRelativeHumidity','IndoorTempDuringSurvey',
161
+ 'Grade','Age','Gender','FormalClothing','Pant','Trackpant','Halfshirt','Blazer','Jacket','Skirt',
162
+ 'FullShirt','HalfSweater','Tshirt','Socks','Thermal','Vest','FullSweater','SwC','MC'],
163
+ row_count=(2, 'dynamic')
164
+ )
165
+ dropped_features_I = gr.CheckboxGroup(['SwC', 'MC', 'Grade', 'Age', 'Gender'],
166
+ label='Features to Drop', info='Select the features that are dropped from feature set')
167
+
168
+ freeze_features_I = gr.CheckboxGroup(['SchoolType','StartTime','AvgIndoorRelativeHumidity','IndoorTempDuringSurvey',
169
+ 'Grade','Age','Gender', 'FormalClothing','Pant','Trackpant','Halfshirt','Blazer','Jacket','Skirt',
170
+ 'FullShirt','HalfSweater','Tshirt','Socks','Thermal','Vest','FullSweater','SwC','MC'],
171
+ info='Select the features to be freezed to generate CFs')
172
+
173
+
174
+ model_I = gr.Dropdown(models, label='Model', info='Select the model to generate CFs')
175
+ button_cf_I = gr.Button(label="Generate CFs")
176
+
177
  button_vis.click(read_csv, df_vis, outputs=output_vis)
178
  button_model.click(train_model, [input_df, target, test_size, model_name, features_to_drop], outputs=model_output)
179
+ button_cf_T.click(generate_cfs_total, [input_df_T, target_T, radio_T, predefined_T,
180
+ custom_T, dropped_features_T, freeze_features_T, model_T],
181
+ outputs=output_vis)
182
 
183
  demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  numpy
2
  pandas
3
- scikit-learn
 
 
1
  numpy
2
  pandas
3
+ scikit-learn
4
+ dice-ml