JaganathC committed on
Commit
fa7554c
·
1 Parent(s): b40c442

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -0
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.linear_model import LogisticRegression
6
+ from sklearn import metrics
7
+
8
+
9
+ ### ------------------------------ ###
10
+ ### data transformation ###
11
+ ### ------------------------------ ###
12
+
13
# Load the raw responses.
uncleaned_data = pd.read_csv('data.csv')

# Remove the timestamp from the dataset (always the first column).
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# Keep track of which columns are categorical and what those columns'
# value mappings are.
# structure: {colname1: {...}, colname2: {...}}
cat_value_dicts = {}
final_colname = uncleaned_data.columns[-1]

# `DataFrame.items()` is used instead of `iteritems()`, which was removed
# in pandas 2.0; both yield (column name, Series) pairs.
for colname, colval in uncleaned_data.items():

    # If the column's first value is already a number, copy the column
    # unchanged into the new dataframe and move on to the next column.
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # Otherwise encode each distinct value as an integer, numbered in order
    # of first appearance.
    # structure while encoding: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # new numeric datapoints
    for item in colval.values:
        if item not in new_dict:
            new_dict[item] = len(new_dict)
        transformed_col_vals.append(new_dict[item])

    # Reverse the dictionary only for the final column, (0, 1) => (vals),
    # so a predicted code can be turned back into its original value.
    if colname == final_colname:
        new_dict = {value: key for key, value in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
57
+
58
+
59
+ ### -------------------------------- ###
60
+ ### model training ###
61
+ ### -------------------------------- ###
62
+
63
# Select features and prediction target: every column but the last is a
# feature, and the last column is what the model predicts.
cols = data.shape[1]
num_features = cols - 1
x, y = data.iloc[:, :num_features], data.iloc[:, num_features:]

# Hold out a quarter of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Fit a logistic regression with default parameters; the single-column
# target frame is flattened to a 1-D array before fitting.
model = LogisticRegression().fit(x_train, y_train.values.ravel())
y_pred = model.predict(x_test)
76
+
77
+
78
+ ### -------------------------------- ###
79
+ ### article generation ###
80
+ ### -------------------------------- ###
81
+ # borrow file reading function from reader.py
82
+
83
def get_feat(fitted_model=None, feature_names=None):
    """Return the name of the feature with the largest total coefficient magnitude.

    The absolute coefficients are summed over every row of ``coef_`` so that
    a multiclass model (one coefficient row per class) is ranked on all of
    its classes rather than only the first one. For a binary model, which
    has a single row, this is the same as ranking that row alone.

    Args:
        fitted_model: Object exposing a ``coef_`` array. Defaults to the
            module-level ``model``.
        feature_names: Indexable sequence of column names aligned with the
            coefficient columns. Defaults to ``data.columns``.

    Returns:
        The entry of ``feature_names`` at the position of the largest summed
        absolute coefficient (the first such position on ties).
    """
    if fitted_model is None:
        fitted_model = model
    if feature_names is None:
        feature_names = data.columns

    # atleast_2d lets a flat coefficient vector be handled like a one-row matrix.
    coefs = np.atleast_2d(np.asarray(fitted_model.coef_, dtype=float))
    importance = np.abs(coefs).sum(axis=0)
    return feature_names[int(np.argmax(importance))]
88
+
89
# Accuracy on the held-out test split, as a percentage string rounded to
# one decimal place.
accuracy_pct = round(metrics.accuracy_score(y_test, y_pred) * 100, 1)
acc = str(accuracy_pct) + "%"

# Name of the feature that get_feat() reports as most important.
most_imp_feat = get_feat()
91
+ # info = get_article(acc, most_imp_feat)
92
+
93
+
94
+
95
+ ### ------------------------------- ###
96
+ ### interface creation ###
97
+ ### ------------------------------- ###
98
+
99
+
100
+ # predictor for generic number of features
101
def general_predictor(*args):
    """Predict a recommendation for one set of quiz answers.

    Args:
        *args: One answer per feature column, in the order of
            ``data.columns``. Answers for categorical columns are the
            original category values; other answers are passed to the
            model unchanged.

    Returns:
        The original label of the predicted class when the final column was
        categorical, otherwise the raw prediction as a string. If an answer
        is missing or is not a known category, a short prompt asking the
        user to complete the quiz is returned instead of raising.
    """
    incomplete_msg = "Please answer every question to get a recommendation."
    features = []

    # Transform categorical input into the integer codes used for training.
    for colname, arg in zip(data.columns, args):
        # An untouched input arrives as None (or an empty string).
        if arg is None or arg == "":
            return incomplete_msg

        mapping = cat_value_dicts.get(colname)
        if mapping is None:
            features.append(arg)
        elif arg in mapping:
            features.append(mapping[arg])
        else:
            # A category that is not in this column's mapping cannot be encoded.
            return incomplete_msg

    # Predict a single datapoint.
    result = model.predict([features])
    predicted = result[0]

    # The final column only has a code -> label mapping when it was categorical.
    label_map = cat_value_dicts.get(final_colname)
    if label_map is None:
        return str(predicted)
    return label_map.get(predicted, str(predicted))
115
+
116
+ # add data labels to replace those lost via star-args
117
+
118
+
119
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# statement order because the source had lost its indentation; confirm it
# against the intended layout.

# NOTE(review): gr.Box is believed to be absent from newer Gradio releases;
# fall back to gr.Group when it is missing. Confirm against the installed
# Gradio version.
_Panel = getattr(gr, "Box", None) or gr.Group

# Read info.md once: its first line is the page title and the remainder is
# the descriptive text shown after the quiz.
with open('info.md', encoding='utf-8') as f:
    info_title = f.readline()
    info_body = f.read()

block = gr.Blocks()

with block:
    gr.Markdown(info_title)
    gr.Markdown('Take the quiz to get a personalized recommendation using AI.')

    with gr.Row():
        with _Panel():
            inputls = []
            for colname in data.columns:
                # Skip the last column: it is the prediction target.
                if colname == final_colname:
                    continue

                # Categorical columns get a dropdown of their known values;
                # every other column gets a number input. The top-level
                # components replace the deprecated `gr.inputs` namespace.
                if colname in cat_value_dicts:
                    radio_options = list(cat_value_dicts[colname].keys())
                    inputls.append(gr.Dropdown(choices=radio_options, type="value", label=colname))
                else:
                    inputls.append(gr.Number(label=colname))
                gr.Markdown("<br />")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")

            submit.click(fn=general_predictor, inputs=inputls, outputs=output)
            gr.Markdown("<br />")

            with gr.Row():
                with _Panel():
                    gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
                with _Panel():
                    gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")

            gr.Markdown("<br />")

        with _Panel():
            gr.Markdown(info_body)

# show the interface
block.launch()