### ----------------------------- ###
### libraries ###
### ----------------------------- ###

import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics


### ------------------------------ ###
### data transformation ###
### ------------------------------ ###

# load dataset
uncleaned_data = pd.read_csv('data.csv')

# remove timestamp from dataset (always first column)
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are
# structure: {colname1: {...}, colname2: {...} }
cat_value_dicts = {}
final_colname = uncleaned_data.columns[-1]

# for each column...
for (colname, colval) in uncleaned_data.items():

    # check if col is already a number; if so, add col directly
    # to new dataframe and skip to next column
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # structure: { "val_name": 0, "val_name_2": 1, ...}
    new_dict = {}
    # reverse structure for final col: {0: "val_name", 1: "val_name_2"}
    rev_dict = {}
    val = 0                    # first index per column
    transformed_col_vals = []  # new numeric datapoints

    # if not, for each item in that column...
    for (row, item) in enumerate(colval.values):

        # if item is not in this col's dict...
        if item not in new_dict:
            new_dict[item] = val
            rev_dict[val] = item
            val += 1

        # then add numerical value to transformed dataframe
        transformed_col_vals.append(new_dict[item])

    # reverse dictionary only for final col (0, 1) => (vals)
    if colname == final_colname:
        cat_value_dicts[colname] = rev_dict
    else:
        cat_value_dicts[colname] = new_dict

    data[colname] = transformed_col_vals


### -------------------------------- ###
### model training ###
### -------------------------------- ###

# select features and prediction; automatically selects last column as prediction
cols = len(data.columns)
num_features = cols - 1
x = data.iloc[:, :num_features]
y = data.iloc[:, num_features:]

# split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

# instantiate the model (using default parameters);
# max_iter is raised so the solver converges without a ConvergenceWarning
model = LogisticRegression(max_iter=1000)
model.fit(x_train, y_train.values.ravel())
y_pred = model.predict(x_test)


### -------------------------------- ###
### article generation ###
### -------------------------------- ###

def get_feat():
    # model.coef_ has one row of coefficients per class
    # (a single row for binary problems)
    if len(model.coef_) > 1:
        # for multi-class, average the absolute coefficients across all classes
        importance = np.mean([abs(c) for c in model.coef_], axis=0)
    else:
        importance = abs(model.coef_[0])
    idx = np.argmax(importance)
    return data.columns[idx]

acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
most_imp_feat = get_feat()

# borrow file reading function from reader.py
# info = get_article(acc, most_imp_feat)


### ------------------------------- ###
### interface creation ###
### ------------------------------- ###

# predictor for generic number of features
def general_predictor(*args):
    try:
        features = []

        # transform categorical input
        for colname, arg in zip(data.columns, args):
            if colname in cat_value_dicts:
                features.append(cat_value_dicts[colname][arg])
            else:
                features.append(arg)

        # predict single datapoint
        new_input = [features]
        result = model.predict(new_input)
        return cat_value_dicts[final_colname][result[0]]
    except Exception:
        return "Error: Please make sure all fields are filled out."

# add data labels to replace those lost via star-args
block = gr.Blocks()

with open('info.md') as f:
    with block:
        gr.Markdown(f.readline())
        gr.Markdown('Take the quiz to get a personalized recommendation using AI.')

        with gr.Row():
            # gr.Group is used in place of gr.Box, which newer Gradio versions removed
            with gr.Group():
                inputls = []
                for colname in data.columns:
                    # skip last column
                    if colname == final_colname:
                        continue

                    # access categories dict if data is categorical
                    # otherwise, just use a number input
                    if colname in cat_value_dicts:
                        radio_options = list(cat_value_dicts[colname].keys())
                        inputls.append(gr.Dropdown(radio_options, type="value", label=colname))
                    else:
                        # add numerical input
                        inputls.append(gr.Number(label=colname))

                submit = gr.Button("Click to see your personalized result!", variant="primary")
                output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")
                submit.click(fn=general_predictor, inputs=inputls, outputs=output)

        with gr.Row():
            with gr.Group():
                gr.Markdown(f"Accuracy: {acc}")
            with gr.Group():
                gr.Markdown(f"Most important feature: {most_imp_feat}")

        with gr.Group():
            gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for that dataset. Model accuracy and most important feature can be helpful for understanding how the model works, but should not be considered absolute facts about the real world.''')

        with gr.Group():
            with open('info.md') as f:
                f.readline()
                gr.Markdown(f.read())

# show the interface
block.launch()