Spaces:
Build error
Build error
| from flask import Flask, render_template, request, redirect, url_for, flash, send_file | |
| import os | |
| import pandas as pd | |
| from werkzeug.utils import secure_filename | |
| from joblib import load | |
| import numpy as np | |
| from sklearn.preprocessing import LabelEncoder | |
| from time import time | |
app = Flask(__name__)

# Secret key used to sign the session cookie (flash messages depend on it).
# Prefer a stable key from the environment: a key generated with os.urandom()
# changes on every restart and differs between worker processes, which
# invalidates sessions and pending flash messages. The random key is kept only
# as a fallback for local development.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(24)

# Folders for uploaded inputs and generated reports.
UPLOAD_FOLDER = "uploads/"
DATA_FOLDER = "data/"

# Directories holding the serialized models and label encoders.
MODEL_DIR = r'./Model'
# NOTE: the misspelling ("ENOCDER") is kept on purpose; the encoder-loading
# code further down refers to this exact name.
LABEL_ENOCDER_DIR = r'./Label_encoders'

# Default report paths; these module-level names are rebound after each
# prediction run.
PRED_OUTPUT_FILE = "data/pred_output.csv"
CLASS_OUTPUT_FILE = "data/class_output.csv"

# Upload extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'csv', 'xlsx'}

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
# The report CSVs are written under DATA_FOLDER, so make sure it exists too.
os.makedirs(DATA_FOLDER, exist_ok=True)
| # ------------------------------ | |
| # Load Models and Label Encoders | |
| # ------------------------------ | |
def _load_model(filename):
    """Load one joblib artifact stored under MODEL_DIR."""
    return load(os.path.join(MODEL_DIR, filename))


# Models used for the price columns of the prediction report.
gia_model = _load_model('linear_regression_model_gia_price.joblib')
grade_model = _load_model('linear_regression_model_grade_price.joblib')
bygrade_model = _load_model('linear_regression_model_bygrade_price.joblib')
makable_model = _load_model('linear_regression_model_makable_price.joblib')

# Models used for the *_change columns of the classification report.
col_model = _load_model('classification_LogisticRegression_col.joblib')
cts_model = _load_model('classification_LogisticRegression_cts.joblib')
cut_model = _load_model('classification_LogisticRegression_cut.joblib')
qua_model = _load_model('classification_LogisticRegression_qua.joblib')
shp_model = _load_model('classification_LogisticRegression_shp.joblib')

# Models used for the Change_*_Eng_to_Mkbl_value columns.
blk_eng_to_mkbl_model = _load_model('classification_LogisticRegression_mkbl_blk.joblib')
wht_eng_to_mkbl_model = _load_model('classification_LogisticRegression_mkbl_wht.joblib')
open_eng_to_mkbl_model = _load_model('classification_LogisticRegression_mkbl_open.joblib')
pav_eng_to_mkbl_model = _load_model('classification_LogisticRegression_mkbl_pav.joblib')

# Models used for the Change_*_Eng_to_Grd_value columns.
blk_eng_to_grade_model = _load_model('classification_LogisticRegression_grade_blk.joblib')
wht_eng_to_grade_model = _load_model('classification_LogisticRegression_grade_wht.joblib')
open_eng_to_grade_model = _load_model('classification_LogisticRegression_grade_open.joblib')
pav_eng_to_grade_model = _load_model('classification_LogisticRegression_grade_pav.joblib')

# Models used for the Change_*_Eng_to_ByGrd_value columns.
blk_eng_to_bygrade_model = _load_model('classification_LogisticRegression_bygrade_blk.joblib')
wht_eng_to_bygrade_model = _load_model('classification_LogisticRegression_bygrade_wht.joblib')
open_eng_to_bygrade_model = _load_model('classification_LogisticRegression_bygrade_open.joblib')
pav_eng_to_bygrade_model = _load_model('classification_LogisticRegression_bygrade_pav.joblib')

# Models used for the Change_*_Eng_to_Gia_value columns.
blk_eng_to_gia_model = _load_model('classification_LogisticRegression_gia_blk.joblib')
wht_eng_to_gia_model = _load_model('classification_LogisticRegression_gia_wht.joblib')
open_eng_to_gia_model = _load_model('classification_LogisticRegression_gia_open.joblib')
pav_eng_to_gia_model = _load_model('classification_LogisticRegression_gia_pav.joblib')
# Names of every label encoder stored on disk: the input feature columns
# first, followed by the classification target columns.
encoder_list = [
    'Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
    'EngNts', 'EngMikly', 'EngLab', 'EngBlk', 'EngWht', 'EngOpen', 'EngPav',
    'Change_cts_value', 'Change_shape_value', 'Change_quality_value',
    'Change_color_value', 'Change_cut_value',
    'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
    'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value',
    'Change_Blk_Eng_to_Grd_value', 'Change_Wht_Eng_to_Grd_value',
    'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
    'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value',
    'Change_Open_Eng_to_ByGrd_value', 'Change_Pav_Eng_to_ByGrd_value',
    'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
    'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value',
]

# Maps each encoder name to the object loaded from
# "<LABEL_ENOCDER_DIR>/label_encoder_<name>.joblib".
loaded_label_encoder = {
    name: load(os.path.join(LABEL_ENOCDER_DIR, f"label_encoder_{name}.joblib"))
    for name in encoder_list
}
| # ------------------------------ | |
| # Utility: Allowed File Check | |
| # ------------------------------ | |
def allowed_file(filename):
    """Return True when *filename* ends in one of ALLOWED_EXTENSIONS.

    The text after the last dot is compared case-insensitively; a name
    without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
| # ------------------------------ | |
| # Routes | |
| # ------------------------------ | |
def index():
    """Render and return the ``index.html`` template."""
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source -- confirm that it is registered with the app.
    page = render_template('index.html')
    return page
def predict():
    """Handle an uploaded CSV/Excel file and build both report files.

    The upload is validated, saved under the configured upload folder, read
    into a DataFrame and passed to ``process_dataframe``. The two resulting
    frames are written as CSV files whose paths are stored in the module-level
    ``PRED_OUTPUT_FILE`` / ``CLASS_OUTPUT_FILE`` names, after which the client
    is redirected to the first page of the prediction report.

    On any validation or processing failure a message is flashed and the
    client is redirected back to the requesting URL.
    """
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source -- confirm that it is registered (and accepts POST).
    if 'file' not in request.files:
        flash('No file part', 'error')
        return redirect(request.url)

    file = request.files['file']
    if file.filename == '':
        flash('No selected file', 'error')
        return redirect(request.url)

    if not (file and allowed_file(file.filename)):
        flash('Invalid file type. Only CSV and Excel files are allowed.', 'error')
        return redirect(request.url)

    # Take the extension from the validated original name and lower-case it,
    # mirroring allowed_file(). The sanitized name cannot be trusted for this:
    # secure_filename() may drop characters (even the dot), and "DATA.CSV"
    # would fail a case-sensitive ".csv" check and be sent to read_excel.
    extension = file.filename.rsplit('.', 1)[1].lower()
    filename = secure_filename(file.filename)
    if not filename:
        # secure_filename() can return an empty string; fall back to a fixed
        # name so the upload is not saved onto the folder path itself.
        filename = f'upload.{extension}'
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(filepath)

    # Convert file to DataFrame.
    try:
        if extension == 'csv':
            df = pd.read_csv(filepath)
        else:
            df = pd.read_excel(filepath)
    except (ValueError, OSError) as exc:
        # Empty/malformed CSVs and unreadable files end up here; report them
        # to the user instead of answering with a server error.
        flash(f'Error processing file: {exc}', 'error')
        return redirect(request.url)

    # Process the DataFrame and generate predictions and classification analysis.
    df_pred, dx_class = process_dataframe(df)
    if df_pred.empty:
        # process_dataframe() returns empty frames (and flashes the reason)
        # when it fails. Writing them out would leave the report view with
        # CSV files that pandas cannot parse.
        return redirect(request.url)

    # Save output files named after the current date. Runs on the same day
    # overwrite each other, and the paths are process-wide globals shared by
    # every client of this process.
    current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
    global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE
    PRED_OUTPUT_FILE = f'data/prediction_output_{current_date}.csv'
    CLASS_OUTPUT_FILE = f'data/classification_output_{current_date}.csv'
    os.makedirs(os.path.dirname(PRED_OUTPUT_FILE), exist_ok=True)
    df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
    dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)

    # Redirect to report view; default to prediction report, page 1.
    return redirect(url_for('report_view', report_type='pred', page=1))
def process_dataframe(df):
    """Build the prediction and classification report frames from *df*.

    Args:
        df: DataFrame that must contain the columns ``Tag``, ``EngCts``,
            ``EngShp``, ``EngQua``, ``EngCol``, ``EngCut``, ``EngPol``,
            ``EngSym``, ``EngFlo``, ``EngNts``, ``EngMikly``, ``EngAmt``,
            ``EngBlk``, ``EngWht``, ``EngOpen`` and ``EngPav``.

    Returns:
        A ``(df_pred, dx)`` tuple. ``df_pred`` holds the encoded input columns
        plus ``<name>_Predicted`` / ``<name>_Diff`` columns for each price
        model. ``dx`` holds everything in ``df_pred`` plus the decoded output
        of every classification model. If anything fails, the error is
        flashed and two empty DataFrames are returned instead.
    """
    # Categorical input columns shared by both feature sets.
    categorical_columns = ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
                           'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']
    # Additional columns that only the second group of classifiers receives.
    extra_class_columns = ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']

    try:
        required_columns = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
                            'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngAmt']
        required_columns_2 = required_columns + extra_class_columns

        # Two working frames: one for the price models, one (with the extra
        # columns, missing values replaced by "NA") for the classifiers.
        df_pred = df[required_columns].copy()
        df_class = df[required_columns_2].fillna("NA").copy()

        # Encode the shared categorical columns once and reuse the encoded
        # values in the classification frame.
        for col in categorical_columns:
            df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
            df_class[col] = df_pred[col]

        for col in extra_class_columns:
            df_class[col] = loaded_label_encoder[col].transform(df_class[col])

        df_pred = df_pred.astype(float)
        df_class = df_class.astype(float)

        # -------------------------
        # Prediction Report Section
        # -------------------------
        # Snapshot the feature columns before result columns are appended.
        x = df_pred.copy()
        price_models = [
            ('GIA', gia_model),
            ('Grade', grade_model),
            ('ByGrade', bygrade_model),
            ('Makable', makable_model),
        ]
        # Two passes keep the column order: all *_Predicted, then all *_Diff.
        for label, model in price_models:
            df_pred[f'{label}_Predicted'] = model.predict(x)
        for label, _ in price_models:
            df_pred[f'{label}_Diff'] = df_pred['EngAmt'] - df_pred[f'{label}_Predicted']

        # -------------------------
        # Classification Report Section
        # -------------------------
        dx = df_pred.copy()  # Start with the prediction data.

        # (output column, model, encoder that decodes the predicted labels);
        # these models take the same features as the price models.
        attribute_models = [
            ('col_change', col_model, 'Change_color_value'),
            ('cts_change', cts_model, 'Change_cts_value'),
            ('cut_change', cut_model, 'Change_cut_value'),
            ('qua_change', qua_model, 'Change_quality_value'),
            ('shp_change', shp_model, 'Change_shape_value'),
        ]
        for column, model, encoder_key in attribute_models:
            encoder = loaded_label_encoder[encoder_key]
            dx[column] = encoder.inverse_transform(model.predict(x))

        # These models take the classification frame (with the extra columns);
        # the output column name doubles as the label-encoder key.
        change_models = [
            ('Change_Blk_Eng_to_Mkbl_value', blk_eng_to_mkbl_model),
            ('Change_Wht_Eng_to_Mkbl_value', wht_eng_to_mkbl_model),
            ('Change_Open_Eng_to_Mkbl_value', open_eng_to_mkbl_model),
            ('Change_Pav_Eng_to_Mkbl_value', pav_eng_to_mkbl_model),
            ('Change_Blk_Eng_to_Grd_value', blk_eng_to_grade_model),
            ('Change_Wht_Eng_to_Grd_value', wht_eng_to_grade_model),
            ('Change_Open_Eng_to_Grd_value', open_eng_to_grade_model),
            ('Change_Pav_Eng_to_Grd_value', pav_eng_to_grade_model),
            ('Change_Blk_Eng_to_ByGrd_value', blk_eng_to_bygrade_model),
            ('Change_Wht_Eng_to_ByGrd_value', wht_eng_to_bygrade_model),
            ('Change_Open_Eng_to_ByGrd_value', open_eng_to_bygrade_model),
            ('Change_Pav_Eng_to_ByGrd_value', pav_eng_to_bygrade_model),
            ('Change_Blk_Eng_to_Gia_value', blk_eng_to_gia_model),
            ('Change_Wht_Eng_to_Gia_value', wht_eng_to_gia_model),
            ('Change_Open_Eng_to_Gia_value', open_eng_to_gia_model),
            ('Change_Pav_Eng_to_Gia_value', pav_eng_to_gia_model),
        ]
        for column, model in change_models:
            encoder = loaded_label_encoder[column]
            dx[column] = encoder.inverse_transform(model.predict(df_class))

        # Return the full frames; pagination happens in the report view.
        return df_pred, dx
    except Exception as e:
        # Boundary handler: any failure (missing column, unseen label, model
        # error) is reported to the user and signalled with empty frames.
        flash(f'Error processing file: {e}', 'error')
        return pd.DataFrame(), pd.DataFrame()
| # ------------------------------ | |
| # Report View Route with Pagination & Toggle | |
| # ------------------------------ | |
def report_view():
    """Render one page of the prediction or classification report.

    Query parameters:
        report_type: ``'pred'`` selects the prediction report; any other
            value selects the classification report. Defaults to ``'pred'``.
        page: 1-based page number. Non-numeric values and values below 1
            fall back to page 1.

    Returns:
        The rendered ``output.html`` template with the page's rows as an HTML
        table, or a redirect to the ``index`` view when no report file is
        available yet.
    """
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source -- confirm that it is registered with the app.
    report_type = request.args.get('report_type', 'pred')
    try:
        page = int(request.args.get('page', 1))
    except ValueError:
        page = 1
    # A page of 0 or less would yield negative slice bounds, which iloc
    # interprets as offsets from the end of the frame.
    page = max(page, 1)
    per_page = 15  # records per page

    # Read the appropriate CSV file.
    report_path = PRED_OUTPUT_FILE if report_type == 'pred' else CLASS_OUTPUT_FILE
    try:
        df = pd.read_csv(report_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No prediction has been produced yet (or the last one wrote nothing).
        flash('No report available. Please upload a file first.', 'error')
        return redirect(url_for('index'))

    # Calculate pagination indices.
    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page
    total_records = len(df)

    # Slice the DataFrame for the current page.
    df_page = df.iloc[start_idx:end_idx]
    table_html = df_page.to_html(classes="data-table", index=False)

    # Determine if previous/next pages exist.
    has_prev = page > 1
    has_next = end_idx < total_records

    return render_template('output.html',
                           table_html=table_html,
                           report_type=report_type,
                           page=page,
                           has_prev=has_prev,
                           has_next=has_next)
| # ------------------------------ | |
| # Download Routes (remain unchanged) | |
| # ------------------------------ | |
def download_pred():
    """Send the current prediction report CSV as a file download."""
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source -- confirm that it is registered with the app.
    # The report is written by pandas relative to the working directory, but
    # Flask's send_file() resolves relative paths against the application
    # root. Passing an absolute path makes both refer to the same file.
    return send_file(os.path.abspath(PRED_OUTPUT_FILE), as_attachment=True)
def download_class():
    """Send the current classification report CSV as a file download."""
    # NOTE(review): no @app.route decorator is visible on this view in the
    # reviewed source -- confirm that it is registered with the app.
    # The report is written by pandas relative to the working directory, but
    # Flask's send_file() resolves relative paths against the application
    # root. Passing an absolute path makes both refer to the same file.
    return send_file(os.path.abspath(CLASS_OUTPUT_FILE), as_attachment=True)
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless it is
    # explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled)