# NOTE: hosting-page header residue ("Spaces: Sleeping") from the scraped listing; not part of the program.
| from flask import Flask, render_template, request, redirect, url_for, send_file | |
| import os | |
| import shutil | |
| import pandas as pd | |
| from werkzeug.utils import secure_filename | |
| from joblib import load, dump | |
| import numpy as np | |
| from sklearn.preprocessing import LabelEncoder | |
| from time import time | |
| from huggingface_hub import hf_hub_download | |
| import pickle | |
| import uuid | |
| from pathlib import Path | |
| import numpy as np | |
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib as mpl | |
| import matplotlib.pyplot as plt | |
| import matplotlib.pylab as pylab | |
| from sklearn.preprocessing import OneHotEncoder, LabelEncoder | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.decomposition import PCA | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.tree import DecisionTreeRegressor | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.linear_model import LinearRegression | |
| from xgboost import XGBRegressor | |
| from sklearn.neighbors import KNeighborsRegressor | |
| from sklearn.model_selection import cross_val_score | |
| from sklearn.metrics import mean_squared_error | |
| from sklearn import metrics | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import PowerTransformer, StandardScaler | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV | |
| import lightgbm as lgb | |
| from catboost import CatBoostRegressor | |
| from sklearn.ensemble import StackingRegressor | |
| import json | |
app = Flask(__name__)

# Session-signing key. It is drawn from os.urandom at import time, so a new
# key is generated every time the process starts.
app.secret_key = os.urandom(24)

# Working directories (relative paths).
UPLOAD_FOLDER = "uploads/"
DATA_FOLDER = "data/"
MODEL_FOLDER = "models/"

# Model directory and label-encoder directory.
MODEL_DIR = r'./Model'
LABEL_ENCODER_DIR = r'./Label_encoders'  # Renamed for clarity

# Paths of the most recently written report files. They start unset and are
# assigned per prediction run (file names include a unique id to avoid
# overwriting).
PRED_OUTPUT_FILE = None
CLASS_OUTPUT_FILE = None

# Upload extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'csv', 'xlsx'}

# Register each working directory in the Flask config and make sure it exists.
for _config_key, _folder in (
    ('UPLOAD_FOLDER', UPLOAD_FOLDER),
    ('DATA_FOLDER', DATA_FOLDER),
    ('MODEL_FOLDER', MODEL_FOLDER),
):
    app.config[_config_key] = _folder
    os.makedirs(_folder, exist_ok=True)

# Report CSVs are written under the literal "data" directory, so it is
# created independently of DATA_FOLDER.
os.makedirs("data", exist_ok=True)
# ------------------------------
# Load Models and Label Encoders
# ------------------------------
# Hugging Face repository that hosts every pickled pipeline used below.
HF_MODEL_REPO = "WebashalarForML/Diamond_model_"


def _load_hub_model(hub_filename):
    """Download one pickled pipeline from the Hugging Face Hub and load it.

    The file is fetched with ``hf_hub_download`` (cached under MODEL_FOLDER),
    copied to ``MODEL_FOLDER/<basename of hub_filename>`` and deserialized
    with ``joblib.load``.

    Args:
        hub_filename: Path of the artifact inside the Hub repository.

    Returns:
        The object stored in the pickle file.
    """
    cached_path = hf_hub_download(
        repo_id=HF_MODEL_REPO,
        filename=hub_filename,
        cache_dir=MODEL_FOLDER,
    )
    local_path = os.path.join(MODEL_FOLDER, os.path.basename(hub_filename))
    shutil.copy(cached_path, local_path)
    # SECURITY NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code. Only keep this if the Hub repository is fully trusted.
    return load(local_path)


# Makable-amount regressors; process_dataframe() picks one per EngCts band
# (the band each model covers is taken from its file name).
makable_model_0 = _load_hub_model(
    "models_list/mkble/DecisionTree_best_pipeline_mkble_0_to_0.99_al.pkl"
)
makable_model_1 = _load_hub_model(
    "models_list/mkble/DecisionTree_best_pipeline_mkble_1_to_1.49.pkl"
)
makable_model_2 = _load_hub_model(
    "models_list/mkble/DecisionTree_best_pipeline_mkble_1.50_to_1.99.pkl"
)

# Classification model whose output feeds the regressors as the
# 'change_in_amt_mkble' feature.
mkble_amt_class_model = _load_hub_model("models_list/classification/3_pipeline.pkl")

print("================================")
print("mkble_amt_class_model type:", type(mkble_amt_class_model))
# Names of the label encoders stored on disk, one joblib file per name.
encoder_list = [
    'Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
    'EngNts', 'EngMikly', 'EngLab', 'EngBlk', 'EngWht', 'EngOpen', 'EngPav',
    'Change_cts_value', 'Change_shape_value', 'Change_quality_value', 'Change_color_value',
    'Change_cut_value', 'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
    'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value', 'Change_Blk_Eng_to_Grd_value',
    'Change_Wht_Eng_to_Grd_value', 'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
    'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value', 'Change_Open_Eng_to_ByGrd_value',
    'Change_Pav_Eng_to_ByGrd_value', 'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
    'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value',
]

# Map every encoder name to the object loaded from
# LABEL_ENCODER_DIR/label_encoder_<name>.joblib.
enc_path = Path(LABEL_ENCODER_DIR)
loaded_label_encoder = {
    encoder_name: load(enc_path / f"label_encoder_{encoder_name}.joblib")
    for encoder_name in encoder_list
}
# ------------------------------
# Utility: Allowed File Check
# ------------------------------
def allowed_file(filename):
    """Tell whether *filename* carries an accepted upload extension.

    Args:
        filename: Name of the uploaded file.

    Returns:
        True when the text after the last '.' (compared case-insensitively)
        is a member of ALLOWED_EXTENSIONS; False when there is no '.' or the
        extension is not listed.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
# ------------------------------
# Routes
# ------------------------------
# NOTE(review): no route registration for this view is visible in the file
# even though other views redirect to url_for('index'); the '/' rule is an
# assumption -- confirm against the templates/deployment.
@app.route('/')
def index():
    """Render the landing page template ``index.html``."""
    return render_template('index.html')
# NOTE(review): no route registration for this view is visible in the file;
# the '/predict' rule and POST method are assumptions -- confirm against the
# upload form in the templates.
@app.route('/predict', methods=['POST'])
def predict():
    """Handle an uploaded CSV/Excel file and generate the report CSVs.

    Reads the 'file' part of the request, stores it in the upload folder,
    loads it into a DataFrame, runs process_dataframe() and writes the two
    resulting frames to dated, uniquely named CSV files. The paths are kept
    in the module-level PRED_OUTPUT_FILE / CLASS_OUTPUT_FILE.

    Returns:
        A redirect to the report view on success, or a redirect to the index
        page when the upload is missing, has a disallowed extension, cannot
        be parsed, or yields an empty prediction frame.
    """
    global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE

    if 'file' not in request.files:
        print('No file part', 'error')
        return redirect(url_for('index'))

    file = request.files['file']
    if file.filename == '':
        print('No selected file', 'error')
        return redirect(url_for('index'))

    if not (file and allowed_file(file.filename)):
        print('Invalid file type. Only CSV and Excel files are allowed.', 'error')
        return redirect(url_for('index'))

    # Take the extension from the submitted name, lower-cased, exactly like
    # allowed_file() does. secure_filename() can strip characters (or return
    # an empty string), so the sanitized name is not a reliable source for it.
    extension = file.filename.rsplit('.', 1)[1].lower()
    filename = secure_filename(file.filename)
    if not filename:
        filename = f'upload_{uuid.uuid4().hex[:8]}.{extension}'
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(filepath)

    # Convert file to DataFrame.
    try:
        if extension == 'csv':
            df = pd.read_csv(filepath)
        else:
            df = pd.read_excel(filepath)
    except Exception as e:  # parser errors vary by format; report and bail out
        print(f'Error reading file: {e}', 'error')
        return redirect(url_for('index'))

    # Process the DataFrame and generate predictions and classification analysis.
    df_pred, dx_class = process_dataframe(df)
    if df_pred.empty:
        print("Processed prediction DataFrame is empty. Check the input file and processing logic.", "error")
        return redirect(url_for('index'))

    # Save output files with the current date and a unique id.
    current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
    unique_id = uuid.uuid4().hex[:8]
    data_folder = app.config['DATA_FOLDER']
    PRED_OUTPUT_FILE = os.path.join(
        data_folder, f'prediction_output_{current_date}_{unique_id}.csv'
    )
    CLASS_OUTPUT_FILE = os.path.join(
        data_folder, f'classification_output_{current_date}_{unique_id}.csv'
    )
    df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
    dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)

    # Redirect to report view; default to prediction report, page 1.
    return redirect(url_for('report_view', report_type='pred', page=1))
def _predict_carat_band(band_df, makable_model):
    """Score one carat band with the classifier and the given regressor.

    Args:
        band_df: Encoded, all-float feature rows belonging to a single band.
        makable_model: Fitted regressor used for this band.

    Returns:
        A copy of *band_df* with 'change_in_amt_mkble' (classifier output)
        and 'Makable_Predicted' (expm1 of the regressor output) appended.
    """
    feature_columns = list(band_df.columns)
    scored = band_df.copy()  # work on a copy, never on a slice of the caller's frame
    # Assign plain arrays so values are placed positionally; wrapping them in
    # a fresh DataFrame would align on a new 0..n-1 index and scatter or drop
    # predictions for filtered rows.
    scored['change_in_amt_mkble'] = np.asarray(
        mkble_amt_class_model.predict(scored[feature_columns])
    ).ravel()
    scored = scored[feature_columns + ['change_in_amt_mkble']]
    # The regressor output is passed through expm1 (presumably the models
    # were fitted on log1p of the amount -- confirm with the training code).
    scored['Makable_Predicted'] = np.expm1(
        np.asarray(makable_model.predict(scored)).ravel()
    )
    return scored


def process_dataframe(df):
    """Encode the uploaded rows, predict makable amounts and build the report.

    Rows are label-encoded, split into EngCts bands (0, 1.00), [1.00, 1.50)
    and [1.50, 2.00), scored by the matching model, concatenated, and the
    categorical columns are decoded back to their labels. Rows whose EngCts
    falls outside every band are not included in the output.

    Args:
        df: Uploaded data. Must contain the columns Tag, EngCts, EngShp,
            EngQua, EngCol, EngCut, EngPol, EngSym, EngFlo, EngNts, EngMikly,
            EngBlk, EngWht, EngOpen, EngPav and EngAmt.

    Returns:
        A ``(prediction_frame, classification_frame)`` tuple. Both entries
        are the same frame (input columns plus 'change_in_amt_mkble',
        'Makable_Predicted' and 'Makable_Diff'), with missing or blank values
        replaced by "-". Two empty DataFrames are returned when a column is
        missing, a value is unknown to its label encoder, no row falls inside
        a band, or a model raises.
    """
    categorical_columns = ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym',
                           'EngFlo', 'EngNts', 'EngMikly', 'EngBlk', 'EngWht', 'EngOpen', 'EngPav']
    required_columns = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol',
                        'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngBlk', 'EngWht', 'EngOpen',
                        'EngPav', 'EngAmt']
    inclusion_columns = ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']

    try:
        df_pred = df[required_columns].copy()
        df_pred[inclusion_columns] = df_pred[inclusion_columns].fillna("NA")

        # Transform categorical columns using the label encoders.
        for col in categorical_columns:
            try:
                df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
            except ValueError as e:
                print(f'Invalid value in column {col}: {e}', 'error')
                return pd.DataFrame(), pd.DataFrame()

        df_pred = df_pred.astype(float)

        # -------------------------
        # Prediction Report Section
        # -------------------------
        carats = df_pred['EngCts']
        # Half-open bands: every value in (0, 2.00) lands in exactly one band
        # (no gap at 0.99, no overlap between 1.49 and 1.50).
        band_masks = (
            ((carats > 0.00) & (carats < 1.00), makable_model_0),
            ((carats >= 1.00) & (carats < 1.50), makable_model_1),
            ((carats >= 1.50) & (carats < 2.00), makable_model_2),
        )
        try:
            band_frames = []
            for mask, makable_model in band_masks:
                band_df = df_pred[mask]
                if band_df.empty:
                    # Estimators reject zero-row input; an unused band must
                    # not abort the other bands.
                    continue
                band_frames.append(_predict_carat_band(band_df, makable_model))

            if not band_frames:
                print('pred model error----->: no rows with EngCts between 0 and 2.00', 'error')
                return pd.DataFrame(), pd.DataFrame()

            df_pred_main = pd.concat(band_frames)
            df_pred_main['Makable_Diff'] = df_pred_main['EngAmt'] - df_pred_main['Makable_Predicted']

            # Decode the categorical columns back to their original labels.
            for col in categorical_columns:
                try:
                    df_pred_main[col] = loaded_label_encoder[col].inverse_transform(
                        df_pred_main[col].astype(int)
                    )
                except ValueError as e:
                    print(f'inverse transform fails value in column {col}: {e}', 'error')
        except ValueError as e:
            print(f'pred model error----->: {e}', 'error')
            return pd.DataFrame(), pd.DataFrame()

        # Replace NaN or blank values with "-" in the frame that actually
        # holds the predictions.
        df_pred_main = df_pred_main.fillna("-").replace(r'^\s*$', "-", regex=True)
        # NOTE(review): the classification report currently mirrors the
        # prediction report; no separate classification frame is produced.
        return df_pred_main, df_pred_main
    except Exception as e:
        print(f'Error processing file: {e}', 'error')
        return pd.DataFrame(), pd.DataFrame()
# ------------------------------
# Report View Route with Pagination & Toggle
# ------------------------------
# NOTE(review): no route registration for this view is visible in the file
# even though predict() redirects to url_for('report_view'); the '/report'
# rule is an assumption -- confirm against the templates.
@app.route('/report')
def report_view():
    """Render one page of the prediction or classification report.

    Query parameters:
        report_type: 'pred' (default) selects PRED_OUTPUT_FILE; any other
            value selects CLASS_OUTPUT_FILE.
        page: 1-based page number; invalid or non-positive values fall back
            to 1.

    Returns:
        The rendered ``output.html`` template with the page's HTML table,
        pagination flags and JSON chart data, or a redirect to the index page
        when no report file is available yet.
    """
    report_type = request.args.get('report_type', 'pred')
    try:
        page = int(request.args.get('page', 1))
    except ValueError:
        page = 1
    page = max(page, 1)  # a page below 1 would produce a negative slice start
    per_page = 15  # records per page

    # Pick the appropriate CSV file; it is unset until a prediction has run.
    report_path = PRED_OUTPUT_FILE if report_type == 'pred' else CLASS_OUTPUT_FILE
    if not report_path or not os.path.exists(report_path):
        print('No report available. Upload a file first.', 'error')
        return redirect(url_for('index'))
    df = pd.read_csv(report_path)

    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page
    total_records = len(df)
    df_page = df.iloc[start_idx:end_idx]
    table_html = df_page.to_html(classes="data-table", index=False)
    has_prev = page > 1
    has_next = end_idx < total_records

    # Prepare chart data (converted to JSON for the page's JavaScript).
    # Columns that are absent yield an empty series instead of a KeyError, and
    # non-numeric cells (e.g. the "-" placeholder or NaN) become null so the
    # emitted JSON stays valid.
    chart_data = {}
    for col in ('EngCts', 'Makable_Predicted', 'Makable_Diff'):
        if col in df_page.columns:
            numeric = pd.to_numeric(df_page[col], errors='coerce')
            chart_data[col] = [None if pd.isna(value) else float(value) for value in numeric]
        else:
            chart_data[col] = []

    return render_template('output.html',
                           table_html=table_html,
                           report_type=report_type,
                           page=page,
                           has_prev=has_prev,
                           has_next=has_next,
                           chart_data=json.dumps(chart_data)
                           )
# ------------------------------
# Download Routes
# ------------------------------
# NOTE(review): no route registration for this view is visible in the file;
# the '/download_pred' rule is an assumption -- confirm against the templates.
@app.route('/download_pred')
def download_pred():
    """Send the latest prediction report CSV as an attachment.

    Returns:
        The file response, or a redirect to the index page when no
        prediction report has been generated (or the file is gone).
    """
    if not PRED_OUTPUT_FILE or not os.path.exists(PRED_OUTPUT_FILE):
        print('No prediction report available for download.', 'error')
        return redirect(url_for('index'))
    # Resolve against the current working directory (the same base the path
    # was checked against above) before handing it to send_file.
    return send_file(os.path.abspath(PRED_OUTPUT_FILE), as_attachment=True)
# NOTE(review): no route registration for this view is visible in the file;
# the '/download_class' rule is an assumption -- confirm against the templates.
@app.route('/download_class')
def download_class():
    """Send the latest classification report CSV as an attachment.

    Returns:
        The file response, or a redirect to the index page when no
        classification report has been generated (or the file is gone).
    """
    if not CLASS_OUTPUT_FILE or not os.path.exists(CLASS_OUTPUT_FILE):
        print('No classification report available for download.', 'error')
        return redirect(url_for('index'))
    # Resolve against the current working directory (the same base the path
    # was checked against above) before handing it to send_file.
    return send_file(os.path.abspath(CLASS_OUTPUT_FILE), as_attachment=True)
if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server run
    # arbitrary code, so debug mode is opt-in: set FLASK_DEBUG=1 (or
    # true/yes/on) for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)