Diam_Rapo_range_specific_hybrid

Sleeping

App Files Files Community

WebashalarForML committed on Apr 7

Commit

bc872ec

verified ·

1 Parent(s): 44073b3

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -58

app.py CHANGED Viewed

@@ -2,13 +2,14 @@ from flask import Flask, render_template, request, redirect, url_for, flash, sen
 import os
 import pandas as pd
 from werkzeug.utils import secure_filename
-from joblib import load
 import numpy as np
 from sklearn.preprocessing import LabelEncoder
 from time import time
 from huggingface_hub import hf_hub_download
 import pickle
-import os
 app = Flask(__name__)
@@ -22,22 +23,25 @@ MODEL_FOLDER = "models/"
 # Define the model directory and label encoder directory
 MODEL_DIR = r'./Model'
-LABEL_ENOCDER_DIR = r'./Label_encoders'
 # Global file names for outputs; these will be updated per prediction.
-PRED_OUTPUT_FILE = "data/pred_output.csv"
-CLASS_OUTPUT_FILE = "data/class_output.csv"
 ALLOWED_EXTENSIONS = {'csv', 'xlsx'}
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-app.config['DATA_FOLDER'] = UPLOAD_FOLDER
 os.makedirs(app.config['DATA_FOLDER'], exist_ok=True)
 os.makedirs("data", exist_ok=True)
-app.config['MODEL_FOLDER'] = UPLOAD_FOLDER
 os.makedirs(app.config['MODEL_FOLDER'], exist_ok=True)
@@ -45,14 +49,12 @@ os.makedirs(app.config['MODEL_FOLDER'], exist_ok=True)
 # Load Models and Label Encoders
 # ------------------------------
-# prediction analysis
-# Download the model file to the specified location
 file_path_1 = hf_hub_download(
     repo_id="WebashalarForML/Diamond_model_",
     filename="models_list/mkble/StackingRegressor_best_pipeline_mkble_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_1, "rb") as f:
     makable_model = pickle.load(f)
@@ -61,7 +63,6 @@ file_path_2 = hf_hub_download(
     filename="models_list/grd/StackingRegressor_best_pipeline_grd_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_2, "rb") as f:
     grade_model = pickle.load(f)
@@ -70,7 +71,6 @@ file_path_3 = hf_hub_download(
     filename="models_list/bygrad/StackingRegressor_best_pipeline_bygrad_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_3, "rb") as f:
     bygrade_model = pickle.load(f)
@@ -79,16 +79,10 @@ file_path_4 = hf_hub_download(
     filename="models_list/gia/StackingRegressor_best_pipeline_gia_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_4, "rb") as f:
     gia_model = pickle.load(f)
-#gia_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_gia_price.joblib'))
-#grade_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_grade_price.joblib'))
-#bygrade_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_bygrade_price.joblib'))
-#makable_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_makable_price.joblib'))
-# classifcation analysis
 col_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_col.joblib'))
 cts_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cts.joblib'))
 cut_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cut.joblib'))
@@ -112,20 +106,25 @@ wht_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegr
 open_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_open.joblib'))
 pav_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_pav.joblib'))
-encoder_list = ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
-                'EngNts', 'EngMikly', 'EngLab','EngBlk', 'EngWht', 'EngOpen','EngPav',
-                'Change_cts_value', 'Change_shape_value', 'Change_quality_value', 'Change_color_value',
-                'Change_cut_value', 'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
-                'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value', 'Change_Blk_Eng_to_Grd_value',
-                'Change_Wht_Eng_to_Grd_value', 'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
-                'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value', 'Change_Open_Eng_to_ByGrd_value',
-                'Change_Pav_Eng_to_ByGrd_value', 'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
-                'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value']
 loaded_label_encoder = {}
 for val in encoder_list:
-    encoder_path = os.path.join(LABEL_ENOCDER_DIR, f"label_encoder_{val}.joblib")
-    loaded_label_encoder[val] = load(encoder_path)
 # ------------------------------
 # Utility: Allowed File Check
@@ -144,12 +143,12 @@ def index():
 def predict():
     if 'file' not in request.files:
         flash('No file part', 'error')
-        return redirect(request.url)
     file = request.files['file']
     if file.filename == '':
         flash('No selected file', 'error')
-        return redirect(request.url)
     if file and allowed_file(file.filename):
         filename = secure_filename(file.filename)
@@ -157,23 +156,27 @@ def predict():
         file.save(filepath)
         # Convert file to DataFrame
-        if filename.endswith('.csv'):
-            df = pd.read_csv(filepath)
-        else:
-            df = pd.read_excel(filepath)
         # Process the DataFrame and generate predictions and classification analysis.
         df_pred, dx_class = process_dataframe(df)
         if df_pred.empty:
-            print("Processed prediction DataFrame is empty. Check the input file and processing logic.", "error")
-            return redirect(request.url)
-        # Save output files with a timestamp (you can also store in session if needed)
         current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
         global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE
-        PRED_OUTPUT_FILE = f'data/prediction_output_{current_date}.csv'
-        CLASS_OUTPUT_FILE = f'data/classification_output_{current_date}.csv'
         df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
         dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)
@@ -181,11 +184,11 @@ def predict():
         return redirect(url_for('report_view', report_type='pred', page=1))
     else:
         flash('Invalid file type. Only CSV and Excel files are allowed.', 'error')
-        return redirect(request.url)
 def process_dataframe(df):
     try:
-        # Define the columns needed for two parts
         required_columns = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
                             'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngAmt']
         required_columns_2 = required_columns + ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']
@@ -196,7 +199,11 @@ def process_dataframe(df):
         # Transform categorical columns for prediction DataFrame using the label encoders.
         for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
-            df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
         # Update the classification DataFrame with the transformed prediction columns.
         for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
@@ -204,16 +211,19 @@ def process_dataframe(df):
         # Transform the extra columns in the classification DataFrame.
         for col in ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']:
-            df_class[col] = loaded_label_encoder[col].transform(df_class[col])
-        # Convert both DataFrames to float (or handle as needed).
         df_pred = df_pred.astype(float)
         df_class = df_class.astype(float)
         # -------------------------
         # Prediction Report Section
         # -------------------------
-        # Use the prediction DataFrame for price predictions.
         x = df_pred.copy()
         df_pred['GIA_Predicted'] = gia_model.predict(x)
         df_pred['Grade_Predicted'] = grade_model.predict(x)
@@ -227,7 +237,6 @@ def process_dataframe(df):
         # -------------------------
         # Classification Report Section
         # -------------------------
-        # For classification, use df_class (which has extra columns).
         x2 = df_class.copy()
         dx = df_pred.copy()  # Start with the prediction data.
         dx['col_change'] = col_model.predict(x)
@@ -275,7 +284,8 @@ def process_dataframe(df):
         dx['Change_Open_Eng_to_Gia_value'] = loaded_label_encoder['Change_Open_Eng_to_Gia_value'].inverse_transform(dx['Change_Open_Eng_to_Gia_value'])
         dx['Change_Pav_Eng_to_Gia_value'] = loaded_label_encoder['Change_Pav_Eng_to_Gia_value'].inverse_transform(dx['Change_Pav_Eng_to_Gia_value'])
-        return df_pred, dx.head(len(df_pred))  # Return full DataFrames for pagination later.
     except Exception as e:
         flash(f'Error processing file: {e}', 'error')
         return pd.DataFrame(), pd.DataFrame()
@@ -285,30 +295,26 @@ def process_dataframe(df):
 # ------------------------------
 @app.route('/report')
 def report_view():
-    # Get query parameters: report_type (pred or class) and page number.
     report_type = request.args.get('report_type', 'pred')
     try:
         page = int(request.args.get('page', 1))
     except ValueError:
         page = 1
     per_page = 15  # records per page
     # Read the appropriate CSV file.
     if report_type == 'pred':
         df = pd.read_csv(PRED_OUTPUT_FILE)
     else:
         df = pd.read_csv(CLASS_OUTPUT_FILE)
-    # Calculate pagination indices.
     start_idx = (page - 1) * per_page
     end_idx = start_idx + per_page
     total_records = len(df)
-    # Slice the DataFrame for the current page.
     df_page = df.iloc[start_idx:end_idx]
     table_html = df_page.to_html(classes="data-table", index=False)
-    # Determine if previous/next pages exist.
     has_prev = page > 1
     has_next = end_idx < total_records
@@ -320,7 +326,7 @@ def report_view():
                            has_next=has_next)
 # ------------------------------
-# Download Routes (remain unchanged)
 # ------------------------------
 @app.route('/download_pred', methods=['GET'])
 def download_pred():

 import os
 import pandas as pd
 from werkzeug.utils import secure_filename
+from joblib import load, dump
 import numpy as np
 from sklearn.preprocessing import LabelEncoder
 from time import time
 from huggingface_hub import hf_hub_download
 import pickle
+import uuid
+from pathlib import Path
 app = Flask(__name__)
 # Define the model directory and label encoder directory
 MODEL_DIR = r'./Model'
+LABEL_ENCODER_DIR = r'./Label_encoders'  # Renamed for clarity
 # Global file names for outputs; these will be updated per prediction.
+# Note: we now include a unique id to avoid overwriting.
+PRED_OUTPUT_FILE = None
+CLASS_OUTPUT_FILE = None
 ALLOWED_EXTENSIONS = {'csv', 'xlsx'}
+# Create directories if they do not exist.
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+app.config['DATA_FOLDER'] = DATA_FOLDER
 os.makedirs(app.config['DATA_FOLDER'], exist_ok=True)
 os.makedirs("data", exist_ok=True)
+app.config['MODEL_FOLDER'] = MODEL_FOLDER
 os.makedirs(app.config['MODEL_FOLDER'], exist_ok=True)
 # Load Models and Label Encoders
 # ------------------------------
+# Prediction analysis models loaded from Hugging Face.
 file_path_1 = hf_hub_download(
     repo_id="WebashalarForML/Diamond_model_",
     filename="models_list/mkble/StackingRegressor_best_pipeline_mkble_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_1, "rb") as f:
     makable_model = pickle.load(f)
     filename="models_list/grd/StackingRegressor_best_pipeline_grd_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_2, "rb") as f:
     grade_model = pickle.load(f)
     filename="models_list/bygrad/StackingRegressor_best_pipeline_bygrad_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_3, "rb") as f:
     bygrade_model = pickle.load(f)
     filename="models_list/gia/StackingRegressor_best_pipeline_gia_0_to_1.01.pkl",
     cache_dir=MODEL_FOLDER
 )
 with open(file_path_4, "rb") as f:
     gia_model = pickle.load(f)
+# Classification models loaded using joblib.
 col_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_col.joblib'))
 cts_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cts.joblib'))
 cut_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cut.joblib'))
 open_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_open.joblib'))
 pav_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_pav.joblib'))
+# List of label encoder names.
+encoder_list = [
+    'Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
+    'EngNts', 'EngMikly', 'EngLab','EngBlk', 'EngWht', 'EngOpen','EngPav',
+    'Change_cts_value', 'Change_shape_value', 'Change_quality_value', 'Change_color_value',
+    'Change_cut_value', 'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
+    'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value', 'Change_Blk_Eng_to_Grd_value',
+    'Change_Wht_Eng_to_Grd_value', 'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
+    'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value', 'Change_Open_Eng_to_ByGrd_value',
+    'Change_Pav_Eng_to_ByGrd_value', 'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
+    'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value'
+]
+# Load label encoders using pathlib for cleaner path management.
 loaded_label_encoder = {}
+enc_path = Path(LABEL_ENCODER_DIR)
 for val in encoder_list:
+    encoder_file = enc_path / f"label_encoder_{val}.joblib"
+    loaded_label_encoder[val] = load(encoder_file)
 # ------------------------------
 # Utility: Allowed File Check
 def predict():
     if 'file' not in request.files:
         flash('No file part', 'error')
+        return redirect(url_for('index'))
     file = request.files['file']
     if file.filename == '':
         flash('No selected file', 'error')
+        return redirect(url_for('index'))
     if file and allowed_file(file.filename):
         filename = secure_filename(file.filename)
         file.save(filepath)
         # Convert file to DataFrame
+        try:
+            if filename.endswith('.csv'):
+                df = pd.read_csv(filepath)
+            else:
+                df = pd.read_excel(filepath)
+        except Exception as e:
+            flash(f'Error reading file: {e}', 'error')
+            return redirect(url_for('index'))
         # Process the DataFrame and generate predictions and classification analysis.
         df_pred, dx_class = process_dataframe(df)
         if df_pred.empty:
+            flash("Processed prediction DataFrame is empty. Check the input file and processing logic.", "error")
+            return redirect(url_for('index'))
+        # Save output files tagged with the current date and a unique id.
         current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
+        unique_id = uuid.uuid4().hex[:8]
         global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE
+        PRED_OUTPUT_FILE = f'data/prediction_output_{current_date}_{unique_id}.csv'
+        CLASS_OUTPUT_FILE = f'data/classification_output_{current_date}_{unique_id}.csv'
         df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
         dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)
         return redirect(url_for('report_view', report_type='pred', page=1))
     else:
         flash('Invalid file type. Only CSV and Excel files are allowed.', 'error')
+        return redirect(url_for('index'))
 def process_dataframe(df):
     try:
+        # Define the required columns: a base set, plus an extended set with four extra columns.
         required_columns = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
                             'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngAmt']
         required_columns_2 = required_columns + ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']
         # Transform categorical columns for prediction DataFrame using the label encoders.
         for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
+            try:
+                df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
+            except ValueError as e:
+                flash(f'Invalid value in column {col}: {e}', 'error')
+                return pd.DataFrame(), pd.DataFrame()
         # Update the classification DataFrame with the transformed prediction columns.
         for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
         # Transform the extra columns in the classification DataFrame.
         for col in ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']:
+            try:
+                df_class[col] = loaded_label_encoder[col].transform(df_class[col])
+            except ValueError as e:
+                flash(f'Invalid value in column {col}: {e}', 'error')
+                return pd.DataFrame(), pd.DataFrame()
+        # Convert both DataFrames to float.
         df_pred = df_pred.astype(float)
         df_class = df_class.astype(float)
         # -------------------------
         # Prediction Report Section
         # -------------------------
         x = df_pred.copy()
         df_pred['GIA_Predicted'] = gia_model.predict(x)
         df_pred['Grade_Predicted'] = grade_model.predict(x)
         # -------------------------
         # Classification Report Section
         # -------------------------
         x2 = df_class.copy()
         dx = df_pred.copy()  # Start with the prediction data.
         dx['col_change'] = col_model.predict(x)
         dx['Change_Open_Eng_to_Gia_value'] = loaded_label_encoder['Change_Open_Eng_to_Gia_value'].inverse_transform(dx['Change_Open_Eng_to_Gia_value'])
         dx['Change_Pav_Eng_to_Gia_value'] = loaded_label_encoder['Change_Pav_Eng_to_Gia_value'].inverse_transform(dx['Change_Pav_Eng_to_Gia_value'])
+        # Final return with full data for pagination.
+        return df_pred, dx.head(len(df_pred))
     except Exception as e:
         flash(f'Error processing file: {e}', 'error')
         return pd.DataFrame(), pd.DataFrame()
 # ------------------------------
 @app.route('/report')
 def report_view():
     report_type = request.args.get('report_type', 'pred')
     try:
         page = int(request.args.get('page', 1))
     except ValueError:
         page = 1
     per_page = 15  # records per page
     # Read the appropriate CSV file.
     if report_type == 'pred':
         df = pd.read_csv(PRED_OUTPUT_FILE)
     else:
         df = pd.read_csv(CLASS_OUTPUT_FILE)
     start_idx = (page - 1) * per_page
     end_idx = start_idx + per_page
     total_records = len(df)
     df_page = df.iloc[start_idx:end_idx]
     table_html = df_page.to_html(classes="data-table", index=False)
     has_prev = page > 1
     has_next = end_idx < total_records
                            has_next=has_next)
 # ------------------------------
+# Download Routes
 # ------------------------------
 @app.route('/download_pred', methods=['GET'])
 def download_pred():