{ "cells": [ { "attachments": {}, "cell_type": "markdown", "id": "41bf2a51", "metadata": {}, "source": [ "# Model Building" ] }, { "cell_type": "code", "execution_count": null, "id": "dbdd8927", "metadata": {}, "outputs": [], "source": [ "!pip install imblearn\n", "!pip install matplotlib\n", "!pip install seaborn\n", "!pip install joblib\n", "!pip install feature_engine\n", "!pip install lightgbm\n", "!pip install catboost\n", "!pip install category_encoders\n", "!pip install numpy\n", "!pip install sdv\n", "!pip install optuna\n", "!pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117\n", "!pip install scikeras[tensorflow]\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "5e65a366", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)" ] }, { "cell_type": "code", "execution_count": 2, "id": "e01675b8", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('cleaned_dataset.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "6619eae9", "metadata": {}, "outputs": [], "source": [ "popularity_threshold = df['popularity'].quantile(0.75)\n", "df['popularity_flag'] = (df['popularity'] > popularity_threshold).astype(int)" ] }, { "cell_type": "code", "execution_count": 4, "id": "3b3b3378", "metadata": {}, "outputs": [], "source": [ "df.drop(['popularity'],axis=1,inplace =True)\n", "df['explicit'] = df['explicit'].astype(int)" ] }, { "cell_type": "code", "execution_count": 5, "id": "1b2f098f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
duration_msexplicitdanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturetrack_genrepopularity_flag
023066600.6760.46101-6.74600.14300.03220.0000010.35800.715087.9174acoustic1
114961000.4200.16601-17.23510.07630.92400.0000060.10100.267077.4894acoustic1
221082600.4380.35900-9.73410.05570.21000.0000000.11700.120076.3324acoustic1
320193300.2660.05960-18.51510.03630.90500.0000710.13200.1430181.7403acoustic1
419885300.6180.44302-9.68110.05260.46900.0000000.08290.1670119.9494acoustic1
521424000.6880.48106-8.80710.10500.28900.0000000.18900.666098.0174acoustic1
622940000.4070.14702-8.82210.03550.85700.0000030.09130.0765141.2843acoustic1
724294600.7030.444011-9.33110.04170.55900.0000000.09730.7120150.9604acoustic1
818961300.6250.41400-8.70010.03690.29400.0000000.15100.6690130.0884acoustic1
920559400.4420.63201-6.77010.02950.42600.0041900.07350.196078.8994acoustic1
\n", "
" ], "text/plain": [ " duration_ms explicit danceability energy key loudness mode \\\n", "0 230666 0 0.676 0.4610 1 -6.746 0 \n", "1 149610 0 0.420 0.1660 1 -17.235 1 \n", "2 210826 0 0.438 0.3590 0 -9.734 1 \n", "3 201933 0 0.266 0.0596 0 -18.515 1 \n", "4 198853 0 0.618 0.4430 2 -9.681 1 \n", "5 214240 0 0.688 0.4810 6 -8.807 1 \n", "6 229400 0 0.407 0.1470 2 -8.822 1 \n", "7 242946 0 0.703 0.4440 11 -9.331 1 \n", "8 189613 0 0.625 0.4140 0 -8.700 1 \n", "9 205594 0 0.442 0.6320 1 -6.770 1 \n", "\n", " speechiness acousticness instrumentalness liveness valence tempo \\\n", "0 0.1430 0.0322 0.000001 0.3580 0.7150 87.917 \n", "1 0.0763 0.9240 0.000006 0.1010 0.2670 77.489 \n", "2 0.0557 0.2100 0.000000 0.1170 0.1200 76.332 \n", "3 0.0363 0.9050 0.000071 0.1320 0.1430 181.740 \n", "4 0.0526 0.4690 0.000000 0.0829 0.1670 119.949 \n", "5 0.1050 0.2890 0.000000 0.1890 0.6660 98.017 \n", "6 0.0355 0.8570 0.000003 0.0913 0.0765 141.284 \n", "7 0.0417 0.5590 0.000000 0.0973 0.7120 150.960 \n", "8 0.0369 0.2940 0.000000 0.1510 0.6690 130.088 \n", "9 0.0295 0.4260 0.004190 0.0735 0.1960 78.899 \n", "\n", " time_signature track_genre popularity_flag \n", "0 4 acoustic 1 \n", "1 4 acoustic 1 \n", "2 4 acoustic 1 \n", "3 3 acoustic 1 \n", "4 4 acoustic 1 \n", "5 4 acoustic 1 \n", "6 3 acoustic 1 \n", "7 4 acoustic 1 \n", "8 4 acoustic 1 \n", "9 4 acoustic 1 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(10)\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "aaa25e9a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Ratio of minority to majority class: 0.319\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
duration_msexplicitdanceabilityenergykeyloudnessmodespeechinessacousticnessinstrumentalnesslivenessvalencetempotime_signaturetrack_genre
023066600.6760.46101-6.74600.14300.03220.0000010.35800.715087.9174acoustic
114961000.4200.16601-17.23510.07630.92400.0000060.10100.267077.4894acoustic
221082600.4380.35900-9.73410.05570.21000.0000000.11700.120076.3324acoustic
320193300.2660.05960-18.51510.03630.90500.0000710.13200.1430181.7403acoustic
419885300.6180.44302-9.68110.05260.46900.0000000.08290.1670119.9494acoustic
................................................
8041338499900.1720.23505-16.39310.04220.64000.9280000.08630.0339125.9955world-music
8041438500000.1740.11700-18.31800.04010.99400.9760000.10500.035085.2394world-music
8041527146600.6290.32900-10.89500.04200.86700.0000000.08390.7430132.3784world-music
8041628389300.5870.50607-10.88910.02970.38100.0000000.27000.4130135.9604world-music
8041724182600.5260.48701-10.20400.07250.68100.0000000.08930.708079.1984world-music
\n", "

80418 rows × 15 columns

\n", "
" ], "text/plain": [ " duration_ms explicit danceability energy key loudness mode \\\n", "0 230666 0 0.676 0.4610 1 -6.746 0 \n", "1 149610 0 0.420 0.1660 1 -17.235 1 \n", "2 210826 0 0.438 0.3590 0 -9.734 1 \n", "3 201933 0 0.266 0.0596 0 -18.515 1 \n", "4 198853 0 0.618 0.4430 2 -9.681 1 \n", "... ... ... ... ... .. ... ... \n", "80413 384999 0 0.172 0.2350 5 -16.393 1 \n", "80414 385000 0 0.174 0.1170 0 -18.318 0 \n", "80415 271466 0 0.629 0.3290 0 -10.895 0 \n", "80416 283893 0 0.587 0.5060 7 -10.889 1 \n", "80417 241826 0 0.526 0.4870 1 -10.204 0 \n", "\n", " speechiness acousticness instrumentalness liveness valence \\\n", "0 0.1430 0.0322 0.000001 0.3580 0.7150 \n", "1 0.0763 0.9240 0.000006 0.1010 0.2670 \n", "2 0.0557 0.2100 0.000000 0.1170 0.1200 \n", "3 0.0363 0.9050 0.000071 0.1320 0.1430 \n", "4 0.0526 0.4690 0.000000 0.0829 0.1670 \n", "... ... ... ... ... ... \n", "80413 0.0422 0.6400 0.928000 0.0863 0.0339 \n", "80414 0.0401 0.9940 0.976000 0.1050 0.0350 \n", "80415 0.0420 0.8670 0.000000 0.0839 0.7430 \n", "80416 0.0297 0.3810 0.000000 0.2700 0.4130 \n", "80417 0.0725 0.6810 0.000000 0.0893 0.7080 \n", "\n", " tempo time_signature track_genre \n", "0 87.917 4 acoustic \n", "1 77.489 4 acoustic \n", "2 76.332 4 acoustic \n", "3 181.740 3 acoustic \n", "4 119.949 4 acoustic \n", "... ... ... ... \n", "80413 125.995 5 world-music \n", "80414 85.239 4 world-music \n", "80415 132.378 4 world-music \n", "80416 135.960 4 world-music \n", "80417 79.198 4 world-music \n", "\n", "[80418 rows x 15 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import train_test_split\n", "from collections import Counter\n", "\n", "\n", "categorical_columns = ['track_genre','key','time_signature']\n", "\n", "#specify the continuous columns to be scaled\n", "continuous_columns = ['loudness', 'duration_ms','tempo']\n", "\n", "\n", "X = df.drop(\"popularity_flag\", axis=1)\n", "y = df[\"popularity_flag\"]\n", "\n", "#convert categorical columns to type string for target encoder to work properly except for binary categorical features\n", "X[categorical_columns]=X[categorical_columns].astype(str)\n", "counter = Counter(y)\n", "# find out the ratio of minority class to majority class for smote-nc\n", "estimate = counter[1] / counter[0]\n", "print('Ratio of minority to majority class: %.3f' % estimate)\n", "\n", "\n", "# Split your data into training and testing sets\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=200,stratify=y)\n", "\n", "\n", "X\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "353eeea9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
num__loudnessnum__duration_msnum__tempocat__track_genrecat__keycat__time_signatureremainder__explicitremainder__danceabilityremainder__energyremainder__moderemainder__speechinessremainder__acousticnessremainder__instrumentalnessremainder__livenessremainder__valence
228320.8566550.0347840.7535610.1348010.2199270.24736400.2470.937000.09140.094800.7890000.72100.1350
790020.4526710.0132430.9321850.0493070.2370470.24736400.2820.066300.04480.989000.9300000.09850.1550
276060.7620960.0514230.5194410.3000000.2370470.24736400.4920.474010.03510.823000.0000220.08550.2700
800200.6770060.0530290.5785400.2441520.2357550.24736400.3270.269010.03390.825000.0002340.10800.0857
492030.7576740.0584430.4666930.7368420.2122960.24736400.8220.446000.03210.486000.0010200.14700.6540
................................................
112650.7130680.0841120.4824100.0079260.2533070.24736400.7440.810010.04730.024000.9070000.06060.7150
511040.8089330.0372630.3475070.1699720.2652640.24736400.8170.539000.06210.014100.0004960.09900.1580
657220.7157420.0261020.4647330.1418760.2494660.24736400.5450.335010.03000.917000.0007140.17700.3060
52180.7769110.0216030.5975310.0116880.2122960.20118400.5040.577000.03150.012800.2150000.10700.7210
412600.7389990.0322840.2389540.8073220.2533070.24736400.4610.500010.06140.002450.0300000.11600.5720
\n", "

64334 rows × 15 columns

\n", "
" ], "text/plain": [ " num__loudness num__duration_ms num__tempo cat__track_genre \\\n", "22832 0.856655 0.034784 0.753561 0.134801 \n", "79002 0.452671 0.013243 0.932185 0.049307 \n", "27606 0.762096 0.051423 0.519441 0.300000 \n", "80020 0.677006 0.053029 0.578540 0.244152 \n", "49203 0.757674 0.058443 0.466693 0.736842 \n", "... ... ... ... ... \n", "11265 0.713068 0.084112 0.482410 0.007926 \n", "51104 0.808933 0.037263 0.347507 0.169972 \n", "65722 0.715742 0.026102 0.464733 0.141876 \n", "5218 0.776911 0.021603 0.597531 0.011688 \n", "41260 0.738999 0.032284 0.238954 0.807322 \n", "\n", " cat__key cat__time_signature remainder__explicit \\\n", "22832 0.219927 0.247364 0 \n", "79002 0.237047 0.247364 0 \n", "27606 0.237047 0.247364 0 \n", "80020 0.235755 0.247364 0 \n", "49203 0.212296 0.247364 0 \n", "... ... ... ... \n", "11265 0.253307 0.247364 0 \n", "51104 0.265264 0.247364 0 \n", "65722 0.249466 0.247364 0 \n", "5218 0.212296 0.201184 0 \n", "41260 0.253307 0.247364 0 \n", "\n", " remainder__danceability remainder__energy remainder__mode \\\n", "22832 0.247 0.9370 0 \n", "79002 0.282 0.0663 0 \n", "27606 0.492 0.4740 1 \n", "80020 0.327 0.2690 1 \n", "49203 0.822 0.4460 0 \n", "... ... ... ... \n", "11265 0.744 0.8100 1 \n", "51104 0.817 0.5390 0 \n", "65722 0.545 0.3350 1 \n", "5218 0.504 0.5770 0 \n", "41260 0.461 0.5000 1 \n", "\n", " remainder__speechiness remainder__acousticness \\\n", "22832 0.0914 0.09480 \n", "79002 0.0448 0.98900 \n", "27606 0.0351 0.82300 \n", "80020 0.0339 0.82500 \n", "49203 0.0321 0.48600 \n", "... ... ... \n", "11265 0.0473 0.02400 \n", "51104 0.0621 0.01410 \n", "65722 0.0300 0.91700 \n", "5218 0.0315 0.01280 \n", "41260 0.0614 0.00245 \n", "\n", " remainder__instrumentalness remainder__liveness remainder__valence \n", "22832 0.789000 0.7210 0.1350 \n", "79002 0.930000 0.0985 0.1550 \n", "27606 0.000022 0.0855 0.2700 \n", "80020 0.000234 0.1080 0.0857 \n", "49203 0.001020 0.1470 0.6540 \n", "... ... ... ... \n", "11265 0.907000 0.0606 0.7150 \n", "51104 0.000496 0.0990 0.1580 \n", "65722 0.000714 0.1770 0.3060 \n", "5218 0.215000 0.1070 0.7210 \n", "41260 0.030000 0.1160 0.5720 \n", "\n", "[64334 rows x 15 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.compose import ColumnTransformer\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import MinMaxScaler\n", "from category_encoders import TargetEncoder\n", "\n", "categorical_columns = ['track_genre','key','time_signature']\n", "\n", "continuous_columns = ['loudness', 'duration_ms','tempo']\n", "\n", "continuous_transformer = Pipeline(\n", " steps=[(\"scaler\", MinMaxScaler())]\n", ")\n", "\n", "categorical_transformer = Pipeline(\n", " steps=[\n", " (\"encoder\", TargetEncoder()),\n", " ]\n", ")\n", "\n", "preprocessor = ColumnTransformer(\n", " transformers=[\n", " (\"num\", continuous_transformer, continuous_columns),\n", " (\"cat\", categorical_transformer, categorical_columns),\n", " ]\n", " ,remainder=\"passthrough\"\n", ")\n", "preprocessor.set_output(transform='pandas')\n", "preprocessor \n", "\n", "X_processed = preprocessor.fit_transform(X_train,y_train)\n", "X_processed" ] }, { "cell_type": "code", "execution_count": 8, "id": "cce90012", "metadata": {}, "outputs": [], "source": [ "from imblearn.over_sampling import SMOTENC\n", "\n", "smoteNC = SMOTENC(random_state=42, categorical_features=[1,4,6,13,14],n_jobs=4)\n", "smoteNC2 = SMOTENC(random_state=42, categorical_features=[3,4,5,6,9],n_jobs=4)" ] }, { "cell_type": "code", "execution_count": 9, "id": "d66743ec", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
duration_msexplicitdanceabilityenergykeymodespeechinessinstrumentalnesslivenessvalencetempotime_signaturetrack_genre
2283219742400.2470.9370900.09140.7890000.72100.1350175.1894drum-and-bass
790028494800.2820.0663000.04480.9300000.09850.1550209.5574turkish
2760628430700.4920.4740010.03510.0000220.08550.2700130.1434folk
8002029269300.3270.2690410.03390.0002340.10800.0857141.5144world-music
4920332096000.8220.4460700.03210.0010200.14700.6540119.9944k-pop
..........................................
1126545498800.7440.8100110.04730.9070000.06060.7150123.0184chicago-house
5110421036700.8170.5390600.06210.0004960.09900.158097.0624latin
6572215209300.5450.33501010.03000.0007140.17700.3060119.6174rockabilly
521812860000.5040.5770700.03150.2150000.10700.7210145.1683black-metal
4126018437000.4610.5000110.06140.0300000.11600.572076.1764hip-hop
\n", "

64334 rows × 13 columns

\n", "
" ], "text/plain": [ " duration_ms explicit danceability energy key mode speechiness \\\n", "22832 197424 0 0.247 0.9370 9 0 0.0914 \n", "79002 84948 0 0.282 0.0663 0 0 0.0448 \n", "27606 284307 0 0.492 0.4740 0 1 0.0351 \n", "80020 292693 0 0.327 0.2690 4 1 0.0339 \n", "49203 320960 0 0.822 0.4460 7 0 0.0321 \n", "... ... ... ... ... .. ... ... \n", "11265 454988 0 0.744 0.8100 1 1 0.0473 \n", "51104 210367 0 0.817 0.5390 6 0 0.0621 \n", "65722 152093 0 0.545 0.3350 10 1 0.0300 \n", "5218 128600 0 0.504 0.5770 7 0 0.0315 \n", "41260 184370 0 0.461 0.5000 1 1 0.0614 \n", "\n", " instrumentalness liveness valence tempo time_signature \\\n", "22832 0.789000 0.7210 0.1350 175.189 4 \n", "79002 0.930000 0.0985 0.1550 209.557 4 \n", "27606 0.000022 0.0855 0.2700 130.143 4 \n", "80020 0.000234 0.1080 0.0857 141.514 4 \n", "49203 0.001020 0.1470 0.6540 119.994 4 \n", "... ... ... ... ... ... \n", "11265 0.907000 0.0606 0.7150 123.018 4 \n", "51104 0.000496 0.0990 0.1580 97.062 4 \n", "65722 0.000714 0.1770 0.3060 119.617 4 \n", "5218 0.215000 0.1070 0.7210 145.168 3 \n", "41260 0.030000 0.1160 0.5720 76.176 4 \n", "\n", " track_genre \n", "22832 drum-and-bass \n", "79002 turkish \n", "27606 folk \n", "80020 world-music \n", "49203 k-pop \n", "... ... \n", "11265 chicago-house \n", "51104 latin \n", "65722 rockabilly \n", "5218 black-metal \n", "41260 hip-hop \n", "\n", "[64334 rows x 13 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from feature_engine.selection import DropCorrelatedFeatures\n", "\n", "fes = DropCorrelatedFeatures(threshold=0.6)\n", "te = fes.fit_transform(X_train,y_train)\n", "te" ] }, { "cell_type": "code", "execution_count": 10, "id": "4797c378", "metadata": {}, "outputs": [], "source": [ "\n", "from sklearn.model_selection import cross_validate\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "from sklearn.metrics import roc_auc_score\n", "from sklearn.metrics import average_precision_score,f1_score\n", "from imblearn.pipeline import Pipeline as imbpipeline\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "6c8a6fd9", "metadata": {}, "outputs": [], "source": [ "def eval_base_models(n_folds, model, X, y):\n", " cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=200)\n", " scoring = ['balanced_accuracy', 'average_precision', 'f1']\n", "\n", " results = cross_validate(model, X, y, cv=cv, scoring=scoring, n_jobs=1)\n", " balanced_acc = results['test_balanced_accuracy']\n", " ap_score = results['test_average_precision']\n", " f1_score = results['test_f1']\n", "\n", " # create a DataFrame to store the mean and standard deviation of the scores\n", " data = {\n", " 'score': ['balanced_accuracy', 'average_precision', 'f1_score'],\n", " 'mean': [balanced_acc.mean(), ap_score.mean(), f1_score.mean()],\n", " 'std': [balanced_acc.std(), ap_score.std(), f1_score.std()]\n", " }\n", " df = pd.DataFrame(data)\n", "\n", " # print the results\n", " print(f'Average precision: {ap_score.mean():.3f} (+/- {ap_score.std():.3f})')\n", " print(f'F1 score: {f1_score.mean():.3f} (+/- {f1_score.std():.3f})')\n", " print(f'Balanced accuracy: {balanced_acc.mean():.3f} (+/- {balanced_acc.std():.3f})')\n", "\n", " return df" ] }, { "attachments": {}, "cell_type": "markdown", "id": "58e00c1e", "metadata": {}, "source": [ "### Compare oversampling and feature selection technique on the performance of baseline svc model" ] }, { "cell_type": "code", "execution_count": 12, "id": "077ca8bd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Average precision: 0.630 (+/- 0.005)\n", "F1 score: 0.533 (+/- 0.008)\n", "Balanced accuracy: 0.687 (+/- 0.005)\n", "Average precision: 0.606 (+/- 0.005)\n", "F1 score: 0.596 (+/- 0.005)\n", "Balanced accuracy: 0.754 (+/- 0.004)\n", "Average precision: 0.630 (+/- 0.005)\n", "F1 score: 0.532 (+/- 0.007)\n", "Balanced accuracy: 0.687 (+/- 0.004)\n", "Average precision: 0.606 (+/- 0.005)\n", "F1 score: 0.596 (+/- 0.006)\n", "Balanced accuracy: 0.754 (+/- 0.004)\n", "Average precision: 0.627 (+/- 0.004)\n", "F1 score: 0.608 (+/- 0.006)\n", "Balanced accuracy: 0.753 (+/- 0.005)\n" ] } ], "source": [ "from sklearn.svm import LinearSVC\n", "\n", "\n", "\n", "svc = LinearSVC(dual=False)\n", "baseline_SVC = imbpipeline(\n", " [\n", " ('preprocessor', preprocessor),\n", " ('classifier', svc)\n", " ]\n", ")\n", "\n", "\n", "\n", "oversampling_SVC = imbpipeline(\n", " [\n", " ('over', smoteNC),\n", " ('preprocessor', preprocessor),\n", " ('classifier', svc)\n", " ]\n", ")\n", "\n", "feature_selction_SVC =imbpipeline(\n", " [\n", " ('preprocessor', preprocessor),\n", " ('feature_selection',fes),\n", " ('classifier', svc)\n", " ]\n", ")\n", "\n", "both_SVC = imbpipeline(\n", " [\n", " ('over', smoteNC),\n", " ('preprocessor', preprocessor),\n", " ('feature_selection',fes),\n", " ('classifier', svc)\n", " ]\n", ")\n", "\n", "oversampling2_SVC = imbpipeline(\n", " [\n", " ('preprocessor', preprocessor),\n", " ('over', smoteNC2),\n", " ('classifier', svc)\n", " ]\n", ")\n", "models = [\n", " ('baseline', baseline_SVC),\n", " ('oversampling', oversampling_SVC),\n", " ('feature_selection', feature_selction_SVC),\n", " ('both', both_SVC),\n", " ('oversampling2', oversampling2_SVC),\n", "\n", "]\n", "\n", "# initialize a list to store the results for each model\n", "results = []\n", "\n", "# iterate over the list of models\n", "for name, model in models:\n", " # evaluate the model using the eval_base_models function\n", " df = eval_base_models(5, model, X, y)\n", "\n", " # add the model name to the DataFrame\n", " df['model'] = name\n", "\n", " # add the DataFrame to the list of results\n", " results.append(df)\n", "\n", "df_combined = pd.concat(results, ignore_index=True)\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "e4c87875", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
scoreaverage_precisionbalanced_accuracyf1_scoreaverage_precisionbalanced_accuracyf1_score
model
baseline0.6303970.6870340.5327100.0052170.0047770.008199
both0.6056900.7540490.5963300.0050150.0043150.005606
feature_selection0.6301990.6867200.5321650.0051090.0041340.007051
oversampling0.6059690.7542180.5964700.0051730.0035810.004911
oversampling20.6272380.7532770.6082400.0042900.0048620.006231
\n", "
" ], "text/plain": [ " mean \\\n", "score average_precision balanced_accuracy f1_score \n", "model \n", "baseline 0.630397 0.687034 0.532710 \n", "both 0.605690 0.754049 0.596330 \n", "feature_selection 0.630199 0.686720 0.532165 \n", "oversampling 0.605969 0.754218 0.596470 \n", "oversampling2 0.627238 0.753277 0.608240 \n", "\n", " std \n", "score average_precision balanced_accuracy f1_score \n", "model \n", "baseline 0.005217 0.004777 0.008199 \n", "both 0.005015 0.004315 0.005606 \n", "feature_selection 0.005109 0.004134 0.007051 \n", "oversampling 0.005173 0.003581 0.004911 \n", "oversampling2 0.004290 0.004862 0.006231 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pivot = df_combined.pivot(index='model', columns='score')\n", "\n", "# display the pivoted DataFrame\n", "df_pivot" ] }, { "cell_type": "code", "execution_count": 14, "id": "1317e9df", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('preprocessor',\n",
       "                 ColumnTransformer(remainder='passthrough',\n",
       "                                   transformers=[('num',\n",
       "                                                  Pipeline(steps=[('scaler',\n",
       "                                                                   MinMaxScaler())]),\n",
       "                                                  ['loudness', 'duration_ms',\n",
       "                                                   'tempo']),\n",
       "                                                 ('cat',\n",
       "                                                  Pipeline(steps=[('encoder',\n",
       "                                                                   TargetEncoder())]),\n",
       "                                                  ['track_genre', 'key',\n",
       "                                                   'time_signature'])])),\n",
       "                ('over',\n",
       "                 SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n",
       "                         random_state=42)),\n",
       "                ('classifier', LinearSVC(dual=False))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('preprocessor',\n", " ColumnTransformer(remainder='passthrough',\n", " transformers=[('num',\n", " Pipeline(steps=[('scaler',\n", " MinMaxScaler())]),\n", " ['loudness', 'duration_ms',\n", " 'tempo']),\n", " ('cat',\n", " Pipeline(steps=[('encoder',\n", " TargetEncoder())]),\n", " ['track_genre', 'key',\n", " 'time_signature'])])),\n", " ('over',\n", " SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n", " random_state=42)),\n", " ('classifier', LinearSVC(dual=False))])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "oversampling2_SVC" ] }, { "attachments": {}, "cell_type": "markdown", "id": "7dd05eb9", "metadata": {}, "source": [ "## Use optuna for hyperparameter tuning" ] }, { "attachments": {}, "cell_type": "markdown", "id": "2c76ac84", "metadata": {}, "source": [ "#### LinearSVC Model" ] }, { "cell_type": "code", "execution_count": 12, "id": "5bcee11c", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\zheng\\Documents\\MMU SHIT\\YEAR2\\SEM2\\DATA SCIENCE FUNDAMENTALS\\Music-Popularity-Prediction\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import optuna\n", "from optuna.pruners import MedianPruner\n", "from optuna.samplers import TPESampler" ] }, { "cell_type": "code", "execution_count": 13, "id": "99edd49f", "metadata": {}, "outputs": [], "source": [ "def objective(trial,model,params,n_jobs=2):\n", " # Define the hyperparameters to optimize\n", " # Set the hyperparameters of the pipeline\n", " cv = StratifiedKFold(n_splits=5,shuffle=True,random_state=200)\n", "\n", " model.set_params(\n", " **params\n", " )\n", " scoring = ['average_precision','f1']\n", " # Evaluate the pipeline using cross-validation\n", " scores = cross_validate(model, X_train, y_train, cv=cv,scoring = scoring,n_jobs=n_jobs)\n", "\n", " return scores['test_average_precision'].mean(),scores['test_f1'].mean()" ] }, { "cell_type": "code", "execution_count": 14, "id": "7305f2d1", "metadata": {}, "outputs": [], "source": [ "def evaluate_study(model,best_params,linear=False):\n", "\n", " print(best_params)\n", " model.set_params(**best_params)\n", "\n", "\n", " model.fit(X_train, y_train)\n", "\n", " y_hat = model.predict(X_test) \n", " if linear:\n", " y_proba = model.decision_function(X_test)\n", " else:\n", " y_proba = model.predict_proba(X_test)[:,1]\n", " \n", " print(classification_report(y_test, y_hat))\n", " cm = confusion_matrix(y_test,y_hat)\n", "\n", " # cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] # normalize the data\n", "\n", " # plot the heatmap\n", " sns.heatmap(cm, annot=True, fmt='d',cmap='Blues')\n", " # add labels\n", " plt.xlabel('Predicted label')\n", " plt.ylabel('True label')\n", " plt.show()\n", " print(f1_score(y_test,y_hat))\n", " print(average_precision_score(y_test,y_proba))" ] }, { "cell_type": "code", "execution_count": 17, "id": "eae129b1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m[I 2023-04-27 17:16:56,434]\u001b[0m A new study created in RDB with name: svc_study\u001b[0m\n", "\u001b[32m[I 2023-04-27 17:18:34,848]\u001b[0m Trial 0 finished with values: [0.6271378864955628, 0.60496196260026] and parameters: {'classifier__C': 0.21065760532526429, 'classifier__max_iter': 764, 'over__sampling_strategy': 0.8704123341552652, 'over__k_neighbors': 12}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:18:35,001]\u001b[0m Trial 2 finished with values: [0.6259430247417687, 0.6031775257543313] and parameters: {'classifier__C': 0.5157974012489253, 'classifier__max_iter': 431, 'over__sampling_strategy': 0.7783524651253351, 'over__k_neighbors': 10}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:18:45,612]\u001b[0m Trial 3 finished with values: [0.6253452764179211, 0.6040863263964994] and parameters: {'classifier__C': 0.8805440603466927, 'classifier__max_iter': 391, 'over__sampling_strategy': 0.7941995707563849, 'over__k_neighbors': 25}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:18:45,666]\u001b[0m Trial 1 finished with values: [0.6253452764179211, 0.6040863263964994] and parameters: {'classifier__C': 0.7929614191569014, 'classifier__max_iter': 465, 'over__sampling_strategy': 0.8873521413202637, 'over__k_neighbors': 23}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:20:13,020]\u001b[0m Trial 4 finished with values: [0.6282477173769774, 0.5934056220405848] and parameters: {'classifier__C': 0.9301320685511987, 'classifier__max_iter': 782, 'over__sampling_strategy': 0.8138286759385291, 'over__k_neighbors': 8}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:20:13,818]\u001b[0m Trial 5 finished with values: [0.6282204635529418, 0.5929281899674224] and parameters: {'classifier__C': 0.7358484787427078, 'classifier__max_iter': 311, 'over__sampling_strategy': 0.7595946974431383, 'over__k_neighbors': 10}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:20:22,416]\u001b[0m Trial 6 finished with values: [0.6301406492411022, 0.5963181056944885] and parameters: {'classifier__C': 0.37654959368271695, 'classifier__max_iter': 515, 'over__sampling_strategy': 0.9580509861222035, 'over__k_neighbors': 2}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:20:22,863]\u001b[0m Trial 7 finished with values: [0.6307630488668194, 0.5486846695802206] and parameters: {'classifier__C': 0.20073764010325076, 'classifier__max_iter': 114, 'over__sampling_strategy': 0.35447980891581615, 'over__k_neighbors': 17}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:21:54,548]\u001b[0m Trial 8 finished with values: [0.6262782700972777, 0.6039828139345402] and parameters: {'classifier__C': 0.5723261412671994, 'classifier__max_iter': 484, 'over__sampling_strategy': 0.8837153043865249, 'over__k_neighbors': 19}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:21:54,733]\u001b[0m Trial 9 finished with values: [0.627348416651848, 0.5991734811719478] and parameters: {'classifier__C': 0.5833274778004167, 'classifier__max_iter': 389, 'over__sampling_strategy': 0.6781258602899781, 'over__k_neighbors': 28}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:22:04,155]\u001b[0m Trial 10 finished with values: [0.6249369481674698, 0.6048012295582523] and parameters: {'classifier__C': 0.8913646386789609, 'classifier__max_iter': 794, 'over__sampling_strategy': 0.9963853433976897, 'over__k_neighbors': 26}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:22:04,908]\u001b[0m Trial 11 finished with values: [0.628783847591096, 0.5956836304551931] and parameters: {'classifier__C': 0.7996602985543773, 'classifier__max_iter': 460, 'over__sampling_strategy': 0.5623434195828432, 'over__k_neighbors': 17}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:23:42,557]\u001b[0m Trial 12 finished with values: [0.6277103148265692, 0.6046972576557923] and parameters: {'classifier__C': 0.3842787765811287, 'classifier__max_iter': 106, 'over__sampling_strategy': 0.7306208161011875, 'over__k_neighbors': 14}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:23:43,324]\u001b[0m Trial 13 finished with values: [0.6298378789188432, 0.6077943051709517] and parameters: {'classifier__C': 0.2274287204137743, 'classifier__max_iter': 958, 'over__sampling_strategy': 0.9828399128358947, 'over__k_neighbors': 2}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:23:52,989]\u001b[0m Trial 14 finished with values: [0.6298008505361146, 0.6078877260932798] and parameters: {'classifier__C': 0.20208391320743763, 'classifier__max_iter': 986, 'over__sampling_strategy': 0.9980598524526756, 'over__k_neighbors': 1}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:23:53,943]\u001b[0m Trial 15 finished with values: [0.6288835375745078, 0.6077027674725691] and parameters: {'classifier__C': 0.20159948785739282, 'classifier__max_iter': 964, 'over__sampling_strategy': 0.9873481559692521, 'over__k_neighbors': 3}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:25:22,158]\u001b[0m Trial 16 finished with values: [0.6301839989502371, 0.5963195854172794] and parameters: {'classifier__C': 0.3843755207007472, 'classifier__max_iter': 112, 'over__sampling_strategy': 0.5635989840032031, 'over__k_neighbors': 3}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:25:23,175]\u001b[0m Trial 17 finished with values: [0.6307695712010175, 0.5611137807657134] and parameters: {'classifier__C': 0.2369386083671055, 'classifier__max_iter': 949, 'over__sampling_strategy': 0.36475850665972787, 'over__k_neighbors': 2}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:25:33,098]\u001b[0m Trial 18 finished with values: [0.6307638553736291, 0.5608749882360707] and parameters: {'classifier__C': 0.6703968104752966, 'classifier__max_iter': 954, 'over__sampling_strategy': 0.3640576217745692, 'over__k_neighbors': 5}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:25:33,769]\u001b[0m Trial 19 finished with values: [0.6301211263208712, 0.5877026518438576] and parameters: {'classifier__C': 0.43459690823061076, 'classifier__max_iter': 653, 'over__sampling_strategy': 0.4827431237652612, 'over__k_neighbors': 7}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:27:03,014]\u001b[0m Trial 20 finished with values: [0.6307808034539106, 0.5614467531211542] and parameters: {'classifier__C': 0.6499140003222595, 'classifier__max_iter': 648, 'over__sampling_strategy': 0.35944797674461365, 'over__k_neighbors': 6}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:27:03,866]\u001b[0m Trial 21 finished with values: [0.6286231982247419, 0.6062646879506349] and parameters: {'classifier__C': 0.29743720890195174, 'classifier__max_iter': 877, 'over__sampling_strategy': 0.9245429158927033, 'over__k_neighbors': 6}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:27:13,023]\u001b[0m Trial 22 finished with values: [0.6286443746713253, 0.6063107370182246] and parameters: {'classifier__C': 0.30278636027382355, 'classifier__max_iter': 667, 'over__sampling_strategy': 0.9172899483579131, 'over__k_neighbors': 6}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:27:13,871]\u001b[0m Trial 23 finished with values: [0.630316294718609, 0.6038237933521561] and parameters: {'classifier__C': 0.2952391353745413, 'classifier__max_iter': 853, 'over__sampling_strategy': 0.7030145869221363, 'over__k_neighbors': 1}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:28:42,169]\u001b[0m Trial 24 finished with values: [0.6295765335657215, 0.6009404295527544] and parameters: {'classifier__C': 0.29846267977500085, 'classifier__max_iter': 630, 'over__sampling_strategy': 0.608453900184778, 'over__k_neighbors': 6}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:28:42,955]\u001b[0m Trial 25 finished with values: [0.630346870523415, 0.6018137168471782] and parameters: {'classifier__C': 0.29284985753795084, 'classifier__max_iter': 619, 'over__sampling_strategy': 0.6249284215654641, 'over__k_neighbors': 1}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:28:52,741]\u001b[0m Trial 26 finished with values: [0.6292130879983919, 0.6062163858570992] and parameters: {'classifier__C': 0.30545944181831314, 'classifier__max_iter': 626, 'over__sampling_strategy': 0.8298069941031712, 'over__k_neighbors': 4}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:28:52,760]\u001b[0m Trial 27 finished with values: [0.6301014622420722, 0.6065469871950416] and parameters: {'classifier__C': 0.9978724186832753, 'classifier__max_iter': 880, 'over__sampling_strategy': 0.8286291465137434, 'over__k_neighbors': 1}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:29:44,814]\u001b[0m Trial 28 finished with values: [0.6263172807194213, 0.6044964798439986] and parameters: {'classifier__C': 0.6502553839111899, 'classifier__max_iter': 867, 'over__sampling_strategy': 0.8246162989125656, 'over__k_neighbors': 21}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:29:45,269]\u001b[0m Trial 29 finished with values: [0.6277401859010434, 0.6048636542394473] and parameters: {'classifier__C': 0.4795770582032709, 'classifier__max_iter': 260, 'over__sampling_strategy': 0.8304895880696732, 'over__k_neighbors': 9}. \u001b[0m\n" ] } ], "source": [ "from sklearn.svm import LinearSVC\n", "\n", "\n", "\n", "svc = LinearSVC(dual=False)\n", "pipe_SVC = imbpipeline(\n", " [ \n", " ('preprocessor', preprocessor),\n", "\n", " ('over',smoteNC2),\n", "\n", " ('classifier',svc)\n", " ]\n", " )\n", "def create_svcspace(trial):\n", " return {\n", " 'classifier__C': trial.suggest_float(\"classifier__C\", 0.2, 1),\n", " 'classifier__max_iter': trial.suggest_int('classifier__max_iter', 100, 1000),\n", " 'over__sampling_strategy': trial.suggest_float(\"over__sampling_strategy\", 0.35, 1.00),\n", " 'over__k_neighbors': trial.suggest_int('over__k_neighbors', 1, 30),\n", " }\n", "svc_func = lambda trial: objective(trial,pipe_SVC,create_svcspace(trial))\n", "\n", "svc_study = optuna.create_study(directions=['maximize','maximize'],pruner=MedianPruner(),study_name='svc_study',storage='sqlite:///my_study.db',sampler = TPESampler(seed=100))\n", "svc_study.optimize(svc_func, n_trials=30,n_jobs=4)\n", "# # study = optuna.load_study(study_name=\"svc_study\", storage=\"sqlite:///my_study.db\")\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 42, "id": "bbe09c19", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "hovertemplate": "%{text}Trial", "marker": { "color": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 21, 22, 24, 26, 28, 29 ], "colorbar": { "title": { "text": "Trial" } }, "colorscale": [ [ 0, "rgb(247,251,255)" ], [ 0.125, "rgb(222,235,247)" ], [ 0.25, "rgb(198,219,239)" ], [ 0.375, "rgb(158,202,225)" ], [ 0.5, "rgb(107,174,214)" ], [ 0.625, "rgb(66,146,198)" ], [ 0.75, "rgb(33,113,181)" ], [ 0.875, "rgb(8,81,156)" ], [ 1, "rgb(8,48,107)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 0,
\"values\": [
0.6271378864955628,
0.60496196260026
],
\"params\": {
\"classifier__C\": 0.21065760532526429,
\"classifier__max_iter\": 764,
\"over__k_neighbors\": 12,
\"over__sampling_strategy\": 0.8704123341552652
}
}", "{
\"number\": 1,
\"values\": [
0.6253452764179211,
0.6040863263964994
],
\"params\": {
\"classifier__C\": 0.7929614191569014,
\"classifier__max_iter\": 465,
\"over__k_neighbors\": 23,
\"over__sampling_strategy\": 0.8873521413202637
}
}", "{
\"number\": 2,
\"values\": [
0.6259430247417687,
0.6031775257543313
],
\"params\": {
\"classifier__C\": 0.5157974012489253,
\"classifier__max_iter\": 431,
\"over__k_neighbors\": 10,
\"over__sampling_strategy\": 0.7783524651253351
}
}", "{
\"number\": 3,
\"values\": [
0.6253452764179211,
0.6040863263964994
],
\"params\": {
\"classifier__C\": 0.8805440603466927,
\"classifier__max_iter\": 391,
\"over__k_neighbors\": 25,
\"over__sampling_strategy\": 0.7941995707563849
}
}", "{
\"number\": 4,
\"values\": [
0.6282477173769774,
0.5934056220405848
],
\"params\": {
\"classifier__C\": 0.9301320685511987,
\"classifier__max_iter\": 782,
\"over__k_neighbors\": 8,
\"over__sampling_strategy\": 0.8138286759385291
}
}", "{
\"number\": 5,
\"values\": [
0.6282204635529418,
0.5929281899674224
],
\"params\": {
\"classifier__C\": 0.7358484787427078,
\"classifier__max_iter\": 311,
\"over__k_neighbors\": 10,
\"over__sampling_strategy\": 0.7595946974431383
}
}", "{
\"number\": 6,
\"values\": [
0.6301406492411022,
0.5963181056944885
],
\"params\": {
\"classifier__C\": 0.37654959368271695,
\"classifier__max_iter\": 515,
\"over__k_neighbors\": 2,
\"over__sampling_strategy\": 0.9580509861222035
}
}", "{
\"number\": 7,
\"values\": [
0.6307630488668194,
0.5486846695802206
],
\"params\": {
\"classifier__C\": 0.20073764010325076,
\"classifier__max_iter\": 114,
\"over__k_neighbors\": 17,
\"over__sampling_strategy\": 0.35447980891581615
}
}", "{
\"number\": 8,
\"values\": [
0.6262782700972777,
0.6039828139345402
],
\"params\": {
\"classifier__C\": 0.5723261412671994,
\"classifier__max_iter\": 484,
\"over__k_neighbors\": 19,
\"over__sampling_strategy\": 0.8837153043865249
}
}", "{
\"number\": 9,
\"values\": [
0.627348416651848,
0.5991734811719478
],
\"params\": {
\"classifier__C\": 0.5833274778004167,
\"classifier__max_iter\": 389,
\"over__k_neighbors\": 28,
\"over__sampling_strategy\": 0.6781258602899781
}
}", "{
\"number\": 10,
\"values\": [
0.6249369481674698,
0.6048012295582523
],
\"params\": {
\"classifier__C\": 0.8913646386789609,
\"classifier__max_iter\": 794,
\"over__k_neighbors\": 26,
\"over__sampling_strategy\": 0.9963853433976897
}
}", "{
\"number\": 11,
\"values\": [
0.628783847591096,
0.5956836304551931
],
\"params\": {
\"classifier__C\": 0.7996602985543773,
\"classifier__max_iter\": 460,
\"over__k_neighbors\": 17,
\"over__sampling_strategy\": 0.5623434195828432
}
}", "{
\"number\": 12,
\"values\": [
0.6277103148265692,
0.6046972576557923
],
\"params\": {
\"classifier__C\": 0.3842787765811287,
\"classifier__max_iter\": 106,
\"over__k_neighbors\": 14,
\"over__sampling_strategy\": 0.7306208161011875
}
}", "{
\"number\": 15,
\"values\": [
0.6288835375745078,
0.6077027674725691
],
\"params\": {
\"classifier__C\": 0.20159948785739282,
\"classifier__max_iter\": 964,
\"over__k_neighbors\": 3,
\"over__sampling_strategy\": 0.9873481559692521
}
}", "{
\"number\": 16,
\"values\": [
0.6301839989502371,
0.5963195854172794
],
\"params\": {
\"classifier__C\": 0.3843755207007472,
\"classifier__max_iter\": 112,
\"over__k_neighbors\": 3,
\"over__sampling_strategy\": 0.5635989840032031
}
}", "{
\"number\": 17,
\"values\": [
0.6307695712010175,
0.5611137807657134
],
\"params\": {
\"classifier__C\": 0.2369386083671055,
\"classifier__max_iter\": 949,
\"over__k_neighbors\": 2,
\"over__sampling_strategy\": 0.36475850665972787
}
}", "{
\"number\": 18,
\"values\": [
0.6307638553736291,
0.5608749882360707
],
\"params\": {
\"classifier__C\": 0.6703968104752966,
\"classifier__max_iter\": 954,
\"over__k_neighbors\": 5,
\"over__sampling_strategy\": 0.3640576217745692
}
}", "{
\"number\": 19,
\"values\": [
0.6301211263208712,
0.5877026518438576
],
\"params\": {
\"classifier__C\": 0.43459690823061076,
\"classifier__max_iter\": 653,
\"over__k_neighbors\": 7,
\"over__sampling_strategy\": 0.4827431237652612
}
}", "{
\"number\": 21,
\"values\": [
0.6286231982247419,
0.6062646879506349
],
\"params\": {
\"classifier__C\": 0.29743720890195174,
\"classifier__max_iter\": 877,
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.9245429158927033
}
}", "{
\"number\": 22,
\"values\": [
0.6286443746713253,
0.6063107370182246
],
\"params\": {
\"classifier__C\": 0.30278636027382355,
\"classifier__max_iter\": 667,
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.9172899483579131
}
}", "{
\"number\": 24,
\"values\": [
0.6295765335657215,
0.6009404295527544
],
\"params\": {
\"classifier__C\": 0.29846267977500085,
\"classifier__max_iter\": 630,
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.608453900184778
}
}", "{
\"number\": 26,
\"values\": [
0.6292130879983919,
0.6062163858570992
],
\"params\": {
\"classifier__C\": 0.30545944181831314,
\"classifier__max_iter\": 626,
\"over__k_neighbors\": 4,
\"over__sampling_strategy\": 0.8298069941031712
}
}", "{
\"number\": 28,
\"values\": [
0.6263172807194213,
0.6044964798439986
],
\"params\": {
\"classifier__C\": 0.6502553839111899,
\"classifier__max_iter\": 867,
\"over__k_neighbors\": 21,
\"over__sampling_strategy\": 0.8246162989125656
}
}", "{
\"number\": 29,
\"values\": [
0.6277401859010434,
0.6048636542394473
],
\"params\": {
\"classifier__C\": 0.4795770582032709,
\"classifier__max_iter\": 260,
\"over__k_neighbors\": 9,
\"over__sampling_strategy\": 0.8304895880696732
}
}" ], "type": "scatter", "x": [ 0.6271378864955628, 0.6253452764179211, 0.6259430247417687, 0.6253452764179211, 0.6282477173769774, 0.6282204635529418, 0.6301406492411022, 0.6307630488668194, 0.6262782700972777, 0.627348416651848, 0.6249369481674698, 0.628783847591096, 0.6277103148265692, 0.6288835375745078, 0.6301839989502371, 0.6307695712010175, 0.6307638553736291, 0.6301211263208712, 0.6286231982247419, 0.6286443746713253, 0.6295765335657215, 0.6292130879983919, 0.6263172807194213, 0.6277401859010434 ], "y": [ 0.60496196260026, 0.6040863263964994, 0.6031775257543313, 0.6040863263964994, 0.5934056220405848, 0.5929281899674224, 0.5963181056944885, 0.5486846695802206, 0.6039828139345402, 0.5991734811719478, 0.6048012295582523, 0.5956836304551931, 0.6046972576557923, 0.6077027674725691, 0.5963195854172794, 0.5611137807657134, 0.5608749882360707, 0.5877026518438576, 0.6062646879506349, 0.6063107370182246, 0.6009404295527544, 0.6062163858570992, 0.6044964798439986, 0.6048636542394473 ] }, { "hovertemplate": "%{text}Best Trial", "marker": { "color": [ 13, 14, 20, 23, 25, 27 ], "colorbar": { "title": { "text": "Best Trial" }, "x": 1.1, "xpad": 40 }, "colorscale": [ [ 0, "rgb(255,245,240)" ], [ 0.125, "rgb(254,224,210)" ], [ 0.25, "rgb(252,187,161)" ], [ 0.375, "rgb(252,146,114)" ], [ 0.5, "rgb(251,106,74)" ], [ 0.625, "rgb(239,59,44)" ], [ 0.75, "rgb(203,24,29)" ], [ 0.875, "rgb(165,15,21)" ], [ 1, "rgb(103,0,13)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 13,
\"values\": [
0.6298378789188432,
0.6077943051709517
],
\"params\": {
\"classifier__C\": 0.2274287204137743,
\"classifier__max_iter\": 958,
\"over__k_neighbors\": 2,
\"over__sampling_strategy\": 0.9828399128358947
}
}", "{
\"number\": 14,
\"values\": [
0.6298008505361146,
0.6078877260932798
],
\"params\": {
\"classifier__C\": 0.20208391320743763,
\"classifier__max_iter\": 986,
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.9980598524526756
}
}", "{
\"number\": 20,
\"values\": [
0.6307808034539106,
0.5614467531211542
],
\"params\": {
\"classifier__C\": 0.6499140003222595,
\"classifier__max_iter\": 648,
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.35944797674461365
}
}", "{
\"number\": 23,
\"values\": [
0.630316294718609,
0.6038237933521561
],
\"params\": {
\"classifier__C\": 0.2952391353745413,
\"classifier__max_iter\": 853,
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.7030145869221363
}
}", "{
\"number\": 25,
\"values\": [
0.630346870523415,
0.6018137168471782
],
\"params\": {
\"classifier__C\": 0.29284985753795084,
\"classifier__max_iter\": 619,
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.6249284215654641
}
}", "{
\"number\": 27,
\"values\": [
0.6301014622420722,
0.6065469871950416
],
\"params\": {
\"classifier__C\": 0.9978724186832753,
\"classifier__max_iter\": 880,
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.8286291465137434
}
}" ], "type": "scatter", "x": [ 0.6298378789188432, 0.6298008505361146, 0.6307808034539106, 0.630316294718609, 0.630346870523415, 0.6301014622420722 ], "y": [ 0.6077943051709517, 0.6078877260932798, 0.5614467531211542, 0.6038237933521561, 0.6018137168471782, 0.6065469871950416 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Pareto-front Plot" }, "xaxis": { "title": { "text": "Mean PR AUC" } }, "yaxis": { "title": { "text": "Mean F1" } } } } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "svc_study = optuna.load_study(study_name=\"svc_study\", storage=\"sqlite:///my_study.db\")\n", "optuna.visualization.plot_pareto_front(svc_study, target_names=[\"Mean PR AUC\", \"Mean F1\"])" ] }, { "cell_type": "code", "execution_count": 81, "id": "33e5c2b8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'classifier__C': 0.20159948785739282, 'classifier__max_iter': 964, 'over__k_neighbors': 3, 'over__sampling_strategy': 0.9873481559692521}\n", " precision recall f1-score support\n", "\n", " 0 0.90 0.81 0.85 12191\n", " 1 0.54 0.70 0.61 3893\n", "\n", " accuracy 0.78 16084\n", " macro avg 0.72 0.76 0.73 16084\n", "weighted avg 0.81 0.78 0.79 16084\n", "\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "0.6118384245272463\n", "0.627416495069592\n" ] } ], "source": [ "svc_best_params = svc_study.trials[15].params\n", "\n", "evaluate_study(pipe_SVC,svc_best_params,linear=True)\n" ] }, { "attachments": {}, "cell_type": "markdown", "id": "461e0bbb", "metadata": {}, "source": [ "#### RandomForestClassifier Model" ] }, { "cell_type": "code", "execution_count": 21, "id": "4916b451", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m[I 2023-04-27 17:32:00,581]\u001b[0m A new study created in RDB with name: rfc_study\u001b[0m\n", "\u001b[32m[I 2023-04-27 17:35:52,499]\u001b[0m Trial 0 finished with values: [0.6457784860397402, 0.6187270393406835] and parameters: {'classifier__n_estimators': 146, 'classifier__max_depth': 4, 'classifier__min_samples_split': 18, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'log2', 'classifier__criterion': 'gini', 'over__sampling_strategy': 0.5365781966134286, 'over__k_neighbors': 19}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:35:55,681]\u001b[0m Trial 1 finished with values: [0.6455393332818595, 0.6188688022101637] and parameters: {'classifier__n_estimators': 845, 'classifier__max_depth': 15, 'classifier__min_samples_split': 18, 'classifier__min_samples_leaf': 20, 'classifier__max_features': 'log2', 'classifier__criterion': 'gini', 'over__sampling_strategy': 0.589950434213136, 'over__k_neighbors': 27}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:37:28,900]\u001b[0m Trial 2 finished with values: [0.6158567030498819, 0.6152848033012353] and parameters: {'classifier__n_estimators': 141, 'classifier__max_depth': 9, 'classifier__min_samples_split': 13, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.5958417647731421, 'over__k_neighbors': 17}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:37:30,656]\u001b[0m Trial 3 finished with values: [0.5398945879747302, 0.6015517044276091] and parameters: {'classifier__n_estimators': 971, 'classifier__max_depth': 2, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'log2', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.9218837346358962, 'over__k_neighbors': 14}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:39:33,674]\u001b[0m Trial 4 finished with values: [0.6581801490909269, 0.608470030275935] and parameters: {'classifier__n_estimators': 195, 'classifier__max_depth': 16, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 14, 'classifier__max_features': 'log2', 'classifier__criterion': 'gini', 'over__sampling_strategy': 0.42898528190525675, 'over__k_neighbors': 7}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:39:35,354]\u001b[0m Trial 5 finished with values: [0.6589574334035359, 0.6089957694795349] and parameters: {'classifier__n_estimators': 532, 'classifier__max_depth': 20, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 17, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'gini', 'over__sampling_strategy': 0.43873208266745045, 'over__k_neighbors': 14}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:42:28,285]\u001b[0m Trial 6 finished with values: [0.6505599390501532, 0.624233752040281] and parameters: {'classifier__n_estimators': 220, 'classifier__max_depth': 20, 'classifier__min_samples_split': 13, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'log2', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.6178764808588074, 'over__k_neighbors': 14}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:42:31,250]\u001b[0m Trial 7 finished with values: [0.6379579286393324, 0.6224983697745267] and parameters: {'classifier__n_estimators': 656, 'classifier__max_depth': 11, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 15, 'classifier__max_features': 'log2', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.7139119140769485, 'over__k_neighbors': 12}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:44:19,052]\u001b[0m Trial 8 finished with values: [0.5624099817641309, 0.6062502673775031] and parameters: {'classifier__n_estimators': 619, 'classifier__max_depth': 2, 'classifier__min_samples_split': 17, 'classifier__min_samples_leaf': 18, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.8932461021013373, 'over__k_neighbors': 25}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:44:22,124]\u001b[0m Trial 9 finished with values: [0.647871554226571, 0.6272805974697734] and parameters: {'classifier__n_estimators': 294, 'classifier__max_depth': 17, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'gini', 'over__sampling_strategy': 0.6817578152556343, 'over__k_neighbors': 11}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:45:51,804]\u001b[0m Trial 10 finished with values: [0.5259406528405167, 0.5620287996142432] and parameters: {'classifier__n_estimators': 606, 'classifier__max_depth': 1, 'classifier__min_samples_split': 14, 'classifier__min_samples_leaf': 17, 'classifier__max_features': 'log2', 'classifier__criterion': 'gini', 'over__sampling_strategy': 0.8424753505080824, 'over__k_neighbors': 6}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:45:54,043]\u001b[0m Trial 11 finished with values: [0.6525617070849642, 0.5606514491596611] and parameters: {'classifier__n_estimators': 411, 'classifier__max_depth': 10, 'classifier__min_samples_split': 13, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.3583905920374773, 'over__k_neighbors': 2}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:48:21,164]\u001b[0m Trial 12 finished with values: [0.6668654803559559, 0.5915465896102717] and parameters: {'classifier__n_estimators': 358, 'classifier__max_depth': 20, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.3547768896580491, 'over__k_neighbors': 21}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:48:24,410]\u001b[0m Trial 13 finished with values: [0.6583877591807694, 0.610637110065188] and parameters: {'classifier__n_estimators': 407, 'classifier__max_depth': 20, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 11, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.4733055211643861, 'over__k_neighbors': 21}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:51:02,537]\u001b[0m Trial 14 finished with values: [0.6435582016647413, 0.6255286751780276] and parameters: {'classifier__n_estimators': 314, 'classifier__max_depth': 20, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.7952207723683082, 'over__k_neighbors': 22}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:51:06,863]\u001b[0m Trial 15 finished with values: [0.6338362404935031, 0.6205946355129283] and parameters: {'classifier__n_estimators': 294, 'classifier__max_depth': 13, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'log2', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.9990937099884252, 'over__k_neighbors': 23}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:53:05,108]\u001b[0m Trial 16 finished with values: [0.626635156917178, 0.6195240635679194] and parameters: {'classifier__n_estimators': 280, 'classifier__max_depth': 13, 'classifier__min_samples_split': 16, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'log2', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.9815724136110824, 'over__k_neighbors': 30}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:53:08,490]\u001b[0m Trial 17 finished with values: [0.6135718897254712, 0.6110960409710786] and parameters: {'classifier__n_estimators': 459, 'classifier__max_depth': 6, 'classifier__min_samples_split': 16, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.6194991144490641, 'over__k_neighbors': 30}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:57:01,273]\u001b[0m Trial 18 finished with values: [0.6304722385136827, 0.6179047547972963] and parameters: {'classifier__n_estimators': 457, 'classifier__max_depth': 8, 'classifier__min_samples_split': 12, 'classifier__min_samples_leaf': 11, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.7227304249186857, 'over__k_neighbors': 18}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 17:57:07,364]\u001b[0m Trial 19 finished with values: [0.6486985656754299, 0.6288791063338287] and parameters: {'classifier__n_estimators': 746, 'classifier__max_depth': 18, 'classifier__min_samples_split': 12, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.7272211189665856, 'over__k_neighbors': 9}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:01:58,465]\u001b[0m Trial 20 finished with values: [0.6570914227372827, 0.6223681187066623] and parameters: {'classifier__n_estimators': 756, 'classifier__max_depth': 18, 'classifier__min_samples_split': 11, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.5216865704678908, 'over__k_neighbors': 8}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:02:07,594]\u001b[0m Trial 21 finished with values: [0.6492495856530484, 0.6285538705535771] and parameters: {'classifier__n_estimators': 758, 'classifier__max_depth': 18, 'classifier__min_samples_split': 11, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.7530444727082267, 'over__k_neighbors': 8}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:05:40,125]\u001b[0m Trial 22 finished with values: [0.6675140939925557, 0.5842531426932009] and parameters: {'classifier__n_estimators': 728, 'classifier__max_depth': 18, 'classifier__min_samples_split': 11, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.3543632598916632, 'over__k_neighbors': 9}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:05:53,444]\u001b[0m Trial 23 finished with values: [0.6681641126887758, 0.5842433909874318] and parameters: {'classifier__n_estimators': 757, 'classifier__max_depth': 18, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.35219232617527685, 'over__k_neighbors': 3}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:10:01,202]\u001b[0m Trial 24 finished with values: [0.6573525280866581, 0.6159498878314016] and parameters: {'classifier__n_estimators': 862, 'classifier__max_depth': 14, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.49538874444690467, 'over__k_neighbors': 4}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:10:17,626]\u001b[0m Trial 25 finished with values: [0.6597957569716394, 0.6188639788680736] and parameters: {'classifier__n_estimators': 881, 'classifier__max_depth': 15, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.49911191560552626, 'over__k_neighbors': 1}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:14:30,405]\u001b[0m Trial 26 finished with values: [0.658094277264654, 0.6214757135849159] and parameters: {'classifier__n_estimators': 820, 'classifier__max_depth': 17, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 13, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.5516450253093519, 'over__k_neighbors': 1}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:14:51,144]\u001b[0m Trial 27 finished with values: [0.6559723199421449, 0.6204850899021139] and parameters: {'classifier__n_estimators': 789, 'classifier__max_depth': 18, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 13, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.5357368739946705, 'over__k_neighbors': 4}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:19:15,815]\u001b[0m Trial 28 finished with values: [0.6485791873797704, 0.621370566743404] and parameters: {'classifier__n_estimators': 987, 'classifier__max_depth': 12, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 13, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.5563957091557683, 'over__k_neighbors': 4}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:19:39,098]\u001b[0m Trial 29 finished with values: [0.6452819707115311, 0.6233399945710498] and parameters: {'classifier__n_estimators': 985, 'classifier__max_depth': 12, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 12, 'classifier__max_features': 'sqrt', 'classifier__criterion': 'entropy', 'over__sampling_strategy': 0.6649391677270532, 'over__k_neighbors': 4}. \u001b[0m\n" ] } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "\n", "pipe_RFC = imbpipeline(\n", " [\n", "\n", "\n", " ('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", " ('classifier',RandomForestClassifier(n_jobs=4))\n", " ]\n", " )\n", "\n", "\n", "def rfc_space(trial):\n", " return {\n", " 'classifier__n_estimators': trial.suggest_int('classifier__n_estimators', 100, 1000),\n", " 'classifier__max_depth': trial.suggest_int('classifier__max_depth', 1, 20),\n", " 'classifier__min_samples_split': trial.suggest_int('classifier__min_samples_split', 2, 20),\n", " 'classifier__min_samples_leaf': trial.suggest_int('classifier__min_samples_leaf', 1, 20),\n", " 'classifier__max_features': trial.suggest_categorical('classifier__max_features', ['sqrt', 'log2']),\n", " 'classifier__criterion': trial.suggest_categorical('classifier__criterion', ['gini', 'entropy']),\n", " 'over__sampling_strategy': trial.suggest_float(\"over__sampling_strategy\", 0.35, 1.00),\n", " 'over__k_neighbors': trial.suggest_int('over__k_neighbors', 1, 30)\n", "}\n", "rfc_func = lambda trial: objective(trial,pipe_RFC,rfc_space(trial))\n", "\n", "rfc_study = optuna.create_study(directions=['maximize','maximize'],pruner=MedianPruner(),study_name='rfc_study',storage='sqlite:///my_study.db',sampler = TPESampler(seed=100))\n", "rfc_study.optimize(rfc_func, n_trials=30,n_jobs=2)\n", "\n" ] }, { "cell_type": "code", "execution_count": 82, "id": "d98f8134", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "hovertemplate": "%{text}Trial", "marker": { "color": [ 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 24, 27, 28, 29 ], "colorbar": { "title": { "text": "Trial" } }, "colorscale": [ [ 0, "rgb(247,251,255)" ], [ 0.125, "rgb(222,235,247)" ], [ 0.25, "rgb(198,219,239)" ], [ 0.375, "rgb(158,202,225)" ], [ 0.5, "rgb(107,174,214)" ], [ 0.625, "rgb(66,146,198)" ], [ 0.75, "rgb(33,113,181)" ], [ 0.875, "rgb(8,81,156)" ], [ 1, "rgb(8,48,107)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 0,
\"values\": [
0.6457784860397402,
0.6187270393406835
],
\"params\": {
\"classifier__criterion\": \"gini\",
\"classifier__max_depth\": 4,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 9,
\"classifier__min_samples_split\": 18,
\"classifier__n_estimators\": 146,
\"over__k_neighbors\": 19,
\"over__sampling_strategy\": 0.5365781966134286
}
}", "{
\"number\": 1,
\"values\": [
0.6455393332818595,
0.6188688022101637
],
\"params\": {
\"classifier__criterion\": \"gini\",
\"classifier__max_depth\": 15,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 20,
\"classifier__min_samples_split\": 18,
\"classifier__n_estimators\": 845,
\"over__k_neighbors\": 27,
\"over__sampling_strategy\": 0.589950434213136
}
}", "{
\"number\": 2,
\"values\": [
0.6158567030498819,
0.6152848033012353
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 9,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 3,
\"classifier__min_samples_split\": 13,
\"classifier__n_estimators\": 141,
\"over__k_neighbors\": 17,
\"over__sampling_strategy\": 0.5958417647731421
}
}", "{
\"number\": 3,
\"values\": [
0.5398945879747302,
0.6015517044276091
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 2,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 7,
\"classifier__min_samples_split\": 5,
\"classifier__n_estimators\": 971,
\"over__k_neighbors\": 14,
\"over__sampling_strategy\": 0.9218837346358962
}
}", "{
\"number\": 4,
\"values\": [
0.6581801490909269,
0.608470030275935
],
\"params\": {
\"classifier__criterion\": \"gini\",
\"classifier__max_depth\": 16,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 14,
\"classifier__min_samples_split\": 3,
\"classifier__n_estimators\": 195,
\"over__k_neighbors\": 7,
\"over__sampling_strategy\": 0.42898528190525675
}
}", "{
\"number\": 5,
\"values\": [
0.6589574334035359,
0.6089957694795349
],
\"params\": {
\"classifier__criterion\": \"gini\",
\"classifier__max_depth\": 20,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 17,
\"classifier__min_samples_split\": 5,
\"classifier__n_estimators\": 532,
\"over__k_neighbors\": 14,
\"over__sampling_strategy\": 0.43873208266745045
}
}", "{
\"number\": 7,
\"values\": [
0.6379579286393324,
0.6224983697745267
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 11,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 15,
\"classifier__min_samples_split\": 10,
\"classifier__n_estimators\": 656,
\"over__k_neighbors\": 12,
\"over__sampling_strategy\": 0.7139119140769485
}
}", "{
\"number\": 8,
\"values\": [
0.5624099817641309,
0.6062502673775031
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 2,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 18,
\"classifier__min_samples_split\": 17,
\"classifier__n_estimators\": 619,
\"over__k_neighbors\": 25,
\"over__sampling_strategy\": 0.8932461021013373
}
}", "{
\"number\": 9,
\"values\": [
0.647871554226571,
0.6272805974697734
],
\"params\": {
\"classifier__criterion\": \"gini\",
\"classifier__max_depth\": 17,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 7,
\"classifier__min_samples_split\": 6,
\"classifier__n_estimators\": 294,
\"over__k_neighbors\": 11,
\"over__sampling_strategy\": 0.6817578152556343
}
}", "{
\"number\": 10,
\"values\": [
0.5259406528405167,
0.5620287996142432
],
\"params\": {
\"classifier__criterion\": \"gini\",
\"classifier__max_depth\": 1,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 17,
\"classifier__min_samples_split\": 14,
\"classifier__n_estimators\": 606,
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.8424753505080824
}
}", "{
\"number\": 11,
\"values\": [
0.6525617070849642,
0.5606514491596611
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 10,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 1,
\"classifier__min_samples_split\": 13,
\"classifier__n_estimators\": 411,
\"over__k_neighbors\": 2,
\"over__sampling_strategy\": 0.3583905920374773
}
}", "{
\"number\": 13,
\"values\": [
0.6583877591807694,
0.610637110065188
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 20,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 11,
\"classifier__min_samples_split\": 9,
\"classifier__n_estimators\": 407,
\"over__k_neighbors\": 21,
\"over__sampling_strategy\": 0.4733055211643861
}
}", "{
\"number\": 14,
\"values\": [
0.6435582016647413,
0.6255286751780276
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 20,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 5,
\"classifier__min_samples_split\": 9,
\"classifier__n_estimators\": 314,
\"over__k_neighbors\": 22,
\"over__sampling_strategy\": 0.7952207723683082
}
}", "{
\"number\": 15,
\"values\": [
0.6338362404935031,
0.6205946355129283
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 13,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 4,
\"classifier__min_samples_split\": 8,
\"classifier__n_estimators\": 294,
\"over__k_neighbors\": 23,
\"over__sampling_strategy\": 0.9990937099884252
}
}", "{
\"number\": 16,
\"values\": [
0.626635156917178,
0.6195240635679194
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 13,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 2,
\"classifier__min_samples_split\": 16,
\"classifier__n_estimators\": 280,
\"over__k_neighbors\": 30,
\"over__sampling_strategy\": 0.9815724136110824
}
}", "{
\"number\": 17,
\"values\": [
0.6135718897254712,
0.6110960409710786
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 6,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 1,
\"classifier__min_samples_split\": 16,
\"classifier__n_estimators\": 459,
\"over__k_neighbors\": 30,
\"over__sampling_strategy\": 0.6194991144490641
}
}", "{
\"number\": 18,
\"values\": [
0.6304722385136827,
0.6179047547972963
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 8,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 11,
\"classifier__min_samples_split\": 12,
\"classifier__n_estimators\": 457,
\"over__k_neighbors\": 18,
\"over__sampling_strategy\": 0.7227304249186857
}
}", "{
\"number\": 24,
\"values\": [
0.6573525280866581,
0.6159498878314016
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 14,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 9,
\"classifier__min_samples_split\": 7,
\"classifier__n_estimators\": 862,
\"over__k_neighbors\": 4,
\"over__sampling_strategy\": 0.49538874444690467
}
}", "{
\"number\": 27,
\"values\": [
0.6559723199421449,
0.6204850899021139
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 18,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 13,
\"classifier__min_samples_split\": 2,
\"classifier__n_estimators\": 789,
\"over__k_neighbors\": 4,
\"over__sampling_strategy\": 0.5357368739946705
}
}", "{
\"number\": 28,
\"values\": [
0.6485791873797704,
0.621370566743404
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 12,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 13,
\"classifier__min_samples_split\": 3,
\"classifier__n_estimators\": 987,
\"over__k_neighbors\": 4,
\"over__sampling_strategy\": 0.5563957091557683
}
}", "{
\"number\": 29,
\"values\": [
0.6452819707115311,
0.6233399945710498
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 12,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 12,
\"classifier__min_samples_split\": 4,
\"classifier__n_estimators\": 985,
\"over__k_neighbors\": 4,
\"over__sampling_strategy\": 0.6649391677270532
}
}" ], "type": "scatter", "x": [ 0.6457784860397402, 0.6455393332818595, 0.6158567030498819, 0.5398945879747302, 0.6581801490909269, 0.6589574334035359, 0.6379579286393324, 0.5624099817641309, 0.647871554226571, 0.5259406528405167, 0.6525617070849642, 0.6583877591807694, 0.6435582016647413, 0.6338362404935031, 0.626635156917178, 0.6135718897254712, 0.6304722385136827, 0.6573525280866581, 0.6559723199421449, 0.6485791873797704, 0.6452819707115311 ], "y": [ 0.6187270393406835, 0.6188688022101637, 0.6152848033012353, 0.6015517044276091, 0.608470030275935, 0.6089957694795349, 0.6224983697745267, 0.6062502673775031, 0.6272805974697734, 0.5620287996142432, 0.5606514491596611, 0.610637110065188, 0.6255286751780276, 0.6205946355129283, 0.6195240635679194, 0.6110960409710786, 0.6179047547972963, 0.6159498878314016, 0.6204850899021139, 0.621370566743404, 0.6233399945710498 ] }, { "hovertemplate": "%{text}Best Trial", "marker": { "color": [ 6, 12, 19, 20, 21, 22, 23, 25, 26 ], "colorbar": { "title": { "text": "Best Trial" }, "x": 1.1, "xpad": 40 }, "colorscale": [ [ 0, "rgb(255,245,240)" ], [ 0.125, "rgb(254,224,210)" ], [ 0.25, "rgb(252,187,161)" ], [ 0.375, "rgb(252,146,114)" ], [ 0.5, "rgb(251,106,74)" ], [ 0.625, "rgb(239,59,44)" ], [ 0.75, "rgb(203,24,29)" ], [ 0.875, "rgb(165,15,21)" ], [ 1, "rgb(103,0,13)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 6,
\"values\": [
0.6505599390501532,
0.624233752040281
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 20,
\"classifier__max_features\": \"log2\",
\"classifier__min_samples_leaf\": 3,
\"classifier__min_samples_split\": 13,
\"classifier__n_estimators\": 220,
\"over__k_neighbors\": 14,
\"over__sampling_strategy\": 0.6178764808588074
}
}", "{
\"number\": 12,
\"values\": [
0.6668654803559559,
0.5915465896102717
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 20,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 1,
\"classifier__min_samples_split\": 10,
\"classifier__n_estimators\": 358,
\"over__k_neighbors\": 21,
\"over__sampling_strategy\": 0.3547768896580491
}
}", "{
\"number\": 19,
\"values\": [
0.6486985656754299,
0.6288791063338287
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 18,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 10,
\"classifier__min_samples_split\": 12,
\"classifier__n_estimators\": 746,
\"over__k_neighbors\": 9,
\"over__sampling_strategy\": 0.7272211189665856
}
}", "{
\"number\": 20,
\"values\": [
0.6570914227372827,
0.6223681187066623
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 18,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 6,
\"classifier__min_samples_split\": 11,
\"classifier__n_estimators\": 756,
\"over__k_neighbors\": 8,
\"over__sampling_strategy\": 0.5216865704678908
}
}", "{
\"number\": 21,
\"values\": [
0.6492495856530484,
0.6285538705535771
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 18,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 9,
\"classifier__min_samples_split\": 11,
\"classifier__n_estimators\": 758,
\"over__k_neighbors\": 8,
\"over__sampling_strategy\": 0.7530444727082267
}
}", "{
\"number\": 22,
\"values\": [
0.6675140939925557,
0.5842531426932009
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 18,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 6,
\"classifier__min_samples_split\": 11,
\"classifier__n_estimators\": 728,
\"over__k_neighbors\": 9,
\"over__sampling_strategy\": 0.3543632598916632
}
}", "{
\"number\": 23,
\"values\": [
0.6681641126887758,
0.5842433909874318
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 18,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 6,
\"classifier__min_samples_split\": 7,
\"classifier__n_estimators\": 757,
\"over__k_neighbors\": 3,
\"over__sampling_strategy\": 0.35219232617527685
}
}", "{
\"number\": 25,
\"values\": [
0.6597957569716394,
0.6188639788680736
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 15,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 9,
\"classifier__min_samples_split\": 7,
\"classifier__n_estimators\": 881,
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.49911191560552626
}
}", "{
\"number\": 26,
\"values\": [
0.658094277264654,
0.6214757135849159
],
\"params\": {
\"classifier__criterion\": \"entropy\",
\"classifier__max_depth\": 17,
\"classifier__max_features\": \"sqrt\",
\"classifier__min_samples_leaf\": 13,
\"classifier__min_samples_split\": 2,
\"classifier__n_estimators\": 820,
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.5516450253093519
}
}" ], "type": "scatter", "x": [ 0.6505599390501532, 0.6668654803559559, 0.6486985656754299, 0.6570914227372827, 0.6492495856530484, 0.6675140939925557, 0.6681641126887758, 0.6597957569716394, 0.658094277264654 ], "y": [ 0.624233752040281, 0.5915465896102717, 0.6288791063338287, 0.6223681187066623, 0.6285538705535771, 0.5842531426932009, 0.5842433909874318, 0.6188639788680736, 0.6214757135849159 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Pareto-front Plot" }, "xaxis": { "title": { "text": "Mean PR AUC" } }, "yaxis": { "title": { "text": "Mean F1" } } } } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "rfc_study = optuna.load_study(study_name=\"rfc_study\", storage=\"sqlite:///my_study.db\")\n", "optuna.visualization.plot_pareto_front(rfc_study, target_names=[\"Mean PR AUC\", \"Mean F1\"])" ] }, { "cell_type": "code", "execution_count": 83, "id": "1172e6f6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'classifier__criterion': 'entropy', 'classifier__max_depth': 15, 'classifier__max_features': 'sqrt', 'classifier__min_samples_leaf': 9, 'classifier__min_samples_split': 7, 'classifier__n_estimators': 881, 'over__k_neighbors': 1, 'over__sampling_strategy': 0.49911191560552626}\n", " precision recall f1-score support\n", "\n", " 0 0.88 0.88 0.88 12191\n", " 1 0.61 0.62 0.61 3893\n", "\n", " accuracy 0.81 16084\n", " macro avg 0.75 0.75 0.75 16084\n", "weighted avg 0.81 0.81 0.81 16084\n", "\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "0.6147740366150302\n", "0.6574943365651901\n" ] } ], "source": [ "\n", "rfc_best_params = rfc_study.trials[25].params\n", "evaluate_study(pipe_RFC,rfc_best_params)\n" ] }, { "attachments": {}, "cell_type": "markdown", "id": "663f0abc", "metadata": {}, "source": [ "#### LGBM Classifier Model" ] }, { "cell_type": "code", "execution_count": 27, "id": "87c95fc6", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m[I 2023-04-27 18:42:08,518]\u001b[0m A new study created in RDB with name: lgbm_study\u001b[0m\n", "\u001b[32m[I 2023-04-27 18:43:15,882]\u001b[0m Trial 0 finished with values: [0.6481791433257595, 0.608212027804601] and parameters: {'classifier__n_estimators': 878, 'classifier__learning_rate': 0.0914658479860042, 'classifier__num_leaves': 52, 'classifier__min_child_samples': 39, 'classifier__subsample': 0.8551474722356436, 'classifier__colsample_bytree': 0.6923913504558172, 'classifier__reg_alpha': 0.295762093408928, 'classifier__reg_lambda': 0.45757904384482906, 'classifier__max_depth': 4, 'over__sampling_strategy': 0.6644200991667422, 'over__k_neighbors': 14}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:43:16,206]\u001b[0m Trial 1 finished with values: [0.6481791433257595, 0.608212027804601] and parameters: {'classifier__n_estimators': 734, 'classifier__learning_rate': 0.12262001940318636, 'classifier__num_leaves': 139, 'classifier__min_child_samples': 18, 'classifier__subsample': 0.9551502480484655, 'classifier__colsample_bytree': 0.8836872773395599, 'classifier__reg_alpha': 0.3571316810129358, 'classifier__reg_lambda': 0.7796445000270502, 'classifier__max_depth': 6, 'over__sampling_strategy': 0.6306694569878601, 'over__k_neighbors': 2}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:44:15,194]\u001b[0m Trial 2 finished with values: [0.6445797291737211, 0.608400783342778] and parameters: {'classifier__n_estimators': 635, 'classifier__learning_rate': 0.10522687923567785, 'classifier__num_leaves': 79, 'classifier__min_child_samples': 38, 'classifier__subsample': 0.8604173819294232, 'classifier__colsample_bytree': 0.6494467550658721, 'classifier__reg_alpha': 0.24399972442948165, 'classifier__reg_lambda': 0.9345718162446607, 'classifier__max_depth': 9, 'over__sampling_strategy': 0.7388052336432036, 'over__k_neighbors': 6}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:44:15,687]\u001b[0m Trial 3 finished with values: [0.6217637298126792, 0.5977452305257794] and parameters: {'classifier__n_estimators': 381, 'classifier__learning_rate': 0.017736285174618147, 'classifier__num_leaves': 86, 'classifier__min_child_samples': 19, 'classifier__subsample': 0.6148958076790818, 'classifier__colsample_bytree': 0.9911211585650836, 'classifier__reg_alpha': 0.46044526140838715, 'classifier__reg_lambda': 0.7526456800569701, 'classifier__max_depth': 1, 'over__sampling_strategy': 0.6052892104590009, 'over__k_neighbors': 22}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:45:18,564]\u001b[0m Trial 4 finished with values: [0.6437272777192509, 0.6164909886102607] and parameters: {'classifier__n_estimators': 440, 'classifier__learning_rate': 0.014988221940214163, 'classifier__num_leaves': 31, 'classifier__min_child_samples': 43, 'classifier__subsample': 0.9382702804809353, 'classifier__colsample_bytree': 0.8378839922571999, 'classifier__reg_alpha': 0.11873476365664196, 'classifier__reg_lambda': 0.7013976118630566, 'classifier__max_depth': 6, 'over__sampling_strategy': 0.955421001376642, 'over__k_neighbors': 13}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:45:18,834]\u001b[0m Trial 5 finished with values: [0.6429011933266364, 0.6098298238510574] and parameters: {'classifier__n_estimators': 430, 'classifier__learning_rate': 0.19333835190551785, 'classifier__num_leaves': 85, 'classifier__min_child_samples': 14, 'classifier__subsample': 0.851599014018061, 'classifier__colsample_bytree': 0.8078731777886868, 'classifier__reg_alpha': 0.1612332411124245, 'classifier__reg_lambda': 0.7733976323443501, 'classifier__max_depth': 9, 'over__sampling_strategy': 0.7676143281665351, 'over__k_neighbors': 6}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:46:14,506]\u001b[0m Trial 7 finished with values: [0.6724975193877245, 0.5996970508621173] and parameters: {'classifier__n_estimators': 249, 'classifier__learning_rate': 0.050274449073944835, 'classifier__num_leaves': 117, 'classifier__min_child_samples': 18, 'classifier__subsample': 0.9993057768077361, 'classifier__colsample_bytree': 0.9371874013132707, 'classifier__reg_alpha': 0.15008994068887171, 'classifier__reg_lambda': 0.537409132772931, 'classifier__max_depth': 10, 'over__sampling_strategy': 0.3637609878850091, 'over__k_neighbors': 11}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:46:14,624]\u001b[0m Trial 6 finished with values: [0.667893232414531, 0.6004670314712397] and parameters: {'classifier__n_estimators': 366, 'classifier__learning_rate': 0.1174670919654562, 'classifier__num_leaves': 47, 'classifier__min_child_samples': 11, 'classifier__subsample': 0.8188165352367329, 'classifier__colsample_bytree': 0.6800359925898924, 'classifier__reg_alpha': 0.9873332093569686, 'classifier__reg_lambda': 0.8237280408782749, 'classifier__max_depth': 10, 'over__sampling_strategy': 0.40630225255124763, 'over__k_neighbors': 16}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:47:14,212]\u001b[0m Trial 8 finished with values: [0.644061341384452, 0.618076921917051] and parameters: {'classifier__n_estimators': 933, 'classifier__learning_rate': 0.04169925542642716, 'classifier__num_leaves': 95, 'classifier__min_child_samples': 6, 'classifier__subsample': 0.6661833630861052, 'classifier__colsample_bytree': 0.9525851019799721, 'classifier__reg_alpha': 0.14540907341047074, 'classifier__reg_lambda': 0.809786012465244, 'classifier__max_depth': 4, 'over__sampling_strategy': 0.7831280950769215, 'over__k_neighbors': 24}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:47:15,174]\u001b[0m Trial 9 finished with values: [0.6409114319982183, 0.6162873449909416] and parameters: {'classifier__n_estimators': 233, 'classifier__learning_rate': 0.18292389184027003, 'classifier__num_leaves': 56, 'classifier__min_child_samples': 35, 'classifier__subsample': 0.7800106231791527, 'classifier__colsample_bytree': 0.6585106528539849, 'classifier__reg_alpha': 0.14979252525773687, 'classifier__reg_lambda': 0.3977119377315529, 'classifier__max_depth': 3, 'over__sampling_strategy': 0.7792700963951842, 'over__k_neighbors': 21}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:48:10,620]\u001b[0m Trial 10 finished with values: [0.6457356569264796, 0.609554444333644] and parameters: {'classifier__n_estimators': 332, 'classifier__learning_rate': 0.1851732861715213, 'classifier__num_leaves': 46, 'classifier__min_child_samples': 21, 'classifier__subsample': 0.9271762975718307, 'classifier__colsample_bytree': 0.7477726917849581, 'classifier__reg_alpha': 0.6147485447416097, 'classifier__reg_lambda': 0.5685811248770686, 'classifier__max_depth': 5, 'over__sampling_strategy': 0.9930844928958459, 'over__k_neighbors': 23}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:48:11,008]\u001b[0m Trial 11 finished with values: [0.6724119773010454, 0.5957260154807694] and parameters: {'classifier__n_estimators': 104, 'classifier__learning_rate': 0.06356590163794429, 'classifier__num_leaves': 134, 'classifier__min_child_samples': 26, 'classifier__subsample': 0.9839846972144687, 'classifier__colsample_bytree': 0.9043000596481929, 'classifier__reg_alpha': 0.6260534720132058, 'classifier__reg_lambda': 0.1776688462298313, 'classifier__max_depth': 8, 'over__sampling_strategy': 0.3546251107632013, 'over__k_neighbors': 30}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:49:15,508]\u001b[0m Trial 12 finished with values: [0.66643196901092, 0.596496244360692] and parameters: {'classifier__n_estimators': 110, 'classifier__learning_rate': 0.053123465249201224, 'classifier__num_leaves': 123, 'classifier__min_child_samples': 7, 'classifier__subsample': 0.9933849808737168, 'classifier__colsample_bytree': 0.9604962569090681, 'classifier__reg_alpha': 0.10442163172828339, 'classifier__reg_lambda': 0.20726205718286705, 'classifier__max_depth': 7, 'over__sampling_strategy': 0.3600573168356785, 'over__k_neighbors': 30}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:49:15,586]\u001b[0m Trial 13 finished with values: [0.6581628335685197, 0.6048570876600964] and parameters: {'classifier__n_estimators': 968, 'classifier__learning_rate': 0.05241527439059202, 'classifier__num_leaves': 113, 'classifier__min_child_samples': 5, 'classifier__subsample': 0.7305761300447936, 'classifier__colsample_bytree': 0.9835178272391156, 'classifier__reg_alpha': 0.11044106249513164, 'classifier__reg_lambda': 0.5902397730232802, 'classifier__max_depth': 7, 'over__sampling_strategy': 0.5250553639022084, 'over__k_neighbors': 29}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:50:10,811]\u001b[0m Trial 14 finished with values: [0.6426255801744561, 0.6013851508529975] and parameters: {'classifier__n_estimators': 897, 'classifier__learning_rate': 0.04541877167931373, 'classifier__num_leaves': 112, 'classifier__min_child_samples': 5, 'classifier__subsample': 0.7172003473116372, 'classifier__colsample_bytree': 0.9205857257380231, 'classifier__reg_alpha': 0.3933945938648763, 'classifier__reg_lambda': 0.9950663869009093, 'classifier__max_depth': 2, 'over__sampling_strategy': 0.46785824050446545, 'over__k_neighbors': 11}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:50:10,884]\u001b[0m Trial 15 finished with values: [0.6380577562893421, 0.6007970316134726] and parameters: {'classifier__n_estimators': 583, 'classifier__learning_rate': 0.03032144203952168, 'classifier__num_leaves': 106, 'classifier__min_child_samples': 28, 'classifier__subsample': 0.706231742926022, 'classifier__colsample_bytree': 0.9113759215711714, 'classifier__reg_alpha': 0.3710755981188715, 'classifier__reg_lambda': 0.9441599180564746, 'classifier__max_depth': 2, 'over__sampling_strategy': 0.4888752504242806, 'over__k_neighbors': 16}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:51:09,046]\u001b[0m Trial 16 finished with values: [0.655562077970423, 0.6163713640536606] and parameters: {'classifier__n_estimators': 584, 'classifier__learning_rate': 0.08002748485834182, 'classifier__num_leaves': 99, 'classifier__min_child_samples': 50, 'classifier__subsample': 0.6046161155624623, 'classifier__colsample_bytree': 0.865395155350399, 'classifier__reg_alpha': 0.25147331302730935, 'classifier__reg_lambda': 0.6342523024869502, 'classifier__max_depth': 4, 'over__sampling_strategy': 0.5464823221918449, 'over__k_neighbors': 8}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:51:10,104]\u001b[0m Trial 17 finished with values: [0.657472661906612, 0.6164055067213046] and parameters: {'classifier__n_estimators': 732, 'classifier__learning_rate': 0.07615194246840477, 'classifier__num_leaves': 150, 'classifier__min_child_samples': 49, 'classifier__subsample': 0.9225448981507509, 'classifier__colsample_bytree': 0.8573314341149787, 'classifier__reg_alpha': 0.2625542544688248, 'classifier__reg_lambda': 0.631270560308917, 'classifier__max_depth': 5, 'over__sampling_strategy': 0.5394095714061843, 'over__k_neighbors': 8}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:52:16,385]\u001b[0m Trial 18 finished with values: [0.6571268758411501, 0.6121574397593857] and parameters: {'classifier__n_estimators': 726, 'classifier__learning_rate': 0.07949003092464581, 'classifier__num_leaves': 150, 'classifier__min_child_samples': 49, 'classifier__subsample': 0.6386970640052748, 'classifier__colsample_bytree': 0.8466595120509447, 'classifier__reg_alpha': 0.24375255397715137, 'classifier__reg_lambda': 0.6562369952278189, 'classifier__max_depth': 5, 'over__sampling_strategy': 0.5639017001375626, 'over__k_neighbors': 8}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:52:16,434]\u001b[0m Trial 19 finished with values: [0.6612317271181414, 0.5964562759482784] and parameters: {'classifier__n_estimators': 744, 'classifier__learning_rate': 0.07951878286221085, 'classifier__num_leaves': 150, 'classifier__min_child_samples': 49, 'classifier__subsample': 0.9033772298994664, 'classifier__colsample_bytree': 0.8521712791207438, 'classifier__reg_alpha': 0.48073334356365494, 'classifier__reg_lambda': 0.4783589983907456, 'classifier__max_depth': 10, 'over__sampling_strategy': 0.43829324499921507, 'over__k_neighbors': 1}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:53:15,430]\u001b[0m Trial 20 finished with values: [0.6631264577959408, 0.5984944170836299] and parameters: {'classifier__n_estimators': 803, 'classifier__learning_rate': 0.06568783658245095, 'classifier__num_leaves': 126, 'classifier__min_child_samples': 29, 'classifier__subsample': 0.9990595873319331, 'classifier__colsample_bytree': 0.7742748827210983, 'classifier__reg_alpha': 0.24306612915613623, 'classifier__reg_lambda': 0.3515482415258482, 'classifier__max_depth': 7, 'over__sampling_strategy': 0.4220391193920819, 'over__k_neighbors': 19}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:53:16,189]\u001b[0m Trial 21 finished with values: [0.6679526578331464, 0.6095917667700379] and parameters: {'classifier__n_estimators': 248, 'classifier__learning_rate': 0.0670089893467733, 'classifier__num_leaves': 127, 'classifier__min_child_samples': 29, 'classifier__subsample': 0.8959672200807781, 'classifier__colsample_bytree': 0.7559761731724634, 'classifier__reg_alpha': 0.2542211800893746, 'classifier__reg_lambda': 0.3324837722080793, 'classifier__max_depth': 7, 'over__sampling_strategy': 0.42693612969965145, 'over__k_neighbors': 18}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:54:13,189]\u001b[0m Trial 22 finished with values: [0.6587923086641287, 0.6172246451932154] and parameters: {'classifier__n_estimators': 235, 'classifier__learning_rate': 0.03632498277445621, 'classifier__num_leaves': 75, 'classifier__min_child_samples': 14, 'classifier__subsample': 0.9575900967944961, 'classifier__colsample_bytree': 0.9444801642063763, 'classifier__reg_alpha': 0.1950395397952936, 'classifier__reg_lambda': 0.5410531610446644, 'classifier__max_depth': 8, 'over__sampling_strategy': 0.4954705242821451, 'over__k_neighbors': 11}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:54:13,374]\u001b[0m Trial 23 finished with values: [0.6375478298667103, 0.6162857021901866] and parameters: {'classifier__n_estimators': 248, 'classifier__learning_rate': 0.037910081673774854, 'classifier__num_leaves': 71, 'classifier__min_child_samples': 12, 'classifier__subsample': 0.6547551775468159, 'classifier__colsample_bytree': 0.9494567468841774, 'classifier__reg_alpha': 0.17307713876599226, 'classifier__reg_lambda': 0.5227941491590341, 'classifier__max_depth': 4, 'over__sampling_strategy': 0.8406792309767497, 'over__k_neighbors': 26}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:55:09,023]\u001b[0m Trial 24 finished with values: [0.663339714943868, 0.6158715021120922] and parameters: {'classifier__n_estimators': 242, 'classifier__learning_rate': 0.030884543885426448, 'classifier__num_leaves': 68, 'classifier__min_child_samples': 15, 'classifier__subsample': 0.9553754706605744, 'classifier__colsample_bytree': 0.9394940518123692, 'classifier__reg_alpha': 0.19454271347862542, 'classifier__reg_lambda': 0.5145682218654029, 'classifier__max_depth': 9, 'over__sampling_strategy': 0.48172053115635893, 'over__k_neighbors': 11}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:55:09,793]\u001b[0m Trial 25 finished with values: [0.658532746096862, 0.6089764409371342] and parameters: {'classifier__n_estimators': 214, 'classifier__learning_rate': 0.010405418186988036, 'classifier__num_leaves': 70, 'classifier__min_child_samples': 22, 'classifier__subsample': 0.8932906567444393, 'classifier__colsample_bytree': 0.9979890646724019, 'classifier__reg_alpha': 0.30902378637668526, 'classifier__reg_lambda': 0.32759647526569924, 'classifier__max_depth': 9, 'over__sampling_strategy': 0.4832302105180492, 'over__k_neighbors': 11}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:56:07,650]\u001b[0m Trial 26 finished with values: [0.6731138231715039, 0.5963481774429714] and parameters: {'classifier__n_estimators': 483, 'classifier__learning_rate': 0.02480024714179163, 'classifier__num_leaves': 66, 'classifier__min_child_samples': 23, 'classifier__subsample': 0.9636336625539972, 'classifier__colsample_bytree': 0.9973921112983907, 'classifier__reg_alpha': 0.3297322858463567, 'classifier__reg_lambda': 0.44279110256514215, 'classifier__max_depth': 9, 'over__sampling_strategy': 0.35146593969061785, 'over__k_neighbors': 11}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:56:09,486]\u001b[0m Trial 27 finished with values: [0.6748442249568417, 0.5951677194666252] and parameters: {'classifier__n_estimators': 496, 'classifier__learning_rate': 0.032082575420636736, 'classifier__num_leaves': 97, 'classifier__min_child_samples': 24, 'classifier__subsample': 0.999858146240725, 'classifier__colsample_bytree': 0.6029679609870878, 'classifier__reg_alpha': 0.17938860811200935, 'classifier__reg_lambda': 0.44451618736407306, 'classifier__max_depth': 10, 'over__sampling_strategy': 0.3532223856999658, 'over__k_neighbors': 5}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:57:06,298]\u001b[0m Trial 28 finished with values: [0.6707629021981326, 0.6064782569547265] and parameters: {'classifier__n_estimators': 485, 'classifier__learning_rate': 0.024890145248810423, 'classifier__num_leaves': 63, 'classifier__min_child_samples': 23, 'classifier__subsample': 0.9617530274541308, 'classifier__colsample_bytree': 0.9643081588142669, 'classifier__reg_alpha': 0.33467016117964643, 'classifier__reg_lambda': 0.28066668481890683, 'classifier__max_depth': 8, 'over__sampling_strategy': 0.3883638288395505, 'over__k_neighbors': 4}. \u001b[0m\n", "\u001b[32m[I 2023-04-27 18:57:07,807]\u001b[0m Trial 29 finished with values: [0.6725516595604328, 0.6063189073346005] and parameters: {'classifier__n_estimators': 517, 'classifier__learning_rate': 0.028056880977932984, 'classifier__num_leaves': 90, 'classifier__min_child_samples': 34, 'classifier__subsample': 0.969042533171844, 'classifier__colsample_bytree': 0.6053047106607662, 'classifier__reg_alpha': 0.20049320363081555, 'classifier__reg_lambda': 0.10380040489027925, 'classifier__max_depth': 8, 'over__sampling_strategy': 0.39352742674680213, 'over__k_neighbors': 3}. \u001b[0m\n" ] } ], "source": [ "from lightgbm import LGBMClassifier\n", "\n", "\n", "pipe_LGBM = imbpipeline(\n", " [\n", " ('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", " ('classifier',LGBMClassifier(n_jobs=4,objective='binary'))]\n", " )\n", "\n", "def lgbm_space(trial):\n", " return {\n", " 'classifier__n_estimators': trial.suggest_int('classifier__n_estimators', 100, 1000),\n", " 'classifier__learning_rate': trial.suggest_float('classifier__learning_rate', 0.01, 0.2),\n", " 'classifier__num_leaves': trial.suggest_int('classifier__num_leaves', 30, 150),\n", " 'classifier__min_child_samples': trial.suggest_int('classifier__min_child_samples', 5, 50),\n", " 'classifier__subsample': trial.suggest_float('classifier__subsample', 0.6, 1.0),\n", " 'classifier__colsample_bytree': trial.suggest_float('classifier__colsample_bytree', 0.6, 1.0),\n", " 'classifier__reg_alpha': trial.suggest_float('classifier__reg_alpha', 0.1, 1.0),\n", " 'classifier__reg_lambda': trial.suggest_float('classifier__reg_lambda', 0.1, 1.0),\n", " 'classifier__max_depth': trial.suggest_int('classifier__max_depth', 1, 10),\n", " 'over__sampling_strategy': trial.suggest_float(\"over__sampling_strategy\", 0.35, 1.00),\n", " 'over__k_neighbors': trial.suggest_int('over__k_neighbors', 1, 30)\n", " }\n", "\n", "lgbm_func = lambda trial: objective(trial,pipe_LGBM,lgbm_space(trial))\n", "\n", "lgbm_study = optuna.create_study(directions=['maximize','maximize'],pruner=MedianPruner(),study_name='lgbm_study',storage='sqlite:///my_study.db',sampler = TPESampler(seed=100))\n", "lgbm_study.optimize(lgbm_func, n_trials=30,n_jobs=2)\n", "\n" ] }, { "cell_type": "code", "execution_count": 84, "id": "c79d52a1", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "hovertemplate": "%{text}Trial", "marker": { "color": [ 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 25 ], "colorbar": { "title": { "text": "Trial" } }, "colorscale": [ [ 0, "rgb(247,251,255)" ], [ 0.125, "rgb(222,235,247)" ], [ 0.25, "rgb(198,219,239)" ], [ 0.375, "rgb(158,202,225)" ], [ 0.5, "rgb(107,174,214)" ], [ 0.625, "rgb(66,146,198)" ], [ 0.75, "rgb(33,113,181)" ], [ 0.875, "rgb(8,81,156)" ], [ 1, "rgb(8,48,107)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 0,
\"values\": [
0.6481791433257595,
0.608212027804601
],
\"params\": {
\"classifier__colsample_bytree\": 0.6923913504558172,
\"classifier__learning_rate\": 0.0914658479860042,
\"classifier__max_depth\": 4,
\"classifier__min_child_samples\": 39,
\"classifier__n_estimators\": 878,
\"classifier__num_leaves\": 52,
\"classifier__reg_alpha\": 0.295762093408928,
\"classifier__reg_lambda\": 0.45757904384482906,
\"classifier__subsample\": 0.8551474722356436,
\"over__k_neighbors\": 14,
\"over__sampling_strategy\": 0.6644200991667422
}
}", "{
\"number\": 1,
\"values\": [
0.6481791433257595,
0.608212027804601
],
\"params\": {
\"classifier__colsample_bytree\": 0.8836872773395599,
\"classifier__learning_rate\": 0.12262001940318636,
\"classifier__max_depth\": 6,
\"classifier__min_child_samples\": 18,
\"classifier__n_estimators\": 734,
\"classifier__num_leaves\": 139,
\"classifier__reg_alpha\": 0.3571316810129358,
\"classifier__reg_lambda\": 0.7796445000270502,
\"classifier__subsample\": 0.9551502480484655,
\"over__k_neighbors\": 2,
\"over__sampling_strategy\": 0.6306694569878601
}
}", "{
\"number\": 2,
\"values\": [
0.6445797291737211,
0.608400783342778
],
\"params\": {
\"classifier__colsample_bytree\": 0.6494467550658721,
\"classifier__learning_rate\": 0.10522687923567785,
\"classifier__max_depth\": 9,
\"classifier__min_child_samples\": 38,
\"classifier__n_estimators\": 635,
\"classifier__num_leaves\": 79,
\"classifier__reg_alpha\": 0.24399972442948165,
\"classifier__reg_lambda\": 0.9345718162446607,
\"classifier__subsample\": 0.8604173819294232,
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.7388052336432036
}
}", "{
\"number\": 3,
\"values\": [
0.6217637298126792,
0.5977452305257794
],
\"params\": {
\"classifier__colsample_bytree\": 0.9911211585650836,
\"classifier__learning_rate\": 0.017736285174618147,
\"classifier__max_depth\": 1,
\"classifier__min_child_samples\": 19,
\"classifier__n_estimators\": 381,
\"classifier__num_leaves\": 86,
\"classifier__reg_alpha\": 0.46044526140838715,
\"classifier__reg_lambda\": 0.7526456800569701,
\"classifier__subsample\": 0.6148958076790818,
\"over__k_neighbors\": 22,
\"over__sampling_strategy\": 0.6052892104590009
}
}", "{
\"number\": 4,
\"values\": [
0.6437272777192509,
0.6164909886102607
],
\"params\": {
\"classifier__colsample_bytree\": 0.8378839922571999,
\"classifier__learning_rate\": 0.014988221940214163,
\"classifier__max_depth\": 6,
\"classifier__min_child_samples\": 43,
\"classifier__n_estimators\": 440,
\"classifier__num_leaves\": 31,
\"classifier__reg_alpha\": 0.11873476365664196,
\"classifier__reg_lambda\": 0.7013976118630566,
\"classifier__subsample\": 0.9382702804809353,
\"over__k_neighbors\": 13,
\"over__sampling_strategy\": 0.955421001376642
}
}", "{
\"number\": 5,
\"values\": [
0.6429011933266364,
0.6098298238510574
],
\"params\": {
\"classifier__colsample_bytree\": 0.8078731777886868,
\"classifier__learning_rate\": 0.19333835190551785,
\"classifier__max_depth\": 9,
\"classifier__min_child_samples\": 14,
\"classifier__n_estimators\": 430,
\"classifier__num_leaves\": 85,
\"classifier__reg_alpha\": 0.1612332411124245,
\"classifier__reg_lambda\": 0.7733976323443501,
\"classifier__subsample\": 0.851599014018061,
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.7676143281665351
}
}", "{
\"number\": 6,
\"values\": [
0.667893232414531,
0.6004670314712397
],
\"params\": {
\"classifier__colsample_bytree\": 0.6800359925898924,
\"classifier__learning_rate\": 0.1174670919654562,
\"classifier__max_depth\": 10,
\"classifier__min_child_samples\": 11,
\"classifier__n_estimators\": 366,
\"classifier__num_leaves\": 47,
\"classifier__reg_alpha\": 0.9873332093569686,
\"classifier__reg_lambda\": 0.8237280408782749,
\"classifier__subsample\": 0.8188165352367329,
\"over__k_neighbors\": 16,
\"over__sampling_strategy\": 0.40630225255124763
}
}", "{
\"number\": 7,
\"values\": [
0.6724975193877245,
0.5996970508621173
],
\"params\": {
\"classifier__colsample_bytree\": 0.9371874013132707,
\"classifier__learning_rate\": 0.050274449073944835,
\"classifier__max_depth\": 10,
\"classifier__min_child_samples\": 18,
\"classifier__n_estimators\": 249,
\"classifier__num_leaves\": 117,
\"classifier__reg_alpha\": 0.15008994068887171,
\"classifier__reg_lambda\": 0.537409132772931,
\"classifier__subsample\": 0.9993057768077361,
\"over__k_neighbors\": 11,
\"over__sampling_strategy\": 0.3637609878850091
}
}", "{
\"number\": 9,
\"values\": [
0.6409114319982183,
0.6162873449909416
],
\"params\": {
\"classifier__colsample_bytree\": 0.6585106528539849,
\"classifier__learning_rate\": 0.18292389184027003,
\"classifier__max_depth\": 3,
\"classifier__min_child_samples\": 35,
\"classifier__n_estimators\": 233,
\"classifier__num_leaves\": 56,
\"classifier__reg_alpha\": 0.14979252525773687,
\"classifier__reg_lambda\": 0.3977119377315529,
\"classifier__subsample\": 0.7800106231791527,
\"over__k_neighbors\": 21,
\"over__sampling_strategy\": 0.7792700963951842
}
}", "{
\"number\": 10,
\"values\": [
0.6457356569264796,
0.609554444333644
],
\"params\": {
\"classifier__colsample_bytree\": 0.7477726917849581,
\"classifier__learning_rate\": 0.1851732861715213,
\"classifier__max_depth\": 5,
\"classifier__min_child_samples\": 21,
\"classifier__n_estimators\": 332,
\"classifier__num_leaves\": 46,
\"classifier__reg_alpha\": 0.6147485447416097,
\"classifier__reg_lambda\": 0.5685811248770686,
\"classifier__subsample\": 0.9271762975718307,
\"over__k_neighbors\": 23,
\"over__sampling_strategy\": 0.9930844928958459
}
}", "{
\"number\": 11,
\"values\": [
0.6724119773010454,
0.5957260154807694
],
\"params\": {
\"classifier__colsample_bytree\": 0.9043000596481929,
\"classifier__learning_rate\": 0.06356590163794429,
\"classifier__max_depth\": 8,
\"classifier__min_child_samples\": 26,
\"classifier__n_estimators\": 104,
\"classifier__num_leaves\": 134,
\"classifier__reg_alpha\": 0.6260534720132058,
\"classifier__reg_lambda\": 0.1776688462298313,
\"classifier__subsample\": 0.9839846972144687,
\"over__k_neighbors\": 30,
\"over__sampling_strategy\": 0.3546251107632013
}
}", "{
\"number\": 12,
\"values\": [
0.66643196901092,
0.596496244360692
],
\"params\": {
\"classifier__colsample_bytree\": 0.9604962569090681,
\"classifier__learning_rate\": 0.053123465249201224,
\"classifier__max_depth\": 7,
\"classifier__min_child_samples\": 7,
\"classifier__n_estimators\": 110,
\"classifier__num_leaves\": 123,
\"classifier__reg_alpha\": 0.10442163172828339,
\"classifier__reg_lambda\": 0.20726205718286705,
\"classifier__subsample\": 0.9933849808737168,
\"over__k_neighbors\": 30,
\"over__sampling_strategy\": 0.3600573168356785
}
}", "{
\"number\": 13,
\"values\": [
0.6581628335685197,
0.6048570876600964
],
\"params\": {
\"classifier__colsample_bytree\": 0.9835178272391156,
\"classifier__learning_rate\": 0.05241527439059202,
\"classifier__max_depth\": 7,
\"classifier__min_child_samples\": 5,
\"classifier__n_estimators\": 968,
\"classifier__num_leaves\": 113,
\"classifier__reg_alpha\": 0.11044106249513164,
\"classifier__reg_lambda\": 0.5902397730232802,
\"classifier__subsample\": 0.7305761300447936,
\"over__k_neighbors\": 29,
\"over__sampling_strategy\": 0.5250553639022084
}
}", "{
\"number\": 14,
\"values\": [
0.6426255801744561,
0.6013851508529975
],
\"params\": {
\"classifier__colsample_bytree\": 0.9205857257380231,
\"classifier__learning_rate\": 0.04541877167931373,
\"classifier__max_depth\": 2,
\"classifier__min_child_samples\": 5,
\"classifier__n_estimators\": 897,
\"classifier__num_leaves\": 112,
\"classifier__reg_alpha\": 0.3933945938648763,
\"classifier__reg_lambda\": 0.9950663869009093,
\"classifier__subsample\": 0.7172003473116372,
\"over__k_neighbors\": 11,
\"over__sampling_strategy\": 0.46785824050446545
}
}", "{
\"number\": 15,
\"values\": [
0.6380577562893421,
0.6007970316134726
],
\"params\": {
\"classifier__colsample_bytree\": 0.9113759215711714,
\"classifier__learning_rate\": 0.03032144203952168,
\"classifier__max_depth\": 2,
\"classifier__min_child_samples\": 28,
\"classifier__n_estimators\": 583,
\"classifier__num_leaves\": 106,
\"classifier__reg_alpha\": 0.3710755981188715,
\"classifier__reg_lambda\": 0.9441599180564746,
\"classifier__subsample\": 0.706231742926022,
\"over__k_neighbors\": 16,
\"over__sampling_strategy\": 0.4888752504242806
}
}", "{
\"number\": 16,
\"values\": [
0.655562077970423,
0.6163713640536606
],
\"params\": {
\"classifier__colsample_bytree\": 0.865395155350399,
\"classifier__learning_rate\": 0.08002748485834182,
\"classifier__max_depth\": 4,
\"classifier__min_child_samples\": 50,
\"classifier__n_estimators\": 584,
\"classifier__num_leaves\": 99,
\"classifier__reg_alpha\": 0.25147331302730935,
\"classifier__reg_lambda\": 0.6342523024869502,
\"classifier__subsample\": 0.6046161155624623,
\"over__k_neighbors\": 8,
\"over__sampling_strategy\": 0.5464823221918449
}
}", "{
\"number\": 17,
\"values\": [
0.657472661906612,
0.6164055067213046
],
\"params\": {
\"classifier__colsample_bytree\": 0.8573314341149787,
\"classifier__learning_rate\": 0.07615194246840477,
\"classifier__max_depth\": 5,
\"classifier__min_child_samples\": 49,
\"classifier__n_estimators\": 732,
\"classifier__num_leaves\": 150,
\"classifier__reg_alpha\": 0.2625542544688248,
\"classifier__reg_lambda\": 0.631270560308917,
\"classifier__subsample\": 0.9225448981507509,
\"over__k_neighbors\": 8,
\"over__sampling_strategy\": 0.5394095714061843
}
}", "{
\"number\": 18,
\"values\": [
0.6571268758411501,
0.6121574397593857
],
\"params\": {
\"classifier__colsample_bytree\": 0.8466595120509447,
\"classifier__learning_rate\": 0.07949003092464581,
\"classifier__max_depth\": 5,
\"classifier__min_child_samples\": 49,
\"classifier__n_estimators\": 726,
\"classifier__num_leaves\": 150,
\"classifier__reg_alpha\": 0.24375255397715137,
\"classifier__reg_lambda\": 0.6562369952278189,
\"classifier__subsample\": 0.6386970640052748,
\"over__k_neighbors\": 8,
\"over__sampling_strategy\": 0.5639017001375626
}
}", "{
\"number\": 19,
\"values\": [
0.6612317271181414,
0.5964562759482784
],
\"params\": {
\"classifier__colsample_bytree\": 0.8521712791207438,
\"classifier__learning_rate\": 0.07951878286221085,
\"classifier__max_depth\": 10,
\"classifier__min_child_samples\": 49,
\"classifier__n_estimators\": 744,
\"classifier__num_leaves\": 150,
\"classifier__reg_alpha\": 0.48073334356365494,
\"classifier__reg_lambda\": 0.4783589983907456,
\"classifier__subsample\": 0.9033772298994664,
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.43829324499921507
}
}", "{
\"number\": 20,
\"values\": [
0.6631264577959408,
0.5984944170836299
],
\"params\": {
\"classifier__colsample_bytree\": 0.7742748827210983,
\"classifier__learning_rate\": 0.06568783658245095,
\"classifier__max_depth\": 7,
\"classifier__min_child_samples\": 29,
\"classifier__n_estimators\": 803,
\"classifier__num_leaves\": 126,
\"classifier__reg_alpha\": 0.24306612915613623,
\"classifier__reg_lambda\": 0.3515482415258482,
\"classifier__subsample\": 0.9990595873319331,
\"over__k_neighbors\": 19,
\"over__sampling_strategy\": 0.4220391193920819
}
}", "{
\"number\": 23,
\"values\": [
0.6375478298667103,
0.6162857021901866
],
\"params\": {
\"classifier__colsample_bytree\": 0.9494567468841774,
\"classifier__learning_rate\": 0.037910081673774854,
\"classifier__max_depth\": 4,
\"classifier__min_child_samples\": 12,
\"classifier__n_estimators\": 248,
\"classifier__num_leaves\": 71,
\"classifier__reg_alpha\": 0.17307713876599226,
\"classifier__reg_lambda\": 0.5227941491590341,
\"classifier__subsample\": 0.6547551775468159,
\"over__k_neighbors\": 26,
\"over__sampling_strategy\": 0.8406792309767497
}
}", "{
\"number\": 25,
\"values\": [
0.658532746096862,
0.6089764409371342
],
\"params\": {
\"classifier__colsample_bytree\": 0.9979890646724019,
\"classifier__learning_rate\": 0.010405418186988036,
\"classifier__max_depth\": 9,
\"classifier__min_child_samples\": 22,
\"classifier__n_estimators\": 214,
\"classifier__num_leaves\": 70,
\"classifier__reg_alpha\": 0.30902378637668526,
\"classifier__reg_lambda\": 0.32759647526569924,
\"classifier__subsample\": 0.8932906567444393,
\"over__k_neighbors\": 11,
\"over__sampling_strategy\": 0.4832302105180492
}
}" ], "type": "scatter", "x": [ 0.6481791433257595, 0.6481791433257595, 0.6445797291737211, 0.6217637298126792, 0.6437272777192509, 0.6429011933266364, 0.667893232414531, 0.6724975193877245, 0.6409114319982183, 0.6457356569264796, 0.6724119773010454, 0.66643196901092, 0.6581628335685197, 0.6426255801744561, 0.6380577562893421, 0.655562077970423, 0.657472661906612, 0.6571268758411501, 0.6612317271181414, 0.6631264577959408, 0.6375478298667103, 0.658532746096862 ], "y": [ 0.608212027804601, 0.608212027804601, 0.608400783342778, 0.5977452305257794, 0.6164909886102607, 0.6098298238510574, 0.6004670314712397, 0.5996970508621173, 0.6162873449909416, 0.609554444333644, 0.5957260154807694, 0.596496244360692, 0.6048570876600964, 0.6013851508529975, 0.6007970316134726, 0.6163713640536606, 0.6164055067213046, 0.6121574397593857, 0.5964562759482784, 0.5984944170836299, 0.6162857021901866, 0.6089764409371342 ] }, { "hovertemplate": "%{text}Best Trial", "marker": { "color": [ 8, 21, 22, 24, 26, 27, 28, 29 ], "colorbar": { "title": { "text": "Best Trial" }, "x": 1.1, "xpad": 40 }, "colorscale": [ [ 0, "rgb(255,245,240)" ], [ 0.125, "rgb(254,224,210)" ], [ 0.25, "rgb(252,187,161)" ], [ 0.375, "rgb(252,146,114)" ], [ 0.5, "rgb(251,106,74)" ], [ 0.625, "rgb(239,59,44)" ], [ 0.75, "rgb(203,24,29)" ], [ 0.875, "rgb(165,15,21)" ], [ 1, "rgb(103,0,13)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 8,
\"values\": [
0.644061341384452,
0.618076921917051
],
\"params\": {
\"classifier__colsample_bytree\": 0.9525851019799721,
\"classifier__learning_rate\": 0.04169925542642716,
\"classifier__max_depth\": 4,
\"classifier__min_child_samples\": 6,
\"classifier__n_estimators\": 933,
\"classifier__num_leaves\": 95,
\"classifier__reg_alpha\": 0.14540907341047074,
\"classifier__reg_lambda\": 0.809786012465244,
\"classifier__subsample\": 0.6661833630861052,
\"over__k_neighbors\": 24,
\"over__sampling_strategy\": 0.7831280950769215
}
}", "{
\"number\": 21,
\"values\": [
0.6679526578331464,
0.6095917667700379
],
\"params\": {
\"classifier__colsample_bytree\": 0.7559761731724634,
\"classifier__learning_rate\": 0.0670089893467733,
\"classifier__max_depth\": 7,
\"classifier__min_child_samples\": 29,
\"classifier__n_estimators\": 248,
\"classifier__num_leaves\": 127,
\"classifier__reg_alpha\": 0.2542211800893746,
\"classifier__reg_lambda\": 0.3324837722080793,
\"classifier__subsample\": 0.8959672200807781,
\"over__k_neighbors\": 18,
\"over__sampling_strategy\": 0.42693612969965145
}
}", "{
\"number\": 22,
\"values\": [
0.6587923086641287,
0.6172246451932154
],
\"params\": {
\"classifier__colsample_bytree\": 0.9444801642063763,
\"classifier__learning_rate\": 0.03632498277445621,
\"classifier__max_depth\": 8,
\"classifier__min_child_samples\": 14,
\"classifier__n_estimators\": 235,
\"classifier__num_leaves\": 75,
\"classifier__reg_alpha\": 0.1950395397952936,
\"classifier__reg_lambda\": 0.5410531610446644,
\"classifier__subsample\": 0.9575900967944961,
\"over__k_neighbors\": 11,
\"over__sampling_strategy\": 0.4954705242821451
}
}", "{
\"number\": 24,
\"values\": [
0.663339714943868,
0.6158715021120922
],
\"params\": {
\"classifier__colsample_bytree\": 0.9394940518123692,
\"classifier__learning_rate\": 0.030884543885426448,
\"classifier__max_depth\": 9,
\"classifier__min_child_samples\": 15,
\"classifier__n_estimators\": 242,
\"classifier__num_leaves\": 68,
\"classifier__reg_alpha\": 0.19454271347862542,
\"classifier__reg_lambda\": 0.5145682218654029,
\"classifier__subsample\": 0.9553754706605744,
\"over__k_neighbors\": 11,
\"over__sampling_strategy\": 0.48172053115635893
}
}", "{
\"number\": 26,
\"values\": [
0.6731138231715039,
0.5963481774429714
],
\"params\": {
\"classifier__colsample_bytree\": 0.9973921112983907,
\"classifier__learning_rate\": 0.02480024714179163,
\"classifier__max_depth\": 9,
\"classifier__min_child_samples\": 23,
\"classifier__n_estimators\": 483,
\"classifier__num_leaves\": 66,
\"classifier__reg_alpha\": 0.3297322858463567,
\"classifier__reg_lambda\": 0.44279110256514215,
\"classifier__subsample\": 0.9636336625539972,
\"over__k_neighbors\": 11,
\"over__sampling_strategy\": 0.35146593969061785
}
}", "{
\"number\": 27,
\"values\": [
0.6748442249568417,
0.5951677194666252
],
\"params\": {
\"classifier__colsample_bytree\": 0.6029679609870878,
\"classifier__learning_rate\": 0.032082575420636736,
\"classifier__max_depth\": 10,
\"classifier__min_child_samples\": 24,
\"classifier__n_estimators\": 496,
\"classifier__num_leaves\": 97,
\"classifier__reg_alpha\": 0.17938860811200935,
\"classifier__reg_lambda\": 0.44451618736407306,
\"classifier__subsample\": 0.999858146240725,
\"over__k_neighbors\": 5,
\"over__sampling_strategy\": 0.3532223856999658
}
}", "{
\"number\": 28,
\"values\": [
0.6707629021981326,
0.6064782569547265
],
\"params\": {
\"classifier__colsample_bytree\": 0.9643081588142669,
\"classifier__learning_rate\": 0.024890145248810423,
\"classifier__max_depth\": 8,
\"classifier__min_child_samples\": 23,
\"classifier__n_estimators\": 485,
\"classifier__num_leaves\": 63,
\"classifier__reg_alpha\": 0.33467016117964643,
\"classifier__reg_lambda\": 0.28066668481890683,
\"classifier__subsample\": 0.9617530274541308,
\"over__k_neighbors\": 4,
\"over__sampling_strategy\": 0.3883638288395505
}
}", "{
\"number\": 29,
\"values\": [
0.6725516595604328,
0.6063189073346005
],
\"params\": {
\"classifier__colsample_bytree\": 0.6053047106607662,
\"classifier__learning_rate\": 0.028056880977932984,
\"classifier__max_depth\": 8,
\"classifier__min_child_samples\": 34,
\"classifier__n_estimators\": 517,
\"classifier__num_leaves\": 90,
\"classifier__reg_alpha\": 0.20049320363081555,
\"classifier__reg_lambda\": 0.10380040489027925,
\"classifier__subsample\": 0.969042533171844,
\"over__k_neighbors\": 3,
\"over__sampling_strategy\": 0.39352742674680213
}
}" ], "type": "scatter", "x": [ 0.644061341384452, 0.6679526578331464, 0.6587923086641287, 0.663339714943868, 0.6731138231715039, 0.6748442249568417, 0.6707629021981326, 0.6725516595604328 ], "y": [ 0.618076921917051, 0.6095917667700379, 0.6172246451932154, 0.6158715021120922, 0.5963481774429714, 0.5951677194666252, 0.6064782569547265, 0.6063189073346005 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Pareto-front Plot" }, "xaxis": { "title": { "text": "Mean PR AUC" } }, "yaxis": { "title": { "text": "Mean F1" } } } } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "lgbm_study = optuna.load_study(study_name=\"lgbm_study\", storage=\"sqlite:///my_study.db\")\n", "optuna.visualization.plot_pareto_front(lgbm_study, target_names=[\"Mean PR AUC\", \"Mean F1\"])" ] }, { "cell_type": "code", "execution_count": 85, "id": "fdb19cd0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'classifier__colsample_bytree': 0.9444801642063763, 'classifier__learning_rate': 0.03632498277445621, 'classifier__max_depth': 8, 'classifier__min_child_samples': 14, 'classifier__n_estimators': 235, 'classifier__num_leaves': 75, 'classifier__reg_alpha': 0.1950395397952936, 'classifier__reg_lambda': 0.5410531610446644, 'classifier__subsample': 0.9575900967944961, 'over__k_neighbors': 11, 'over__sampling_strategy': 0.4954705242821451}\n", " precision recall f1-score support\n", "\n", " 0 0.87 0.88 0.88 12191\n", " 1 0.61 0.61 0.61 3893\n", "\n", " accuracy 0.81 16084\n", " macro avg 0.74 0.74 0.74 16084\n", "weighted avg 0.81 0.81 0.81 16084\n", "\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "0.6096615861534488\n", "0.6589919445727145\n" ] } ], "source": [ "\n", "lgbm_best_params = lgbm_study.trials[22].params\n", "\n", "evaluate_study(pipe_LGBM,lgbm_best_params)\n" ] }, { "attachments": {}, "cell_type": "markdown", "id": "37bcee22", "metadata": {}, "source": [ "#### CatBoost Classifier Model" ] }, { "cell_type": "code", "execution_count": null, "id": "2a7b5838", "metadata": {}, "outputs": [], "source": [ "from catboost import CatBoostClassifier\n", "\n", "\n", " \n", "pipe_CatBoost = imbpipeline(\n", " [\n", "\n", " ('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", "\n", " ('classifier',CatBoostClassifier(thread_count=-1,silent=True,task_type=\"GPU\"))]\n", " )\n", "\n", "\n", "def cat_space(trial):\n", " return {\n", " 'classifier__iterations': trial.suggest_int('classifier__iterations', 100, 1000),\n", " 'classifier__depth': trial.suggest_int('classifier__depth', 1, 10),\n", " 'classifier__learning_rate': trial.suggest_float('classifier__learning_rate', 0.01, 0.3),\n", " 'classifier__l2_leaf_reg': trial.suggest_float('classifier__l2_leaf_reg', 3.0, 17),\n", " 'classifier__loss_function': trial.suggest_categorical('classifier__loss_function', ['Logloss','CrossEntropy']),\n", " 'classifier__bagging_temperature': trial.suggest_float('classifier__bagging_temperature', 0, 1),\n", " 'classifier__bootstrap_type': trial.suggest_categorical('classifier__bootstrap_type', ['Bayesian']),\n", " 'over__sampling_strategy': trial.suggest_float(\"over__sampling_strategy\", 0.35, 1.00),\n", " 'over__k_neighbors': trial.suggest_int('over__k_neighbors', 1, 30)\n", " }\n", "\n", "cat_func = lambda trial: objective(trial,pipe_CatBoost,cat_space(trial),n_jobs=1)\n", "\n", "\n", "cat_study = optuna.create_study(directions=['maximize','maximize'],pruner=MedianPruner(),study_name='catboost_study',storage='sqlite:///my_study.db',sampler = TPESampler(seed=100))\n", "cat_study.optimize(cat_func, n_trials=30,n_jobs=1)\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "a2e122ed", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "hovertemplate": "%{text}Trial", "marker": { "color": [ 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 24, 25, 27, 28, 29 ], "colorbar": { "title": { "text": "Trial" } }, "colorscale": [ [ 0, "rgb(247,251,255)" ], [ 0.125, "rgb(222,235,247)" ], [ 0.25, "rgb(198,219,239)" ], [ 0.375, "rgb(158,202,225)" ], [ 0.5, "rgb(107,174,214)" ], [ 0.625, "rgb(66,146,198)" ], [ 0.75, "rgb(33,113,181)" ], [ 0.875, "rgb(8,81,156)" ], [ 1, "rgb(8,48,107)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 0,
\"values\": [
0.6372193139352654,
0.620093055675489
],
\"params\": {
\"classifier__bagging_temperature\": 0.6707490847267786,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 3,
\"classifier__iterations\": 589,
\"classifier__l2_leaf_reg\": 14.826865852478651,
\"classifier__learning_rate\": 0.1331101013172486,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 5,
\"over__sampling_strategy\": 0.8868042908182809
}
}", "{
\"number\": 3,
\"values\": [
0.6575427980381697,
0.5902805291782084
],
\"params\": {
\"classifier__bagging_temperature\": 0.38194344494311006,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 8,
\"classifier__iterations\": 327,
\"classifier__l2_leaf_reg\": 11.383807276998901,
\"classifier__learning_rate\": 0.014423941661438313,
\"classifier__loss_function\": \"Logloss\",
\"over__k_neighbors\": 27,
\"over__sampling_strategy\": 0.37370943678516977
}
}", "{
\"number\": 4,
\"values\": [
0.6381240964177318,
0.5648889821268726
],
\"params\": {
\"classifier__bagging_temperature\": 0.5818421923987779,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 1,
\"classifier__iterations\": 983,
\"classifier__l2_leaf_reg\": 11.07662099160046,
\"classifier__learning_rate\": 0.2682583239712662,
\"classifier__loss_function\": \"Logloss\",
\"over__k_neighbors\": 7,
\"over__sampling_strategy\": 0.36328543581750006
}
}", "{
\"number\": 5,
\"values\": [
0.6611581469842929,
0.6201903716043509
],
\"params\": {
\"classifier__bagging_temperature\": 0.8848532934911055,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 8,
\"classifier__iterations\": 590,
\"classifier__l2_leaf_reg\": 7.002539665696106,
\"classifier__learning_rate\": 0.08270161645013481,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 18,
\"over__sampling_strategy\": 0.5836800985589865
}
}", "{
\"number\": 6,
\"values\": [
0.6428961846729273,
0.6194567064656205
],
\"params\": {
\"classifier__bagging_temperature\": 0.376252454297363,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 4,
\"classifier__iterations\": 419,
\"classifier__l2_leaf_reg\": 6.327718920736706,
\"classifier__learning_rate\": 0.06164348695668341,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 19,
\"over__sampling_strategy\": 0.7353235106343263
}
}", "{
\"number\": 7,
\"values\": [
0.6447732222702529,
0.6004771535288781
],
\"params\": {
\"classifier__bagging_temperature\": 0.20434527686864423,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 10,
\"classifier__iterations\": 228,
\"classifier__l2_leaf_reg\": 11.43215320823212,
\"classifier__learning_rate\": 0.28445016543463936,
\"classifier__loss_function\": \"Logloss\",
\"over__k_neighbors\": 8,
\"over__sampling_strategy\": 0.5298972899076179
}
}", "{
\"number\": 8,
\"values\": [
0.6388699470253478,
0.6002452833216091
],
\"params\": {
\"classifier__bagging_temperature\": 0.09205560337723862,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 10,
\"classifier__iterations\": 256,
\"classifier__l2_leaf_reg\": 11.371631580604891,
\"classifier__learning_rate\": 0.28753365410231146,
\"classifier__loss_function\": \"Logloss\",
\"over__k_neighbors\": 16,
\"over__sampling_strategy\": 0.651273712309146
}
}", "{
\"number\": 9,
\"values\": [
0.6567761324881193,
0.618368666467077
],
\"params\": {
\"classifier__bagging_temperature\": 0.7543489945823536,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 6,
\"classifier__iterations\": 179,
\"classifier__l2_leaf_reg\": 8.530503044615214,
\"classifier__learning_rate\": 0.2977258305880532,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 20,
\"over__sampling_strategy\": 0.5534931870325313
}
}", "{
\"number\": 10,
\"values\": [
0.6396117049502414,
0.6057137545740187
],
\"params\": {
\"classifier__bagging_temperature\": 0.9915240636830094,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 7,
\"classifier__iterations\": 762,
\"classifier__l2_leaf_reg\": 3.975285286881059,
\"classifier__learning_rate\": 0.215582574630618,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 30,
\"over__sampling_strategy\": 0.7555589976885287
}
}", "{
\"number\": 11,
\"values\": [
0.6489385721085841,
0.6218906017623407
],
\"params\": {
\"classifier__bagging_temperature\": 0.9984437003073823,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 8,
\"classifier__iterations\": 562,
\"classifier__l2_leaf_reg\": 3.1348205728505256,
\"classifier__learning_rate\": 0.07919214342005734,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 12,
\"over__sampling_strategy\": 0.9964286097717713
}
}", "{
\"number\": 12,
\"values\": [
0.6623677084875563,
0.6078036510543606
],
\"params\": {
\"classifier__bagging_temperature\": 0.8150636289316929,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 9,
\"classifier__iterations\": 716,
\"classifier__l2_leaf_reg\": 7.4902011187560005,
\"classifier__learning_rate\": 0.0845515926496103,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 25,
\"over__sampling_strategy\": 0.4820378419913067
}
}", "{
\"number\": 13,
\"values\": [
0.6512443989230502,
0.6188393862092557
],
\"params\": {
\"classifier__bagging_temperature\": 0.8450040159186086,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 6,
\"classifier__iterations\": 456,
\"classifier__l2_leaf_reg\": 5.405969890766709,
\"classifier__learning_rate\": 0.16186098171020508,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 13,
\"over__sampling_strategy\": 0.7102253365867022
}
}", "{
\"number\": 15,
\"values\": [
0.641296403583347,
0.6164033777698541
],
\"params\": {
\"classifier__bagging_temperature\": 0.5680134101638298,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 5,
\"classifier__iterations\": 893,
\"classifier__l2_leaf_reg\": 9.231149082536739,
\"classifier__learning_rate\": 0.010004610961700494,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 21,
\"over__sampling_strategy\": 0.6440612778138175
}
}", "{
\"number\": 16,
\"values\": [
0.6658500186062489,
0.6099764883337795
],
\"params\": {
\"classifier__bagging_temperature\": 0.6976402133882517,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 7,
\"classifier__iterations\": 697,
\"classifier__l2_leaf_reg\": 7.5956351205291135,
\"classifier__learning_rate\": 0.108764473821013,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 24,
\"over__sampling_strategy\": 0.4370311041912889
}
}", "{
\"number\": 17,
\"values\": [
0.6637500523722162,
0.6058115630594205
],
\"params\": {
\"classifier__bagging_temperature\": 0.7068512335897372,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 7,
\"classifier__iterations\": 474,
\"classifier__l2_leaf_reg\": 4.4538318038745235,
\"classifier__learning_rate\": 0.17022593368552327,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 15,
\"over__sampling_strategy\": 0.44041493066506293
}
}", "{
\"number\": 18,
\"values\": [
0.6562446450396104,
0.6183833758601154
],
\"params\": {
\"classifier__bagging_temperature\": 0.8548604601577593,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 5,
\"classifier__iterations\": 760,
\"classifier__l2_leaf_reg\": 6.900433756417542,
\"classifier__learning_rate\": 0.1193744096547058,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 24,
\"over__sampling_strategy\": 0.6162657608281761
}
}", "{
\"number\": 19,
\"values\": [
0.633931652472807,
0.595875651427753
],
\"params\": {
\"classifier__bagging_temperature\": 0.8765625269097432,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 1,
\"classifier__iterations\": 362,
\"classifier__l2_leaf_reg\": 9.833002257344305,
\"classifier__learning_rate\": 0.10450782903195371,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 18,
\"over__sampling_strategy\": 0.4732332384704713
}
}", "{
\"number\": 20,
\"values\": [
0.6599319532829758,
0.6193920144183575
],
\"params\": {
\"classifier__bagging_temperature\": 0.6691018280119739,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 7,
\"classifier__iterations\": 515,
\"classifier__l2_leaf_reg\": 7.221703083312235,
\"classifier__learning_rate\": 0.04362351574635248,
\"classifier__loss_function\": \"Logloss\",
\"over__k_neighbors\": 30,
\"over__sampling_strategy\": 0.5733366480944478
}
}", "{
\"number\": 24,
\"values\": [
0.6696504312893732,
0.6065122932528354
],
\"params\": {
\"classifier__bagging_temperature\": 0.51653553589431,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 10,
\"classifier__iterations\": 852,
\"classifier__l2_leaf_reg\": 8.47069272498055,
\"classifier__learning_rate\": 0.03891585400575964,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 27,
\"over__sampling_strategy\": 0.40920949898805703
}
}", "{
\"number\": 25,
\"values\": [
0.664650145636385,
0.6177823241917246
],
\"params\": {
\"classifier__bagging_temperature\": 0.4300999921535704,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 8,
\"classifier__iterations\": 663,
\"classifier__l2_leaf_reg\": 10.242541283787418,
\"classifier__learning_rate\": 0.02697750608441178,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 27,
\"over__sampling_strategy\": 0.49709493307236585
}
}", "{
\"number\": 27,
\"values\": [
0.6713093944539977,
0.5961718852689115
],
\"params\": {
\"classifier__bagging_temperature\": 0.31374205367346913,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 9,
\"classifier__iterations\": 939,
\"classifier__l2_leaf_reg\": 5.747039912459049,
\"classifier__learning_rate\": 0.04935004974456386,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 15,
\"over__sampling_strategy\": 0.3547328567232082
}
}", "{
\"number\": 28,
\"values\": [
0.6632011863700182,
0.6015055189966374
],
\"params\": {
\"classifier__bagging_temperature\": 0.6161250868029889,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 10,
\"classifier__iterations\": 757,
\"classifier__l2_leaf_reg\": 4.649397974644055,
\"classifier__learning_rate\": 0.06692720369296869,
\"classifier__loss_function\": \"Logloss\",
\"over__k_neighbors\": 21,
\"over__sampling_strategy\": 0.40719921393953895
}
}", "{
\"number\": 29,
\"values\": [
0.6630232613430695,
0.6180352555226188
],
\"params\": {
\"classifier__bagging_temperature\": 0.49214852884270216,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 9,
\"classifier__iterations\": 806,
\"classifier__l2_leaf_reg\": 12.24970467704765,
\"classifier__learning_rate\": 0.020596973847678335,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 26,
\"over__sampling_strategy\": 0.5231204767894236
}
}" ], "type": "scatter", "x": [ 0.6372193139352654, 0.6575427980381697, 0.6381240964177318, 0.6611581469842929, 0.6428961846729273, 0.6447732222702529, 0.6388699470253478, 0.6567761324881193, 0.6396117049502414, 0.6489385721085841, 0.6623677084875563, 0.6512443989230502, 0.641296403583347, 0.6658500186062489, 0.6637500523722162, 0.6562446450396104, 0.633931652472807, 0.6599319532829758, 0.6696504312893732, 0.664650145636385, 0.6713093944539977, 0.6632011863700182, 0.6630232613430695 ], "y": [ 0.620093055675489, 0.5902805291782084, 0.5648889821268726, 0.6201903716043509, 0.6194567064656205, 0.6004771535288781, 0.6002452833216091, 0.618368666467077, 0.6057137545740187, 0.6218906017623407, 0.6078036510543606, 0.6188393862092557, 0.6164033777698541, 0.6099764883337795, 0.6058115630594205, 0.6183833758601154, 0.595875651427753, 0.6193920144183575, 0.6065122932528354, 0.6177823241917246, 0.5961718852689115, 0.6015055189966374, 0.6180352555226188 ] }, { "hovertemplate": "%{text}Best Trial", "marker": { "color": [ 1, 2, 14, 21, 22, 23, 26 ], "colorbar": { "title": { "text": "Best Trial" }, "x": 1.1, "xpad": 40 }, "colorscale": [ [ 0, "rgb(255,245,240)" ], [ 0.125, "rgb(254,224,210)" ], [ 0.25, "rgb(252,187,161)" ], [ 0.375, "rgb(252,146,114)" ], [ 0.5, "rgb(251,106,74)" ], [ 0.625, "rgb(239,59,44)" ], [ 0.75, "rgb(203,24,29)" ], [ 0.875, "rgb(165,15,21)" ], [ 1, "rgb(103,0,13)" ] ], "line": { "color": "Grey", "width": 0.5 } }, "mode": "markers", "showlegend": false, "text": [ "{
\"number\": 1,
\"values\": [
0.6499444078337409,
0.6226688815315535
],
\"params\": {
\"classifier__bagging_temperature\": 0.9786237847073697,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 9,
\"classifier__iterations\": 618,
\"classifier__l2_leaf_reg\": 5.594595073701051,
\"classifier__learning_rate\": 0.07066861541398498,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 6,
\"over__sampling_strategy\": 0.8775940469080601
}
}", "{
\"number\": 2,
\"values\": [
0.6519056805949188,
0.6208813075599615
],
\"params\": {
\"classifier__bagging_temperature\": 0.17541045374233666,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 3,
\"classifier__iterations\": 835,
\"classifier__l2_leaf_reg\": 16.160417474713245,
\"classifier__learning_rate\": 0.1351942132623053,
\"classifier__loss_function\": \"Logloss\",
\"over__k_neighbors\": 1,
\"over__sampling_strategy\": 0.5923408300884501
}
}", "{
\"number\": 14,
\"values\": [
0.649116091444005,
0.6232189954438947
],
\"params\": {
\"classifier__bagging_temperature\": 0.5476812504333937,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 8,
\"classifier__iterations\": 660,
\"classifier__l2_leaf_reg\": 8.196322052582264,
\"classifier__learning_rate\": 0.020822392654677994,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 22,
\"over__sampling_strategy\": 0.799756620482692
}
}", "{
\"number\": 21,
\"values\": [
0.6697576900596074,
0.6157760713140774
],
\"params\": {
\"classifier__bagging_temperature\": 0.5493942540864682,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 8,
\"classifier__iterations\": 675,
\"classifier__l2_leaf_reg\": 8.17458323414613,
\"classifier__learning_rate\": 0.03933887055125068,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 23,
\"over__sampling_strategy\": 0.4493776726423679
}
}", "{
\"number\": 22,
\"values\": [
0.6651752616339168,
0.6202762500813138
],
\"params\": {
\"classifier__bagging_temperature\": 0.44283997155438665,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 9,
\"classifier__iterations\": 640,
\"classifier__l2_leaf_reg\": 8.75337551959668,
\"classifier__learning_rate\": 0.04025049905197784,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 22,
\"over__sampling_strategy\": 0.5129058805845305
}
}", "{
\"number\": 23,
\"values\": [
0.6650023331732944,
0.6203855398698063
],
\"params\": {
\"classifier__bagging_temperature\": 0.4519142804553712,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 9,
\"classifier__iterations\": 660,
\"classifier__l2_leaf_reg\": 8.583158000065652,
\"classifier__learning_rate\": 0.03926895529208242,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 22,
\"over__sampling_strategy\": 0.5154215629511536
}
}", "{
\"number\": 26,
\"values\": [
0.6727803318761914,
0.5965936207481224
],
\"params\": {
\"classifier__bagging_temperature\": 0.5885852898255278,
\"classifier__bootstrap_type\": \"Bayesian\",
\"classifier__depth\": 9,
\"classifier__iterations\": 772,
\"classifier__l2_leaf_reg\": 5.729692575897893,
\"classifier__learning_rate\": 0.05227674034319018,
\"classifier__loss_function\": \"CrossEntropy\",
\"over__k_neighbors\": 22,
\"over__sampling_strategy\": 0.3502332783291935
}
}" ], "type": "scatter", "x": [ 0.6499444078337409, 0.6519056805949188, 0.649116091444005, 0.6697576900596074, 0.6651752616339168, 0.6650023331732944, 0.6727803318761914 ], "y": [ 0.6226688815315535, 0.6208813075599615, 0.6232189954438947, 0.6157760713140774, 0.6202762500813138, 0.6203855398698063, 0.5965936207481224 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Pareto-front Plot" }, "xaxis": { "title": { "text": "Mean PR AUC" } }, "yaxis": { "title": { "text": "Mean F1" } } } } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "cat_study = optuna.load_study(study_name=\"catboost_study\", storage=\"sqlite:///my_study.db\")\n", "optuna.visualization.plot_pareto_front(cat_study, target_names=[\"Mean PR AUC\", \"Mean F1\"])" ] }, { "cell_type": "code", "execution_count": 20, "id": "addeaf5e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'classifier__bagging_temperature': 0.44283997155438665, 'classifier__bootstrap_type': 'Bayesian', 'classifier__depth': 9, 'classifier__iterations': 640, 'classifier__l2_leaf_reg': 8.75337551959668, 'classifier__learning_rate': 0.04025049905197784, 'classifier__loss_function': 'CrossEntropy', 'over__k_neighbors': 22, 'over__sampling_strategy': 0.5129058805845305}\n", " precision recall f1-score support\n", "\n", " 0 0.88 0.88 0.88 12191\n", " 1 0.62 0.61 0.61 3893\n", "\n", " accuracy 0.82 16084\n", " macro avg 0.75 0.74 0.75 16084\n", "weighted avg 0.81 0.82 0.82 16084\n", "\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "0.614382490880667\n", "0.665956929903449\n" ] } ], "source": [ "from catboost import CatBoostClassifier\n", "\n", "pipe_CatBoost = imbpipeline(\n", " [\n", "\n", " ('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", "\n", " ('classifier',CatBoostClassifier(thread_count=-1,silent=True,task_type=\"GPU\"))]\n", " )\n", "cat_best_params = cat_study.trials[22].params\n", "evaluate_study(pipe_CatBoost,cat_best_params)\n" ] }, { "attachments": {}, "cell_type": "markdown", "id": "86442ca8", "metadata": {}, "source": [ "### Cross validate the Tuned Models " ] }, { "cell_type": "code", "execution_count": 87, "id": "912ebbe5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Average precision: 0.629 (+/- 0.004)\n", "F1 score: 0.591 (+/- 0.005)\n", "Balanced accuracy: 0.730 (+/- 0.004)\n", "Average precision: 0.658 (+/- 0.003)\n", "F1 score: 0.617 (+/- 0.003)\n", "Balanced accuracy: 0.748 (+/- 0.003)\n", "Average precision: 0.664 (+/- 0.003)\n", "F1 score: 0.617 (+/- 0.004)\n", "Balanced accuracy: 0.747 (+/- 0.003)\n", "Average precision: 0.670 (+/- 0.004)\n", "F1 score: 0.620 (+/- 0.003)\n", "Balanced accuracy: 0.748 (+/- 0.002)\n" ] } ], "source": [ "\n", "\n", "pipe_SVC = imbpipeline(\n", " [ \n", " ('preprocessor', preprocessor),\n", "\n", " ('over',smoteNC2),\n", "\n", " ('classifier',LinearSVC(dual=False))\n", " ]\n", " )\n", "pipe_SVC.set_params(**svc_best_params)\n", "\n", "\n", "pipe_RFC = imbpipeline(\n", " [('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", " ('classifier',RandomForestClassifier(n_jobs=4))\n", " ]\n", " )\n", "pipe_RFC.set_params(**rfc_best_params)\n", "\n", "pipe_LGBM = imbpipeline(\n", " [\n", " ('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", " ('classifier',LGBMClassifier(n_jobs=4,objective='binary'))]\n", " )\n", "pipe_LGBM.set_params(**lgbm_best_params)\n", "\n", "catboost_model = CatBoostClassifier(thread_count=-1,silent=True,task_type=\"GPU\")\n", "pipe_CatBoost = imbpipeline(\n", " [\n", "\n", " ('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", "\n", " ('classifier',catboost_model)]\n", " )\n", "pipe_CatBoost.set_params(**cat_best_params)\n", "\n", "\n", "models = [\n", " ('svc', pipe_SVC),\n", " ('rfc', pipe_RFC),\n", " ('lgbm', pipe_LGBM),\n", " ('catboost', pipe_CatBoost)\n", "]\n", "\n", "# initialize a list to store the results for each model\n", "results = []\n", "\n", "# iterate over the list of models\n", "for name, model in models:\n", " # evaluate the model using the eval_base_models function\n", " df = eval_base_models(5, model, X, y)\n", "\n", " # add the model name to the DataFrame\n", " df['model'] = name\n", "\n", " # add the DataFrame to the list of results\n", " results.append(df)\n", "\n", "df_final_combined = pd.concat(results, ignore_index=True)\n", "\n" ] }, { "cell_type": "code", "execution_count": 88, "id": "ea315ff5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
scoreaverage_precisionbalanced_accuracyf1_scoreaverage_precisionbalanced_accuracyf1_score
model
catboost0.6698960.7482310.6196650.0035000.0024220.003336
lgbm0.6639570.7471430.6170570.0029850.0032400.004117
rfc0.6579940.7477120.6170410.0029140.0026340.002895
svc0.6286260.7303330.5910880.0044570.0037620.005207
\n", "
" ], "text/plain": [ " mean std \\\n", "score average_precision balanced_accuracy f1_score average_precision \n", "model \n", "catboost 0.669896 0.748231 0.619665 0.003500 \n", "lgbm 0.663957 0.747143 0.617057 0.002985 \n", "rfc 0.657994 0.747712 0.617041 0.002914 \n", "svc 0.628626 0.730333 0.591088 0.004457 \n", "\n", " \n", "score balanced_accuracy f1_score \n", "model \n", "catboost 0.002422 0.003336 \n", "lgbm 0.003240 0.004117 \n", "rfc 0.002634 0.002895 \n", "svc 0.003762 0.005207 " ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_final_pivot = df_final_combined.pivot(index='model', columns='score')\n", "\n", "# display the pivoted DataFrame\n", "df_final_pivot" ] }, { "attachments": {}, "cell_type": "markdown", "id": "20f8c70f", "metadata": {}, "source": [ "### Chosen Model: CatBoost Classifier" ] }, { "attachments": {}, "cell_type": "markdown", "id": "cc98d9aa", "metadata": {}, "source": [ "#### Feature Importance" ] }, { "cell_type": "code", "execution_count": 89, "id": "6b5f9c05", "metadata": {}, "outputs": [], "source": [ "def plot_feature_importance(model,X,y):\n", "# Create a StratifiedKFold object\n", " cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", "\n", " # Initialize an empty list to store the feature importances for each fold\n", " feature_importances = []\n", "\n", " # Loop over each fold\n", " for train_index, test_index in cv.split(X, y):\n", " # Split the data into training and validation sets\n", " X_train_cv, X_val_cv = X.iloc[train_index], X.iloc[test_index]\n", " y_train_cv, y_val_cv = y.iloc[train_index], y.iloc[test_index]\n", " \n", " # Fit the pipeline on the training data\n", " model.fit(X_train_cv, y_train_cv)\n", " \n", " # Access the fitted CatBoost model from the pipeline\n", " catboost_model = model.named_steps['classifier']\n", " \n", " # Calculate the feature importances\n", " fi = catboost_model.get_feature_importance()\n", " \n", " # Append the feature importances to the list\n", " feature_importances.append(fi)\n", "\n", " # Calculate the average feature importance for each feature\n", " feature_importances = pd.DataFrame(feature_importances, columns=X.columns)\n", " avg_feature_importances = feature_importances.mean(axis=0)\n", "\n", " # Create a DataFrame with average feature importances and feature names\n", " fi_df = pd.DataFrame({'feature_importance': avg_feature_importances,\n", " 'feature_names': X.columns})\n", "\n", " # Sort the DataFrame by average feature importance\n", " fi_df.sort_values(by=['feature_importance'], ascending=False, inplace=True)\n", "\n", " # Plot Seaborn bar chart\n", " plt.figure(figsize=(10, 8))\n", " sns.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])\n", "\n", " # Add chart labels\n", " plt.title('AVERAGE CATBOOST FEATURE IMPORTANCE ACROSS 5 FOLDS')\n", " plt.xlabel('FEATURE IMPORTANCE')\n", " plt.ylabel('FEATURE NAMES')" ] }, { "cell_type": "code", "execution_count": 90, "id": "b0b91b8a", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_feature_importance(pipe_CatBoost,X,y)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "e2780c6e", "metadata": {}, "source": [ "### Saving the best model" ] }, { "cell_type": "code", "execution_count": 21, "id": "b88f2d45", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['final_model.pkl']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import joblib\n", "pipe_final_CatBoost = imbpipeline(\n", " [\n", "\n", " ('preprocessor',preprocessor),\n", " ('over',smoteNC2),\n", "\n", " ('classifier',CatBoostClassifier(thread_count=-1,silent=True,task_type=\"GPU\"))]\n", " )\n", "pipe_final_CatBoost.set_params(**cat_best_params)\n", "\n", "pipe_final_CatBoost.fit(X,y)\n", "\n", "joblib.dump(pipe_final_CatBoost,'final_model.pkl')\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 5 }