finalize
Browse files- Exploratory Data Analysis.ipynb +0 -0
- Model Building.ipynb +91 -7
Exploratory Data Analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
Model Building.ipynb
CHANGED
@@ -1068,7 +1068,7 @@
|
|
1068 |
},
|
1069 |
{
|
1070 |
"cell_type": "code",
|
1071 |
-
"execution_count":
|
1072 |
"id": "d66743ec",
|
1073 |
"metadata": {},
|
1074 |
"outputs": [
|
@@ -1333,7 +1333,7 @@
|
|
1333 |
"[64334 rows x 13 columns]"
|
1334 |
]
|
1335 |
},
|
1336 |
-
"execution_count":
|
1337 |
"metadata": {},
|
1338 |
"output_type": "execute_result"
|
1339 |
}
|
@@ -1348,7 +1348,7 @@
|
|
1348 |
},
|
1349 |
{
|
1350 |
"cell_type": "code",
|
1351 |
-
"execution_count":
|
1352 |
"id": "4797c378",
|
1353 |
"metadata": {},
|
1354 |
"outputs": [],
|
@@ -1364,7 +1364,7 @@
|
|
1364 |
},
|
1365 |
{
|
1366 |
"cell_type": "code",
|
1367 |
-
"execution_count":
|
1368 |
"id": "6c8a6fd9",
|
1369 |
"metadata": {},
|
1370 |
"outputs": [],
|
@@ -1405,7 +1405,7 @@
|
|
1405 |
},
|
1406 |
{
|
1407 |
"cell_type": "code",
|
1408 |
-
"execution_count":
|
1409 |
"id": "077ca8bd",
|
1410 |
"metadata": {},
|
1411 |
"outputs": [
|
@@ -1506,7 +1506,7 @@
|
|
1506 |
},
|
1507 |
{
|
1508 |
"cell_type": "code",
|
1509 |
-
"execution_count":
|
1510 |
"id": "e4c87875",
|
1511 |
"metadata": {},
|
1512 |
"outputs": [
|
@@ -1627,7 +1627,7 @@
|
|
1627 |
"oversampling2 0.004290 0.004862 0.006231 "
|
1628 |
]
|
1629 |
},
|
1630 |
-
"execution_count":
|
1631 |
"metadata": {},
|
1632 |
"output_type": "execute_result"
|
1633 |
}
|
@@ -1639,6 +1639,81 @@
|
|
1639 |
"df_pivot"
|
1640 |
]
|
1641 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1642 |
{
|
1643 |
"attachments": {},
|
1644 |
"cell_type": "markdown",
|
@@ -5437,6 +5512,15 @@
|
|
5437 |
"evaluate_study(pipe_LGBM,lgbm_best_params)\n"
|
5438 |
]
|
5439 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5440 |
{
|
5441 |
"cell_type": "code",
|
5442 |
"execution_count": null,
|
|
|
1068 |
},
|
1069 |
{
|
1070 |
"cell_type": "code",
|
1071 |
+
"execution_count": 9,
|
1072 |
"id": "d66743ec",
|
1073 |
"metadata": {},
|
1074 |
"outputs": [
|
|
|
1333 |
"[64334 rows x 13 columns]"
|
1334 |
]
|
1335 |
},
|
1336 |
+
"execution_count": 9,
|
1337 |
"metadata": {},
|
1338 |
"output_type": "execute_result"
|
1339 |
}
|
|
|
1348 |
},
|
1349 |
{
|
1350 |
"cell_type": "code",
|
1351 |
+
"execution_count": 10,
|
1352 |
"id": "4797c378",
|
1353 |
"metadata": {},
|
1354 |
"outputs": [],
|
|
|
1364 |
},
|
1365 |
{
|
1366 |
"cell_type": "code",
|
1367 |
+
"execution_count": 11,
|
1368 |
"id": "6c8a6fd9",
|
1369 |
"metadata": {},
|
1370 |
"outputs": [],
|
|
|
1405 |
},
|
1406 |
{
|
1407 |
"cell_type": "code",
|
1408 |
+
"execution_count": 12,
|
1409 |
"id": "077ca8bd",
|
1410 |
"metadata": {},
|
1411 |
"outputs": [
|
|
|
1506 |
},
|
1507 |
{
|
1508 |
"cell_type": "code",
|
1509 |
+
"execution_count": 13,
|
1510 |
"id": "e4c87875",
|
1511 |
"metadata": {},
|
1512 |
"outputs": [
|
|
|
1627 |
"oversampling2 0.004290 0.004862 0.006231 "
|
1628 |
]
|
1629 |
},
|
1630 |
+
"execution_count": 13,
|
1631 |
"metadata": {},
|
1632 |
"output_type": "execute_result"
|
1633 |
}
|
|
|
1639 |
"df_pivot"
|
1640 |
]
|
1641 |
},
|
1642 |
+
{
|
1643 |
+
"cell_type": "code",
|
1644 |
+
"execution_count": 14,
|
1645 |
+
"id": "1317e9df",
|
1646 |
+
"metadata": {},
|
1647 |
+
"outputs": [
|
1648 |
+
{
|
1649 |
+
"data": {
|
1650 |
+
"text/html": [
|
1651 |
+
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
1652 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
1653 |
+
" transformers=[('num',\n",
|
1654 |
+
" Pipeline(steps=[('scaler',\n",
|
1655 |
+
" MinMaxScaler())]),\n",
|
1656 |
+
" ['loudness', 'duration_ms',\n",
|
1657 |
+
" 'tempo']),\n",
|
1658 |
+
" ('cat',\n",
|
1659 |
+
" Pipeline(steps=[('encoder',\n",
|
1660 |
+
" TargetEncoder())]),\n",
|
1661 |
+
" ['track_genre', 'key',\n",
|
1662 |
+
" 'time_signature'])])),\n",
|
1663 |
+
" ('over',\n",
|
1664 |
+
" SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n",
|
1665 |
+
" random_state=42)),\n",
|
1666 |
+
" ('classifier', LinearSVC(dual=False))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('preprocessor',\n",
|
1667 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
1668 |
+
" transformers=[('num',\n",
|
1669 |
+
" Pipeline(steps=[('scaler',\n",
|
1670 |
+
" MinMaxScaler())]),\n",
|
1671 |
+
" ['loudness', 'duration_ms',\n",
|
1672 |
+
" 'tempo']),\n",
|
1673 |
+
" ('cat',\n",
|
1674 |
+
" Pipeline(steps=[('encoder',\n",
|
1675 |
+
" TargetEncoder())]),\n",
|
1676 |
+
" ['track_genre', 'key',\n",
|
1677 |
+
" 'time_signature'])])),\n",
|
1678 |
+
" ('over',\n",
|
1679 |
+
" SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n",
|
1680 |
+
" random_state=42)),\n",
|
1681 |
+
" ('classifier', LinearSVC(dual=False))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">preprocessor: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(remainder='passthrough',\n",
|
1682 |
+
" transformers=[('num',\n",
|
1683 |
+
" Pipeline(steps=[('scaler', MinMaxScaler())]),\n",
|
1684 |
+
" ['loudness', 'duration_ms', 'tempo']),\n",
|
1685 |
+
" ('cat',\n",
|
1686 |
+
" Pipeline(steps=[('encoder', TargetEncoder())]),\n",
|
1687 |
+
" ['track_genre', 'key', 'time_signature'])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">num</label><div class=\"sk-toggleable__content\"><pre>['loudness', 'duration_ms', 'tempo']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MinMaxScaler</label><div class=\"sk-toggleable__content\"><pre>MinMaxScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">cat</label><div class=\"sk-toggleable__content\"><pre>['track_genre', 'key', 'time_signature']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">TargetEncoder</label><div class=\"sk-toggleable__content\"><pre>TargetEncoder()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">remainder</label><div class=\"sk-toggleable__content\"><pre>['explicit', 'danceability', 'energy', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence']</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">passthrough</label><div class=\"sk-toggleable__content\"><pre>passthrough</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SMOTENC</label><div class=\"sk-toggleable__content\"><pre>SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4, random_state=42)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearSVC</label><div class=\"sk-toggleable__content\"><pre>LinearSVC(dual=False)</pre></div></div></div></div></div></div></div>"
|
1688 |
+
],
|
1689 |
+
"text/plain": [
|
1690 |
+
"Pipeline(steps=[('preprocessor',\n",
|
1691 |
+
" ColumnTransformer(remainder='passthrough',\n",
|
1692 |
+
" transformers=[('num',\n",
|
1693 |
+
" Pipeline(steps=[('scaler',\n",
|
1694 |
+
" MinMaxScaler())]),\n",
|
1695 |
+
" ['loudness', 'duration_ms',\n",
|
1696 |
+
" 'tempo']),\n",
|
1697 |
+
" ('cat',\n",
|
1698 |
+
" Pipeline(steps=[('encoder',\n",
|
1699 |
+
" TargetEncoder())]),\n",
|
1700 |
+
" ['track_genre', 'key',\n",
|
1701 |
+
" 'time_signature'])])),\n",
|
1702 |
+
" ('over',\n",
|
1703 |
+
" SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n",
|
1704 |
+
" random_state=42)),\n",
|
1705 |
+
" ('classifier', LinearSVC(dual=False))])"
|
1706 |
+
]
|
1707 |
+
},
|
1708 |
+
"execution_count": 14,
|
1709 |
+
"metadata": {},
|
1710 |
+
"output_type": "execute_result"
|
1711 |
+
}
|
1712 |
+
],
|
1713 |
+
"source": [
|
1714 |
+
"oversampling2_SVC"
|
1715 |
+
]
|
1716 |
+
},
|
1717 |
{
|
1718 |
"attachments": {},
|
1719 |
"cell_type": "markdown",
|
|
|
5512 |
"evaluate_study(pipe_LGBM,lgbm_best_params)\n"
|
5513 |
]
|
5514 |
},
|
5515 |
+
{
|
5516 |
+
"attachments": {},
|
5517 |
+
"cell_type": "markdown",
|
5518 |
+
"id": "37bcee22",
|
5519 |
+
"metadata": {},
|
5520 |
+
"source": [
|
5521 |
+
"#### CatBoost Classifier Model"
|
5522 |
+
]
|
5523 |
+
},
|
5524 |
{
|
5525 |
"cell_type": "code",
|
5526 |
"execution_count": null,
|