diivien commited on
Commit
18ad38b
·
1 Parent(s): b208fb5
Exploratory Data Analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Model Building.ipynb CHANGED
@@ -1068,7 +1068,7 @@
1068
  },
1069
  {
1070
  "cell_type": "code",
1071
- "execution_count": 22,
1072
  "id": "d66743ec",
1073
  "metadata": {},
1074
  "outputs": [
@@ -1333,7 +1333,7 @@
1333
  "[64334 rows x 13 columns]"
1334
  ]
1335
  },
1336
- "execution_count": 22,
1337
  "metadata": {},
1338
  "output_type": "execute_result"
1339
  }
@@ -1348,7 +1348,7 @@
1348
  },
1349
  {
1350
  "cell_type": "code",
1351
- "execution_count": 17,
1352
  "id": "4797c378",
1353
  "metadata": {},
1354
  "outputs": [],
@@ -1364,7 +1364,7 @@
1364
  },
1365
  {
1366
  "cell_type": "code",
1367
- "execution_count": 18,
1368
  "id": "6c8a6fd9",
1369
  "metadata": {},
1370
  "outputs": [],
@@ -1405,7 +1405,7 @@
1405
  },
1406
  {
1407
  "cell_type": "code",
1408
- "execution_count": 19,
1409
  "id": "077ca8bd",
1410
  "metadata": {},
1411
  "outputs": [
@@ -1506,7 +1506,7 @@
1506
  },
1507
  {
1508
  "cell_type": "code",
1509
- "execution_count": 20,
1510
  "id": "e4c87875",
1511
  "metadata": {},
1512
  "outputs": [
@@ -1627,7 +1627,7 @@
1627
  "oversampling2 0.004290 0.004862 0.006231 "
1628
  ]
1629
  },
1630
- "execution_count": 20,
1631
  "metadata": {},
1632
  "output_type": "execute_result"
1633
  }
@@ -1639,6 +1639,81 @@
1639
  "df_pivot"
1640
  ]
1641
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1642
  {
1643
  "attachments": {},
1644
  "cell_type": "markdown",
@@ -5437,6 +5512,15 @@
5437
  "evaluate_study(pipe_LGBM,lgbm_best_params)\n"
5438
  ]
5439
  },
 
 
 
 
 
 
 
 
 
5440
  {
5441
  "cell_type": "code",
5442
  "execution_count": null,
 
1068
  },
1069
  {
1070
  "cell_type": "code",
1071
+ "execution_count": 9,
1072
  "id": "d66743ec",
1073
  "metadata": {},
1074
  "outputs": [
 
1333
  "[64334 rows x 13 columns]"
1334
  ]
1335
  },
1336
+ "execution_count": 9,
1337
  "metadata": {},
1338
  "output_type": "execute_result"
1339
  }
 
1348
  },
1349
  {
1350
  "cell_type": "code",
1351
+ "execution_count": 10,
1352
  "id": "4797c378",
1353
  "metadata": {},
1354
  "outputs": [],
 
1364
  },
1365
  {
1366
  "cell_type": "code",
1367
+ "execution_count": 11,
1368
  "id": "6c8a6fd9",
1369
  "metadata": {},
1370
  "outputs": [],
 
1405
  },
1406
  {
1407
  "cell_type": "code",
1408
+ "execution_count": 12,
1409
  "id": "077ca8bd",
1410
  "metadata": {},
1411
  "outputs": [
 
1506
  },
1507
  {
1508
  "cell_type": "code",
1509
+ "execution_count": 13,
1510
  "id": "e4c87875",
1511
  "metadata": {},
1512
  "outputs": [
 
1627
  "oversampling2 0.004290 0.004862 0.006231 "
1628
  ]
1629
  },
1630
+ "execution_count": 13,
1631
  "metadata": {},
1632
  "output_type": "execute_result"
1633
  }
 
1639
  "df_pivot"
1640
  ]
1641
  },
1642
+ {
1643
+ "cell_type": "code",
1644
+ "execution_count": 14,
1645
+ "id": "1317e9df",
1646
+ "metadata": {},
1647
+ "outputs": [
1648
+ {
1649
+ "data": {
1650
+ "text/html": [
1651
+ "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
1652
+ " ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
1653
+ " transformers=[(&#x27;num&#x27;,\n",
1654
+ " Pipeline(steps=[(&#x27;scaler&#x27;,\n",
1655
+ " MinMaxScaler())]),\n",
1656
+ " [&#x27;loudness&#x27;, &#x27;duration_ms&#x27;,\n",
1657
+ " &#x27;tempo&#x27;]),\n",
1658
+ " (&#x27;cat&#x27;,\n",
1659
+ " Pipeline(steps=[(&#x27;encoder&#x27;,\n",
1660
+ " TargetEncoder())]),\n",
1661
+ " [&#x27;track_genre&#x27;, &#x27;key&#x27;,\n",
1662
+ " &#x27;time_signature&#x27;])])),\n",
1663
+ " (&#x27;over&#x27;,\n",
1664
+ " SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n",
1665
+ " random_state=42)),\n",
1666
+ " (&#x27;classifier&#x27;, LinearSVC(dual=False))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
1667
+ " ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
1668
+ " transformers=[(&#x27;num&#x27;,\n",
1669
+ " Pipeline(steps=[(&#x27;scaler&#x27;,\n",
1670
+ " MinMaxScaler())]),\n",
1671
+ " [&#x27;loudness&#x27;, &#x27;duration_ms&#x27;,\n",
1672
+ " &#x27;tempo&#x27;]),\n",
1673
+ " (&#x27;cat&#x27;,\n",
1674
+ " Pipeline(steps=[(&#x27;encoder&#x27;,\n",
1675
+ " TargetEncoder())]),\n",
1676
+ " [&#x27;track_genre&#x27;, &#x27;key&#x27;,\n",
1677
+ " &#x27;time_signature&#x27;])])),\n",
1678
+ " (&#x27;over&#x27;,\n",
1679
+ " SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n",
1680
+ " random_state=42)),\n",
1681
+ " (&#x27;classifier&#x27;, LinearSVC(dual=False))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">preprocessor: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(remainder=&#x27;passthrough&#x27;,\n",
1682
+ " transformers=[(&#x27;num&#x27;,\n",
1683
+ " Pipeline(steps=[(&#x27;scaler&#x27;, MinMaxScaler())]),\n",
1684
+ " [&#x27;loudness&#x27;, &#x27;duration_ms&#x27;, &#x27;tempo&#x27;]),\n",
1685
+ " (&#x27;cat&#x27;,\n",
1686
+ " Pipeline(steps=[(&#x27;encoder&#x27;, TargetEncoder())]),\n",
1687
+ " [&#x27;track_genre&#x27;, &#x27;key&#x27;, &#x27;time_signature&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">num</label><div class=\"sk-toggleable__content\"><pre>[&#x27;loudness&#x27;, &#x27;duration_ms&#x27;, &#x27;tempo&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MinMaxScaler</label><div class=\"sk-toggleable__content\"><pre>MinMaxScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">cat</label><div class=\"sk-toggleable__content\"><pre>[&#x27;track_genre&#x27;, &#x27;key&#x27;, &#x27;time_signature&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">TargetEncoder</label><div class=\"sk-toggleable__content\"><pre>TargetEncoder()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">remainder</label><div class=\"sk-toggleable__content\"><pre>[&#x27;explicit&#x27;, &#x27;danceability&#x27;, &#x27;energy&#x27;, &#x27;mode&#x27;, &#x27;speechiness&#x27;, &#x27;acousticness&#x27;, &#x27;instrumentalness&#x27;, &#x27;liveness&#x27;, &#x27;valence&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">passthrough</label><div class=\"sk-toggleable__content\"><pre>passthrough</pre></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SMOTENC</label><div class=\"sk-toggleable__content\"><pre>SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4, random_state=42)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearSVC</label><div class=\"sk-toggleable__content\"><pre>LinearSVC(dual=False)</pre></div></div></div></div></div></div></div>"
1688
+ ],
1689
+ "text/plain": [
1690
+ "Pipeline(steps=[('preprocessor',\n",
1691
+ " ColumnTransformer(remainder='passthrough',\n",
1692
+ " transformers=[('num',\n",
1693
+ " Pipeline(steps=[('scaler',\n",
1694
+ " MinMaxScaler())]),\n",
1695
+ " ['loudness', 'duration_ms',\n",
1696
+ " 'tempo']),\n",
1697
+ " ('cat',\n",
1698
+ " Pipeline(steps=[('encoder',\n",
1699
+ " TargetEncoder())]),\n",
1700
+ " ['track_genre', 'key',\n",
1701
+ " 'time_signature'])])),\n",
1702
+ " ('over',\n",
1703
+ " SMOTENC(categorical_features=[3, 4, 5, 6, 9], n_jobs=4,\n",
1704
+ " random_state=42)),\n",
1705
+ " ('classifier', LinearSVC(dual=False))])"
1706
+ ]
1707
+ },
1708
+ "execution_count": 14,
1709
+ "metadata": {},
1710
+ "output_type": "execute_result"
1711
+ }
1712
+ ],
1713
+ "source": [
1714
+ "oversampling2_SVC"
1715
+ ]
1716
+ },
1717
  {
1718
  "attachments": {},
1719
  "cell_type": "markdown",
 
5512
  "evaluate_study(pipe_LGBM,lgbm_best_params)\n"
5513
  ]
5514
  },
5515
+ {
5516
+ "attachments": {},
5517
+ "cell_type": "markdown",
5518
+ "id": "37bcee22",
5519
+ "metadata": {},
5520
+ "source": [
5521
+ "#### CatBoost Classifier Model"
5522
+ ]
5523
+ },
5524
  {
5525
  "cell_type": "code",
5526
  "execution_count": null,