{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Following the Real Python k-means clustering tutorial: https://realpython.com/k-means-clustering-python/" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "from sklearn.datasets import make_blobs\n", "from sklearn.cluster import KMeans\n", "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Create synthetic data: 200 samples around 3 blob centers; fixed random_state for reproducibility\n", "features, true_labels = make_blobs(n_samples=200, centers=3, cluster_std=2.75, random_state=42)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 9.77075874, 3.27621022],\n", " [ -9.71349666, 11.27451802],\n", " [ -6.91330582, -9.34755911],\n", " [-10.86185913, -10.75063497],\n", " [ -8.50038027, -4.54370383]])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "features[:5]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 0, 2, 2, 2])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "true_labels[:5]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "scaler = StandardScaler()\n", "scaled_features = scaler.fit_transform(features)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 2.13082109, 0.25604351],\n", " [-1.52698523, 1.41036744],\n", " [-1.00130152, -1.56583175],\n", " [-1.74256891, -1.76832509],\n", " [-1.29924521, -0.87253446]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scaled_features[:5]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "kmeans = KMeans(init=\"random\", n_clusters=3, 
n_init=10, max_iter=300, random_state=42)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
KMeans(init='random', n_clusters=3, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(init='random', n_clusters=3, random_state=42)