{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Clusters (example 01) ##\n", "\n", "\n", "Purpose: Identify clusters in a randomly generated sample of blobs\n", "\n", "**1** import the libraries needed: numpy, sklearn, matplotlib and pandas\n", "\n", "**2** generate a sample of blobs and convert it into a dataframe called df1\n", "\n", "**3** Verify datatype\n", "\n", "**4** Plot the blobs\n", "\n", "**5** calculate WCSS\n", "\n", "**6** plot the new chart with centroids\n", "\n", "**7** identify which group each item belongs to\n", "\n", "**8** add new column\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from matplotlib import pyplot as plt\n", "from sklearn.cluster import KMeans\n", "# make_blobs is imported from the public sklearn.datasets namespace; the old\n", "# sklearn.datasets.samples_generator path was removed in scikit-learn 0.24.\n", "from sklearn.datasets import make_blobs" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Synthetic sample: 400 points around 5 centers; random_state=0 pins the draw.\n", "XY, y = make_blobs(n_samples=400, centers=5, cluster_std=0.60, random_state=0)\n", "df1 = pd.DataFrame(XY)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 float64\n", "1 float64\n", "dtype: object" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.dtypes" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(400, 2)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.shape" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0-1.3799807.185038
1-1.7640412.222230
21.9755390.718989
3-1.5543263.050187
41.9889431.509767
\n", "
" ], "text/plain": [ " 0 1\n", "0 -1.379980 7.185038\n", "1 -1.764041 2.222230\n", "2 1.975539 0.718989\n", "3 -1.554326 3.050187\n", "4 1.988943 1.509767" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Raw sample before clustering; plt.show() keeps the ylabel Text repr out of the output.\n", "plt.scatter(df1[0], df1[1])\n", "plt.title('Blobs')\n", "plt.xlabel('X')\n", "plt.ylabel('Y')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Elbow method: fit KMeans for k = 1..10 and record each fit's inertia (WCSS).\n", "cluster_counts = range(1, 11)\n", "wcss = []\n", "for n_clusters in cluster_counts:\n", "    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)\n", "    kmeans.fit(df1)\n", "    wcss.append(kmeans.inertia_)\n", "\n", "plt.plot(cluster_counts, wcss)\n", "plt.title('Elbow Method')\n", "plt.xlabel('Number of clusters')\n", "plt.ylabel('WCSS')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Final model: 5 clusters, matching the centers=5 used to generate the sample.\n", "kmeans = KMeans(n_clusters=5, init='k-means++', max_iter=400, n_init=10, random_state=0)\n", "pred_y = kmeans.fit_predict(df1)  # cluster label for every row of df1\n", "centroids = kmeans.cluster_centers_\n", "plt.scatter(df1[0], df1[1])\n", "plt.scatter(centroids[:, 0], centroids[:, 1], s=300, c='red')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }