{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**LabML03a**\n",
    "\n",
    "Purpose: Identify clusters of Gira docking stations\n",
    "\n",
    "1 Import the libraries needed: numpy, sklearn, matplotlib and pandas\n",
    "\n",
    "2 Load the Gira station data and build a dataframe called df1 with the longitude and latitude of each station\n",
    "\n",
    "3 Verify the datatypes\n",
    "\n",
    "4 Plot the stations\n",
    "\n",
    "5 Calculate WCSS\n",
    "\n",
    "6 Plot the new chart with centroids\n",
    "\n",
    "7 Identify which group each item belongs to\n",
    "\n",
    "Comment the code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt\n",
    "from sklearn.cluster import KMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Gira data: a semicolon-separated CSV hosted on GitHub\n",
    "file='https://github.com/masterfloss/data/blob/main/giras201030.csv?raw=true'\n",
    "dfGiras=pd.read_csv(file,sep=';')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfGiras.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Try the parsing on one row first: the second whitespace-separated token, minus '[' and ','\n",
    "dfGiras.loc[0,'position'].split()[1].replace('[','').replace(',','')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split every 'position' string on whitespace once, then clean tokens 1 and 2\n",
    "# column-wise instead of looping over the rows with .loc.\n",
    "# Assumes each value looks like ... [<long>, <lat>], ... (the same layout the single-row test relies on).\n",
    "position_tokens = dfGiras['position'].str.split(expand=True)\n",
    "dfGiras['long'] = position_tokens[1].str.replace('[', '', regex=False).str.replace(',', '', regex=False)\n",
    "dfGiras['lat'] = position_tokens[2].str.replace('],', '', regex=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfGiras.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explicit copy: df1 is an independent frame, so changing it never touches dfGiras\n",
    "df1=dfGiras[['long','lat']].copy()\n",
    "\n",
    "# The parsed coordinates are still text at this point\n",
    "df1.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert both columns to numbers. df1 is rebound to the converted frame because\n",
    "# assigning through df1.loc[:, col] can keep the old object dtype on recent pandas versions.\n",
    "df1=df1.apply(pd.to_numeric)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw station positions before clustering\n",
    "plt.scatter(df1['long'], df1['lat'])\n",
    "plt.title('Giras')\n",
    "plt.xlabel('long')\n",
    "plt.ylabel('lat')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Elbow method: fit KMeans for k = 1..10 and record the within-cluster sum of squares (inertia_)\n",
    "cluster_counts = range(1, 11)\n",
    "wcss = []\n",
    "for k in cluster_counts:\n",
    "    model = KMeans(n_clusters=k, init='k-means++', max_iter=300, n_init=10, random_state=0)\n",
    "    model.fit(df1)\n",
    "    wcss.append(model.inertia_)\n",
    "plt.plot(cluster_counts, wcss)\n",
    "plt.title('Elbow Method')\n",
    "plt.xlabel('Number of clusters')\n",
    "plt.ylabel('WCSS')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final model with 5 clusters; fit_predict returns the cluster label of every row of df1\n",
    "model1 = KMeans(n_clusters=5, init='k-means++', max_iter=400, n_init=10, random_state=0)\n",
    "labels = model1.fit_predict(df1)\n",
    "\n",
    "# Colour the stations by cluster label and overlay the centroids in red\n",
    "plt.scatter(df1[\"long\"], df1[\"lat\"], c=labels)\n",
    "plt.scatter(model1.cluster_centers_[:, 0], model1.cluster_centers_[:, 1], s=300, c='red')\n",
    "plt.title('Giras clusters')\n",
    "plt.xlabel('long')\n",
    "plt.ylabel('lat')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cluster of the first row only (the 0:0 slice keeps it as a one-row DataFrame)\n",
    "model1.predict(df1.loc[0:0,:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cluster index of every row of df1\n",
    "model1.predict(df1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}