{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**LabML03a**\n",
    "\n",
    "Purpose: Identify clusters of Gira docking station\n",
    "\n",
    "1 import libraries needed:numpy, sklearn, matplotlib and pandas\n",
    "\n",
    "2 generate a sample of blobs and convert it into a dataframe called df1\n",
    "\n",
    "3 Verify datatype\n",
    "\n",
    "4 Plot the blobs\n",
    "\n",
    "5 calculete WCSS\n",
    "\n",
    "6 plot the new chart with centroids\n",
    "\n",
    "7 identify to what group does each item belongs\n",
    "\n",
    "Comment the code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt\n",
    "from sklearn.cluster import KMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "file='https://github.com/masterfloss/data/blob/main/giras201030.csv?raw=true'\n",
    "dfGiras=pd.read_csv(file,sep=';')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfGiras.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfGiras.loc[0,'position'].split()[1].replace('[','').replace(',','')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(len(dfGiras['position'])):\n",
    "    dfGiras.loc[i,'long']=dfGiras.loc[i,'position'].split()[1].replace('[','').replace(',','')\n",
    "    dfGiras.loc[i,'lat']=dfGiras.loc[i,'position'].split()[2].replace('],','')\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfGiras.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1=dfGiras[['long','lat']]\n",
    "\n",
    "df1.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.loc[:,'long']=pd.to_numeric(df1.loc[:,'long'])\n",
    "df1.loc[:,'lat']=pd.to_numeric(df1.loc[:,'lat'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(df1['long'], df1['lat'])\n",
    "plt.title('Giras')\n",
    "plt.xlabel('long')\n",
    "plt.ylabel('lat')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wcss = []\n",
    "for i in range(1, 11):\n",
    "    model = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)\n",
    "    model.fit(df1)\n",
    "    wcss.append(model.inertia_)\n",
    "plt.plot(range(1, 11), wcss)\n",
    "plt.title('Elbow Method')\n",
    "plt.xlabel('Number of clusters')\n",
    "plt.ylabel('WCSS')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model1 = KMeans(n_clusters=5, init='k-means++', max_iter=400, n_init=10, random_state=0)\n",
    "model1.fit_predict(df1)\n",
    "plt.scatter(df1[\"long\"], df1[\"lat\"])\n",
    "plt.scatter(model1.cluster_centers_[:, 0], model1.cluster_centers_[:, 1], s=300, c='red')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model1.predict(df1.loc[0:0,:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model1.predict(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}