{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Suppose you have a list of positions of possible clients of Uber in Lisbon (Passageiros.csv). \n", "How many cars could you use, and where could they be positioned in order to reduce time?\n", "\n", "Use a cluster analysis approach to support the solution of this problem.\n", "* import the libraries needed\n", "* import dataset from Passageiros.csv file\n", "* Verify imported data \n", "* verify data types and convert into numeric if needed. Use, for example, df['X']=pd.to_numeric(df['X'], errors='coerce')\n", "* plot a scatter chart\n", "* create an X dataframe including only numeric columns\n", "* calculate WCSS using X dataframe:\n", "\n", " wcss = []\n", "\n", " for i in range(1, 11):\n", "\n", " kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)\n", " \n", " kmeans.fit(X)\n", " \n", " wcss.append(kmeans.inertia_)\n", "\n", " plt.plot(range(1, 11), wcss)\n", "\n", " plt.title('Elbow Method')\n", "\n", " plt.xlabel('Number of clusters')\n", "\n", " plt.ylabel('WCSS')\n", "\n", " plt.show()\n", "\n", "\n", "* plot a scatter chart showing centroids of the clusters estimated\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from matplotlib import pyplot as plt\n", "from sklearn.cluster import KMeans" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#import dataset from Passageiros.csv file\n", "df=pd.read_csv('Passageiros.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | X | \n", "Y | \n", "Name | \n", "
---|---|---|---|
0 | \n", "-9.163874 | \n", "38.711563 | \n", "Passageiro 1 | \n", "
1 | \n", "-9.199447 | \n", "38.703342 | \n", "Passageiro 2 | \n", "
2 | \n", "-9.143752 | \n", "38.729060 | \n", "Passageiro 3 | \n", "
3 | \n", "-9.150410 | \n", "38.755656 | \n", "Passageiro 4 | \n", "
4 | \n", "-9.136334 | \n", "38.758534 | \n", "Passageiro 5 | \n", "