{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#Lab06" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load libraries\n", "import pandas as pd\n", "#pd.set_option('display.max_rows', 500)\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.decomposition import PCA\n", "from sklearn.pipeline import Pipeline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Regionprice2018price2000purchacingPower2017crime2019crime1993wage2018IMT2018percapitaIMI2018percapitaWaste2018WasteSel2018
0Portugal108016.053344100.032.630.81166.997.6146.80.5129590.109483
1Continente109778.053935100.732.130.91170.3100.3149.20.5109570.107298
2Norte80153.04714592.128.123.01056.655.0120.20.4698460.081176
3Alto Minho50090.02687479.728.718.0978.136.6115.10.4695070.080048
4Arcos de Valdevez30437.02257667.826.516.2882.120.296.90.3962060.052423
\n", "
" ], "text/plain": [ " Region price2018 price2000 purchacingPower2017 crime2019 \\\n", "0 Portugal 108016.0 53344 100.0 32.6 \n", "1 Continente 109778.0 53935 100.7 32.1 \n", "2 Norte 80153.0 47145 92.1 28.1 \n", "3 Alto Minho 50090.0 26874 79.7 28.7 \n", "4 Arcos de Valdevez 30437.0 22576 67.8 26.5 \n", "\n", " crime1993 wage2018 IMT2018percapita IMI2018percapita Waste2018 \\\n", "0 30.8 1166.9 97.6 146.8 0.512959 \n", "1 30.9 1170.3 100.3 149.2 0.510957 \n", "2 23.0 1056.6 55.0 120.2 0.469846 \n", "3 18.0 978.1 36.6 115.1 0.469507 \n", "4 16.2 882.1 20.2 96.9 0.396206 \n", "\n", " WasteSel2018 \n", "0 0.109483 \n", "1 0.107298 \n", "2 0.081176 \n", "3 0.080048 \n", "4 0.052423 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the workbook and keep only the rows without missing values.\n", "df = pd.read_excel('realEstate.xlsx').dropna()\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Regionprice2018price2000purchacingPower2017crime2019crime1993wage2018IMT2018percapitaIMI2018percapitaWaste2018WasteSel2018
0Portugal108016.053344100.032.630.81166.997.6146.80.5129590.109483
1Continente109778.053935100.732.130.91170.3100.3149.20.5109570.107298
2Norte80153.04714592.128.123.01056.655.0120.20.4698460.081176
3Alto Minho50090.02687479.728.718.0978.136.6115.10.4695070.080048
4Arcos de Valdevez30437.02257667.826.516.2882.120.296.90.3962060.052423
....................................
347Ribeira Brava34080.01786768.425.922.4895.329.663.00.4266180.098271
348Santa Cruz95380.06851471.514.325.01084.346.7104.60.3983130.088227
349Santana25299.01543458.319.926.5862.48.662.60.4084340.087468
350São Vicente45500.02060461.017.346.2854.124.387.30.3882950.086720
352Porto Santo74163.04941893.531.824.41142.957.0285.01.1658980.707036
\n", "

341 rows × 11 columns

\n", "
" ], "text/plain": [ " Region price2018 price2000 purchacingPower2017 crime2019 \\\n", "0 Portugal 108016.0 53344 100.0 32.6 \n", "1 Continente 109778.0 53935 100.7 32.1 \n", "2 Norte 80153.0 47145 92.1 28.1 \n", "3 Alto Minho 50090.0 26874 79.7 28.7 \n", "4 Arcos de Valdevez 30437.0 22576 67.8 26.5 \n", ".. ... ... ... ... ... \n", "347 Ribeira Brava 34080.0 17867 68.4 25.9 \n", "348 Santa Cruz 95380.0 68514 71.5 14.3 \n", "349 Santana 25299.0 15434 58.3 19.9 \n", "350 São Vicente 45500.0 20604 61.0 17.3 \n", "352 Porto Santo 74163.0 49418 93.5 31.8 \n", "\n", " crime1993 wage2018 IMT2018percapita IMI2018percapita Waste2018 \\\n", "0 30.8 1166.9 97.6 146.8 0.512959 \n", "1 30.9 1170.3 100.3 149.2 0.510957 \n", "2 23.0 1056.6 55.0 120.2 0.469846 \n", "3 18.0 978.1 36.6 115.1 0.469507 \n", "4 16.2 882.1 20.2 96.9 0.396206 \n", ".. ... ... ... ... ... \n", "347 22.4 895.3 29.6 63.0 0.426618 \n", "348 25.0 1084.3 46.7 104.6 0.398313 \n", "349 26.5 862.4 8.6 62.6 0.408434 \n", "350 46.2 854.1 24.3 87.3 0.388295 \n", "352 24.4 1142.9 57.0 285.0 1.165898 \n", "\n", " WasteSel2018 \n", "0 0.109483 \n", "1 0.107298 \n", "2 0.081176 \n", "3 0.080048 \n", "4 0.052423 \n", ".. ... 
\n", "347 0.098271 \n", "348 0.088227 \n", "349 0.087468 \n", "350 0.086720 \n", "352 0.707036 \n", "\n", "[341 rows x 11 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "price2018 float64\n", "price2000 int64\n", "purchacingPower2017 float64\n", "crime2019 float64\n", "crime1993 float64\n", "wage2018 float64\n", "IMT2018percapita float64\n", "IMI2018percapita float64\n", "Waste2018 float64\n", "WasteSel2018 float64\n", "dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Feature matrix: every column except the region label.\n", "X = df.drop(columns=\"Region\")\n", "X.dtypes" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Standardise the features, then keep the first three principal components.\n", "pipeline = Pipeline(steps=[\n", "    ('scaling', StandardScaler()),\n", "    ('pca', PCA(n_components=3)),\n", "])\n", "\n", "principalComponents = pipeline.fit_transform(X)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
01.972826-1.1982040.136065
12.005852-1.2521150.087374
20.582760-0.974156-0.041264
3-0.432546-0.0463210.237438
4-1.4970810.2146080.228310
............
336-1.2676920.3853130.391458
3370.053560-1.485031-1.389320
338-1.8400920.5064800.216991
339-1.0729900.2515040.456633
3405.8024273.826544-1.314745
\n", "

341 rows × 3 columns

\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1.972826 -1.198204 0.136065\n", "1 2.005852 -1.252115 0.087374\n", "2 0.582760 -0.974156 -0.041264\n", "3 -0.432546 -0.046321 0.237438\n", "4 -1.497081 0.214608 0.228310\n", ".. ... ... ...\n", "336 -1.267692 0.385313 0.391458\n", "337 0.053560 -1.485031 -1.389320\n", "338 -1.840092 0.506480 0.216991\n", "339 -1.072990 0.251504 0.456633\n", "340 5.802427 3.826544 -1.314745\n", "\n", "[341 rows x 3 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(data=principalComponents)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.59161215, 0.16106816, 0.06103431])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Share of the total variance captured by each retained component.\n", "pipeline.named_steps['pca'].explained_variance_ratio_" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8137146211505711" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Combined share of variance explained by the three components.\n", "sum(pipeline.named_steps['pca'].explained_variance_ratio_)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Transposed so that each row is an input variable and each column a component.\n", "Comp = pd.DataFrame(pipeline.named_steps['pca'].components_.T)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
00.363071-0.145598-0.376953
10.314375-0.360249-0.312781
20.311255-0.4362240.167600
30.3025490.2055300.627603
40.322923-0.0532720.481302
50.210538-0.5227790.103148
60.3404750.208544-0.254828
70.3438710.212630-0.101260
80.3250840.387359-0.081231
90.3040990.318753-0.118401
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 0.363071 -0.145598 -0.376953\n", "1 0.314375 -0.360249 -0.312781\n", "2 0.311255 -0.436224 0.167600\n", "3 0.302549 0.205530 0.627603\n", "4 0.322923 -0.053272 0.481302\n", "5 0.210538 -0.522779 0.103148\n", "6 0.340475 0.208544 -0.254828\n", "7 0.343871 0.212630 -0.101260\n", "8 0.325084 0.387359 -0.081231\n", "9 0.304099 0.318753 -0.118401" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "Comp" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Region', 'price2018', 'price2000', 'purchacingPower2017', 'crime2019',\n", " 'crime1993', 'wage2018', 'IMT2018percapita', 'IMI2018percapita',\n", " 'Waste2018', 'WasteSel2018'],\n", " dtype='object')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns[0:]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "Comp['variables']=df.columns[1:]\n", "Comp=Comp.set_index('variables')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 float64\n", "1 float64\n", "2 float64\n", "dtype: object" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Comp.dtypes" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0 1 2
variables
price20180.363071-0.145598-0.376953
IMI2018percapita0.3438710.212630-0.101260
IMT2018percapita0.3404750.208544-0.254828
Waste20180.3250840.387359-0.081231
crime19930.322923-0.0532720.481302
price20000.314375-0.360249-0.312781
purchacingPower20170.311255-0.4362240.167600
WasteSel20180.3040990.318753-0.118401
crime20190.3025490.2055300.627603
wage20180.210538-0.5227790.103148
" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def moreThan01(val, threshold=0.1):\n", "    \"\"\"Return the CSS colour rule used to render one loading.\n", "\n", "    A value whose magnitude exceeds ``threshold`` is drawn in red, any\n", "    other value in black. The magnitude is compared because the sign of\n", "    a principal component is arbitrary, so -0.5 weighs as much as +0.5.\n", "\n", "    Parameters\n", "    ----------\n", "    val : float\n", "        One cell of the loadings table.\n", "    threshold : float, default 0.1\n", "        Magnitude above which the cell is highlighted.\n", "\n", "    Returns\n", "    -------\n", "    str\n", "        ``'color: red'`` or ``'color: black'``.\n", "    \"\"\"\n", "    color = 'red' if abs(val) > threshold else 'black'\n", "    return 'color: %s' % color\n", "\n", "\n", "# Order the variables by their loading on component 0, then 1, then 2.\n", "Comp1 = Comp.sort_values(by=[0, 1, 2], ascending=False)\n", "\n", "s = Comp1.style.applymap(moreThan01)\n", "s" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from numpy.linalg import svd\n", "\n", "\n", "def varimax(Phi, gamma = 1.0, q = 20, tol = 1e-6):\n", "    \"\"\"Iteratively rotate the columns of a loading matrix.\n", "\n", "    Starting from the identity, an orthogonal rotation ``R`` is rebuilt on\n", "    every pass from the SVD of the criterion matrix, until the sum of the\n", "    singular values stops improving. The product ``Phi @ R`` is returned.\n", "\n", "    Parameters\n", "    ----------\n", "    Phi : array-like of shape (p, k)\n", "        Loadings with one row per variable and one column per component.\n", "        DataFrames, arrays and nested lists are all accepted.\n", "    gamma : float, default 1.0\n", "        Weight of the column-normalisation term of the criterion.\n", "    q : int, default 20\n", "        Maximum number of iterations.\n", "    tol : float, default 1e-6\n", "        Iteration stops once the criterion grows by less than a factor\n", "        of ``1 + tol`` between two passes.\n", "\n", "    Returns\n", "    -------\n", "    numpy.ndarray of shape (p, k)\n", "        The rotated loadings.\n", "    \"\"\"\n", "    from scipy.linalg import svd\n", "    # Work on a plain float array so that any array-like input is handled.\n", "    Phi = np.asarray(Phi, dtype=float)\n", "    p, k = Phi.shape\n", "    R = np.eye(k)\n", "    d = 0\n", "    for _ in range(q):\n", "        d_old = d\n", "        Lambda = np.dot(Phi, R)\n", "        # Diagonal matrix holding the sum of squares of each rotated column.\n", "        col_norms = np.diag(np.diag(np.dot(Lambda.T, Lambda)))\n", "        target = Lambda**3 - (gamma / p) * np.dot(Lambda, col_norms)\n", "        u, s, vh = svd(np.dot(Phi.T, target))\n", "        R = np.dot(u, vh)\n", "        d = np.sum(s)\n", "        # Stop when the criterion no longer improves noticeably.\n", "        if d_old != 0 and d / d_old < 1 + tol:\n", "            break\n", "    return np.dot(Phi, R)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "# Rotate the variable-by-component loadings table.\n", "rot_comps = varimax(Comp)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "#rot_comps" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "Comp1 = pd.DataFrame(rot_comps)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
00.357865-0.347737-0.214767
10.182294-0.495110-0.219267
2-0.074974-0.5220580.192601
30.0451080.0502240.723261
4-0.008642-0.1869970.551114
5-0.164236-0.5434720.076999
60.470298-0.031651-0.046571
70.405968-0.0190520.092405
80.4739360.1381970.136421
90.4397620.0899160.081377
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 0.357865 -0.347737 -0.214767\n", "1 0.182294 -0.495110 -0.219267\n", "2 -0.074974 -0.522058 0.192601\n", "3 0.045108 0.050224 0.723261\n", "4 -0.008642 -0.186997 0.551114\n", "5 -0.164236 -0.543472 0.076999\n", "6 0.470298 -0.031651 -0.046571\n", "7 0.405968 -0.019052 0.092405\n", "8 0.473936 0.138197 0.136421\n", "9 0.439762 0.089916 0.081377" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Comp1" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "Comp1['variables']=df.columns[1:]\n", "Comp1s=Comp1.set_index('variables')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0 1 2
variables
Waste20180.4739360.1381970.136421
IMT2018percapita0.470298-0.031651-0.046571
WasteSel20180.4397620.0899160.081377
IMI2018percapita0.405968-0.0190520.092405
price20180.357865-0.347737-0.214767
price20000.182294-0.495110-0.219267
crime20190.0451080.0502240.723261
crime1993-0.008642-0.1869970.551114
purchacingPower2017-0.074974-0.5220580.192601
wage2018-0.164236-0.5434720.076999
" ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def moreThan05(val, threshold=0.35):\n", "    \"\"\"Return the CSS colour rule used to render one rotated loading.\n", "\n", "    A value whose magnitude exceeds ``threshold`` is drawn in red, any\n", "    other value in black. The cut-off defaults to 0.35; the '05' in the\n", "    function name does not match it and is kept only so that existing\n", "    calls keep working.\n", "\n", "    Parameters\n", "    ----------\n", "    val : float\n", "        One cell of the rotated loadings table.\n", "    threshold : float, default 0.35\n", "        Magnitude above which the cell is highlighted.\n", "\n", "    Returns\n", "    -------\n", "    str\n", "        ``'color: red'`` or ``'color: black'``.\n", "    \"\"\"\n", "    color = 'red' if abs(val) > threshold else 'black'\n", "    return 'color: %s' % color\n", "\n", "\n", "# Order the variables by their loading on component 0, then 1, then 2.\n", "Comp1s = Comp1s.sort_values(by=[0, 1, 2], ascending=False)\n", "\n", "s = Comp1s.style.applymap(moreThan05)\n", "s" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# NOTE: from here on Comp1 no longer holds loadings; it is rebound to the\n", "# PCA scores (one row per observation, one column per component).\n", "Comp1 = pd.DataFrame(principalComponents)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
01.972826-1.1982040.136065
12.005852-1.2521150.087374
20.582760-0.974156-0.041264
3-0.432546-0.0463210.237438
4-1.4970810.2146080.228310
............
336-1.2676920.3853130.391458
3370.053560-1.485031-1.389320
338-1.8400920.5064800.216991
339-1.0729900.2515040.456633
3405.8024273.826544-1.314745
\n", "

341 rows × 3 columns

\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1.972826 -1.198204 0.136065\n", "1 2.005852 -1.252115 0.087374\n", "2 0.582760 -0.974156 -0.041264\n", "3 -0.432546 -0.046321 0.237438\n", "4 -1.497081 0.214608 0.228310\n", ".. ... ... ...\n", "336 -1.267692 0.385313 0.391458\n", "337 0.053560 -1.485031 -1.389320\n", "338 -1.840092 0.506480 0.216991\n", "339 -1.072990 0.251504 0.456633\n", "340 5.802427 3.826544 -1.314745\n", "\n", "[341 rows x 3 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Comp1" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\profc\\AppData\\Roaming\\Python\\Python38\\site-packages\\sklearn\\cluster\\_kmeans.py:881: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.\n", " warnings.warn(\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from sklearn.cluster import KMeans\n", "from matplotlib import pyplot as plt\n", "\n", "# Elbow method: within-cluster sum of squares (inertia) for 1 to 10 clusters.\n", "# n_init is pinned to 10 because newer scikit-learn releases changed its\n", "# default, which would otherwise alter the curve from one install to another.\n", "cluster_counts = range(1, 11)\n", "wcss = [\n", "    KMeans(n_clusters=k, n_init=10, random_state=1).fit(Comp1).inertia_\n", "    for k in cluster_counts\n", "]\n", "\n", "plt.plot(cluster_counts, wcss)\n", "plt.title('Elbow Method')\n", "plt.xlabel('Number of clusters')\n", "plt.ylabel('WCSS')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4299584387809169" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn import metrics\n", "\n", "# Fit three clusters on the PCA scores and report the mean silhouette coefficient.\n", "kmeans_model = KMeans(n_clusters=3, n_init=10, random_state=1).fit(Comp1)\n", "labels = kmeans_model.labels_\n", "metrics.silhouette_score(Comp1, labels, metric='euclidean')" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Scores on the first versus the second principal component.\n", "plt.scatter(Comp1[0], Comp1[1])\n", "plt.xlabel('PC1')\n", "plt.ylabel('PC2')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Scores on the second versus the third principal component.\n", "plt.scatter(Comp1[1], Comp1[2])\n", "plt.xlabel('PC2')\n", "plt.ylabel('PC3')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Scores on the first versus the third principal component.\n", "plt.scatter(Comp1[0], Comp1[2])\n", "plt.xlabel('PC1')\n", "plt.ylabel('PC3')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# 3-D view of the scores on all three retained principal components.\n", "fig = plt.figure()\n", "ax = fig.add_subplot(projection='3d')\n", "ax.scatter(Comp1[0], Comp1[1], Comp1[2], c='r', marker='o')\n", "\n", "# Name each axis after the component it shows instead of a placeholder.\n", "ax.set_xlabel('PC1')\n", "ax.set_ylabel('PC2')\n", "ax.set_zlabel('PC3')\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }