{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from sklearn.cluster import KMeans\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the per-country table; the file uses ';' as the field separator.\n", "csv_path = \"culture2015.csv\"\n", "df = pd.read_csv(csv_path, sep=\";\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ctrcountrypdiidvmasuailtowvsivr
0AFEAfrica East642741523240
1AFWAfrica West77204654978
2ALBAlbania#NULL!#NULL!#NULL!#NULL!6115
3ALGAlgeria#NULL!#NULL!#NULL!#NULL!2632
4ANDAndorra#NULL!#NULL!#NULL!#NULL!#NULL!65
\n", "
" ], "text/plain": [ " ctr country pdi idv mas uai ltowvs ivr\n", "0 AFE Africa East 64 27 41 52 32 40\n", "1 AFW Africa West 77 20 46 54 9 78\n", "2 ALB Albania #NULL! #NULL! #NULL! #NULL! 61 15\n", "3 ALG Algeria #NULL! #NULL! #NULL! #NULL! 26 32\n", "4 AND Andorra #NULL! #NULL! #NULL! #NULL! #NULL! 65" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ctr object\n", "country object\n", "pdi object\n", "idv object\n", "mas object\n", "uai object\n", "ltowvs object\n", "ivr object\n", "dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# The CSV marks missing scores with #NULL!, so every score column is read as text.\n", "# Coerce the six score columns to numbers (unparseable markers become NaN).\n", "# The columns are replaced as a whole rather than written through .iloc, because an\n", "# in-place .iloc assignment keeps the old object dtype on recent pandas versions.\n", "dimension_cols = ['pdi', 'idv', 'mas', 'uai', 'ltowvs', 'ivr']\n", "df[dimension_cols] = df[dimension_cols].apply(pd.to_numeric, errors='coerce')\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Keep only the rows that have a value in every column.\n", "df = df.dropna()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Renumber the rows 0..n-1 so the index-based merge with the cluster labels lines up;\n", "# the previous row labels are kept in a new 'index' column.\n", "df = df.reset_index()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Feature matrix for clustering: the six score columns, selected by name so the\n", "# selection does not depend on how many columns precede them.\n", "df1 = df[dimension_cols]\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[82.9 27.7 59.2 37.6 58.2 33.3 ]\n", " [67.72222222 33.38888889 43.66666667 81.88888889 37.55555556 37.55555556]\n", " [58.14285714 42. 31.14285714 74.57142857 82.28571429 22.71428571]\n", " [32.09090909 77.81818182 39.63636364 43.45454545 38.09090909 67.63636364]\n", " [46.90909091 65. 65.36363636 78.36363636 68.18181818 48. 
]\n", " [66.375 22.375 54.25 76.625 18.25 82.125 ]]\n" ] } ], "source": [ "# Fix the seed and the number of restarts so the fit is repeatable: without a\n", "# random_state the label numbers and centroids can change between runs, and the\n", "# cells below filter on specific label numbers.\n", "kmeans = KMeans(n_clusters=6, n_init=10, random_state=0).fit(df1)\n", "centroids = kmeans.cluster_centers_\n", "print(centroids)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Cluster label (0-5) for every row of the feature matrix.\n", "pred_y = kmeans.predict(df1)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# One label per row; compare with df.shape in the next cell.\n", "pred_y.shape" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(65, 9)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Labels as a one-column frame with a default 0..n-1 index.\n", "df2 = pd.DataFrame(pred_y, columns=['groups'])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Attach each row's cluster label by joining on the row index.\n", "XY2 = df.merge(df2, left_index=True, right_index=True)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexctrcountrypdiidvmasuailtowvsivrgroups
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [index, ctr, country, pdi, idv, mas, uai, ltowvs, ivr, groups]\n", "Index: []" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# KMeans labels are zero-based, so the six clusters are numbered 0-5 and a\n", "# filter on 6 can never match; 5 is the last group.\n", "XY2[XY2.groups == 5]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexctrcountrypdiidvmasuailtowvsivrgroups
59AUTAustria11.055.079.070.060.063.04
713BELBelgium65.075.054.094.082.057.04
1528CZECzech Rep57.058.057.074.070.029.04
2037FRAFrance68.071.043.086.063.048.04
2139GERGermany35.067.066.065.083.040.04
2546HUNHungary46.080.088.082.058.031.04
3054ITAItaly50.076.070.075.061.030.04
3156JPNJapan54.046.095.092.088.042.04
3562LUXLuxembourg40.060.050.070.064.056.04
3766MLTMalta56.059.047.096.047.066.04
5695SWISwitzerland34.068.070.058.074.066.04
\n", "
" ], "text/plain": [ " index ctr country pdi idv mas uai ltowvs ivr groups\n", "5 9 AUT Austria 11.0 55.0 79.0 70.0 60.0 63.0 4\n", "7 13 BEL Belgium 65.0 75.0 54.0 94.0 82.0 57.0 4\n", "15 28 CZE Czech Rep 57.0 58.0 57.0 74.0 70.0 29.0 4\n", "20 37 FRA France 68.0 71.0 43.0 86.0 63.0 48.0 4\n", "21 39 GER Germany 35.0 67.0 66.0 65.0 83.0 40.0 4\n", "25 46 HUN Hungary 46.0 80.0 88.0 82.0 58.0 31.0 4\n", "30 54 ITA Italy 50.0 76.0 70.0 75.0 61.0 30.0 4\n", "31 56 JPN Japan 54.0 46.0 95.0 92.0 88.0 42.0 4\n", "35 62 LUX Luxembourg 40.0 60.0 50.0 70.0 64.0 56.0 4\n", "37 66 MLT Malta 56.0 59.0 47.0 96.0 47.0 66.0 4\n", "56 95 SWI Switzerland 34.0 68.0 70.0 58.0 74.0 66.0 4" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows that were assigned to cluster 4.\n", "XY2.loc[XY2[\"groups\"] == 4]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# Cluster centres of the fitted model: one row per cluster, one column per feature.\n", "centroids = kmeans.cluster_centers_" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[82.9 , 27.7 , 59.2 , 37.6 , 58.2 ,\n", " 33.3 ],\n", " [67.72222222, 33.38888889, 43.66666667, 81.88888889, 37.55555556,\n", " 37.55555556],\n", " [58.14285714, 42. , 31.14285714, 74.57142857, 82.28571429,\n", " 22.71428571],\n", " [32.09090909, 77.81818182, 39.63636364, 43.45454545, 38.09090909,\n", " 67.63636364],\n", " [46.90909091, 65. , 65.36363636, 78.36363636, 68.18181818,\n", " 48. 
],\n", " [66.375 , 22.375 , 54.25 , 76.625 , 18.25 ,\n", " 82.125 ]])" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "centroids" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# Centroids as a labelled table: the row number is the cluster label and the\n", "# columns are the six score columns the model was fitted on, in the same order.\n", "df2 = pd.DataFrame(centroids, columns=[\"pdi\", \"idv\", \"mas\", \"uai\", \"ltowvs\", \"ivr\"])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pdiidvmasuailtowvsivr
082.90000027.70000059.20000037.60000058.20000033.300000
167.72222233.38888943.66666781.88888937.55555637.555556
258.14285742.00000031.14285774.57142982.28571422.714286
332.09090977.81818239.63636443.45454538.09090967.636364
446.90909165.00000065.36363678.36363668.18181848.000000
566.37500022.37500054.25000076.62500018.25000082.125000
\n", "
" ], "text/plain": [ " pdi idv mas uai ltowvs ivr\n", "0 82.900000 27.700000 59.200000 37.600000 58.200000 33.300000\n", "1 67.722222 33.388889 43.666667 81.888889 37.555556 37.555556\n", "2 58.142857 42.000000 31.142857 74.571429 82.285714 22.714286\n", "3 32.090909 77.818182 39.636364 43.454545 38.090909 67.636364\n", "4 46.909091 65.000000 65.363636 78.363636 68.181818 48.000000\n", "5 66.375000 22.375000 54.250000 76.625000 18.250000 82.125000" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Centroid table: one row per cluster, one column per score.\n", "df2" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }