{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Case 01 ##\n", "\n", "Grouping countries according to their cultures.\n", "\n", "The Hofstede model characterizes national cultures based on studies of national values.\n", "\n", "These value dimensions are:\n", "\n", "* PDI - Power distance\n", "* IDV - Individualism\n", "* MAS - Masculinity (Motivation towards Achievement and Success)\n", "* UAI - Uncertainty avoidance\n", "* LTOWVS - Long-Term Orientation\n", "* IVR - Indulgence vs. Restraint\n", "\n", "The study provides a score on each of these dimensions for every country. Is it possible to identify groups of countries with similar cultures?\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from sklearn.cluster import KMeans\n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "file='https://raw.githubusercontent.com/masterfloss/data/main/culture2015.csv'\n", "df=pd.read_csv(file, sep=\";\")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ctrcountrypdiidvmasuailtowvsivr
0AFEAfrica East642741523240
1AFWAfrica West77204654978
2ALBAlbania#NULL!#NULL!#NULL!#NULL!6115
3ALGAlgeria#NULL!#NULL!#NULL!#NULL!2632
4ANDAndorra#NULL!#NULL!#NULL!#NULL!#NULL!65
\n", "
" ], "text/plain": [ " ctr country pdi idv mas uai ltowvs ivr\n", "0 AFE Africa East 64 27 41 52 32 40\n", "1 AFW Africa West 77 20 46 54 9 78\n", "2 ALB Albania #NULL! #NULL! #NULL! #NULL! 61 15\n", "3 ALG Algeria #NULL! #NULL! #NULL! #NULL! 26 32\n", "4 AND Andorra #NULL! #NULL! #NULL! #NULL! #NULL! 65" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ctr object\n", "country object\n", "pdi object\n", "idv object\n", "mas object\n", "uai object\n", "ltowvs object\n", "ivr object\n", "dtype: object" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "#df.iloc[:,2]=pd.to_numeric(df.iloc[:,2], errors='coerce')\n", "\n", "for i in range(2,8):\n", " df.iloc[:,i]=pd.to_numeric(df.iloc[:,i], errors='coerce')\n" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ctrcountrypdiidvmasuailtowvsivr
0AFEAfrica East64.027.041.052.032.040.0
1AFWAfrica West77.020.046.054.09.078.0
5ARAArab countries80.038.053.068.023.034.0
6ARGArgentina49.046.056.086.020.062.0
8AULAustralia38.090.061.051.021.071.0
...........................
102TURTurkey66.037.045.085.046.049.0
103USAU.S.A.40.091.062.046.026.068.0
106URUUruguay61.036.038.0100.026.053.0
107VENVenezuela81.012.073.076.016.0100.0
108VIEVietnam70.020.040.030.057.035.0
\n", "

65 rows × 8 columns

\n", "
" ], "text/plain": [ " ctr country pdi idv mas uai ltowvs ivr\n", "0 AFE Africa East 64.0 27.0 41.0 52.0 32.0 40.0\n", "1 AFW Africa West 77.0 20.0 46.0 54.0 9.0 78.0\n", "5 ARA Arab countries 80.0 38.0 53.0 68.0 23.0 34.0\n", "6 ARG Argentina 49.0 46.0 56.0 86.0 20.0 62.0\n", "8 AUL Australia 38.0 90.0 61.0 51.0 21.0 71.0\n", ".. ... ... ... ... ... ... ... ...\n", "102 TUR Turkey 66.0 37.0 45.0 85.0 46.0 49.0\n", "103 USA U.S.A. 40.0 91.0 62.0 46.0 26.0 68.0\n", "106 URU Uruguay 61.0 36.0 38.0 100.0 26.0 53.0\n", "107 VEN Venezuela 81.0 12.0 73.0 76.0 16.0 100.0\n", "108 VIE Vietnam 70.0 20.0 40.0 30.0 57.0 35.0\n", "\n", "[65 rows x 8 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=df.dropna()\n", "df" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "df=df.reset_index()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "df1=df.iloc[:,3:9]\n" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "wcss = []\n", "for i in range(1, 11):\n", " model = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)\n", " model.fit(df1)\n", " wcss.append(model.inertia_)\n", "plt.plot(range(1, 11), wcss)\n", "plt.title('Elbow Method')\n", "plt.xlabel('Number of clusters')\n", "plt.ylabel('WCSS')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[32.09090909 77.81818182 39.63636364 43.45454545 38.09090909 67.63636364]\n", " [69.1 35.55 42.3 85.7 41.1 40.3 ]\n", " [76.14285714 25.21428571 54.78571429 46.57142857 61.21428571 32.21428571]\n", " [66.85714286 22.28571429 58. 75.28571429 16.42857143 84.14285714]\n", " [45.07692308 65.07692308 56.15384615 73.38461538 72. 39. ]]\n" ] } ], "source": [ "kmeans = KMeans(n_clusters=5).fit(df1)\n", "centroids = kmeans.cluster_centers_\n", "print(centroids)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "pred_y =kmeans.predict(df1)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "df2=pred_y.shape" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(65, 9)" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "df2=pd.DataFrame(pred_y, columns=['groups'])" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
groups
02
13
21
33
40
......
601
610
621
633
642
\n", "

65 rows × 1 columns

\n", "
" ], "text/plain": [ " groups\n", "0 2\n", "1 3\n", "2 1\n", "3 3\n", "4 0\n", ".. ...\n", "60 1\n", "61 0\n", "62 1\n", "63 3\n", "64 2\n", "\n", "[65 rows x 1 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "XY2=df.merge(df2,left_index=True, right_index=True)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexctrcountrypdiidvmasuailtowvsivrgroups
00AFEAfrica East64.027.041.052.032.040.02
11AFWAfrica West77.020.046.054.09.078.03
25ARAArab countries80.038.053.068.023.034.01
36ARGArgentina49.046.056.086.020.062.03
48AULAustralia38.090.061.051.021.071.00
.................................
60102TURTurkey66.037.045.085.046.049.01
61103USAU.S.A.40.091.062.046.026.068.00
62106URUUruguay61.036.038.0100.026.053.01
63107VENVenezuela81.012.073.076.016.0100.03
64108VIEVietnam70.020.040.030.057.035.02
\n", "

65 rows × 10 columns

\n", "
" ], "text/plain": [ " index ctr country pdi idv mas uai ltowvs ivr groups\n", "0 0 AFE Africa East 64.0 27.0 41.0 52.0 32.0 40.0 2\n", "1 1 AFW Africa West 77.0 20.0 46.0 54.0 9.0 78.0 3\n", "2 5 ARA Arab countries 80.0 38.0 53.0 68.0 23.0 34.0 1\n", "3 6 ARG Argentina 49.0 46.0 56.0 86.0 20.0 62.0 3\n", "4 8 AUL Australia 38.0 90.0 61.0 51.0 21.0 71.0 0\n", ".. ... ... ... ... ... ... ... ... ... ...\n", "60 102 TUR Turkey 66.0 37.0 45.0 85.0 46.0 49.0 1\n", "61 103 USA U.S.A. 40.0 91.0 62.0 46.0 26.0 68.0 0\n", "62 106 URU Uruguay 61.0 36.0 38.0 100.0 26.0 53.0 1\n", "63 107 VEN Venezuela 81.0 12.0 73.0 76.0 16.0 100.0 3\n", "64 108 VIE Vietnam 70.0 20.0 40.0 30.0 57.0 35.0 2\n", "\n", "[65 rows x 10 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "XY2" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexctrcountrypdiidvmasuailtowvsivrgroups
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [index, ctr, country, pdi, idv, mas, uai, ltowvs, ivr, groups]\n", "Index: []" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "XY2[XY2.groups == 6]" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexctrcountrypdiidvmasuailtowvsivrgroups
59AUTAustria11.055.079.070.060.063.04
713BELBelgium65.075.054.094.082.057.04
1528CZECzech Rep57.058.057.074.070.029.04
1835ESTEstonia40.060.030.060.082.016.04
2037FRAFrance68.071.043.086.063.048.04
2139GERGermany35.067.066.065.083.040.04
2546HUNHungary46.080.088.082.058.031.04
3054ITAItaly50.076.070.075.061.030.04
3156JPNJapan54.046.095.092.088.042.04
3360LATLatvia44.070.09.063.069.013.04
3461LITLithuania42.060.019.065.082.016.04
3562LUXLuxembourg40.060.050.070.064.056.04
5695SWISwitzerland34.068.070.058.074.066.04
\n", "
" ], "text/plain": [ " index ctr country pdi idv mas uai ltowvs ivr groups\n", "5 9 AUT Austria 11.0 55.0 79.0 70.0 60.0 63.0 4\n", "7 13 BEL Belgium 65.0 75.0 54.0 94.0 82.0 57.0 4\n", "15 28 CZE Czech Rep 57.0 58.0 57.0 74.0 70.0 29.0 4\n", "18 35 EST Estonia 40.0 60.0 30.0 60.0 82.0 16.0 4\n", "20 37 FRA France 68.0 71.0 43.0 86.0 63.0 48.0 4\n", "21 39 GER Germany 35.0 67.0 66.0 65.0 83.0 40.0 4\n", "25 46 HUN Hungary 46.0 80.0 88.0 82.0 58.0 31.0 4\n", "30 54 ITA Italy 50.0 76.0 70.0 75.0 61.0 30.0 4\n", "31 56 JPN Japan 54.0 46.0 95.0 92.0 88.0 42.0 4\n", "33 60 LAT Latvia 44.0 70.0 9.0 63.0 69.0 13.0 4\n", "34 61 LIT Lithuania 42.0 60.0 19.0 65.0 82.0 16.0 4\n", "35 62 LUX Luxembourg 40.0 60.0 50.0 70.0 64.0 56.0 4\n", "56 95 SWI Switzerland 34.0 68.0 70.0 58.0 74.0 66.0 4" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "XY2[XY2.groups == 4]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "centroids = kmeans.cluster_centers_" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[32.09090909, 77.81818182, 39.63636364, 43.45454545, 38.09090909,\n", " 67.63636364],\n", " [69.1 , 35.55 , 42.3 , 85.7 , 41.1 ,\n", " 40.3 ],\n", " [76.14285714, 25.21428571, 54.78571429, 46.57142857, 61.21428571,\n", " 32.21428571],\n", " [66.85714286, 22.28571429, 58. , 75.28571429, 16.42857143,\n", " 84.14285714],\n", " [45.07692308, 65.07692308, 56.15384615, 73.38461538, 72. ,\n", " 39. 
]])" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "centroids" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "df2=pd.DataFrame(pred_y, columns=['groups'])" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
groups
02
13
21
33
40
......
601
610
621
633
642
\n", "

65 rows × 1 columns

\n", "
" ], "text/plain": [ " groups\n", "0 2\n", "1 3\n", "2 1\n", "3 3\n", "4 0\n", ".. ...\n", "60 1\n", "61 0\n", "62 1\n", "63 3\n", "64 2\n", "\n", "[65 rows x 1 columns]" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }