{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from sklearn.cluster import KMeans" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df=pd.read_csv(\"culture2015.csv\", sep=\";\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ctrcountrypdiidvmasuailtowvsivr
0AFEAfrica East642741523240
1AFWAfrica West77204654978
2ALBAlbania#NULL!#NULL!#NULL!#NULL!6115
3ALGAlgeria#NULL!#NULL!#NULL!#NULL!2632
4ANDAndorra#NULL!#NULL!#NULL!#NULL!#NULL!65
\n", "
" ], "text/plain": [ " ctr country pdi idv mas uai ltowvs ivr\n", "0 AFE Africa East 64 27 41 52 32 40\n", "1 AFW Africa West 77 20 46 54 9 78\n", "2 ALB Albania #NULL! #NULL! #NULL! #NULL! 61 15\n", "3 ALG Algeria #NULL! #NULL! #NULL! #NULL! 26 32\n", "4 AND Andorra #NULL! #NULL! #NULL! #NULL! #NULL! 65" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ctr object\n", "country object\n", "pdi object\n", "idv object\n", "mas object\n", "uai object\n", "ltowvs object\n", "ivr object\n", "dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#df.iloc[:,2]=pd.to_numeric(df.iloc[:,2], errors='coerce')\n", "\n", "for i in range(2,8):\n", " df.iloc[:,i]=pd.to_numeric(df.iloc[:,i], errors='coerce')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df=df.dropna()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df=df.reset_index()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df1=df.iloc[:,3:9]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "kmeans = KMeans(n_clusters=6).fit(df1)\n", "centroids = kmeans.cluster_centers_\n", "print(centroids)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pred_y =kmeans.predict(df1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df2=pred_y.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df2=pd.DataFrame(pred_y, columns=['groups'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "XY2=df.merge(df2,left_index=True, right_index=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "XY2[XY2.groups == 6]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "XY2[XY2.groups == 4]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "centroids = kmeans.cluster_centers_" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "centroids" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df2=pd.DataFrame(pred_y, columns=['groups'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#group=[i for i in range(0,8)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df2=pd.DataFrame(centroids,columns=[\"pdi\",\"idv\",\"mas\",\"uai\",\"ltowvs\",\"ivr\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }