{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Game records: user clustering and regression\n",
    "\n",
    "Loads the game-records spreadsheet, groups its rows with K-Means on follower/credibility features, and fits OLS regressions for current followers and current credibility."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RR59OgSFRpoV"
   },
   "outputs": [],
   "source": [
    "# All imports live here so a fresh kernel only needs to run top to bottom.\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# Remote Excel workbook read by the next cell.\n",
    "file = \"https://github.com/masterfloss/data/raw/refs/heads/main/game_records2.xlsx\""
   ],
   "metadata": {
    "id": "OtDiFHWvR17s"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "df = pd.read_excel(file)"
   ],
   "metadata": {
    "id": "96nBPOM1Rw79"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## K-Means clustering"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# 1. Select features for clustering\n",
    "cluster_features = [\n",
    "    'followers_change', 'credibility_change',\n",
    "    'current_followers', 'current_credibility',\n",
    "    'followers_round_10', 'credibility_round_10',\n",
    "]\n",
    "X_cluster = df[cluster_features]\n",
    "\n",
    "# 2. Standardize the features before clustering\n",
    "scaler = StandardScaler()\n",
    "X_scaled = scaler.fit_transform(X_cluster)\n",
    "\n",
    "# 3. Apply K-Means clustering (fixed random_state and n_init for repeatable labels)\n",
    "# NOTE(review): 3 is hard-coded; nothing in this notebook (elbow, silhouette, ...) justifies it.\n",
    "optimal_clusters = 3\n",
    "kmeans = KMeans(n_clusters=optimal_clusters, random_state=42, n_init=10)\n",
    "clusters = kmeans.fit_predict(X_scaled)\n",
    "\n",
    "# 4. Add the cluster labels to the original DataFrame\n",
    "df['cluster_label'] = clusters\n",
    "\n",
    "# Display the first few rows with the new cluster labels\n",
    "df.head()"
   ],
   "metadata": {
    "id": "By6ju3aWTsWw"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Scatter of the two 'current' metrics, coloured by the K-Means label.\n",
    "fig, ax = plt.subplots()\n",
    "points = ax.scatter(\n",
    "    df['current_followers'],\n",
    "    df['current_credibility'],\n",
    "    c=df['cluster_label'],\n",
    "    cmap='viridis',\n",
    ")\n",
    "ax.set_xlabel('Current Followers')\n",
    "ax.set_ylabel('Current Credibility')\n",
    "ax.set_title('User Clusters based on Current Followers and Credibility')\n",
    "fig.colorbar(points, ax=ax, label='Cluster Label')\n",
    "plt.show()"
   ],
   "metadata": {
    "id": "MPinQhGrUWNc"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## OLS regressions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Independent variables shared by both regressions below.\n",
    "regression_features = ['followers_change', 'credibility_change', 'round_number']\n",
    "\n",
    "\n",
    "def fit_ols(data, predictors, target):\n",
    "    \"\"\"Fit an OLS regression of `target` on `predictors` plus an intercept.\n",
    "\n",
    "    Args:\n",
    "        data: DataFrame holding the predictor and target columns.\n",
    "        predictors: List of column names used as independent variables.\n",
    "        target: Column name of the dependent variable.\n",
    "\n",
    "    Returns:\n",
    "        The fitted statsmodels results object.\n",
    "    \"\"\"\n",
    "    # Add a constant column so the model estimates an intercept.\n",
    "    design = sm.add_constant(data[predictors])\n",
    "    return sm.OLS(data[target], design).fit()"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# Regression for current_followers\n",
    "target_followers = 'current_followers'\n",
    "model_followers = fit_ols(df, regression_features, target_followers)\n",
    "print(\"Regression Results for Current Followers:\")\n",
    "print(model_followers.summary())"
   ],
   "metadata": {
    "id": "K--kQbU2tfbG"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Regression for current_credibility\n",
    "target_credibility = 'current_credibility'\n",
    "model_credibility = fit_ols(df, regression_features, target_credibility)\n",
    "print(\"\\nRegression Results for Current Credibility:\")\n",
    "print(model_credibility.summary())"
   ],
   "metadata": {
    "id": "x549rNp2uv2Y"
   },
   "execution_count": null,
   "outputs": []
  }
 ]
}