{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "RR59OgSFRpoV"
      },
      "outputs": [],
      "source": [
        "import pandas as pd"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Location of the Excel workbook (raw GitHub URL) that the next cell loads.\n",
        "file = \"https://github.com/masterfloss/data/raw/refs/heads/main/game_records2.xlsx\""
      ],
      "metadata": {
        "id": "OtDiFHWvR17s"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Fetch the workbook at `file` and parse it into a DataFrame (first sheet by default).\n",
        "df = pd.read_excel(io=file)"
      ],
      "metadata": {
        "id": "96nBPOM1Rw79"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from sklearn.cluster import KMeans\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "# --- K-Means segmentation of the rows in `df` ---\n",
        "\n",
        "# Columns fed to the clustering model.\n",
        "features = [\n",
        "    'followers_change',\n",
        "    'credibility_change',\n",
        "    'current_followers',\n",
        "    'current_credibility',\n",
        "    'followers_round_10',\n",
        "    'credibility_round_10',\n",
        "]\n",
        "X = df[features]\n",
        "\n",
        "# Standardise every column to zero mean / unit variance so that no single\n",
        "# feature dominates the distances K-Means works with.\n",
        "scaler = StandardScaler().fit(X)\n",
        "X_scaled = scaler.transform(X)\n",
        "\n",
        "# Number of clusters to fit.\n",
        "# NOTE(review): how 3 was chosen is not shown in this notebook - confirm.\n",
        "optimal_clusters = 3\n",
        "\n",
        "# Fixed random_state keeps the labels reproducible across runs; n_init=10\n",
        "# restarts the algorithm ten times and keeps the best solution.\n",
        "kmeans = KMeans(n_clusters=optimal_clusters, random_state=42, n_init=10)\n",
        "clusters = kmeans.fit_predict(X_scaled)\n",
        "\n",
        "# Store the label of each row next to the original data.\n",
        "df['cluster_label'] = clusters\n",
        "\n",
        "# Preview the first rows together with their new cluster label.\n",
        "display(df.head())"
      ],
      "metadata": {
        "id": "By6ju3aWTsWw"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Scatter the rows in follower/credibility space, coloured by their K-Means cluster.\n",
        "fig, ax = plt.subplots()\n",
        "points = ax.scatter(\n",
        "    df['current_followers'],\n",
        "    df['current_credibility'],\n",
        "    c=df['cluster_label'],\n",
        "    cmap='viridis',\n",
        ")\n",
        "ax.set_xlabel('Current Followers')\n",
        "ax.set_ylabel('Current Credibility')\n",
        "ax.set_title('User Clusters based on Current Followers and Credibility')\n",
        "\n",
        "# Cluster ids are categorical: pin the colorbar ticks to the ids that actually\n",
        "# occur so the bar does not advertise meaningless fractional labels (0.5, 1.5, ...).\n",
        "cluster_ids = sorted(df['cluster_label'].unique())\n",
        "fig.colorbar(points, ax=ax, label='Cluster Label', ticks=cluster_ids)\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "MPinQhGrUWNc"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import statsmodels.api as sm\n",
        "\n",
        "# OLS regression of current_followers on followers_change, credibility_change and round_number.\n",
        "features = ['followers_change', 'credibility_change', 'round_number']\n",
        "target_followers = 'current_followers'\n",
        "\n",
        "# Design matrix; add_constant supplies the column that lets the model fit an intercept.\n",
        "X = sm.add_constant(df[features])\n",
        "\n",
        "# Build the model, fit it, and print the full summary table.\n",
        "ols_followers = sm.OLS(endog=df[target_followers], exog=X)\n",
        "model_followers = ols_followers.fit()\n",
        "print(\"Regression Results for Current Followers:\")\n",
        "print(model_followers.summary())"
      ],
      "metadata": {
        "id": "K--kQbU2tfbG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# OLS regression of current_credibility on followers_change, credibility_change and round_number.\n",
        "# Relies on `sm` (statsmodels.api) having been imported by the followers-regression cell.\n",
        "features = ['followers_change', 'credibility_change', 'round_number']\n",
        "target_credibility = 'current_credibility'\n",
        "\n",
        "# Design matrix; add_constant supplies the column that lets the model fit an intercept.\n",
        "X = sm.add_constant(df[features])\n",
        "\n",
        "# Build the model, fit it, and print the full summary table.\n",
        "ols_credibility = sm.OLS(endog=df[target_credibility], exog=X)\n",
        "model_credibility = ols_credibility.fit()\n",
        "print(\"\\nRegression Results for Current Credibility:\")\n",
        "print(model_credibility.summary())"
      ],
      "metadata": {
        "id": "x549rNp2uv2Y"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}