{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "8sMmeoh0FeEC",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4b9d37fe-2945-4036-d544-0b338b17ecdc"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy: 0.5494652406417112\n",
            "\n",
            "Classification Report:\n",
            "               precision    recall  f1-score   support\n",
            "\n",
            "       anger       0.40      0.46      0.43       216\n",
            "     disgust       0.55      0.60      0.57       211\n",
            "        fear       0.64      0.65      0.65       216\n",
            "       guilt       0.46      0.43      0.44       211\n",
            "         joy       0.64      0.70      0.67       217\n",
            "     sadness       0.66      0.57      0.61       216\n",
            "       shame       0.49      0.43      0.46       209\n",
            "\n",
            "    accuracy                           0.55      1496\n",
            "   macro avg       0.55      0.55      0.55      1496\n",
            "weighted avg       0.55      0.55      0.55      1496\n",
            "\n"
          ]
        }
      ],
      "source": [
        "import pandas as pd\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.metrics import classification_report, accuracy_score\n",
        "\n",
        "file='https://github.com/masterfloss/data/raw/refs/heads/main/emotions.xlsx'\n",
        "\n",
        "df=pd.read_excel(file)\n",
        "\n",
        "X= df['text']\n",
        "y= df['emotions']\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    df['text'], df['emotions'],\n",
        "    test_size=0.2,\n",
        "    random_state=42,\n",
        "    stratify=df['emotions']\n",
        ")\n",
        "\n",
        "model = Pipeline([\n",
        "    ('tfidf', TfidfVectorizer(stop_words='english')),\n",
        "    ('clf', LogisticRegression(max_iter=1000))\n",
        "])\n",
        "\n",
        "model.fit(X_train, y_train)\n",
        "\n",
        "\n",
        "y_pred = model.predict(X_test)\n",
        "print(\"Accuracy:\", accuracy_score(y_test, y_pred))\n",
        "print(\"\\nClassification Report:\\n\", classification_report(y_test, y_pred))\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "model.predict([\"You are stupid\"])"
      ],
      "metadata": {
        "id": "Ve8sjQBNGQ9E",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c9f8ea73-22f3-40bf-8903-3a689336a9d4"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array(['anger'], dtype=object)"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "HLcWOLFUIwH-"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}