{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "8sMmeoh0FeEC",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4b9d37fe-2945-4036-d544-0b338b17ecdc"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy: 0.5494652406417112\n",
            "\n",
            "Classification Report:\n",
            " precision recall f1-score support\n",
            "\n",
            " anger 0.40 0.46 0.43 216\n",
            " disgust 0.55 0.60 0.57 211\n",
            " fear 0.64 0.65 0.65 216\n",
            " guilt 0.46 0.43 0.44 211\n",
            " joy 0.64 0.70 0.67 217\n",
            " sadness 0.66 0.57 0.61 216\n",
            " shame 0.49 0.43 0.46 209\n",
            "\n",
            " accuracy 0.55 1496\n",
            " macro avg 0.55 0.55 0.55 1496\n",
            "weighted avg 0.55 0.55 0.55 1496\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Train and evaluate a text classifier: TF-IDF features fed into logistic regression,\n",
        "# predicting the 'emotions' label from the 'text' column.\n",
        "import pandas as pd\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.metrics import classification_report, accuracy_score\n",
        "\n",
        "# Tunable settings, kept in one place instead of inline magic numbers.\n",
        "TEST_SIZE = 0.2  # fraction of rows held out for evaluation\n",
        "SEED = 42        # fixed seed so the split is reproducible across runs\n",
        "\n",
        "# Excel workbook fetched over HTTP each time this cell runs (needs network access).\n",
        "file='https://github.com/masterfloss/data/raw/refs/heads/main/emotions.xlsx'\n",
        "\n",
        "df=pd.read_excel(file)\n",
        "\n",
        "# Features (raw text) and target labels; everything below uses these two names\n",
        "# so the split, the stratification and the model all see the same columns.\n",
        "X= df['text']\n",
        "y= df['emotions']\n",
        "\n",
        "# Stratify on the labels so the class proportions are preserved in both splits.\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    X, y,\n",
        "    test_size=TEST_SIZE,\n",
        "    random_state=SEED,\n",
        "    stratify=y\n",
        ")\n",
        "\n",
        "# The vectorizer lives inside the pipeline, so it is fitted on the training\n",
        "# split only and the held-out texts never influence the vocabulary.\n",
        "model = Pipeline([\n",
        "    ('tfidf', TfidfVectorizer(stop_words='english')),\n",
        "    # max_iter is set explicitly to give the solver a generous iteration budget.\n",
        "    ('clf', LogisticRegression(max_iter=1000))\n",
        "])\n",
        "\n",
        "model.fit(X_train, y_train)\n",
        "\n",
        "# Evaluate on the held-out split.\n",
        "y_pred = model.predict(X_test)\n",
        "print(\"Accuracy:\", accuracy_score(y_test, y_pred))\n",
        "print(\"\\nClassification Report:\\n\", classification_report(y_test, y_pred))\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Spot-check the fitted pipeline on one hand-written sentence.\n",
        "# Requires `model` from the training cell above.\n",
        "model.predict([\"You are stupid\"])"
      ],
      "metadata": {
        "id": "Ve8sjQBNGQ9E",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c9f8ea73-22f3-40bf-8903-3a689336a9d4"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array(['anger'], dtype=object)"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "HLcWOLFUIwH-"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}