{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "##### Lab05 ##\n", "\n", "* Consider the following variables, corresponding to information of a football player. The date is in the following URL: 'https://raw.githubusercontent.com/masterfloss/data/main/jogadores.csv'\n", "\n", "The data store is:\n", "\n", "- 'Idade' - age \n", "\n", "- 'Altura' - height \n", "\n", "- 'Epoca' -season football\n", "\n", "- 'Minutos' - minutes played\n", "\n", "- 'Valor de Mercado' – market value\n", "\n", "- 'Ser Transferido' - being transferred\n", "\n", "\n", "#### The purpose is to explain the market value of the player. ####\n", "\n", "from sklearn.XXXX import XXXXX\n", "\n", "model = XXXXX()\n", "\n", "result = model.fit(features,target)\n", "\n", "y_pred = result.predict(X_test)\n", "\n", "\n", "##### Note: Possible algorithms #####\n", "\n", "OLS\n", "RIDGE\n", "LASSO\n", "\n", "See: https://scikit-learn.org/stable/supervised_learning.html\n", "\n", "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html\n", "\n", "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html\n", "\n", "#### To evaluate the predictive quality of the model, you may calculate the R2 and MSE scores: ####\n", "\n", "from sklearn.metrics import mean_squared_error, r2_score\n", "\n", "y_pred1 = result.predict(X_train)\n", "\n", "r2_score(y_train, y_pred1)\n", "\n", "y_pred = result.predict(X_test)\n", "\n", "r2_score(y_test, y_pred)\n", "\n", "mean_squared_error(y_test, y_pred)\n", "\n", "\n", "**Data collected by Pedro Carapau**\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", "import pandas as pd\n", "from sklearn import linear_model\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_squared_error, r2_score\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import PolynomialFeatures" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# read data\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# preprocessing, train test, ...(not normalized or standarize)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# object creation and model fit OLS\n", "\n", "# calculate R2, MSE\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# result.coef_" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# object creation and model fit Ridge with parameter alpha =0.5\n", "\n", "# calculate R2, MSE\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# object creation and model fit Lasso with parameter alpha =0.5\n", "\n", "# calculate R2, MSE\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# object creation and model fit BayesianRidge with parameter compute_score=True\n", "\n", "# calculate R2, MSE\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Polynomial Regression\n", "# if an input sample is two dimensional and of the form [a, b], \n", "# the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Object creation Neural Network\n", "from sklearn.neural_network import MLPRegressor\n", "model = MLPRegressor(random_state=1,hidden_layer_sizes = (9,7), activation='relu', max_iter=5000, solver='lbfgs')\n", "result=model.fit(X_train, y_train)\n", "# calculate R2, MSE\n", "y_pred=result.predict(X_test)\n", "r2_score(y_train, y_train),r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Standardize for the specific case of OLS\n", "from sklearn.preprocessing import StandardScaler\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }