Last active
March 31, 2021 00:54
-
-
Save pikonha/afbf2270f1beb6f80b2b9add4b1a7267 to your computer and use it in GitHub Desktop.
cancer-analysis.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "Detecção de Câncer de Mama.ipynb", | |
| "provenance": [], | |
| "collapsed_sections": [], | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/picolloo/afbf2270f1beb6f80b2b9add4b1a7267/detec-o-de-c-ncar-de-mama.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "7yEsLd3bk0h7" | |
| }, | |
| "source": [ | |
| "Disciplina de Data Mining - INE5644\n", | |
| "## Exercício 1 - Detecção de Câncer de Mama (Classificação)\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "iHra4Wdrkz4U" | |
| }, | |
| "source": [ | |
| "import numpy as np\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "import pandas as pd" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "xq9DCd3rlcnL" | |
| }, | |
| "source": [ | |
| "### 0. Carregamento do conjunto de dados" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "5xA7ndkBlIEe", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "outputId": "b3aa6001-9e99-4289-a39b-bc1dcba09329" | |
| }, | |
| "source": [ | |
| "# Load the breast-cancer dataset that ships with scikit-learn.\n", | |
| "from sklearn.datasets import load_breast_cancer\n", | |
| "cancer = load_breast_cancer()\n", | |
| "\n", | |
| "# Inspect which keys the sklearn dataset object exposes\n", | |
| "cancer.keys()" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 2 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "S_CP2rPyCvUM", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 258 | |
| }, | |
| "outputId": "a110845c-e7f5-4614-eb6a-1366853c797a" | |
| }, | |
| "source": [ | |
| "df = pd.DataFrame(cancer['data'])\n", | |
| "df.columns = cancer['feature_names']\n", | |
| "df['target'] = cancer['target']\n", | |
| "df.head()" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>mean radius</th>\n", | |
| " <th>mean texture</th>\n", | |
| " <th>mean perimeter</th>\n", | |
| " <th>mean area</th>\n", | |
| " <th>mean smoothness</th>\n", | |
| " <th>mean compactness</th>\n", | |
| " <th>mean concavity</th>\n", | |
| " <th>mean concave points</th>\n", | |
| " <th>mean symmetry</th>\n", | |
| " <th>mean fractal dimension</th>\n", | |
| " <th>radius error</th>\n", | |
| " <th>texture error</th>\n", | |
| " <th>perimeter error</th>\n", | |
| " <th>area error</th>\n", | |
| " <th>smoothness error</th>\n", | |
| " <th>compactness error</th>\n", | |
| " <th>concavity error</th>\n", | |
| " <th>concave points error</th>\n", | |
| " <th>symmetry error</th>\n", | |
| " <th>fractal dimension error</th>\n", | |
| " <th>worst radius</th>\n", | |
| " <th>worst texture</th>\n", | |
| " <th>worst perimeter</th>\n", | |
| " <th>worst area</th>\n", | |
| " <th>worst smoothness</th>\n", | |
| " <th>worst compactness</th>\n", | |
| " <th>worst concavity</th>\n", | |
| " <th>worst concave points</th>\n", | |
| " <th>worst symmetry</th>\n", | |
| " <th>worst fractal dimension</th>\n", | |
| " <th>target</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1.097064</td>\n", | |
| " <td>-2.073335</td>\n", | |
| " <td>1.269934</td>\n", | |
| " <td>0.984375</td>\n", | |
| " <td>1.568466</td>\n", | |
| " <td>3.283515</td>\n", | |
| " <td>2.652874</td>\n", | |
| " <td>2.532475</td>\n", | |
| " <td>2.217515</td>\n", | |
| " <td>2.255747</td>\n", | |
| " <td>2.489734</td>\n", | |
| " <td>-0.565265</td>\n", | |
| " <td>2.833031</td>\n", | |
| " <td>2.487578</td>\n", | |
| " <td>-0.214002</td>\n", | |
| " <td>1.316862</td>\n", | |
| " <td>0.724026</td>\n", | |
| " <td>0.660820</td>\n", | |
| " <td>1.148757</td>\n", | |
| " <td>0.907083</td>\n", | |
| " <td>1.886690</td>\n", | |
| " <td>-1.359293</td>\n", | |
| " <td>2.303601</td>\n", | |
| " <td>2.001237</td>\n", | |
| " <td>1.307686</td>\n", | |
| " <td>2.616665</td>\n", | |
| " <td>2.109526</td>\n", | |
| " <td>2.296076</td>\n", | |
| " <td>2.750622</td>\n", | |
| " <td>1.937015</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1.829821</td>\n", | |
| " <td>-0.353632</td>\n", | |
| " <td>1.685955</td>\n", | |
| " <td>1.908708</td>\n", | |
| " <td>-0.826962</td>\n", | |
| " <td>-0.487072</td>\n", | |
| " <td>-0.023846</td>\n", | |
| " <td>0.548144</td>\n", | |
| " <td>0.001392</td>\n", | |
| " <td>-0.868652</td>\n", | |
| " <td>0.499255</td>\n", | |
| " <td>-0.876244</td>\n", | |
| " <td>0.263327</td>\n", | |
| " <td>0.742402</td>\n", | |
| " <td>-0.605351</td>\n", | |
| " <td>-0.692926</td>\n", | |
| " <td>-0.440780</td>\n", | |
| " <td>0.260162</td>\n", | |
| " <td>-0.805450</td>\n", | |
| " <td>-0.099444</td>\n", | |
| " <td>1.805927</td>\n", | |
| " <td>-0.369203</td>\n", | |
| " <td>1.535126</td>\n", | |
| " <td>1.890489</td>\n", | |
| " <td>-0.375612</td>\n", | |
| " <td>-0.430444</td>\n", | |
| " <td>-0.146749</td>\n", | |
| " <td>1.087084</td>\n", | |
| " <td>-0.243890</td>\n", | |
| " <td>0.281190</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1.579888</td>\n", | |
| " <td>0.456187</td>\n", | |
| " <td>1.566503</td>\n", | |
| " <td>1.558884</td>\n", | |
| " <td>0.942210</td>\n", | |
| " <td>1.052926</td>\n", | |
| " <td>1.363478</td>\n", | |
| " <td>2.037231</td>\n", | |
| " <td>0.939685</td>\n", | |
| " <td>-0.398008</td>\n", | |
| " <td>1.228676</td>\n", | |
| " <td>-0.780083</td>\n", | |
| " <td>0.850928</td>\n", | |
| " <td>1.181336</td>\n", | |
| " <td>-0.297005</td>\n", | |
| " <td>0.814974</td>\n", | |
| " <td>0.213076</td>\n", | |
| " <td>1.424827</td>\n", | |
| " <td>0.237036</td>\n", | |
| " <td>0.293559</td>\n", | |
| " <td>1.511870</td>\n", | |
| " <td>-0.023974</td>\n", | |
| " <td>1.347475</td>\n", | |
| " <td>1.456285</td>\n", | |
| " <td>0.527407</td>\n", | |
| " <td>1.082932</td>\n", | |
| " <td>0.854974</td>\n", | |
| " <td>1.955000</td>\n", | |
| " <td>1.152255</td>\n", | |
| " <td>0.201391</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>-0.768909</td>\n", | |
| " <td>0.253732</td>\n", | |
| " <td>-0.592687</td>\n", | |
| " <td>-0.764464</td>\n", | |
| " <td>3.283553</td>\n", | |
| " <td>3.402909</td>\n", | |
| " <td>1.915897</td>\n", | |
| " <td>1.451707</td>\n", | |
| " <td>2.867383</td>\n", | |
| " <td>4.910919</td>\n", | |
| " <td>0.326373</td>\n", | |
| " <td>-0.110409</td>\n", | |
| " <td>0.286593</td>\n", | |
| " <td>-0.288378</td>\n", | |
| " <td>0.689702</td>\n", | |
| " <td>2.744280</td>\n", | |
| " <td>0.819518</td>\n", | |
| " <td>1.115007</td>\n", | |
| " <td>4.732680</td>\n", | |
| " <td>2.047511</td>\n", | |
| " <td>-0.281464</td>\n", | |
| " <td>0.133984</td>\n", | |
| " <td>-0.249939</td>\n", | |
| " <td>-0.550021</td>\n", | |
| " <td>3.394275</td>\n", | |
| " <td>3.893397</td>\n", | |
| " <td>1.989588</td>\n", | |
| " <td>2.175786</td>\n", | |
| " <td>6.046041</td>\n", | |
| " <td>4.935010</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1.750297</td>\n", | |
| " <td>-1.151816</td>\n", | |
| " <td>1.776573</td>\n", | |
| " <td>1.826229</td>\n", | |
| " <td>0.280372</td>\n", | |
| " <td>0.539340</td>\n", | |
| " <td>1.371011</td>\n", | |
| " <td>1.428493</td>\n", | |
| " <td>-0.009560</td>\n", | |
| " <td>-0.562450</td>\n", | |
| " <td>1.270543</td>\n", | |
| " <td>-0.790244</td>\n", | |
| " <td>1.273189</td>\n", | |
| " <td>1.190357</td>\n", | |
| " <td>1.483067</td>\n", | |
| " <td>-0.048520</td>\n", | |
| " <td>0.828471</td>\n", | |
| " <td>1.144205</td>\n", | |
| " <td>-0.361092</td>\n", | |
| " <td>0.499328</td>\n", | |
| " <td>1.298575</td>\n", | |
| " <td>-1.466770</td>\n", | |
| " <td>1.338539</td>\n", | |
| " <td>1.220724</td>\n", | |
| " <td>0.220556</td>\n", | |
| " <td>-0.313395</td>\n", | |
| " <td>0.613179</td>\n", | |
| " <td>0.729259</td>\n", | |
| " <td>-0.868353</td>\n", | |
| " <td>-0.397100</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " mean radius mean texture ... worst fractal dimension target\n", | |
| "0 1.097064 -2.073335 ... 1.937015 0\n", | |
| "1 1.829821 -0.353632 ... 0.281190 0\n", | |
| "2 1.579888 0.456187 ... 0.201391 0\n", | |
| "3 -0.768909 0.253732 ... 4.935010 0\n", | |
| "4 1.750297 -1.151816 ... -0.397100 0\n", | |
| "\n", | |
| "[5 rows x 31 columns]" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 47 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "g1GBP7FACovY" | |
| }, | |
| "source": [ | |
| "#carrega X e y\n", | |
| "X = df[cancer['feature_names']]\n", | |
| "y = df['target']\n", | |
| "\n", | |
| "#divide treino e teste\n", | |
| "from sklearn.model_selection import train_test_split\n", | |
| "X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.15)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 413 | |
| }, | |
| "id": "hcCyQrN0Ofbs", | |
| "outputId": "47456b0d-73fb-4b5b-d616-281583edfa64" | |
| }, | |
| "source": [ | |
| "" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>mean radius</th>\n", | |
| " <th>mean texture</th>\n", | |
| " <th>mean perimeter</th>\n", | |
| " <th>mean area</th>\n", | |
| " <th>mean smoothness</th>\n", | |
| " <th>mean compactness</th>\n", | |
| " <th>mean concavity</th>\n", | |
| " <th>mean concave points</th>\n", | |
| " <th>mean symmetry</th>\n", | |
| " <th>mean fractal dimension</th>\n", | |
| " <th>radius error</th>\n", | |
| " <th>texture error</th>\n", | |
| " <th>perimeter error</th>\n", | |
| " <th>area error</th>\n", | |
| " <th>smoothness error</th>\n", | |
| " <th>compactness error</th>\n", | |
| " <th>concavity error</th>\n", | |
| " <th>concave points error</th>\n", | |
| " <th>symmetry error</th>\n", | |
| " <th>fractal dimension error</th>\n", | |
| " <th>worst radius</th>\n", | |
| " <th>worst texture</th>\n", | |
| " <th>worst perimeter</th>\n", | |
| " <th>worst area</th>\n", | |
| " <th>worst smoothness</th>\n", | |
| " <th>worst compactness</th>\n", | |
| " <th>worst concavity</th>\n", | |
| " <th>worst concave points</th>\n", | |
| " <th>worst symmetry</th>\n", | |
| " <th>worst fractal dimension</th>\n", | |
| " <th>target</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>17.99</td>\n", | |
| " <td>10.38</td>\n", | |
| " <td>122.80</td>\n", | |
| " <td>1001.0</td>\n", | |
| " <td>0.11840</td>\n", | |
| " <td>0.27760</td>\n", | |
| " <td>0.30010</td>\n", | |
| " <td>0.14710</td>\n", | |
| " <td>0.2419</td>\n", | |
| " <td>0.07871</td>\n", | |
| " <td>1.0950</td>\n", | |
| " <td>0.9053</td>\n", | |
| " <td>8.589</td>\n", | |
| " <td>153.40</td>\n", | |
| " <td>0.006399</td>\n", | |
| " <td>0.04904</td>\n", | |
| " <td>0.05373</td>\n", | |
| " <td>0.01587</td>\n", | |
| " <td>0.03003</td>\n", | |
| " <td>0.006193</td>\n", | |
| " <td>25.38</td>\n", | |
| " <td>17.33</td>\n", | |
| " <td>184.60</td>\n", | |
| " <td>2019.0</td>\n", | |
| " <td>0.1622</td>\n", | |
| " <td>0.6656</td>\n", | |
| " <td>0.7119</td>\n", | |
| " <td>0.2654</td>\n", | |
| " <td>0.4601</td>\n", | |
| " <td>0.11890</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>20.57</td>\n", | |
| " <td>17.77</td>\n", | |
| " <td>132.90</td>\n", | |
| " <td>1326.0</td>\n", | |
| " <td>0.08474</td>\n", | |
| " <td>0.07864</td>\n", | |
| " <td>0.08690</td>\n", | |
| " <td>0.07017</td>\n", | |
| " <td>0.1812</td>\n", | |
| " <td>0.05667</td>\n", | |
| " <td>0.5435</td>\n", | |
| " <td>0.7339</td>\n", | |
| " <td>3.398</td>\n", | |
| " <td>74.08</td>\n", | |
| " <td>0.005225</td>\n", | |
| " <td>0.01308</td>\n", | |
| " <td>0.01860</td>\n", | |
| " <td>0.01340</td>\n", | |
| " <td>0.01389</td>\n", | |
| " <td>0.003532</td>\n", | |
| " <td>24.99</td>\n", | |
| " <td>23.41</td>\n", | |
| " <td>158.80</td>\n", | |
| " <td>1956.0</td>\n", | |
| " <td>0.1238</td>\n", | |
| " <td>0.1866</td>\n", | |
| " <td>0.2416</td>\n", | |
| " <td>0.1860</td>\n", | |
| " <td>0.2750</td>\n", | |
| " <td>0.08902</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>19.69</td>\n", | |
| " <td>21.25</td>\n", | |
| " <td>130.00</td>\n", | |
| " <td>1203.0</td>\n", | |
| " <td>0.10960</td>\n", | |
| " <td>0.15990</td>\n", | |
| " <td>0.19740</td>\n", | |
| " <td>0.12790</td>\n", | |
| " <td>0.2069</td>\n", | |
| " <td>0.05999</td>\n", | |
| " <td>0.7456</td>\n", | |
| " <td>0.7869</td>\n", | |
| " <td>4.585</td>\n", | |
| " <td>94.03</td>\n", | |
| " <td>0.006150</td>\n", | |
| " <td>0.04006</td>\n", | |
| " <td>0.03832</td>\n", | |
| " <td>0.02058</td>\n", | |
| " <td>0.02250</td>\n", | |
| " <td>0.004571</td>\n", | |
| " <td>23.57</td>\n", | |
| " <td>25.53</td>\n", | |
| " <td>152.50</td>\n", | |
| " <td>1709.0</td>\n", | |
| " <td>0.1444</td>\n", | |
| " <td>0.4245</td>\n", | |
| " <td>0.4504</td>\n", | |
| " <td>0.2430</td>\n", | |
| " <td>0.3613</td>\n", | |
| " <td>0.08758</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>11.42</td>\n", | |
| " <td>20.38</td>\n", | |
| " <td>77.58</td>\n", | |
| " <td>386.1</td>\n", | |
| " <td>0.14250</td>\n", | |
| " <td>0.28390</td>\n", | |
| " <td>0.24140</td>\n", | |
| " <td>0.10520</td>\n", | |
| " <td>0.2597</td>\n", | |
| " <td>0.09744</td>\n", | |
| " <td>0.4956</td>\n", | |
| " <td>1.1560</td>\n", | |
| " <td>3.445</td>\n", | |
| " <td>27.23</td>\n", | |
| " <td>0.009110</td>\n", | |
| " <td>0.07458</td>\n", | |
| " <td>0.05661</td>\n", | |
| " <td>0.01867</td>\n", | |
| " <td>0.05963</td>\n", | |
| " <td>0.009208</td>\n", | |
| " <td>14.91</td>\n", | |
| " <td>26.50</td>\n", | |
| " <td>98.87</td>\n", | |
| " <td>567.7</td>\n", | |
| " <td>0.2098</td>\n", | |
| " <td>0.8663</td>\n", | |
| " <td>0.6869</td>\n", | |
| " <td>0.2575</td>\n", | |
| " <td>0.6638</td>\n", | |
| " <td>0.17300</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>20.29</td>\n", | |
| " <td>14.34</td>\n", | |
| " <td>135.10</td>\n", | |
| " <td>1297.0</td>\n", | |
| " <td>0.10030</td>\n", | |
| " <td>0.13280</td>\n", | |
| " <td>0.19800</td>\n", | |
| " <td>0.10430</td>\n", | |
| " <td>0.1809</td>\n", | |
| " <td>0.05883</td>\n", | |
| " <td>0.7572</td>\n", | |
| " <td>0.7813</td>\n", | |
| " <td>5.438</td>\n", | |
| " <td>94.44</td>\n", | |
| " <td>0.011490</td>\n", | |
| " <td>0.02461</td>\n", | |
| " <td>0.05688</td>\n", | |
| " <td>0.01885</td>\n", | |
| " <td>0.01756</td>\n", | |
| " <td>0.005115</td>\n", | |
| " <td>22.54</td>\n", | |
| " <td>16.67</td>\n", | |
| " <td>152.20</td>\n", | |
| " <td>1575.0</td>\n", | |
| " <td>0.1374</td>\n", | |
| " <td>0.2050</td>\n", | |
| " <td>0.4000</td>\n", | |
| " <td>0.1625</td>\n", | |
| " <td>0.2364</td>\n", | |
| " <td>0.07678</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>12.45</td>\n", | |
| " <td>15.70</td>\n", | |
| " <td>82.57</td>\n", | |
| " <td>477.1</td>\n", | |
| " <td>0.12780</td>\n", | |
| " <td>0.17000</td>\n", | |
| " <td>0.15780</td>\n", | |
| " <td>0.08089</td>\n", | |
| " <td>0.2087</td>\n", | |
| " <td>0.07613</td>\n", | |
| " <td>0.3345</td>\n", | |
| " <td>0.8902</td>\n", | |
| " <td>2.217</td>\n", | |
| " <td>27.19</td>\n", | |
| " <td>0.007510</td>\n", | |
| " <td>0.03345</td>\n", | |
| " <td>0.03672</td>\n", | |
| " <td>0.01137</td>\n", | |
| " <td>0.02165</td>\n", | |
| " <td>0.005082</td>\n", | |
| " <td>15.47</td>\n", | |
| " <td>23.75</td>\n", | |
| " <td>103.40</td>\n", | |
| " <td>741.6</td>\n", | |
| " <td>0.1791</td>\n", | |
| " <td>0.5249</td>\n", | |
| " <td>0.5355</td>\n", | |
| " <td>0.1741</td>\n", | |
| " <td>0.3985</td>\n", | |
| " <td>0.12440</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>18.25</td>\n", | |
| " <td>19.98</td>\n", | |
| " <td>119.60</td>\n", | |
| " <td>1040.0</td>\n", | |
| " <td>0.09463</td>\n", | |
| " <td>0.10900</td>\n", | |
| " <td>0.11270</td>\n", | |
| " <td>0.07400</td>\n", | |
| " <td>0.1794</td>\n", | |
| " <td>0.05742</td>\n", | |
| " <td>0.4467</td>\n", | |
| " <td>0.7732</td>\n", | |
| " <td>3.180</td>\n", | |
| " <td>53.91</td>\n", | |
| " <td>0.004314</td>\n", | |
| " <td>0.01382</td>\n", | |
| " <td>0.02254</td>\n", | |
| " <td>0.01039</td>\n", | |
| " <td>0.01369</td>\n", | |
| " <td>0.002179</td>\n", | |
| " <td>22.88</td>\n", | |
| " <td>27.66</td>\n", | |
| " <td>153.20</td>\n", | |
| " <td>1606.0</td>\n", | |
| " <td>0.1442</td>\n", | |
| " <td>0.2576</td>\n", | |
| " <td>0.3784</td>\n", | |
| " <td>0.1932</td>\n", | |
| " <td>0.3063</td>\n", | |
| " <td>0.08368</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>13.71</td>\n", | |
| " <td>20.83</td>\n", | |
| " <td>90.20</td>\n", | |
| " <td>577.9</td>\n", | |
| " <td>0.11890</td>\n", | |
| " <td>0.16450</td>\n", | |
| " <td>0.09366</td>\n", | |
| " <td>0.05985</td>\n", | |
| " <td>0.2196</td>\n", | |
| " <td>0.07451</td>\n", | |
| " <td>0.5835</td>\n", | |
| " <td>1.3770</td>\n", | |
| " <td>3.856</td>\n", | |
| " <td>50.96</td>\n", | |
| " <td>0.008805</td>\n", | |
| " <td>0.03029</td>\n", | |
| " <td>0.02488</td>\n", | |
| " <td>0.01448</td>\n", | |
| " <td>0.01486</td>\n", | |
| " <td>0.005412</td>\n", | |
| " <td>17.06</td>\n", | |
| " <td>28.14</td>\n", | |
| " <td>110.60</td>\n", | |
| " <td>897.0</td>\n", | |
| " <td>0.1654</td>\n", | |
| " <td>0.3682</td>\n", | |
| " <td>0.2678</td>\n", | |
| " <td>0.1556</td>\n", | |
| " <td>0.3196</td>\n", | |
| " <td>0.11510</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>13.00</td>\n", | |
| " <td>21.82</td>\n", | |
| " <td>87.50</td>\n", | |
| " <td>519.8</td>\n", | |
| " <td>0.12730</td>\n", | |
| " <td>0.19320</td>\n", | |
| " <td>0.18590</td>\n", | |
| " <td>0.09353</td>\n", | |
| " <td>0.2350</td>\n", | |
| " <td>0.07389</td>\n", | |
| " <td>0.3063</td>\n", | |
| " <td>1.0020</td>\n", | |
| " <td>2.406</td>\n", | |
| " <td>24.32</td>\n", | |
| " <td>0.005731</td>\n", | |
| " <td>0.03502</td>\n", | |
| " <td>0.03553</td>\n", | |
| " <td>0.01226</td>\n", | |
| " <td>0.02143</td>\n", | |
| " <td>0.003749</td>\n", | |
| " <td>15.49</td>\n", | |
| " <td>30.73</td>\n", | |
| " <td>106.20</td>\n", | |
| " <td>739.3</td>\n", | |
| " <td>0.1703</td>\n", | |
| " <td>0.5401</td>\n", | |
| " <td>0.5390</td>\n", | |
| " <td>0.2060</td>\n", | |
| " <td>0.4378</td>\n", | |
| " <td>0.10720</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>12.46</td>\n", | |
| " <td>24.04</td>\n", | |
| " <td>83.97</td>\n", | |
| " <td>475.9</td>\n", | |
| " <td>0.11860</td>\n", | |
| " <td>0.23960</td>\n", | |
| " <td>0.22730</td>\n", | |
| " <td>0.08543</td>\n", | |
| " <td>0.2030</td>\n", | |
| " <td>0.08243</td>\n", | |
| " <td>0.2976</td>\n", | |
| " <td>1.5990</td>\n", | |
| " <td>2.039</td>\n", | |
| " <td>23.94</td>\n", | |
| " <td>0.007149</td>\n", | |
| " <td>0.07217</td>\n", | |
| " <td>0.07743</td>\n", | |
| " <td>0.01432</td>\n", | |
| " <td>0.01789</td>\n", | |
| " <td>0.010080</td>\n", | |
| " <td>15.09</td>\n", | |
| " <td>40.68</td>\n", | |
| " <td>97.65</td>\n", | |
| " <td>711.4</td>\n", | |
| " <td>0.1853</td>\n", | |
| " <td>1.0580</td>\n", | |
| " <td>1.1050</td>\n", | |
| " <td>0.2210</td>\n", | |
| " <td>0.4366</td>\n", | |
| " <td>0.20750</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " mean radius mean texture ... worst fractal dimension target\n", | |
| "0 17.99 10.38 ... 0.11890 0\n", | |
| "1 20.57 17.77 ... 0.08902 0\n", | |
| "2 19.69 21.25 ... 0.08758 0\n", | |
| "3 11.42 20.38 ... 0.17300 0\n", | |
| "4 20.29 14.34 ... 0.07678 0\n", | |
| "5 12.45 15.70 ... 0.12440 0\n", | |
| "6 18.25 19.98 ... 0.08368 0\n", | |
| "7 13.71 20.83 ... 0.11510 0\n", | |
| "8 13.00 21.82 ... 0.10720 0\n", | |
| "9 12.46 24.04 ... 0.20750 0\n", | |
| "\n", | |
| "[10 rows x 31 columns]" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 42 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "UP27uBUBlj6J" | |
| }, | |
| "source": [ | |
| "### 1. Padronizar (z-score) os dados (feature scaling)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "VI3B2NY_lRrq", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 258 | |
| }, | |
| "outputId": "f7494ab2-ed9c-48d8-d9af-6d0d07fa804d" | |
| }, | |
| "source": [ | |
| "from scipy.stats import zscore\n", | |
| "df.apply(zscore)\n", | |
| "df.head()" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>mean radius</th>\n", | |
| " <th>mean texture</th>\n", | |
| " <th>mean perimeter</th>\n", | |
| " <th>mean area</th>\n", | |
| " <th>mean smoothness</th>\n", | |
| " <th>mean compactness</th>\n", | |
| " <th>mean concavity</th>\n", | |
| " <th>mean concave points</th>\n", | |
| " <th>mean symmetry</th>\n", | |
| " <th>mean fractal dimension</th>\n", | |
| " <th>radius error</th>\n", | |
| " <th>texture error</th>\n", | |
| " <th>perimeter error</th>\n", | |
| " <th>area error</th>\n", | |
| " <th>smoothness error</th>\n", | |
| " <th>compactness error</th>\n", | |
| " <th>concavity error</th>\n", | |
| " <th>concave points error</th>\n", | |
| " <th>symmetry error</th>\n", | |
| " <th>fractal dimension error</th>\n", | |
| " <th>worst radius</th>\n", | |
| " <th>worst texture</th>\n", | |
| " <th>worst perimeter</th>\n", | |
| " <th>worst area</th>\n", | |
| " <th>worst smoothness</th>\n", | |
| " <th>worst compactness</th>\n", | |
| " <th>worst concavity</th>\n", | |
| " <th>worst concave points</th>\n", | |
| " <th>worst symmetry</th>\n", | |
| " <th>worst fractal dimension</th>\n", | |
| " <th>target</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1.097064</td>\n", | |
| " <td>-2.073335</td>\n", | |
| " <td>1.269934</td>\n", | |
| " <td>0.984375</td>\n", | |
| " <td>1.568466</td>\n", | |
| " <td>3.283515</td>\n", | |
| " <td>2.652874</td>\n", | |
| " <td>2.532475</td>\n", | |
| " <td>2.217515</td>\n", | |
| " <td>2.255747</td>\n", | |
| " <td>2.489734</td>\n", | |
| " <td>-0.565265</td>\n", | |
| " <td>2.833031</td>\n", | |
| " <td>2.487578</td>\n", | |
| " <td>-0.214002</td>\n", | |
| " <td>1.316862</td>\n", | |
| " <td>0.724026</td>\n", | |
| " <td>0.660820</td>\n", | |
| " <td>1.148757</td>\n", | |
| " <td>0.907083</td>\n", | |
| " <td>1.886690</td>\n", | |
| " <td>-1.359293</td>\n", | |
| " <td>2.303601</td>\n", | |
| " <td>2.001237</td>\n", | |
| " <td>1.307686</td>\n", | |
| " <td>2.616665</td>\n", | |
| " <td>2.109526</td>\n", | |
| " <td>2.296076</td>\n", | |
| " <td>2.750622</td>\n", | |
| " <td>1.937015</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1.829821</td>\n", | |
| " <td>-0.353632</td>\n", | |
| " <td>1.685955</td>\n", | |
| " <td>1.908708</td>\n", | |
| " <td>-0.826962</td>\n", | |
| " <td>-0.487072</td>\n", | |
| " <td>-0.023846</td>\n", | |
| " <td>0.548144</td>\n", | |
| " <td>0.001392</td>\n", | |
| " <td>-0.868652</td>\n", | |
| " <td>0.499255</td>\n", | |
| " <td>-0.876244</td>\n", | |
| " <td>0.263327</td>\n", | |
| " <td>0.742402</td>\n", | |
| " <td>-0.605351</td>\n", | |
| " <td>-0.692926</td>\n", | |
| " <td>-0.440780</td>\n", | |
| " <td>0.260162</td>\n", | |
| " <td>-0.805450</td>\n", | |
| " <td>-0.099444</td>\n", | |
| " <td>1.805927</td>\n", | |
| " <td>-0.369203</td>\n", | |
| " <td>1.535126</td>\n", | |
| " <td>1.890489</td>\n", | |
| " <td>-0.375612</td>\n", | |
| " <td>-0.430444</td>\n", | |
| " <td>-0.146749</td>\n", | |
| " <td>1.087084</td>\n", | |
| " <td>-0.243890</td>\n", | |
| " <td>0.281190</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1.579888</td>\n", | |
| " <td>0.456187</td>\n", | |
| " <td>1.566503</td>\n", | |
| " <td>1.558884</td>\n", | |
| " <td>0.942210</td>\n", | |
| " <td>1.052926</td>\n", | |
| " <td>1.363478</td>\n", | |
| " <td>2.037231</td>\n", | |
| " <td>0.939685</td>\n", | |
| " <td>-0.398008</td>\n", | |
| " <td>1.228676</td>\n", | |
| " <td>-0.780083</td>\n", | |
| " <td>0.850928</td>\n", | |
| " <td>1.181336</td>\n", | |
| " <td>-0.297005</td>\n", | |
| " <td>0.814974</td>\n", | |
| " <td>0.213076</td>\n", | |
| " <td>1.424827</td>\n", | |
| " <td>0.237036</td>\n", | |
| " <td>0.293559</td>\n", | |
| " <td>1.511870</td>\n", | |
| " <td>-0.023974</td>\n", | |
| " <td>1.347475</td>\n", | |
| " <td>1.456285</td>\n", | |
| " <td>0.527407</td>\n", | |
| " <td>1.082932</td>\n", | |
| " <td>0.854974</td>\n", | |
| " <td>1.955000</td>\n", | |
| " <td>1.152255</td>\n", | |
| " <td>0.201391</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>-0.768909</td>\n", | |
| " <td>0.253732</td>\n", | |
| " <td>-0.592687</td>\n", | |
| " <td>-0.764464</td>\n", | |
| " <td>3.283553</td>\n", | |
| " <td>3.402909</td>\n", | |
| " <td>1.915897</td>\n", | |
| " <td>1.451707</td>\n", | |
| " <td>2.867383</td>\n", | |
| " <td>4.910919</td>\n", | |
| " <td>0.326373</td>\n", | |
| " <td>-0.110409</td>\n", | |
| " <td>0.286593</td>\n", | |
| " <td>-0.288378</td>\n", | |
| " <td>0.689702</td>\n", | |
| " <td>2.744280</td>\n", | |
| " <td>0.819518</td>\n", | |
| " <td>1.115007</td>\n", | |
| " <td>4.732680</td>\n", | |
| " <td>2.047511</td>\n", | |
| " <td>-0.281464</td>\n", | |
| " <td>0.133984</td>\n", | |
| " <td>-0.249939</td>\n", | |
| " <td>-0.550021</td>\n", | |
| " <td>3.394275</td>\n", | |
| " <td>3.893397</td>\n", | |
| " <td>1.989588</td>\n", | |
| " <td>2.175786</td>\n", | |
| " <td>6.046041</td>\n", | |
| " <td>4.935010</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1.750297</td>\n", | |
| " <td>-1.151816</td>\n", | |
| " <td>1.776573</td>\n", | |
| " <td>1.826229</td>\n", | |
| " <td>0.280372</td>\n", | |
| " <td>0.539340</td>\n", | |
| " <td>1.371011</td>\n", | |
| " <td>1.428493</td>\n", | |
| " <td>-0.009560</td>\n", | |
| " <td>-0.562450</td>\n", | |
| " <td>1.270543</td>\n", | |
| " <td>-0.790244</td>\n", | |
| " <td>1.273189</td>\n", | |
| " <td>1.190357</td>\n", | |
| " <td>1.483067</td>\n", | |
| " <td>-0.048520</td>\n", | |
| " <td>0.828471</td>\n", | |
| " <td>1.144205</td>\n", | |
| " <td>-0.361092</td>\n", | |
| " <td>0.499328</td>\n", | |
| " <td>1.298575</td>\n", | |
| " <td>-1.466770</td>\n", | |
| " <td>1.338539</td>\n", | |
| " <td>1.220724</td>\n", | |
| " <td>0.220556</td>\n", | |
| " <td>-0.313395</td>\n", | |
| " <td>0.613179</td>\n", | |
| " <td>0.729259</td>\n", | |
| " <td>-0.868353</td>\n", | |
| " <td>-0.397100</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " mean radius mean texture ... worst fractal dimension target\n", | |
| "0 1.097064 -2.073335 ... 1.937015 0\n", | |
| "1 1.829821 -0.353632 ... 0.281190 0\n", | |
| "2 1.579888 0.456187 ... 0.201391 0\n", | |
| "3 -0.768909 0.253732 ... 4.935010 0\n", | |
| "4 1.750297 -1.151816 ... -0.397100 0\n", | |
| "\n", | |
| "[5 rows x 31 columns]" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 50 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "QeQtpbgwmJMP" | |
| }, | |
| "source": [ | |
| "### 2. Construir o modelo" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "dmMOZovnlpsx", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "outputId": "b08760a2-37d8-409b-9f72-11f1a022a6d3" | |
| }, | |
| "source": [ | |
| "from sklearn.tree import DecisionTreeClassifier\n", | |
| "model = DecisionTreeClassifier()\n", | |
| "model.fit(X_train, y_train)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n", | |
| " max_depth=None, max_features=None, max_leaf_nodes=None,\n", | |
| " min_impurity_decrease=0.0, min_impurity_split=None,\n", | |
| " min_samples_leaf=1, min_samples_split=2,\n", | |
| " min_weight_fraction_leaf=0.0, presort='deprecated',\n", | |
| " random_state=None, splitter='best')" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 53 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "pm4vzMgqmRQc" | |
| }, | |
| "source": [ | |
| "### 3. Realizar a predição e a avaliação do modelo" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "mNA_NWmUTjVU" | |
| }, | |
| "source": [ | |
| "from sklearn.linear_model import LogisticRegression\n", | |
| "\n", | |
| "model= LogisticRegression()\n", | |
| "model.fit(X_train,y_train)\n", | |
| "\n", | |
| "y_pred=model.predict(X_test)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "Ik02F1cRTsZO", | |
| "outputId": "9b56bab3-d7b7-488a-817e-4ac22baeadea" | |
| }, | |
| "source": [ | |
| "from sklearn.metrics import classification_report, confusion_matrix\n", | |
| "print(confusion_matrix(y_test, y_pred))\n", | |
| "print(classification_report(y_test, y_pred))" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "[[25 2]\n", | |
| " [ 2 57]]\n", | |
| " precision recall f1-score support\n", | |
| "\n", | |
| " 0 0.93 0.93 0.93 27\n", | |
| " 1 0.97 0.97 0.97 59\n", | |
| "\n", | |
| " accuracy 0.95 86\n", | |
| " macro avg 0.95 0.95 0.95 86\n", | |
| "weighted avg 0.95 0.95 0.95 86\n", | |
| "\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment