Last active
February 22, 2018 06:13
-
-
Save sinhrks/cc9a88f74074fc296e12 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'0.4'" | |
| ] | |
| }, | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%matplotlib inline\n", | |
| "import numpy as np\n", | |
| "import xgboost as xgb\n", | |
| "from sklearn import datasets\n", | |
| "\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "plt.style.use('ggplot')\n", | |
| "\n", | |
| "xgb.__version__" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Create DMatrix from pandas.DataFrame" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>SepalLength</th>\n", | |
| " <th>SepalWidth</th>\n", | |
| " <th>PetalLength</th>\n", | |
| " <th>PetalWidth</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>5.1</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>4.9</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>4.7</td>\n", | |
| " <td>3.2</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>0.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4.6</td>\n", | |
| " <td>3.1</td>\n", | |
| " <td>1.5</td>\n", | |
| " <td>0.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>146</th>\n", | |
| " <td>6.3</td>\n", | |
| " <td>2.5</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>1.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>147</th>\n", | |
| " <td>6.5</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>5.2</td>\n", | |
| " <td>2.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>148</th>\n", | |
| " <td>6.2</td>\n", | |
| " <td>3.4</td>\n", | |
| " <td>5.4</td>\n", | |
| " <td>2.3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>149</th>\n", | |
| " <td>5.9</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>5.1</td>\n", | |
| " <td>1.8</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>150 rows × 4 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " SepalLength SepalWidth PetalLength PetalWidth\n", | |
| "0 5.1 3.5 1.4 0.2\n", | |
| "1 4.9 3.0 1.4 0.2\n", | |
| "2 4.7 3.2 1.3 0.2\n", | |
| "3 4.6 3.1 1.5 0.2\n", | |
| ".. ... ... ... ...\n", | |
| "146 6.3 2.5 5.0 1.9\n", | |
| "147 6.5 3.0 5.2 2.0\n", | |
| "148 6.2 3.4 5.4 2.3\n", | |
| "149 5.9 3.0 5.1 1.8\n", | |
| "\n", | |
| "[150 rows x 4 columns]" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris = datasets.load_iris()\n", | |
| "\n", | |
| "import pandas as pd\n", | |
| "pd.set_option('display.max_rows', 8)\n", | |
| "\n", | |
| "train = pd.DataFrame(iris.data, columns=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'])\n", | |
| "train" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(150L, 4L)" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dm = xgb.DMatrix(train, label=iris.target)\n", | |
| "dm.num_row(), dm.num_col()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dm.feature_names" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['q', 'q', 'q', 'q']" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dm.feature_types" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### cv now returns pandas.DataFrame or np.ndarray" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>test-mlogloss-mean</th>\n", | |
| " <th>test-mlogloss-std</th>\n", | |
| " <th>train-mlogloss-mean</th>\n", | |
| " <th>train-mlogloss-std</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.753459</td>\n", | |
| " <td>0.027033</td>\n", | |
| " <td>0.737631</td>\n", | |
| " <td>0.003818</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.552303</td>\n", | |
| " <td>0.048738</td>\n", | |
| " <td>0.526929</td>\n", | |
| " <td>0.005102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.423481</td>\n", | |
| " <td>0.066469</td>\n", | |
| " <td>0.390115</td>\n", | |
| " <td>0.005873</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.339942</td>\n", | |
| " <td>0.082163</td>\n", | |
| " <td>0.295637</td>\n", | |
| " <td>0.006148</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0.219242</td>\n", | |
| " <td>0.124195</td>\n", | |
| " <td>0.143760</td>\n", | |
| " <td>0.006318</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0.200365</td>\n", | |
| " <td>0.137163</td>\n", | |
| " <td>0.116560</td>\n", | |
| " <td>0.006130</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0.187477</td>\n", | |
| " <td>0.145066</td>\n", | |
| " <td>0.096047</td>\n", | |
| " <td>0.005444</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0.181228</td>\n", | |
| " <td>0.156536</td>\n", | |
| " <td>0.080041</td>\n", | |
| " <td>0.005265</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>10 rows × 4 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n", | |
| "0 0.753459 0.027033 0.737631 \n", | |
| "1 0.552303 0.048738 0.526929 \n", | |
| "2 0.423481 0.066469 0.390115 \n", | |
| "3 0.339942 0.082163 0.295637 \n", | |
| ".. ... ... ... \n", | |
| "6 0.219242 0.124195 0.143760 \n", | |
| "7 0.200365 0.137163 0.116560 \n", | |
| "8 0.187477 0.145066 0.096047 \n", | |
| "9 0.181228 0.156536 0.080041 \n", | |
| "\n", | |
| " train-mlogloss-std \n", | |
| "0 0.003818 \n", | |
| "1 0.005102 \n", | |
| "2 0.005873 \n", | |
| "3 0.006148 \n", | |
| ".. ... \n", | |
| "6 0.006318 \n", | |
| "7 0.006130 \n", | |
| "8 0.005444 \n", | |
| "9 0.005265 \n", | |
| "\n", | |
| "[10 rows x 4 columns]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "params={'objective': 'multi:softprob',\n", | |
| " 'eval_metric': 'mlogloss',\n", | |
| " 'eta': 0.3,\n", | |
| " 'num_class': 3}\n", | |
| "\n", | |
| "# default (returns pd.DataFrame, progress report is disabled)\n", | |
| "# if pandas is not installed, it behaves as if as_pandas=False were specified (see below)\n", | |
| "xgb.cv(params, dm, num_boost_round=10, nfold=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "[0]\tcv-test-mlogloss:0.7534586+0.0270330788894\tcv-train-mlogloss:0.7376308+0.00381774878168\n", | |
| "[1]\tcv-test-mlogloss:0.5523035+0.0487375163775\tcv-train-mlogloss:0.5269287+0.00510218267117\n", | |
| "[2]\tcv-test-mlogloss:0.4234808+0.0664692426297\tcv-train-mlogloss:0.3901153+0.00587297442613\n", | |
| "[3]\tcv-test-mlogloss:0.3399421+0.082162847007\tcv-train-mlogloss:0.2956371+0.0061480195421\n", | |
| "[4]\tcv-test-mlogloss:0.2822133+0.094546005664\tcv-train-mlogloss:0.2284948+0.00582542583508\n", | |
| "[5]\tcv-test-mlogloss:0.2445561+0.108854130468\tcv-train-mlogloss:0.1798245+0.00614406771203\n", | |
| "[6]\tcv-test-mlogloss:0.2192424+0.124194576109\tcv-train-mlogloss:0.1437596+0.0063180363595\n", | |
| "[7]\tcv-test-mlogloss:0.2003654+0.137162644979\tcv-train-mlogloss:0.1165601+0.00612969169616\n", | |
| "[8]\tcv-test-mlogloss:0.1874767+0.145066157809\tcv-train-mlogloss:0.0960466+0.00544370076694\n", | |
| "[9]\tcv-test-mlogloss:0.1812277+0.156536125975\tcv-train-mlogloss:0.0800408+0.0052649908224\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>test-mlogloss-mean</th>\n", | |
| " <th>test-mlogloss-std</th>\n", | |
| " <th>train-mlogloss-mean</th>\n", | |
| " <th>train-mlogloss-std</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.753459</td>\n", | |
| " <td>0.027033</td>\n", | |
| " <td>0.737631</td>\n", | |
| " <td>0.003818</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.552303</td>\n", | |
| " <td>0.048738</td>\n", | |
| " <td>0.526929</td>\n", | |
| " <td>0.005102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.423481</td>\n", | |
| " <td>0.066469</td>\n", | |
| " <td>0.390115</td>\n", | |
| " <td>0.005873</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.339942</td>\n", | |
| " <td>0.082163</td>\n", | |
| " <td>0.295637</td>\n", | |
| " <td>0.006148</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0.219242</td>\n", | |
| " <td>0.124195</td>\n", | |
| " <td>0.143760</td>\n", | |
| " <td>0.006318</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0.200365</td>\n", | |
| " <td>0.137163</td>\n", | |
| " <td>0.116560</td>\n", | |
| " <td>0.006130</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0.187477</td>\n", | |
| " <td>0.145066</td>\n", | |
| " <td>0.096047</td>\n", | |
| " <td>0.005444</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0.181228</td>\n", | |
| " <td>0.156536</td>\n", | |
| " <td>0.080041</td>\n", | |
| " <td>0.005265</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>10 rows × 4 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n", | |
| "0 0.753459 0.027033 0.737631 \n", | |
| "1 0.552303 0.048738 0.526929 \n", | |
| "2 0.423481 0.066469 0.390115 \n", | |
| "3 0.339942 0.082163 0.295637 \n", | |
| ".. ... ... ... \n", | |
| "6 0.219242 0.124195 0.143760 \n", | |
| "7 0.200365 0.137163 0.116560 \n", | |
| "8 0.187477 0.145066 0.096047 \n", | |
| "9 0.181228 0.156536 0.080041 \n", | |
| "\n", | |
| " train-mlogloss-std \n", | |
| "0 0.003818 \n", | |
| "1 0.005102 \n", | |
| "2 0.005873 \n", | |
| "3 0.006148 \n", | |
| ".. ... \n", | |
| "6 0.006318 \n", | |
| "7 0.006130 \n", | |
| "8 0.005444 \n", | |
| "9 0.005265 \n", | |
| "\n", | |
| "[10 rows x 4 columns]" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Specifying show_progress explicitly to display progress\n", | |
| "xgb.cv(params, dm, num_boost_round=10, nfold=10, show_progress=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "[0]\tcv-test-mlogloss:0.7534586\tcv-train-mlogloss:0.7376308\n", | |
| "[1]\tcv-test-mlogloss:0.5523035\tcv-train-mlogloss:0.5269287\n", | |
| "[2]\tcv-test-mlogloss:0.4234808\tcv-train-mlogloss:0.3901153\n", | |
| "[3]\tcv-test-mlogloss:0.3399421\tcv-train-mlogloss:0.2956371\n", | |
| "[4]\tcv-test-mlogloss:0.2822133\tcv-train-mlogloss:0.2284948\n", | |
| "[5]\tcv-test-mlogloss:0.2445561\tcv-train-mlogloss:0.1798245\n", | |
| "[6]\tcv-test-mlogloss:0.2192424\tcv-train-mlogloss:0.1437596\n", | |
| "[7]\tcv-test-mlogloss:0.2003654\tcv-train-mlogloss:0.1165601\n", | |
| "[8]\tcv-test-mlogloss:0.1874767\tcv-train-mlogloss:0.0960466\n", | |
| "[9]\tcv-test-mlogloss:0.1812277\tcv-train-mlogloss:0.0800408\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>test-mlogloss-mean</th>\n", | |
| " <th>test-mlogloss-std</th>\n", | |
| " <th>train-mlogloss-mean</th>\n", | |
| " <th>train-mlogloss-std</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.753459</td>\n", | |
| " <td>0.027033</td>\n", | |
| " <td>0.737631</td>\n", | |
| " <td>0.003818</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.552303</td>\n", | |
| " <td>0.048738</td>\n", | |
| " <td>0.526929</td>\n", | |
| " <td>0.005102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.423481</td>\n", | |
| " <td>0.066469</td>\n", | |
| " <td>0.390115</td>\n", | |
| " <td>0.005873</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.339942</td>\n", | |
| " <td>0.082163</td>\n", | |
| " <td>0.295637</td>\n", | |
| " <td>0.006148</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0.219242</td>\n", | |
| " <td>0.124195</td>\n", | |
| " <td>0.143760</td>\n", | |
| " <td>0.006318</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0.200365</td>\n", | |
| " <td>0.137163</td>\n", | |
| " <td>0.116560</td>\n", | |
| " <td>0.006130</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0.187477</td>\n", | |
| " <td>0.145066</td>\n", | |
| " <td>0.096047</td>\n", | |
| " <td>0.005444</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0.181228</td>\n", | |
| " <td>0.156536</td>\n", | |
| " <td>0.080041</td>\n", | |
| " <td>0.005265</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>10 rows × 4 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n", | |
| "0 0.753459 0.027033 0.737631 \n", | |
| "1 0.552303 0.048738 0.526929 \n", | |
| "2 0.423481 0.066469 0.390115 \n", | |
| "3 0.339942 0.082163 0.295637 \n", | |
| ".. ... ... ... \n", | |
| "6 0.219242 0.124195 0.143760 \n", | |
| "7 0.200365 0.137163 0.116560 \n", | |
| "8 0.187477 0.145066 0.096047 \n", | |
| "9 0.181228 0.156536 0.080041 \n", | |
| "\n", | |
| " train-mlogloss-std \n", | |
| "0 0.003818 \n", | |
| "1 0.005102 \n", | |
| "2 0.005873 \n", | |
| "3 0.006148 \n", | |
| ".. ... \n", | |
| "6 0.006318 \n", | |
| "7 0.006130 \n", | |
| "8 0.005444 \n", | |
| "9 0.005265 \n", | |
| "\n", | |
| "[10 rows x 4 columns]" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Specify show_stdv=False to hide stdv from the progress output (for back-compat)\n", | |
| "# Note that the result always contains stdv\n", | |
| "xgb.cv(params, dm, num_boost_round=10, nfold=10, show_progress=True, show_stdv=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "[0]\tcv-test-mlogloss:0.7534586+0.0270330788894\tcv-train-mlogloss:0.7376308+0.00381774878168\n", | |
| "[1]\tcv-test-mlogloss:0.5523035+0.0487375163775\tcv-train-mlogloss:0.5269287+0.00510218267117\n", | |
| "[2]\tcv-test-mlogloss:0.4234808+0.0664692426297\tcv-train-mlogloss:0.3901153+0.00587297442613\n", | |
| "[3]\tcv-test-mlogloss:0.3399421+0.082162847007\tcv-train-mlogloss:0.2956371+0.0061480195421\n", | |
| "[4]\tcv-test-mlogloss:0.2822133+0.094546005664\tcv-train-mlogloss:0.2284948+0.00582542583508\n", | |
| "[5]\tcv-test-mlogloss:0.2445561+0.108854130468\tcv-train-mlogloss:0.1798245+0.00614406771203\n", | |
| "[6]\tcv-test-mlogloss:0.2192424+0.124194576109\tcv-train-mlogloss:0.1437596+0.0063180363595\n", | |
| "[7]\tcv-test-mlogloss:0.2003654+0.137162644979\tcv-train-mlogloss:0.1165601+0.00612969169616\n", | |
| "[8]\tcv-test-mlogloss:0.1874767+0.145066157809\tcv-train-mlogloss:0.0960466+0.00544370076694\n", | |
| "[9]\tcv-test-mlogloss:0.1812277+0.156536125975\tcv-train-mlogloss:0.0800408+0.0052649908224\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[ 0.7534586 , 0.02703308, 0.7376308 , 0.00381775],\n", | |
| " [ 0.5523035 , 0.04873752, 0.5269287 , 0.00510218],\n", | |
| " [ 0.4234808 , 0.06646924, 0.3901153 , 0.00587297],\n", | |
| " [ 0.3399421 , 0.08216285, 0.2956371 , 0.00614802],\n", | |
| " [ 0.2822133 , 0.09454601, 0.2284948 , 0.00582543],\n", | |
| " [ 0.2445561 , 0.10885413, 0.1798245 , 0.00614407],\n", | |
| " [ 0.2192424 , 0.12419458, 0.1437596 , 0.00631804],\n", | |
| " [ 0.2003654 , 0.13716264, 0.1165601 , 0.00612969],\n", | |
| " [ 0.1874767 , 0.14506616, 0.0960466 , 0.0054437 ],\n", | |
| " [ 0.1812277 , 0.15653613, 0.0800408 , 0.00526499]])" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# specifying as_pandas=False returns np.ndarray\n", | |
| "# progress report is enabled because returned np.ndarray can't contain metadata\n", | |
| "xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[ 0.7534586 , 0.02703308, 0.7376308 , 0.00381775],\n", | |
| " [ 0.5523035 , 0.04873752, 0.5269287 , 0.00510218],\n", | |
| " [ 0.4234808 , 0.06646924, 0.3901153 , 0.00587297],\n", | |
| " [ 0.3399421 , 0.08216285, 0.2956371 , 0.00614802],\n", | |
| " [ 0.2822133 , 0.09454601, 0.2284948 , 0.00582543],\n", | |
| " [ 0.2445561 , 0.10885413, 0.1798245 , 0.00614407],\n", | |
| " [ 0.2192424 , 0.12419458, 0.1437596 , 0.00631804],\n", | |
| " [ 0.2003654 , 0.13716264, 0.1165601 , 0.00612969],\n", | |
| " [ 0.1874767 , 0.14506616, 0.0960466 , 0.0054437 ],\n", | |
| " [ 0.1812277 , 0.15653613, 0.0800408 , 0.00526499]])" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# we can hide progress explicitly\n", | |
| "xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False, show_progress=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>test-error-mean</th>\n", | |
| " <th>test-error-std</th>\n", | |
| " <th>train-error-mean</th>\n", | |
| " <th>train-error-std</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.046544</td>\n", | |
| " <td>0.007774</td>\n", | |
| " <td>0.046544</td>\n", | |
| " <td>0.000864</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.022273</td>\n", | |
| " <td>0.004821</td>\n", | |
| " <td>0.022273</td>\n", | |
| " <td>0.000536</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.007066</td>\n", | |
| " <td>0.002678</td>\n", | |
| " <td>0.007066</td>\n", | |
| " <td>0.000298</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.015207</td>\n", | |
| " <td>0.003791</td>\n", | |
| " <td>0.015207</td>\n", | |
| " <td>0.000421</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0.001229</td>\n", | |
| " <td>0.001339</td>\n", | |
| " <td>0.001229</td>\n", | |
| " <td>0.000149</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0.001229</td>\n", | |
| " <td>0.001339</td>\n", | |
| " <td>0.001229</td>\n", | |
| " <td>0.000149</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0.001229</td>\n", | |
| " <td>0.001339</td>\n", | |
| " <td>0.000956</td>\n", | |
| " <td>0.000496</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0.001229</td>\n", | |
| " <td>0.001339</td>\n", | |
| " <td>0.000683</td>\n", | |
| " <td>0.000566</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>10 rows × 4 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " test-error-mean test-error-std train-error-mean train-error-std\n", | |
| "0 0.046544 0.007774 0.046544 0.000864\n", | |
| "1 0.022273 0.004821 0.022273 0.000536\n", | |
| "2 0.007066 0.002678 0.007066 0.000298\n", | |
| "3 0.015207 0.003791 0.015207 0.000421\n", | |
| ".. ... ... ... ...\n", | |
| "6 0.001229 0.001339 0.001229 0.000149\n", | |
| "7 0.001229 0.001339 0.001229 0.000149\n", | |
| "8 0.001229 0.001339 0.000956 0.000496\n", | |
| "9 0.001229 0.001339 0.000683 0.000566\n", | |
| "\n", | |
| "[10 rows x 4 columns]" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# column names change depending on the evaluation metric\n", | |
| "\n", | |
| "dpath = '/Users/sin/Documents/Git/xgboost/demo/data/'\n", | |
| "dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')\n", | |
| "param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }\n", | |
| "# run 10-fold cross-validation (no separate validation set is passed here)\n", | |
| "xgb.cv(param, dtrain, num_boost_round=10, nfold=10)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.10" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment