Sundrique · September 13, 2015 11:38
diff --git a/MNIST.ipynb b/MNIST.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Usage example employing Lasagne for digit recognition using the MNIST dataset.\n",
    "\n",
    "This example is deliberately structured as a long flat file, focusing on how\n",
    "to use Lasagne, instead of focusing on writing maximally modular and reusable\n",
    "code. It is used as the foundation for the introductory Lasagne tutorial:\n",
    "http://lasagne.readthedocs.org/en/latest/user/tutorial.html\n",
    "\n",
    "More in-depth examples and reproductions of paper results are maintained in\n",
    "a separate repository: https://github.com/Lasagne/Recipes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "\n",
    "import sys\n",
    "import os\n",
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import theano\n",
    "import theano.tensor as T\n",
    "\n",
    "import lasagne"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download and prepare the MNIST dataset\n",
    "This is just some way of getting the MNIST dataset from an online location and loading it into numpy arrays. It doesn't involve Lasagne at all."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def load_dataset():\n",
    "    \"\"\"Fetch MNIST (downloading it on first use) and return six numpy arrays.\n",
    "\n",
    "    Returns the tuple (X_train, y_train, X_val, y_val, X_test, y_test).\n",
    "    Inputs are reshaped to (examples, 1, 28, 28); targets are cast to uint8.\n",
    "    \"\"\"\n",
    "    # We first define some helper functions for supporting both Python 2 and 3.\n",
    "    if sys.version_info[0] == 2:\n",
    "        from urllib import urlretrieve\n",
    "        import cPickle as pickle\n",
    "\n",
    "        def pickle_load(f, encoding):\n",
    "            # `encoding` is accepted only to share a signature with the\n",
    "            # Python 3 variant below; it is not forwarded.\n",
    "            return pickle.load(f)\n",
    "    else:\n",
    "        from urllib.request import urlretrieve\n",
    "        import pickle\n",
    "\n",
    "        def pickle_load(f, encoding):\n",
    "            return pickle.load(f, encoding=encoding)\n",
    "\n",
    "    # We'll now download the MNIST dataset if it is not yet available.\n",
    "    url = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'\n",
    "    filename = 'mnist.pkl.gz'\n",
    "    if not os.path.exists(filename):\n",
    "        print(\"Downloading MNIST dataset...\")\n",
    "        # Download under a temporary name and rename afterwards, so that an\n",
    "        # interrupted transfer cannot leave a truncated `filename` behind\n",
    "        # that the existence check above would accept on the next run.\n",
    "        partial = filename + '.part'\n",
    "        urlretrieve(url, partial)\n",
    "        os.rename(partial, filename)\n",
    "\n",
    "    # We'll then load and unpickle the file.\n",
    "    # NOTE(review): unpickling can execute arbitrary code and the archive is\n",
    "    # fetched over plain HTTP -- only use this with a source you trust.\n",
    "    import gzip\n",
    "    with gzip.open(filename, 'rb') as f:\n",
    "        data = pickle_load(f, encoding='latin-1')\n",
    "\n",
    "    # The MNIST dataset we have here consists of six numpy arrays:\n",
    "    # Inputs and targets for the training set, validation set and test set.\n",
    "    X_train, y_train = data[0]\n",
    "    X_val, y_val = data[1]\n",
    "    X_test, y_test = data[2]\n",
    "\n",
    "    # The inputs come as vectors, we reshape them to monochrome 2D images,\n",
    "    # according to the shape convention: (examples, channels, rows, columns)\n",
    "    X_train = X_train.reshape((-1, 1, 28, 28))\n",
    "    X_val = X_val.reshape((-1, 1, 28, 28))\n",
    "    X_test = X_test.reshape((-1, 1, 28, 28))\n",
    "\n",
    "    # The targets are int64, we cast them to uint8 for GPU compatibility.\n",
    "    y_train = y_train.astype(np.uint8)\n",
    "    y_val = y_val.astype(np.uint8)\n",
    "    y_test = y_test.astype(np.uint8)\n",
    "\n",
    "    # We just return all the arrays in order, as expected in main().\n",
    "    # (It doesn't matter how we do this as long as we can read them again.)\n",
    "    return X_train, y_train, X_val, y_val, X_test, y_test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Build the neural network model\n",
    "This script supports three types of models. For each one, we define a function that takes a Theano variable representing the input and returns the output layer of a neural network model built in Lasagne."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def build_mlp(input_var=None):\n",
    "    \"\"\"Build the fixed MLP: two 800-unit hidden layers and a 10-unit softmax.\n",
    "\n",
    "    Dropout of 20% is applied to the input and of 50% after each hidden\n",
    "    layer. `input_var` is the Theano variable to link to the input layer,\n",
    "    if any. The output layer is returned.\n",
    "    \"\"\"\n",
    "    layers = lasagne.layers\n",
    "    rectify = lasagne.nonlinearities.rectify\n",
    "\n",
    "    # Input layer: unspecified batchsize, 1 channel, 28 rows and 28 columns,\n",
    "    # linked to `input_var` when one is given.\n",
    "    net_in = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)\n",
    "\n",
    "    # 20% dropout on the input data.\n",
    "    net_in_dropped = layers.DropoutLayer(net_in, p=0.2)\n",
    "\n",
    "    # First fully-connected layer of 800 rectifier units. Glorot's weight\n",
    "    # initialization is passed explicitly (it is the default anyway).\n",
    "    hidden_a = layers.DenseLayer(\n",
    "            net_in_dropped, num_units=800, nonlinearity=rectify,\n",
    "            W=lasagne.init.GlorotUniform())\n",
    "    hidden_a_dropped = layers.DropoutLayer(hidden_a, p=0.5)\n",
    "\n",
    "    # Second 800-unit layer, again followed by 50% dropout.\n",
    "    hidden_b = layers.DenseLayer(\n",
    "            hidden_a_dropped, num_units=800, nonlinearity=rectify)\n",
    "    hidden_b_dropped = layers.DropoutLayer(hidden_b, p=0.5)\n",
    "\n",
    "    # Output layer of 10 softmax units. Each layer is linked to its incoming\n",
    "    # layer(s), so returning this one gives access to the whole network.\n",
    "    return layers.DenseLayer(\n",
    "            hidden_b_dropped, num_units=10,\n",
    "            nonlinearity=lasagne.nonlinearities.softmax)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,\n",
    "                     drop_hidden=.5):\n",
    "    \"\"\"Build an MLP with `depth` hidden layers of `width` units each.\n",
    "\n",
    "    `drop_input` and `drop_hidden` are the dropout probabilities for the\n",
    "    input and for every hidden layer; a falsy value skips that dropout.\n",
    "    The layers are not given individual names because only the last one\n",
    "    is returned. Returns the 10-unit softmax output layer.\n",
    "    \"\"\"\n",
    "    layers = lasagne.layers\n",
    "    rectify = lasagne.nonlinearities.rectify\n",
    "\n",
    "    # Input layer, optionally followed by dropout (`dropout` is the\n",
    "    # shortcut for `DropoutLayer`).\n",
    "    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)\n",
    "    if drop_input:\n",
    "        net = layers.dropout(net, p=drop_input)\n",
    "\n",
    "    # Stack the hidden layers, each optionally followed by dropout.\n",
    "    for _layer in range(depth):\n",
    "        net = layers.DenseLayer(net, width, nonlinearity=rectify)\n",
    "        if drop_hidden:\n",
    "            net = layers.dropout(net, p=drop_hidden)\n",
    "\n",
    "    # Output layer.\n",
    "    return layers.DenseLayer(\n",
    "            net, 10, nonlinearity=lasagne.nonlinearities.softmax)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def build_cnn(input_var=None):\n",
    "    \"\"\"Build a CNN of two convolution + pooling stages and two dense layers.\n",
    "\n",
    "    Each stage is a 32-filter 5x5 convolution followed by 2x2 max-pooling;\n",
    "    the 256-unit hidden layer and the 10-unit softmax output both get 50%\n",
    "    dropout on their inputs. `input_var` is linked to the input layer, if\n",
    "    given. Returns the output layer.\n",
    "    \"\"\"\n",
    "    layers = lasagne.layers\n",
    "    rectify = lasagne.nonlinearities.rectify\n",
    "\n",
    "    # Input layer, as usual. No input dropout this time, as it tends to\n",
    "    # work less well for convolutional layers.\n",
    "    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)\n",
    "\n",
    "    # Stage 1: 32 kernels of size 5x5, then max-pooling of factor 2 in both\n",
    "    # dimensions. Strided and padded convolutions are supported as well;\n",
    "    # see the docstring.\n",
    "    # Expert note: Lasagne provides alternative convolutional layers that\n",
    "    # override Theano's choice of which implementation to use; for details\n",
    "    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.\n",
    "    net = layers.Conv2DLayer(\n",
    "            net, num_filters=32, filter_size=(5, 5), nonlinearity=rectify,\n",
    "            W=lasagne.init.GlorotUniform())\n",
    "    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))\n",
    "\n",
    "    # Stage 2: another 32 5x5 kernels and another 2x2 pooling.\n",
    "    net = layers.Conv2DLayer(\n",
    "            net, num_filters=32, filter_size=(5, 5), nonlinearity=rectify)\n",
    "    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))\n",
    "\n",
    "    # Fully-connected layer of 256 units with 50% dropout on its inputs.\n",
    "    dropped = layers.dropout(net, p=.5)\n",
    "    net = layers.DenseLayer(dropped, num_units=256, nonlinearity=rectify)\n",
    "\n",
    "    # 10-unit output layer, again with 50% dropout on its inputs.\n",
    "    dropped = layers.dropout(net, p=.5)\n",
    "    return layers.DenseLayer(\n",
    "            dropped, num_units=10,\n",
    "            nonlinearity=lasagne.nonlinearities.softmax)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Batch iterator\n",
    "This is just a simple helper function iterating over training data in mini-batches of a particular size, optionally in random order. It assumes data is available as numpy arrays. For big datasets, you could load numpy arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your own custom data iteration function. For small datasets, you can also copy them to GPU at once for slightly improved performance. This would involve several changes in the main program, though, and is not demonstrated here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def iterate_minibatches(inputs, targets, batchsize, shuffle=False):\n",
    "    \"\"\"Yield (inputs, targets) mini-batches of exactly `batchsize` examples.\n",
    "\n",
    "    With `shuffle` the examples are visited in a random order drawn from\n",
    "    numpy's global random state. Trailing examples that do not fill a\n",
    "    complete batch are not yielded.\n",
    "    \"\"\"\n",
    "    n_examples = len(inputs)\n",
    "    assert n_examples == len(targets)\n",
    "    order = None\n",
    "    if shuffle:\n",
    "        order = np.arange(n_examples)\n",
    "        np.random.shuffle(order)\n",
    "    for begin in range(0, n_examples - batchsize + 1, batchsize):\n",
    "        end = begin + batchsize\n",
    "        # Index with a plain slice when unshuffled, else with the permuted\n",
    "        # index array.\n",
    "        selection = slice(begin, end) if order is None else order[begin:end]\n",
    "        yield inputs[selection], targets[selection]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Main program\n",
    "Everything else will be handled in our main program now. We could pull out more functions to better separate the code, but it wouldn't make it any easier to read."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def main(model='mlp', num_epochs=500):\n",
    "    \"\"\"Train the selected model on MNIST and print the results.\n",
    "\n",
    "    model: 'mlp', 'cnn', or 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'.\n",
    "    num_epochs: number of full passes over the training data.\n",
    "\n",
    "    Prints a message and returns without training if `model` is not\n",
    "    recognized.\n",
    "    \"\"\"\n",
    "    # Load the dataset\n",
    "    print(\"Loading data...\")\n",
    "    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()\n",
    "\n",
    "    # Prepare Theano variables for inputs and targets\n",
    "    input_var = T.tensor4('inputs')\n",
    "    target_var = T.ivector('targets')\n",
    "\n",
    "    # Create neural network model (depending on the `model` argument)\n",
    "    print(\"Building model and compiling functions...\")\n",
    "    if model == 'mlp':\n",
    "        network = build_mlp(input_var)\n",
    "    elif model.startswith('custom_mlp:'):\n",
    "        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')\n",
    "        network = build_custom_mlp(input_var, int(depth), int(width),\n",
    "                                   float(drop_in), float(drop_hid))\n",
    "    elif model == 'cnn':\n",
    "        network = build_cnn(input_var)\n",
    "    else:\n",
    "        # Stop here: without a network the code below would only fail with\n",
    "        # a confusing NameError.\n",
    "        print(\"Unrecognized model type %r.\" % model)\n",
    "        return\n",
    "\n",
    "    # Create a loss expression for training, i.e., a scalar objective we want\n",
    "    # to minimize (for our multi-class problem, it is the cross-entropy loss):\n",
    "    prediction = lasagne.layers.get_output(network)\n",
    "    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)\n",
    "    loss = loss.mean()\n",
    "    # We could add some weight decay as well here, see lasagne.regularization.\n",
    "\n",
    "    # Create update expressions for training, i.e., how to modify the\n",
    "    # parameters at each training step. Here, we'll use Stochastic Gradient\n",
    "    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.\n",
    "    params = lasagne.layers.get_all_params(network, trainable=True)\n",
    "    updates = lasagne.updates.nesterov_momentum(\n",
    "            loss, params, learning_rate=0.01, momentum=0.9)\n",
    "\n",
    "    # Create a loss expression for validation/testing. The crucial difference\n",
    "    # here is that we do a deterministic forward pass through the network,\n",
    "    # disabling dropout layers.\n",
    "    test_prediction = lasagne.layers.get_output(network, deterministic=True)\n",
    "    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,\n",
    "                                                            target_var)\n",
    "    test_loss = test_loss.mean()\n",
    "    # As a bonus, also create an expression for the classification accuracy:\n",
    "    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),\n",
    "                      dtype=theano.config.floatX)\n",
    "\n",
    "    # Compile a function performing a training step on a mini-batch (by giving\n",
    "    # the updates dictionary) and returning the corresponding training loss:\n",
    "    train_fn = theano.function([input_var, target_var], loss, updates=updates)\n",
    "\n",
    "    # Compile a second function computing the validation loss and accuracy:\n",
    "    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])\n",
    "\n",
    "    # Finally, launch the training loop.\n",
    "    print(\"Starting training...\")\n",
    "    # We iterate over epochs:\n",
    "    for epoch in range(num_epochs):\n",
    "        # In each epoch, we do a full pass over the training data:\n",
    "        train_err = 0\n",
    "        train_batches = 0\n",
    "        start_time = time.time()\n",
    "        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):\n",
    "            inputs, targets = batch\n",
    "            train_err += train_fn(inputs, targets)\n",
    "            train_batches += 1\n",
    "\n",
    "        # And a full pass over the validation data:\n",
    "        val_err = 0\n",
    "        val_acc = 0\n",
    "        val_batches = 0\n",
    "        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):\n",
    "            inputs, targets = batch\n",
    "            err, acc = val_fn(inputs, targets)\n",
    "            val_err += err\n",
    "            val_acc += acc\n",
    "            val_batches += 1\n",
    "\n",
    "        # Then we print the results for this epoch:\n",
    "        print(\"Epoch {} of {} took {:.3f}s\".format(\n",
    "            epoch + 1, num_epochs, time.time() - start_time))\n",
    "        print(\"  training loss:\\t\\t{:.6f}\".format(train_err / train_batches))\n",
    "        print(\"  validation loss:\\t\\t{:.6f}\".format(val_err / val_batches))\n",
    "        print(\"  validation accuracy:\\t\\t{:.2f} %\".format(\n",
    "            val_acc / val_batches * 100))\n",
    "\n",
    "    # After training, we compute and print the test error. `val_fn` is already\n",
    "    # compiled, so reusing the name `test_acc` as an accumulator is harmless.\n",
    "    test_err = 0\n",
    "    test_acc = 0\n",
    "    test_batches = 0\n",
    "    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):\n",
    "        inputs, targets = batch\n",
    "        err, acc = val_fn(inputs, targets)\n",
    "        test_err += err\n",
    "        test_acc += acc\n",
    "        test_batches += 1\n",
    "    print(\"Final results:\")\n",
    "    print(\"  test loss:\\t\\t\\t{:.6f}\".format(test_err / test_batches))\n",
    "    print(\"  test accuracy:\\t\\t{:.2f} %\".format(\n",
    "        test_acc / test_batches * 100))\n",
    "\n",
    "    # Optionally, you could now dump the network weights to a file like this\n",
    "    # (the `*` stores each parameter array as its own entry):\n",
    "    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train with the defaults: the 'mlp' model for 500 epochs.\n",
    "main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Usage example employing Lasagne for digit recognition using the MNIST dataset.\n",
	"\n",
	"This example is deliberately structured as a long flat file, focusing on how\n",
	"to use Lasagne, instead of focusing on writing maximally modular and reusable\n",
	"code. It is used as the foundation for the introductory Lasagne tutorial:\n",
	"http://lasagne.readthedocs.org/en/latest/user/tutorial.html\n",
	"\n",
	"More in-depth examples and reproductions of paper results are maintained in\n",
	"a separate repository: https://github.com/Lasagne/Recipes"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"from __future__ import print_function\n",
	"\n",
	"import sys\n",
	"import os\n",
	"import time\n",
	"\n",
	"import numpy as np\n",
	"import theano\n",
	"import theano.tensor as T\n",
	"\n",
	"import lasagne"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Download and prepare the MNIST dataset\n",
	"This is just some way of getting the MNIST dataset from an online location and loading it into numpy arrays. It doesn't involve Lasagne at all."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def load_dataset():\n",
	"    \"\"\"Fetch MNIST (downloading it on first use) and return six numpy arrays.\n",
	"\n",
	"    Returns the tuple (X_train, y_train, X_val, y_val, X_test, y_test).\n",
	"    Inputs are reshaped to (examples, 1, 28, 28); targets are cast to uint8.\n",
	"    \"\"\"\n",
	"    # We first define some helper functions for supporting both Python 2 and 3.\n",
	"    if sys.version_info[0] == 2:\n",
	"        from urllib import urlretrieve\n",
	"        import cPickle as pickle\n",
	"\n",
	"        def pickle_load(f, encoding):\n",
	"            # `encoding` is accepted only to share a signature with the\n",
	"            # Python 3 variant below; it is not forwarded.\n",
	"            return pickle.load(f)\n",
	"    else:\n",
	"        from urllib.request import urlretrieve\n",
	"        import pickle\n",
	"\n",
	"        def pickle_load(f, encoding):\n",
	"            return pickle.load(f, encoding=encoding)\n",
	"\n",
	"    # We'll now download the MNIST dataset if it is not yet available.\n",
	"    url = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'\n",
	"    filename = 'mnist.pkl.gz'\n",
	"    if not os.path.exists(filename):\n",
	"        print(\"Downloading MNIST dataset...\")\n",
	"        # Download under a temporary name and rename afterwards, so that an\n",
	"        # interrupted transfer cannot leave a truncated `filename` behind\n",
	"        # that the existence check above would accept on the next run.\n",
	"        partial = filename + '.part'\n",
	"        urlretrieve(url, partial)\n",
	"        os.rename(partial, filename)\n",
	"\n",
	"    # We'll then load and unpickle the file.\n",
	"    # NOTE(review): unpickling can execute arbitrary code and the archive is\n",
	"    # fetched over plain HTTP -- only use this with a source you trust.\n",
	"    import gzip\n",
	"    with gzip.open(filename, 'rb') as f:\n",
	"        data = pickle_load(f, encoding='latin-1')\n",
	"\n",
	"    # The MNIST dataset we have here consists of six numpy arrays:\n",
	"    # Inputs and targets for the training set, validation set and test set.\n",
	"    X_train, y_train = data[0]\n",
	"    X_val, y_val = data[1]\n",
	"    X_test, y_test = data[2]\n",
	"\n",
	"    # The inputs come as vectors, we reshape them to monochrome 2D images,\n",
	"    # according to the shape convention: (examples, channels, rows, columns)\n",
	"    X_train = X_train.reshape((-1, 1, 28, 28))\n",
	"    X_val = X_val.reshape((-1, 1, 28, 28))\n",
	"    X_test = X_test.reshape((-1, 1, 28, 28))\n",
	"\n",
	"    # The targets are int64, we cast them to uint8 for GPU compatibility.\n",
	"    y_train = y_train.astype(np.uint8)\n",
	"    y_val = y_val.astype(np.uint8)\n",
	"    y_test = y_test.astype(np.uint8)\n",
	"\n",
	"    # We just return all the arrays in order, as expected in main().\n",
	"    # (It doesn't matter how we do this as long as we can read them again.)\n",
	"    return X_train, y_train, X_val, y_val, X_test, y_test"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Build the neural network model\n",
	"This script supports three types of models. For each one, we define a function that takes a Theano variable representing the input and returns the output layer of a neural network model built in Lasagne."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def build_mlp(input_var=None):\n",
	"    \"\"\"Build the fixed MLP: two 800-unit hidden layers and a 10-unit softmax.\n",
	"\n",
	"    Dropout of 20% is applied to the input and of 50% after each hidden\n",
	"    layer. `input_var` is the Theano variable to link to the input layer,\n",
	"    if any. The output layer is returned.\n",
	"    \"\"\"\n",
	"    layers = lasagne.layers\n",
	"    rectify = lasagne.nonlinearities.rectify\n",
	"\n",
	"    # Input layer: unspecified batchsize, 1 channel, 28 rows and 28 columns,\n",
	"    # linked to `input_var` when one is given.\n",
	"    net_in = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)\n",
	"\n",
	"    # 20% dropout on the input data.\n",
	"    net_in_dropped = layers.DropoutLayer(net_in, p=0.2)\n",
	"\n",
	"    # First fully-connected layer of 800 rectifier units. Glorot's weight\n",
	"    # initialization is passed explicitly (it is the default anyway).\n",
	"    hidden_a = layers.DenseLayer(\n",
	"            net_in_dropped, num_units=800, nonlinearity=rectify,\n",
	"            W=lasagne.init.GlorotUniform())\n",
	"    hidden_a_dropped = layers.DropoutLayer(hidden_a, p=0.5)\n",
	"\n",
	"    # Second 800-unit layer, again followed by 50% dropout.\n",
	"    hidden_b = layers.DenseLayer(\n",
	"            hidden_a_dropped, num_units=800, nonlinearity=rectify)\n",
	"    hidden_b_dropped = layers.DropoutLayer(hidden_b, p=0.5)\n",
	"\n",
	"    # Output layer of 10 softmax units. Each layer is linked to its incoming\n",
	"    # layer(s), so returning this one gives access to the whole network.\n",
	"    return layers.DenseLayer(\n",
	"            hidden_b_dropped, num_units=10,\n",
	"            nonlinearity=lasagne.nonlinearities.softmax)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,\n",
	"                     drop_hidden=.5):\n",
	"    \"\"\"Build an MLP with `depth` hidden layers of `width` units each.\n",
	"\n",
	"    `drop_input` and `drop_hidden` are the dropout probabilities for the\n",
	"    input and for every hidden layer; a falsy value skips that dropout.\n",
	"    The layers are not given individual names because only the last one\n",
	"    is returned. Returns the 10-unit softmax output layer.\n",
	"    \"\"\"\n",
	"    layers = lasagne.layers\n",
	"    rectify = lasagne.nonlinearities.rectify\n",
	"\n",
	"    # Input layer, optionally followed by dropout (`dropout` is the\n",
	"    # shortcut for `DropoutLayer`).\n",
	"    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)\n",
	"    if drop_input:\n",
	"        net = layers.dropout(net, p=drop_input)\n",
	"\n",
	"    # Stack the hidden layers, each optionally followed by dropout.\n",
	"    for _layer in range(depth):\n",
	"        net = layers.DenseLayer(net, width, nonlinearity=rectify)\n",
	"        if drop_hidden:\n",
	"            net = layers.dropout(net, p=drop_hidden)\n",
	"\n",
	"    # Output layer.\n",
	"    return layers.DenseLayer(\n",
	"            net, 10, nonlinearity=lasagne.nonlinearities.softmax)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def build_cnn(input_var=None):\n",
	"    \"\"\"Build a CNN of two convolution + pooling stages and two dense layers.\n",
	"\n",
	"    Each stage is a 32-filter 5x5 convolution followed by 2x2 max-pooling;\n",
	"    the 256-unit hidden layer and the 10-unit softmax output both get 50%\n",
	"    dropout on their inputs. `input_var` is linked to the input layer, if\n",
	"    given. Returns the output layer.\n",
	"    \"\"\"\n",
	"    layers = lasagne.layers\n",
	"    rectify = lasagne.nonlinearities.rectify\n",
	"\n",
	"    # Input layer, as usual. No input dropout this time, as it tends to\n",
	"    # work less well for convolutional layers.\n",
	"    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)\n",
	"\n",
	"    # Stage 1: 32 kernels of size 5x5, then max-pooling of factor 2 in both\n",
	"    # dimensions. Strided and padded convolutions are supported as well;\n",
	"    # see the docstring.\n",
	"    # Expert note: Lasagne provides alternative convolutional layers that\n",
	"    # override Theano's choice of which implementation to use; for details\n",
	"    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.\n",
	"    net = layers.Conv2DLayer(\n",
	"            net, num_filters=32, filter_size=(5, 5), nonlinearity=rectify,\n",
	"            W=lasagne.init.GlorotUniform())\n",
	"    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))\n",
	"\n",
	"    # Stage 2: another 32 5x5 kernels and another 2x2 pooling.\n",
	"    net = layers.Conv2DLayer(\n",
	"            net, num_filters=32, filter_size=(5, 5), nonlinearity=rectify)\n",
	"    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))\n",
	"\n",
	"    # Fully-connected layer of 256 units with 50% dropout on its inputs.\n",
	"    dropped = layers.dropout(net, p=.5)\n",
	"    net = layers.DenseLayer(dropped, num_units=256, nonlinearity=rectify)\n",
	"\n",
	"    # 10-unit output layer, again with 50% dropout on its inputs.\n",
	"    dropped = layers.dropout(net, p=.5)\n",
	"    return layers.DenseLayer(\n",
	"            dropped, num_units=10,\n",
	"            nonlinearity=lasagne.nonlinearities.softmax)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Batch iterator\n",
	"This is just a simple helper function iterating over training data in mini-batches of a particular size, optionally in random order. It assumes data is available as numpy arrays. For big datasets, you could load numpy arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your own custom data iteration function. For small datasets, you can also copy them to GPU at once for slightly improved performance. This would involve several changes in the main program, though, and is not demonstrated here."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def iterate_minibatches(inputs, targets, batchsize, shuffle=False):\n",
	"    \"\"\"Yield (inputs, targets) mini-batches of exactly `batchsize` examples.\n",
	"\n",
	"    With `shuffle` the examples are visited in a random order drawn from\n",
	"    numpy's global random state. Trailing examples that do not fill a\n",
	"    complete batch are not yielded.\n",
	"    \"\"\"\n",
	"    n_examples = len(inputs)\n",
	"    assert n_examples == len(targets)\n",
	"    order = None\n",
	"    if shuffle:\n",
	"        order = np.arange(n_examples)\n",
	"        np.random.shuffle(order)\n",
	"    for begin in range(0, n_examples - batchsize + 1, batchsize):\n",
	"        end = begin + batchsize\n",
	"        # Index with a plain slice when unshuffled, else with the permuted\n",
	"        # index array.\n",
	"        selection = slice(begin, end) if order is None else order[begin:end]\n",
	"        yield inputs[selection], targets[selection]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Main program\n",
	"Everything else will be handled in our main program now. We could pull out more functions to better separate the code, but it wouldn't make it any easier to read."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def main(model='mlp', num_epochs=500):\n",
	"    \"\"\"Train the selected model on MNIST and print the results.\n",
	"\n",
	"    model: 'mlp', 'cnn', or 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'.\n",
	"    num_epochs: number of full passes over the training data.\n",
	"\n",
	"    Prints a message and returns without training if `model` is not\n",
	"    recognized.\n",
	"    \"\"\"\n",
	"    # Load the dataset\n",
	"    print(\"Loading data...\")\n",
	"    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()\n",
	"\n",
	"    # Prepare Theano variables for inputs and targets\n",
	"    input_var = T.tensor4('inputs')\n",
	"    target_var = T.ivector('targets')\n",
	"\n",
	"    # Create neural network model (depending on the `model` argument)\n",
	"    print(\"Building model and compiling functions...\")\n",
	"    if model == 'mlp':\n",
	"        network = build_mlp(input_var)\n",
	"    elif model.startswith('custom_mlp:'):\n",
	"        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')\n",
	"        network = build_custom_mlp(input_var, int(depth), int(width),\n",
	"                                   float(drop_in), float(drop_hid))\n",
	"    elif model == 'cnn':\n",
	"        network = build_cnn(input_var)\n",
	"    else:\n",
	"        # Stop here: without a network the code below would only fail with\n",
	"        # a confusing NameError.\n",
	"        print(\"Unrecognized model type %r.\" % model)\n",
	"        return\n",
	"\n",
	"    # Create a loss expression for training, i.e., a scalar objective we want\n",
	"    # to minimize (for our multi-class problem, it is the cross-entropy loss):\n",
	"    prediction = lasagne.layers.get_output(network)\n",
	"    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)\n",
	"    loss = loss.mean()\n",
	"    # We could add some weight decay as well here, see lasagne.regularization.\n",
	"\n",
	"    # Create update expressions for training, i.e., how to modify the\n",
	"    # parameters at each training step. Here, we'll use Stochastic Gradient\n",
	"    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.\n",
	"    params = lasagne.layers.get_all_params(network, trainable=True)\n",
	"    updates = lasagne.updates.nesterov_momentum(\n",
	"            loss, params, learning_rate=0.01, momentum=0.9)\n",
	"\n",
	"    # Create a loss expression for validation/testing. The crucial difference\n",
	"    # here is that we do a deterministic forward pass through the network,\n",
	"    # disabling dropout layers.\n",
	"    test_prediction = lasagne.layers.get_output(network, deterministic=True)\n",
	"    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,\n",
	"                                                            target_var)\n",
	"    test_loss = test_loss.mean()\n",
	"    # As a bonus, also create an expression for the classification accuracy:\n",
	"    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),\n",
	"                      dtype=theano.config.floatX)\n",
	"\n",
	"    # Compile a function performing a training step on a mini-batch (by giving\n",
	"    # the updates dictionary) and returning the corresponding training loss:\n",
	"    train_fn = theano.function([input_var, target_var], loss, updates=updates)\n",
	"\n",
	"    # Compile a second function computing the validation loss and accuracy:\n",
	"    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])\n",
	"\n",
	"    # Finally, launch the training loop.\n",
	"    print(\"Starting training...\")\n",
	"    # We iterate over epochs:\n",
	"    for epoch in range(num_epochs):\n",
	"        # In each epoch, we do a full pass over the training data:\n",
	"        train_err = 0\n",
	"        train_batches = 0\n",
	"        start_time = time.time()\n",
	"        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):\n",
	"            inputs, targets = batch\n",
	"            train_err += train_fn(inputs, targets)\n",
	"            train_batches += 1\n",
	"\n",
	"        # And a full pass over the validation data:\n",
	"        val_err = 0\n",
	"        val_acc = 0\n",
	"        val_batches = 0\n",
	"        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):\n",
	"            inputs, targets = batch\n",
	"            err, acc = val_fn(inputs, targets)\n",
	"            val_err += err\n",
	"            val_acc += acc\n",
	"            val_batches += 1\n",
	"\n",
	"        # Then we print the results for this epoch:\n",
	"        print(\"Epoch {} of {} took {:.3f}s\".format(\n",
	"            epoch + 1, num_epochs, time.time() - start_time))\n",
	"        print(\" training loss:\\t\\t{:.6f}\".format(train_err / train_batches))\n",
	"        print(\" validation loss:\\t\\t{:.6f}\".format(val_err / val_batches))\n",
	"        print(\" validation accuracy:\\t\\t{:.2f} %\".format(\n",
	"            val_acc / val_batches * 100))\n",
	"\n",
	"    # After training, we compute and print the test error. `val_fn` is already\n",
	"    # compiled, so reusing the name `test_acc` as an accumulator is harmless.\n",
	"    test_err = 0\n",
	"    test_acc = 0\n",
	"    test_batches = 0\n",
	"    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):\n",
	"        inputs, targets = batch\n",
	"        err, acc = val_fn(inputs, targets)\n",
	"        test_err += err\n",
	"        test_acc += acc\n",
	"        test_batches += 1\n",
	"    print(\"Final results:\")\n",
	"    print(\" test loss:\\t\\t\\t{:.6f}\".format(test_err / test_batches))\n",
	"    print(\" test accuracy:\\t\\t{:.2f} %\".format(\n",
	"        test_acc / test_batches * 100))\n",
	"\n",
	"    # Optionally, you could now dump the network weights to a file like this\n",
	"    # (the `*` stores each parameter array as its own entry):\n",
	"    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Train with the defaults: the 'mlp' model for 500 epochs.\n",
	"main()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}
No results found