Last active
December 28, 2016 14:27
-
-
Save benjaminwilson/b25a321f292f98d74269b83d4ed2b9a8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# LCD digits dataset shows NMF parts-based decomposition\n", | |
| "\n", | |
| "This synthetic image dataset shows very nicely how NMF decomposes images into sums of their parts. The collection of images is encoded as a 2d array of non-negative values. Each row corresponds to an image, and each column corresponds to a pixel. The non-negative entries represent the whiteness of the pixel, encoded here as a value between 0 and 1.\n", | |
| " \n", | |
| "See also the accompanying blog post http://building-babylon.net/2016/12/28/an-lcd-digit-dataset-for-illustrating-the-parts-based-representation-of-nmf/ .\n", | |
| "\n", | |
| "Released under Apache Licence v2.0. I hope you find it useful." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "length = 4 # length of any LCD cell (\"stroke\")\n", | |
| "shape = (2 * length + 5, length + 4) # shape of the images" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Code for displaying a vector as an image" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from matplotlib import pyplot as plt\n", | |
| "\n", | |
| "def show_as_image(vector):\n", | |
| " \"\"\"\n", | |
| " Given a 1d vector representing an image, display that image in \n", | |
| " black and white. If there are negative values, then use red for \n", | |
| " that pixel.\n", | |
| " (displaying negative pixel values in red allows e.g. visual contrasting\n", | |
| " of PCA and NMF components)\n", | |
| " \"\"\"\n", | |
| " bitmap = vector.copy().reshape(shape) # reshape into a 2d array of the image shape\n", | |
| " bitmap /= np.abs(vector).max() # normalise (a copy!)\n", | |
| " bitmap = bitmap[:,:,np.newaxis]\n", | |
| " rgb_layers = [np.abs(bitmap)] + [bitmap.clip(0)] * 2\n", | |
| " rgb_bitmap = np.concatenate(rgb_layers, axis=-1)\n", | |
| " plt.figure(figsize=(2, 1))\n", | |
| " plt.imshow(rgb_bitmap, interpolation='nearest')\n", | |
| " plt.xticks([])\n", | |
| " plt.yticks([])\n", | |
| " plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Code for generating the images" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 1. 1. 1. 1. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import numpy as np\n", | |
| "\n", | |
| "def vertical_stroke(rightness, downness):\n", | |
| " \"\"\"\n", | |
| " Return a 2d numpy array representing an image with a single vertical stroke in it.\n", | |
| " `rightness` and `downness` are each either 0 or 1 and define the position of the vertical stroke.\n", | |
| " \"\"\"\n", | |
| " i = (downness * (length + 1)) + 2\n", | |
| " j = rightness * (length + 1) + 1\n", | |
| " x = np.zeros(shape=shape, dtype=np.float64)\n", | |
| " for delta in range(length):\n", | |
| " x[i + delta, j] = 1.\n", | |
| " return x\n", | |
| "\n", | |
| "def horizontal_stroke(downness):\n", | |
| " \"\"\"\n", | |
| " Analogous to vertical_stroke, but it returns horizontal strokes.\n", | |
| " `downness` is here a value in [0, 1, 2].\n", | |
| " \"\"\"\n", | |
| " i = (downness * (length + 1)) + 1\n", | |
| " x = np.zeros(shape=shape, dtype=np.float64)\n", | |
| " for j in range(length):\n", | |
| " x[i, 2 + j] = 1.\n", | |
| " return x\n", | |
| "\n", | |
| "print(horizontal_stroke(0))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "strokes = [horizontal_stroke(k) for k in range(3)] + [vertical_stroke(k, l) for k in range(2) for l in range(2)]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import random\n", | |
| "\n", | |
| "def random_composition():\n", | |
| " \"\"\"\n", | |
| " Return a random composition of 2, 3, 4, or 5 strokes as a single 2d numpy array.\n", | |
| " (So not guaranteed to look like a real digit!)\n", | |
| " \"\"\"\n", | |
| " x = np.zeros(shape=shape, dtype=np.float64)\n", | |
| " num_strokes = random.choice([2, 3, 4, 5])\n", | |
| " sample = random.sample(strokes, num_strokes) # without replacement\n", | |
| " for atom in sample:\n", | |
| " x += atom\n", | |
| " return x" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 156, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 1. 1. 1. 1. 0. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 1. 1. 1. 1. 0. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
| " [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
| " [ 0. 0. 0. 0. 0. 0. 0. 0.]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "bitmap = random_composition()\n", | |
| "print(bitmap)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 157, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAYhJREFUeJzt3LFNw1AUQNH3EEWUlgVcUiJaBmAI5ssQGQBmoMwC9HSf\nIhS3sfXQtx1b3NOksOU8X1nfimwlW2uhq7tbD7AlxgBjgDHAGGAMMAaUYmTmMTOfM/O49EBLqM5/\nXzzeU0S8R8RbZn52T7e+x4g4RcRLRHyM7VSNMfx+nvpmurkhJmJU14zLHJNswGVqYzXGd/8cmzB5\nHt5NoLpm/Nkav4Yzc9bjeWWAMcAYYAwwBnTfTcbuGnOv9GvwygBjgDHAGGAMMAYYA4wBxgBjgDHA\nGGAMMAYYA4wBxgBjgDHAGGAMMAYYA7qfm4w9H/Ep/M4ZA4wBxgBjgDFg168kzP3dXhlgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY8BirySswVcSFmQMMAYYA4wBxgBjgDHA\nGGAMqMY4LDrFeibPoxpj6J9jE4apjVl5xzszHyLiNa5/FLjH//k7xDXEubX2NbZTKcZ/4QIKxgBj\ngDHAGGAMMAb8AJimKhAh4kfnAAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10d3e0128>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "show_as_image(bitmap.flatten())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Generate the dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "(200, 104)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "N = 200\n", | |
| "samples = [random_composition().flatten() for _ in range(N)]\n", | |
| "X = np.vstack(samples)\n", | |
| "print(X.shape)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# NMF learns the individual LCD cells" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 158, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from sklearn.decomposition import NMF\n", | |
| "\n", | |
| "model = NMF(n_components=7)\n", | |
| "model.fit(X)\n", | |
| "X_transformed = model.transform(X)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 159, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXRJREFUeJzt3LFNw1AURuH/IoooLQu4pES0DMAQzJchGABmoMwC9HSP\nwhSnsfUUx4kjzteksGVdH1lOYftVay0a3V17gC0xBhgDjAHGAGOAMaArRlXtq+q5qvZrD7SG3vnv\nO4/3lOQjyVtVfS2e7vIekxySvCT5nNqpN8bw93tYNtPVDZmJ0XvPOJ5jkg04zm3sjfGzfI5NmD0P\n/03AGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wB\nxgBjgDHAGGAMMAYYA4wBvS/FbtLUJ2VVddLxvDLAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBj\ngDHAGGAMMAYYA4wBxoCbfm5y6vORKV4ZYAwwBhgDjAHGAGOAMcAYYAzojbFbdYrLmT2P3hjD8jk2\nYZjbWD3rjlfVQ5LXjAsF3uI6f7uMId5ba99TO3XF+C+8gYIxwBhgDDAGGAOMAb/0Ox4ERg6wHgAA\nAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10d3e0160>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXRJREFUeJzt3LFNw1AURuH/IoooLQu4pES0DMAQzJchGABmoMwC9HSP\nwhSnsfUUx4kjzteksGVdH1mOZUuvWmvR6O7aA2yJMcAYYAwwBhgDjAFdMapqX1XPVbVfe6A19M5/\n33m8pyQfSd6q6mvxdJf3mOSQ5CXJ59ROvTGGv9/DspmubshMjN57xvEck2zAcW5jb4yf5XNswux5\n+G8CxgBjgDHAGGAMMAb0PoFu0tT726o66XheGWAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHA\nGGAMMAYYA4wBxgBjwE1/Nzn1+8gUrwwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgD\njAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAzojbFbdYrLmT2P3hjD8jk2\nYZjbWD3rjlfVQ5LXjAsF3uI6f7uMId5ba99TO3XF+C+8gYIxwBhgDDAGGAOMAb+XDR4EFHrWqQAA\nAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x1048477f0>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXNJREFUeJzt3L1NxEAUReH7EMFqUxpwSIhIKYAiqG+LoACogXAbICcb\nAhOcZK1B/lkjzpc4sGU9H1meZORqrUWjm2sPsCfGAGOAMcAYYAwwBnTFqKpjVT1W1XHtgdbQO/9t\n5/0ekrwleamqj9nTbe8+ySnJU5L3Sxf1xhh+jqd5M13dkIkYvd+M8xKT7MB56mRvjK/5c+zC5HO4\nmoAxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAb07d35tiz3pVbXo/XwzwBhgDDAGGAOMAastrUsve1vwzQBjgDHAGGAMMAYYA4wBxgBjgDHAGGAM\nMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjQG+Mw6pTbGfyOXpjDPPn2IVh\n6mT17PGuqrskzxl/FPgX//N3yBjitbX2eemirhj/hR9QMAYYA4wBxgBjgDHgG7+5HgRvcxjuAAAA\nAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10d46a048>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXNJREFUeJzt3L1NxEAUReH7EMFqUxpwSIhIKYAiqG+LoACogXAbICcb\nAhOcZK1B/lkjzpc4sGU9H1ljOZlqrUWjm2sPsCfGAGOAMcAYYAwwBnTFqKpjVT1W1XHtgdbQO/9t\n5/0ekrwleamqj9nTbe8+ySnJU5L3Sxf1xhh+jqd5M13dkIkYvWvGeYlJduA8dbI3xtf8OXZh8jn8\nmoAxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAG\nGAN6NxP5tS22yauqRe/nmwHGAGOAMcAYsNrXZOmVfgu+GWAMMAYYA3pjHFadYjuTz9EbY5g/xy4M\nUyer54eqqu6SPGfcKPAv7vN3yBjitbX2eemirhj/hQsoGAOMAcYAY4AxwBjwDSI3HgSezP/jAAAA\nAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10d31efd0>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXVJREFUeJzt3DFOw0AQQNEZRBGl5QIuKREtB+AQnC+H4ABwBspcgJ5u\nKUzxm7VG2thxxH9NClvW+MtyCtubrbXQ7O7aA+yJMcAYYAwwBhgDjAGlGJl5zMznzDyuPdAaqvPf\nF4/3FBEfEfGWmV/D023vMSJOEfESEZ+9naoxpr/f09hMVzfFQozqPeN8iUl24Ly0sRrjZ3yOXVg8\nD/9NwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAdWXYrt6n3Vl5uihN+eVAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAG\nGAOMAcYAY4AxwBhgDDAGDD83ucXnIz1eGWAMMAYYA4wBxgBjgDHAGGAMqMY4rDrFdhbPoxpjGp9j\nF6aljVlZdzwzHyLiNeaFAm9xnb9DzCHeW2vfvZ1KMf4Lb6BgDDAGGAOMAcYAY8Avac0eBNFJxq8A\nAAAASUVORK5CYII=\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10ff8dfd0>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXJJREFUeJzt3L1NxEAUReH7EMFqUxpwSIhIKYAiqG+LoACogXAbICcb\nAhOcZK1B/lkjzpc4sGU9H1ljOZlqrUWjm2sPsCfGAGOAMcAYYAwwBnTFqKpjVT1W1XHtgdbQO/9t\n5/0ekrwleamqj9nTbe8+ySnJU5L3Sxf1xhh+jqd5M13dkIkYvWvGeYlJduA8dbI3xtf8OXZh8jn8\nmkDvmvFrW/wNV9Wi9/PNAGOAMcAYYAxY7Wuy9Eq/Bd8MMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBj\ngDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wB\nxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAM6I1xWHWK7Uw+R2+MYf4cuzBM\nnayendaq6i7Jc8aNAv/iPn+HjCFeW2ufly7qivFfuICCMcAYYAwwBhgDjAHfOUQeBFs6qIkAAAAA\nSUVORK5CYII=\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x111621f60>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXVJREFUeJzt3DFOw0AQQNEZRBGl5QIuKREtB+AQnC+H4ABwBspcgJ5u\nKUzxm7VG2thxxH9NClvW+MtyLFvabK2FZnfXHmBPjAHGAGOAMcAYYAwoxcjMY2Y+Z+Zx7YHWUJ3/\nvni8p4j4iIi3zPwanm57jxFxioiXiPjs7VSNMf39nsZmuropFmJU7xnnS0yyA+eljdUYP+Nz7MLi\nefhvAsYAY4AxwBhgDDAGVJ9Au3rvUDNz9NCb88oAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOGv5vc4veRHq8MMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYY\nA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMqMY4rDrFdhbPoxpjGp9j\nF6aljVlZdzwzHyLiNeaFAm9xnb9DzCHeW2vfvZ1KMf4Lb6BgDDAGGAOMAcYAY8AvDJ8eBEUOVQQA\nAAAASUVORK5CYII=\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10fae0048>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "for component in model.components_:\n", | |
| " show_as_image(component)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 164, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAYZJREFUeJzt3LFNw0AUgOH3EEWUlgVcUiJaBmAI5ssQDAAzUGYBerqj\nMMVf4NMF+xxb/F+Twpbz/Ms6K3LkLKWERjfXHmBLjAHGAGOAMcAYYAxoipGZx8x8zMxj74F6aJ3/\ntvF4DxHxFhEvmfkxe7r13UfEKSKeIuJ9aqfWGMPP52neTFc3RCVG65pxXmKSDTjXNrbG+Jo/xyZU\nz8O7CbSuGRdb49dwZi56PK8MMAYYA4wBxoBud5OlV/rfTN2x/vrdXhlgDDAGGAOMAcYAY4AxwBhg\nDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY0C35yZr8Cl8R8YAY4AxwBhgDNj1rdW/JHRkDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY8Cun5v4l4SOjAHGAGOAMcAYYAwwBhgD\njAGtMQ5dp1hP9TxaYwzz59iEobYxW960lpl3EfEc44sC9/iev0OMIV5LKZ9TOzXF+C9cQMEYYAww\nBhgDjAHGgG9+hiQQGsUb9QAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10d2c6390>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "digit7 = horizontal_stroke(0) + vertical_stroke(1, 0) + vertical_stroke(1, 1)\n", | |
| "show_as_image(digit7)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 165, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[ 0.4930625 , 0.54888887, 0. , 0. , 0. ,\n", | |
| " 0.39639133, 0. ]])" | |
| ] | |
| }, | |
| "execution_count": 165, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model.transform([digit7.flatten()])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "# Demonstration that PCA does not!\n", | |
| "\n", | |
| "The components that we obtain from PCA are dense and cannot be interpreted as individual LCD cells." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 160, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "PCA(copy=True, iterated_power='auto', n_components=7, random_state=None,\n", | |
| " svd_solver='auto', tol=0.0, whiten=False)" | |
| ] | |
| }, | |
| "execution_count": 160, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from sklearn.decomposition import PCA\n", | |
| "\n", | |
| "pca = PCA(n_components=7)\n", | |
| "pca.fit(X)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 161, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAd9JREFUeJzt3D1uGlEUQOF7IxeINhugjKCJHDrXURaRFbAiVkDBEqLU\ncUXSgVyyAffpXgos6xQwfjPDAFbO11iagafL0dMgzE+WUkIHH649wC0xBhgDjAHGAGOAMaAqRmaO\nM/M+M8dDDzSE2vnvKtf7HBG/IuJ7Zj71nu7yPkXEKiIeIuLx1I1qY0xe/q76zXR1k2iIUXvN2J9j\nkhuwbzpZG+Nv/zluQuPj8NkEaq8ZrW2326GWfjWbzc66njsDjAHGAGOAMaD3s8l6vT56/NxX+mNO\n/fc2O67nzgBjgDHAGGAMGOy1ySWc+71AdwYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAZk\nzWfHM/M+In63WXg6nXadqdput2t7ly+llD+nTrozwBhgDDAGGAOMAb3fRJrP50ePbzabvku/abFY\nHD2+XC47refOAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDBvtIwteu\nE7Xws/1d/EhCLWOAMcAYYAwY7KucHa70V+fOAGOAMcAYUBtjNOgUl9P4OGpjTPrPcRMmTSdrX6h9\njIhvcfihwPf4O3+jOIT4UUp5PnWjqhj/Cy+gYAwwBhgDjAHGAGPAPw7SPTXqg1X7AAAAAElFTkSu\nQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10fb0ef60>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAeJJREFUeJzt3DFOKkEcgPH/37wEY2e8ABWxNFi+A3gBOlvOYcE5aD3E\nO4B2WhsqL/BiK9VY8PLyFewyOIAYv19CSHZhMvtlshRMNkspoZWTr57AMTEGGAOMAcYAY4AxoCpG\nZp5l5jgzz/Y9oX2onf+vyvGuIuIhIm4z86V5dod3GRH3EfE7Ih67PlQbY/jv/b5tTl9uGD0xau8Z\nr7uYyRF47TtZG+O9fR5Hofc6/DWB2nvG1qbT6b6G/m8+n+90PFcGGAOMAcYAY0Dzr8lkMll7fNd3\n+nUGg8Ha48vl8lPjuTLAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxoCs\n2TuemeOIeNpm4EPsSM/tv3JdSnnuOunKAGOAMcAYYAwwBjRvSVgsFmuP52jUOvRGbx3Hzz85nisD\njAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAxo/t9k1PH/yGw2ax16o/O7\nu52O58oAY4AxwBhgDCqlbHxFxDhWe9a++2vcd52uDDAGGAOMAbUxTvc6i8PpvY7aGMP2eRyFYd/J\n2r3jFxFxE6sHBX7H5/ydxirEn1LK364PVcX4KbyBgjHAGGAMMAYYA4wBHz1mlLK8+h9yAAAAAElF\nTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x111567e80>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAdNJREFUeJzt3DFOG0EUgOH3UAqLlgu4TIlMSY04gSu4g48QcQTfIVQ+\nAaKGDmpKLkBPNxRG0V8wm0lm1xjxf42lXXv0/Gu0LmxvllJCWwefPcA+MQYYA4wBxgBjgDGgKUZm\nHmbmIjMPpx5oCq3z/2hc7zgi7iLiIjOfuqfbvZ8RcR0RpxFxX3tSa4z5++N130yfbh4DMVqvGc9j\nTLIHnodOtsZ47Z9jLwy+Dz9NoPWa8c82m81US/+xXC5HXc+dAcYAY4AxwBjQ/Wnyu3J87Cv9R64q\nx3/953ruDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxoPt7k8vK8fV6\n3bv0X61Wq1HXc2eAMcAYYAwwBhgDsuVvWZm5iIiHj87dVV5z2jFUq9vK8bP6S05KKY+1k+4MMAYY\nA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDGg+ycJte9HdnGbvMwcdT13BhgD\njAHGAGPAZLeZGftKvwvuDDAGGAOMAa0xZpNOsTuD76M1xrx/jr0wHzrZ+gO3o4g4j+2NAr/iff5m\nsQ1xU0p5qT2pKcZ34QUUjAHGAGOAMcAYYAx4A65JMUDNCXevAAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10d30f4e0>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAclJREFUeJzt3DFKA1EUQNH3RIjYiRtIaSm2LsAmWYHrcwVJ4wJ0DZZu\nQGwNBMciIrfIjD/5M8mI9zSBSfi8XD4zkCGTTdOENk6OPcCYGAOMAcYAY4AxwBhQFCMzzzPzJjPP\nhx5oCKXznxaudx0RTxFxn5kv1dMd3lVEPETEbUQ8t32oNMb0+/Whbqajm0ZHjNJzxmsfk4zAa9eb\npTE+6ucYhc7v4dUESs8ZO7sYamF473k9dwYYA4wBxgBjQPXVZL1eb1/4dLAL1Y/ZbLb1+HK53Gs9\ndwYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGFB9c6Pt/sh+dy52M9vz\n/kgbdwYYA4wBxgBjgDGg+tL62XL8EJUXi8XW4/P5fK/13BlgDDAGGAOMAdVXk+xjipFwZ4AxwBhg\nDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY8Bgv3StVqvapX81mUx6Xc+dAcYAY4AxwBgw2L/3+z7T\nH4I7A4wBxgBjQGmMs0GnOJzO71EaY1o/xyhMu97MkueOZ+ZlRNzF5kGBf/E5f2exCfHYNM1b24eK\nYvwXnkDBGGAMMAYYA4wBxoAvz68rluodCCEAAAAASUVORK5CYII=\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10453b8d0>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAcVJREFUeJzt3DFOAkEUgOH3iAXYegFK7YwdsbSwtjPxfCR21h5AOYKW\nXMAaK9YCi9+E3YzMLmD8v8ZkIOPjz2S32GSzaZrQxujQAxwTY4AxwBhgDDAGGAOKYmTmaWZeZebp\n0AMNoXT+k8L9LiPiJSIeMvO9err9O4+IeURcR8Rr25dKY0y//87rZjq4aXTEKL1mLPuY5Agsuz4s\njfFZP8dR6Pwd3k2g9Jrxa+v1eut6Zvb2P/rcK8KT8YMxwBhgDDAGVN9NJpPJ1vXRaPjOi8Vi6/ps\nNttpP08GGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhQ/dxktVptXb+p\n3bjArs9H2ngywBhgDDAGGAOMAdW31seW9fvajQvctaw/7bifJwOMAcYAY4AxwBhgDDAGGAOMAcYA\nY4AxwBhgDDAGGAOMAcYAY4AxwBhgDKh+btL2fOStduMCFz3v58kAY4AxwBhgDBjsNTN9X+n3wZMB\nxgBjgDGgNMZ40Cn2p/N3lMaY1s9xFKZdH2bJe8cz8ywibmPzosC/+J6/cWxCPDdN89H2paIY/4UX\nUDAGGAOMAcYAY4Ax4AvNyCh+jsKVcwAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10fea8080>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAc9JREFUeJzt3LFKw1AUgOFzioPUSXyBjo5S6eQD+BBufbBuPoQPoEvR\n2bEv4Cg4eR0q8g9NuG0SG/H/FiGpl9OfS4ISkqWU0Nbk2AOMiTHAGGAMMAYYA4wBVTEyc5qZ88yc\nDj3QEGrnP6lc7yoiHiPiLjNfO0/3+y4j4j4ibiLiqelDtTFm3z/vu810dLNoiVF7zdj0MckIbNpO\n1sb46D7HKLR+D+8mUHvN2NtisRhq6R/r9brX9dwZYAwwBhgDjAGd7yafDccnPV/pd1k2HF8duJ47\nA4wBxgBjgDGg890k+5hiJNwZYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYMNh/ut66Llzh\nouf13BlgDDAGGAOMAcaAzrfW84bjfd/2dlmtdj98sFw2PazQzp0BxgBjgDHAGGAMMAYYA4wBxgBj\ngDHAGGAMMAYYA4wBxgBjgDHAGGAMMAZkzevsMnMeEc/7LHx26ER7eN//V65LKS9NJ90ZYAwwBhgD\njAGDvWbmgCv90bkzwBhgDDAG1MY4HXSK39P6PWpjzLrPMQqztpO1f6hdRMRtbF8U+Bff83ca2xAP\npZTGx9qrYvwXXkDBGGAMMAYYA4wBxoAvlGEvtjIt9mYAAAAASUVORK5CYII=\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10fb0abe0>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAcJJREFUeJzt3LFKw1AUgOFzxKF0ddWS0VFcXQX7Dr6Xb9B3qOCqz+BY\n1NXdLQ51+AcTr71JG/H/FiEth9vfS4INJtu2DW0dHXoBU2IMMAYYA4wBxgBjQFGMzJxn5mVmzsde\n0BhK139cOO8iIh4j4jYzn6tXt3/nEbGKiKuIeOp6U2mM5uvnqm5NB9dET4zSc8ZmiJVMwKbvxdIY\nH/XrmITez+HVBErPGb/2MtZgWAw8z50BxgBjgDHAGFB9NVkul98eX6zXtaN/1PXtbe44z50BxgBj\ngDHAGDDa3yb7MPS9QHcGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGVH/Tte64P3JXO7jA\n0L9JdwYYA4wBxgBjgDGg+tL62nH8rHZwgbeO46c7znNngDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYY\nA4wBxgBjgDHAGGAMMAYYA4wB1fdNuu6PPNQOLrDr/ZEu7gwwBhgDjAHGgNH+lfN6rMEjcmeAMcAY\nYAwojTEbdRX70/s5SmM09euYhKbvxSx57nhmnkTETWwfFPgXn/M3i22I+7Zt37veVBTjv/AECsYA\nY4AxwBhgDDAGfAKLeiZpqd0RvAAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x10d31c128>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "for component in pca.components_:\n", | |
| " show_as_image(component)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "anaconda-cloud": {}, | |
| "kernelspec": { | |
| "display_name": "Python [conda root]", | |
| "language": "python", | |
| "name": "conda-root-py" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment