dspp779 · February 20, 2017 06:29
diff --git a/Practice170221-python basic.ipynb b/Practice170221-python basic.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Python Basic"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### File Operations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read file line by line"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "lines = []\n",
    "# 'with' closes the file when the block ends, even if an error occurs\n",
    "with open('building_global_community.txt') as text_file:\n",
    "    for line in text_file:\n",
    "        # delete the blank and line feed at the beginning and end\n",
    "        line = line.strip()\n",
    "        # add processed line text into list 'lines'\n",
    "        lines.append(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'To our community,'"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lines[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "or you can just write"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# list comprehension ('with' closes the file once the list is built)\n",
    "with open('building_global_community.txt') as text_file:\n",
    "    lines = [line.strip() for line in text_file]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'To our community,'"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lines[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### String operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sentence = \"I want to eat an apple .\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### string indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'t'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence[5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'eat'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence[10:13]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'.'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'eat an appl'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence[10:-3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### find sequences in string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.find('a')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "find from right-hand side"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.rfind('a')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "find with a starting point"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.find('a', 4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "return -1 when not found"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.find('can')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "combine slicing with `find` / `rfind`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'want to eat an apple '"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence[sentence.find('want to'):sentence.rfind('.')]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### String Normalization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'I want to eat an apple .'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'i want to eat an apple .'"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.lower()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'I WANT TO EAT AN APPLE .'"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'I want to eat an apple .'"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.capitalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'A'.isupper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'A'.islower()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'apple'.isalpha()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'20'.isdigit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'20.9'.isdigit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'20'.isdecimal()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'furen5566'.isalnum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Split sentence by spaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['I', 'want', 'to', 'eat', 'an', 'apple', '.']"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the result is list of words in the sentence\n",
    "sentence.split(' ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.endswith('.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.startswith('He wants')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dictionary examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# book = dict()\n",
    "book = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "book['title'] = 'Natural Language Processing with Python'\n",
    "book['author'] = 'Bird, Klein, and Loper'\n",
    "book['year'] = 2009"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'author': 'Bird, Klein, and Loper',\n",
       " 'title': 'Natural Language Processing with Python',\n",
       " 'year': 2009}"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['year', 'title', 'author'])"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_values([2009, 'Natural Language Processing with Python', 'Bird, Klein, and Loper'])"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book.values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_items([('year', 2009), ('title', 'Natural Language Processing with Python'), ('author', 'Bird, Klein, and Loper')])"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book.items()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "string formatting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Natural Language Processing with Python is a book written by Bird, Klein, and Loper in 2009'"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'%s is a book written by %s in %d' % (book['title'], book['author'], book['year'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Natural Language Processing with Python is a book written by Bird, Klein, and Loper in 2009'"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'{0} is a book written by {1} in {2}'.format(book['title'], book['author'], book['year'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Natural Language Processing with Python is a book written by Bird, Klein, and Loper in 2009'"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# advanced formatting\n",
    "'{title} is a book written by {author} in {year}'.format(**book)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Counting Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = ['red', 'red', 'red', 'red', 'yellow', 'yellow', 'yellow', 'blue', 'blue']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# count by hand with a plain dict\n",
    "counter = {}\n",
    "for color in data:\n",
    "    # first time we see this color: start its count at zero\n",
    "    if color not in counter:\n",
    "        counter[color] = 0\n",
    "    counter[color] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'blue': 2, 'red': 4, 'yellow': 3}"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### use default dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "# a missing key is created on first access by calling the factory function\n",
    "# counter = defaultdict(lambda: 0)  # equivalent: a factory that returns 0\n",
    "counter = defaultdict(int)  # int() returns 0, so every new key starts at 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for color in data:\n",
    "    counter[color] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(int, {'blue': 2, 'red': 4, 'yellow': 3})"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### use built-in Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "counter = Counter(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({'blue': 2, 'red': 4, 'yellow': 3})"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "new_data = ['blue', 'red', 'blue', 'yellow', 'blue', 'yellow', 'blue', 'yellow', 'blue']\n",
    "counter.update(new_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({'blue': 7, 'red': 5, 'yellow': 6})"
      ]
     },
     "execution_count": 163,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### most common elements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('blue', 7), ('yellow', 6), ('red', 5)]"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counter.most_common()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('blue', 7), ('yellow', 6)]"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counter.most_common(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "blue: 7\n",
      "yellow: 6\n",
      "red: 5\n"
     ]
    }
   ],
   "source": [
    "for color, count in counter.most_common():\n",
    "    print('{0}: {1}'.format(color, count))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    }
   ],
   "source": [
    "# clear counter\n",
    "counter.clear()\n",
    "print(counter['blue'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Compute the word frequencies in \"building_global_community.txt\"\n",
    "- read sentences from the file \"building_global_community.txt\" (the same file opened in the File Operations section)\n",
    "- split sentences into words (split, or nltk word_tokenize)\n",
    "- filter out symbols (isalpha, isdigit, isalnum)\n",
    "- normalize words to one case ('Word' and 'word' are considered the same word)\n",
    "- count the occurrences of each word (see the Counting Example section)"
   ]
  },
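  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Write your code here. Store the word counts in a `collections.Counter` named `wordCounter`; the cells below call `wordCounter.most_common()`."
   ]
  },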
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# write your code here\n",
    "..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('.', 261),\n",
       " (',', 256),\n",
       " ('to', 220),\n",
       " ('the', 181),\n",
       " ('and', 175),\n",
       " ('we', 161),\n",
       " ('of', 129),\n",
       " ('a', 127),\n",
       " ('our', 111),\n",
       " ('in', 89),\n",
       " ('is', 88),\n",
       " ('community', 80),\n",
       " ('that', 71),\n",
       " ('people', 62),\n",
       " ('for', 62),\n",
       " ('are', 55),\n",
       " ('this', 48),\n",
       " ('more', 46),\n",
       " ('can', 45),\n",
       " ('with', 44)]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wordCounter.most_common(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Save the result into a csv file"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://docs.python.org/3/library/csv.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "write word count result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# newline='' lets the csv module control line endings itself;\n",
    "# without it, extra blank rows appear on platforms that write \\r\\n (e.g. Windows)\n",
    "with open('wordcount.csv', 'w', newline='') as csvfile:\n",
    "    # set up header\n",
    "    fieldnames = ['word', 'count']\n",
    "    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
    "\n",
    "    writer.writeheader()\n",
    "    # one row per word, most frequent first\n",
    "    for word, count in wordCounter.most_common():\n",
    "        writer.writerow({'word': word, 'count': count})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "read csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ". 261\n",
      ", 256\n",
      "to 220\n",
      "the 181\n",
      "and 175\n",
      "we 161\n",
      "of 129\n",
      "a 127\n",
      "our 111\n",
      "in 89\n",
      "is 88\n",
      "community 80\n"
     ]
    }
   ],
   "source": [
    "# open with newline='' so the csv module handles line endings itself\n",
    "with open('wordcount.csv', newline='') as csvfile:\n",
    "    # DictReader uses the header row as the keys of each row dict\n",
    "    reader = csv.DictReader(csvfile)\n",
    "    for row in reader:\n",
    "        print(row['word'], row['count'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
 }
diff --git a/Practice170221-tokenization and pos tagging.ipynb b/Practice170221-tokenization and pos tagging.ipynb
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Python Basic"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### File Operations"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Read file line by line"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"lines = []\n",
	"# 'with' closes the file when the block ends, even if an error occurs\n",
	"with open('building_global_community.txt') as text_file:\n",
	"    for line in text_file:\n",
	"        # delete the blank and line feed at the beginning and end\n",
	"        line = line.strip()\n",
	"        # add processed line text into list 'lines'\n",
	"        lines.append(line)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'To our community,'"
	]
	},
	"execution_count": 30,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"lines[0]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"or you can just write"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# list comprehension ('with' closes the file once the list is built)\n",
	"with open('building_global_community.txt') as text_file:\n",
	"    lines = [line.strip() for line in text_file]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 32,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'To our community,'"
	]
	},
	"execution_count": 32,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"lines[0]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### String operations"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"sentence = \"I want to eat an apple .\""
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"#### string indexing"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'t'"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence[5]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'eat'"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence[10:13]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'.'"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence[-1]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'eat an appl'"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence[10:-3]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"#### find sequences in string"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"3"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.find('a')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"find from right-hand side"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"17"
	]
	},
	"execution_count": 16,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.rfind('a')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"find with a starting point"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"11"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.find('a', 4)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"return -1 when not found"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"-1"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.find('can')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"combine the use of subsequence and find"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'want to eat an apple '"
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence[sentence.find('want to'):sentence.rfind('.')]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### String Normalization"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {
	"collapsed": false,
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'I want to eat an apple .'"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'i want to eat an apple .'"
	]
	},
	"execution_count": 21,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.lower()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 82,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'I WANT TO EAT AN APPLE .'"
	]
	},
	"execution_count": 82,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.upper()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 83,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'I want to eat an apple .'"
	]
	},
	"execution_count": 83,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.capitalize()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 86,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 86,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'A'.isupper()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 87,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"False"
	]
	},
	"execution_count": 87,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'A'.islower()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 92,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 92,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'apple'.isalpha()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 93,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 93,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'20'.isdigit()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 94,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"False"
	]
	},
	"execution_count": 94,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'20.9'.isdigit()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 99,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 99,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'20'.isdecimal()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 101,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 101,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'furen5566'.isalnum()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### split a sentence on spaces"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 78,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"['I', 'want', 'to', 'eat', 'an', 'apple', '.']"
	]
	},
	"execution_count": 78,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# split on single spaces; the result is a list of the words in the sentence\n",
	"sentence.split(' ')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 102,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 102,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.endswith('.')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 103,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"False"
	]
	},
	"execution_count": 103,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sentence.startswith('He wants')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Dictionary examples"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# book = dict()\n",
	"book = {}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"book['title'] = 'Natural Language Processing with Python'\n",
	"book['author'] = 'Bird, Klein, and Loper'\n",
	"book['year'] = 2009"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 107,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'author': 'Bird, Klein, and Loper',\n",
	" 'title': 'Natural Language Processing with Python',\n",
	" 'year': 2009}"
	]
	},
	"execution_count": 107,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"book"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 112,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dict_keys(['year', 'title', 'author'])"
	]
	},
	"execution_count": 112,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"book.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 113,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dict_values([2009, 'Natural Language Processing with Python', 'Bird, Klein, and Loper'])"
	]
	},
	"execution_count": 113,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"book.values()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 114,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dict_items([('year', 2009), ('title', 'Natural Language Processing with Python'), ('author', 'Bird, Klein, and Loper')])"
	]
	},
	"execution_count": 114,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"book.items()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"string formatting"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'Natural Language Processing with Python is a book written by Bird, Klein, and Loper in 2009'"
	]
	},
	"execution_count": 28,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'%s is a book written by %s in %d' % (book['title'], book['author'], book['year'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 108,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'Natural Language Processing with Python is a book written by Bird, Klein, and Loper in 2009'"
	]
	},
	"execution_count": 108,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"'{0} is a book written by {1} in {2}'.format(book['title'], book['author'], book['year'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 109,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'Natural Language Processing with Python is a book written by Bird, Klein, and Loper in 2009'"
	]
	},
	"execution_count": 109,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# advanced formatting\n",
	"'{title} is a book written by {author} in {year}'.format(**book)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Counting Example"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 123,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"data = ['red', 'red', 'red', 'red', 'yellow', 'yellow', 'yellow', 'blue', 'blue']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 124,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# count how many times each color appears in `data` using a plain dict\n",
	"counter = dict()\n",
	"for color in data:\n",
	"    if color in counter:\n",
	"        # seen before: bump the existing count\n",
	"        counter[color] += 1\n",
	"    else:\n",
	"        # first occurrence: start the count at 1\n",
	"        counter[color] = 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 125,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'blue': 2, 'red': 4, 'yellow': 3}"
	]
	},
	"execution_count": 125,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"counter"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### use default dictionary"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 129,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"from collections import defaultdict\n",
	"counter = defaultdict(lambda: 0) # default value function is 0\n",
	"counter = defaultdict(int) # default value function is \"int\", which initialize to 0"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 130,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# a missing key is created with the default value on first access,\n",
	"# so no membership test is needed before incrementing\n",
	"for color in data:\n",
	"    counter[color] += 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 131,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"defaultdict(int, {'blue': 2, 'red': 4, 'yellow': 3})"
	]
	},
	"execution_count": 131,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"counter"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### use built-in Counter"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 159,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from collections import Counter"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 160,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"counter = Counter(data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 161,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Counter({'blue': 2, 'red': 4, 'yellow': 3})"
	]
	},
	"execution_count": 161,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"counter"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 162,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"new_data = ['blue', 'red', 'blue', 'yellow', 'blue', 'yellow', 'blue', 'yellow', 'blue']\n",
	"counter.update(new_data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 163,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Counter({'blue': 7, 'red': 5, 'yellow': 6})"
	]
	},
	"execution_count": 163,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"counter"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"#### most common elements"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 164,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('blue', 7), ('yellow', 6), ('red', 5)]"
	]
	},
	"execution_count": 164,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"counter.most_common()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 165,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('blue', 7), ('yellow', 6)]"
	]
	},
	"execution_count": 165,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"counter.most_common(2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 166,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"blue: 7\n",
	"yellow: 6\n",
	"red: 5\n"
	]
	}
	],
	"source": [
	"# most_common() yields (element, count) pairs, highest count first\n",
	"for color, count in counter.most_common():\n",
	"    print('{0}: {1}'.format(color, count))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 147,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"0\n"
	]
	}
	],
	"source": [
	"# clear counter\n",
	"counter.clear()\n",
	"print(counter['blue'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Exercise"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Compute the word frequencies in \"building_global_community.txt\"\n",
	"- read sentences from the file \"building_global_community.txt\"\n",
	"- split sentences into words (split, or nltk word_tokenize)\n",
	"- filter out symbols (isalpha, isdigit, isalnum)\n",
	"- normalize words before counting ('Word' and 'word' are considered the same word)\n",
	"- count the occurrences of words (see the counting example above)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"write your code here"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# write your code here\n",
	"# NOTE: the cells below read the result from a variable named `wordCounter`\n",
	"# and call `.most_common()` on it (e.g. a collections.Counter)\n",
	"..."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('.', 261),\n",
	" (',', 256),\n",
	" ('to', 220),\n",
	" ('the', 181),\n",
	" ('and', 175),\n",
	" ('we', 161),\n",
	" ('of', 129),\n",
	" ('a', 127),\n",
	" ('our', 111),\n",
	" ('in', 89),\n",
	" ('is', 88),\n",
	" ('community', 80),\n",
	" ('that', 71),\n",
	" ('people', 62),\n",
	" ('for', 62),\n",
	" ('are', 55),\n",
	" ('this', 48),\n",
	" ('more', 46),\n",
	" ('can', 45),\n",
	" ('with', 44)]"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"wordCounter.most_common(20)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"collapsed": true
	},
	"source": [
	"### Save the result into a csv file"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"https://docs.python.org/3/library/csv.html"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import csv"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"write word count result"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# newline='' hands line-ending control to the csv module; without it, output\n",
	"# written on Windows gets an extra blank line after every row\n",
	"with open('wordcount.csv', 'w', newline='') as csvfile:\n",
	"    # set up header\n",
	"    fieldnames = ['word', 'count']\n",
	"    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
	"\n",
	"    writer.writeheader()\n",
	"    # one row per word, most frequent first\n",
	"    for word, count in wordCounter.most_common():\n",
	"        writer.writerow({'word': word, 'count': count})"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"collapsed": true
	},
	"source": [
	"read csv"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	". 261\n",
	", 256\n",
	"to 220\n",
	"the 181\n",
	"and 175\n",
	"we 161\n",
	"of 129\n",
	"a 127\n",
	"our 111\n",
	"in 89\n",
	"is 88\n",
	"community 80\n"
	]
	}
	],
	"source": [
	"# open with newline='' as the csv docs recommend, so that newlines embedded\n",
	"# in quoted fields are interpreted correctly\n",
	"with open('wordcount.csv', newline='') as csvfile:\n",
	"    reader = csv.DictReader(csvfile)\n",
	"    # each row is a dict keyed by the names in the file's header row\n",
	"    for row in reader:\n",
	"        print(row['word'], row['count'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}
No results found