Created
January 28, 2016 15:05
-
-
Save sakamer71/a46ac9e2ba787e826522 to your computer and use it in GitHub Desktop.
wordcounter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| ## PEP8 says pick a variable naming convention, and stick to it | |
| #### mixed case for variables | |
| #### underscores for functions | |
| #### double and single quotes are interchangeable - use the one which will look cleanest | |
| import os | |
| import collections | |
# Script-level state. ``words`` starts out empty (falsy) so the entry-point
# loop prompts at least once; ``checkWord`` is a module-level placeholder.
words = checkWord = ''
# Prefix that marks user input as a path to read instead of literal text.
filePrefix = 'file://'
def _normalize_words(tokens):
    """Lower-case each token, strip surrounding punctuation, drop empties."""
    cleaned = (token.lower().strip('.,;:!?-\'"') for token in tokens)
    return [word for word in cleaned if word]


def get_word_list():
    """Prompt the user for text and return it as a list of normalized words.

    The user either types space-separated words directly, or enters a path
    prefixed with ``filePrefix`` to have the words read from that file.
    In both cases every word is lower-cased and stripped of surrounding
    punctuation, so counting is case-insensitive however the text arrived.

    Returns:
        The list of words (also echoed to stdout), or None when the
        requested file does not exist or cannot be read. A falsy result
        makes the entry-point loop prompt again.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    instructions = (
        "Type some words, space-separated.\n"
        "Or if you want me to read a file, specify path to file, using 'file://' as the prefix\n"
        "i.e. if your file is at /tmp/thedictionary.txt, you would enter 'file:///tmp/thedictionary.txt'\n"
        ": "
    )
    userInput = read_line(instructions)

    if userInput.startswith(filePrefix):
        # Slice off only the leading prefix: str.replace would also delete
        # any later occurrence of the prefix inside the path itself.
        path = userInput[len(filePrefix):]
        if not os.path.isfile(path):
            print('file {} does not exist'.format(path))
            return None
        try:
            with open(path, 'r') as myfile:
                tokens = [token for line in myfile for token in line.split()]
        except (IOError, OSError, UnicodeError):
            # Unreadable or undecodable file: report it and let the caller
            # prompt again instead of falling through with no word list.
            print("Can't open file")
            return None
    else:
        tokens = userInput.split()

    words = _normalize_words(tokens)
    print(words)
    return words
def static_results(words):
    """Print summary statistics for ``words`` and return the per-word counts.

    Prints the total number of words, then up to ten of the most frequent
    words ordered by descending count with ties broken alphabetically.

    Args:
        words: sequence of word strings.

    Returns:
        A list of ``(word, count)`` tuples, one per distinct word.
    """
    print("I count {} words in total\n".format(len(words)))

    # collections.Counter does the tallying instead of a manual loop.
    wordCount = collections.Counter(words)

    # Rank every distinct word before truncating. Taking most_common(10)
    # first would pick among tied words by Counter order, not alphabetically.
    ranked = sorted(wordCount.items(), key=lambda pair: (-pair[1], pair[0]))

    print('Top words by count:')
    for word, count in ranked[:10]:
        print("{}: {}".format(word, count))

    # Materialize so the caller gets a list on Python 2 and Python 3 alike.
    return list(wordCount.items())
def checkOneWord(wordcount):
    """Ask the user for one word and report how often it occurs.

    The lookup is case-insensitive: both the query and the stored words are
    lower-cased before comparison, and counts of words that differ only by
    case are added together.

    Args:
        wordcount: iterable of ``(word, count)`` pairs, as returned by
            ``static_results``.

    Raises:
        SystemExit: when the user enters ``q`` (any case) to quit.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    checkWord = read_line("Which word should i check? (q to quit)\n")
    target = checkWord.strip().lower()
    if target == 'q':
        # Raise directly instead of relying on the site-provided exit().
        raise SystemExit("Thanks for playing! Goodbye.")

    # Compare against the word only; tuple membership would also test the
    # query against the count.
    occurrences = sum(
        count for word, count in wordcount if word.lower() == target
    )
    if occurrences:
        print('"{}" appears {} times\n'.format(checkWord, occurrences))
    else:
        print('"{}" is not found\n'.format(checkWord))
if __name__ == '__main__':
    # Greeting banner.
    print("\nWelcome to the writing stats utility, enjoy!\n")
    print("The text god demands text! Enter it below:\n")

    # Keep prompting until get_word_list hands back a non-empty word list.
    while not words:
        words = get_word_list()

    # Print the summary once, keeping the per-word counts for the lookups.
    wordCount = static_results(words)
    print("\nNow for the fun part.\nEnter a specific word and I will tell you how many case-insensitive occurrences there are: ")

    # checkOneWord ends the process itself when the user enters 'q'.
    while True:
        checkOneWord(wordCount)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment