Skip to content

Instantly share code, notes, and snippets.

@songmw90
Last active December 14, 2020 11:34
Show Gist options
  • Select an option

  • Save songmw90/d016e8084624fa544bef to your computer and use it in GitHub Desktop.

Select an option

Save songmw90/d016e8084624fa544bef to your computer and use it in GitHub Desktop.
Simple word count
#!/usr/bin/env python
# Author : Myeong-Uk ([email protected])
# Date : 2015. 03. 24
# Desc : Count the frequency of alphabetic words
# Command : <python wordcount.py filename> e.g. rfc3615.txt
# OUTPUT : filename_word_count.out e.g. rfc3615.txt_word_count.out
import collections
import csv
import operator
import re
import string
import sys
def read_file(filename):
    """Read a text file and return its contents as a list of words.

    filename -- path of the file to read (opened in text mode 'r').

    Returns whatever prepare_raw_text() produces for the file's full
    contents. Errors raised by open() or read() propagate to the caller.
    """
    # The context manager closes the handle even when read() raises,
    # which the explicit open()/close() pair did not guarantee.
    with open(filename, 'r') as fp:
        raw = fp.read()
    return prepare_raw_text(raw)
def prepare_raw_text(rawtext):
    """Split raw text into a list of lowercase ASCII-letter words.

    Every run of characters outside A-Z / a-z (digits, punctuation,
    whitespace, non-ASCII letters) acts as a word separator.

    rawtext -- the text to tokenize.

    Returns a list of lowercase words; the list is empty when rawtext
    contains no ASCII letters.
    """
    # Collapse each non-letter run into one space so split() sees clean gaps.
    letters_only = re.sub(r'[^A-Za-z]+', ' ', rawtext)
    return letters_only.lower().split()
def group_words(words):
    """Count how often each word occurs, most frequent first.

    words -- an iterable of hashable words.

    Returns a list of (word, count) tuples sorted by count in descending
    order. The relative order of words with equal counts is whatever
    order the underlying mapping yields them in.
    """
    # Counter replaces the hand-written tally (which also shadowed the
    # builtin name `dict`); most_common() with no argument performs the
    # same stable descending sort on the count.
    return collections.Counter(words).most_common()
def record_frequency(filename,list):
    """Write word frequencies to '<filename>_word_count.out'.

    filename -- base path; "_word_count.out" is appended to form the
                output path, which is opened in 'w' mode (truncating
                any existing file).
    list     -- iterable of (word, count) pairs; one "word count" line
                is written per pair, in the order given. The name
                shadows the builtin but is kept for caller compatibility.
    """
    # The context manager closes (and flushes) the file even if a write
    # fails part-way through.
    with open(filename + "_word_count.out", "w") as fp:
        for item in list:
            fp.write(item[0] + " " + str(item[1]) + "\n")
if __name__ == '__main__':
    # Script entry point: count the words of the file named on the
    # command line and write the result next to it.
    if len(sys.argv) < 2:
        # Give a usage hint instead of an IndexError traceback.
        sys.exit("usage: python wordcount.py filename")
    filename = sys.argv[1]
    words = read_file(filename)
    # Named `frequencies` rather than `list` to avoid shadowing the builtin.
    frequencies = group_words(words)
    record_frequency(filename, frequencies)
    # The single-argument parenthesised form prints the same text on
    # Python 2 and Python 3.
    print("done!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment