logicx24 · August 29, 2015 14:18
diff --git a/wordCount.py b/wordCount.py
 from sys import argv
 import os

 def inputFunc():
    """Return the text to analyse, from a file or from the keyboard.

    If a command-line argument is present it is treated as a path and the
    whole file is returned.  With no argument the user is prompted on
    standard input instead.

    Returns:
        The text as a single string.

    Raises:
        SystemExit: If the command-line argument does not name an existing
            file ('not a file' is printed first).
    """
    if len(argv) < 2:
        return input("Enter text > ")

    path = argv[1]
    if not os.path.isfile(path):
        print('not a file')
        # Stop here: there is no text to return, and falling through would
        # fail on an unbound local.
        raise SystemExit(1)

    # The context manager guarantees the handle is closed after reading.
    with open(path) as source:
        return source.read()

 def splitIntoSentences(text):
    """Split ``text`` into lowercase sentences without their terminators.

    A sentence ends at every '?' or '!'.  A '.' ends a sentence only when it
    is the last character of the text or is followed by a space and an
    uppercase letter, so a full stop inside something like "e.g. this" stays
    part of its sentence.

    All runs of whitespace (including newlines and blank lines) are treated
    as a single space before splitting.  Empty pieces, such as the gap
    between the two marks of "?!", are not counted as sentences.  Any text
    after the last terminator is returned as a final sentence.

    Args:
        text: The raw input string.

    Returns:
        List of sentence strings, lowercased and stripped.  Empty or
        whitespace-only input gives an empty list.
    """
    # Collapse every whitespace run so a paragraph break separates words
    # instead of gluing them together.
    text = " ".join(text.split())
    lastIndex = len(text) - 1

    sentenceList = []
    start = 0
    for ind, char in enumerate(text):
        if char in ('?', '!'):
            isEnd = True
        elif char == '.':
            # The slice is empty (and so not upper) when the text is too
            # short, which avoids any out-of-range index.
            isEnd = ind == lastIndex or (
                text[ind + 1] == " " and text[ind + 2:ind + 3].isupper())
        else:
            isEnd = False
        if not isEnd:
            continue

        # Slice the unmodified text, so the indices never drift.
        sentence = text[start:ind].strip().lower()
        if sentence:
            sentenceList.append(sentence)
        start = ind + 1

    # Keep whatever follows the final terminator (or the whole text when
    # there is no terminator at all).
    remainder = text[start:].strip().lower()
    if remainder:
        sentenceList.append(remainder)

    return sentenceList

 def generateWordTuples(sentenceList):
    """Break sentences into ``(sentenceNumber, word)`` pairs.

    Sentences are numbered from 1 in list order.  In each sentence every
    character that is not alphanumeric, '.' or ' ' is discarded, and the
    result is split on whitespace into words.

    Args:
        sentenceList: Sequence of sentence strings.  It is left unmodified,
            so the same list can safely be passed again.

    Returns:
        List of ``(sentenceNumber, word)`` tuples in reading order.
    """
    wordTups = []
    for number, sentence in enumerate(sentenceList, 1):
        cleaned = ''.join(
            char for char in sentence
            if char.isalnum() or char in ('.', ' ')
        )
        wordTups.extend((number, word) for word in cleaned.split())
    return wordTups

 def frequencyCount(wordTups):
    """Tally words and record the sentence number of every occurrence.

    Args:
        wordTups: Iterable of entries whose item 0 is a sentence number and
            whose item 1 is a word.

    Returns:
        A pair ``(wordFrequency, wordToSentences)``: the first maps each word
        to how many times it occurred, the second maps each word to the list
        of sentence numbers of its occurrences, in input order (a number is
        repeated when the word occurs more than once in that sentence).
    """
    occurrences = {}
    sentenceNumbers = {}
    for entry in wordTups:
        number, word = entry[0], entry[1]
        occurrences[word] = occurrences.get(word, 0) + 1
        sentenceNumbers.setdefault(word, []).append(number)
    return occurrences, sentenceNumbers


 def sortAndWrite(wordFrequency, wordToSentences):
    """Build the numbered report rows, one per word, in sorted word order.

    Each row has the form ``<n>. <word>   {<count>:<sentences>}`` where
    ``<sentences>`` is the word's entry in ``wordToSentences`` printed
    without brackets or spaces.  The rows are returned; this function does
    not write anything itself.

    Args:
        wordFrequency: Mapping of word to occurrence count.
        wordToSentences: Mapping of word to its list of sentence numbers.

    Returns:
        List of formatted row strings.
    """
    # One-pass deletion of '[', ']' and ' ' from the printed list.
    dropListSyntax = str.maketrans("", "", "[] ")
    rows = []
    for position, word in enumerate(sorted(wordFrequency), 1):
        sentenceRefs = str(wordToSentences[word]).translate(dropListSyntax)
        rows.append("{0}. {1}   {{{2}:{3}}}".format(
            position, word, wordFrequency[word], sentenceRefs))
    return rows

 def writeToFile(lst):
    """Write the strings in ``lst`` to ``count.txt``, one per line.

    The file is opened in write mode under the relative name ``count.txt``,
    so any existing file of that name is overwritten.  Items are joined with
    newlines and no trailing newline is added.
    """
    with open('count.txt', 'w') as reportFile:
        # Leaving the ``with`` block closes the file.
        reportFile.write("\n".join(lst))

 def main():
    """Run the whole pipeline: read text, count words, write the report."""
    sentences = splitIntoSentences(inputFunc())
    wordTups = generateWordTuples(sentences)
    wordFrequency, wordToSentences = frequencyCount(wordTups)
    reportRows = sortAndWrite(wordFrequency, wordToSentences)
    writeToFile(reportRows)
    print("Written to file count.txt in this directory.")

 # Run the pipeline only when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == "__main__":
    main()
	from sys import argv
	import os

	def inputFunc():
	    """Return the text to analyse, from a file or from the keyboard.

	    If a command-line argument is present it is treated as a path and the
	    whole file is returned.  With no argument the user is prompted on
	    standard input instead.

	    Returns:
	        The text as a single string.

	    Raises:
	        SystemExit: If the command-line argument does not name an existing
	            file ('not a file' is printed first).
	    """
	    if len(argv) < 2:
	        return input("Enter text > ")

	    path = argv[1]
	    if not os.path.isfile(path):
	        print('not a file')
	        # Stop here: there is no text to return.
	        raise SystemExit(1)

	    # The context manager guarantees the handle is closed after reading.
	    with open(path) as source:
	        return source.read()

	def splitIntoSentences(text):
	    """Split ``text`` into lowercase sentences without their terminators.

	    A sentence ends at every '?' or '!'.  A '.' ends a sentence only when
	    it is the last character of the text or is followed by a space and an
	    uppercase letter, so a full stop inside something like "e.g. this"
	    stays part of its sentence.

	    All runs of whitespace (including newlines and blank lines) are treated
	    as a single space before splitting.  Empty pieces, such as the gap
	    between the two marks of "?!", are not counted as sentences.  Any text
	    after the last terminator is returned as a final sentence.

	    Args:
	        text: The raw input string.

	    Returns:
	        List of sentence strings, lowercased and stripped.  Empty or
	        whitespace-only input gives an empty list.
	    """
	    # Collapse every whitespace run so a paragraph break separates words
	    # instead of gluing them together.
	    text = " ".join(text.split())
	    lastIndex = len(text) - 1

	    sentenceList = []
	    start = 0
	    for ind, char in enumerate(text):
	        if char in ('?', '!'):
	            isEnd = True
	        elif char == '.':
	            # The slice is empty (and so not upper) when the text is too
	            # short, which avoids any out-of-range index.
	            isEnd = ind == lastIndex or (
	                text[ind + 1] == " " and text[ind + 2:ind + 3].isupper())
	        else:
	            isEnd = False
	        if not isEnd:
	            continue

	        # Slice the unmodified text, so the indices never drift.
	        sentence = text[start:ind].strip().lower()
	        if sentence:
	            sentenceList.append(sentence)
	        start = ind + 1

	    # Keep whatever follows the final terminator (or the whole text when
	    # there is no terminator at all).
	    remainder = text[start:].strip().lower()
	    if remainder:
	        sentenceList.append(remainder)

	    return sentenceList

	def generateWordTuples(sentenceList):
	    """Break sentences into ``(sentenceNumber, word)`` pairs.

	    Sentences are numbered from 1 in list order.  In each sentence every
	    character that is not alphanumeric, '.' or ' ' is discarded, and the
	    result is split on whitespace into words.

	    Args:
	        sentenceList: Sequence of sentence strings.  It is left unmodified,
	            so the same list can safely be passed again.

	    Returns:
	        List of ``(sentenceNumber, word)`` tuples in reading order.
	    """
	    wordTups = []
	    for number, sentence in enumerate(sentenceList, 1):
	        cleaned = ''.join(
	            char for char in sentence
	            if char.isalnum() or char in ('.', ' ')
	        )
	        wordTups.extend((number, word) for word in cleaned.split())
	    return wordTups

	def frequencyCount(wordTups):
	    """Tally words and record the sentence number of every occurrence.

	    Args:
	        wordTups: Iterable of entries whose item 0 is a sentence number and
	            whose item 1 is a word.

	    Returns:
	        A pair ``(wordFrequency, wordToSentences)``: the first maps each
	        word to how many times it occurred, the second maps each word to
	        the list of sentence numbers of its occurrences, in input order.
	    """
	    wordFrequency = {}
	    wordToSentences = {}
	    for tup in wordTups:
	        number, word = tup[0], tup[1]
	        wordFrequency[word] = wordFrequency.get(word, 0) + 1
	        wordToSentences.setdefault(word, []).append(number)
	    return wordFrequency, wordToSentences


	def sortAndWrite(wordFrequency, wordToSentences):
	    """Build the numbered report rows, one per word, in sorted word order.

	    Each row has the form ``<n>. <word> {<count>:<sentences>}`` where
	    ``<sentences>`` is the word's entry in ``wordToSentences`` printed
	    without brackets or spaces.  The rows are returned; this function does
	    not write anything itself.

	    Args:
	        wordFrequency: Mapping of word to occurrence count.
	        wordToSentences: Mapping of word to its list of sentence numbers.

	    Returns:
	        List of formatted row strings.
	    """
	    sortedItems = sorted(wordFrequency.items(), key=lambda x: x[0])
	    rows = []
	    for count, (word, frequency) in enumerate(sortedItems, 1):
	        # str() of the list, minus brackets and spaces: "[1, 2]" -> "1,2".
	        sentences = str(wordToSentences[word]).replace("[", "").replace("]", "").replace(" ", "")
	        rows.append("{0}. {1} {{{2}:{3}}}".format(count, word, frequency, sentences))
	    return rows

	def writeToFile(lst):
	    """Write the strings in ``lst`` to ``count.txt``, one per line.

	    The file is opened in write mode under the relative name ``count.txt``,
	    so any existing file of that name is overwritten.  Items are joined
	    with newlines and no trailing newline is added.
	    """
	    with open('count.txt', 'w') as output:
	        # Leaving the ``with`` block closes the file; no explicit close().
	        output.write("\n".join(lst))

	def main():
	    """Run the whole pipeline: read text, count words, write the report."""
	    text = inputFunc()
	    wordFrequency, wordToSentences = frequencyCount(generateWordTuples(splitIntoSentences(text)))
	    writeToFile(sortAndWrite(wordFrequency, wordToSentences))
	    print("Written to file count.txt in this directory.")

	# Run the pipeline only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    main()
No results found