light-bringer · December 10, 2019 11:18
diff --git a/Instructions.txt b/Instructions.txt
 Have a python3 setup ready!

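 Install python-docx from pip (it is imported as `docx`; the separate PyPI package named `docx` is a different, legacy library)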

 ~/Desktop/awesome-performance-test-framework  master ✗                                                                                   6h8m ✖ ⚑ ◒  
 ▶ pip install --pre python-docx
 Collecting python-docx
  Downloading https://files.pythonhosted.org/packages/e4/83/c66a1934ed5ed8ab1dbb9931f1779079f8bca0f6bbc5793c06c4b5e7d671/python-docx-0.8.10.tar.gz (5.5MB)
     |████████████████████████████████| 5.5MB 9.2MB/s 
 Requirement already satisfied: lxml>=2.3.2 in ./venv/lib/python3.7/site-packages (from python-docx) (4.4.2)
 Building wheels for collected packages: python-docx
  WARNING: Building wheel for python-docx failed: [Errno 13] Permission denied: '/Users/efi/Library/Caches/pip/wheels/18'
 Failed to build python-docx
 Installing collected packages: python-docx
    Running setup.py install for python-docx ... done
 Successfully installed python-docx-0.8.10


 Pass the path of the directory to scan (preferably a full path) as the script's only argument.

 Example - /home/debaprid/example

 UNSUCCESSFUL RUN : 
 ▶ python configspec/test.py    
 Provide directory path eg:
 ./  /home/debaprid/example

 # Successful RUN : 
 ~/Desktop/awesome-performance-test-framework  master ✗                                                                                  6h10m ✖ ⚑ ◒  
 ▶ python configspec/test.py configspec/test
 configspec/test [] ['1.txt', '2.txt']
 ['configspec/test/1.txt', 'configspec/test/2.txt']
 WordCount for configspec/test/1.txt : 3
 WordCount for configspec/test/2.txt : 3
 (venv) 
 ~/Desktop/awesome-performance-test-framework  master ✗                                                                                  6h11m ✖ ⚑ ◒  
 ▶ 
diff --git a/word_count.py b/word_count.py
 import os
 import sys
 import docx
 import re

 def get_all_files(path):
     """Recursively collect the text and Word documents found under *path*.

     Args:
         path: Directory to walk. A path that does not exist or is not a
             directory produces an empty list, because ``os.walk`` yields
             nothing for it.

     Returns:
         A list of file paths (the walked directory joined with the file
         name) whose extension is ``.txt``, ``.doc`` or ``.docx``, compared
         case-insensitively.
     """
     # The original test `'.txt' or '.doc' or '.docx' in file` was always
     # true: the non-empty literal '.txt' is truthy on its own, so every
     # file was returned. Compare the real extension instead.
     wanted = ('.txt', '.doc', '.docx')
     files = []
     for root, dirs, names in os.walk(path):
         # Trace output kept from the original: one line per visited directory.
         print(root, dirs, names)
         for name in names:
             if os.path.splitext(name)[1].lower() in wanted:
                 files.append(os.path.join(root, name))

     return files


 def count_docx(file_name):
     r"""Count the words in a Word document.

     The document is read with the python-docx API (``docx.Document``).
     The previous calls, ``docx.opendocx`` and ``docx.getdocumenttext``,
     belong to the legacy ``docx`` package and do not exist in python-docx,
     so every document was reported as unreadable.

     Args:
         file_name: Path to the document.

     Returns:
         The number of ``\w+`` matches in the text of the document's
         paragraphs, or -1 if the document cannot be opened.

     Note:
         Only ``document.paragraphs`` is counted; text inside tables is not
         part of that list. python-docx reads the .docx format, so a legacy
         binary .doc file is expected to fail to open and yield -1.
     """
     try:
         document = docx.Document(file_name)
     except Exception:
         # python-docx reports missing or malformed packages with several
         # exception types; any of them means "unreadable" to the caller,
         # which relies on the -1 sentinel.
         print('Cannot open file to read.')
         return -1

     # Paragraph text is already `str`; joining encoded bytes with a str
     # separator (as the original did) raises TypeError on Python 3.
     text = '\n'.join(paragraph.text for paragraph in document.paragraphs)
     return len(re.findall(r'\w+', text))

 def count_txt(file_name):
     r"""Count the words in a plain-text file.

     Args:
         file_name: Path to the text file.

     Returns:
         The total number of ``\w+`` matches over all lines of the file, or
         -1 if the file cannot be opened.
     """
     try:
         # errors='replace' keeps a stray undecodable byte from aborting the
         # whole count; the replacement character is not a word character.
         document = open(file_name, errors='replace')
     except (OSError, ValueError):
         print('Cannot open file to read')
         return -1

     # The context manager closes the handle, which the original never did.
     with document:
         return sum(len(re.findall(r'\w+', line)) for line in document)


 if __name__ == '__main__':
     # Script entry point: print a word count for every supported file found
     # under the directory given as the first command-line argument.

     # Maps a lower-cased extension (without the dot) to its counting function.
     extensions = {
         'txt' : count_txt,
         'docx' : count_docx,
         'doc' : count_docx,
     }

     # Show the usage hint only for the problem it describes. The original
     # wrapped the whole run in a bare `except`, so any failure (including an
     # unsupported extension) was reported as a missing directory argument.
     if len(sys.argv) < 2 or not os.path.isdir(sys.argv[1]):
         print ("Provide directory path eg:\n./  /home/debaprid/example")
         # Non-zero status so calling scripts can detect the usage error.
         sys.exit(1)

     all_files = get_all_files(sys.argv[1])
     print(all_files)
     for new_file in all_files:
         file_extension = os.path.splitext(new_file)[1].lower().replace('.', '')
         counter = extensions.get(file_extension)
         if counter is None:
             # No counting function is registered for this file type.
             continue
         print("WordCount for {0} : {1}".format(new_file, counter(new_file)))
	Have a python3 setup ready!

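	Install python-docx from pip (it is imported as `docx`; the separate PyPI package named `docx` is a different, legacy library)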

	~/Desktop/awesome-performance-test-framework master ✗ 6h8m ✖ ⚑ ◒
	▶ pip install --pre python-docx
	Collecting python-docx
	Downloading https://files.pythonhosted.org/packages/e4/83/c66a1934ed5ed8ab1dbb9931f1779079f8bca0f6bbc5793c06c4b5e7d671/python-docx-0.8.10.tar.gz (5.5MB)
	\|████████████████████████████████\| 5.5MB 9.2MB/s
	Requirement already satisfied: lxml>=2.3.2 in ./venv/lib/python3.7/site-packages (from python-docx) (4.4.2)
	Building wheels for collected packages: python-docx
	WARNING: Building wheel for python-docx failed: [Errno 13] Permission denied: '/Users/efi/Library/Caches/pip/wheels/18'
	Failed to build python-docx
	Installing collected packages: python-docx
	Running setup.py install for python-docx ... done
	Successfully installed python-docx-0.8.10


	Pass the path of the directory to scan (preferably a full path) as the script's only argument.

	Example - /home/debaprid/example

	UNSUCCESSFUL RUN :
	▶ python configspec/test.py
	Provide directory path eg:
	./ /home/debaprid/example

	# Successful RUN :
	~/Desktop/awesome-performance-test-framework master ✗ 6h10m ✖ ⚑ ◒
	▶ python configspec/test.py configspec/test
	configspec/test [] ['1.txt', '2.txt']
	['configspec/test/1.txt', 'configspec/test/2.txt']
	WordCount for configspec/test/1.txt : 3
	WordCount for configspec/test/2.txt : 3
	(venv)
	~/Desktop/awesome-performance-test-framework master ✗ 6h11m ✖ ⚑ ◒
	▶
	import os
	import sys
	import docx
	import re

	def get_all_files(path):
	    """Recursively collect the text and Word documents found under *path*.

	    Args:
	        path: Directory to walk. A path that does not exist or is not a
	            directory produces an empty list, because ``os.walk`` yields
	            nothing for it.

	    Returns:
	        A list of file paths (the walked directory joined with the file
	        name) whose extension is ``.txt``, ``.doc`` or ``.docx``, compared
	        case-insensitively.
	    """
	    # The original test `'.txt' or '.doc' or '.docx' in file` was always
	    # true: the non-empty literal '.txt' is truthy on its own, so every
	    # file was returned. Compare the real extension instead.
	    wanted = ('.txt', '.doc', '.docx')
	    files = []
	    for root, dirs, names in os.walk(path):
	        # Trace output kept from the original: one line per visited directory.
	        print(root, dirs, names)
	        for name in names:
	            if os.path.splitext(name)[1].lower() in wanted:
	                files.append(os.path.join(root, name))

	    return files


	def count_docx(file_name):
	    r"""Count the words in a Word document.

	    The document is read with the python-docx API (``docx.Document``).
	    The previous calls, ``docx.opendocx`` and ``docx.getdocumenttext``,
	    belong to the legacy ``docx`` package and do not exist in python-docx,
	    so every document was reported as unreadable.

	    Args:
	        file_name: Path to the document.

	    Returns:
	        The number of ``\w+`` matches in the text of the document's
	        paragraphs, or -1 if the document cannot be opened.

	    Note:
	        Only ``document.paragraphs`` is counted; text inside tables is not
	        part of that list. python-docx reads the .docx format, so a legacy
	        binary .doc file is expected to fail to open and yield -1.
	    """
	    try:
	        document = docx.Document(file_name)
	    except Exception:
	        # python-docx reports missing or malformed packages with several
	        # exception types; any of them means "unreadable" to the caller,
	        # which relies on the -1 sentinel.
	        print('Cannot open file to read.')
	        return -1

	    # Paragraph text is already `str`; joining encoded bytes with a str
	    # separator (as the original did) raises TypeError on Python 3.
	    text = '\n'.join(paragraph.text for paragraph in document.paragraphs)
	    return len(re.findall(r'\w+', text))

	def count_txt(file_name):
	    r"""Count the words in a plain-text file.

	    Args:
	        file_name: Path to the text file.

	    Returns:
	        The total number of ``\w+`` matches over all lines of the file, or
	        -1 if the file cannot be opened.
	    """
	    try:
	        # errors='replace' keeps a stray undecodable byte from aborting the
	        # whole count; the replacement character is not a word character.
	        document = open(file_name, errors='replace')
	    except (OSError, ValueError):
	        print('Cannot open file to read')
	        return -1

	    # The context manager closes the handle, which the original never did.
	    with document:
	        return sum(len(re.findall(r'\w+', line)) for line in document)


	if __name__ == '__main__':
	    # Script entry point: print a word count for every supported file found
	    # under the directory given as the first command-line argument.

	    # Maps a lower-cased extension (without the dot) to its counting function.
	    extensions = {
	        'txt' : count_txt,
	        'docx' : count_docx,
	        'doc' : count_docx,
	    }

	    # Show the usage hint only for the problem it describes. The original
	    # wrapped the whole run in a bare `except`, so any failure (including an
	    # unsupported extension) was reported as a missing directory argument.
	    if len(sys.argv) < 2 or not os.path.isdir(sys.argv[1]):
	        print ("Provide directory path eg:\n./ /home/debaprid/example")
	        # Non-zero status so calling scripts can detect the usage error.
	        sys.exit(1)

	    all_files = get_all_files(sys.argv[1])
	    print(all_files)
	    for new_file in all_files:
	        file_extension = os.path.splitext(new_file)[1].lower().replace('.', '')
	        counter = extensions.get(file_extension)
	        if counter is None:
	            # No counting function is registered for this file type.
	            continue
	        print("WordCount for {0} : {1}".format(new_file, counter(new_file)))