Created
March 17, 2025 16:03
-
-
Save cynthia/9f5c33f51febb791c68675305d680e54 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""Export the pages of a PDF that contain color as JPEG images.

Each page is rasterized, then its color hash is compared with the color
hash of a grayscale copy of the same page. Pages whose hashes differ by
more than ``threshold`` are saved to ``color_pages/<page number>.jpg``.
"""
# pip install pillow imagehash pdf2image
import os

from PIL import Image, ImageOps
from pdf2image import convert_from_path
from imagehash import colorhash

filename = '/Users/cynthia/Dropbox/Paperwork/Titech/Graduation/thesis.pdf'
# Second positional argument is the rasterization DPI; a low value keeps
# conversion fast since the pages are only inspected for color content.
pages = convert_from_path(filename, 50)
out_tmp = 'temp'  # NOTE(review): not used anywhere in this script
out_dir = 'color_pages'
threshold = 0  # increase if you have small color elements you are okay being gray

# page.save() does not create missing directories, so make sure the output
# directory exists before the first colored page is written.
os.makedirs(out_dir, exist_ok=True)

# Pages are numbered from 1 so the file names match the PDF page numbers.
for page_number, page in enumerate(pages, start=1):
    # Difference between the hash of the grayscale copy and of the original;
    # a page that is already gray is expected to give a difference of 0.
    delta = colorhash(ImageOps.grayscale(page)) - colorhash(page)
    if delta > threshold:
        out_filename = f'{out_dir}/{page_number}.jpg'
        page.save(out_filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment