Created
July 20, 2023 08:18
-
-
Save AJV009/2452207f1833abcac2ab2e02efe7e62f to your computer and use it in GitHub Desktop.
PDF to Images
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from PyPDF2 import PdfReader | |
| from pdf2image import convert_from_path | |
# Convert every PDF found directly inside ``pdf_dir`` into PNG images.
#
# For each "<name>.pdf" a sibling directory "<pdf_dir>/<name>/" is created
# (if it does not already exist) and each page is written there as
# "1.png", "2.png", ... numbered from 1 in page order.

# directory where your PDFs are stored
pdf_dir = 'pdfFiles'

# iterate over each entry in the directory
for pdf_name in os.listdir(pdf_dir):
    # splitext strips only the final extension, so a name such as
    # "report.pdf.v2.pdf" keeps "report.pdf.v2" as its stem instead of
    # being cut at the first ".pdf" occurrence.
    stem, ext = os.path.splitext(pdf_name)
    # compare case-insensitively so "SCAN.PDF" is picked up as well
    if ext.lower() != '.pdf':
        continue

    pdf_path = os.path.join(pdf_dir, pdf_name)
    # skip directories that merely happen to be named like a PDF
    if not os.path.isfile(pdf_path):
        continue

    # render the whole document at 200 DPI; the result is a list of page
    # images, so no separate page count is needed
    images = convert_from_path(pdf_path, dpi=200)

    # create the per-document output directory; exist_ok makes re-runs safe
    new_dir = os.path.join(pdf_dir, stem)
    os.makedirs(new_dir, exist_ok=True)

    # save each page/image to the new directory, numbering pages from 1
    for page_number, image in enumerate(images, start=1):
        image.save(os.path.join(new_dir, f'{page_number}.png'), 'PNG')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment