Skip to content

Instantly share code, notes, and snippets.

@AJV009
Created July 20, 2023 08:18
Show Gist options
  • Select an option

  • Save AJV009/2452207f1833abcac2ab2e02efe7e62f to your computer and use it in GitHub Desktop.

Select an option

Save AJV009/2452207f1833abcac2ab2e02efe7e62f to your computer and use it in GitHub Desktop.
PDF to Images
import os
from PyPDF2 import PdfReader
from pdf2image import convert_from_path
# Directory that holds the PDFs to convert. Each PDF gets a sibling
# sub-directory (named after the file without its extension) containing one
# PNG per page: 1.png, 2.png, ...
pdf_dir = 'pdfFiles'

for pdf_name in os.listdir(pdf_dir):
    # Skip anything that is not a PDF (including the output directories
    # created by earlier runs of this script).
    if not pdf_name.endswith('.pdf'):
        continue

    pdf_path = os.path.join(pdf_dir, pdf_name)

    # Render the pages at 200 DPI. The returned list is the single source of
    # truth for how many pages were produced, so there is no need to parse
    # the PDF a second time just to count its pages.
    images = convert_from_path(pdf_path, dpi=200)

    # splitext strips only the final extension, so a name such as
    # "report.pdf.v2.pdf" does not collapse onto the same directory as
    # "report.pdf".
    new_dir = os.path.join(pdf_dir, os.path.splitext(pdf_name)[0])
    # exist_ok avoids the check-then-create race and makes re-runs harmless.
    os.makedirs(new_dir, exist_ok=True)

    # Page files are numbered from 1 to match human page numbering.
    for page_number, image in enumerate(images, start=1):
        image.save(os.path.join(new_dir, f'{page_number}.png'), 'PNG')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment