Created
November 22, 2025 09:31
-
-
Save zelinskiy/c55ce67ae92f4e668d88e6966c328576 to your computer and use it in GitHub Desktop.
Converts a PDF from archive.org into a PDF that keeps only each page's bitmask (text mask) layer, making it easier to print.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import fitz # PyMuPDF | |
| from PIL import Image, ImageOps | |
| import os | |
| import sys | |
def extract_pbm_masks_and_invert(pdf_path, output_pdf):
    """Build a PDF made only of the inverted mask image of each input page.

    For every page of ``pdf_path`` the first listed image that references a
    soft mask (second field of the ``get_images`` entry > 0) is chosen. Its
    mask is decoded, inverted, reduced to 1-bit, and collected in memory.
    The collected bitmaps are then written to ``output_pdf``, one per page.
    Pages without any masked image are skipped with a message instead of
    aborting the whole run.

    Args:
        pdf_path: Path of the PDF to read.
        output_pdf: Path the resulting PDF is written to.

    Raises:
        RuntimeError: If no page of the input yields a mask image.
    """
    # Final images to assemble, kept in memory (no intermediates on disk).
    pages = []

    doc = fitz.open(pdf_path)
    try:
        for page_index, page in enumerate(doc):
            # Pick the FIRST image on the page that carries a soft mask and
            # stop looking; 0 means "this page has none".
            smask_xref = next(
                (img[1] for img in page.get_images(full=True) if img[1] > 0),
                0,
            )
            if not smask_xref:
                # Without this guard the pixmap would be None and the
                # attribute access below would crash.
                print(f"Page {page_index}: no mask image found, skipping.")
                continue

            mask_pix = fitz.Pixmap(doc.extract_image(smask_xref)["image"])

            # Convert raw pixmap to PIL image.
            # NOTE(review): mode "L" assumes the mask decodes to a
            # single-channel, 8-bit pixmap without alpha - confirm for
            # inputs other than archive.org scans.
            pil = Image.frombytes(
                "L", (mask_pix.width, mask_pix.height), mask_pix.samples
            )

            # Invert black/white, then convert to 1-bit final form.
            inverted = ImageOps.invert(pil).convert("1")

            pages.append(inverted)
            print(f"Page {page_index}: extracted & inverted PBM mask.")
    finally:
        # Release the input file even when a page fails to decode.
        doc.close()

    # Build final PDF from inverted masks.
    if not pages:
        raise RuntimeError("No PBM masks found in entire PDF.")

    first = pages[0].convert("RGB")
    rest = [p.convert("RGB") for p in pages[1:]]
    first.save(output_pdf, save_all=True, append_images=rest)
    print(f"Done. Output PDF saved as: {output_pdf}")
if __name__ == "__main__":
    # Command-line entry point: expects exactly two positional arguments,
    # the input PDF path followed by the output PDF path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: extract_pbm_textmask_to_pdf.py input.pdf output.pdf")
        sys.exit(1)

    source_pdf, target_pdf = cli_args
    extract_pbm_masks_and_invert(source_pdf, target_pdf)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment