Created
November 17, 2025 12:53
-
-
Save sohang3112/52d6b5b16d7382e3c5526c4915b6aa32 to your computer and use it in GitHub Desktop.
Convert a PDF file to a Jupyter Notebook (.ipynb) file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env python3 | |
| import os | |
| import fitz # PyMuPDF | |
| import nbformat | |
| # Source: https://www.reddit.com/r/learnpython/comments/1ji0qwz/comment/mjblesy/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button | |
def pdf_to_ipynb(pdf_path: os.PathLike, ipynb_path: os.PathLike) -> None:
    """Convert a PDF into a Jupyter Notebook (.ipynb) file.

    The text of every page is extracted with PyMuPDF. Each page whose text is
    not blank becomes one Markdown cell, in page order; pages that yield only
    whitespace (e.g. scanned images without a text layer) are skipped.

    Args:
        pdf_path: Path of the PDF document to read.
        ipynb_path: Path the notebook is written to. The file is opened in
            ``"w"`` mode, so an existing file at that path is overwritten.
    """
    # Open the document as a context manager so it is closed even when text
    # extraction raises; the original never closed it.
    with fitz.open(pdf_path) as doc:
        page_texts = (page.get_text("text") for page in doc)
        # The list must be built inside the `with` block, while the document
        # is still open, because the generator above is lazy.
        cells = [
            nbformat.v4.new_markdown_cell(text)  # Store text as Markdown cell
            for text in page_texts
            if text.strip()
        ]

    # Create a Jupyter Notebook structure and save it to ipynb_path
    nb = nbformat.v4.new_notebook()
    nb.cells = cells
    with open(ipynb_path, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)
def ipynb_path_for(pdf_path: os.PathLike) -> os.PathLike:
    """Return the notebook path matching *pdf_path*.

    The result is a ``pathlib.Path`` in the same directory with only the final
    suffix replaced by ``.ipynb``, so ``report.v1.pdf`` maps to
    ``report.v1.ipynb``. Building the path from the stem and then calling
    ``with_suffix`` would strip a second suffix and produce ``report.ipynb``.
    """
    # Local import: pathlib is otherwise only imported in the script entry point.
    from pathlib import Path

    return Path(pdf_path).with_suffix(".ipynb")


if __name__ == '__main__':
    from argparse import ArgumentParser
    from pathlib import Path
    import sys

    parser = ArgumentParser(description="Converts .pdf files to Jupyter Notebook .ipynb format. WORKS ONLY IF PDF HAS TEXT, NOT IMAGES OF TEXT!")
    parser.add_argument("pdf_path", help="Path to PDF to be converted")
    args = parser.parse_args()

    pdf_path = Path(args.pdf_path)
    # Fail early with a readable message instead of a PyMuPDF traceback.
    if not pdf_path.is_file():
        print(f"PDF path does not exist or is not a file: {pdf_path}", file=sys.stderr)
        sys.exit(1)

    # The notebook is written next to the PDF, with the same base name.
    ipynb_path = ipynb_path_for(pdf_path)
    print("Converting PDF -> IPYNB and saving at", ipynb_path)
    pdf_to_ipynb(pdf_path, ipynb_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment