Skip to content

Instantly share code, notes, and snippets.

@sohang3112
Created November 17, 2025 12:53
Show Gist options
  • Select an option

  • Save sohang3112/52d6b5b16d7382e3c5526c4915b6aa32 to your computer and use it in GitHub Desktop.

Select an option

Save sohang3112/52d6b5b16d7382e3c5526c4915b6aa32 to your computer and use it in GitHub Desktop.
Convert a PDF file to a Jupyter Notebook (.ipynb) file
#! /usr/bin/env python3
import os
import fitz # PyMuPDF
import nbformat
# Source: https://www.reddit.com/r/learnpython/comments/1ji0qwz/comment/mjblesy/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button
def pdf_to_ipynb(pdf_path: os.PathLike, ipynb_path: os.PathLike) -> None:
    """Convert PDF -> Jupyter Notebook .ipynb file.

    The text of each page is extracted with PyMuPDF and stored as one
    Markdown cell per page. Pages whose extracted text is empty or
    whitespace-only produce no cell.

    Args:
        pdf_path: Path of the PDF document to read.
        ipynb_path: Path the notebook is written to. The file is opened in
            ``"w"`` mode, so an existing file at that path is overwritten.
    """
    # Open the document as a context manager so the PDF handle is released
    # even if text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        page_texts = [page.get_text("text") for page in doc]

    # One Markdown cell per page that actually contains text.
    cells = [
        nbformat.v4.new_markdown_cell(text)
        for text in page_texts
        if text.strip()
    ]

    # Create a Jupyter Notebook structure and save to ipynb_path
    nb = nbformat.v4.new_notebook()
    nb.cells = cells
    with open(ipynb_path, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)
def default_ipynb_path(pdf_path: os.PathLike) -> os.PathLike:
    """Return the notebook path that sits next to *pdf_path*.

    Only the final suffix is replaced with ``.ipynb``; any earlier dots in
    the file name are kept (``report.v2.pdf`` -> ``report.v2.ipynb``).

    Args:
        pdf_path: Path of the input PDF (string or path-like).

    Returns:
        A ``pathlib.Path`` in the same directory with the ``.ipynb`` suffix.
    """
    # Local import: the rest of the file only imports pathlib inside the
    # script entry point below.
    from pathlib import Path

    # Swap the suffix on the full name. Taking ``.stem`` first and then
    # calling ``with_suffix`` would strip a second dotted component.
    return Path(pdf_path).with_suffix(".ipynb")


if __name__ == '__main__':
    from argparse import ArgumentParser
    from pathlib import Path
    import sys

    parser = ArgumentParser(description="Converts .pdf files to Jupyter Notebook .ipynb format. WORKS ONLY IF PDF HAS TEXT, NOT IMAGES OF TEXT!")
    parser.add_argument("pdf_path", help="Path to PDF to be converted")
    args = parser.parse_args()

    pdf_path = Path(args.pdf_path)
    # Fail early with a readable message instead of a PyMuPDF traceback.
    if not pdf_path.is_file():
        print(f"PDF path does not exist or is not a file: {pdf_path}", file=sys.stderr)
        sys.exit(1)

    # The notebook is written alongside the PDF, with the same base name.
    ipynb_path = default_ipynb_path(pdf_path)
    print("Converting PDF -> IPYNB and saving at", ipynb_path)
    pdf_to_ipynb(pdf_path, ipynb_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment