Skip to content

Instantly share code, notes, and snippets.

@fanurs
Created January 10, 2026 02:48
Show Gist options
  • Select an option

  • Save fanurs/046d3f461c30034e4f6c9e76e26359d0 to your computer and use it in GitHub Desktop.

Select an option

Save fanurs/046d3f461c30034e4f6c9e76e26359d0 to your computer and use it in GitHub Desktop.
Convert `.py` files with #%% into `.ipynb` files.
#!/usr/bin/env python3
import argparse
import json
import os
import platform
import re
def parse_args():
    """Parse command-line arguments: one or more .py paths to convert."""
    cli = argparse.ArgumentParser(
        description="Convert Python scripts (.py) with #%% markers to Jupyter Notebooks (.ipynb)."
    )
    cli.add_argument(
        "files",
        metavar="FILE",
        nargs="+",
        help="One or more .py files to convert",
    )
    return cli.parse_args()
def parse_py_file(content):
    """Split Python source text at #%% markers into a list of cell dicts.

    A marker line is "# %%" (flexible whitespace, case-insensitive) with an
    optional "[markdown]" tag and optional trailing title text.
    """
    marker_re = re.compile(
        r"^\s*#\s*%%(?P<markdown>\s*\[markdown\])?(?P<title>.*)$",
        re.IGNORECASE,
    )
    cells = []
    cell_type = "code"  # anything before the first marker is treated as code
    buffer = []
    for raw_line in content.splitlines():
        hit = marker_re.match(raw_line)
        if hit is None:
            buffer.append(raw_line)
            continue
        # Flush the block accumulated so far. The only block we drop is an
        # empty one before the very first marker; later empty blocks are kept
        # so deliberate empty cells survive the conversion.
        if buffer or cells:
            cells.append(create_cell(cell_type, buffer))
        buffer = []
        cell_type = "markdown" if hit.group("markdown") else "code"
    # Flush whatever remains after the last marker.
    if buffer or cells:
        cells.append(create_cell(cell_type, buffer))
    return cells
def create_cell(cell_type, source_lines):
    """Build one ipynb cell dict of the given type from a list of raw lines.

    Markdown sources are de-commented/de-quoted, code sources lose trailing
    blank lines; any other type passes its lines through untouched.
    """
    if cell_type == "markdown":
        body = clean_markdown_source(source_lines)
    elif cell_type == "code":
        body = trim_code_source(source_lines)
    else:
        body = source_lines
    # The ipynb format stores source as a list of strings where every line
    # except (optionally) the last carries its own trailing "\n".
    source = [line + "\n" for line in body[:-1]] + body[-1:]
    cell = {
        "cell_type": cell_type,
        "metadata": {},
        "source": source,
    }
    if cell_type == "code":
        # Key order matters for byte-stable JSON output: outputs first.
        cell["outputs"] = []
        cell["execution_count"] = None
    return cell
def trim_code_source(lines):
    """Return a copy of *lines* with trailing blank/whitespace lines removed.

    The input list is never mutated.
    """
    end = len(lines)
    while end and not lines[end - 1].strip():
        end -= 1
    return list(lines[:end])
def clean_markdown_source(lines):
    """Convert raw markdown-cell lines into plain markdown text lines.

    Two authoring styles are supported:
      1. Docstring style — the content sits inside a triple-quoted string
         (either quote flavor); the delimiters are stripped.
      2. Comment style — each line is prefixed with "#"; one hash and at most
         one following space are removed per line.
    """
    # Find the first line with visible content; all-blank input yields [].
    first_content = next((ln.strip() for ln in lines if ln.strip()), None)
    if first_content is None:
        return []
    quote = first_content[:3]
    # STRATEGY 1: docstring-delimited markdown.
    if quote in ('"""', "'''"):
        text = "\n".join(lines)
        opening = text.find(quote)
        closing = text.rfind(quote)
        if opening != closing:
            # Both delimiters present: keep only the text between them, then
            # drop a single blank first/last line left over by the layout
            #   """
            #   Text
            #   """
            inner = text[opening + 3:closing].splitlines()
            if inner and not inner[0].strip():
                del inner[0]
            if inner and not inner[-1].strip():
                del inner[-1]
            return inner
        # Unclosed docstring: keep everything after the opening delimiter.
        return text[opening + 3:].splitlines()
    # STRATEGY 2: "#"-prefixed comments.
    result = []
    for ln in lines:
        stripped = ln.lstrip()
        if stripped.startswith("#"):
            rest = stripped[1:]
            # At most one space after the hash is treated as separator.
            result.append(rest[1:] if rest.startswith(" ") else rest)
        else:
            # Non-comment lines (blank lines, raw text) pass through as-is.
            result.append(ln)
    return result
def convert_to_ipynb(filepath):
    """Convert one .py file to a sibling .ipynb file.

    The output path is the input path with its extension replaced by
    ".ipynb". Read and write failures are reported and swallowed so that a
    multi-file run keeps processing the remaining files.
    """
    output_path = os.path.splitext(filepath)[0] + ".ipynb"
    print(f"Converting {filepath} -> {output_path}")
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return
    cells = parse_py_file(content)
    # Use current interpreter version for metadata
    py_version = platform.python_version()
    notebook = {
        "cells": cells,
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "codemirror_mode": {"name": "ipython", "version": 3},
                "file_extension": ".py",
                "mimetype": "text/x-python",
                "name": "python",
                "nbconvert_exporter": "python",
                "pygments_lexer": "ipython3",
                "version": py_version
            }
        },
        "nbformat": 4,
        "nbformat_minor": 5
    }
    # Mirror the read-side behavior: report write failures (permissions,
    # full disk, ...) instead of crashing, so files listed after this one
    # on the command line are still converted.
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(notebook, f, indent=1, ensure_ascii=False)
            f.write("\n")
    except Exception as e:
        print(f"Error writing {output_path}: {e}")
def main():
    """Entry point: convert every file named on the command line."""
    for path in parse_args().files:
        if os.path.exists(path):
            convert_to_ipynb(path)
        else:
            # Skip missing paths but keep going with the rest.
            print(f"File not found: {path}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment