Created January 10, 2026 02:48
Convert `.py` files with `#%%` cell markers into `.ipynb` notebook files.
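For reference, a minimal input file in the format the script understands might look like the following (the contents are illustrative, not part of the gist). Lines under a `# %% [markdown]` marker become a markdown cell, everything under a plain `# %%` marker becomes a code cell, and any text after a marker is treated as a title and ignored.

```python
# %% [markdown]
# # Example notebook
# Comment lines under a [markdown] marker are cleaned into markdown text.

# %%
import math
print(math.pi)

# %% optional cell title
x = 2 + 2
print(x)
```

The full script is below.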
#!/usr/bin/env python3
import argparse
import json
import os
import platform
import re


def parse_args():
    parser = argparse.ArgumentParser(
        description="Convert Python scripts (.py) with #%% markers to Jupyter Notebooks (.ipynb)."
    )
    parser.add_argument(
        "files",
        metavar="FILE",
        nargs="+",
        help="One or more .py files to convert"
    )
    return parser.parse_args()


def parse_py_file(content):
    """
    Parses Python content and returns a list of cells (dicts).
    """
    # Regex to identify cell markers. Matches:
    #   1. ^\s*#                           - line starts with optional whitespace and #
    #   2. \s*%%                           - optional whitespace and %%
    #   3. (?P<markdown>\s*\[markdown\])?  - optional [markdown] tag (captured)
    #   4. (?P<title>.*)$                  - any remaining text is the title/metadata
    cell_marker_pattern = re.compile(
        r"^\s*#\s*%%(?P<markdown>\s*\[markdown\])?(?P<title>.*)$", re.IGNORECASE
    )
    lines = content.splitlines()
    cells = []

    # Defaults for the first block (before any marker is found)
    current_cell_type = "code"
    current_cell_source = []

    for line in lines:
        match = cell_marker_pattern.match(line)
        if match:
            # Flush the previous cell. Only add it if it has content,
            # or if at least one cell has already been emitted (this
            # preserves empty cells between consecutive markers).
            if current_cell_source or len(cells) > 0:
                cells.append(create_cell(current_cell_type, current_cell_source))
            # Start a new cell
            current_cell_source = []
            is_markdown = bool(match.group("markdown"))
            current_cell_type = "markdown" if is_markdown else "code"
        else:
            current_cell_source.append(line)

    # Flush the final cell
    if current_cell_source or len(cells) > 0:
        cells.append(create_cell(current_cell_type, current_cell_source))

    return cells


def create_cell(cell_type, source_lines):
    """
    Creates a notebook cell dictionary.
    Handles cleanup of markdown cells and trimming of code cells.
    """
    cleaned_source = source_lines
    if cell_type == "markdown":
        cleaned_source = clean_markdown_source(source_lines)
    elif cell_type == "code":
        cleaned_source = trim_code_source(source_lines)

    # Convert the list of lines to the format ipynb expects (lines ending in \n).
    # Jupyter expects \n at the end of every line except, optionally, the last one;
    # we add it to all lines except the very last for consistency.
    final_source = []
    for i, s in enumerate(cleaned_source):
        if i < len(cleaned_source) - 1:
            final_source.append(s + "\n")
        else:
            final_source.append(s)

    cell = {
        "cell_type": cell_type,
        "metadata": {},
        "source": final_source
    }
    if cell_type == "code":
        cell["outputs"] = []
        cell["execution_count"] = None
    return cell


def trim_code_source(lines):
    """
    Removes trailing empty lines from code cells.
    """
    # Work on a copy so the caller's list is not modified
    trimmed = list(lines)
    # Pop lines from the end as long as they are empty or whitespace-only
    while trimmed and not trimmed[-1].strip():
        trimmed.pop()
    return trimmed


def clean_markdown_source(lines):
    """
    Handles:
      1. Docstring style (double or single quotes)
      2. Standard commented style (# )
    """
    if not lines:
        return []

    # Skip empty lines to find the first line with actual content
    non_empty_indices = [i for i, line in enumerate(lines) if line.strip()]
    if not non_empty_indices:
        return []  # All lines are empty
    first_idx = non_empty_indices[0]
    first_line = lines[first_idx].strip()

    # STRATEGY 1: Docstring (double or single quotes)
    if first_line.startswith('"""') or first_line.startswith("'''"):
        full_text = "\n".join(lines)
        strip_char = first_line[:3]
        start_loc = full_text.find(strip_char)
        end_loc = full_text.rfind(strip_char)
        if start_loc != -1 and end_loc != -1 and start_loc != end_loc:
            # The content is between the quotes
            content = full_text[start_loc + 3:end_loc]
            # If the docstring was written as
            #     """
            #     Text
            #     """
            # splitlines() yields ["", "Text", ""], so trim the leading and
            # trailing empty lines for cleanliness.
            content_lines = content.splitlines()
            if content_lines and not content_lines[0].strip():
                content_lines.pop(0)
            if content_lines and not content_lines[-1].strip():
                content_lines.pop()
            return content_lines
        elif start_loc != -1:
            content = full_text[start_loc + 3:]
            return content.splitlines()

    # STRATEGY 2: Standard comments
    cleaned = []
    for line in lines:
        stripped = line.lstrip()
        if stripped.startswith("#"):
            # Remove the first # and, if present, one following space
            content = stripped[1:]
            if content.startswith(" "):
                content = content[1:]
            cleaned.append(content)
        else:
            # Pass through non-comment lines (likely empty lines or raw text)
            cleaned.append(line)
    return cleaned


def convert_to_ipynb(filepath):
    output_path = os.path.splitext(filepath)[0] + ".ipynb"
    print(f"Converting {filepath} -> {output_path}")

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return

    cells = parse_py_file(content)

    # Use the current interpreter version for the notebook metadata
    py_version = platform.python_version()

    notebook = {
        "cells": cells,
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "codemirror_mode": {"name": "ipython", "version": 3},
                "file_extension": ".py",
                "mimetype": "text/x-python",
                "name": "python",
                "nbconvert_exporter": "python",
                "pygments_lexer": "ipython3",
                "version": py_version
            }
        },
        "nbformat": 4,
        "nbformat_minor": 5
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(notebook, f, indent=1, ensure_ascii=False)
        f.write("\n")


def main():
    args = parse_args()
    for file_path in args.files:
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        convert_to_ipynb(file_path)


if __name__ == "__main__":
    main()
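Assuming the gist is saved locally as `py2ipynb.py` (the file name is an assumption; the gist does not fix one), it can be run on one or more files from the command line, e.g. `python py2ipynb.py analysis.py experiment.py`, which writes `analysis.ipynb` and `experiment.ipynb` next to the inputs. It can also be used programmatically; a minimal sketch under the same file-name assumption:

```python
# Illustrative only: assumes the gist was saved as py2ipynb.py (hypothetical name)
# somewhere on the import path, e.g. the current working directory.
from py2ipynb import convert_to_ipynb

# Writes analysis.ipynb next to the input file.
convert_to_ipynb("analysis.py")
```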