Created January 10, 2026 02:48
Convert `.py` files with `#%%` cell markers into `.ipynb` notebook files.
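For reference, a minimal input file in the format the script understands might look like the following (the contents are illustrative, not part of the gist). Lines under a `# %% [markdown]` marker become a markdown cell, everything under a plain `# %%` marker becomes a code cell, and any text after a marker is treated as a title and ignored.

```python
# %% [markdown]
# # Example notebook
# Comment lines under a [markdown] marker are cleaned into markdown text.

# %%
import math
print(math.pi)

# %% optional cell title
x = 2 + 2
print(x)
```

The full script is below.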
#!/usr/bin/env python3
import argparse
import json
import os
import platform
import re


def parse_args():
    parser = argparse.ArgumentParser(
        description="Convert Python scripts (.py) with #%% markers to Jupyter Notebooks (.ipynb)."
    )
    parser.add_argument(
        "files",
        metavar="FILE",
        nargs="+",
        help="One or more .py files to convert"
    )
    return parser.parse_args()


def parse_py_file(content):
    """
    Parses Python content and returns a list of cells (dicts).
    """
    # Regex to identify cell markers. Matches:
    #   1. ^\s*#                           - line starts with optional whitespace and #
    #   2. \s*%%                           - optional whitespace and %%
    #   3. (?P<markdown>\s*\[markdown\])?  - optional [markdown] tag (captured)
    #   4. (?P<title>.*)$                  - any remaining text is the title/metadata
    cell_marker_pattern = re.compile(
        r"^\s*#\s*%%(?P<markdown>\s*\[markdown\])?(?P<title>.*)$", re.IGNORECASE
    )
    lines = content.splitlines()
    cells = []

    # Defaults for the first block (before any marker is found)
    current_cell_type = "code"
    current_cell_source = []

    for line in lines:
        match = cell_marker_pattern.match(line)
        if match:
            # Flush the previous cell. Only add it if it has content,
            # or if at least one cell has already been emitted (this
            # preserves empty cells between consecutive markers).
            if current_cell_source or len(cells) > 0:
                cells.append(create_cell(current_cell_type, current_cell_source))
            # Start a new cell
            current_cell_source = []
            is_markdown = bool(match.group("markdown"))
            current_cell_type = "markdown" if is_markdown else "code"
        else:
            current_cell_source.append(line)

    # Flush the final cell
    if current_cell_source or len(cells) > 0:
        cells.append(create_cell(current_cell_type, current_cell_source))

    return cells


def create_cell(cell_type, source_lines):
    """
    Creates a notebook cell dictionary.
    Handles cleanup of markdown cells and trimming of code cells.
    """
    cleaned_source = source_lines
    if cell_type == "markdown":
        cleaned_source = clean_markdown_source(source_lines)
    elif cell_type == "code":
        cleaned_source = trim_code_source(source_lines)

    # Convert the list of lines to the format ipynb expects (lines ending in \n).
    # Jupyter expects \n at the end of every line except, optionally, the last one;
    # we add it to all lines except the very last for consistency.
    final_source = []
    for i, s in enumerate(cleaned_source):
        if i < len(cleaned_source) - 1:
            final_source.append(s + "\n")
        else:
            final_source.append(s)

    cell = {
        "cell_type": cell_type,
        "metadata": {},
        "source": final_source
    }
    if cell_type == "code":
        cell["outputs"] = []
        cell["execution_count"] = None
    return cell


def trim_code_source(lines):
    """
    Removes trailing empty lines from code cells.
    """
    # Work on a copy so the caller's list is not modified
    trimmed = list(lines)
    # Pop lines from the end as long as they are empty or whitespace-only
    while trimmed and not trimmed[-1].strip():
        trimmed.pop()
    return trimmed


def clean_markdown_source(lines):
    """
    Handles:
      1. Docstring style (double or single quotes)
      2. Standard commented style (# )
    """
    if not lines:
        return []

    # Skip empty lines to find the first line with actual content
    non_empty_indices = [i for i, line in enumerate(lines) if line.strip()]
    if not non_empty_indices:
        return []  # All lines are empty
    first_idx = non_empty_indices[0]
    first_line = lines[first_idx].strip()

    # STRATEGY 1: Docstring (double or single quotes)
    if first_line.startswith('"""') or first_line.startswith("'''"):
        full_text = "\n".join(lines)
        strip_char = first_line[:3]
        start_loc = full_text.find(strip_char)
        end_loc = full_text.rfind(strip_char)
        if start_loc != -1 and end_loc != -1 and start_loc != end_loc:
            # The content is between the quotes
            content = full_text[start_loc + 3:end_loc]
            # If the docstring was written as
            #     """
            #     Text
            #     """
            # splitlines() yields ["", "Text", ""], so trim the leading and
            # trailing empty lines for cleanliness.
            content_lines = content.splitlines()
            if content_lines and not content_lines[0].strip():
                content_lines.pop(0)
            if content_lines and not content_lines[-1].strip():
                content_lines.pop()
            return content_lines
        elif start_loc != -1:
            content = full_text[start_loc + 3:]
            return content.splitlines()

    # STRATEGY 2: Standard comments
    cleaned = []
    for line in lines:
        stripped = line.lstrip()
        if stripped.startswith("#"):
            # Remove the first # and, if present, one following space
            content = stripped[1:]
            if content.startswith(" "):
                content = content[1:]
            cleaned.append(content)
        else:
            # Pass through non-comment lines (likely empty lines or raw text)
            cleaned.append(line)
    return cleaned


def convert_to_ipynb(filepath):
    output_path = os.path.splitext(filepath)[0] + ".ipynb"
    print(f"Converting {filepath} -> {output_path}")

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return

    cells = parse_py_file(content)

    # Use the current interpreter version for the notebook metadata
    py_version = platform.python_version()

    notebook = {
        "cells": cells,
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "codemirror_mode": {"name": "ipython", "version": 3},
                "file_extension": ".py",
                "mimetype": "text/x-python",
                "name": "python",
                "nbconvert_exporter": "python",
                "pygments_lexer": "ipython3",
                "version": py_version
            }
        },
        "nbformat": 4,
        "nbformat_minor": 5
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(notebook, f, indent=1, ensure_ascii=False)
        f.write("\n")


def main():
    args = parse_args()
    for file_path in args.files:
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        convert_to_ipynb(file_path)


if __name__ == "__main__":
    main()
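Assuming the gist is saved locally as `py2ipynb.py` (the file name is an assumption; the gist does not fix one), it can be run on one or more files from the command line, e.g. `python py2ipynb.py analysis.py experiment.py`, which writes `analysis.ipynb` and `experiment.ipynb` next to the inputs. It can also be used programmatically; a minimal sketch under the same file-name assumption:

```python
# Illustrative only: assumes the gist was saved as py2ipynb.py (hypothetical name)
# somewhere on the import path, e.g. the current working directory.
from py2ipynb import convert_to_ipynb

# Writes analysis.ipynb next to the input file.
convert_to_ipynb("analysis.py")
```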