BjornFJohansson · November 4, 2025 05:30
diff --git a/remove_import_aliases_in_pydna.py b/remove_import_aliases_in_pydna.py
 #!/usr/bin/env python3
 """
 Clean underscored aliases while preserving original line breaks/spacing.

 Rules
 -----
 1) from X import Name as _Name  -> from X import Name        (and _Name → Name)
 2) import pkg as _pkg           -> import pkg                (and _pkg → pkg)
 3) import networkx as _nx       -> import networkx as nx     (and _nx → nx)

 Outputs: <original>_clean.py
 """

 from __future__ import annotations
 from pathlib import Path
 import re
 import sys
 from typing import Dict, List, Tuple

 # Types
 # Plan maps each underscore-prefixed import alias found in a file to the name
 # that should replace it, e.g. {"_nx": "nx", "_Path": "Path"}.
 Plan = Dict[str, str]                  # alias -> replacement name
 # FromBlock is the element type of the list returned by _extract_from_blocks:
 # one tuple per "from ... import ..." statement.
 FromBlock = Tuple[str, str, str, str]  # (full_match, module, stuff, suffix)

 # --- helpers that DO NOT touch newlines ---

 def _normalize_dots_segment(s: str) -> str:
    """Only remove spaces/tabs around '.' (never newlines)."""
    return re.sub(r"[ \t]*\.[ \t]*", ".", s)

 def _split_top_level_commas(s: str) -> List[str]:
    parts, depth, buf = [], 0, []
    for ch in s:
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth = max(0, depth - 1)
        if ch == "," and depth == 0:
            seg = "".join(buf).strip()
            if seg:
                parts.append(seg)
            buf = []
        else:
            buf.append(ch)
    tail = "".join(buf).strip()
    if tail:
        parts.append(tail)
    return parts

 # --- collect plan ---

 # One "from <mod> import <stuff>" statement.  ``stuff`` is either a complete
 # parenthesised name list (it may span several lines and may close with a
 # ")" at column 0) or the rest of the logical line, following backslash
 # continuations.  The match never runs on into the statements that follow
 # the import, even when they are indented (e.g. an import inside a function).
 # Group 1 equals group 0; the named groups are ``mod`` and ``stuff``.
 FROM_RE = re.compile(
     r"(^[ \t]*from[ \t]+(?P<mod>[^\s]+)[ \t]+import[ \t]+"
     r"(?P<stuff>\([^()]*\)|(?:\\\n|[^\n])*))",
     re.MULTILINE,
 )

 # One plain "import <tail>" statement.  ``tail`` is the rest of the logical
 # line, following backslash continuations only.
 IMPORT_RE = re.compile(
     r"(^[ \t]*import[ \t]+(?P<tail>(?:\\\n|[^\n])*))",
     re.MULTILINE,
 )

 def _extract_from_blocks(code: str) -> List[FromBlock]:
     """Return every ``from ... import ...`` statement found in *code*.

     Each entry is ``(full_match, module, stuff, suffix)``.  ``suffix`` is the
     text between the end of the match and the start of the next line (the
     terminating newline included, or everything up to the end of *code* when
     there is none), so ``full_match + suffix`` reproduces the source exactly.
     """
     blocks: List[FromBlock] = []
     for m in FROM_RE.finditer(code):
         end = m.end()
         # The previous implementation sliced code[end:end], which is always
         # empty; look up the real line boundary instead.
         newline_at = code.find("\n", end)
         next_boundary = len(code) if newline_at == -1 else newline_at + 1
         blocks.append(
             (m.group(1), m.group("mod"), m.group("stuff"), code[end:next_boundary])
         )
     return blocks

 def collect_plan(code: str) -> Plan:
     """Scan *code* for underscore-prefixed import aliases and plan their renames.

     For ``from X import Name as _alias`` and ``import pkg.mod as _alias``:

     * when the alias is the imported name (last dotted component for plain
       imports) with one leading underscore, it maps to that name;
     * any other alias starting with ``_`` maps to itself with all leading
       underscores removed.

     Aliases whose replacement would not be a usable identifier are left out
     of the plan: ``gettext as _`` would strip to an empty string, ``_1x`` to
     ``1x`` and ``_class`` to a keyword, and applying any of those would
     corrupt the file.

     Args:
         code: Full source text of one Python file.

     Returns:
         Mapping of alias -> replacement name (possibly empty).
     """
     import keyword  # local import: only needed to validate replacement names

     from_item = re.compile(r"^(\w+)[ \t]+as[ \t]+([A-Za-z_]\w*)$")
     import_item = re.compile(r"^([A-Za-z_][\w\.]*)[ \t]+as[ \t]+([A-Za-z_]\w*)$")
     plan: Plan = {}

     def register(imported: str, alias: str) -> None:
         """Record the rename for *alias* unless the result would be unusable."""
         if not alias.startswith("_"):
             return
         replacement = imported if alias == f"_{imported}" else alias.lstrip("_")
         if replacement.isidentifier() and not keyword.iskeyword(replacement):
             plan[alias] = replacement

     # FROM blocks
     for m in FROM_RE.finditer(code):
         inner = _normalize_dots_segment(m.group("stuff")).strip()
         if inner.startswith("(") and inner.endswith(")"):
             inner = inner[1:-1]
         for item in _split_top_level_commas(inner):
             a = from_item.match(item)
             if a:
                 register(a.group(1), a.group(2))

     # IMPORT lines
     for m in IMPORT_RE.finditer(code):
         tail = _normalize_dots_segment(m.group("tail"))
         for part in _split_top_level_commas(tail):
             a = import_item.match(part)
             if a:
                 # only the last dotted component can coincide with the alias
                 register(a.group(1).split(".")[-1], a.group(2))

     return plan

 # --- rewrite imports (preserve original newlines/spacing) ---

 def rewrite_from_blocks(code: str, plan: Plan) -> str:
     """Rewrite underscore aliases inside every ``from ... import ...`` statement.

     Only ``Name as _alias`` items whose alias appears in *plan* are edited,
     and they are edited in place: indentation, line breaks, trailing commas,
     comments and all other items are copied through untouched (the previous
     implementation rebuilt the statement and collapsed it onto one line).

     * alias planned to become the imported name itself: the ``as`` clause is
       dropped (``from X import Name as _Name`` -> ``from X import Name``);
     * alias planned to become a different name: only the alias is swapped
       (``from X import thing as _t`` -> ``from X import thing as t``).

     Args:
         code: Source text to rewrite.
         plan: Mapping of alias -> replacement name.

     Returns:
         The rewritten source text.
     """
     # An import-list item: it starts the list or follows "(" / ",", possibly
     # after whitespace, a backslash continuation or a comment line, and it
     # ends before ",", ")", a comment, a newline or the end of the list.
     item_re = re.compile(
         r"(?P<lead>(?:\A|[(,])(?:\s|\\\n|#[^\n]*\n)*)"
         r"(?P<name>\w+)(?P<sep>[ \t]+as[ \t]+)(?P<alias>[A-Za-z_]\w*)"
         r"(?=(?:[ \t]|\\\n)*(?:[,)#\n]|\Z))"
     )

     def fix_item(a: re.Match) -> str:
         name, alias = a.group("name"), a.group("alias")
         new = plan.get(alias)
         if not new or new == alias:
             # not planned (or a no-op): usages keep the alias, so must the import
             return a.group(0)
         if new == name:
             return a.group("lead") + name  # drop alias
         return a.group("lead") + name + a.group("sep") + new  # _nx -> nx

     def repl(m: re.Match) -> str:
         head = m.group(0)
         cut = m.start("stuff") - m.start(0)  # keep "from X import " verbatim
         return head[:cut] + item_re.sub(fix_item, head[cut:])

     return FROM_RE.sub(repl, code)

 def rewrite_import_lines(code: str, plan: Plan) -> str:
     """Rewrite underscore aliases inside every plain ``import ...`` statement.

     Only ``module as _alias`` items whose alias appears in *plan* are edited,
     in place, so spacing, continuations and comments are preserved.

     * ``import pkg as _pkg`` -> ``import pkg`` (the module name itself
       becomes the binding);
     * ``import networkx as _nx`` -> ``import networkx as nx``;
     * ``import a.b as _b`` -> ``import a.b as b``.  The ``as`` clause must be
       kept for dotted modules because ``import a.b`` binds ``a``, not ``b``.

     Args:
         code: Source text to rewrite.
         plan: Mapping of alias -> replacement name.

     Returns:
         The rewritten source text.
     """
     # An item starts the statement or follows ","; it ends before ",", a
     # comment, a newline or the end of the statement.
     item_re = re.compile(
         r"(?P<lead>(?:\A|,)(?:[ \t]|\\\n)*)"
         r"(?P<full>[A-Za-z_]\w*(?:[ \t]*\.[ \t]*[A-Za-z_]\w*)*)"
         r"(?P<sep>[ \t]+as[ \t]+)(?P<alias>[A-Za-z_]\w*)"
         r"(?=(?:[ \t]|\\\n)*(?:[,#\n]|\Z))"
     )

     def fix_item(a: re.Match) -> str:
         full, alias = a.group("full"), a.group("alias")
         new = plan.get(alias)
         if not new or new == alias:
             # not planned (or a no-op): usages keep the alias, so must the import
             return a.group(0)
         if new == full:
             return a.group("lead") + full  # undotted module: drop alias
         return a.group("lead") + full + a.group("sep") + new  # _nx -> nx

     def repl(m: re.Match) -> str:
         head = m.group(0)
         cut = m.start("tail") - m.start(0)  # keep "import " verbatim
         return head[:cut] + item_re.sub(fix_item, head[cut:])

     return IMPORT_RE.sub(repl, code)

 # --- replace usages (leave line breaks untouched) ---

 def replace_usages(code: str, plan: Plan) -> str:
     """Rename every stand-alone occurrence of a planned alias in *code*.

     All aliases are replaced in a single pass, so one rename can never feed
     into another.  An alias is only replaced when it is a whole identifier
     and is not an attribute access: ``_nx.Graph()`` becomes ``nx.Graph()``
     while ``self._nx`` is left alone, because an attribute of some other
     object is not the imported name.  Entries with an empty replacement or
     that map an alias to itself are ignored.

     The replacement is purely textual: occurrences inside strings and
     comments are renamed as well.

     Args:
         code: Source text to rewrite.
         plan: Mapping of alias -> replacement name.

     Returns:
         The rewritten source text (line breaks untouched).
     """
     renames = {old: new for old, new in plan.items() if old and new and new != old}
     if not renames:
         return code
     # Longest alias first so the alternation prefers the longest candidate.
     ordered = sorted(renames, key=len, reverse=True)
     alternatives = "|".join(re.escape(old) for old in ordered)
     pattern = re.compile(rf"(?<![\w.])(?:{alternatives})(?!\w)")
     # A function replacement keeps the new name from being read as a template.
     return pattern.sub(lambda m: renames[m.group(0)], code)

 # --- end-to-end ---

 def transform(code: str) -> str:
     """Run the full clean-up on *code* and return the resulting text.

     When no alias rename is planned the input is returned unchanged;
     otherwise ``from`` imports are rewritten first, then plain imports, and
     finally every usage of the aliases.
     """
     plan = collect_plan(code)
     if plan:
         code = rewrite_from_blocks(code, plan)
         code = rewrite_import_lines(code, plan)
         code = replace_usages(code, plan)
     return code

 def process_path(p: Path) -> Path | None:
     """Clean the file at *p* and write the result next to it.

     The file is read as UTF-8 and passed through ``transform``.  If the text
     changed it is written to ``<stem>_clean.py`` in the same directory and
     that path is returned; otherwise nothing is written and ``None`` is
     returned.  A one-line status message is printed in both cases.
     """
     original = p.read_text(encoding="utf-8")
     cleaned = transform(original)
     if cleaned != original:
         target = p.with_name(p.stem + "_clean.py")
         target.write_text(cleaned, encoding="utf-8")
         print(f"✔ Wrote {target.name}")
         return target
     print(f"— No changes for {p.name}")
     return None

 def main():
     """Command-line entry point.

     Expects one argument: a ``.py`` file, or a directory that is searched
     recursively for ``.py`` files.  Every file found is handed to
     ``process_path``.

     Exits with status 1 when the argument is missing, is a file without a
     ``.py`` suffix, or does not exist (those cases used to do nothing).
     """
     if len(sys.argv) < 2:
         # Report the name the script was actually started as.
         prog = Path(sys.argv[0]).name if sys.argv and sys.argv[0] else ""
         prog = prog or "remove_import_aliases_in_pydna.py"
         print(f"Usage: python {prog} <file-or-directory>")
         sys.exit(1)
     target = Path(sys.argv[1])
     if target.is_file():
         if target.suffix != ".py":
             print(f"Not a Python file: {target}", file=sys.stderr)
             sys.exit(1)
         process_path(target)
     elif target.is_dir():
         # Snapshot the listing first: process_path writes new *_clean.py files
         # into the tree being walked, and a lazy rglob could pick them up.
         for f in sorted(target.rglob("*.py")):
             process_path(f)
     else:
         print(f"No such file or directory: {target}", file=sys.stderr)
         sys.exit(1)

 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3
	"""
	Clean underscored aliases while preserving original line breaks/spacing.

	Rules
	-----
	1) from X import Name as _Name -> from X import Name (and _Name → Name)
	2) import pkg as _pkg -> import pkg (and _pkg → pkg)
	3) import networkx as _nx -> import networkx as nx (and _nx → nx)

	Outputs: <original>_clean.py
	"""

	from __future__ import annotations
	from pathlib import Path
	import re
	import sys
	from typing import Dict, List, Tuple

	# Types
	# Plan maps each underscore-prefixed import alias found in a file to the name
	# that should replace it, e.g. {"_nx": "nx", "_Path": "Path"}.
	Plan = Dict[str, str] # alias -> replacement name
	# FromBlock is the element type of the list returned by _extract_from_blocks:
	# one tuple per "from ... import ..." statement.
	FromBlock = Tuple[str, str, str, str] # (full_match, module, stuff, suffix)

	# --- helpers that DO NOT touch newlines ---

	def _normalize_dots_segment(s: str) -> str:
	"""Only remove spaces/tabs around '.' (never newlines)."""
	return re.sub(r"[ \t]\.[ \t]", ".", s)

	def _split_top_level_commas(s: str) -> List[str]:
	parts, depth, buf = [], 0, []
	for ch in s:
	if ch == "(":
	depth += 1
	elif ch == ")":
	depth = max(0, depth - 1)
	if ch == "," and depth == 0:
	seg = "".join(buf).strip()
	if seg:
	parts.append(seg)
	buf = []
	else:
	buf.append(ch)
	tail = "".join(buf).strip()
	if tail:
	parts.append(tail)
	return parts

	# --- collect plan ---

	# One "from <mod> import <stuff>" statement.  ``stuff`` is either a complete
	# parenthesised name list (it may span several lines and may close with a
	# ")" at column 0) or the rest of the logical line, following backslash
	# continuations.  The match never runs on into the statements that follow
	# the import, even when they are indented (e.g. an import inside a function).
	# Group 1 equals group 0; the named groups are ``mod`` and ``stuff``.
	FROM_RE = re.compile(
	    r"(^[ \t]*from[ \t]+(?P<mod>[^\s]+)[ \t]+import[ \t]+"
	    r"(?P<stuff>\([^()]*\)|(?:\\\n|[^\n])*))",
	    re.MULTILINE,
	)

	# One plain "import <tail>" statement.  ``tail`` is the rest of the logical
	# line, following backslash continuations only.
	IMPORT_RE = re.compile(
	    r"(^[ \t]*import[ \t]+(?P<tail>(?:\\\n|[^\n])*))",
	    re.MULTILINE,
	)

	def _extract_from_blocks(code: str) -> List[FromBlock]:
	    """Return every ``from ... import ...`` statement found in *code*.

	    Each entry is ``(full_match, module, stuff, suffix)``.  ``suffix`` is the
	    text between the end of the match and the start of the next line (the
	    terminating newline included, or everything up to the end of *code* when
	    there is none), so ``full_match + suffix`` reproduces the source exactly.
	    """
	    blocks: List[FromBlock] = []
	    for m in FROM_RE.finditer(code):
	        end = m.end()
	        # Slicing code[end:end] would always be empty; look up the real line
	        # boundary instead.
	        newline_at = code.find("\n", end)
	        next_boundary = len(code) if newline_at == -1 else newline_at + 1
	        blocks.append(
	            (m.group(1), m.group("mod"), m.group("stuff"), code[end:next_boundary])
	        )
	    return blocks

	def collect_plan(code: str) -> Plan:
	    """Scan *code* for underscore-prefixed import aliases and plan their renames.

	    For ``from X import Name as _alias`` and ``import pkg.mod as _alias``:

	    * when the alias is the imported name (last dotted component for plain
	      imports) with one leading underscore, it maps to that name;
	    * any other alias starting with ``_`` maps to itself with all leading
	      underscores removed.

	    Aliases whose replacement would not be a usable identifier are left out
	    of the plan: ``gettext as _`` would strip to an empty string, ``_1x`` to
	    ``1x`` and ``_class`` to a keyword, and applying any of those would
	    corrupt the file.

	    Args:
	        code: Full source text of one Python file.

	    Returns:
	        Mapping of alias -> replacement name (possibly empty).
	    """
	    import keyword  # local import: only needed to validate replacement names

	    from_item = re.compile(r"^(\w+)[ \t]+as[ \t]+([A-Za-z_]\w*)$")
	    import_item = re.compile(r"^([A-Za-z_][\w\.]*)[ \t]+as[ \t]+([A-Za-z_]\w*)$")
	    plan: Plan = {}

	    def register(imported: str, alias: str) -> None:
	        """Record the rename for *alias* unless the result would be unusable."""
	        if not alias.startswith("_"):
	            return
	        replacement = imported if alias == f"_{imported}" else alias.lstrip("_")
	        if replacement.isidentifier() and not keyword.iskeyword(replacement):
	            plan[alias] = replacement

	    # FROM blocks
	    for m in FROM_RE.finditer(code):
	        inner = _normalize_dots_segment(m.group("stuff")).strip()
	        if inner.startswith("(") and inner.endswith(")"):
	            inner = inner[1:-1]
	        for item in _split_top_level_commas(inner):
	            a = from_item.match(item)
	            if a:
	                register(a.group(1), a.group(2))

	    # IMPORT lines
	    for m in IMPORT_RE.finditer(code):
	        tail = _normalize_dots_segment(m.group("tail"))
	        for part in _split_top_level_commas(tail):
	            a = import_item.match(part)
	            if a:
	                # only the last dotted component can coincide with the alias
	                register(a.group(1).split(".")[-1], a.group(2))

	    return plan

	# --- rewrite imports (preserve original newlines/spacing) ---

	def rewrite_from_blocks(code: str, plan: Plan) -> str:
	    """Rewrite underscore aliases inside every ``from ... import ...`` statement.

	    Only ``Name as _alias`` items whose alias appears in *plan* are edited,
	    and they are edited in place: indentation, line breaks, trailing commas,
	    comments and all other items are copied through untouched.

	    * alias planned to become the imported name itself: the ``as`` clause is
	      dropped (``from X import Name as _Name`` -> ``from X import Name``);
	    * alias planned to become a different name: only the alias is swapped
	      (``from X import thing as _t`` -> ``from X import thing as t``).

	    Args:
	        code: Source text to rewrite.
	        plan: Mapping of alias -> replacement name.

	    Returns:
	        The rewritten source text.
	    """
	    # An import-list item: it starts the list or follows "(" / ",", possibly
	    # after whitespace, a backslash continuation or a comment line, and it
	    # ends before ",", ")", a comment, a newline or the end of the list.
	    item_re = re.compile(
	        r"(?P<lead>(?:\A|[(,])(?:\s|\\\n|#[^\n]*\n)*)"
	        r"(?P<name>\w+)(?P<sep>[ \t]+as[ \t]+)(?P<alias>[A-Za-z_]\w*)"
	        r"(?=(?:[ \t]|\\\n)*(?:[,)#\n]|\Z))"
	    )

	    def fix_item(a: re.Match) -> str:
	        name, alias = a.group("name"), a.group("alias")
	        new = plan.get(alias)
	        if not new or new == alias:
	            # not planned (or a no-op): usages keep the alias, so must the import
	            return a.group(0)
	        if new == name:
	            return a.group("lead") + name  # drop alias
	        return a.group("lead") + name + a.group("sep") + new  # _nx -> nx

	    def repl(m: re.Match) -> str:
	        head = m.group(0)
	        cut = m.start("stuff") - m.start(0)  # keep "from X import " verbatim
	        return head[:cut] + item_re.sub(fix_item, head[cut:])

	    return FROM_RE.sub(repl, code)

	def rewrite_import_lines(code: str, plan: Plan) -> str:
	    """Rewrite underscore aliases inside every plain ``import ...`` statement.

	    Only ``module as _alias`` items whose alias appears in *plan* are edited,
	    in place, so spacing, continuations and comments are preserved.

	    * ``import pkg as _pkg`` -> ``import pkg`` (the module name itself
	      becomes the binding);
	    * ``import networkx as _nx`` -> ``import networkx as nx``;
	    * ``import a.b as _b`` -> ``import a.b as b``.  The ``as`` clause must be
	      kept for dotted modules because ``import a.b`` binds ``a``, not ``b``.

	    Args:
	        code: Source text to rewrite.
	        plan: Mapping of alias -> replacement name.

	    Returns:
	        The rewritten source text.
	    """
	    # An item starts the statement or follows ","; it ends before ",", a
	    # comment, a newline or the end of the statement.
	    item_re = re.compile(
	        r"(?P<lead>(?:\A|,)(?:[ \t]|\\\n)*)"
	        r"(?P<full>[A-Za-z_]\w*(?:[ \t]*\.[ \t]*[A-Za-z_]\w*)*)"
	        r"(?P<sep>[ \t]+as[ \t]+)(?P<alias>[A-Za-z_]\w*)"
	        r"(?=(?:[ \t]|\\\n)*(?:[,#\n]|\Z))"
	    )

	    def fix_item(a: re.Match) -> str:
	        full, alias = a.group("full"), a.group("alias")
	        new = plan.get(alias)
	        if not new or new == alias:
	            # not planned (or a no-op): usages keep the alias, so must the import
	            return a.group(0)
	        if new == full:
	            return a.group("lead") + full  # undotted module: drop alias
	        return a.group("lead") + full + a.group("sep") + new  # _nx -> nx

	    def repl(m: re.Match) -> str:
	        head = m.group(0)
	        cut = m.start("tail") - m.start(0)  # keep "import " verbatim
	        return head[:cut] + item_re.sub(fix_item, head[cut:])

	    return IMPORT_RE.sub(repl, code)

	# --- replace usages (leave line breaks untouched) ---

	def replace_usages(code: str, plan: Plan) -> str:
	    """Rename every stand-alone occurrence of a planned alias in *code*.

	    All aliases are replaced in a single pass, so one rename can never feed
	    into another.  An alias is only replaced when it is a whole identifier
	    and is not an attribute access: ``_nx.Graph()`` becomes ``nx.Graph()``
	    while ``self._nx`` is left alone, because an attribute of some other
	    object is not the imported name.  Entries with an empty replacement or
	    that map an alias to itself are ignored.

	    The replacement is purely textual: occurrences inside strings and
	    comments are renamed as well.

	    Args:
	        code: Source text to rewrite.
	        plan: Mapping of alias -> replacement name.

	    Returns:
	        The rewritten source text (line breaks untouched).
	    """
	    renames = {old: new for old, new in plan.items() if old and new and new != old}
	    if not renames:
	        return code
	    # Longest alias first so the alternation prefers the longest candidate.
	    ordered = sorted(renames, key=len, reverse=True)
	    alternatives = "|".join(re.escape(old) for old in ordered)
	    pattern = re.compile(rf"(?<![\w.])(?:{alternatives})(?!\w)")
	    # A function replacement keeps the new name from being read as a template.
	    return pattern.sub(lambda m: renames[m.group(0)], code)

	# --- end-to-end ---

	def transform(code: str) -> str:
	    """Run the full clean-up on *code* and return the resulting text.

	    When no alias rename is planned the input is returned unchanged;
	    otherwise ``from`` imports are rewritten first, then plain imports, and
	    finally every usage of the aliases.
	    """
	    plan = collect_plan(code)
	    if not plan:
	        return code
	    code2 = rewrite_from_blocks(code, plan)
	    code3 = rewrite_import_lines(code2, plan)
	    code4 = replace_usages(code3, plan)
	    return code4

	def process_path(p: Path) -> Path | None:
	    """Clean the file at *p* and write the result next to it.

	    The file is read as UTF-8 and passed through ``transform``.  If the text
	    changed it is written to ``<stem>_clean.py`` in the same directory and
	    that path is returned; otherwise nothing is written and ``None`` is
	    returned.  A one-line status message is printed in both cases.
	    """
	    src = p.read_text(encoding="utf-8")
	    out = transform(src)
	    if out == src:
	        print(f"— No changes for {p.name}")
	        return None
	    dst = p.with_name(p.stem + "_clean.py")
	    dst.write_text(out, encoding="utf-8")
	    print(f"✔ Wrote {dst.name}")
	    return dst

	def main():
	    """Command-line entry point.

	    Expects one argument: a ``.py`` file, or a directory that is searched
	    recursively for ``.py`` files.  Every file found is handed to
	    ``process_path``.

	    Exits with status 1 when the argument is missing, is a file without a
	    ``.py`` suffix, or does not exist.
	    """
	    if len(sys.argv) < 2:
	        # Report the name the script was actually started as.
	        prog = Path(sys.argv[0]).name if sys.argv and sys.argv[0] else ""
	        prog = prog or "remove_import_aliases_in_pydna.py"
	        print(f"Usage: python {prog} <file-or-directory>")
	        sys.exit(1)
	    target = Path(sys.argv[1])
	    if target.is_file():
	        if target.suffix != ".py":
	            print(f"Not a Python file: {target}", file=sys.stderr)
	            sys.exit(1)
	        process_path(target)
	    elif target.is_dir():
	        # Snapshot the listing first: process_path writes new *_clean.py files
	        # into the tree being walked, and a lazy rglob could pick them up.
	        for f in sorted(target.rglob("*.py")):
	            process_path(f)
	    else:
	        print(f"No such file or directory: {target}", file=sys.stderr)
	        sys.exit(1)

	if __name__ == "__main__":
	    main()
No results found