Skip to content

Instantly share code, notes, and snippets.

@BjornFJohansson
Created November 4, 2025 05:30
Show Gist options
  • Select an option

  • Save BjornFJohansson/31188a11a7f365ca5293e0e37dfb6347 to your computer and use it in GitHub Desktop.

Select an option

Save BjornFJohansson/31188a11a7f365ca5293e0e37dfb6347 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Clean underscored aliases while preserving original line breaks/spacing.
Rules
-----
1) from X import Name as _Name -> from X import Name (and _Name → Name)
2) import pkg as _pkg -> import pkg (and _pkg → pkg)
3) import networkx as _nx -> import networkx as nx (and _nx → nx)
Outputs: <original>_clean.py
"""
from __future__ import annotations
from pathlib import Path
import re
import sys
from typing import Dict, List, Tuple
# Types
# Plan: maps an underscored import alias found in the source to the name that
# should be used in its place.
Plan = Dict[str, str] # alias -> replacement name
# FromBlock: one matched ``from ... import ...`` statement, in the order
# (full matched text, module name, imported-names text, trailing suffix).
FromBlock = Tuple[str, str, str, str] # (full_match, module, stuff, suffix)
# --- helpers that DO NOT touch newlines ---
def _normalize_dots_segment(s: str) -> str:
    """Collapse horizontal whitespace hugging every '.' in *s*.

    Only spaces and tabs next to a dot are dropped; newline characters are
    never consumed, so the line structure of *s* is unchanged.
    """
    padded_dot = re.compile(r"[ \t]*\.[ \t]*")
    return padded_dot.sub(".", s)
def _split_top_level_commas(s: str) -> List[str]:
    """Split *s* on commas that are not nested inside parentheses.

    Each piece is stripped of surrounding whitespace and empty pieces are
    discarded. An unmatched ')' never drives the nesting level below zero.
    """
    pieces: List[str] = []
    nesting = 0
    start = 0  # index where the piece currently being scanned begins
    for idx, char in enumerate(s):
        if char == "(":
            nesting += 1
        elif char == ")":
            nesting = max(0, nesting - 1)
        elif char == "," and nesting == 0:
            piece = s[start:idx].strip()
            if piece:
                pieces.append(piece)
            start = idx + 1
    last = s[start:].strip()
    if last:
        pieces.append(last)
    return pieces
# --- collect plan ---
# Matches one ``from <mod> import <stuff>`` statement.
#   group 1 / group 0 : the whole statement (leading indentation included)
#   mod               : the module path after ``from``
#   stuff             : the imported-names text, which is either
#                         * a parenthesised list ``( ... )`` that may span lines, or
#                         * the rest of the logical line, following backslash
#                           line continuations.
# The match stops at the end of the statement itself. Indented lines that
# follow (for example the rest of a function body after a local import) are
# NOT treated as continuations, and a closing ')' at column 0 is included.
FROM_RE = re.compile(
    r"(^[ \t]*from[ \t]+(?P<mod>[^\s]+)[ \t]+import[ \t]+"
    r"(?P<stuff>\([^()]*\)|(?:\\\n|[^\n])*))",
    re.MULTILINE,
)
# Matches one plain ``import <tail>`` statement.
#   group 1 / group 0 : the whole statement (leading indentation included)
#   tail              : everything after ``import`` up to the end of the
#                       logical line, following backslash line continuations.
# Indented lines that follow the statement are NOT part of the match, so an
# import inside a function or ``try`` block does not swallow the code after it.
IMPORT_RE = re.compile(
    r"(^[ \t]*import[ \t]+(?P<tail>(?:\\\n|[^\n])*))",
    re.MULTILINE,
)
def _extract_from_blocks(code: str) -> List[FromBlock]:
    """List every ``from ... import ...`` statement that FROM_RE finds in *code*.

    Each entry is ``(full_match, module, stuff, suffix)``. The suffix is the
    slice of *code* from the end of the match to the end of the match, so it
    is always the empty string.
    """
    return [
        (
            hit.group(1),
            hit.group("mod"),
            hit.group("stuff"),
            code[hit.end():hit.end()],  # zero-width slice: suffix is ""
        )
        for hit in FROM_RE.finditer(code)
    ]
def _replacement_for(base: str, alias: str) -> str | None:
    """Return the name that should replace *alias*, or None to leave it alone.

    *base* is the imported name (the last dotted component for plain imports).
    """
    import keyword  # function-scope import: nothing else in the module needs it

    if alias == f"_{base}":
        return base
    if not alias.startswith("_"):
        return None
    stripped = alias.lstrip("_")
    # "_" strips to "", "_1x" to "1x" and "_class" to "class"; none of these
    # can be used as a name, so such aliases must stay exactly as written.
    if not stripped.isidentifier() or keyword.iskeyword(stripped):
        return None
    return stripped


def collect_plan(code: str) -> Plan:
    """Build the alias -> replacement mapping for the imports found in *code*.

    Both ``from X import Name as _alias`` items and ``import pkg as _alias``
    items are scanned. An alias equal to ``_`` + the imported name maps to
    that name; any other alias starting with an underscore maps to itself
    with the leading underscores removed. Aliases whose stripped form would
    be empty, not a valid identifier, or a Python keyword are left out of the
    plan, so they are never rewritten.

    Returns:
        A dict from alias to replacement name, in order of first appearance
        (``from`` imports first, then plain imports).
    """
    plan: Plan = {}

    def note(base: str, alias: str) -> None:
        target = _replacement_for(base, alias)
        if target is not None:
            plan[alias] = target

    # FROM blocks
    for m in FROM_RE.finditer(code):
        inner = _normalize_dots_segment(m.group("stuff")).strip()
        if inner.startswith("(") and inner.endswith(")"):
            inner = inner[1:-1]
        for item in _split_top_level_commas(inner):
            a = re.match(r"^(\w+)[ \t]+as[ \t]+([A-Za-z_]\w*)$", item)
            if a:
                note(a.group(1), a.group(2))

    # IMPORT lines
    for m in IMPORT_RE.finditer(code):
        tail = _normalize_dots_segment(m.group("tail"))
        for part in _split_top_level_commas(tail):
            a = re.match(r"^([A-Za-z_][\w\.]*)[ \t]+as[ \t]+([A-Za-z_]\w*)$", part)
            if a:
                # only the last dotted component takes part in the "_name" rule
                note(a.group(1).split(".")[-1], a.group(2))
    return plan
# --- rewrite imports (preserve original newlines/spacing) ---
def rewrite_from_blocks(code: str, plan: Plan) -> str:
    """Rewrite underscored aliases inside every ``from ... import ...`` statement.

    Items are edited in place: only the ``Name as _alias`` text of an item
    that needs changing is replaced. Indentation, line breaks, parentheses,
    trailing commas and every other character of the statement are kept
    exactly as written.

    Per item:
      * ``Name as _Name``  -> ``Name`` (alias dropped)
      * ``Name as _alias`` -> ``Name as <plan[_alias]>`` when the plan holds a
        non-empty replacement that differs from the alias
      * anything else is left untouched.

    Args:
        code: Full source text.
        plan: Alias -> replacement mapping.

    Returns:
        The source text with the import statements rewritten.
    """
    item_re = re.compile(r"(\w+)([ \t]+as[ \t]+)([A-Za-z_]\w*)")

    def rewrite_item(segment: str) -> str:
        # A segment is the raw text between two top-level commas.
        core = segment.strip()
        hit = item_re.fullmatch(core)
        if hit is None:
            return segment
        name, sep, alias = hit.groups()
        if alias == f"_{name}":
            new_core = name
        else:
            target = plan.get(alias)
            if not target or target == alias:
                return segment
            new_core = f"{name}{sep}{target}"
        # Swap only the core; surrounding whitespace/newlines stay as they were.
        begin = segment.index(core)
        return segment[:begin] + new_core + segment[begin + len(core):]

    def rewrite_list(text: str) -> str:
        pieces: List[str] = []
        depth = 0
        start = 0
        for pos, ch in enumerate(text):
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth = max(0, depth - 1)
            elif ch == "," and depth == 0:
                pieces.append(rewrite_item(text[start:pos]))
                pieces.append(",")
                start = pos + 1
        pieces.append(rewrite_item(text[start:]))
        return "".join(pieces)

    def repl(m: re.Match) -> str:
        stuff = m.group("stuff")
        # Everything before the imported-names text is copied through verbatim.
        head = m.group(0)[: m.start("stuff") - m.start()]
        trimmed = stuff.strip()
        if trimmed.startswith("(") and trimmed.endswith(")"):
            lo, hi = stuff.index("(") + 1, stuff.rindex(")")
        else:
            lo, hi = 0, len(stuff)
        return head + stuff[:lo] + rewrite_list(stuff[lo:hi]) + stuff[hi:]

    return FROM_RE.sub(repl, code)
def rewrite_import_lines(code: str, plan: Plan) -> str:
    """Rewrite underscored aliases inside every plain ``import ...`` statement.

    Items are edited in place, so indentation, line breaks and the spacing
    between items are kept exactly as written.

    Per item:
      * ``pkg as _pkg``     -> ``pkg`` (alias dropped)
      * ``pkg.sub as _sub`` -> ``pkg.sub as sub``. The alias is kept under
        its new name because a bare ``import pkg.sub`` binds ``pkg``, not
        ``sub``, and the renamed usages refer to ``sub``.
      * ``pkg as _alias``   -> ``pkg as <plan[_alias]>`` when the plan holds a
        non-empty replacement that differs from the alias
      * anything else is left untouched.

    Args:
        code: Full source text.
        plan: Alias -> replacement mapping.

    Returns:
        The source text with the import statements rewritten.
    """
    item_re = re.compile(r"([A-Za-z_][\w\.]*)([ \t]+as[ \t]+)([A-Za-z_]\w*)")

    def rewrite_item(segment: str) -> str:
        # A segment is the raw text between two top-level commas.
        core = segment.strip()
        hit = item_re.fullmatch(_normalize_dots_segment(core))
        if hit is None:
            return segment
        full, sep, alias = hit.groups()
        base = full.split(".")[-1]
        if alias == f"_{base}":
            # A dotted module still needs an alias to bind the short name.
            new_core = f"{full}{sep}{base}" if "." in full else full
        else:
            target = plan.get(alias)
            if not target or target == alias:
                return segment
            new_core = f"{full}{sep}{target}"
        # Swap only the core; surrounding whitespace/newlines stay as they were.
        begin = segment.index(core)
        return segment[:begin] + new_core + segment[begin + len(core):]

    def rewrite_list(text: str) -> str:
        pieces: List[str] = []
        depth = 0
        start = 0
        for pos, ch in enumerate(text):
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth = max(0, depth - 1)
            elif ch == "," and depth == 0:
                pieces.append(rewrite_item(text[start:pos]))
                pieces.append(",")
                start = pos + 1
        pieces.append(rewrite_item(text[start:]))
        return "".join(pieces)

    def repl(m: re.Match) -> str:
        tail = m.group("tail")
        # Everything before the module list is copied through verbatim.
        head = m.group(0)[: m.start("tail") - m.start()]
        return head + rewrite_list(tail)

    return IMPORT_RE.sub(repl, code)
# --- replace usages (leave line breaks untouched) ---
def replace_usages(code: str, plan: Plan) -> str:
    """Rename every standalone use of a planned alias in *code*.

    An alias is replaced only where it appears as a whole identifier that is
    not directly preceded by a dot, so an attribute such as ``self._np`` is
    left alone even when ``_np`` is a planned alias. All aliases are handled
    in one pass, so the replacement of one alias is never rewritten again by
    another plan entry. Line breaks and spacing are untouched.

    Args:
        code: Full source text.
        plan: Alias -> replacement mapping.

    Returns:
        The source text with the aliases renamed; *code* unchanged when the
        plan is empty.
    """
    if not plan:
        # an empty alternation would match the empty string everywhere
        return code
    # Longest alias first so the alternation always prefers the longest candidate.
    ordered = sorted(plan, key=len, reverse=True)
    alternatives = "|".join(re.escape(alias) for alias in ordered)
    pattern = re.compile(rf"(?<![\w.])(?:{alternatives})(?!\w)")
    return pattern.sub(lambda hit: plan[hit.group(0)], code)
# --- end-to-end ---
def transform(code: str) -> str:
    """Run the full clean-up on *code* and return the resulting text.

    The plan is collected first; when it is empty the input is returned as
    is. Otherwise the ``from`` imports, the plain imports and finally the
    usages are rewritten, in that order, each step feeding the next.
    """
    plan = collect_plan(code)
    if plan:
        for step in (rewrite_from_blocks, rewrite_import_lines, replace_usages):
            code = step(code, plan)
    return code
def process_path(p: Path) -> Path | None:
    """Clean one file and write the result next to it as ``<stem>_clean.py``.

    The file is read and written as UTF-8. Nothing is written when the
    transformation leaves the text unchanged.

    Returns:
        The path of the written file, or None when there were no changes.
    """
    original = p.read_text(encoding="utf-8")
    cleaned = transform(original)
    if cleaned != original:
        target = p.with_name(f"{p.stem}_clean.py")
        target.write_text(cleaned, encoding="utf-8")
        print(f"✔ Wrote {target.name}")
        return target
    print(f"— No changes for {p.name}")
    return None
def main():
    """Command-line entry point: clean one ``.py`` file or every one in a tree.

    Exits with status 1 when no argument is given, when the target does not
    exist, or when the target is a file without a ``.py`` suffix.
    """
    if len(sys.argv) < 2:
        print("Usage: python clean_aliases_preserve_lines.py <file-or-directory>")
        sys.exit(1)
    target = Path(sys.argv[1])
    if target.is_file():
        if target.suffix != ".py":
            print(f"Error: {target} is not a .py file", file=sys.stderr)
            sys.exit(1)
        process_path(target)
    elif target.is_dir():
        # Materialise the listing first: process_path writes new *_clean.py
        # files into the same tree, which must not feed back into this walk.
        for f in sorted(target.rglob("*.py")):
            if f.is_file():
                process_path(f)
    else:
        print(f"Error: {target} does not exist", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment