Created
March 13, 2026 04:19
-
-
Save josephbolus/7b4485a1321b947dfb26ab9e826a23c9 to your computer and use it in GitHub Desktop.
Parser-based Markdown -> Confluence wiki markup converter.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
md_to_confluence_ast.py

Parser-based Markdown -> Confluence wiki markup converter.

Install:
    pip install markdown-it-py mdit-py-plugins

Usage:
    python md_to_confluence_ast.py input.md
    python md_to_confluence_ast.py input.md -o output.txt
    cat input.md | python md_to_confluence_ast.py
"""
import argparse
import html
import re
import sys
from typing import List, Optional

from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
# Aliases mapped onto the language names emitted in the {code} macro.
# Names that are not listed here are passed through unchanged.
_LANGUAGE_ALIASES = {
    "py": "python",
    "python3": "python",
    "js": "javascript",
    "node": "javascript",
    "ts": "typescript",
    "sh": "bash",
    "shell": "bash",
    "zsh": "bash",
    "yml": "yaml",
    "jdbc": "java",
    "mysql": "sql",
    "postgres": "sql",
    "postgresql": "sql",
    "sqlite": "sql",
    "mssql": "sql",
    "tsql": "sql",
    "plsql": "sql",
}


def normalize_language(lang: str) -> str:
    """Return the canonical language name for a fenced-code info string.

    Only the first whitespace-separated word of ``lang`` is considered, so an
    info string carrying extra attributes (``python title="x"``) still
    resolves to ``python`` instead of leaking the attributes into the result.

    Args:
        lang: Info string of a code fence; may be empty or ``None``.

    Returns:
        The lower-cased language with known aliases resolved, or ``"none"``
        when no language was given.
    """
    words = (lang or "").split()
    if not words:
        return "none"
    name = words[0].lower()
    return _LANGUAGE_ALIASES.get(name, name)
class ConfluenceRenderer:
    """Render Markdown as Confluence wiki markup by walking a markdown-it tree.

    List nesting is threaded through the ``indent`` argument of the render
    methods: its magnitude is the nesting depth and its sign selects the
    marker (positive -> ``*`` bullets, zero or negative -> ``#`` numbers).
    Because one signed integer carries both pieces of information, a nested
    list repeats its own marker for the whole depth (an ordered list inside a
    bullet list is emitted as ``##``).
    """

    # A {code} / {code:...} macro delimiter. The look-arounds keep the
    # inline-code form {{code}} from being mistaken for a delimiter.
    _CODE_MACRO = re.compile(r"(?<!\{)\{code(?::[^}\n]*)?\}(?!\})")

    def __init__(self) -> None:
        """Create the markdown-it parser ("gfm-like" preset, linkify off)."""
        self.md = MarkdownIt("gfm-like", {"linkify": False})

    def render(self, markdown_text: str) -> str:
        """Parse ``markdown_text`` and return the cleaned-up wiki markup."""
        tokens = self.md.parse(markdown_text)
        tree = SyntaxTreeNode(tokens)
        parts = self.render_children(tree.children or [], indent=0)
        return self.cleanup("".join(parts))

    def cleanup(self, text: str) -> str:
        """Normalise whitespace of the rendered markup.

        Outside code blocks, runs of blank lines collapse to a single blank
        line and trailing whitespace is removed. Lines between ``{code}``
        delimiters are copied verbatim so blank lines and trailing spaces
        inside code survive. The result always ends with exactly one newline.
        """
        cleaned: List[str] = []
        blank_run = 0
        in_code = False
        for line in text.splitlines():
            # An odd number of delimiters on a line flips the in-code state;
            # an even number (open and close on one line) leaves it as is.
            toggles = len(self._CODE_MACRO.findall(line)) % 2 == 1
            if in_code and not toggles:
                cleaned.append(line)
                blank_run = 0
                continue
            if toggles:
                in_code = not in_code
            if line.strip() == "":
                blank_run += 1
                if blank_run <= 1:
                    cleaned.append("")
            else:
                blank_run = 0
                cleaned.append(line.rstrip())
        return "\n".join(cleaned).strip() + "\n"

    def render_children(self, nodes: List["SyntaxTreeNode"], indent: int = 0) -> List[str]:
        """Render each node in ``nodes`` and return the pieces in order."""
        return [self.render_node(node, indent) for node in nodes]

    def render_node(self, node: "SyntaxTreeNode", indent: int = 0) -> str:
        """Render a single block or inline node according to ``node.type``.

        Unknown node types fall back to their rendered children, or to their
        raw content when they have none.
        """
        t = node.type
        if t == "root":
            return "".join(self.render_children(node.children or [], indent))
        if t == "paragraph":
            return self.render_inline_container(node).strip() + "\n\n"
        if t == "inline":
            return self.render_inlines(node.children or [])
        if t == "text":
            return node.content or ""
        if t == "softbreak":
            return "\n"
        if t == "hardbreak":
            return " \\\\\n"
        if t == "heading":
            level = 1
            if node.tag and node.tag.startswith("h"):
                try:
                    level = min(max(int(node.tag[1:]), 1), 6)
                except ValueError:
                    level = 1
            body = self.render_inline_container(node).strip()
            return f"h{level}. {body}\n\n"
        if t == "bullet_list":
            items = "".join(self.render_children(node.children or [], indent + 1))
            # A blank line ends a top-level list so the next block does not
            # run into the last item.
            return items + ("\n" if indent == 0 else "")
        if t == "ordered_list":
            items = "".join(
                self.render_children(node.children or [], -(abs(indent) + 1))
            )
            return items + ("\n" if indent == 0 else "")
        if t == "list_item":
            marker = "*" * max(indent, 1) if indent > 0 else "#" * max(abs(indent), 1)
            body = self.render_list_item(node, indent)
            return f"{marker} {body}\n"
        if t == "blockquote":
            body = self.flatten_block_children(node).strip().splitlines()
            return "".join(
                f"bq. {line}\n" if line.strip() else "bq. \n"
                for line in body
            ) + "\n"
        if t == "fence":
            lang = normalize_language(node.info or "")
            content = node.content or ""
            if lang == "none":
                return f"{{code}}\n{content}{{code}}\n\n"
            return f"{{code:language={lang}}}\n{content}{{code}}\n\n"
        if t == "code_block":
            content = node.content or ""
            return f"{{code}}\n{content}{{code}}\n\n"
        if t == "hr":
            return "----\n\n"
        if t == "table":
            return self.render_table(node) + "\n"
        if t == "html_block":
            return (node.content or "") + "\n\n"
        if t == "image":
            src = self.attr(node, "src") or ""
            # markdown-it leaves the "alt" attribute empty on image tokens and
            # stores the alt text as the token content, so fall back to that.
            alt = self.attr(node, "alt") or node.content or ""
            return f"!{src}|alt={alt}!" if alt else f"!{src}!"
        if t == "code_inline":
            return "{{" + (node.content or "") + "}}"
        if t == "strong":
            return "*" + self.render_inlines(node.children or []) + "*"
        if t == "em":
            return "_" + self.render_inlines(node.children or []) + "_"
        if t == "s":
            return "-" + self.render_inlines(node.children or []) + "-"
        if t == "link":
            href = self.attr(node, "href") or ""
            label = self.render_inlines(node.children or []).strip() or href
            return f"[{label}|{href}]"
        if t == "html_inline":
            return node.content or ""
        if node.children:
            return self.render_inlines(node.children)
        return node.content or ""

    def render_inline_container(self, node: "SyntaxTreeNode") -> str:
        """Render the children of a block that holds inline content."""
        parts: List[str] = []
        for child in node.children or []:
            if child.type == "inline":
                parts.append(self.render_inlines(child.children or []))
            else:
                parts.append(self.render_node(child))
        return "".join(parts)

    def render_inlines(self, nodes: List["SyntaxTreeNode"]) -> str:
        """Concatenate the rendering of a sequence of inline nodes."""
        return "".join(self.render_node(n) for n in nodes)

    def convert_task_marker(self, text: str) -> str:
        """Replace a leading ``[ ]`` / ``[x]`` task marker with a checkbox glyph."""
        text = text.strip()
        if text.startswith("[ ] "):
            return "☐ " + text[4:]
        if text.startswith(("[x] ", "[X] ")):
            return "☑ " + text[4:]
        return text

    def render_list_item(self, node: "SyntaxTreeNode", indent: int) -> str:
        """Render the body of one list item, without its own marker.

        Paragraphs and other blocks are joined with single spaces on the
        item's first line; nested lists follow on their own lines, one level
        deeper than ``indent``.
        """
        chunks: List[str] = []
        nested: List[str] = []
        for child in node.children or []:
            if child.type == "paragraph":
                text = self.render_inline_container(child).strip()
                chunks.append(self.convert_task_marker(text))
            elif child.type == "bullet_list":
                nested.append(
                    "\n" + "".join(
                        self.render_children(child.children or [], abs(indent) + 1)
                    ).rstrip()
                )
            elif child.type == "ordered_list":
                nested.append(
                    "\n" + "".join(
                        self.render_children(child.children or [], -(abs(indent) + 1))
                    ).rstrip()
                )
            else:
                rendered = self.render_node(child, indent)
                if rendered.strip():
                    chunks.append(rendered.strip())
        main = " ".join(c for c in chunks if c).strip()
        if nested:
            return main + "".join(nested)
        return main

    def flatten_block_children(self, node: "SyntaxTreeNode") -> str:
        """Render child blocks as stripped text joined by single newlines."""
        parts: List[str] = []
        for child in node.children or []:
            if child.type == "paragraph":
                parts.append(self.render_inline_container(child).strip())
            else:
                text = self.render_node(child).strip()
                if text:
                    parts.append(text)
        return "\n".join(parts)

    def render_table(self, node: "SyntaxTreeNode") -> str:
        """Render a table; rows under ``thead`` become header rows."""
        rows: List[str] = []
        for child in node.children or []:
            if child.type == "thead":
                for tr in child.children or []:
                    rows.append(self.render_table_row(tr, header=True))
            elif child.type == "tbody":
                for tr in child.children or []:
                    rows.append(self.render_table_row(tr, header=False))
            elif child.type == "tr":
                rows.append(self.render_table_row(child, header=False))
        return "\n".join(rows) + "\n"

    def render_table_row(self, tr: "SyntaxTreeNode", header: bool) -> str:
        """Render one table row using ``||`` (header) or ``|`` (body) separators."""
        cells: List[str] = []
        for cell in tr.children or []:
            text = self.render_inline_container(cell).strip()
            # An empty cell would put two separators side by side, and "||"
            # is the header-cell delimiter; a single space keeps the cell.
            cells.append(text or " ")
        if header:
            return "||" + "||".join(cells) + "||"
        return "|" + "|".join(cells) + "|"

    def attr(self, node: "SyntaxTreeNode", name: str) -> Optional[str]:
        """Return attribute ``name`` of ``node`` with HTML entities unescaped.

        Returns ``None`` when the node has no such attribute.
        """
        attrs = getattr(node, "attrs", None) or {}
        value = attrs.get(name)
        if value is None:
            return None
        return html.unescape(str(value))
def convert_markdown_to_confluence(markdown_text: str) -> str:
    """Convert Markdown source text to Confluence wiki markup.

    A new ``ConfluenceRenderer`` is created for every call.
    """
    renderer = ConfluenceRenderer()
    return renderer.render(markdown_text)
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser.

    The parser accepts an optional positional ``input`` path and an optional
    ``-o`` / ``--output`` path.
    """
    cli = argparse.ArgumentParser(
        description="Convert Markdown to Confluence wiki markup using a real parser",
    )
    # (flags, keyword options) for each argument, registered in order.
    arguments = (
        (
            ("input",),
            {"nargs": "?", "help": "Input Markdown file. Reads stdin if omitted."},
        ),
        (
            ("-o", "--output"),
            {"help": "Output file. Prints to stdout if omitted."},
        ),
    )
    for flags, options in arguments:
        cli.add_argument(*flags, **options)
    return cli
def main() -> int:
    """Run the command-line converter and return the process exit status.

    Markdown is read from the ``input`` file (UTF-8) or from stdin when no
    input path is given; the converted markup is written to the ``--output``
    file (UTF-8, ``\\n`` line endings) or to stdout.
    """
    options = build_parser().parse_args()

    if not options.input:
        source = sys.stdin.read()
    else:
        with open(options.input, "r", encoding="utf-8") as handle:
            source = handle.read()

    converted = convert_markdown_to_confluence(source)

    if not options.output:
        sys.stdout.write(converted)
    else:
        with open(options.output, "w", encoding="utf-8", newline="\n") as handle:
            handle.write(converted)
    return 0
# Run the converter only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment