Skip to content

Instantly share code, notes, and snippets.

@josephbolus
Created March 13, 2026 04:19
Show Gist options
  • Select an option

  • Save josephbolus/7b4485a1321b947dfb26ab9e826a23c9 to your computer and use it in GitHub Desktop.

Select an option

Save josephbolus/7b4485a1321b947dfb26ab9e826a23c9 to your computer and use it in GitHub Desktop.
Parser-based Markdown -> Confluence wiki markup converter.
#!/usr/bin/env python3
"""
md_to_confluence_ast.py
Parser-based Markdown -> Confluence wiki markup converter.
Install:
pip install markdown-it-py mdit-py-plugins
Usage:
python md_to_confluence_ast.py input.md
python md_to_confluence_ast.py input.md -o output.txt
cat input.md | python md_to_confluence_ast.py
"""
import argparse
import html
import sys
from typing import List, Optional
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
# Aliases for fence language tags, mapped to the name emitted in the
# ``{code:language=...}`` macro. Tags that are not listed pass through as-is.
_LANGUAGE_ALIASES = {
    "py": "python",
    "python3": "python",
    "js": "javascript",
    "node": "javascript",
    "ts": "typescript",
    "sh": "bash",
    "shell": "bash",
    "zsh": "bash",
    "yml": "yaml",
    "jdbc": "java",
    "mysql": "sql",
    "postgres": "sql",
    "postgresql": "sql",
    "sqlite": "sql",
    "mssql": "sql",
    "tsql": "sql",
    "plsql": "sql",
}


def normalize_language(lang: str) -> str:
    """Return the canonical language name for a fenced-code info string.

    Only the first whitespace-delimited word of *lang* is considered, so an
    info string that carries extra attributes (``python title="x"``) still
    resolves to its language instead of leaking the attributes into the
    output. Matching is case-insensitive.

    Args:
        lang: The raw info string of a code fence; may be empty or ``None``.

    Returns:
        The mapped alias, the lower-cased first word when no alias is known,
        or ``"none"`` when no language was given.
    """
    words = (lang or "").split()
    key = words[0].lower() if words else ""
    return _LANGUAGE_ALIASES.get(key, key or "none")
class ConfluenceRenderer:
    """Convert Markdown to Confluence wiki markup by walking a syntax tree.

    The Markdown text is parsed with ``MarkdownIt("gfm-like")`` (linkify
    disabled), wrapped in a ``SyntaxTreeNode`` tree, and every node is mapped
    to a wiki-markup string by :meth:`render_node`.
    """

    def __init__(self) -> None:
        self.md = MarkdownIt("gfm-like", {"linkify": False})
        # Marker characters ("*" or "#") of the lists currently being
        # rendered, outermost first.
        self._list_markers: List[str] = []

    def render(self, markdown_text: str) -> str:
        """Parse *markdown_text* and return the cleaned-up wiki markup."""
        # Start from a clean stack even if a previous render raised midway.
        self._list_markers = []
        tokens = self.md.parse(markdown_text)
        tree = SyntaxTreeNode(tokens)
        parts = self.render_children(tree.children or [], indent=0)
        return self.cleanup("".join(parts))

    def cleanup(self, text: str) -> str:
        """Normalize whitespace in rendered markup.

        Outside ``{code}`` macros, trailing whitespace is stripped from every
        line and runs of blank lines are collapsed to a single blank line.
        Lines inside a ``{code}`` macro are kept verbatim so code samples are
        not altered. The result is stripped and ends with exactly one newline.
        """
        cleaned: List[str] = []
        blank_run = 0
        in_code = False
        for line in text.splitlines():
            if in_code:
                close_at = line.find("{code}")
                if close_at < 0:
                    cleaned.append(line)  # code content: leave untouched
                    continue
                # The macro closes on this line; text after the closer may
                # itself end with the opener of another macro.
                remainder = line[close_at + len("{code}"):].rstrip()
                in_code = self._opens_code_macro(remainder)
                blank_run = 0
                cleaned.append(line.rstrip())
                continue
            if not line.strip():
                blank_run += 1
                if blank_run <= 1:
                    cleaned.append("")
                continue
            blank_run = 0
            stripped = line.rstrip()
            cleaned.append(stripped)
            in_code = self._opens_code_macro(stripped)
        return "\n".join(cleaned).strip() + "\n"

    @staticmethod
    def _opens_code_macro(line: str) -> bool:
        """Return True when *line* ends with ``{code}`` or ``{code:...}``."""
        start = line.rfind("{code")
        if start < 0 or not line.endswith("}"):
            return False
        # Text between "{code" and the final "}" must be empty or ":params".
        tail = line[start + len("{code"):-1]
        return "}" not in tail and (tail == "" or tail.startswith(":"))

    @staticmethod
    def _code_macro(content: str, lang: str = "none") -> str:
        """Wrap *content* in a ``{code}`` macro, tagged with *lang* if given."""
        # Keep the closing marker on its own line even when the source fence
        # content did not end with a newline.
        if content and not content.endswith("\n"):
            content += "\n"
        opener = "{code}" if lang == "none" else f"{{code:language={lang}}}"
        return f"{opener}\n{content}{{code}}\n\n"

    def _marker_stack(self) -> List[str]:
        """Return the list-marker stack, creating it when ``__init__`` was bypassed."""
        try:
            return self._list_markers
        except AttributeError:
            self._list_markers = []
            return self._list_markers

    def _render_list(self, node: "SyntaxTreeNode", marker: str, indent: int) -> str:
        """Render the items of a list node whose own marker character is *marker*.

        The marker is pushed on the stack while the items render so nested
        items can be prefixed with every ancestor's marker (``#*`` for a
        bullet inside a numbered item).
        """
        stack = self._marker_stack()
        depth = abs(indent) + 1
        stack.append(marker)
        try:
            child_indent = depth if marker == "*" else -depth
            return "".join(self.render_children(node.children or [], child_indent))
        finally:
            stack.pop()

    def _plain_text(self, nodes: List["SyntaxTreeNode"]) -> str:
        """Concatenate the textual content of *nodes* and their descendants."""
        parts: List[str] = []
        for n in nodes:
            kids = n.children or []
            parts.append(self._plain_text(kids) if kids else (n.content or ""))
        return "".join(parts)

    def render_children(self, nodes: List["SyntaxTreeNode"], indent: int = 0) -> List[str]:
        """Render each node in *nodes* and return the pieces in order."""
        return [self.render_node(node, indent) for node in nodes]

    def render_node(self, node: "SyntaxTreeNode", indent: int = 0) -> str:
        """Render a single syntax-tree node as Confluence wiki markup.

        Args:
            node: The node to render; dispatch is on ``node.type``.
            indent: Signed list depth: positive inside a bullet list,
                negative inside an ordered list, 0 outside of lists.

        Returns:
            The markup for the node. Unknown node types fall back to their
            rendered children, or to their raw content.
        """
        t = node.type
        if t == "root":
            return "".join(self.render_children(node.children or [], indent))
        if t == "paragraph":
            return self.render_inline_container(node).strip() + "\n\n"
        if t == "inline":
            return self.render_inlines(node.children or [])
        if t == "text":
            return node.content or ""
        if t == "softbreak":
            return "\n"
        if t == "hardbreak":
            return " \\\\\n"
        if t == "heading":
            level = 1
            if node.tag and node.tag.startswith("h"):
                try:
                    level = min(max(int(node.tag[1:]), 1), 6)
                except ValueError:
                    level = 1
            body = self.render_inline_container(node).strip()
            return f"h{level}. {body}\n\n"
        if t == "bullet_list":
            return self._render_list(node, "*", indent)
        if t == "ordered_list":
            return self._render_list(node, "#", indent)
        if t == "list_item":
            stack = self._marker_stack()
            if stack:
                # Confluence needs the full ancestor chain, e.g. "#*".
                marker = "".join(stack)
            elif indent > 0:
                marker = "*" * indent
            else:
                marker = "#" * max(abs(indent), 1)
            body = self.render_list_item(node, indent)
            return f"{marker} {body}\n"
        if t == "blockquote":
            lines = self.flatten_block_children(node).strip().splitlines()
            return "".join(
                f"bq. {line}\n" if line.strip() else "bq. \n"
                for line in lines
            ) + "\n"
        if t == "fence":
            # Only the first word of the info string names the language.
            info_words = (node.info or "").split()
            lang = normalize_language(info_words[0] if info_words else "")
            return self._code_macro(node.content or "", lang)
        if t == "code_block":
            return self._code_macro(node.content or "")
        if t == "hr":
            return "----\n\n"
        if t == "table":
            return self.render_table(node) + "\n"
        if t == "html_block":
            return (node.content or "") + "\n\n"
        if t == "image":
            src = self.attr(node, "src") or ""
            # The parser leaves the "alt" attribute empty and keeps the alt
            # text as child nodes, so fall back to their plain text.
            alt = self.attr(node, "alt") or self._plain_text(node.children or [])
            return f"!{src}|alt={alt}!" if alt else f"!{src}!"
        if t == "code_inline":
            return "{{" + (node.content or "") + "}}"
        if t == "strong":
            return "*" + self.render_inlines(node.children or []) + "*"
        if t == "em":
            return "_" + self.render_inlines(node.children or []) + "_"
        if t == "s":
            return "-" + self.render_inlines(node.children or []) + "-"
        if t == "link":
            href = self.attr(node, "href") or ""
            label = self.render_inlines(node.children or []).strip() or href
            return f"[{label}|{href}]"
        if t == "html_inline":
            return node.content or ""
        if node.children:
            return self.render_inlines(node.children)
        return node.content or ""

    def render_inline_container(self, node: "SyntaxTreeNode") -> str:
        """Render the children of a block node that holds inline content."""
        parts: List[str] = []
        for child in node.children or []:
            if child.type == "inline":
                parts.append(self.render_inlines(child.children or []))
            else:
                parts.append(self.render_node(child))
        return "".join(parts)

    def render_inlines(self, nodes: List["SyntaxTreeNode"]) -> str:
        """Render a sequence of inline nodes and concatenate the results."""
        return "".join(self.render_node(n) for n in nodes)

    def convert_task_marker(self, text: str) -> str:
        """Replace a leading ``[ ]`` / ``[x]`` task marker with a checkbox glyph."""
        text = text.strip()
        if text.startswith("[ ] "):
            return "☐ " + text[4:]
        if text.startswith(("[x] ", "[X] ")):
            return "☑ " + text[4:]
        return text

    def render_list_item(self, node: "SyntaxTreeNode", indent: int) -> str:
        """Render the body of a list item, without its own marker.

        Paragraphs and other blocks are joined with single spaces on the
        item's line; nested lists follow on their own lines.
        """
        chunks: List[str] = []
        nested: List[str] = []
        for child in node.children or []:
            if child.type == "paragraph":
                text = self.render_inline_container(child).strip()
                chunks.append(self.convert_task_marker(text))
            elif child.type == "bullet_list":
                nested.append("\n" + self._render_list(child, "*", indent).rstrip())
            elif child.type == "ordered_list":
                nested.append("\n" + self._render_list(child, "#", indent).rstrip())
            else:
                rendered = self.render_node(child, indent)
                if rendered.strip():
                    chunks.append(rendered.strip())
        main = " ".join(c for c in chunks if c).strip()
        if nested:
            return main + "".join(nested)
        return main

    def flatten_block_children(self, node: "SyntaxTreeNode") -> str:
        """Render a block's children and join the non-empty results by newline."""
        parts: List[str] = []
        for child in node.children or []:
            if child.type == "paragraph":
                parts.append(self.render_inline_container(child).strip())
            else:
                text = self.render_node(child).strip()
                if text:
                    parts.append(text)
        return "\n".join(parts)

    def render_table(self, node: "SyntaxTreeNode") -> str:
        """Render a table node, one markup row per line."""
        rows: List[str] = []
        for child in node.children or []:
            if child.type == "thead":
                for tr in child.children or []:
                    rows.append(self.render_table_row(tr, header=True))
            elif child.type == "tbody":
                for tr in child.children or []:
                    rows.append(self.render_table_row(tr, header=False))
            elif child.type == "tr":
                rows.append(self.render_table_row(child, header=False))
        return "\n".join(rows) + "\n"

    def render_table_row(self, tr: "SyntaxTreeNode", header: bool) -> str:
        """Render one table row, using ``||`` delimiters for header rows."""
        cells: List[str] = []
        for cell in tr.children or []:
            text = self.render_inline_container(cell).strip()
            # An empty cell would put two pipes side by side, which
            # Confluence reads as a header delimiter; pad it with a space.
            cells.append(text or " ")
        if header:
            return "||" + "||".join(cells) + "||"
        return "|" + "|".join(cells) + "|"

    def attr(self, node: "SyntaxTreeNode", name: str) -> Optional[str]:
        """Return the HTML-unescaped attribute *name* of *node*, or ``None``."""
        attrs = getattr(node, "attrs", None) or {}
        value = attrs.get(name)
        if value is None:
            return None
        return html.unescape(str(value))
def convert_markdown_to_confluence(markdown_text: str) -> str:
    """Convert Markdown text to Confluence wiki markup.

    A new ``ConfluenceRenderer`` is created for every call.
    """
    renderer = ConfluenceRenderer()
    return renderer.render(markdown_text)
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    The parser accepts an optional positional ``input`` path and an optional
    ``-o`` / ``--output`` path.
    """
    cli = argparse.ArgumentParser(
        description="Convert Markdown to Confluence wiki markup using a real parser"
    )
    input_options = {
        "nargs": "?",
        "help": "Input Markdown file. Reads stdin if omitted.",
    }
    cli.add_argument("input", **input_options)
    cli.add_argument(
        "-o", "--output", help="Output file. Prints to stdout if omitted."
    )
    return cli
def main() -> int:
    """Run the command-line converter and return the process exit code.

    Markdown is read from the ``input`` file (UTF-8) or, when no input path
    is given, from stdin. The converted markup is written to the ``--output``
    file (UTF-8, ``\\n`` line endings) or, when no output path is given, to
    stdout.
    """
    args = build_parser().parse_args()

    if not args.input:
        markdown_text = sys.stdin.read()
    else:
        with open(args.input, "r", encoding="utf-8") as source:
            markdown_text = source.read()

    rendered = convert_markdown_to_confluence(markdown_text)

    if not args.output:
        sys.stdout.write(rendered)
        return 0

    with open(args.output, "w", encoding="utf-8", newline="\n") as target:
        target.write(rendered)
    return 0


# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment