Skip to content

Instantly share code, notes, and snippets.

@KoStard
Created September 4, 2025 20:16
Show Gist options
  • Select an option

  • Save KoStard/47ef204d5ced130b2d2d16b3b7eb34e0 to your computer and use it in GitHub Desktop.

Select an option

Save KoStard/47ef204d5ced130b2d2d16b3b7eb34e0 to your computer and use it in GitHub Desktop.
A tool to convert Markdown files to beautifully formatted PDFs.
# /// script
# requires-python = "==3.11.*"
# dependencies = [
# "beautifulsoup4",
# "weasyprint",
# ]
# ///
"""
md2pdf - A tool to convert Markdown files to beautifully formatted PDFs.
Creates professional-looking PDFs from Markdown files with support for:
- Single or double column layouts
- Custom fonts and typography
- Adjustable margins and spacing
- Title generation from filenames
- Metadata preservation
Dependencies:
- Python 3.11 (the inline script metadata pins requires-python to "==3.11.*")
- WeasyPrint: pip install weasyprint
- BeautifulSoup4: pip install beautifulsoup4
- Pandoc: https://pandoc.org/installing.html
Usage:
./md2pdf.py [OPTIONS]
Basic Examples:
# Convert all .md files in current directory to PDFs in ./output
./md2pdf.py -i . -o ./output
# Single column layout with a specific font
./md2pdf.py --single --font-family "Georgia, serif"
# Custom page margins (top,right,bottom,left)
./md2pdf.py --margin 1.5,2,1,2
Full Options:
-h, --help show help message and exit
-i PATH, --input PATH
Input directory containing .md files
-o PATH, --output PATH
Output directory for PDFs
--single Use single column layout
--no-title Omit document titles
--line-height FLOAT Line height ratio (default: 1.5)
--font-size FLOAT Base font size in pt (default: 11)
--font-family FONT Comma-separated font family list
--margin MARGINS Page margins in cm "top,right,bottom,left" (default: 1,1.5,1,1.5)
--column-gap FLOAT Gap between columns in cm (default: 1.2)
--body-padding FLOAT Body padding inside page margins in cm (default: 0.75)
--debug Enable debug output
-v, --verbose Enable verbose output
The script will automatically:
- Process all .md files in the specified input directory
- Generate PDFs in the output directory with the same base filenames
- Use sensible defaults for professional-looking results
- Preserve links, images, and basic formatting
"""
import argparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from bs4 import BeautifulSoup
from weasyprint import HTML, CSS
from weasyprint.text.fonts import FontConfiguration
def parse_margins(margin_str, default_margin='1.0'):
    """Parse a page-margin string into ``(top, right, bottom, left)``.

    Accepts either a single value that is applied to all four sides
    (``"2"``) or four comma-separated values in CSS order
    (``"1,1.5,1,1.5"``). Values are returned as stripped strings so they can
    be interpolated into a stylesheet unchanged.

    Args:
        margin_str: Raw margin specification, e.g. from the command line.
        default_margin: Value substituted for any empty component, such as
            ``""`` or the second field of ``"1,,1,2"``.

    Returns:
        A 4-tuple of strings: ``(top, right, bottom, left)``.

    Raises:
        ValueError: If the number of components is neither 1 nor 4, or if a
            component is not a plain number.
    """
    # An empty component falls back to the default instead of leaving a
    # unit-only token (e.g. a bare "cm") in the generated stylesheet.
    parts = [part.strip() or str(default_margin) for part in margin_str.split(',')]

    if len(parts) not in (1, 4):
        raise ValueError(f"Invalid margin format: {margin_str}")

    # Validate against CSS number syntax so a typo is reported up front
    # rather than producing a stylesheet rule that is silently dropped.
    for part in parts:
        if not re.fullmatch(r'[+-]?(?:\d*\.)?\d+(?:[eE][+-]?\d+)?', part):
            raise ValueError(f"Invalid margin value {part!r} in: {margin_str}")

    if len(parts) == 1:
        return (parts[0],) * 4
    return tuple(parts)
def md_to_html(content, title):
    """Convert Markdown text to HTML by piping it through Pandoc.

    Args:
        content: Markdown source text, sent to Pandoc on stdin.
        title: Document title, passed to Pandoc as ``title`` metadata.

    Returns:
        The HTML that Pandoc wrote to stdout.

    Raises:
        RuntimeError: If Pandoc exits with a non-zero status; the message
            carries Pandoc's stderr output.
        SystemExit: If the ``pandoc`` executable cannot be found.
    """
    command = [
        'pandoc',
        '-f', 'markdown',
        '-t', 'html5+smart',
        '--mathjax',
        '--metadata', f'title={title}',
    ]
    try:
        result = subprocess.run(
            command,
            input=content,
            text=True,
            # Pandoc reads and writes UTF-8 regardless of the system locale,
            # so the pipes must not fall back to the locale's codec.
            encoding='utf-8',
            capture_output=True,
        )
    except FileNotFoundError:
        raise SystemExit(
            "Error: pandoc not found. Please install pandoc first: https://pandoc.org/installing.html"
        ) from None

    if result.returncode != 0:
        raise RuntimeError(f"pandoc failed: {result.stderr.strip()}")
    return result.stdout
def wrap_html(content, title, config):
    """Wrap converted HTML in a complete, styled document.

    Args:
        content: HTML produced from the Markdown source. It may be a bare
            fragment or a document containing a ``<body>`` element.
        title: Plain-text document title. It is HTML-escaped before being
            placed in ``<title>`` and in the visible heading.
        config: Settings mapping. Reads ``margin`` (sequence of four values,
            emitted with a ``cm`` unit), ``font_family``, ``font_size``,
            ``line_height``, ``body_padding`` and ``column_gap``; the
            optional flags ``no_title`` and ``single_column`` are read with
            ``.get()``.

    Returns:
        The full HTML document as a string.

    Raises:
        KeyError: If one of the required ``config`` keys is missing.
    """
    # Local import keeps this function self-contained; ``html`` is stdlib.
    from html import escape

    soup = BeautifulSoup(content, 'html.parser')
    if soup.body is not None:
        # Take only the children of <body>; this also works when the tag
        # carries attributes, which fixed-width string slicing would not.
        content_str = soup.body.decode_contents().strip()
    else:
        content_str = str(soup).strip()

    # The title comes from a filename, so characters such as "&" or "<"
    # must be escaped before they are embedded in markup.
    safe_title = escape(title)

    title_display = 'none' if config.get('no_title') else 'block'
    column_style = '1' if config.get('single_column') else '2'
    page_margin_css = ' '.join(f"{m}cm" for m in config['margin'])

    html_template = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<style>
@page {{
size: A4;
margin: {page_margin_css};
@top-left {{
content: "";
}}
@bottom-center {{
content: counter(page) " / " counter(pages);
font-family: {config['font_family']};
font-size: 0.8em;
color: #666;
}}
}}
body {{
margin: 0;
padding: 0;
font-family: {config['font_family']};
font-size: {config['font_size']}pt;
line-height: {config['line_height']};
color: #333;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
text-rendering: optimizeLegibility;
hyphens: auto;
/* columns set on .document-content */
}}
.document-container {{
margin: 0;
padding: {config['body_padding']}cm;
box-sizing: border-box;
}}
.document-content {{
column-gap: {config['column_gap']}cm;
column-count: {column_style};
column-fill: balance;
}}
.document-title {{
display: {title_display};
font-size: 1.8em;
font-weight: 600;
text-align: center;
column-span: all;
margin: 0 0 1em 0;
padding-bottom: 0.5em;
border-bottom: 1px solid #eee;
}}
p {{
margin: 0 0 1em 0;
text-align: justify;
orphans: 3;
widows: 3;
hyphens: auto;
}}
img {{
max-width: 100%;
height: auto;
}}
h1, h2, h3, h4, h5, h6 {{
break-after: avoid;
}}
pre, table, figure {{
break-inside: avoid;
}}
@media print {{
body {{
-webkit-print-color-adjust: exact;
print-color-adjust: exact;
}}
.document-container {{
padding-top: 0;
}}
}}
</style>
</head>
<body>
<div class="document-container">
<h1 class="document-title">{safe_title}</h1>
<div class="document-content">
{content_str}
</div>
</div>
</body>
</html>
"""
    return html_template
def convert_md_to_pdf(input_file, output_file, config):
    """Convert a single Markdown file to a PDF.

    The title is derived from the input filename: underscores and hyphens
    become spaces and the result is title-cased.

    Args:
        input_file: Path of the Markdown file to read (decoded as UTF-8).
        output_file: Path of the PDF to write.
        config: Settings mapping forwarded to ``wrap_html``. When its
            ``debug`` flag is set, the intermediate HTML is kept in a
            temporary file and tracebacks are printed on failure.

    Returns:
        True if the PDF exists after rendering; False if any step raised an
        exception or no output file was produced. Failures are reported on
        stderr rather than raised.
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Build a human-readable title from the file's base name.
        title = os.path.splitext(os.path.basename(input_file))[0]
        title = re.sub(r'[_-]', ' ', title).title()

        print(" 🔧 Converting content...")
        html_fragment = md_to_html(md_content, title)
        full_html = wrap_html(html_fragment, title, config)

        font_config = FontConfiguration()

        # Resolve relative asset URLs (images, CSS) against the directory
        # that contains the source file.
        base_url = os.path.dirname(os.path.abspath(input_file))
        html = HTML(string=full_html, base_url=base_url)

        if config.get('debug'):
            # delete=False keeps the file around so it can be inspected.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as f:
                f.write(full_html.encode('utf-8'))
            print(f"Debug: HTML content written to {f.name}")

        # NOTE(review): this rule is scoped to screen media; presumably it
        # never applies to the paged PDF output - confirm before removing.
        css = CSS(string='''
@media screen {
body { background: #fff; padding: 2em; }
}
''')

        print(" 📝 Generating PDF...")
        html.write_pdf(
            output_file,
            stylesheets=[css],
            font_config=font_config,
            presentational_hints=False
        )

        # Report a missing output explicitly instead of falling through
        # and returning None without any message.
        if not os.path.exists(output_file):
            print(f" ⚠ Error processing {input_file}: PDF was not created", file=sys.stderr)
            return False

        file_size_kb = os.path.getsize(output_file) / 1024
        print(f" ✅ Output: {os.path.basename(output_file)} ({file_size_kb:.2f} KB)")
        return True
    except Exception as e:
        # Boundary handler: one bad file must not abort a batch run.
        print(f" ⚠ Error processing {input_file}: {str(e)}", file=sys.stderr)
        if config.get('debug'):
            import traceback
            traceback.print_exc()
        return False
def _build_arg_parser():
    """Build the command-line argument parser for the converter."""
    parser = argparse.ArgumentParser(
        description='Convert Markdown files to nicely formatted PDFs.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '-i', '--input',
        default='.',
        help='Input directory containing .md files'
    )
    parser.add_argument(
        '-o', '--output',
        default='./output',
        help='Output directory for PDFs'
    )
    parser.add_argument(
        '--single',
        action='store_true',
        help='Use single column layout'
    )
    parser.add_argument(
        '--no-title',
        action='store_true',
        help='Omit document titles'
    )
    parser.add_argument(
        '--line-height',
        type=float,
        default=1.5,
        help='Line height ratio'
    )
    parser.add_argument(
        '--font-size',
        type=float,
        default=11,
        help='Base font size in pt'
    )
    parser.add_argument(
        '--font-family',
        default='"Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
        help='Comma-separated font family list (enclosed in quotes)'
    )
    parser.add_argument(
        '--margin',
        default='1,1.5,1,1.5',
        help='Page margins in cm "top,right,bottom,left"'
    )
    parser.add_argument(
        '--column-gap',
        type=float,
        default=1.2,
        help='Gap between columns in cm'
    )
    parser.add_argument(
        '--body-padding',
        type=float,
        default=0.75,
        help='Body padding inside page margins in cm'
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Enable debug output'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )
    return parser


def main():
    """Convert every ``*.md`` file in the input directory to a PDF.

    Parses the command line, creates the output directory when needed,
    converts the files in sorted order and prints a summary.

    Exits with status 1 when the input directory is missing, the output
    directory cannot be created, no Markdown files are found, or at least
    one conversion fails. An invalid ``--margin`` value is reported through
    the argument parser's usual usage error.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    # Report a malformed --margin as a usage error instead of a traceback.
    try:
        margins = list(parse_margins(args.margin, '1.0'))
    except ValueError as exc:
        parser.error(str(exc))

    config = {
        'single_column': args.single,
        'no_title': args.no_title,
        'line_height': args.line_height,
        'font_size': args.font_size,
        'font_family': args.font_family,
        'margin': margins,
        'column_gap': args.column_gap,
        'body_padding': args.body_padding,
        'debug': args.debug,
        'verbose': args.verbose
    }

    input_dir = os.path.abspath(args.input)
    output_dir = os.path.abspath(args.output)

    # isdir() is already False for a path that does not exist.
    if not os.path.isdir(input_dir):
        print(f"Error: Input directory does not exist: {input_dir}", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
            if args.verbose:
                print(f"Created output directory: {output_dir}")
        except OSError as e:
            print(f"Error creating output directory: {e}", file=sys.stderr)
            sys.exit(1)

    print(f"== Starting conversion of .md files to PDF in {output_dir} ==")
    if args.verbose or args.debug:
        print("Configuration:")
        print(f" Input directory: {input_dir}")
        print(f" Output directory: {output_dir}")
        print(f" Layout: {'Single column' if args.single else 'Two columns'}")
        print(f" Font: {args.font_family} at {args.font_size}pt")
        print(f" Margins: {config['margin']} cm")
        print(f" Line height: {args.line_height}")
        print(f" Column gap: {config['column_gap']} cm")
        print(f" Show title: {'No' if args.no_title else 'Yes'}")
        print()

    # Only the top level of the input directory is searched.
    markdown_files = sorted(f for f in Path(input_dir).glob('*.md') if f.is_file())
    if not markdown_files:
        print(f"No .md files found in {input_dir}", file=sys.stderr)
        sys.exit(1)

    success_count = 0
    total_files = len(markdown_files)
    if args.verbose:
        print(f"Found {total_files} markdown files to convert")

    for i, md_path in enumerate(markdown_files, 1):
        rel_path = os.path.relpath(md_path, input_dir)
        pdf_filename = os.path.splitext(rel_path)[0] + '.pdf'
        pdf_path = os.path.join(output_dir, pdf_filename)
        # Make sure the target directory exists before rendering.
        os.makedirs(os.path.dirname(pdf_path), exist_ok=True)

        title = re.sub(r'[_-]', ' ', Path(md_path).stem).title()
        print(f"[{i}/{total_files}] 📄 Processing: {title}")
        if convert_md_to_pdf(str(md_path), pdf_path, config):
            success_count += 1

    print(f"\nConversion complete: {success_count}/{total_files} succeeded")
    if success_count < total_files:
        print("Check error messages above for failed conversions")
    print(f"Output is in: {output_dir}")

    # Let calling scripts detect a partially failed batch.
    if success_count < total_files:
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment