Created
September 4, 2025 20:16
-
-
Save KoStard/47ef204d5ced130b2d2d16b3b7eb34e0 to your computer and use it in GitHub Desktop.
A tool to convert Markdown files to beautifully formatted PDFs.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # requires-python = "==3.11.*" | |
| # dependencies = [ | |
| # "beautifulsoup4", | |
| # "weasyprint", | |
| # ] | |
| # /// | |
| """ | |
| md2pdf - A tool to convert Markdown files to beautifully formatted PDFs. | |
| Creates professional-looking PDFs from Markdown files with support for: | |
| - Single or double column layouts | |
| - Custom fonts and typography | |
| - Adjustable margins and spacing | |
| - Title generation from filenames | |
| - Metadata preservation | |
| Dependencies: | |
| - Python 3.11 (the inline script metadata pins requires-python to ==3.11.*) | |
| - WeasyPrint: pip install weasyprint | |
| - BeautifulSoup4: pip install beautifulsoup4 | |
| - Pandoc: https://pandoc.org/installing.html | |
| Usage: | |
| ./md2pdf.py [OPTIONS] | |
| Basic Examples: | |
| # Convert all .md files in current directory to PDFs in ./output | |
| ./md2pdf.py -i . -o ./output | |
| # Single column layout | |
| ./md2pdf.py --single | |
| # Custom page margins (top,right,bottom,left) | |
| ./md2pdf.py --margin 1.5,2,1,2 | |
| Full Options: | |
| -h, --help show help message and exit | |
| -i PATH, --input PATH | |
| Input directory containing .md files | |
| -o PATH, --output PATH | |
| Output directory for PDFs | |
| --single Use single column layout | |
| --no-title Omit document titles | |
| --line-height FLOAT Line height ratio (default: 1.5) | |
| --font-size FLOAT Base font size in pt (default: 11) | |
| --font-family FONT Comma-separated font family list | |
| --margin MARGINS Page margins in cm "top,right,bottom,left" (default: 1,1.5,1,1.5) | |
| --column-gap FLOAT Gap between columns in cm (default: 1.2) | |
| --body-padding FLOAT Body padding inside page margins in cm (default: 0.75) | |
| --debug Enable debug output | |
| -v, --verbose Enable verbose output | |
| The script will automatically: | |
| - Process all .md files in the specified input directory | |
| - Generate PDFs in the output directory with the same base filenames | |
| - Use sensible defaults for professional-looking results | |
| - Preserve links, images, and basic formatting | |
| """ | |
| import argparse | |
| import os | |
| import re | |
| import shutil | |
| import subprocess | |
| import sys | |
| import tempfile | |
| from pathlib import Path | |
| from bs4 import BeautifulSoup | |
| from weasyprint import HTML, CSS | |
| from weasyprint.text.fonts import FontConfiguration | |
def parse_margins(margin_str, default_margin='1.0'):
    """Parse a margin string into (top, right, bottom, left) components.

    Args:
        margin_str: Either a single value applied to all four sides, or
            four comma-separated values in the order top, right, bottom,
            left. Surrounding whitespace is ignored.
        default_margin: Value substituted for any empty component, e.g.
            the second and fourth entries of ``"1,,1,"``.

    Returns:
        A 4-tuple of strings. Values stay strings so they are interpolated
        into CSS exactly as the user typed them.

    Raises:
        ValueError: If the number of components is not 1 or 4, or if a
            component is not a plain number.
    """
    parts = margin_str.split(',')
    if len(parts) not in (1, 4):
        raise ValueError(f"Invalid margin format: {margin_str}")

    cleaned = []
    for part in parts:
        # An empty slot falls back to the default rather than yielding a
        # bare unit once the caller appends "cm".
        value = part.strip() or str(default_margin)
        # The caller appends the unit, so only a bare number is acceptable
        # here; "1cm" would otherwise turn into "1cmcm".
        if not re.fullmatch(r'[+-]?(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+-]?\d+)?', value):
            raise ValueError(
                f"Invalid margin value {value!r} in: {margin_str}"
            )
        cleaned.append(value)

    if len(cleaned) == 1:
        return (cleaned[0],) * 4
    return tuple(cleaned)
def md_to_html(content, title):
    """Convert Markdown text to HTML by piping it through pandoc.

    Args:
        content: Markdown source text, sent to pandoc on stdin.
        title: Document title, passed to pandoc as ``title`` metadata.

    Returns:
        The HTML text pandoc wrote to stdout.

    Raises:
        SystemExit: If the ``pandoc`` executable cannot be found.
        RuntimeError: If pandoc exits with a non-zero status; the message
            carries pandoc's stderr.
    """
    command = [
        'pandoc', '-f', 'markdown', '-t', 'html5+smart', '--mathjax',
        '--metadata', f'title={title}',
    ]
    try:
        # pandoc reads and writes UTF-8 regardless of the locale, so the
        # pipes must not use the platform default codec (e.g. cp1252).
        result = subprocess.run(
            command,
            input=content,
            capture_output=True,
            text=True,
            encoding='utf-8',
            check=False,
        )
    except FileNotFoundError:
        raise SystemExit("Error: pandoc not found. Please install pandoc first: https://pandoc.org/installing.html") from None

    if result.returncode != 0:
        raise RuntimeError(f"pandoc failed: {result.stderr.strip()}")
    return result.stdout
def wrap_html(content, title, config):
    """Wrap converted HTML in a complete, styled document for PDF rendering.

    Args:
        content: HTML produced from the Markdown source. If it contains a
            ``<body>`` element only that element's children are used;
            otherwise the whole parsed input is used.
        title: Plain-text document title. It is HTML-escaped before being
            placed in ``<title>`` and in the visible heading.
        config: Mapping read for ``no_title``, ``single_column`` (both
            optional), and ``margin`` (iterable of four values in cm),
            ``font_family``, ``font_size``, ``line_height``,
            ``body_padding`` and ``column_gap`` (all required).

    Returns:
        The full HTML document as a string.
    """
    from html import escape  # local import: the module header lacks it

    soup = BeautifulSoup(content, 'html.parser')
    body = soup.body
    if body is not None:
        # decode_contents() yields the children only, so the <body> tag is
        # dropped even when it carries attributes.
        content_str = body.decode_contents().strip()
    else:
        content_str = str(soup).strip()

    # The title comes from a filename and may contain &, < or >.
    safe_title = escape(str(title))

    title_display = 'none' if config.get('no_title') else 'block'
    column_style = '1' if config.get('single_column') else '2'
    page_margin_css = ' '.join(f"{m}cm" for m in config['margin'])

    html_template = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<style>
@page {{
size: A4;
margin: {page_margin_css};
@top-left {{
content: "";
}}
@bottom-center {{
content: counter(page) " / " counter(pages);
font-family: {config['font_family']};
font-size: 0.8em;
color: #666;
}}
}}
body {{
margin: 0;
padding: 0;
font-family: {config['font_family']};
font-size: {config['font_size']}pt;
line-height: {config['line_height']};
color: #333;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
text-rendering: optimizeLegibility;
hyphens: auto;
/* columns set on .document-content */
}}
.document-container {{
margin: 0;
padding: {config['body_padding']}cm;
box-sizing: border-box;
}}
.document-content {{
column-gap: {config['column_gap']}cm;
column-count: {column_style};
column-fill: balance;
}}
.document-title {{
display: {title_display};
font-size: 1.8em;
font-weight: 600;
text-align: center;
column-span: all;
margin: 0 0 1em 0;
padding-bottom: 0.5em;
border-bottom: 1px solid #eee;
}}
p {{
margin: 0 0 1em 0;
text-align: justify;
orphans: 3;
widows: 3;
hyphens: auto;
}}
img {{
max-width: 100%;
height: auto;
}}
h1, h2, h3, h4, h5, h6 {{
break-after: avoid;
}}
pre, table, figure {{
break-inside: avoid;
}}
@media print {{
body {{
-webkit-print-color-adjust: exact;
print-color-adjust: exact;
}}
.document-container {{
padding-top: 0;
}}
}}
</style>
</head>
<body>
<div class="document-container">
<h1 class="document-title">{safe_title}</h1>
<div class="document-content">
{content_str}
</div>
</div>
</body>
</html>
"""
    return html_template
def convert_md_to_pdf(input_file, output_file, config):
    """Convert one Markdown file to a PDF.

    The title is derived from the input filename: the extension is
    dropped, underscores and hyphens become spaces, and the result is
    title-cased.

    Args:
        input_file: Path of the Markdown file, read as UTF-8.
        output_file: Path the PDF is written to.
        config: Settings mapping forwarded to ``wrap_html``; ``debug`` is
            also read here.

    Returns:
        True if the conversion ran without raising, False otherwise.
        Exceptions are reported on stderr instead of propagating; with
        ``config['debug']`` set, the traceback is printed as well.
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            md_content = f.read()

        title = os.path.splitext(os.path.basename(input_file))[0]
        title = re.sub(r'[_-]', ' ', title).title()

        print(" 🔧 Converting content...")
        html_fragment = md_to_html(md_content, title)
        full_html = wrap_html(html_fragment, title, config)

        font_config = FontConfiguration()

        # Relative asset URLs (images, CSS) resolve against the directory
        # that holds the source file.
        base_url = os.path.dirname(os.path.abspath(input_file))
        html = HTML(string=full_html, base_url=base_url)

        if config.get('debug'):
            # delete=False keeps the rendered HTML on disk for inspection.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as f:
                f.write(full_html.encode('utf-8'))
            print(f"Debug: HTML content written to {f.name}")

        css = CSS(string='''
@media screen {
body { background: #fff; padding: 2em; }
}
''')
        print(" 📄 Generating PDF...")
        html.write_pdf(
            output_file,
            stylesheets=[css],
            font_config=font_config,
            presentational_hints=False
        )

        if os.path.exists(output_file):
            file_size_kb = os.path.getsize(output_file) / 1024
            print(f" ✅ Output: {os.path.basename(output_file)} ({file_size_kb:.2f} KB)")
        return True
    except Exception as e:
        # Per-file boundary: report the failure and let the caller carry
        # on with the remaining files.
        print(f" ❌ Error processing {input_file}: {str(e)}", file=sys.stderr)
        if config.get('debug'):
            import traceback
            traceback.print_exc()
        return False
def _build_parser():
    """Create the command-line argument parser for md2pdf."""
    parser = argparse.ArgumentParser(
        description='Convert Markdown files to nicely formatted PDFs.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '-i', '--input',
        default='.',
        help='Input directory containing .md files'
    )
    parser.add_argument(
        '-o', '--output',
        default='./output',
        help='Output directory for PDFs'
    )
    parser.add_argument(
        '--single',
        action='store_true',
        help='Use single column layout'
    )
    parser.add_argument(
        '--no-title',
        action='store_true',
        help='Omit document titles'
    )
    parser.add_argument(
        '--line-height',
        type=float,
        default=1.5,
        help='Line height ratio'
    )
    parser.add_argument(
        '--font-size',
        type=float,
        default=11,
        help='Base font size in pt'
    )
    parser.add_argument(
        '--font-family',
        default='"Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
        help='Comma-separated font family list (enclosed in quotes)'
    )
    parser.add_argument(
        '--margin',
        default='1,1.5,1,1.5',
        help='Page margins in cm "top,right,bottom,left"'
    )
    parser.add_argument(
        '--column-gap',
        type=float,
        default=1.2,
        help='Gap between columns in cm'
    )
    parser.add_argument(
        '--body-padding',
        type=float,
        default=0.75,
        help='Body padding inside page margins in cm'
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Enable debug output'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )
    return parser


def main(argv=None):
    """Convert every ``*.md`` file in the input directory to a PDF.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Exits with status 1 when the input directory is missing, the output
    directory cannot be created, or no ``.md`` files are found. Invalid
    command-line values, including a malformed ``--margin``, are reported
    through ``parser.error``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Report a bad --margin as a usage error instead of a raw traceback.
    try:
        margins = list(parse_margins(args.margin, '1.0'))
    except ValueError as exc:
        parser.error(str(exc))

    config = {
        'single_column': args.single,
        'no_title': args.no_title,
        'line_height': args.line_height,
        'font_size': args.font_size,
        'font_family': args.font_family,
        'margin': margins,
        'column_gap': args.column_gap,
        'body_padding': args.body_padding,
        'debug': args.debug,
        'verbose': args.verbose
    }

    input_dir = os.path.abspath(args.input)
    output_dir = os.path.abspath(args.output)

    if not os.path.isdir(input_dir):
        print(f"Error: Input directory does not exist: {input_dir}", file=sys.stderr)
        sys.exit(1)

    # exist_ok=True still raises if the path exists but is not a
    # directory, so that case is reported here rather than mid-run.
    output_dir_missing = not os.path.exists(output_dir)
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}", file=sys.stderr)
        sys.exit(1)
    if output_dir_missing and args.verbose:
        print(f"Created output directory: {output_dir}")

    print(f"== Starting conversion of .md files to PDF in {output_dir} ==")
    if args.verbose or args.debug:
        print("Configuration:")
        print(f" Input directory: {input_dir}")
        print(f" Output directory: {output_dir}")
        print(f" Layout: {'Single column' if args.single else 'Two columns'}")
        print(f" Font: {args.font_family} at {args.font_size}pt")
        print(f" Margins: {config['margin']} cm")
        print(f" Line height: {args.line_height}")
        print(f" Column gap: {config['column_gap']} cm")
        print(f" Show title: {'No' if args.no_title else 'Yes'}")
        print()

    # Non-recursive: only files directly inside input_dir are picked up.
    markdown_files = [f for f in Path(input_dir).glob('*.md') if f.is_file()]
    if not markdown_files:
        print(f"No .md files found in {input_dir}", file=sys.stderr)
        sys.exit(1)

    success_count = 0
    total_files = len(markdown_files)
    if args.verbose:
        print(f"Found {total_files} markdown files to convert")

    for i, md_path in enumerate(sorted(markdown_files), 1):
        rel_path = os.path.relpath(md_path, input_dir)
        pdf_filename = os.path.splitext(rel_path)[0] + '.pdf'
        pdf_path = os.path.join(output_dir, pdf_filename)
        os.makedirs(os.path.dirname(pdf_path), exist_ok=True)

        title = re.sub(r'[_-]', ' ', Path(md_path).stem).title()
        print(f"[{i}/{total_files}] 📄 Processing: {title}")
        if convert_md_to_pdf(str(md_path), pdf_path, config):
            success_count += 1

    print(f"\nConversion complete: {success_count}/{total_files} succeeded")
    if success_count < total_files:
        print("Check error messages above for failed conversions")
    print(f"Output is in: {output_dir}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment