KoStard · September 4, 2025 20:16
diff --git a/md2pdf.py b/md2pdf.py
 # /// script
 # requires-python = "==3.11.*"
 # dependencies = [
 #     "beautifulsoup4",
 #     "weasyprint",
 # ]
 # ///

 """
 md2pdf - A tool to convert Markdown files to beautifully formatted PDFs.

 Creates professional-looking PDFs from Markdown files with support for:
 - Single or double column layouts
 - Custom fonts and typography
 - Adjustable margins and spacing
 - Title generation from filenames
 - Metadata preservation

 Dependencies:
 - Python 3.11 (the script metadata pins requires-python to 3.11.*)
 - WeasyPrint: pip install weasyprint
 - BeautifulSoup4: pip install beautifulsoup4
 - Pandoc: https://pandoc.org/installing.html

 Usage:
  ./md2pdf.py [OPTIONS]

 Basic Examples:
  # Convert all .md files in current directory to PDFs in ./output
  ./md2pdf.py -i . -o ./output

  # Single column layout
  ./md2pdf.py --single

  # Custom page margins (top,right,bottom,left)
  ./md2pdf.py --margin 1.5,2,1,2

 Full Options:
  -h, --help            show help message and exit
  -i PATH, --input PATH
                        Input directory containing .md files
  -o PATH, --output PATH
                        Output directory for PDFs
  --single              Use single column layout
  --no-title            Omit document titles
  --line-height FLOAT   Line height ratio (default: 1.5)
  --font-size FLOAT     Base font size in pt (default: 11)
  --font-family FONT    Comma-separated font family list
  --margin MARGINS      Page margins in cm "top,right,bottom,left" (default: 1,1.5,1,1.5)
  --column-gap FLOAT    Gap between columns in cm (default: 1.2)
  --body-padding FLOAT  Body padding inside page margins in cm (default: 0.75)
  --debug               Enable debug output
  -v, --verbose         Enable verbose output

 The script will automatically:
 - Process all .md files in the specified input directory
 - Generate PDFs in the output directory with the same base filenames
 - Use sensible defaults for professional-looking results
 - Preserve links, images, and basic formatting
 """

 import argparse
 import os
 import re
 import shutil
 import subprocess
 import sys
 import tempfile
 from pathlib import Path

 from bs4 import BeautifulSoup
 from weasyprint import HTML, CSS
 from weasyprint.text.fonts import FontConfiguration

 def parse_margins(margin_str, default_margin='1.0'):
    """Parse a page-margin string into (top, right, bottom, left) components.

    Accepts either a single value applied to all four sides ("2") or four
    comma-separated values ("1,1.5,1,1.5").  Values are plain numbers; the
    caller is responsible for appending a unit.

    Args:
        margin_str: Comma-separated margin specification.
        default_margin: Value substituted for any empty component,
            including a completely empty ``margin_str``.

    Returns:
        A 4-tuple of whitespace-stripped strings.

    Raises:
        ValueError: If the number of components is not 1 or 4, or if a
            component is not a finite number.
    """
    import math

    parts = [piece.strip() or str(default_margin) for piece in margin_str.split(',')]
    if len(parts) == 1:
        parts = parts * 4
    elif len(parts) != 4:
        raise ValueError(f"Invalid margin format: {margin_str}")

    # Reject anything that would produce an invalid CSS length once the
    # caller appends its unit (e.g. "abc" or "nan").
    for part in parts:
        try:
            is_number = math.isfinite(float(part))
        except ValueError:
            is_number = False
        if not is_number:
            raise ValueError(f"Invalid margin format: {margin_str}")

    return tuple(parts)

 def md_to_html(content, title):
    """Convert Markdown text to HTML by piping it through the pandoc CLI.

    Args:
        content: Markdown source text, sent to pandoc on stdin.
        title: Value passed to pandoc as the ``title`` metadata field.

    Returns:
        The HTML text pandoc wrote to stdout.

    Raises:
        SystemExit: If the ``pandoc`` executable cannot be found.
        RuntimeError: If pandoc exits with a non-zero status; the message
            carries pandoc's stderr output.
    """
    command = [
        'pandoc', '-f', 'markdown', '-t', 'html5+smart', '--mathjax',
        '--metadata', f'title={title}',
    ]
    try:
        # pandoc reads and writes UTF-8 regardless of the locale, so pin the
        # pipe encoding instead of relying on the platform default.
        result = subprocess.run(
            command,
            input=content,
            encoding='utf-8',
            capture_output=True
        )
    except FileNotFoundError:
        raise SystemExit("Error: pandoc not found. Please install pandoc first: https://pandoc.org/installing.html")

    if result.returncode != 0:
        raise RuntimeError(f"pandoc failed: {result.stderr.strip()}")
    return result.stdout

 def wrap_html(content, title, config):
    """Wrap an HTML fragment in a complete, styled HTML document.

    Args:
        content: HTML produced from the Markdown source.  If it contains a
            ``<body>`` element only that element's children are used;
            otherwise the whole fragment is used.
        title: Plain-text document title.  It is HTML-escaped before being
            placed in ``<title>`` and in the visible ``<h1>``.
        config: Rendering options.  Required keys: ``margin`` (iterable of
            four values, emitted in cm), ``font_family``, ``font_size``,
            ``line_height``, ``body_padding`` and ``column_gap``.  Optional
            flags: ``no_title`` and ``single_column``.

    Returns:
        The full HTML document as a string.
    """
    from html import escape

    soup = BeautifulSoup(content, 'html.parser')
    body = soup.body
    # decode_contents() renders only the children, so the <body> wrapper is
    # dropped even when the tag carries attributes.
    inner_html = body.decode_contents() if body is not None else str(soup)
    content_str = inner_html.strip()

    # The title is derived from a file name and may contain &, < or >.
    safe_title = escape(title)

    title_display = 'none' if config.get('no_title') else 'block'
    column_style = '1' if config.get('single_column') else '2'
    page_margin_css = ' '.join(f"{m}cm" for m in config['margin'])

    html_template = f"""<!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>{safe_title}</title>
        <style>
            @page {{
                size: A4;
                margin: {page_margin_css};
                @top-left {{
                    content: "";
                }}
                @bottom-center {{
                    content: counter(page) " / " counter(pages);
                    font-family: {config['font_family']};
                    font-size: 0.8em;
                    color: #666;
                }}
            }}
            
            body {{
                margin: 0;
                padding: 0;
                font-family: {config['font_family']};
                font-size: {config['font_size']}pt;
                line-height: {config['line_height']};
                color: #333;
                -webkit-font-smoothing: antialiased;
                -moz-osx-font-smoothing: grayscale;
                text-rendering: optimizeLegibility;
                hyphens: auto;
                /* columns set on .document-content */
            }}
            
            .document-container {{
                margin: 0;
                padding: {config['body_padding']}cm;
                box-sizing: border-box;
            }}
            
            .document-content {{
                column-gap: {config['column_gap']}cm;
                column-count: {column_style};
                column-fill: balance;
            }}
            
            .document-title {{
                display: {title_display};
                font-size: 1.8em;
                font-weight: 600;
                text-align: center;
                column-span: all;
                margin: 0 0 1em 0;
                padding-bottom: 0.5em;
                border-bottom: 1px solid #eee;
            }}
            
            p {{
                margin: 0 0 1em 0;
                text-align: justify;
                orphans: 3;
                widows: 3;
                hyphens: auto;
            }}
            
            img {{
                max-width: 100%;
                height: auto;
            }}
            
            h1, h2, h3, h4, h5, h6 {{
                break-after: avoid;
            }}
            
            pre, table, figure {{
                break-inside: avoid;
            }}
            
            @media print {{
                body {{
                    -webkit-print-color-adjust: exact;
                    print-color-adjust: exact;
                }}
                .document-container {{
                    padding-top: 0;
                }}
            }}
        </style>
    </head>
    <body>
        <div class="document-container">
            <h1 class="document-title">{safe_title}</h1>
            <div class="document-content">
            {content_str}
            </div>
        </div>
    </body>
    </html>
    """
    return html_template

 def convert_md_to_pdf(input_file, output_file, config):
    """Render one Markdown file to a PDF.

    The title is taken from the input file name (underscores and hyphens
    become spaces, then title-cased).  The Markdown is converted with
    md_to_html(), wrapped by wrap_html(), and written with WeasyPrint.

    Args:
        input_file: Path of the Markdown file to read (UTF-8).
        output_file: Path of the PDF to write.
        config: Rendering options passed on to wrap_html(); the ``debug``
            flag additionally dumps the generated HTML to a temporary file
            and prints a traceback on failure.

    Returns:
        True on success, False if any Exception was raised (the error is
        reported on stderr).
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as source:
            md_content = source.read()

        # Derive a human-readable title from the file's base name.
        stem = os.path.splitext(os.path.basename(input_file))[0]
        title = re.sub(r'[_-]', ' ', stem).title()

        print("  🔧 Converting content...")

        # pandoc output first, then our own template and styling around it.
        html_fragment = md_to_html(md_content, title)
        full_html = wrap_html(html_fragment, title, config)

        font_config = FontConfiguration()
        # Relative asset URLs (images, CSS) resolve against the source folder.
        source_dir = os.path.dirname(os.path.abspath(input_file))
        document = HTML(string=full_html, base_url=source_dir)

        debug_enabled = config.get('debug')
        if debug_enabled:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as dump:
                dump.write(full_html.encode('utf-8'))
                print(f"Debug: HTML content written to {dump.name}")

        extra_css = CSS(string='''
            @media screen {
                body { background: #fff; padding: 2em; }
            }
        ''')

        print("  📝 Generating PDF...")
        document.write_pdf(
            output_file,
            stylesheets=[extra_css],
            font_config=font_config,
            presentational_hints=False
        )

        if os.path.exists(output_file):
            size_kb = os.path.getsize(output_file) / 1024
            print(f"  ✅ Output: {os.path.basename(output_file)} ({size_kb:.2f} KB)")

        return True

    except Exception as exc:
        # Per-file boundary: report and let the caller continue with the rest.
        print(f"  ⚠ Error processing {input_file}: {exc}", file=sys.stderr)
        if config.get('debug'):
            import traceback
            traceback.print_exc()
        return False

 def main():
    """Command-line entry point: convert every ``*.md`` file in a directory to PDF.

    Parses the CLI options, builds the shared rendering configuration,
    creates the output directory if needed, and calls convert_md_to_pdf()
    for each Markdown file found directly inside the input directory (the
    glob is not recursive).

    Exits with status 1 when the input directory is missing, the output
    directory cannot be created, no ``.md`` files are found, or at least one
    conversion fails.  An invalid ``--margin`` value is reported as an
    argparse usage error (status 2).
    """
    parser = argparse.ArgumentParser(
        description='Convert Markdown files to nicely formatted PDFs.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('-i', '--input', default='.',
                        help='Input directory containing .md files')
    parser.add_argument('-o', '--output', default='./output',
                        help='Output directory for PDFs')
    parser.add_argument('--single', action='store_true',
                        help='Use single column layout')
    parser.add_argument('--no-title', action='store_true',
                        help='Omit document titles')
    parser.add_argument('--line-height', type=float, default=1.5,
                        help='Line height ratio')
    parser.add_argument('--font-size', type=float, default=11,
                        help='Base font size in pt')
    parser.add_argument('--font-family',
                        default='"Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
                        help='Comma-separated font family list (enclosed in quotes)')
    parser.add_argument('--margin', default='1,1.5,1,1.5',
                        help='Page margins in cm "top,right,bottom,left"')
    parser.add_argument('--column-gap', type=float, default=1.2,
                        help='Gap between columns in cm')
    parser.add_argument('--body-padding', type=float, default=0.75,
                        help='Body padding inside page margins in cm')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug output')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose output')

    args = parser.parse_args()

    # Report a malformed --margin as a usage error instead of a traceback.
    try:
        margins = list(parse_margins(args.margin, '1.0'))
    except ValueError as exc:
        parser.error(str(exc))

    config = {
        'single_column': args.single,
        'no_title': args.no_title,
        'line_height': args.line_height,
        'font_size': args.font_size,
        'font_family': args.font_family,
        'margin': margins,
        'column_gap': args.column_gap,
        'body_padding': args.body_padding,
        'debug': args.debug,
        'verbose': args.verbose
    }

    input_dir = os.path.abspath(args.input)
    output_dir = os.path.abspath(args.output)

    if not os.path.isdir(input_dir):
        print(f"Error: Input directory does not exist: {input_dir}", file=sys.stderr)
        sys.exit(1)

    # exist_ok avoids a check-then-create race; a non-directory sitting at
    # this path still raises OSError and is reported here.
    output_dir_was_missing = not os.path.exists(output_dir)
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}", file=sys.stderr)
        sys.exit(1)
    if output_dir_was_missing and args.verbose:
        print(f"Created output directory: {output_dir}")

    print(f"== Starting conversion of .md files to PDF in {output_dir} ==")

    if args.verbose or args.debug:
        print("Configuration:")
        print(f"  Input directory: {input_dir}")
        print(f"  Output directory: {output_dir}")
        print(f"  Layout: {'Single column' if args.single else 'Two columns'}")
        print(f"  Font: {args.font_family} at {args.font_size}pt")
        print(f"  Margins: {config['margin']} cm")
        print(f"  Line height: {args.line_height}")
        print(f"  Column gap: {config['column_gap']} cm")
        print(f"  Show title: {'No' if args.no_title else 'Yes'}")
        print()

    markdown_files = sorted(f for f in Path(input_dir).glob('*.md') if f.is_file())

    if not markdown_files:
        print(f"No .md files found in {input_dir}", file=sys.stderr)
        sys.exit(1)

    success_count = 0
    total_files = len(markdown_files)

    if args.verbose:
        print(f"Found {total_files} markdown files to convert")

    for i, md_path in enumerate(markdown_files, 1):
        # The glob is flat, so every PDF lands directly in output_dir.
        pdf_path = os.path.join(output_dir, md_path.stem + '.pdf')

        title = re.sub(r'[_-]', ' ', md_path.stem).title()
        print(f"[{i}/{total_files}] 📄 Processing: {title}")

        if convert_md_to_pdf(str(md_path), pdf_path, config):
            success_count += 1

    failed = success_count < total_files
    print(f"\nConversion complete: {success_count}/{total_files} succeeded")
    if failed:
        print("Check error messages above for failed conversions")
    print(f"Output is in: {output_dir}")

    # Let callers (shells, CI) detect partial failures.
    if failed:
        sys.exit(1)

 if __name__ == "__main__":
    main()
	# /// script
	# requires-python = "==3.11.*"
	# dependencies = [
	# "beautifulsoup4",
	# "weasyprint",
	# ]
	# ///

	"""
	md2pdf - A tool to convert Markdown files to beautifully formatted PDFs.

	Creates professional-looking PDFs from Markdown files with support for:
	- Single or double column layouts
	- Custom fonts and typography
	- Adjustable margins and spacing
	- Title generation from filenames
	- Metadata preservation

	Dependencies:
	- Python 3.11 (the script metadata pins requires-python to 3.11.*)
	- WeasyPrint: pip install weasyprint
	- BeautifulSoup4: pip install beautifulsoup4
	- Pandoc: https://pandoc.org/installing.html

	Usage:
	./md2pdf.py [OPTIONS]

	Basic Examples:
	# Convert all .md files in current directory to PDFs in ./output
	./md2pdf.py -i . -o ./output

	# Single column layout
	./md2pdf.py --single

	# Custom page margins (top,right,bottom,left)
	./md2pdf.py --margin 1.5,2,1,2

	Full Options:
	-h, --help show help message and exit
	-i PATH, --input PATH
	Input directory containing .md files
	-o PATH, --output PATH
	Output directory for PDFs
	--single Use single column layout
	--no-title Omit document titles
	--line-height FLOAT Line height ratio (default: 1.5)
	--font-size FLOAT Base font size in pt (default: 11)
	--font-family FONT Comma-separated font family list
	--margin MARGINS Page margins in cm "top,right,bottom,left" (default: 1,1.5,1,1.5)
	--column-gap FLOAT Gap between columns in cm (default: 1.2)
	--body-padding FLOAT Body padding inside page margins in cm (default: 0.75)
	--debug Enable debug output
	-v, --verbose Enable verbose output

	The script will automatically:
	- Process all .md files in the specified input directory
	- Generate PDFs in the output directory with the same base filenames
	- Use sensible defaults for professional-looking results
	- Preserve links, images, and basic formatting
	"""

	import argparse
	import os
	import re
	import shutil
	import subprocess
	import sys
	import tempfile
	from pathlib import Path

	from bs4 import BeautifulSoup
	from weasyprint import HTML, CSS
	from weasyprint.text.fonts import FontConfiguration

	def parse_margins(margin_str, default_margin='1.0'):
	    """Parse a page-margin string into (top, right, bottom, left) components.

	    Accepts either a single value applied to all four sides ("2") or four
	    comma-separated values ("1,1.5,1,1.5").  Values are plain numbers; the
	    caller is responsible for appending a unit.

	    Args:
	        margin_str: Comma-separated margin specification.
	        default_margin: Value substituted for any empty component,
	            including a completely empty ``margin_str``.

	    Returns:
	        A 4-tuple of whitespace-stripped strings.

	    Raises:
	        ValueError: If the number of components is not 1 or 4, or if a
	            component is not a finite number.
	    """
	    import math

	    parts = [piece.strip() or str(default_margin) for piece in margin_str.split(',')]
	    if len(parts) == 1:
	        parts = parts * 4
	    elif len(parts) != 4:
	        raise ValueError(f"Invalid margin format: {margin_str}")

	    # Reject anything that would produce an invalid CSS length once the
	    # caller appends its unit (e.g. "abc" or "nan").
	    for part in parts:
	        try:
	            is_number = math.isfinite(float(part))
	        except ValueError:
	            is_number = False
	        if not is_number:
	            raise ValueError(f"Invalid margin format: {margin_str}")

	    return tuple(parts)

	def md_to_html(content, title):
	    """Convert Markdown text to HTML by piping it through the pandoc CLI.

	    Args:
	        content: Markdown source text, sent to pandoc on stdin.
	        title: Value passed to pandoc as the ``title`` metadata field.

	    Returns:
	        The HTML text pandoc wrote to stdout.

	    Raises:
	        SystemExit: If the ``pandoc`` executable cannot be found.
	        RuntimeError: If pandoc exits with a non-zero status; the message
	            carries pandoc's stderr output.
	    """
	    command = [
	        'pandoc', '-f', 'markdown', '-t', 'html5+smart', '--mathjax',
	        '--metadata', f'title={title}',
	    ]
	    try:
	        # pandoc reads and writes UTF-8 regardless of the locale, so pin the
	        # pipe encoding instead of relying on the platform default.
	        result = subprocess.run(
	            command,
	            input=content,
	            encoding='utf-8',
	            capture_output=True
	        )
	    except FileNotFoundError:
	        raise SystemExit("Error: pandoc not found. Please install pandoc first: https://pandoc.org/installing.html")

	    if result.returncode != 0:
	        raise RuntimeError(f"pandoc failed: {result.stderr.strip()}")
	    return result.stdout

	def wrap_html(content, title, config):
	    """Wrap an HTML fragment in a complete, styled HTML document.

	    Args:
	        content: HTML produced from the Markdown source.  If it contains a
	            ``<body>`` element only that element's children are used;
	            otherwise the whole fragment is used.
	        title: Plain-text document title.  It is HTML-escaped before being
	            placed in ``<title>`` and in the visible ``<h1>``.
	        config: Rendering options.  Required keys: ``margin`` (iterable of
	            four values, emitted in cm), ``font_family``, ``font_size``,
	            ``line_height``, ``body_padding`` and ``column_gap``.  Optional
	            flags: ``no_title`` and ``single_column``.

	    Returns:
	        The full HTML document as a string.
	    """
	    from html import escape

	    soup = BeautifulSoup(content, 'html.parser')
	    body = soup.body
	    # decode_contents() renders only the children, so the <body> wrapper is
	    # dropped even when the tag carries attributes.
	    inner_html = body.decode_contents() if body is not None else str(soup)
	    content_str = inner_html.strip()

	    # The title is derived from a file name and may contain &, < or >.
	    safe_title = escape(title)

	    title_display = 'none' if config.get('no_title') else 'block'
	    column_style = '1' if config.get('single_column') else '2'
	    page_margin_css = ' '.join(f"{m}cm" for m in config['margin'])

	    html_template = f"""<!DOCTYPE html>
	<html>
	<head>
	<meta charset="utf-8">
	<title>{safe_title}</title>
	<style>
	@page {{
	size: A4;
	margin: {page_margin_css};
	@top-left {{
	content: "";
	}}
	@bottom-center {{
	content: counter(page) " / " counter(pages);
	font-family: {config['font_family']};
	font-size: 0.8em;
	color: #666;
	}}
	}}

	body {{
	margin: 0;
	padding: 0;
	font-family: {config['font_family']};
	font-size: {config['font_size']}pt;
	line-height: {config['line_height']};
	color: #333;
	-webkit-font-smoothing: antialiased;
	-moz-osx-font-smoothing: grayscale;
	text-rendering: optimizeLegibility;
	hyphens: auto;
	/* columns set on .document-content */
	}}

	.document-container {{
	margin: 0;
	padding: {config['body_padding']}cm;
	box-sizing: border-box;
	}}

	.document-content {{
	column-gap: {config['column_gap']}cm;
	column-count: {column_style};
	column-fill: balance;
	}}

	.document-title {{
	display: {title_display};
	font-size: 1.8em;
	font-weight: 600;
	text-align: center;
	column-span: all;
	margin: 0 0 1em 0;
	padding-bottom: 0.5em;
	border-bottom: 1px solid #eee;
	}}

	p {{
	margin: 0 0 1em 0;
	text-align: justify;
	orphans: 3;
	widows: 3;
	hyphens: auto;
	}}

	img {{
	max-width: 100%;
	height: auto;
	}}

	h1, h2, h3, h4, h5, h6 {{
	break-after: avoid;
	}}

	pre, table, figure {{
	break-inside: avoid;
	}}

	@media print {{
	body {{
	-webkit-print-color-adjust: exact;
	print-color-adjust: exact;
	}}
	.document-container {{
	padding-top: 0;
	}}
	}}
	</style>
	</head>
	<body>
	<div class="document-container">
	<h1 class="document-title">{safe_title}</h1>
	<div class="document-content">
	{content_str}
	</div>
	</div>
	</body>
	</html>
	"""
	    return html_template

	def convert_md_to_pdf(input_file, output_file, config):
	    """Render one Markdown file to a PDF.

	    The title is taken from the input file name (underscores and hyphens
	    become spaces, then title-cased).  The Markdown is converted with
	    md_to_html(), wrapped by wrap_html(), and written with WeasyPrint.

	    Args:
	        input_file: Path of the Markdown file to read (UTF-8).
	        output_file: Path of the PDF to write.
	        config: Rendering options passed on to wrap_html(); the ``debug``
	            flag additionally dumps the generated HTML to a temporary file
	            and prints a traceback on failure.

	    Returns:
	        True on success, False if any Exception was raised (the error is
	        reported on stderr).
	    """
	    try:
	        with open(input_file, 'r', encoding='utf-8') as source:
	            md_content = source.read()

	        # Derive a human-readable title from the file's base name.
	        stem = os.path.splitext(os.path.basename(input_file))[0]
	        title = re.sub(r'[_-]', ' ', stem).title()

	        print(" 🔧 Converting content...")

	        # pandoc output first, then our own template and styling around it.
	        html_fragment = md_to_html(md_content, title)
	        full_html = wrap_html(html_fragment, title, config)

	        font_config = FontConfiguration()
	        # Relative asset URLs (images, CSS) resolve against the source folder.
	        source_dir = os.path.dirname(os.path.abspath(input_file))
	        document = HTML(string=full_html, base_url=source_dir)

	        debug_enabled = config.get('debug')
	        if debug_enabled:
	            with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as dump:
	                dump.write(full_html.encode('utf-8'))
	                print(f"Debug: HTML content written to {dump.name}")

	        extra_css = CSS(string='''
	@media screen {
	body { background: #fff; padding: 2em; }
	}
	''')

	        print(" 📝 Generating PDF...")
	        document.write_pdf(
	            output_file,
	            stylesheets=[extra_css],
	            font_config=font_config,
	            presentational_hints=False
	        )

	        if os.path.exists(output_file):
	            size_kb = os.path.getsize(output_file) / 1024
	            print(f" ✅ Output: {os.path.basename(output_file)} ({size_kb:.2f} KB)")

	        return True

	    except Exception as exc:
	        # Per-file boundary: report and let the caller continue with the rest.
	        print(f" ⚠ Error processing {input_file}: {exc}", file=sys.stderr)
	        if config.get('debug'):
	            import traceback
	            traceback.print_exc()
	        return False

	def main():
	    """Command-line entry point: convert every ``*.md`` file in a directory to PDF.

	    Parses the CLI options, builds the shared rendering configuration,
	    creates the output directory if needed, and calls convert_md_to_pdf()
	    for each Markdown file found directly inside the input directory (the
	    glob is not recursive).

	    Exits with status 1 when the input directory is missing, the output
	    directory cannot be created, no ``.md`` files are found, or at least one
	    conversion fails.  An invalid ``--margin`` value is reported as an
	    argparse usage error (status 2).
	    """
	    parser = argparse.ArgumentParser(
	        description='Convert Markdown files to nicely formatted PDFs.',
	        formatter_class=argparse.ArgumentDefaultsHelpFormatter
	    )
	    parser.add_argument('-i', '--input', default='.',
	                        help='Input directory containing .md files')
	    parser.add_argument('-o', '--output', default='./output',
	                        help='Output directory for PDFs')
	    parser.add_argument('--single', action='store_true',
	                        help='Use single column layout')
	    parser.add_argument('--no-title', action='store_true',
	                        help='Omit document titles')
	    parser.add_argument('--line-height', type=float, default=1.5,
	                        help='Line height ratio')
	    parser.add_argument('--font-size', type=float, default=11,
	                        help='Base font size in pt')
	    parser.add_argument('--font-family',
	                        default='"Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
	                        help='Comma-separated font family list (enclosed in quotes)')
	    parser.add_argument('--margin', default='1,1.5,1,1.5',
	                        help='Page margins in cm "top,right,bottom,left"')
	    parser.add_argument('--column-gap', type=float, default=1.2,
	                        help='Gap between columns in cm')
	    parser.add_argument('--body-padding', type=float, default=0.75,
	                        help='Body padding inside page margins in cm')
	    parser.add_argument('--debug', action='store_true',
	                        help='Enable debug output')
	    parser.add_argument('--verbose', '-v', action='store_true',
	                        help='Enable verbose output')

	    args = parser.parse_args()

	    # Report a malformed --margin as a usage error instead of a traceback.
	    try:
	        margins = list(parse_margins(args.margin, '1.0'))
	    except ValueError as exc:
	        parser.error(str(exc))

	    config = {
	        'single_column': args.single,
	        'no_title': args.no_title,
	        'line_height': args.line_height,
	        'font_size': args.font_size,
	        'font_family': args.font_family,
	        'margin': margins,
	        'column_gap': args.column_gap,
	        'body_padding': args.body_padding,
	        'debug': args.debug,
	        'verbose': args.verbose
	    }

	    input_dir = os.path.abspath(args.input)
	    output_dir = os.path.abspath(args.output)

	    if not os.path.isdir(input_dir):
	        print(f"Error: Input directory does not exist: {input_dir}", file=sys.stderr)
	        sys.exit(1)

	    # exist_ok avoids a check-then-create race; a non-directory sitting at
	    # this path still raises OSError and is reported here.
	    output_dir_was_missing = not os.path.exists(output_dir)
	    try:
	        os.makedirs(output_dir, exist_ok=True)
	    except OSError as e:
	        print(f"Error creating output directory: {e}", file=sys.stderr)
	        sys.exit(1)
	    if output_dir_was_missing and args.verbose:
	        print(f"Created output directory: {output_dir}")

	    print(f"== Starting conversion of .md files to PDF in {output_dir} ==")

	    if args.verbose or args.debug:
	        print("Configuration:")
	        print(f" Input directory: {input_dir}")
	        print(f" Output directory: {output_dir}")
	        print(f" Layout: {'Single column' if args.single else 'Two columns'}")
	        print(f" Font: {args.font_family} at {args.font_size}pt")
	        print(f" Margins: {config['margin']} cm")
	        print(f" Line height: {args.line_height}")
	        print(f" Column gap: {config['column_gap']} cm")
	        print(f" Show title: {'No' if args.no_title else 'Yes'}")
	        print()

	    markdown_files = sorted(f for f in Path(input_dir).glob('*.md') if f.is_file())

	    if not markdown_files:
	        print(f"No .md files found in {input_dir}", file=sys.stderr)
	        sys.exit(1)

	    success_count = 0
	    total_files = len(markdown_files)

	    if args.verbose:
	        print(f"Found {total_files} markdown files to convert")

	    for i, md_path in enumerate(markdown_files, 1):
	        # The glob is flat, so every PDF lands directly in output_dir.
	        pdf_path = os.path.join(output_dir, md_path.stem + '.pdf')

	        title = re.sub(r'[_-]', ' ', md_path.stem).title()
	        print(f"[{i}/{total_files}] 📄 Processing: {title}")

	        if convert_md_to_pdf(str(md_path), pdf_path, config):
	            success_count += 1

	    failed = success_count < total_files
	    print(f"\nConversion complete: {success_count}/{total_files} succeeded")
	    if failed:
	        print("Check error messages above for failed conversions")
	    print(f"Output is in: {output_dir}")

	    # Let callers (shells, CI) detect partial failures.
	    if failed:
	        sys.exit(1)

	if __name__ == "__main__":
	    main()
No results found