Skip to content

Instantly share code, notes, and snippets.

@celsowm
Created September 6, 2025 15:52
Show Gist options
  • Select an option

  • Save celsowm/9d0f428b2e883f1578aea3f762c8f365 to your computer and use it in GitHub Desktop.

Select an option

Save celsowm/9d0f428b2e883f1578aea3f762c8f365 to your computer and use it in GitHub Desktop.
HtmlToPdf.php
<?php
declare(strict_types=1);
final class HtmlToPdf
{
/** PDF document that receives every rendered block; returned by convertHtml(). */
private MiniPdfGlyph $pdf;
/** Parser used for stylesheets (parse) and for inline `style` attributes (parseDeclarations). */
private CssParser $cssParser;
/** Loaded CSS rules, keyed by selector, each mapping property name => raw value. */
private array $cssRules = [];
private array $styleStack = []; // computed CSS stack for current element
/**
 * Built-in stylesheet (selector => declarations) that the constructor loads
 * through loadCssFromArray() before any user CSS.
 */
private array $defaultStyles = [
'body' => [
'font-family' => 'Helvetica',
'font-size' => '12pt',
'line-height' => '1.4',
'margin' => '10mm',
'color' => '#000000'
],
'h1' => [
'font-size' => '24pt',
'font-weight' => 'bold',
'margin-top' => '20pt',
'margin-bottom' => '10pt'
],
'h2' => [
'font-size' => '18pt',
'font-weight' => 'bold',
'margin-top' => '15pt',
'margin-bottom' => '8pt'
],
'h3' => [
'font-size' => '14pt',
'font-weight' => 'bold',
'margin-top' => '12pt',
'margin-bottom' => '6pt'
],
'p' => [
'margin-bottom' => '10pt'
],
'table' => [
'border-collapse' => 'collapse',
'width' => '100%',
'margin-bottom' => '10pt'
],
// Grouped selector: serialised as "th, td { ... }" by loadCssFromArray().
'th, td' => [
'border' => '1px solid #000000',
'padding' => '5pt'
],
'th' => [
'font-weight' => 'bold',
'background-color' => '#f0f0f0'
]
];
/**
 * Creates the converter: a fresh PDF document, a CSS parser, and the built-in
 * default stylesheet.
 *
 * @param float $pageWidth  Page width handed to MiniPdfGlyph (presumably in points; the default matches A4 - confirm against MiniPdfGlyph).
 * @param float $pageHeight Page height handed to MiniPdfGlyph (same unit assumption).
 */
public function __construct(float $pageWidth = 595.28, float $pageHeight = 841.89)
{
$this->pdf = new MiniPdfGlyph($pageWidth, $pageHeight);
$this->cssParser = new CssParser();
// Load default styles first so that CSS loaded afterwards can build on them
$this->loadCssFromArray($this->defaultStyles);
}
/**
 * Loads a stylesheet from disk and adds its rules to the converter.
 *
 * @param string $cssFile Path to a CSS file.
 *
 * @throws RuntimeException When the path is not a regular file or cannot be read.
 */
public function loadCssFromFile(string $cssFile): void
{
    // is_file() also rejects directories, which file_exists() would accept.
    if (!is_file($cssFile)) {
        throw new RuntimeException("CSS file not found: {$cssFile}");
    }

    // Suppress the warning only because the failure is reported as an exception below.
    $css = @file_get_contents($cssFile);
    if ($css === false) {
        // Without this check `false` would reach loadCss(string) and raise a TypeError.
        throw new RuntimeException("CSS file could not be read: {$cssFile}");
    }

    $this->loadCss($css);
}
/**
 * Parses a CSS string and adds its rules to the rules already loaded.
 *
 * Declarations are merged per selector: a later rule for a selector that is
 * already known overrides only the properties it names. Previously the whole
 * earlier rule was replaced, so `h1 { color: red }` discarded the default h1
 * font size and weight.
 *
 * @param string $css Raw stylesheet text.
 */
public function loadCss(string $css): void
{
    $parsedRules = $this->cssParser->parse($css);

    foreach ($this->flattenCssRules($parsedRules) as $selector => $declarations) {
        $this->cssRules[$selector] = array_merge(
            $this->cssRules[$selector] ?? [],
            $declarations
        );
    }
}
/**
 * Serialises a selector => declarations map to CSS text and loads it.
 *
 * @param array $cssArray Map of selector => (property => value).
 */
public function loadCssFromArray(array $cssArray): void
{
    $blocks = [];

    foreach ($cssArray as $selector => $declarations) {
        $body = '';
        foreach ($declarations as $property => $value) {
            $body .= $property . ': ' . $value . ';';
        }
        $blocks[] = $selector . ' {' . $body . '}';
    }

    $this->loadCss(implode('', $blocks));
}
/**
 * Converts an HTML string into the PDF document held by this converter.
 *
 * Steps: parse the HTML, load the CSS found in <style> tags, register the
 * inline styles reported by the parser, then walk the DOM from its root.
 *
 * @param string $html HTML source.
 *
 * @return MiniPdfGlyph The PDF document with all rendered content appended.
 */
public function convertHtml(string $html): MiniPdfGlyph
{
    $htmlParser = new HtmlParser($html);
    $dom = $htmlParser->getDom();

    // Stylesheets embedded in <style> tags.
    foreach ($htmlParser->getRawStyleTags() as $styleTag) {
        $this->loadCss($styleTag['content']);
    }

    // Inline styles, registered under the selector supplied by the parser.
    // NOTE(review): getStylesForElement() also reads each element's `style`
    // attribute directly; if these selectors are not unique per element, the
    // rule will leak to other elements matching the same selector - confirm
    // against HtmlParser.
    foreach ($htmlParser->getRawInlineStyles() as $inlineStyle) {
        $this->cssRules[$inlineStyle['selector']] =
            $this->cssParser->parseDeclarations($inlineStyle['rawStyle']);
    }

    // An empty document has no root element; passing null on would be a TypeError.
    $root = $dom->documentElement;
    if ($root !== null) {
        $this->processNode($root);
    }

    return $this->pdf;
}
/**
 * Renders one DOM node (and, through the per-tag handlers, its subtree).
 *
 * Text nodes become a plain paragraph. Element nodes get their computed
 * styles pushed (style manager + CSS stack), are dispatched by tag name, get
 * their margin-bottom applied as a spacer, and are popped again. Any other
 * node type (comments, processing instructions, ...) is ignored.
 *
 * @param DOMNode $node Node to render.
 */
private function processNode(DOMNode $node): void
{
    if ($node->nodeType === XML_TEXT_NODE) {
        $text = trim($node->textContent);
        // Not empty(): the string "0" is valid content and must be rendered.
        if ($text !== '') {
            $this->pdf->addParagraphText($text, []);
        }
        return;
    }

    if (!$node instanceof DOMElement) {
        return;
    }

    $tag = strtolower($node->nodeName);

    // Document metadata and embedded code are never part of the visible page;
    // without this guard their text would be printed by the default branch.
    if (in_array($tag, ['head', 'title', 'style', 'script'], true)) {
        return;
    }

    // Compute styles and push style manager & CSS stack
    $styles = $this->getStylesForElement($node);
    $this->pdf->getStyleManager()->push();
    $this->styleStack[] = $styles;
    $this->applyStyles($styles);

    switch ($tag) {
        case 'h1':
        case 'h2':
        case 'h3':
        case 'h4':
        case 'h5':
        case 'h6':
        case 'p':
            $this->processTextElement($node);
            break;
        case 'table':
            $this->processTable($node);
            break;
        case 'img':
            $this->processImage($node);
            break;
        case 'hr':
            $this->processHorizontalRule($node);
            break;
        case 'ul':
        case 'ol':
            $this->processList($node);
            break;
        case 'div':
            $this->processDiv($node);
            break;
        case 'br':
            $this->pdf->addSpacer($this->getStyleValue('line-height', '12pt', true));
            break;
        case 'strong':
        case 'b':
            $this->pdf->getStyleManager()->applyOptions(['style' => 'B'], $this->pdf);
            $this->processChildren($node);
            break;
        case 'em':
        case 'i':
            $this->pdf->getStyleManager()->applyOptions(['style' => 'I'], $this->pdf);
            $this->processChildren($node);
            break;
        case 'a':
            $this->processLink($node);
            break;
        default:
            $this->processChildren($node);
            break;
    }

    // margin-bottom is applied after the element has been rendered
    $marginBottom = $this->getStyleValue('margin-bottom', null, true);
    if ($marginBottom !== null && is_numeric($marginBottom) && $marginBottom > 0) {
        $this->pdf->addSpacer((float)$marginBottom);
    }

    array_pop($this->styleStack);
    $this->pdf->getStyleManager()->pop();
}
/**
 * Renders a block-level text element (heading or paragraph) as one paragraph.
 *
 * The element's flattened text is sent to the PDF together with options
 * derived from the current computed styles: alignment, padding, left/right
 * margins (mapped to indent / extra right padding), border, border radius and
 * background colour.
 *
 * @param DOMNode $node Element whose text is rendered.
 */
private function processTextElement(DOMNode $node): void
{
    $text = $this->extractTextContent($node);
    // Not empty(): a paragraph containing just "0" must still be rendered.
    if ($text === '') {
        return;
    }

    $options = ['align' => $this->getStyleValue('text-align', 'left')];

    // padding from CSS (expanded longhands), order: top, right, bottom, left
    $padding = [
        (float)$this->getStyleValue('padding-top', '0', true),
        (float)$this->getStyleValue('padding-right', '0', true),
        (float)$this->getStyleValue('padding-bottom', '0', true),
        (float)$this->getStyleValue('padding-left', '0', true),
    ];
    if ($padding[0] || $padding[1] || $padding[2] || $padding[3]) {
        $options['padding'] = $padding;
    }

    // margin-left becomes an indent (first line and hanging lines alike);
    // margin-right is added on top of the right padding.
    $marginLeft = (float)$this->getStyleValue('margin-left', '0', true);
    $marginRight = (float)$this->getStyleValue('margin-right', '0', true);
    if ($marginLeft > 0) {
        $options['indent'] = $marginLeft;
        $options['hangIndent'] = $marginLeft;
    }
    if ($marginRight > 0) {
        $options['padding'] ??= [0.0, 0.0, 0.0, 0.0];
        $options['padding'][1] += $marginRight;
    }

    $border = $this->collectBorderOptions();
    $radius = $this->collectBorderRadius();
    if ($radius !== null) {
        $border['radius'] = $radius;
    }
    if (!empty($border)) {
        $options['border'] = $border;
    }

    $background = $this->getStyleValue('background-color', null);
    if ($background !== null) {
        $options['bgcolor'] = $background;
    }

    $this->pdf->addParagraphText($text, $options);
}

/**
 * Builds the 'width' / 'style' / 'color' border options from the current styles.
 *
 * Each entry is a scalar when only the uniform property is set, or a
 * [top, right, bottom, left] array as soon as any per-side value exists
 * (missing sides fall back to the uniform value, then to a fixed default).
 *
 * @return array Border options; empty when no border is declared.
 */
private function collectBorderOptions(): array
{
    $uniformWidth = $this->getStyleValue('border-width', null, true);
    $uniformStyle = $this->getStyleValue('border-style', null);
    $uniformColor = $this->getStyleValue('border-color', null);

    $sideWidths = [];
    $sideStyles = [];
    $sideColors = [];
    foreach (['top', 'right', 'bottom', 'left'] as $side) {
        $sideWidths[] = $this->getStyleValue("border-{$side}-width", null, true);
        $sideStyles[] = $this->getStyleValue("border-{$side}-style", null);
        $sideColors[] = $this->getStyleValue("border-{$side}-color", null);
    }

    // True when at least one entry is truthy.
    $anyTruthy = static fn (array $values): bool => array_filter($values) !== [];
    // True when at least one entry is set at all (0.0 counts).
    $anySet = static fn (array $values): bool
        => array_filter($values, static fn ($v): bool => $v !== null) !== [];
    // Per-side value, falling back to the uniform value, then to $fallback.
    $perSide = static fn (array $values, $uniform, $fallback): array
        => array_map(static fn ($v) => $v ?? $uniform ?? $fallback, $values);

    $hasBorder = ($uniformStyle && $uniformStyle !== 'none')
        || $anyTruthy($sideStyles)
        || ($uniformWidth !== null && (float)$uniformWidth > 0)
        || $anyTruthy($sideWidths);
    if (!$hasBorder) {
        return [];
    }

    $border = [];

    if ($anySet($sideWidths)) {
        $border['width'] = array_map(
            static fn ($v): float => (float)$v,
            $perSide($sideWidths, $uniformWidth, 0)
        );
    } elseif ($uniformWidth !== null) {
        $border['width'] = (float)$uniformWidth;
    }

    if ($anyTruthy($sideStyles)) {
        $border['style'] = $perSide($sideStyles, $uniformStyle, 'solid');
    } elseif ($uniformStyle) {
        $border['style'] = $uniformStyle;
    }

    if ($anyTruthy($sideColors)) {
        $border['color'] = $perSide($sideColors, $uniformColor, '#000');
    } elseif ($uniformColor) {
        $border['color'] = $uniformColor;
    }

    return $border;
}

/**
 * Resolves border-radius (shorthand plus per-corner longhands) to points.
 *
 * @return array|null [top-left, top-right, bottom-right, bottom-left], or
 *                    null when no radius property is set.
 */
private function collectBorderRadius(): ?array
{
    $radius = null;

    $shorthand = $this->getStyleValue('border-radius', null);
    if (is_string($shorthand) && trim($shorthand) !== '') {
        $radius = $this->parseRadiusBox($shorthand);
    }

    $corners = [
        'border-top-left-radius' => 0,
        'border-top-right-radius' => 1,
        'border-bottom-right-radius' => 2,
        'border-bottom-left-radius' => 3,
    ];
    foreach ($corners as $property => $index) {
        $value = $this->getStyleValue($property, null);
        if (is_string($value) && trim($value) !== '') {
            // A per-corner longhand overrides the shorthand for that corner.
            $radius ??= [0.0, 0.0, 0.0, 0.0];
            $radius[$index] = (float)$this->convertCssToPoints('border-radius', $value);
        }
    }

    return $radius;
}
/**
 * Renders a <table> as a grid of plain-text cells.
 *
 * Rows are collected from <tr> children of the table itself and of its
 * <thead>/<tbody>/<tfoot> sections, in document order. Each <td>/<th> is
 * reduced to its text content.
 *
 * @param DOMNode $node The <table> element.
 */
private function processTable(DOMNode $node): void
{
    // Collect row elements, looking one level into row-group sections.
    $rowNodes = [];
    foreach ($node->childNodes as $child) {
        $name = strtolower($child->nodeName);
        if ($name === 'tr') {
            $rowNodes[] = $child;
        } elseif (in_array($name, ['thead', 'tbody', 'tfoot'], true)) {
            foreach ($child->childNodes as $grandChild) {
                if (strtolower($grandChild->nodeName) === 'tr') {
                    $rowNodes[] = $grandChild;
                }
            }
        }
    }

    $rows = [];
    foreach ($rowNodes as $rowNode) {
        $row = [];
        foreach ($rowNode->childNodes as $cell) {
            $cellName = strtolower($cell->nodeName);
            if ($cellName === 'td' || $cellName === 'th') {
                $row[] = $this->extractTextContent($cell);
            }
        }
        if ($row !== []) {
            $rows[] = $row;
        }
    }

    if ($rows === []) {
        return;
    }

    // The `border` shorthand is expanded (and removed) when styles are
    // computed, so "no border" has to be detected on the longhands as well.
    $borderStyle = $this->getStyleValue('border-style', null);
    $borderWidth = $this->getStyleValue('border-width', null, true);
    $bordersEnabled = $this->getStyleValue('border', '1') !== 'none'
        && $borderStyle !== 'none'
        && !($borderWidth !== null && (float)$borderWidth <= 0);

    $options = [
        'borders' => $bordersEnabled,
        'padding' => $this->getStyleValue('padding', '5', true),
    ];
    $this->pdf->addTableData($rows, $options);
}
/**
 * Renders an <img> element from a file path or a `data:image/...` URI.
 *
 * Rendering is best-effort: an image that cannot be loaded from its path, or
 * a data URI with a missing or undecodable payload, is skipped.
 *
 * @param DOMElement $node The <img> element.
 */
private function processImage(DOMElement $node): void
{
    $src = trim($node->getAttribute('src'));
    if ($src === '') {
        return;
    }

    $block = [
        'w' => $this->getStyleValue('width', null, true),
        'h' => $this->getStyleValue('height', null, true),
    ];

    if (str_starts_with($src, 'data:image/')) {
        // data:[<mediatype>][;base64],<data>
        $parts = explode(',', $src, 2);
        if (count($parts) !== 2) {
            return; // no payload separator
        }
        [$header, $payload] = $parts;

        $data = str_contains(strtolower($header), ';base64')
            ? base64_decode(preg_replace('/\s+/', '', $payload) ?? '', true)
            : rawurldecode($payload);
        if ($data === false || $data === '') {
            return; // undecodable or empty payload
        }

        // basename() of a base64 string is arbitrary (it contains '/'), so the
        // image is registered under a content hash instead.
        $name = 'data-' . md5($data);
        $this->pdf->addImageData($name, $data);
        $this->pdf->addImageBlock($name, $block);
        return;
    }

    $name = basename($src);
    try {
        $this->pdf->addImage($name, $src);
        $this->pdf->addImageBlock($name, $block);
    } catch (Exception $e) {
        // Deliberately best-effort: a missing or unreadable image must not
        // abort the conversion of the rest of the document.
    }
}
/**
 * Renders an <hr> as a horizontal line using the current `color` and `height`.
 *
 * @param DOMNode $node The <hr> element (not inspected; styles come from the style stack).
 */
private function processHorizontalRule(DOMNode $node): void
{
    $lineColor = $this->getStyleValue('color', '#000000');
    $lineHeight = $this->getStyleValue('height', '1', true);

    $this->pdf->addHorizontalLine([
        'color' => $lineColor,
        'height' => $lineHeight,
    ]);
}
/**
 * Renders a <ul>/<ol> as a flat list of text items.
 *
 * Only direct <li> children are used; each is reduced to its text content.
 *
 * @param DOMNode $node The list element.
 */
private function processList(DOMNode $node): void
{
    $listItemNodes = array_filter(
        iterator_to_array($node->childNodes, false),
        static fn (DOMNode $child): bool => $child->nodeName === 'li'
    );

    $items = array_map(
        fn (DOMNode $li): string => $this->extractTextContent($li),
        array_values($listItemNodes)
    );

    if (empty($items)) {
        return;
    }

    $marker = $node->nodeName === 'ol' ? 'decimal' : 'bullet';
    $this->pdf->addList($items, ['type' => $marker]);
}
/**
 * Renders a <div>: it produces no output of its own and simply renders its children.
 *
 * @param DOMNode $node The <div> element.
 */
private function processDiv(DOMNode $node): void
{
$this->processChildren($node);
}
/**
 * Renders an <a> element as a blue, underlined link.
 *
 * Links without an href or without any text are skipped.
 *
 * @param DOMElement $node The <a> element.
 */
private function processLink(DOMElement $node): void
{
    $href = $node->getAttribute('href');
    $text = $this->extractTextContent($node);

    // Compared against '' rather than empty(), which would also drop "0".
    if ($href === '' || $text === '') {
        return;
    }

    $this->pdf->addLink($text, $href, [
        'color' => '#0000FF',
        'style' => 'U',
    ]);
}
/**
 * Renders every child of a node, wrapping each one in its own style-manager
 * push/pop pair.
 *
 * @param DOMNode $node Parent node whose children are rendered in document order.
 */
private function processChildren(DOMNode $node): void
{
    $children = $node->childNodes;

    for ($index = 0; $index < $children->length; $index++) {
        $this->pdf->getStyleManager()->push();
        $this->processNode($children->item($index));
        $this->pdf->getStyleManager()->pop();
    }
}
/**
 * Returns the trimmed text of a node's subtree.
 *
 * Text nodes contribute their content as-is; element children contribute their
 * own (already trimmed) text; every other node type is skipped.
 *
 * @param DOMNode $node Subtree root.
 *
 * @return string Concatenated text with leading/trailing whitespace removed.
 */
private function extractTextContent(DOMNode $node): string
{
    $pieces = [];

    foreach ($node->childNodes as $child) {
        $pieces[] = match ($child->nodeType) {
            XML_TEXT_NODE => $child->textContent,
            XML_ELEMENT_NODE => $this->extractTextContent($child),
            default => '',
        };
    }

    return trim(implode('', $pieces));
}
/**
 * Computes the style declarations that apply to one element.
 *
 * Every loaded rule whose selector matches contributes its declarations in
 * load order (later rules override earlier ones), then the element's own
 * `style` attribute overrides those, and finally shorthands are expanded.
 *
 * @param DOMElement $element Element to style.
 *
 * @return array Map of CSS property => raw value.
 */
private function getStylesForElement(DOMElement $element): array
{
    $styles = [];

    foreach ($this->cssRules as $ruleSelector => $declarations) {
        // Array keys that look like integers come back as int; cast so the
        // string-typed matcher does not raise a TypeError under strict_types.
        if ($this->selectorMatchesElement((string)$ruleSelector, $element)) {
            $styles = array_merge($styles, $declarations);
        }
    }

    // Inline styles take precedence over stylesheet rules.
    if ($element->hasAttribute('style')) {
        $inlineStyles = $this->cssParser->parseDeclarations($element->getAttribute('style'));
        $styles = array_merge($styles, $inlineStyles);
    }

    return $this->expandShorthandStyles($styles);
}
/**
 * Applies an element's computed styles to the PDF style manager.
 *
 * Typography properties are collected and applied in a single applyOptions()
 * call. `margin-top` is emitted immediately as a spacer; `margin-bottom` is
 * left to the caller, which applies it after the element has been rendered.
 * All other properties are ignored here.
 *
 * @param array $styles Map of CSS property => raw value.
 */
private function applyStyles(array $styles): void
{
    $options = [];

    // Adds a font-style flag ('B' or 'I') on top of whatever has been collected
    // so far. Starting from $options['style'] rather than always from the style
    // manager lets bold and italic combine instead of overwriting each other.
    $addFontFlag = function (string $flag) use (&$options): void {
        $current = (string)($options['style'] ?? $this->pdf->getStyleManager()->getStyle());
        if (!str_contains($current, $flag)) {
            $options['style'] = $current . $flag;
        }
    };

    foreach ($styles as $property => $value) {
        switch ($property) {
            case 'font-family':
                $options['fontAlias'] = $value;
                break;
            case 'font-size':
                $options['size'] = $this->convertToPoints((string)$value);
                break;
            case 'color':
                $options['color'] = $value;
                break;
            case 'text-align':
                $options['align'] = $value;
                break;
            case 'line-height':
                // NOTE(review): a unitless line-height such as "1.4" passes
                // through unchanged; whether the style manager expects a
                // multiplier or points here is not visible - confirm.
                $options['lineHeight'] = $this->convertToPoints((string)$value);
                break;
            case 'margin-top':
                $this->pdf->addSpacer($this->convertCssToPoints('margin-top', (string)$value));
                break;
            case 'font-weight':
                $weight = strtolower(trim((string)$value));
                // Keywords plus numeric weights from semi-bold upwards count as bold.
                if ($weight === 'bold' || $weight === 'bolder'
                    || (is_numeric($weight) && (int)$weight >= 600)) {
                    $addFontFlag('B');
                }
                break;
            case 'font-style':
                $slant = strtolower(trim((string)$value));
                if ($slant === 'italic' || $slant === 'oblique') {
                    $addFontFlag('I');
                }
                break;
            default:
                // margin-bottom is applied after rendering; everything else is ignored.
                break;
        }
    }

    if (!empty($options)) {
        $this->pdf->getStyleManager()->applyOptions($options, $this->pdf);
    }
}
/**
 * Looks up a CSS property for the element currently being rendered.
 *
 * Resolution order: the value on top of the computed-style stack, otherwise
 * $default; for a handful of typography properties a still-null result falls
 * back to the style manager's current value.
 *
 * @param string $property        CSS property name.
 * @param mixed  $default         Value used when the property is not on the stack.
 * @param bool   $convertToPoints When true, a string result is converted to points.
 *
 * @return mixed Raw value, or a float when conversion was requested and applied.
 */
private function getStyleValue(string $property, $default = null, bool $convertToPoints = false)
{
    $resolved = $default;

    // 1) Top of the computed CSS stack wins over the default
    if ($this->styleStack !== []) {
        $current = $this->styleStack[array_key_last($this->styleStack)];
        if (array_key_exists($property, $current)) {
            $resolved = $current[$property];
        }
    }

    // 2) Typography falls back to the style manager, but only when still null
    $styleManager = $this->pdf->getStyleManager();
    $resolved = match ($property) {
        'font-size' => $resolved ?? $styleManager->getCurrentFontSize(),
        'line-height' => $resolved ?? $styleManager->getLineHeight(),
        'color' => $resolved ?? $styleManager->getTextColor(),
        'letter-spacing' => $resolved ?? $styleManager->getLetterSpacing(),
        'word-spacing' => $resolved ?? $styleManager->getWordSpacing(),
        default => $resolved,
    };

    if (!$convertToPoints || !is_string($resolved)) {
        return $resolved;
    }

    return $this->convertCssToPoints($property, $resolved);
}
/**
 * Converts a CSS length to points.
 *
 * Supports unitless numbers (taken as points), pt, px (96 dpi), mm, cm, in,
 * and the font-relative units em, rem and %. Anything else falls back to a
 * plain float cast of the input.
 *
 * @param string $value CSS length such as "12pt", "1.5em" or "150%".
 *
 * @return float Length in points.
 */
private function convertToPoints(string $value): float
{
    $value = trim($value);
    if (is_numeric($value)) {
        return (float)$value;
    }

    // Split number and unit with a pattern; slicing the last two characters
    // can never yield the one-character unit "%".
    if (!preg_match('/^(-?(?:\d+(?:\.\d+)?|\.\d+))\s*([a-z%]+)$/i', $value, $parts)) {
        return (float)$value;
    }
    $numeric = (float)$parts[1];
    $unit = strtolower($parts[2]);

    // Points per absolute unit.
    $absolute = ['pt' => 1.0, 'px' => 0.75, 'mm' => 2.83465, 'cm' => 28.3465, 'in' => 72.0];
    if (isset($absolute[$unit])) {
        return $numeric * $absolute[$unit];
    }

    // Base font size in points for relative units. The element's own font-size
    // is used when it is absolute; a relative or missing one resolves against
    // the style manager's current size (assumed to be numeric points - confirm
    // against the StyleManager), with 12pt as the last resort.
    $baseFontSize = function () use ($absolute): float {
        $raw = $this->getStyleValue('font-size', null);
        if (is_int($raw) || is_float($raw)) {
            return (float)$raw;
        }
        if (is_string($raw)) {
            $raw = trim($raw);
            if (is_numeric($raw)) {
                return (float)$raw;
            }
            if (preg_match('/^(\d+(?:\.\d+)?|\.\d+)(pt|px|mm|cm|in)$/i', $raw, $m)) {
                return (float)$m[1] * $absolute[strtolower($m[2])];
            }
            $inherited = $this->pdf->getStyleManager()->getCurrentFontSize();
            if (is_numeric($inherited)) {
                return (float)$inherited;
            }
        }
        return 12.0;
    };

    return match ($unit) {
        'em', 'rem' => $numeric * $baseFontSize(),
        '%' => $numeric * $baseFontSize() / 100,
        default => (float)$value,
    };
}
/**
 * Builds a compound selector of the form `tag#id.class1.class2` for an element.
 *
 * @param DOMElement $element Element to describe.
 *
 * @return string Tag name followed by the id (if any) and each non-empty class.
 */
private function generateCssSelector(DOMElement $element): string
{
    $segments = [$element->tagName];

    if ($element->hasAttribute('id')) {
        $segments[] = '#' . $element->getAttribute('id');
    }

    if ($element->hasAttribute('class')) {
        foreach (explode(' ', $element->getAttribute('class')) as $rawClass) {
            $className = trim($rawClass);
            if (empty($className)) {
                continue;
            }
            $segments[] = '.' . $className;
        }
    }

    return implode('', $segments);
}
/**
 * Tests whether a (possibly comma-separated) selector matches an element.
 *
 * Supported per alternative: `*`, a tag name, and/or any number of `#id` and
 * `.class` qualifiers (e.g. `p`, `.note`, `#top`, `p.note.big`). Selectors
 * using combinators, pseudo-classes or attribute tests are not supported and
 * never match.
 *
 * @param string     $selector Selector list as written in the stylesheet.
 * @param DOMElement $element  Element to test.
 *
 * @return bool True when at least one alternative matches.
 */
private function selectorMatchesElement(string $selector, DOMElement $element): bool
{
    foreach (explode(',', $selector) as $candidate) {
        $candidate = trim($candidate);
        if ($candidate === '') {
            continue;
        }
        if ($candidate === '*') {
            return true;
        }

        // Optional tag (or *), then only #id / .class qualifiers. Anchoring the
        // whole string prevents prefix accidents such as `pre.code` matching
        // <p class="code">, and allows selectors with no tag at all.
        $pattern = '/^(\*|[a-zA-Z][a-zA-Z0-9-]*)?((?:[#.][a-zA-Z0-9_-]+)*)$/';
        if (!preg_match($pattern, $candidate, $parts)) {
            continue;
        }
        $tag = $parts[1] ?? '';
        $qualifiers = $parts[2] ?? '';

        if ($tag !== '' && $tag !== '*' && strcasecmp($tag, $element->tagName) !== 0) {
            continue;
        }

        if ($qualifiers === '' || $this->checkAttributeSelectors($qualifiers, $element)) {
            return true;
        }
    }

    return false;
}
/**
 * Checks the `#id` and `.class` qualifiers of a selector against an element.
 *
 * @param string     $selector Selector fragment containing qualifiers, e.g. "#top.note".
 * @param DOMElement $element  Element to test.
 *
 * @return bool True when the fragment contains at least one qualifier and the
 *              element satisfies all of them; false otherwise.
 */
private function checkAttributeSelectors(string $selector, DOMElement $element): bool
{
    if (!preg_match_all('/([#.])([a-zA-Z0-9_-]+)/', $selector, $matches, PREG_SET_ORDER)) {
        return false;
    }

    // Class names may be separated by any whitespace, not just a single space.
    $classes = preg_split('/\s+/', trim($element->getAttribute('class')), -1, PREG_SPLIT_NO_EMPTY) ?: [];

    foreach ($matches as [, $type, $value]) {
        if ($type === '#') {
            if (!$element->hasAttribute('id') || $element->getAttribute('id') !== $value) {
                return false;
            }
        } elseif (!in_array($value, $classes, true)) {
            // Strict comparison: numeric-looking class names must match exactly.
            return false;
        }
    }

    return true;
}
/**
 * Flattens parsed CSS rules into a selector => declarations map.
 *
 * Plain rules contribute one entry per selector. `@media` blocks are included
 * only when their parameters mention `screen`; other at-rules are ignored.
 * When a selector occurs more than once its declarations are merged, with
 * later declarations overriding earlier ones property by property.
 *
 * @param array $parsedRules Rules as produced by the CSS parser: entries with
 *                           'type' plus 'selectors'/'declarations' (rule) or
 *                           'name'/'params'/'rules' (at-rule).
 *
 * @return array Map of selector => (property => value).
 */
private function flattenCssRules(array $parsedRules): array
{
    $flatRules = [];

    foreach ($parsedRules as $rule) {
        $type = $rule['type'] ?? null;

        if ($type === 'rule') {
            foreach ($rule['selectors'] as $selector) {
                $flatRules[$selector] = array_merge($flatRules[$selector] ?? [], $rule['declarations']);
            }
            continue;
        }

        // Handle media queries (simple screen-only for now)
        $isScreenMedia = $type === 'at-rule'
            && ($rule['name'] ?? null) === 'media'
            && str_contains((string)($rule['params'] ?? ''), 'screen');
        if ($isScreenMedia) {
            foreach ($this->flattenCssRules($rule['rules'] ?? []) as $selector => $declarations) {
                $flatRules[$selector] = array_merge($flatRules[$selector] ?? [], $declarations);
            }
        }
    }

    return $flatRules;
}
/**
 * Expands CSS shorthands into the longhand properties the renderer reads.
 *
 * - `margin` / `padding`  -> `{box}-top|right|bottom|left` (shorthand removed)
 * - `border`              -> `border-width|style|color`    (shorthand removed)
 * - `border-{side}`       -> `border-{side}-width|style|color` (shorthand removed)
 * - `border-width|style|color` -> `border-{side}-width|style|color`
 *   (the uniform property is kept because callers read it as well)
 *
 * A longhand that is already present is never overwritten by a shorthand.
 *
 * @param array $styles Map of CSS property => raw value.
 *
 * @return array The same map with shorthands expanded.
 */
private function expandShorthandStyles(array $styles): array
{
    $sides = ['top', 'right', 'bottom', 'left'];

    foreach (['margin', 'padding'] as $box) {
        if (!isset($styles[$box])) {
            continue;
        }
        $values = $this->parseBoxValues((string)$styles[$box], false);
        foreach ($sides as $index => $side) {
            $styles["{$box}-{$side}"] ??= $values[$index];
        }
        unset($styles[$box]);
    }

    if (isset($styles['border'])) {
        foreach ($this->parseBorderShorthand((string)$styles['border']) as $property => $value) {
            $styles[$property] ??= $value;
        }
        unset($styles['border']);
    }

    foreach ($sides as $side) {
        $key = "border-{$side}";
        if (!isset($styles[$key])) {
            continue;
        }
        foreach ($this->parseBorderShorthand((string)$styles[$key]) as $property => $value) {
            // 'border-width' -> 'border-{side}-width': drop the leading 'border-'.
            $styles["{$key}-" . substr($property, 7)] ??= $value;
        }
        unset($styles[$key]);
    }

    // Splits a 1-4 value list into [top, right, bottom, left]. Functional
    // notation such as rgb(255, 0, 0) is kept together as one value.
    $toBox = static function (string $value): array {
        preg_match_all('/[^\s(]+\([^)]*\)|\S+/', trim($value), $found);
        $parts = $found[0];
        return match (count($parts)) {
            0 => ['0', '0', '0', '0'],
            1 => [$parts[0], $parts[0], $parts[0], $parts[0]],
            2 => [$parts[0], $parts[1], $parts[0], $parts[1]],
            3 => [$parts[0], $parts[1], $parts[2], $parts[1]],
            default => [$parts[0], $parts[1], $parts[2], $parts[3]],
        };
    };

    foreach (['width', 'style', 'color'] as $suffix) {
        $property = "border-{$suffix}";
        if (!isset($styles[$property])) {
            continue;
        }
        $values = $toBox((string)$styles[$property]);
        foreach ($sides as $index => $side) {
            // The longhand name is border-{side}-{suffix}, which is what the
            // renderer looks up (not border-{suffix}-{side}).
            $styles["border-{$side}-{$suffix}"] ??= $values[$index];
        }
    }

    // No recursive pass: one pass in the order above expands every shorthand,
    // and an unconditional self-call would never terminate.
    return $styles;
}
/**
 * Expands a 1-4 value CSS box shorthand to [top, right, bottom, left].
 *
 * @param string $value    Whitespace-separated values, e.g. "4pt 8pt".
 * @param bool   $toPoints When true each value is converted to points;
 *                         otherwise the raw strings are returned.
 *
 * @return array Four values in top, right, bottom, left order; an empty
 *               input yields four '0' strings.
 */
private function parseBoxValues(string $value, bool $toPoints): array
{
    $value = trim($value);
    if ($value === '') {
        return ['0', '0', '0', '0'];
    }

    $tokens = preg_split('/\s+/', $value);

    // Standard CSS rule: missing sides mirror their opposite side.
    $box = match (count($tokens)) {
        1 => [$tokens[0], $tokens[0], $tokens[0], $tokens[0]],
        2 => [$tokens[0], $tokens[1], $tokens[0], $tokens[1]],
        3 => [$tokens[0], $tokens[1], $tokens[2], $tokens[1]],
        default => [$tokens[0], $tokens[1], $tokens[2], $tokens[3]],
    };

    if (!$toPoints) {
        return $box;
    }

    return array_map(
        fn ($side) => is_numeric($side) ? (float)$side : $this->convertCssToPoints('box', $side),
        $box
    );
}
/**
 * Splits a `border` shorthand into width, style and colour.
 *
 * Recognised tokens: a length (optionally with px, pt, mm, cm, in, em, rem or
 * %), the width keywords thin/medium/thick, a line style keyword, a #rgb or
 * #rrggbb colour, and rgb()/rgba() notation (which may contain spaces).
 * Unrecognised tokens are ignored. Missing parts default to 1px solid #000000.
 *
 * @param string $value Shorthand value, e.g. "1px solid #000".
 *
 * @return array Map with the keys 'border-width', 'border-style', 'border-color'.
 */
private function parseBorderShorthand(string $value): array
{
    $out = ['border-width' => null, 'border-style' => null, 'border-color' => null];

    // Tokenise on whitespace but keep functional notation, e.g. rgb(255, 0, 0),
    // together as a single token.
    preg_match_all('/[^\s(]+\([^)]*\)|\S+/', trim($value), $found);
    $tokens = $found[0];

    $lineStyles = ['none', 'solid', 'dashed', 'dotted', 'double', 'groove', 'ridge', 'inset', 'outset'];
    // Keyword widths mapped to the values browsers conventionally use.
    $keywordWidths = ['thin' => '1px', 'medium' => '3px', 'thick' => '5px'];

    foreach ($tokens as $token) {
        $lower = strtolower($token);
        if (preg_match('/^(\d+(\.\d+)?|\.\d+)(px|pt|mm|cm|in|em|rem|%)?$/', $lower)) {
            $out['border-width'] = $token;
        } elseif (isset($keywordWidths[$lower])) {
            $out['border-width'] = $keywordWidths[$lower];
        } elseif (in_array($lower, $lineStyles, true)) {
            $out['border-style'] = $lower;
        } elseif (preg_match('/^#([0-9a-f]{3}|[0-9a-f]{6})$/i', $token) || str_starts_with($lower, 'rgb')) {
            $out['border-color'] = $token;
        }
    }

    $out['border-style'] ??= 'solid';
    $out['border-width'] ??= '1px';
    $out['border-color'] ??= '#000000';

    return $out;
}
/**
 * Converts a CSS length to points, resolving percentages per property.
 *
 * Absolute units (pt, px at 96 dpi, mm, cm, in) convert directly. A
 * percentage is relative to the PDF content-area width for box-model
 * properties and to the font size for everything else; em/rem are relative to
 * the font size. Values that do not look like "<number><unit>" are delegated
 * to convertToPoints().
 *
 * @param string $property CSS property the value belongs to (decides the % base).
 * @param string $value    CSS length such as "10px", "50%" or "1.5em".
 *
 * @return float Length in points; 0.0 for an empty value.
 */
private function convertCssToPoints(string $property, string $value): float
{
    $value = trim($value);
    if ($value === '') {
        return 0.0;
    }
    if (is_numeric($value)) {
        return (float)$value;
    }
    if (!preg_match('/^(-?\d+(\.\d+)?)([a-z%]+)$/i', $value, $parts)) {
        return $this->convertToPoints($value);
    }
    $num = (float)$parts[1];
    $unit = strtolower($parts[3]);

    // Points per absolute unit.
    $absolute = ['pt' => 1.0, 'px' => 0.75, 'mm' => 2.83465, 'cm' => 28.3465, 'in' => 72.0];
    if (isset($absolute[$unit])) {
        return $num * $absolute[$unit];
    }

    // NOTE(review): the height properties are resolved against the content
    // width as well; kept as in the original - confirm this is intended.
    $widthProps = [
        'margin', 'margin-top', 'margin-right', 'margin-bottom', 'margin-left',
        'padding', 'padding-top', 'padding-right', 'padding-bottom', 'padding-left',
        'border-width', 'border-top-width', 'border-right-width', 'border-bottom-width', 'border-left-width',
        'width', 'left', 'right', 'max-width', 'min-width',
        'height', 'max-height', 'min-height',
    ];
    $isWidthBased = in_array(strtolower($property), $widthProps, true);

    // Base font size in points. A plain (float) cast of the raw value would
    // read "16px" as 16pt and "2em" as 2pt, so absolute sizes are converted
    // properly and relative ones resolve against the style manager's current
    // size (assumed to be numeric points - confirm against the StyleManager).
    $baseFontSize = function () use ($absolute): float {
        $raw = $this->getStyleValue('font-size', null, false);
        if (is_int($raw) || is_float($raw)) {
            return (float)$raw;
        }
        if (is_string($raw)) {
            $raw = trim($raw);
            if (is_numeric($raw)) {
                return (float)$raw;
            }
            if (preg_match('/^(\d+(?:\.\d+)?|\.\d+)(pt|px|mm|cm|in)$/i', $raw, $m)) {
                return (float)$m[1] * $absolute[strtolower($m[2])];
            }
            $inherited = $this->pdf->getStyleManager()->getCurrentFontSize();
            if (is_numeric($inherited)) {
                return (float)$inherited;
            }
        }
        return 12.0;
    };

    return match ($unit) {
        '%' => $isWidthBased
            ? ($num * $this->pdf->getContentAreaWidth() / 100.0)
            : ($num * $baseFontSize() / 100.0),
        'em', 'rem' => $num * $baseFontSize(),
        default => (float)$value,
    };
}
/**
 * Expands a `border-radius` shorthand to four corner radii in points.
 *
 * Anything after a "/" (the vertical radii of elliptical corners) is dropped.
 *
 * @param string $value Shorthand value with 1-4 radii, e.g. "4pt 8pt".
 *
 * @return array Radii as [top-left, top-right, bottom-right, bottom-left].
 */
private function parseRadiusBox(string $value): array
{
    // Keep only the horizontal radii.
    $horizontal = preg_replace('/\s*\/\s*.*/', '', trim($value));

    $tokens = array_values(array_filter(
        preg_split('/\s+/', $horizontal),
        fn ($token) => $token !== ''
    ));
    if (empty($tokens)) {
        return [0.0, 0.0, 0.0, 0.0];
    }

    $radii = array_map(
        fn ($token) => is_numeric($token) ? (float)$token : $this->convertCssToPoints('border-radius', $token),
        array_slice($tokens, 0, 4)
    );

    // Missing corners mirror the diagonally opposite corner.
    return match (count($radii)) {
        1 => [$radii[0], $radii[0], $radii[0], $radii[0]],
        2 => [$radii[0], $radii[1], $radii[0], $radii[1]],
        3 => [$radii[0], $radii[1], $radii[2], $radii[1]],
        default => [$radii[0], $radii[1], $radii[2], $radii[3]],
    };
}
/**
 * Returns whatever the underlying PDF document's output() produces
 * (presumably the serialised PDF bytes - confirm against MiniPdfGlyph).
 *
 * @return string
 */
public function output(): string
{
return $this->pdf->output();
}
/**
 * Delegates to the underlying PDF document's save() with the given filename.
 *
 * @param string $filename Destination passed through unchanged to MiniPdfGlyph::save().
 */
public function save(string $filename): void
{
$this->pdf->save($filename);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment