Last active
December 11, 2024 01:52
-
-
Save kaid/e7c68b22b908457cb6c5c608b5790c37 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // ==UserScript== | |
| // @name HN Tree to Markdown (Per-Node) | |
| // @namespace http://tampermonkey.net/ | |
| // @version 0.18 | |
| // @description Extract HN comment subtrees as markdown with simplified code block handling | |
| // @author Kaid | |
| // @match https://news.ycombinator.com/item?* | |
| // @grant none | |
| // ==/UserScript== | |
| (function() { | |
| 'use strict'; | |
/**
 * Compute the nesting depth of a comment row from its indent spacer image.
 *
 * The depth is the width of the `.ind img` element divided by 40.
 *
 * @param {Element} commentRow - Row element to inspect.
 * @returns {number} The nesting level, or 0 when the row has no indent image
 *   or its width cannot be parsed as a number.
 */
function getIndentLevel(commentRow) {
  const WIDTH_PER_LEVEL = 40;
  const indentImg = commentRow.querySelector('.ind img');
  if (!indentImg) {
    return 0;
  }
  // Explicit radix; a missing/non-numeric width would otherwise produce NaN,
  // which makes every later level comparison false.
  const width = Number.parseInt(indentImg.width, 10);
  return Number.isNaN(width) ? 0 : width / WIDTH_PER_LEVEL;
}
/**
 * Look up the permalink of a comment row.
 *
 * @param {Element} row - Row element to inspect.
 * @returns {string} The `href` of the row's `.age a` link, or an empty string
 *   when the row has no such link.
 */
function getCommentUrl(row) {
  const permalink = row.querySelector('.age a');
  if (!permalink) {
    return '';
  }
  return permalink.href;
}
/**
 * Read the age label of a comment row.
 *
 * @param {Element} row - Row element to inspect.
 * @returns {string} The trimmed text of the row's `.age` element, or an empty
 *   string when the row has no such element.
 */
function getAge(row) {
  const ageElement = row.querySelector('.age');
  if (!ageElement) {
    return '';
  }
  return ageElement.textContent.trim();
}
// Sentinels that temporarily replace <pre> boundaries and the newlines inside
// <pre> content, so a code block survives the later split on '\n' as one line.
const CODE_NEWLINE = '____CODE_NEWLINE';
const CODE_DELIMITER = '____CODE_DELIMITER';

/**
 * Flatten the direct children of a comment element into one raw string and
 * gather the link targets that the visible text does not already spell out.
 *
 * @param {Element} commentDiv - Element whose `childNodes` are walked.
 * @returns {{rawText: string, links: string[]}} Raw text (with code
 *   sentinels) and the collected hrefs.
 */
function collectRawText(commentDiv) {
  let rawText = '';
  const links = [];
  // Only record an href when the link text differs from it; otherwise the
  // text itself already carries the full URL.
  const recordLink = (anchor) => {
    if (anchor.textContent !== anchor.href) {
      links.push(anchor.href);
    }
  };

  commentDiv.childNodes.forEach((node) => {
    if (node.nodeType === Node.TEXT_NODE) {
      rawText += node.textContent;
      return;
    }
    switch (node.nodeName) {
      case 'P':
        rawText += `\n\n${node.textContent}`;
        // Links nested in later paragraphs are collected the same way as
        // links that sit directly in the first paragraph.
        node.querySelectorAll('a').forEach(recordLink);
        break;
      case 'PRE':
        rawText += CODE_DELIMITER + node.textContent.replaceAll('\n', CODE_NEWLINE) + CODE_DELIMITER;
        break;
      case 'A':
        // Always keep the link text; dropping it when it equals the href
        // would remove the URL from the output entirely.
        rawText += node.textContent;
        recordLink(node);
        break;
      case 'I':
        // Emphasised text is kept as plain text instead of being discarded.
        rawText += node.textContent;
        break;
      default:
        break;
    }
  });

  return { rawText, links };
}

/**
 * Split raw comment text into paragraphs, keeping fenced code blocks
 * (lines containing ```), and <pre> sentinels, intact.
 *
 * @param {string} rawText - Output of `collectRawText`.
 * @returns {string[]} Paragraph strings; code paragraphs contain ``` fences.
 */
function splitIntoParagraphs(rawText) {
  const paragraphs = [];
  let currentParagraph = [];
  let inCodeBlock = false;
  let codeBlockContent = '';

  const flushParagraph = () => {
    if (currentParagraph.length > 0) {
      paragraphs.push(currentParagraph.join(' ').trim());
      currentParagraph = [];
    }
  };

  for (const line of rawText.split('\n')) {
    // A line containing a literal fence toggles code-block mode.
    if (line.includes('```')) {
      if (inCodeBlock) {
        codeBlockContent += line;
        paragraphs.push(codeBlockContent);
        inCodeBlock = false;
        codeBlockContent = '';
      } else {
        flushParagraph();
        inCodeBlock = true;
        codeBlockContent = `${line}\n`;
      }
      continue;
    }

    if (inCodeBlock) {
      // Inside a literal fence the line is kept verbatim; <pre> sentinels
      // are unwrapped without adding a nested fence.
      codeBlockContent += `${line.replaceAll(CODE_DELIMITER, '').replaceAll(CODE_NEWLINE, '\n')}\n`;
      continue;
    }

    // Regular text: <pre> sentinels become fenced code inside the paragraph.
    const trimmedLine = line
      .trim()
      .replaceAll(CODE_DELIMITER, '\n```\n')
      .replaceAll(CODE_NEWLINE, '\n');
    if (trimmedLine === '') {
      flushParagraph();
    } else {
      currentParagraph.push(trimmedLine);
    }
  }

  if (inCodeBlock) {
    // An unterminated fence is closed so the output stays well-formed.
    codeBlockContent += '```';
    paragraphs.push(codeBlockContent);
  } else {
    flushParagraph();
  }

  return paragraphs;
}

/**
 * Join paragraphs into the final text, indenting continuation lines.
 *
 * @param {string[]} paragraphs - Output of `splitIntoParagraphs`.
 * @param {string} indent - Prefix for every line after the first.
 * @returns {string} The formatted text.
 */
function indentParagraphs(paragraphs, indent) {
  return paragraphs
    .filter(Boolean) // Remove empty paragraphs
    .map((paragraph, index) => {
      const isFirst = index === 0;
      if (paragraph.includes('```')) {
        const lines = paragraph.split('\n');
        if (isFirst && !lines[0].startsWith('```')) {
          // The first line continues the caller's header line; only the
          // remaining lines (including the code) need indentation.
          const [head, ...rest] = lines;
          return [head, ...rest.map((line) => indent + line)].join('\n');
        }
        // A fence must start on its own indented line, even when it opens
        // the comment.
        return `\n${lines.map((line) => indent + line).join('\n')}`;
      }
      return isFirst ? paragraph : `\n${indent}${paragraph}`;
    })
    .join('\n');
}

/**
 * Convert a comment element into indented markdown-friendly text.
 *
 * Direct child text nodes, `<p>`, `<pre>`, `<a>` and `<i>` children are
 * flattened to text; `<pre>` content becomes fenced code blocks. Paragraphs
 * after the first are separated by a blank line and prefixed with `indent`.
 *
 * @param {Element} commentDiv - Element holding the comment body.
 * @param {string} indent - Prefix applied to every line after the first.
 * @returns {{text: string, links: string[]}} The formatted text and the hrefs
 *   of links whose visible text differs from their target.
 */
function processCommentText(commentDiv, indent) {
  const { rawText, links } = collectRawText(commentDiv);
  const paragraphs = splitIntoParagraphs(rawText);
  const text = indentParagraphs(paragraphs, indent);
  return { text, links };
}
/**
 * Render a comment and all of its replies as a nested markdown list.
 *
 * Walks forward through the sibling rows of `startRow`, skipping rows without
 * the `comtr` class, and stops at the first later comment row whose indent
 * level is not deeper than the starting row's.
 *
 * @param {Element} startRow - Row of the comment at the root of the subtree.
 * @returns {string} One list item per comment that has a `.commtext` element,
 *   each terminated by a newline.
 */
function processSubtree(startRow) {
  const startLevel = getIndentLevel(startRow);
  const entries = [];

  for (let row = startRow; row; row = row.nextElementSibling) {
    if (!row.classList.contains('comtr')) {
      continue;
    }

    const level = getIndentLevel(row);
    // Reaching a sibling or ancestor-level comment means the subtree is done.
    if (row !== startRow && level <= startLevel) {
      break;
    }

    const commentDiv = row.querySelector('.commtext');
    if (!commentDiv) {
      continue;
    }

    const indent = ' '.repeat(level - startLevel);
    const continuationIndent = indent + ' ';
    const username = row.querySelector('.hnuser')?.textContent || '[deleted]';
    const commentUrl = getCommentUrl(row);
    const age = getAge(row);
    const { text, links } = processCommentText(commentDiv, continuationIndent);

    let entry = `${indent}- **${username}** [${age}](${commentUrl}): ${text}`;
    if (links.length > 0) {
      // Each collected link goes on its own continuation line.
      entry += links.map((link) => `\n${continuationIndent}<${link}>`).join('');
    }
    entries.push(`${entry}\n`);
  }

  return entries.join('');
}
/**
 * Copy a string to the clipboard via a temporary textarea.
 *
 * The textarea is appended to `document.body`, selected, copied with
 * `document.execCommand('copy')`, and then removed again.
 *
 * @param {string} text - Text to place on the clipboard.
 * @returns {void}
 */
function copyToClipboard(text) {
  const textarea = document.createElement('textarea');
  textarea.value = text;
  document.body.appendChild(textarea);
  try {
    textarea.select();
    document.execCommand('copy');
  } finally {
    // Always detach the helper element, even if selecting or copying throws,
    // so a failed copy does not leave a stray textarea on the page.
    document.body.removeChild(textarea);
  }
}
// Add an extract button to every comment row that has a `.comhead .navs`
// container. Clicking it copies the comment's subtree as markdown and shows
// a short "copied!" confirmation on the button.
const EXTRACT_LABEL = '⎋';
const COPIED_FEEDBACK_MS = 2000;
const commentRows = document.querySelectorAll('tr.athing.comtr');
commentRows.forEach((row) => {
  const comhead = row.querySelector('.comhead');
  if (!comhead) return;
  const navs = comhead.querySelector('.navs');
  // Without a navs container there is nowhere to attach the button.
  if (!navs) return;

  const extractButton = document.createElement('a');
  extractButton.href = 'javascript:void(0)';
  extractButton.textContent = EXTRACT_LABEL;
  extractButton.title = 'Extract subtree as markdown';
  extractButton.style.cssText = 'margin-left: 4px; text-decoration: none; color: #828282;';

  let resetTimer;
  extractButton.addEventListener('click', (e) => {
    e.preventDefault();
    copyToClipboard(processSubtree(row));
    // Use the fixed label rather than the current text: reading the text on
    // a second click inside the feedback window would capture "⎋ copied!"
    // as the label and leave the button stuck showing it.
    extractButton.textContent = `${EXTRACT_LABEL} copied!`;
    clearTimeout(resetTimer);
    resetTimer = setTimeout(() => {
      extractButton.textContent = EXTRACT_LABEL;
    }, COPIED_FEEDBACK_MS);
  });

  navs.appendChild(document.createTextNode(' '));
  navs.appendChild(extractButton);
});
| })(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment