Skip to content

Instantly share code, notes, and snippets.

@kaid
Last active December 11, 2024 01:52
Show Gist options
  • Select an option

  • Save kaid/e7c68b22b908457cb6c5c608b5790c37 to your computer and use it in GitHub Desktop.

Select an option

Save kaid/e7c68b22b908457cb6c5c608b5790c37 to your computer and use it in GitHub Desktop.
// ==UserScript==
// @name HN Tree to Markdown (Per-Node)
// @namespace http://tampermonkey.net/
// @version 0.18
// @description Extract HN comment subtrees as markdown with simplified code block handling
// @author Kaid
// @match https://news.ycombinator.com/item?*
// @grant none
// ==/UserScript==
(function() {
'use strict';
/**
 * Nesting depth of a comment row, derived from its indent spacer image.
 *
 * Every 40 units of the image's width count as one level.
 *
 * @param {Element} commentRow - Row that may contain an `.ind img` element.
 * @returns {number} The depth, or 0 when the image is missing or its width
 *   cannot be parsed.
 */
function getIndentLevel(commentRow) {
  const indentImg = commentRow.querySelector('.ind img');
  if (!indentImg) {
    return 0;
  }
  const width = Number.parseInt(indentImg.width, 10);
  // A NaN level would make every `level <= startLevel` comparison false in
  // callers, so treat an unreadable width as top level instead.
  return Number.isNaN(width) ? 0 : width / 40;
}
/**
 * Look up the permalink of a comment row.
 *
 * @param {Element} row - Row to search for an `.age a` anchor.
 * @returns {string} The anchor's `href`, or '' when the row has no such anchor.
 */
function getCommentUrl(row) {
  const permalink = row.querySelector('.age a');
  if (!permalink) {
    return '';
  }
  return permalink.href;
}
/**
 * Read the age label of a comment row.
 *
 * @param {Element} row - Row to search for an `.age` element.
 * @returns {string} The element's text with surrounding whitespace removed,
 *   or '' when the row has no `.age` element.
 */
function getAge(row) {
  const ageElement = row.querySelector('.age');
  if (!ageElement) {
    return '';
  }
  return ageElement.textContent.trim();
}
/**
 * Flatten the body of one comment into markdown-ready text.
 *
 * Only the direct child nodes of `commentDiv` are visited: text nodes, links
 * and other inline elements contribute their text to the current paragraph,
 * each <p> starts a new paragraph, and each <pre> becomes a fenced code
 * block. Lines the author typed between literal ``` fences are kept verbatim.
 *
 * @param {Element} commentDiv - Element whose `childNodes` hold the comment.
 * @param {string} indent - Prefix put in front of every output line except
 *   the very first one, so the text lines up under the caller's list item.
 * @returns {{text: string, links: string[]}} The formatted text, plus the
 *   hrefs of direct-child links whose visible text differs from their target.
 */
function processCommentText(commentDiv, indent) {
  const links = [];
  // Sentinels: they let a whole <pre> survive the split on '\n' below as part
  // of a single line; they are swapped back for fences/newlines afterwards.
  const codeNewline = '____CODE_NEWLINE';
  const codeDelimiter = '____CODE_DELIMITER';

  // Pass 1: collapse the child nodes into one string.
  let rawText = '';
  commentDiv.childNodes.forEach(node => {
    if (node.nodeType === Node.TEXT_NODE) {
      rawText += node.textContent;
    } else if (node.nodeName === 'P') {
      rawText += `\n\n${node.textContent}`;
    } else if (node.nodeName === 'PRE') {
      rawText += codeDelimiter + node.textContent.replaceAll('\n', codeNewline) + codeDelimiter;
    } else if (node.nodeName === 'A') {
      const linkText = node.textContent;
      const href = node.href;
      // Always keep the visible text: a link that displays its own URL must
      // not vanish from the sentence. Only targets that the text does not
      // already show are collected for the caller's link list.
      rawText += linkText;
      if (linkText !== href) {
        links.push(href);
      }
    } else if (node.nodeType === Node.ELEMENT_NODE) {
      // Any other element child is treated as inline markup; keep its text so
      // words wrapped in it are not lost.
      // NOTE(review): assumes no non-content elements sit directly inside
      // commentDiv - confirm against the live page markup.
      rawText += node.textContent;
    }
  });

  // Pass 2: group lines into paragraphs; blank lines separate paragraphs and
  // literal ``` lines open/close a verbatim block.
  const paragraphs = [];
  let currentParagraph = [];
  let inCodeBlock = false;
  let codeBlockContent = '';
  const flushParagraph = () => {
    if (currentParagraph.length > 0) {
      paragraphs.push(currentParagraph.join(' ').trim());
      currentParagraph = [];
    }
  };

  for (const line of rawText.split('\n')) {
    if (line.includes('```')) {
      if (inCodeBlock) {
        // Closing fence: emit the collected block as its own paragraph.
        codeBlockContent += line;
        paragraphs.push(codeBlockContent);
        inCodeBlock = false;
        codeBlockContent = '';
      } else {
        // Opening fence: whatever text preceded it is a finished paragraph.
        flushParagraph();
        inCodeBlock = true;
        codeBlockContent = `${line}\n`;
      }
      continue;
    }
    if (inCodeBlock) {
      // Already fenced, so a <pre> in here only needs its newlines restored.
      codeBlockContent += `${line.replaceAll(codeDelimiter, '').replaceAll(codeNewline, '\n')}\n`;
      continue;
    }
    const trimmedLine = line.trim().replaceAll(codeDelimiter, '\n```\n').replaceAll(codeNewline, '\n');
    if (trimmedLine === '') {
      flushParagraph();
    } else {
      currentParagraph.push(trimmedLine);
    }
  }
  if (inCodeBlock) {
    // The author never closed the fence; close it so the output stays valid.
    codeBlockContent += '```';
    paragraphs.push(codeBlockContent);
  } else {
    flushParagraph();
  }

  // Pass 3: indent. The first line of the first paragraph continues the
  // caller's "- **user** ...:" line and stays unprefixed; every other line,
  // including continuation lines of that first paragraph, gets `indent`.
  const text = paragraphs
    .filter(paragraph => paragraph)
    .map((paragraph, paragraphIndex) => {
      const body = paragraph
        .split('\n')
        .map((line, lineIndex) => (paragraphIndex === 0 && lineIndex === 0 ? line : indent + line))
        .join('\n');
      // Later paragraphs are preceded by an empty line.
      return paragraphIndex === 0 ? body : `\n${body}`;
    })
    .join('\n');

  return { text, links };
}
/**
 * Render a comment and all of its replies as a nested markdown list.
 *
 * Starting at `startRow`, following siblings are walked until a `comtr` row
 * at the same or a shallower level than `startRow` is reached. Rows without
 * the `comtr` class, and comment rows without a `.commtext` element, are
 * skipped.
 *
 * @param {Element} startRow - Row of the comment at the root of the subtree.
 * @returns {string} One "- **user** [age](url): text" item per comment, each
 *   followed by its collected links as "<url>" lines, newline-terminated.
 */
function processSubtree(startRow) {
  // Two spaces per level: content nested under a "- " marker has to start at
  // the marker's content column, otherwise replies and continuation
  // paragraphs are not rendered as part of the parent item.
  const indentUnit = '  ';
  const startLevel = getIndentLevel(startRow);
  let markdown = '';

  for (let row = startRow; row; row = row.nextElementSibling) {
    if (!row.classList.contains('comtr')) {
      continue;
    }
    const level = getIndentLevel(row);
    if (row !== startRow && level <= startLevel) {
      // Reached a sibling or an ancestor's sibling: the subtree is complete.
      break;
    }
    const commentDiv = row.querySelector('.commtext');
    if (!commentDiv) {
      continue;
    }

    const indent = indentUnit.repeat(level - startLevel);
    // Everything belonging to this item sits one unit deeper than its marker.
    const bodyIndent = indent + indentUnit;
    const username = row.querySelector('.hnuser')?.textContent || '[deleted]';
    const commentUrl = getCommentUrl(row);
    const age = getAge(row);
    const { text, links } = processCommentText(commentDiv, bodyIndent);

    markdown += `${indent}- **${username}** [${age}](${commentUrl}): ${text}`;
    for (const link of links) {
      markdown += `\n${bodyIndent}<${link}>`;
    }
    markdown += '\n';
  }
  return markdown;
}
/**
 * Copy a string to the clipboard through a temporary, selected <textarea>.
 *
 * @param {string} text - Text to place on the clipboard.
 * @returns {boolean} Whatever `document.execCommand('copy')` reported.
 */
function copyToClipboard(text) {
  const textarea = document.createElement('textarea');
  textarea.value = text;
  // Keep the helper element out of the page flow so that adding and selecting
  // it does not shift the layout or scroll the page.
  textarea.style.cssText = 'position: fixed; top: 0; left: -9999px; opacity: 0;';
  document.body.appendChild(textarea);
  try {
    textarea.select();
    return document.execCommand('copy');
  } finally {
    // Remove the helper even when selecting or copying throws, so a failed
    // copy never leaves a stray textarea in the document.
    document.body.removeChild(textarea);
  }
}
// Add an extract button to every comment row on the page.
// The labels are constants rather than being read back from the button, so a
// second click while " copied!" is still showing cannot capture the temporary
// label as the one to restore.
const extractLabel = '⎋';
const copiedLabel = `${extractLabel} copied!`;
const commentRows = document.querySelectorAll('tr.athing.comtr');
commentRows.forEach(row => {
  const comhead = row.querySelector('.comhead');
  if (!comhead) return;

  const extractButton = document.createElement('a');
  extractButton.href = 'javascript:void(0)';
  extractButton.textContent = extractLabel;
  extractButton.title = 'Extract subtree as markdown';
  extractButton.style.cssText = 'margin-left: 4px; text-decoration: none; color: #828282;';

  let resetTimer;
  extractButton.addEventListener('click', (e) => {
    e.preventDefault();
    copyToClipboard(processSubtree(row));
    extractButton.textContent = copiedLabel;
    // Restart the countdown so an earlier click's timer cannot reset the
    // label before this click's two seconds are up.
    clearTimeout(resetTimer);
    resetTimer = setTimeout(() => {
      extractButton.textContent = extractLabel;
    }, 2000);
  });

  // Prefer the row's `.navs` container; fall back to the comment head itself
  // so the button is still added when that container is absent.
  const container = comhead.querySelector('.navs') ?? comhead;
  container.appendChild(document.createTextNode(' '));
  container.appendChild(extractButton);
});
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment