@digitalWestie
Last active March 9, 2026 17:30
Context-aware line deduplication

Summary. Deduplicate lines across one or more input files while preserving structure: every unique line is kept; every duplicate line is kept only at its first occurrence; and each unique line keeps up to N lines of context (the lines immediately before it). Blank or whitespace-only lines are never deduplicated, and output order matches input order. This is useful for compressing multi-page HTML→Markdown conversions, where repeated headers and navigation would be removed by block-level dedup but we still want one copy of shared content plus local context around unique content.


Usage

node compress.js [options] file1 [file2 ...]

Output is written to stdout. A summary is written to stderr.

Options

Option              Description
-n N, --context N   Number of lines to keep before each unique line (default: 4).
--drop              Omit dropped lines (default).
--prepend           Keep dropped lines but prepend them with "COMPRESS: " (for testing).
--help, -h          Show usage and exit.

Summary (stderr)

Example (default: dropped lines are omitted):

Summary:
  Input files:     3
  Input lines:     17
  Input chars:     117
  Output lines:    13 (13 kept, 4 dropped)
  Output chars:    89

With --prepend the summary shows (13 kept, 4 with COMPRESS: prefix) and the output includes those lines prefixed with "COMPRESS: ".


Algorithm

Input: A stream of lines (e.g. from one or more files concatenated in order).
Parameters: Context size N (number of lines to keep before each unique line).
Output: The same stream with duplicates removed under the rules below; dropped lines are omitted by default, or prefixed with COMPRESS: when using --prepend.


Phase 1 — Read and count

  1. Read all input into an array lines[0..L-1] in order (preserving line boundaries and content).
  2. Build a frequency map: for each distinct line string s, count how many indices i have lines[i] === s. Call this freq(s).
  3. Build “first index of”: for each distinct line s, store the smallest i such that lines[i] === s. Call this first(s).
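A minimal sketch of Phase 1 (the sample lines are illustrative; the variable names mirror the full script below):

```javascript
// Phase 1 sketch: count occurrences and record the first index per distinct line.
const lines = ['nav', 'Welcome', 'nav', 'nav', 'Contact'];

const freq = new Map();   // line text -> occurrence count
const first = new Map();  // line text -> smallest index with that text
for (let i = 0; i < lines.length; i++) {
  const s = lines[i];
  freq.set(s, (freq.get(s) ?? 0) + 1);
  if (!first.has(s)) first.set(s, i);
}
console.log(freq.get('nav'), first.get('nav')); // 3 0
```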

Phase 2 — Mark indices to keep

Maintain a set (or boolean array) keep of size L, initially empty/false.

  1. Unique lines For each index i: if freq(lines[i]) === 1, set keep[i] = true.

  2. Context for unique lines For each index i where lines[i] is unique: set keep[j] = true for all j in [max(0, i - N), i - 1] (the N lines immediately before i; fewer if i < N).

  3. First occurrence of duplicates For each distinct line s with freq(s) > 1: set keep[first(s)] = true.

  4. Blank/whitespace-only lines For each index i where lines[i] is empty or contains only whitespace: set keep[i] = true. These lines are never deduplicated.
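The four marking rules can be sketched as follows, with `freq`/`first` hard-coded as Phase 1 would produce them for the sample input, and a context window of N = 1:

```javascript
// Phase 2 sketch: mark the keep set for a small sample input.
const lines = ['nav', 'Welcome', 'nav', 'nav', 'Contact'];
const contextN = 1;
const freq = new Map([['nav', 3], ['Welcome', 1], ['Contact', 1]]);
const first = new Map([['nav', 0], ['Welcome', 1], ['Contact', 4]]);

const keep = new Set();
for (let i = 0; i < lines.length; i++) {
  if (freq.get(lines[i]) === 1) {
    keep.add(i);                                                     // 1. unique line
    for (let j = Math.max(0, i - contextN); j < i; j++) keep.add(j); // 2. its context
  }
}
for (const [s, count] of freq) {
  if (count > 1) keep.add(first.get(s));                             // 3. first occurrence of a duplicate
}
for (let i = 0; i < lines.length; i++) {
  if (lines[i].trim() === '') keep.add(i);                           // 4. blank lines always kept
}
console.log([...keep].sort()); // indices 0, 1, 3, 4 — index 2 is dropped
```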


Phase 3 — Emit output (preserve order)

  1. For i = 0 to L - 1:
    • If keep[i] then output lines[i].
    • Else: output nothing (default), or COMPRESS: + lines[i] (--prepend).
  2. Preserve original line endings / newlines between lines as in the input.
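The emit step, sketched with a hand-made keep set (assumed to come from Phase 2) and `--prepend` mode enabled:

```javascript
// Phase 3 sketch: emit kept lines in order; prefix dropped lines in prepend mode.
const lines = ['nav', 'Welcome', 'nav'];
const keep = new Set([0, 1]);  // assume index 2 is a dropped duplicate
const prepend = true;          // --prepend mode; false for the default --drop

const out = [];
for (let i = 0; i < lines.length; i++) {
  if (keep.has(i)) out.push(lines[i]);
  else if (prepend) out.push('COMPRESS: ' + lines[i]);
}
console.log(out); // [ 'nav', 'Welcome', 'COMPRESS: nav' ]
```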

Summary of what is kept

Line type               When it's kept
Unique (freq === 1)     At every occurrence (there is only one).
Duplicate (freq > 1)    Only at its first occurrence (i === first(lines[i])).
Blank/whitespace-only   Always (every occurrence).
Any line                Also wherever it falls in a context window (within the last N lines before some unique line).

Overlaps (e.g. a duplicate that is also in the context of a unique line, or the first occurrence of a duplicate) are handled by the single keep set; each index is kept at most once in the output.
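Putting the rules together on a tiny input (N = 1; the sample lines are illustrative). Note that the third line, a duplicate, survives because it is context for the unique line after it:

```javascript
// End-to-end sketch: dedupe with context N = 1.
const lines = ['header', 'alpha', 'header', 'beta', 'header'];
const N = 1;

const freq = new Map(), first = new Map();
lines.forEach((s, i) => {
  freq.set(s, (freq.get(s) ?? 0) + 1);
  if (!first.has(s)) first.set(s, i);
});

const keep = new Set();
lines.forEach((s, i) => {
  if (freq.get(s) === 1) {
    keep.add(i);                                           // unique line
    for (let j = Math.max(0, i - N); j < i; j++) keep.add(j); // its context
  }
  if (s.trim() === '') keep.add(i);                        // blanks always kept
});
for (const [s, c] of freq) if (c > 1) keep.add(first.get(s)); // first duplicate

const out = lines.filter((_, i) => keep.has(i));
console.log(out); // [ 'header', 'alpha', 'header', 'beta' ]
```

The final `'header'` (index 4) is the only dropped line: it is a repeat, not a first occurrence, and not in any unique line's context window.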

#!/usr/bin/env node
//
// Context-aware line deduplication.
//
// Usage:
//   node compress.js [options] file1 [file2 ...]  → stdout
//
// Options:
//   -n N, --context N  Number of lines to keep before each unique line (default: 4)
//   --drop             Omit dropped lines (default)
//   --prepend          Keep dropped lines but prepend with "COMPRESS: " (for testing)
const fs = require('fs');

const DROP_MODES = Object.freeze({ drop: 'drop', prepend: 'prepend' });

function parseArgs(argv) {
  const args = argv.slice(2);
  let contextN = 4;
  let dropMode = DROP_MODES.drop;
  const files = [];
  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    if (a === '-n' || a === '--context') {
      contextN = parseInt(args[++i], 10);
      if (!Number.isInteger(contextN) || contextN < 0) {
        console.error('compress: -n/--context must be a non-negative integer');
        process.exit(1);
      }
    } else if (a === '--drop') {
      dropMode = DROP_MODES.drop;
    } else if (a === '--prepend') {
      dropMode = DROP_MODES.prepend;
    } else if (a === '--help' || a === '-h') {
      console.error(`Usage: node compress.js [options] file1 [file2 ...]
  -n N, --context N  Lines of context before each unique line (default: 4)
  --drop             Omit dropped lines (default)
  --prepend          Keep dropped lines, prepend with "COMPRESS: " (for testing)
  --help, -h         Show this help`);
      process.exit(0);
    } else {
      files.push(a);
    }
  }
  return { contextN, dropMode, files };
}
function readLines(files) {
  const lines = [];
  for (const file of files) {
    if (!fs.existsSync(file)) {
      console.error(`compress: File not found: ${file}`);
      process.exit(1);
    }
    const content = fs.readFileSync(file, 'utf8');
    // Split on \n or \r\n; strip a stray trailing \r from a file that ends mid-CRLF.
    const fileLines = content.split(/\r?\n/).map((line) => line.replace(/\r$/, ''));
    // If the file didn't end with a newline, the last element may be ''; keep it as-is.
    lines.push(...fileLines);
  }
  return lines;
}
function run(lines, contextN, dropMode) {
  const L = lines.length;
  if (L === 0) return [];
  const isBlank = (line) => line.trim() === '';

  // Phase 1: frequency and first occurrence
  const freq = new Map();
  const first = new Map();
  for (let i = 0; i < L; i++) {
    const s = lines[i];
    freq.set(s, (freq.get(s) ?? 0) + 1);
    if (!first.has(s)) first.set(s, i);
  }

  // Phase 2: mark indices to keep
  const keep = new Set();

  // 2a. Unique lines
  for (let i = 0; i < L; i++) {
    if (freq.get(lines[i]) === 1) keep.add(i);
  }

  // 2b. Context before each unique line
  for (let i = 0; i < L; i++) {
    if (freq.get(lines[i]) !== 1) continue;
    const start = Math.max(0, i - contextN);
    for (let j = start; j < i; j++) keep.add(j);
  }

  // 2c. First occurrence of each duplicate
  for (const [s, count] of freq) {
    if (count > 1) keep.add(first.get(s));
  }

  // 2d. Never deduplicate blank/whitespace-only lines: always keep them
  for (let i = 0; i < L; i++) {
    if (isBlank(lines[i])) keep.add(i);
  }

  // Phase 3: emit in order (--drop: skip dropped lines; --prepend: prefix them)
  const out = [];
  for (let i = 0; i < L; i++) {
    if (keep.has(i)) {
      out.push(lines[i]);
    } else if (dropMode === DROP_MODES.prepend) {
      out.push('COMPRESS: ' + lines[i]);
    }
  }
  return out;
}
const { contextN, dropMode, files } = parseArgs(process.argv);
if (files.length === 0) {
  console.error('Usage: node compress.js [options] file1 [file2 ...]\n  Use --help for options.');
  process.exit(1);
}

const lines = readLines(files);
const result = run(lines, contextN, dropMode);

// In --prepend mode the output has one entry per input line, so a kept line is
// exactly one whose text is unchanged. (Checking for the "COMPRESS: " prefix
// would miscount input lines that already start with it.)
const keptCount =
  dropMode === DROP_MODES.prepend
    ? result.filter((l, i) => l === lines[i]).length
    : result.length;
const droppedCount = lines.length - keptCount;
const inputChars = lines.join('\n').length;
const outputChars = (result.join('\n') + (result.length ? '\n' : '')).length;
const summaryDetail =
  dropMode === DROP_MODES.prepend
    ? `(${keptCount} kept, ${droppedCount} with COMPRESS: prefix)`
    : `(${keptCount} kept, ${droppedCount} dropped)`;

console.error('Summary:');
console.error(`  Input files:     ${files.length}`);
console.error(`  Input lines:     ${lines.length}`);
console.error(`  Input chars:     ${inputChars}`);
console.error(`  Output lines:    ${result.length} ${summaryDetail}`);
console.error(`  Output chars:    ${outputChars}`);
console.error('');

process.stdout.write(result.join('\n') + (result.length ? '\n' : ''));
process.exit(0);