varenc · March 7, 2026 03:59
diff --git a/zsh-strip-comments.sh b/zsh-strip-comments.sh
 #!/usr/bin/env python3


 ###  Zsh Comments Stripper ###
 # semi parser-based comment stripper for zsh that preserves indentation. 
 # Uses the Pygments Lexer, which is meant for Bash, but works in ZSH too. 
 # Relies on a few hacks and will break in some situations, but works on my ~25k lines of shell script.
 #######

 import sys
 import argparse
 from pygments.lexers.shell import BashLexer
 from pygments.token import Token

 def strip_shell_comments(code):
     """Return *code* with shell comments removed, preserving indentation.

     Every line is processed on its own:

     * A ``#!`` line at the very top of the input is kept, so the stripped
       script stays executable.
     * Lines containing ``(#`` (zsh glob flags such as ``(#i)``) bypass the
       lexer. Full-line comments are dropped; otherwise only the text after
       the last ``" #"`` (space then hash) is cut off.
     * All other lines are tokenized with Pygments' ``BashLexer`` and their
       comment tokens are discarded.

     Afterwards, runs of blank lines (including lines that became empty
     because they held only a comment) are collapsed into one empty line.

     Because each line is lexed in isolation, lexer state from multi-line
     constructs (here-documents, multi-line strings) is not carried over
     from one line to the next.

     Args:
         code: Full text of the shell script.

     Returns:
         The stripped script as a single string whose lines are joined
         with newlines; no final newline is appended.
     """
     lexer = BashLexer()
     processed_lines = []

     for index, line in enumerate(code.splitlines()):
         # 0. SHEBANG PROTECTION
         # The lexer classifies the shebang as a comment, so it has to be
         # kept explicitly. Only the first line of the input qualifies; a
         # later line starting with '#!' is an ordinary comment.
         if index == 0 and line.startswith("#!"):
             processed_lines.append(line.rstrip())
             continue

         # 1. ZSH GLOB FLAG PROTECTION
         # A '(#' is likely a zsh glob flag, which BashLexer reportedly
         # mis-handles, so these lines get a conservative manual split.
         if "(#" in line:
             if line.lstrip().startswith("#"):
                 # Whole line is a comment that merely mentions '(#'.
                 processed_lines.append("")
             elif " #" in line:
                 # Cut only at ' #' (space then hash) so the glob flag
                 # itself is never split.
                 code_part = line.rsplit(" #", 1)[0]
                 processed_lines.append(code_part.rstrip())
             else:
                 processed_lines.append(line.rstrip())
             continue

         # 2. SURGICAL LEXER STRIP
         tokens = list(lexer.get_tokens(line + '\n'))

         # A line counts as code if any token is neither a comment nor
         # whitespace.
         has_code = any(
             ttype not in Token.Comment
             and ttype not in Token.Text.Whitespace
             and value.strip() != ""
             for ttype, value in tokens
         )

         if not has_code:
             processed_lines.append("")
         else:
             line_result = "".join(
                 value for ttype, value in tokens
                 if ttype not in Token.Comment
             )
             processed_lines.append(line_result.rstrip())

     # 3. VERTICAL WHITESPACE COLLAPSING
     final_output = []
     prev_was_empty = False
     for line in processed_lines:
         current_is_empty = not line.strip()
         if current_is_empty and prev_was_empty:
             # Already emitted one blank line for this run.
             continue
         final_output.append("" if current_is_empty else line)
         prev_was_empty = current_is_empty

     return "\n".join(final_output)

 def main():
     """Command-line entry point: strip comments from a shell script file.

     Reads the file given as the positional ``input`` argument (UTF-8),
     passes its contents through ``strip_shell_comments`` and writes the
     result plus a trailing newline to the ``-o/--output`` file, or to
     standard output when no output file is given.

     Raises:
         SystemExit: With status 1 when reading, stripping or writing
             fails; the error message is printed to stderr first.
     """
     parser = argparse.ArgumentParser(description="zsh comment stripper, usually works")
     parser.add_argument("input", help="Source script")
     parser.add_argument("-o", "--output", help="Output file (default: <STDOUT>)")
     args = parser.parse_args()

     try:
         with open(args.input, 'r', encoding='utf-8') as f:
             result = strip_shell_comments(f.read())

         if args.output:
             with open(args.output, 'w', encoding='utf-8') as f:
                 f.write(result + '\n')
         else:
             # Using buffer.write to avoid encoding issues with pipes
             sys.stdout.buffer.write((result + '\n').encode('utf-8'))
     except Exception as e:
         print(f"Error: {e}", file=sys.stderr)
         # Signal the failure to the calling shell instead of exiting 0.
         sys.exit(1)
 # Run the CLI only when this file is executed directly, not when it is imported.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3


	### Zsh Comments Stripper ###
	# semi parser-based comment stripper for zsh that preserves indentation.
	# Uses the Pygments Lexer, which is meant for Bash, but works in ZSH too.
	# Relies on a few hacks and will break in some situations, but works on my ~25k lines of shell script.
	#######

	import sys
	import argparse
	from pygments.lexers.shell import BashLexer
	from pygments.token import Token

	def strip_shell_comments(code):
	    """Return *code* with shell comments removed, preserving indentation.

	    Every line is processed on its own:

	    * A ``#!`` line at the very top of the input is kept, so the stripped
	      script stays executable.
	    * Lines containing ``(#`` (zsh glob flags such as ``(#i)``) bypass the
	      lexer. Full-line comments are dropped; otherwise only the text after
	      the last ``" #"`` (space then hash) is cut off.
	    * All other lines are tokenized with Pygments' ``BashLexer`` and their
	      comment tokens are discarded.

	    Afterwards, runs of blank lines (including lines that became empty
	    because they held only a comment) are collapsed into one empty line.

	    Because each line is lexed in isolation, lexer state from multi-line
	    constructs (here-documents, multi-line strings) is not carried over
	    from one line to the next.

	    Args:
	        code: Full text of the shell script.

	    Returns:
	        The stripped script as a single string whose lines are joined
	        with newlines; no final newline is appended.
	    """
	    lexer = BashLexer()
	    processed_lines = []

	    for index, line in enumerate(code.splitlines()):
	        # 0. SHEBANG PROTECTION
	        # The lexer classifies the shebang as a comment, so it has to be
	        # kept explicitly. Only the first line of the input qualifies; a
	        # later line starting with '#!' is an ordinary comment.
	        if index == 0 and line.startswith("#!"):
	            processed_lines.append(line.rstrip())
	            continue

	        # 1. ZSH GLOB FLAG PROTECTION
	        # A '(#' is likely a zsh glob flag, which BashLexer reportedly
	        # mis-handles, so these lines get a conservative manual split.
	        if "(#" in line:
	            if line.lstrip().startswith("#"):
	                # Whole line is a comment that merely mentions '(#'.
	                processed_lines.append("")
	            elif " #" in line:
	                # Cut only at ' #' (space then hash) so the glob flag
	                # itself is never split.
	                code_part = line.rsplit(" #", 1)[0]
	                processed_lines.append(code_part.rstrip())
	            else:
	                processed_lines.append(line.rstrip())
	            continue

	        # 2. SURGICAL LEXER STRIP
	        tokens = list(lexer.get_tokens(line + '\n'))

	        # A line counts as code if any token is neither a comment nor
	        # whitespace.
	        has_code = any(
	            ttype not in Token.Comment
	            and ttype not in Token.Text.Whitespace
	            and value.strip() != ""
	            for ttype, value in tokens
	        )

	        if not has_code:
	            processed_lines.append("")
	        else:
	            line_result = "".join(
	                value for ttype, value in tokens
	                if ttype not in Token.Comment
	            )
	            processed_lines.append(line_result.rstrip())

	    # 3. VERTICAL WHITESPACE COLLAPSING
	    final_output = []
	    prev_was_empty = False
	    for line in processed_lines:
	        current_is_empty = not line.strip()
	        if current_is_empty and prev_was_empty:
	            # Already emitted one blank line for this run.
	            continue
	        final_output.append("" if current_is_empty else line)
	        prev_was_empty = current_is_empty

	    return "\n".join(final_output)

	def main():
	    """Command-line entry point: strip comments from a shell script file.

	    Reads the file given as the positional ``input`` argument (UTF-8),
	    passes its contents through ``strip_shell_comments`` and writes the
	    result plus a trailing newline to the ``-o/--output`` file, or to
	    standard output when no output file is given.

	    Raises:
	        SystemExit: With status 1 when reading, stripping or writing
	            fails; the error message is printed to stderr first.
	    """
	    parser = argparse.ArgumentParser(description="zsh comment stripper, usually works")
	    parser.add_argument("input", help="Source script")
	    parser.add_argument("-o", "--output", help="Output file (default: <STDOUT>)")
	    args = parser.parse_args()

	    try:
	        with open(args.input, 'r', encoding='utf-8') as f:
	            result = strip_shell_comments(f.read())

	        if args.output:
	            with open(args.output, 'w', encoding='utf-8') as f:
	                f.write(result + '\n')
	        else:
	            # Using buffer.write to avoid encoding issues with pipes
	            sys.stdout.buffer.write((result + '\n').encode('utf-8'))
	    except Exception as e:
	        print(f"Error: {e}", file=sys.stderr)
	        # Signal the failure to the calling shell instead of exiting 0.
	        sys.exit(1)

	# Run the CLI only when this file is executed directly, not when it is imported.
	if __name__ == "__main__":
	    main()
No results found