Skip to content

Instantly share code, notes, and snippets.

@varenc
Last active March 7, 2026 03:59
Show Gist options
  • Select an option

  • Save varenc/00995ae0e35c89504e989aaebbb09d24 to your computer and use it in GitHub Desktop.

Select an option

Save varenc/00995ae0e35c89504e989aaebbb09d24 to your computer and use it in GitHub Desktop.
A semi-parser-based comment stripper for zsh that preserves indentation. It uses the Pygments BashLexer, which is meant for Bash but works for zsh too. It relies on a few hacks and will break in some situations, but it works on my ~25k lines of shell script.
#!/usr/bin/env python3
### Zsh Comments Stripper ###
# Semi-parser-based comment stripper for zsh that preserves indentation.
# Uses the Pygments BashLexer, which is meant for Bash but works for zsh too.
# Relies on a few hacks and will break in some situations, but works on my ~25k lines of shell script.
#######
import sys
import argparse
from pygments.lexers.shell import BashLexer
from pygments.token import Token
def strip_shell_comments(code):
    """Strip comments from zsh/bash source while preserving indentation.

    The script is processed one line at a time:

    * A ``#!`` line is kept, but only when it is the very first line.
    * Lines containing ``(#`` (zsh glob flags such as ``(#i)``) bypass the
      lexer, because ``BashLexer`` would treat the flag as a comment. On
      those lines only a trailing `` #`` (space then hash) comment is cut.
    * Every other line is tokenized with Pygments' ``BashLexer`` and all
      comment tokens are dropped.

    Comment-only lines become empty lines, and runs of empty lines are then
    collapsed to a single empty line.

    Known limitation: because each line is lexed on its own, a ``#`` inside
    a multi-line string or here-document can be mistaken for a comment, and
    on glob-flag lines a `` #`` inside a quoted string is cut as well.

    Args:
        code: Full text of the shell script.

    Returns:
        The stripped script as one string, lines joined with newline
        characters and without a trailing newline.
    """
    # Built on first use: lines handled by the string-only paths below never
    # need the lexer at all.
    lexer = None
    processed_lines = []

    for index, line in enumerate(code.splitlines()):
        # 0. HASHBANG
        # Comment.Hashbang is a subtype of Token.Comment, so the lexer path
        # below classifies a '#!' line as comment-only and would drop it.
        # Keep it explicitly, and only on the first line, where it is real.
        if index == 0 and line.startswith("#!"):
            processed_lines.append(line.rstrip())
            continue

        # 1. ZSH GLOB FLAG PROTECTION
        # If the line contains '(#', it's likely a glob flag that BashLexer
        # kills, so these lines get a 'safe' manual split instead.
        if "(#" in line:
            if line.lstrip().startswith("#"):
                # Whole-line comment that merely mentions a glob flag.
                processed_lines.append("")
            elif " #" in line:
                # Only cut at ' #' (space then hash) to avoid cutting inside
                # the glob flag itself.
                processed_lines.append(line.rsplit(" #", 1)[0].rstrip())
            else:
                processed_lines.append(line.rstrip())
            continue

        # 2. SURGICAL LEXER STRIP
        if lexer is None:
            lexer = BashLexer()
        tokens = list(lexer.get_tokens(line + "\n"))

        # Does the line hold anything besides comments and whitespace?
        has_code = any(
            ttype not in Token.Comment
            and ttype not in Token.Text.Whitespace
            and value.strip() != ""
            for ttype, value in tokens
        )
        if not has_code:
            processed_lines.append("")
            continue

        kept = "".join(v for t, v in tokens if t not in Token.Comment)
        processed_lines.append(kept.rstrip())

    # 3. VERTICAL WHITESPACE COLLAPSING
    final_output = []
    prev_was_empty = False
    for line in processed_lines:
        is_empty = not line.strip()
        if is_empty and prev_was_empty:
            continue
        final_output.append("" if is_empty else line)
        prev_was_empty = is_empty

    return "\n".join(final_output)
def main():
    """Command-line entry point: strip comments from one shell script.

    Parses a positional ``input`` path and an optional ``-o/--output`` path.
    The input file is read as UTF-8, passed through ``strip_shell_comments``,
    and the result plus a trailing newline is written to the output file, or
    to standard output when no output path is given.

    Raises:
        SystemExit: with status 1 when reading, processing or writing fails
            (the error is printed to stderr first). ``argparse`` also raises
            it for invalid command-line arguments.
    """
    parser = argparse.ArgumentParser(description="zsh comment stripper, usually works")
    parser.add_argument("input", help="Source script")
    parser.add_argument("-o", "--output", help="Output file (default: <STDOUT>)")
    args = parser.parse_args()

    try:
        with open(args.input, 'r', encoding='utf-8') as f:
            result = strip_shell_comments(f.read())

        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(result + '\n')
        else:
            # Using buffer.write to avoid encoding issues with pipes
            sys.stdout.buffer.write((result + '\n').encode('utf-8'))
    except Exception as e:
        # Top-level CLI boundary: report a one-line error instead of a
        # traceback, but signal failure so callers and pipelines notice.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment