Last active
March 7, 2026 03:59
-
-
Save varenc/00995ae0e35c89504e989aaebbb09d24 to your computer and use it in GitHub Desktop.
A semi-parser-based comment stripper for zsh that preserves indentation. It uses the Pygments BashLexer, which is meant for Bash but works for zsh too. It relies on a few hacks and will break in some situations, but it works on my ~25k lines of shell script.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| ### Zsh Comments Stripper ### | |
| # semi parser-based comment stripper for zsh that preserves indentation. | |
| # Uses the Pygments Lexer, which is meant for Bash, but works in ZSH too. | |
| # Relies on a few hacks and will break in some situations, but works on my ~25k lines of shell script. | |
| ####### | |
| import sys | |
| import argparse | |
| from pygments.lexers.shell import BashLexer | |
| from pygments.token import Token | |
def strip_shell_comments(code):
    """Return *code* with shell comments removed and indentation preserved.

    The input is processed one line at a time:

    1. Lines containing ``(#`` (zsh glob flags such as ``(#i)``) bypass the
       lexer. Only a trailing `` #...`` (space followed by hash) is cut off,
       splitting at the last occurrence.
    2. Every other line is tokenized with Pygments' ``BashLexer`` and all
       comment tokens are dropped. A shebang on the very first line is kept.
    3. Runs of consecutive blank lines are collapsed to a single blank line.

    Because each line is lexed independently, constructs that span several
    lines (multi-line strings, heredocs) are not recognized as such.

    Args:
        code: Full text of the shell script.

    Returns:
        The stripped script as a single string joined with ``"\\n"`` and
        without a trailing newline.
    """
    lexer = BashLexer()
    processed_lines = []

    for index, line in enumerate(code.splitlines()):
        # 1. ZSH GLOB FLAG PROTECTION
        # BashLexer would treat the '#' of a glob flag like '(#i)' as the
        # start of a comment, so these lines are handled by a manual split.
        if "(#" in line:
            # Only cut at ' #' (space then hash) so the glob flag itself,
            # which is preceded by '(', is never truncated.
            if " #" in line:
                line = line.rsplit(" #", 1)[0]
            processed_lines.append(line.rstrip())
            continue

        # 2. SURGICAL LEXER STRIP
        tokens = lexer.get_tokens(line + "\n")

        # Keep everything that is not a comment. The one exception is the
        # shebang: BashLexer reports it as Comment.Hashbang, but on the first
        # line of the script it is functional and must survive. (Previously
        # a shebang-only line was classified as "no code" and blanked.)
        is_first_line = index == 0
        kept = [
            (ttype, value)
            for ttype, value in tokens
            if ttype not in Token.Comment
            or (is_first_line and ttype is Token.Comment.Hashbang)
        ]

        # A line counts as code only if something other than whitespace
        # survived; comment-only lines become empty lines.
        has_code = any(
            ttype not in Token.Text.Whitespace and value.strip()
            for ttype, value in kept
        )
        if has_code:
            processed_lines.append("".join(value for _, value in kept).rstrip())
        else:
            processed_lines.append("")

    # 3. VERTICAL WHITESPACE COLLAPSING
    # Stripping whole-line comments leaves runs of blank lines behind;
    # squeeze each run down to a single empty line.
    final_output = []
    prev_was_empty = False
    for line in processed_lines:
        is_empty = not line.strip()
        if not (is_empty and prev_was_empty):
            final_output.append("" if is_empty else line)
        prev_was_empty = is_empty

    return "\n".join(final_output)
def main():
    """Command-line entry point: strip comments from a zsh/bash script.

    Reads the file named by the positional ``input`` argument as UTF-8, runs
    it through ``strip_shell_comments`` and writes the result, followed by a
    newline, to the ``-o/--output`` file or to standard output.

    On any failure the error is reported on standard error and the process
    exits with status 1 so callers and pipelines can detect it.
    """
    parser = argparse.ArgumentParser(description="zsh comment stripper, usually works")
    parser.add_argument("input", help="Source script")
    parser.add_argument("-o", "--output", help="Output file (default: <STDOUT>)")
    args = parser.parse_args()

    try:
        # The input is read completely before the output is opened, so the
        # same path may be used for both.
        with open(args.input, 'r', encoding='utf-8') as f:
            result = strip_shell_comments(f.read())

        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(result + '\n')
        else:
            # Using buffer.write to avoid encoding issues with pipes
            sys.stdout.buffer.write((result + '\n').encode('utf-8'))
    except Exception as e:
        # Top-level boundary: report the problem and signal failure through
        # the exit status instead of silently returning success.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a
# script; importing it as a module just exposes the functions.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment