Skip to content

Instantly share code, notes, and snippets.

@jvacek
Last active September 26, 2025 11:27
Show Gist options
  • Select an option

  • Save jvacek/888f840c8acd703429e21d8dec70f531 to your computer and use it in GitHub Desktop.

Select an option

Save jvacek/888f840c8acd703429e21d8dec70f531 to your computer and use it in GitHub Desktop.

This little script will help you make a spelling mistake leaderboard in your repo.

It will also help you find the 10 most common misspelled words.

Use

  • You will first need to have uv installed.
  • cd into your repository's root
  • (Optional but recommended) Set up a typos config and run uvx typos against it a few times to check for false positives.
  • Run the script
    • If you have already downloaded the script, run the local copy

      uvx typos --format json --no-check-filenames | uv run ./blame_counter.py

    • Or run it straight from the gist URL, with no download needed

      uvx typos --format json --no-check-filenames | uv run https://gist.githubusercontent.com/jvacek/888f840c8acd703429e21d8dec70f531/raw/blame_counter.py

You can use the following flags to modify the output:

  • --skip-authors
  • --skip-words
  • --max-words <int> (default is 10, use 0 to print all)
#!/usr/bin/env python3
# /// script
# dependencies = [
# "tqdm",
# ]
# ///
# Run as follows:
# uvx typos --format json --no-check-filenames | uv run ./blame_counter.py
# or
# uvx typos --format json --no-check-filenames | uv run https://gist.githubusercontent.com/jvacek/888f840c8acd703429e21d8dec70f531/raw/blame_counter.py
#
# Command-line options:
# --skip-authors Skip the authors leaderboard
# --skip-words Skip the misspelled words leaderboard
# --max-words N Maximum number of words to display (0 = all)
import argparse
import json
import re
import subprocess
import sys
from collections import Counter
from tqdm import tqdm
def get_author_email_for_line(filepath, line_number):
    """
    Find the e-mail address of the author `git blame` reports for one line.

    Runs `git blame --line-porcelain` restricted to a single line of the
    file and extracts the address from the `author-mail` header line of the
    output.

    Args:
        filepath (str): The path to the file.
        line_number (str): The line number to check.

    Returns:
        str: The email of the author, or None if it could not be determined
        (a warning is printed to stderr in that case).

    Raises:
        SystemExit: With status 1 if the `git` executable cannot be found.
    """
    # The --line-porcelain format is stable and easy to parse
    command = [
        "git",
        "blame",
        "--line-porcelain",
        f"-L{line_number},{line_number}",
        # "--" ends option parsing, so a path starting with "-" is still
        # treated as a file name rather than as a git option.
        "--",
        filepath,
    ]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            encoding="utf-8",  # Ensure consistent encoding
            # The porcelain output also carries the blamed line's content.
            # Files that are not valid UTF-8 must not abort the lookup, so
            # undecodable bytes are replaced instead of raising.
            errors="replace",
        )
    except FileNotFoundError:
        # This error occurs if the 'git' command is not found
        print(
            "Error: 'git' command not found. Is Git installed and in your PATH?",
            file=sys.stderr,
        )
        # Exit the script if git isn't available
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        # This error occurs if git blame fails (e.g., file not in git, line number invalid)
        stderr_text = (e.stderr or "").strip()
        print(
            f"Warning: `git blame` failed for {filepath}:{line_number}. Error: {stderr_text}",
            file=sys.stderr,
        )
        return None
    except Exception as e:
        # Best-effort: one unreadable file must not stop the whole run
        print(
            f"An unexpected error occurred while processing {filepath}:{line_number}: {e}",
            file=sys.stderr,
        )
        return None

    # Find the line starting with 'author-mail' and extract the email
    for line in result.stdout.splitlines():
        if line.startswith("author-mail "):
            # The email is enclosed in <...>
            email_match = re.search(r"<(.*)>", line)
            if email_match:
                return email_match.group(1).strip()

    print(
        f"Warning: Could not determine author for {filepath}:{line_number}",
        file=sys.stderr,
    )
    return None
def main():
    """
    Read spellchecker output from stdin and print mistake leaderboards.

    Each non-empty stdin line is parsed as one JSON object. Objects that
    carry the "path", "line_num" and "typo" keys are counted per misspelled
    word and, unless --skip-authors is given, attributed to an author e-mail
    via `get_author_email_for_line`. Lines that are not valid JSON, are not
    JSON objects, or lack one of those keys are skipped with a warning on
    stderr.

    Command-line arguments:
        --skip-authors: Skip displaying the authors leaderboard
        --skip-words: Skip displaying the misspelled words leaderboard
        --max-words <int>: Maximum number of words to display (0 = all)
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description="Process spelling mistakes and attribute them to authors."
    )
    parser.add_argument(
        "--skip-authors", action="store_true", help="Skip the authors leaderboard"
    )
    parser.add_argument(
        "--skip-words",
        action="store_true",
        help="Skip the misspelled words leaderboard",
    )
    parser.add_argument(
        "--max-words",
        type=int,
        default=10,
        help="Maximum number of words to display (0 = all)",
    )
    args = parser.parse_args()

    author_counts = Counter()
    word_counts = Counter()

    print("Processing spellchecker output from stdin...", file=sys.stderr)
    print("This may take a moment depending on the number of errors.", file=sys.stderr)

    required_keys = ("path", "line_num", "typo")

    # Read each line from the standard input
    for line in tqdm(sys.stdin):
        stripped = line.strip()
        if not stripped:  # Skip empty lines
            continue

        # Only the parse itself can raise JSONDecodeError, so keep the try minimal
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            print(
                f"Warning: Skipping malformed JSON line: {stripped}",
                file=sys.stderr,
            )
            continue

        # A valid JSON scalar or array (e.g. `5`, `null`, `[]`) would make the
        # key checks below raise or misbehave; skip it instead of crashing.
        if not isinstance(data, dict):
            print(
                f"Warning: Skipping non-object JSON line: {stripped}",
                file=sys.stderr,
            )
            continue

        # Basic validation for required keys in the JSON object
        if any(key not in data for key in required_keys):
            print(
                f"Warning: Skipping JSON object with missing keys: {stripped}",
                file=sys.stderr,
            )
            continue

        filepath = data["path"]
        # The git blame function expects a string for the line number
        line_number = str(data["line_num"])
        misspelled_word = data["typo"]

        # Track the count of each misspelled word
        word_counts[misspelled_word] += 1

        if not args.skip_authors:
            author_email = get_author_email_for_line(filepath, line_number)
            if author_email:
                author_counts[author_email] += 1

    # --- Print the final results ---
    # Print author leaderboard if not skipped
    if not args.skip_authors:
        if not author_counts:
            print("\nNo mistakes were successfully attributed to an author.")
        else:
            print("\n--- Spelling Mistake Leaderboard (by Email) ---")
            for email, count in author_counts.most_common():
                plural = "s" if count > 1 else ""
                print(f"{email}: {count} mistake{plural}")

    # Print word leaderboard if not skipped
    if not args.skip_words:
        if not word_counts:
            print("\nNo misspelled words were found.")
        else:
            print("\n--- Most Common Misspellings ---")
            # A positive max_words caps the list; zero or negative shows everything
            limit = args.max_words if args.max_words > 0 else None
            for word, count in word_counts.most_common(limit):
                plural = "s" if count > 1 else ""
                print(f"'{word}': {count} time{plural}")


# Run the leaderboard only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment