Created
August 23, 2025 15:21
-
-
Save popmonkey/c06e491e5e8c45274ae3432c06ddcf46 to your computer and use it in GitHub Desktop.
file system inventory/summary
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# ==============================================================================
# Filesystem Inventory Processor
#
# Description:
#   A self-contained script to process the output of `find ... -ls`.
#   It accepts input from a file or stdin, displays a real-time line count
#   on the terminal, and writes a clean, color-free report to a specified
#   output file.
#
# Usage:
#   # From a file
#   ./inventory_processor.sh /path/to/find_output.txt /path/to/report.txt
#
#   # From a pipe (e.g., local scan)
#   sudo find / -ls | ./inventory_processor.sh - /path/to/report.txt
#
#   # From a remote pipe
#   ssh user@host "find . -ls" | ./inventory_processor.sh - /path/to/report.txt
#
# Author:
#   popmonkey & Gemini
# ==============================================================================
| # --- Embedded AWK Script --- | |
| # The core processing logic is contained within this AWK script. | |
| # It is passed to the awk command via a variable. | |
| read -r -d '' AWK_SCRIPT <<'EOF' | |
| BEGIN { | |
| # --- Configuration --- | |
| SIZE_THRESHOLD_GB = 1 | |
| FILE_COUNT_THRESHOLD = 1000 | |
| SIZE_THRESHOLD_BYTES = SIZE_THRESHOLD_GB * 1024 * 1024 * 1024 | |
| # --- ANSI Color Codes --- | |
| CYAN = "\033[0;36m" | |
| GREEN = "\033[0;32m" | |
| WHITE = "\033[1;37m" | |
| NC = "\033[0m" | |
| print CYAN "==> Processing file list..." NC > "/dev/stderr" | |
| } | |
| { | |
| # Print a progress update to stderr every 10,000 lines. | |
| # The carriage return `\r` keeps the output on a single line. | |
| if (NR % 10000 == 0) { | |
| printf "%s==> Processed %'d lines...\r%s", CYAN, NR, NC > "/dev/stderr" | |
| } | |
| file_size = $7 | |
| full_path = $11 | |
| if (substr($3, 1, 1) == "-") { | |
| if (file_size > SIZE_THRESHOLD_BYTES) { | |
| large_files[full_path] = file_size | |
| } | |
| parent_dir = full_path | |
| sub(/\/[^\/]*$/, "", parent_dir) | |
| if (parent_dir == "") { parent_dir = "/" } | |
| dir_file_counts[parent_dir]++ | |
| dir_sizes[parent_dir] += file_size | |
| } | |
| } | |
| END { | |
| # Clear the progress line and print the final count. | |
| printf "%s==> Processed %'d total lines. Calculating sizes...%s\n", CYAN, NR, NC > "/dev/stderr" | |
| TMP_FILE = "/tmp/awk_inv_sort." PROCINFO["pid"] | |
| for (dir in dir_sizes) { | |
| print dir > TMP_FILE | |
| } | |
| close(TMP_FILE) | |
| cmd = "sort -r " TMP_FILE | |
| while ((cmd | getline sorted_dir) > 0) { | |
| parent_dir = sorted_dir | |
| sub(/\/[^\/]*$/, "", parent_dir) | |
| if (parent_dir != "" && parent_dir != sorted_dir) { | |
| dir_sizes[parent_dir] += dir_sizes[sorted_dir] | |
| } | |
| } | |
| close(cmd) | |
| system("rm " TMP_FILE) | |
| print CYAN "==> Generating summary..." NC > "/dev/stderr" | |
| print "\n" GREEN "--- Directories with total size > " SIZE_THRESHOLD_GB "GB (sorted largest to smallest) ---" NC | |
| cmd = "sort -k1,1nr" | |
| for (dir in dir_sizes) { | |
| if (dir_sizes[dir] > SIZE_THRESHOLD_BYTES) { | |
| gigs = dir_sizes[dir] / (1024*1024*1024) | |
| printf "%.2f GB\t%s\n", gigs, dir | cmd | |
| } | |
| } | |
| close(cmd) | |
| print "\n" GREEN "--- Directories with > " FILE_COUNT_THRESHOLD " files (direct children only) ---" NC | |
| cmd = "sort -k1,1nr" | |
| for (dir in dir_file_counts) { | |
| if (dir_file_counts[dir] >= FILE_COUNT_THRESHOLD) { | |
| printf "%d files\t- %s\n", dir_file_counts[dir], dir | cmd | |
| } | |
| } | |
| close(cmd) | |
| print "\n" GREEN "--- Individual files > " SIZE_THRESHOLD_GB "GB (sorted largest to smallest) ---" NC | |
| cmd = "sort -k1,1nr" | |
| for (file in large_files) { | |
| gigs = large_files[file] / (1024*1024*1024) | |
| printf "%.2f GB\t%s\n", gigs, file | cmd | |
| } | |
| close(cmd) | |
| } | |
| EOF | |
# --- Main Script Logic ---
# Validates the two positional arguments, runs the embedded AWK program over
# the input, strips ANSI color codes from its stdout and writes the result to
# the output file. Exits 1 on bad usage, a missing input file, an output path
# that is the input file itself, or a failure anywhere in the pipeline.

# 1. Validate Input
if [[ "$#" -ne 2 ]]; then
  echo "Error: Incorrect number of arguments." >&2
  echo "Usage: $0 <input_file_or_dash> <output_file>" >&2
  exit 1
fi

INPUT_SOURCE="$1"
OUTPUT_FILE="$2"

# Use /dev/stdin if the input source is a dash '-'.
if [[ "$INPUT_SOURCE" == "-" ]]; then
  INPUT_SOURCE="/dev/stdin"
elif [[ ! -f "$INPUT_SOURCE" ]]; then
  printf "Error: Input file not found at '%s'\n" "$INPUT_SOURCE" >&2
  exit 1
fi

# The output redirection truncates its target before awk reads anything, so
# writing the report onto the input file would silently destroy the data.
if [[ -e "$OUTPUT_FILE" && "$INPUT_SOURCE" -ef "$OUTPUT_FILE" ]]; then
  printf "Error: Output file '%s' is the same file as the input.\n" "$OUTPUT_FILE" >&2
  exit 1
fi

# 2. Execute and Process
# - awk reads the input directly (no intermediate `cat`) and runs the main
#   inventory logic, printing progress to stderr.
# - sed strips color codes from stdout for the output file. The ESC byte is
#   embedded via $'...' so the expression does not depend on sed's own
#   handling of `\x1b`, and no extended-regex flag is needed.
# - pipefail makes a failure in any stage (awk error, unwritable output file)
#   visible instead of being masked by a later stage.
set -o pipefail
STRIP_ANSI_EXPR=$'s/\033\\[[0-9;]*m//g'
if ! awk "$AWK_SCRIPT" < "$INPUT_SOURCE" | sed "$STRIP_ANSI_EXPR" > "$OUTPUT_FILE"; then
  printf "Error: Inventory processing failed; '%s' may be missing or incomplete.\n" "$OUTPUT_FILE" >&2
  exit 1
fi

# 3. Final Message
printf "\033[0;36m==> Inventory complete! Report saved to '%s'\033[0m\n" "$OUTPUT_FILE" >&2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment