Created
May 23, 2025 16:39
-
-
Save vargo/d2e766bf56a2544955f143d05df61ba2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # DISCLAIMER: USE AT OWN RISK | |
| # SHOULD NOT BE HARMFUL, BUT CAN BE RESOURCE INTENSIVE | |
| # THIS SCRIPT IS THE RESULT OF ME TINKERING IN THE TERMINAL TO CATCH WHICH FILES EAT UP MY DISK SPACE ON MACOS | |
| # TESTED ON MACBOOK PRO M1, MACOS 15.3.2, iTerm2, FISH SHELL | |
| # This function is for Fish Shell (but can be converted to other shells quite easily) | |
| # I found the tools to discover large files on MacOS lacking, so this script can scan a path for files larger than size X (in GB) | |
| # See `find-large --help` | |
| ###################################### | |
# Global (initially empty) holders for the two temporary file paths.
# They are global so that the SIGINT handler can see them as well.
set --global tmpfile
set --global tmpfile_sorted
| ###################################### | |
| ###################################### | |
# cleanup_and_exit
#
# Interrupt handler: prints a notice, removes the temporary files named by the
# global variables $tmpfile and $tmpfile_sorted (when set), and exits with
# status 1.
#
# Globals read: tmpfile, tmpfile_sorted
# NOTE(review): `exit` ends the fish process that runs this handler; confirm
# that is the intended behavior when this file is sourced into an interactive
# session.
function cleanup_and_exit
    # fish double quotes do not turn "\n" into a newline and `echo` does not
    # interpret escapes by default, so let printf produce the blank line.
    printf '\nExiting...\n'

    # Unset/empty list variables expand to nothing, so the loop is skipped
    # entirely when no temporary file has been created yet.
    for leftover in $tmpfile $tmpfile_sorted
        if test -n "$leftover"
            rm -f -- $leftover
        end
    end

    exit 1
end
| ###################################### | |
# Register the interrupt (CTRL-C) handler at file level, so it is installed as
# soon as this file is loaded and stays active while find-large runs.
trap cleanup_and_exit INT
| ###################################### | |
######################################
# Output helpers
#
# Each helper takes one argument: the path of a results file holding one
# record per line in the form "<size_gb><TAB><path>".
######################################

# Escape a string so it can be placed inside a double-quoted JSON or YAML
# scalar: backslashes first, then double quotes, then tabs.
function _find_large_escape
    string replace -a -- '\\' '\\\\' "$argv[1]" | string replace -a -- '"' '\\"' | string replace -a -- \t '\\t'
end

# Print the records as a two-column text table with a header.
function _output_table
    set -l results_file $argv[1]
    printf "%-10s %s\n" "Size (GB)" "Path"
    printf "%-10s %s\n" "---------" "----"
    while read -l record
        # Split on the first tab only; the path itself may contain tabs.
        set -l fields (string split -m 1 -- \t $record)
        printf "%-10s %s\n" "$fields[1]" "$fields[2]"
    end <$results_file
end

# Print the records as a JSON array of {"size_gb": N, "path": "..."} objects.
function _output_json
    set -l results_file $argv[1]
    set -l is_first 1
    echo "["
    while read -l record
        set -l fields (string split -m 1 -- \t $record)
        set -l escaped_path (_find_large_escape "$fields[2]")
        # Emit the separator before every record except the first, so the
        # total record count never has to be computed.
        if test $is_first -eq 0
            echo ","
        end
        set is_first 0
        echo -n "  {\"size_gb\": $fields[1], \"path\": \"$escaped_path\"}"
    end <$results_file
    # Terminate the last record line (nothing to terminate for empty input).
    if test $is_first -eq 0
        echo
    end
    echo "]"
end

# Print the records as a YAML sequence of mappings (size_gb, path).
function _output_yaml
    set -l results_file $argv[1]
    while read -l record
        set -l fields (string split -m 1 -- \t $record)
        set -l escaped_path (_find_large_escape "$fields[2]")
        echo "- size_gb: $fields[1]"
        echo "  path: \"$escaped_path\""
    end <$results_file
end

######################################
# main function
#
# find-large [--path PATH] [--min_gb SIZE] [--format json|yaml|table]
#            [--exclude PATH] [--dump-to-file] [--no-spinner] [--no-sort]
#            [--debug] [--help]
#
# Scans PATH (default "/") for regular files of at least SIZE GB (default 2)
# and prints them in the requested format, largest first unless --no-sort is
# given. Value options accept both "--opt VALUE" and "--opt=VALUE".
#
# Globals written: tmpfile, tmpfile_sorted (temporary file paths; kept global
#                  so an interrupt handler can remove them).
# Returns: 0 on success (including --help and "no files found"),
#          1 on invalid arguments or when temporary files cannot be created.
######################################
function find-large
    ######################################
    # Defaults. Everything is function-local so that same-named variables in
    # the caller's session are never modified.
    set -l path "/"
    set -l min_gb 2
    set -l format table
    set -l exclude "/System/Volumes/Data/Volumes/NAS"
    set -l outfile find-large-output #filename, extension driven by format value
    # Boolean input args
    set -l show_spinner 1 # Default is to show % progress
    set -l sort_files 1 # Default is to sort by size descending
    set -l debug 0 # Default is no debug
    set -l dump_to_file 0 # default is not to write output to file

    ######################################
    # Parse input arguments
    set -l argc (count $argv)
    set -l i 1
    while test $i -le $argc
        set -l arg $argv[$i]
        # Filled in only for options that carry a value.
        set -l opt_name
        set -l opt_value
        switch $arg
            case '--help'
                echo ""
                echo "Usage: find-large --path [PATH] --min_gb [SIZE] [options]"
                echo ""
                echo "Options:"
                echo " --path PATH Path to search (default: $path)"
                echo " --min_gb SIZE Minimum file size in GB (e.g. 10, 1, 0.1)"
                echo " --format json|yaml|table Output format (default: table)"
                echo " --exclude PATH Path to exclude (default: $exclude)"
                echo " --dump-to-file Write output to file"
                echo " --no-spinner Disable the spinner"
                echo " --no-sort Do not sort results"
                echo " --debug Enable debug mode"
                echo " --help Show this help message"
                return 0
            case '--path' '--min_gb' '--format' '--exclude'
                # "--opt VALUE": the value is the next argument.
                if test $i -ge $argc
                    echo "Missing value for $arg" >&2
                    return 1
                end
                set i (math $i + 1)
                set opt_name $arg
                set opt_value $argv[$i]
            case '--path=*' '--min_gb=*' '--format=*' '--exclude=*'
                # "--opt=VALUE": split on the first "=" only.
                set -l pieces (string split -m 1 -- = $arg)
                set opt_name $pieces[1]
                set opt_value "$pieces[2]"
            case '--no-spinner'
                set show_spinner 0
            case '--no-sort'
                set sort_files 0
            case '--debug'
                set debug 1
            case '--dump-to-file'
                set dump_to_file 1
            case '*'
                echo "Unknown option: $arg" >&2
                return 1
        end

        # Apply value-carrying options (no-op for plain flags).
        switch "$opt_name"
            case '--path'
                set path $opt_value
            case '--min_gb'
                set min_gb $opt_value
            case '--exclude'
                set exclude $opt_value
            case '--format'
                switch "$opt_value"
                    case json yaml table
                        set format $opt_value
                    case '*'
                        echo "Invalid format: $opt_value. Allowed values are: json, yaml, table." >&2
                        return 1
                end
        end
        set i (math $i + 1)
    end

    ######################################
    # Convert the threshold to bytes; a value `math` cannot evaluate yields an
    # empty result, which is reported instead of failing later in `test`.
    set -l min_size_bytes (math "ceil($min_gb * 1024 * 1024 * 1024)" 2>/dev/null)
    if test -z "$min_size_bytes"
        echo "Invalid value for --min_gb: $min_gb" >&2
        return 1
    end
    echo "Minimal size in bytes is: $min_size_bytes"
    echo "Please wait, getting ready..."

    ######################################
    # Retrieve total nr of files if progress needs to be shown
    set -l total_files 0
    if test $show_spinner -eq 1
        # First, try to get total files by meta-data
        set total_files (mdfind -onlyin $path "kMDItemFSName = '*'" 2>/dev/null | wc -l | string trim)
        # If no result, fall back to counting with find (errors discarded so
        # they are not counted as files)
        if test -z "$total_files"; or test "$total_files" -eq 0
            set total_files (find $path -type f 2>/dev/null | wc -l | string trim)
        end
        if test -z "$total_files"; or test "$total_files" -eq 0
            echo "No files found in the specified path."
            return 0
        end
        echo "$total_files files found in the specified path."
    end

    ######################################
    # Create the temporary files only now, so that none of the early returns
    # above can leave them behind.
    set -g tmpfile (mktemp -t findlarge_unsorted)
    set -g tmpfile_sorted (mktemp -t findlarge_sorted)
    if test -z "$tmpfile"; or test -z "$tmpfile_sorted"
        echo "Could not create temporary files." >&2
        rm -f -- $tmpfile $tmpfile_sorted
        set -g tmpfile
        set -g tmpfile_sorted
        return 1
    end

    # The exclude path is matched as a literal prefix; an empty exclude value
    # disables the filter instead of matching every file.
    set -l exclude_pattern
    if test -n "$exclude"
        set exclude_pattern '^'(string escape --style=regex -- $exclude)
    end

    ######################################
    # SCANNING BIT
    set -l current_file 0
    find $path -type f 2>/dev/null | while read -l file
        if test -n "$exclude_pattern"; and string match -qr -- $exclude_pattern $file
            continue
        end
        if not test -f "$file"
            continue
        end
        if test $show_spinner -eq 1
            # Update progress
            set current_file (math $current_file + 1)
            set -l percent (math "floor((100.0 * $current_file) / $total_files)")
            echo -ne "\rScanning... | Progress: $current_file / $total_files ($percent%)"
        end
        # The file may vanish or be unreadable between find and stat.
        set -l size_bytes (stat -f %z "$file" 2>/dev/null)
        if test -z "$size_bytes"
            continue
        end
        if test $size_bytes -ge $min_size_bytes
            set -l size_gb (math --scale=2 "$size_bytes / 1024 / 1024 / 1024")
            # write result line (printf keeps backslashes in file names intact)
            printf '%s\t%s\n' $size_gb $file >>$tmpfile
        end
    end
    if test $debug -eq 1
        echo -ne "\rContents dump of tmp file: \n"
        cat $tmpfile
    end
    echo -ne "\rDone scanning! \n"

    ######################################
    # SORT RESULT
    set -l results_file $tmpfile
    if test $sort_files -eq 1
        echo "Sorting results and saving to tmpfile_sorted."
        # C locale: sizes always use "." as the decimal separator.
        env LC_ALL=C sort -nr $tmpfile >$tmpfile_sorted
        set results_file $tmpfile_sorted
    else
        echo "Not sorting results; using tmpfile as sorted output."
    end

    ######################################
    # Pick the output helper and the matching file extension
    set -l renderer _output_table
    set -l extension txt
    switch $format
        case json
            set renderer _output_json
            set extension json
        case yaml
            set renderer _output_yaml
            set extension yaml
    end

    $renderer $results_file

    # Save the same output to a file when requested
    if test $dump_to_file -eq 1
        $renderer $results_file >"$outfile.$extension"
        echo "Saved output to $outfile.$extension"
    end

    # Clean up
    rm -f -- $tmpfile $tmpfile_sorted
    set -g tmpfile
    set -g tmpfile_sorted
    return 0
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment