Skip to content

Instantly share code, notes, and snippets.

@vargo
Created May 23, 2025 16:39
Show Gist options
  • Select an option

  • Save vargo/d2e766bf56a2544955f143d05df61ba2 to your computer and use it in GitHub Desktop.

Select an option

Save vargo/d2e766bf56a2544955f143d05df61ba2 to your computer and use it in GitHub Desktop.
# DISCLAIMER: USE AT OWN RISK
# SHOULD NOT BE HARMFUL, BUT CAN BE RESOURCE INTENSIVE
# THIS SCRIPT IS THE RESULT OF ME TINKERING IN THE TERMINAL TO CATCH WHAT FILES EAT UP MY DISK SPACE ON MACOS
# TESTED ON MACBOOK PRO M1, MACOS 15.3.2, iTerm2, FISH SHELL
# This function is for Fish Shell (but can be converted to other shells quite easily)
# I found the tools to discover large files on MacOS lacking, so this script can scan a path for files larger than size X (in GB)
# See `find-large --help`
######################################
# Declare global variables for temporary files
# Both start out empty. find-large assigns them the paths returned by mktemp,
# and cleanup_and_exit reads them to know which scratch files to remove.
# They are global (-g) so the signal handler, which runs outside find-large's
# scope, can still see the paths.
set -g tmpfile
set -g tmpfile_sorted
######################################
######################################
function cleanup_and_exit
    # Interrupt handler: remove the scratch files recorded in the global
    # variables tmpfile / tmpfile_sorted (if any) and terminate with status 1.
    #
    # Arguments: none.
    # Globals read: tmpfile, tmpfile_sorted.

    # printf is used because fish's `echo` (without -e) prints "\n" literally.
    printf '\nExiting...\n'

    # Only call rm when a path was actually recorded; `--` protects against
    # a path that happens to start with a dash.
    if test -n "$tmpfile"
        rm -f -- $tmpfile
    end
    if test -n "$tmpfile_sorted"
        rm -f -- $tmpfile_sorted
    end

    exit 1
end
######################################
# Set up exit trap globally for CTRL-C interrupt, ensuring it's active throughout the script's execution
# The handler is registered when this file is sourced, not when find-large is
# called, so it stays installed for the rest of the session.
# NOTE(review): cleanup_and_exit ends with `exit 1`; if this file is sourced
# into an interactive shell, CTRL-C may therefore end the whole session, not
# just a running scan — confirm this is intended.
trap cleanup_and_exit SIGINT
######################################
# main function
function find-large
    # Scan a directory tree for files of at least --min_gb gigabytes and print
    # them as a table, JSON or YAML; optionally also write the result to
    # "find-large-output.<ext>" in the current directory.
    #
    # Arguments: command-line style options, see `find-large --help`.
    # Globals written: tmpfile, tmpfile_sorted (scratch file paths, kept global
    #   so the SIGINT handler can remove them).
    # Returns: 0 on success (including --help and "no files found"),
    #   1 on invalid usage or when scratch files cannot be created.

    ######################################
    # Defaults. Everything is declared with `set -l` at function scope so that
    # a same-named global or universal variable is never overwritten.
    set -l path "/"
    set -l min_gb 2
    set -l format "table"
    set -l exclude "/System/Volumes/Data/Volumes/NAS"
    set -l outfile "find-large-output" # filename, extension driven by format value
    # Boolean input args
    set -l show_spinner 1 # Default is to show % progress
    set -l sort_files 1 # Default is to sort by size descending
    set -l debug 0 # Default is no debug
    set -l dump_to_file 0 # default is not to write output to file

    ######################################
    # Parse input arguments
    set -l argc (count $argv)
    set -l i 1
    while test $i -le $argc
        set -l arg $argv[$i]
        switch $arg
            case '--help'
                echo ""
                echo "Usage: find-large --path [PATH] --min_gb [SIZE] [options]"
                echo ""
                echo "Options:"
                echo " --path PATH Path to search (default: $path)"
                echo " --min_gb SIZE Minimum file size in GB (e.g. 10, 1, 0.1)"
                echo " --format json|yaml|table Output format (default: table)"
                echo " --exclude PATH Path to exclude (default: $exclude)"
                echo " --dump-to-file Write output to file"
                echo " --no-spinner Disable the spinner"
                echo " --no-sort Do not sort results"
                echo " --debug Enable debug mode"
                echo " --help Show this help message"
                return 0
            case '--path' '--min_gb' '--format' '--exclude'
                # "--option value" form: the value is the following argument.
                if test (math $i + 1) -gt $argc
                    echo "Missing value for $arg"
                    return 1
                end
                set i (math $i + 1)
                switch $arg
                    case '--path'
                        set path $argv[$i]
                    case '--min_gb'
                        set min_gb $argv[$i]
                    case '--format'
                        set format $argv[$i]
                    case '--exclude'
                        set exclude $argv[$i]
                end
            # "--option=value" form. Only ONE `--` is passed to `string replace`:
            # a second one would be treated as an input string and echoed back,
            # turning the variable into a two-element list ("--" plus the value).
            case '--path=*'
                set path (string replace -- '--path=' '' $arg)
            case '--min_gb=*'
                set min_gb (string replace -- '--min_gb=' '' $arg)
            case '--format=*'
                set format (string replace -- '--format=' '' $arg)
            case '--exclude=*'
                set exclude (string replace -- '--exclude=' '' $arg)
            case '--no-spinner'
                set show_spinner 0
            case '--no-sort'
                set sort_files 0
            case '--debug'
                set debug 1
            case '--dump-to-file'
                set dump_to_file 1
            case '*'
                echo "Unknown option: $arg"
                return 1
        end
        set i (math $i + 1)
    end

    ######################################
    # Validate the parsed values once, whichever option form supplied them.
    if not contains -- "$format" json yaml table
        echo "Invalid format: $format. Allowed values are: json, yaml, table."
        return 1
    end
    if not string match -qr -- '^([0-9]+\.?[0-9]*|\.[0-9]+)$' "$min_gb"
        echo "Invalid value for --min_gb: $min_gb (expected a number such as 10, 1 or 0.1)"
        return 1
    end
    if not test -e "$path"
        echo "Path does not exist: $path"
        return 1
    end

    ######################################
    set -l min_size_bytes (math "ceil($min_gb * 1024 * 1024 * 1024)")
    echo "Minimal size in bytes is: $min_size_bytes"
    echo "Please wait, getting ready..."

    ######################################
    # Retrieve total nr of files if progress needs to be shown
    set -l total_files 0
    if test $show_spinner -eq 1
        # First, try to get total files by meta-data
        set total_files (mdfind -onlyin $path "kMDItemFSName = '*'" 2>/dev/null | wc -l | string trim)
        # If no result, fallback method with find. Errors are discarded so
        # that "Permission denied" lines are not counted as files.
        if test "$total_files" -eq 0
            set total_files (find $path -type f 2>/dev/null | wc -l | string trim)
        end
        if test "$total_files" -eq 0
            echo "No files found in the specified path."
            return 0
        end
        echo "$total_files files found in the specified path."
    end

    ######################################
    # Create the scratch files only now, after every early return above, so
    # that --help, usage errors and empty trees never leave files behind.
    set -g tmpfile (mktemp -t findlarge_unsorted)
    set -g tmpfile_sorted (mktemp -t findlarge_sorted)
    if test -z "$tmpfile"; or test -z "$tmpfile_sorted"
        echo "Could not create temporary files"
        if test -n "$tmpfile"
            rm -f -- $tmpfile
        end
        if test -n "$tmpfile_sorted"
            rm -f -- $tmpfile_sorted
        end
        set -g tmpfile
        set -g tmpfile_sorted
        return 1
    end

    ######################################
    # SCANNING BIT
    # The exclude path is matched as a literal prefix (regex-escaped), so
    # characters like "." or "+" in it are not interpreted, and an empty
    # exclude value disables the filter instead of hiding every file.
    set -l exclude_pattern
    if test -n "$exclude"
        set exclude_pattern '^'(string escape --style=regex -- $exclude)
    end

    set -l current_file 0
    set -l percent 0
    set -l file
    set -l size_bytes
    set -l size_gb
    # find's diagnostics go to /dev/null instead of being merged into the file
    # list and filtered out again by message text.
    find $path -type f 2>/dev/null | while read file
        if test -n "$exclude_pattern"; and string match -qr -- $exclude_pattern $file
            continue
        end
        if not test -f "$file"
            continue
        end
        if test $show_spinner -eq 1
            # Update progress
            set current_file (math $current_file + 1)
            set percent (math "floor((100.0 * $current_file) / $total_files)")
            echo -ne "\rScanning... | Progress: $current_file / $total_files ($percent%)"
        end
        set size_bytes (stat -f %z $file 2>/dev/null)
        # The file may have vanished or be unreadable by the time we stat it.
        if test -z "$size_bytes"
            continue
        end
        if test $size_bytes -ge $min_size_bytes
            set size_gb (math --scale=2 "$size_bytes / 1024 / 1024 / 1024")
            # write result line; printf %s keeps backslashes in names intact
            printf '%s\t%s\n' $size_gb $file >> $tmpfile
        end
    end
    if test $debug -eq 1
        echo -ne "\rContents dump of tmp file: \n"
        cat $tmpfile
    end
    echo -ne "\rDone scanning! \n"

    ######################################
    # SORT RESULT
    if test $sort_files -eq 1
        echo "Sorting results and saving to tmpfile_sorted."
        # LC_ALL=C: sizes are written with "." as decimal separator, which
        # `sort -n` only understands reliably in the C locale.
        env LC_ALL=C sort -nr $tmpfile > $tmpfile_sorted
    else
        echo "Not sorting results; using tmpfile as sorted output."
        cp $tmpfile $tmpfile_sorted
    end

    ######################################
    # Output functions. Each takes the path of the result file as $argv[1];
    # every line of that file is "<size in GB><TAB><file path>".

    function _find_large_escape
        # Escape $argv[1] for use inside a double-quoted JSON/YAML string:
        # backslash, double quote and tab. The argument is quoted so an empty
        # value is still passed as a string (otherwise `string` reads stdin).
        string replace -a -- '\\' '\\\\' "$argv[1]" \
            | string replace -a -- '"' '\\"' \
            | string replace -a -- \t '\\t'
    end

    function _output_table
        # Print a two-column, human readable table.
        printf "%-10s %s\n" "Size (GB)" "Path"
        printf "%-10s %s\n" "---------" "----"
        for line in (cat $argv[1])
            # Split on the first tab only; the path itself may contain tabs.
            set -l fields (string split -m1 \t -- $line)
            printf "%-10s %s\n" "$fields[1]" "$fields[2]"
        end
    end

    function _output_json
        # Print a JSON array of {"size_gb": <number>, "path": "<string>"}.
        set -l lines (cat $argv[1])
        set -l last (count $lines)
        set -l idx 1
        echo "["
        for line in $lines
            set -l fields (string split -m1 \t -- $line)
            set -l escaped (_find_large_escape "$fields[2]")
            printf ' {"size_gb": %s, "path": "%s"}' "$fields[1]" "$escaped"
            # Comma after every element except the last one.
            if test $idx -lt $last
                echo ","
            else
                echo
            end
            set idx (math $idx + 1)
        end
        echo "]"
    end

    function _output_yaml
        # Print a YAML sequence of mappings with size_gb and path keys.
        for line in (cat $argv[1])
            set -l fields (string split -m1 \t -- $line)
            set -l escaped (_find_large_escape "$fields[2]")
            echo "- size_gb: $fields[1]"
            echo " path: \"$escaped\""
        end
    end

    switch $format
        case table
            _output_table $tmpfile_sorted
        case json
            _output_json $tmpfile_sorted
        case yaml
            _output_yaml $tmpfile_sorted
    end

    # Correctly format and save output to file
    if test $dump_to_file -eq 1
        set -l outpath
        switch $format
            case table
                set outpath "$outfile.txt"
                _output_table $tmpfile_sorted > $outpath
            case json
                set outpath "$outfile.json"
                _output_json $tmpfile_sorted > $outpath
            case yaml
                set outpath "$outfile.yaml"
                _output_yaml $tmpfile_sorted > $outpath
        end
        # Report the real file name, including the extension.
        echo "Saved output to $outpath"
    end

    # Clean up, then clear the globals so a later CTRL-C does not try to
    # remove files that are already gone.
    if test -e "$tmpfile"
        rm -- $tmpfile
    end
    if test -e "$tmpfile_sorted"
        rm -- $tmpfile_sorted
    end
    set -g tmpfile
    set -g tmpfile_sorted
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment