cleanup_cache.sh: A specialized cache cleanup utility for Ubuntu 24.04 that targets development and application caches. The script removes cache files from pip, npm, yarn, Conda, Docker, CMake build directories, Hugging Face, PyTorch, TensorFlow, and browser caches, and features parallel processing and backup capabilities…
#!/bin/bash
# Enhanced cleanup_cache.sh for Ubuntu 24.04
# Security and efficiency improvements
# Enable error handling and safety features
set -e # Exit on error
set -u # Exit on undefined variables
set -o pipefail # Exit on pipe failures
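# Example invocations (a sketch; the flags are defined in show_help below):
#   ./cleanup_cache.sh -d                    # dry run: report what would be removed
#   ./cleanup_cache.sh -v -p                 # verbose output with parallel processing
#   BACKUP_ENABLED=0 ./cleanup_cache.sh -f   # no prompts, backups disabled via the environment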
# Additional security hardening
readonly SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
readonly TIME_STAMP=$(date +%Y%m%d_%H%M%S)
readonly START_DATE=$(date) # Recorded up front so the final summary reports the true start time
# Script configuration (fixed values are readonly; BACKUP_ENABLED stays writable
# because the -n option must be able to clear it during option parsing)
BACKUP_ENABLED=${BACKUP_ENABLED:-1}
readonly BACKUP_DIR="${SCRIPT_DIR}/cache_backup_${TIME_STAMP}"
readonly LOG_FILE="${SCRIPT_DIR}/cleanup_${TIME_STAMP}.log"
readonly BACKUP_RETENTION_DAYS=7 # Auto-delete backups older than this
readonly TIMEOUT_DURATION=30 # Seconds to wait for user input
readonly MAX_BACKUP_SIZE_MB=1000 # Don't back up directories larger than this
# Runtime configuration
DRY_RUN=0
VERBOSE=0
FORCE=0
PARALLEL=0
# Security: Ensure we're running as the correct user (not root unless necessary)
if [[ $EUID -eq 0 ]]; then
echo "Warning: Running this script as root. Only individual operations will be elevated as needed."
fi
# Set up logging: mirror all stdout/stderr to a timestamped log file via tee
mkdir -p "$(dirname "$LOG_FILE")"
exec > >(tee -a "$LOG_FILE") 2>&1
# Function to show help
show_help() {
echo "Cache Cleanup Script for Ubuntu 24.04"
echo "-----------------------------------"
echo "Usage: $SCRIPT_NAME [OPTIONS]"
echo ""
echo "Options:"
echo " -d Dry run (show what would be done)"
echo " -v Verbose output"
echo " -f Force cleanup (no confirmation prompts)"
echo " -p Enable parallel processing where possible"
echo " -n No backup (skip creating backups)"
echo " -h Show this help message"
echo ""
echo "The script will clean various caches, including pip, CMake, Hugging Face,"
echo "PyTorch, TensorFlow, NVIDIA, Jupyter, npm, yarn, and Docker."
exit 0
}
# Function for secure directory and file handling
secure_path() {
local path="$1"
# Replace any instances of '..' in the path
path="${path//\.\./}"
# Ensure path doesn't end in / (except for root)
[[ "$path" != "/" ]] && path="${path%/}"
# Ensure path is absolute if it starts with /
if [[ "$path" == /* ]]; then
path="$(readlink -f "$path" 2>/dev/null || echo "$path")"
fi
echo "$path"
}
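# Illustration (hypothetical input): secure_path "/home/user/../../etc/" first drops the
# ".." segments and the trailing slash, leaving "/home/user///etc"; readlink -f then
# collapses the extra slashes for paths that exist on disk, while missing paths are
# returned with only the ".." removal applied.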
# Function to estimate directory size in MB
get_dir_size_mb() {
local dir="$1"
if [[ -d "$dir" ]]; then
local size=$(du -sm "$dir" 2>/dev/null | cut -f1)
echo "${size:-0}"
else
echo "0"
fi
}
# Function to check if directory is too large for backup
is_too_large_for_backup() {
local dir="$1"
local size_mb=$(get_dir_size_mb "$dir")
[[ $size_mb -gt $MAX_BACKUP_SIZE_MB ]]
}
# Function to backup directories before deletion with size check
backup_cache() {
local dir="$1"
# Security: Sanitize path
dir=$(secure_path "$dir")
# Skip if backup is disabled
[[ $BACKUP_ENABLED -eq 0 ]] && return 0
# Skip if the directory doesn't exist
[[ ! -d "$dir" ]] && return 0
local backup_path="${BACKUP_DIR}${dir}"
# Check directory size before backup
if is_too_large_for_backup "$dir"; then
echo "Warning: Directory '$dir' is too large (>$MAX_BACKUP_SIZE_MB MB). Skipping backup."
return 1
fi
mkdir -p "$(dirname "$backup_path")"
# Use rsync instead of cp for more control and efficiency
if [[ $VERBOSE -eq 1 ]]; then
rsync -a --info=progress2 "$dir/" "$backup_path/" 2>/dev/null || true
else
rsync -a "$dir/" "$backup_path/" 2>/dev/null || true
fi
# Add backup metadata
echo "Backed up from: $dir" > "${backup_path}/.backup_source"
echo "Backup date: $(date)" >> "${backup_path}/.backup_source"
echo "Backed up by: $USER" >> "${backup_path}/.backup_source"
return 0
}
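# Backups mirror the source's absolute path under BACKUP_DIR, so a manual restore is
# the reverse rsync (hypothetical example for the pip cache):
#   rsync -a "${BACKUP_DIR}${HOME}/.cache/pip/" "$HOME/.cache/pip/"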
# Function to safely remove directories
safe_remove() {
local dir="$1"
# Security: Sanitize path
dir=$(secure_path "$dir")
# Safety check for critical paths
for critical_path in "/" "/bin" "/boot" "/dev" "/etc" "/home" "/lib" "/media" "/mnt" "/opt" "/proc" "/root" "/run" "/sbin" "/srv" "/sys" "/tmp" "/usr" "/var"; do
if [[ "$dir" == "$critical_path" ]]; then
echo "ERROR: Refusing to remove critical system path: $dir"
return 1
fi
done
if [[ $DRY_RUN -eq 1 ]]; then
echo "[DRY RUN] Would remove: $dir"
return 0
fi
if [[ -d "$dir" ]]; then
[[ $VERBOSE -eq 1 ]] && echo "Removing directory: $dir"
# Get size before removal for reporting
local size_before=$(get_dir_size_mb "$dir")
# Remove files, symlinks, and empty directories individually, then clear whatever remains
if [[ $VERBOSE -eq 1 ]]; then
find "$dir" -type f -print -delete
find "$dir" -type l -print -delete
find "$dir" -type d -empty -print -delete
rm -rf "$dir"
else
find "$dir" -type f -delete 2>/dev/null || true
find "$dir" -type l -delete 2>/dev/null || true
find "$dir" -type d -empty -delete 2>/dev/null || true
rm -rf "$dir" 2>/dev/null || true
fi
echo "Freed approximately ${size_before}MB from $dir"
elif [[ -f "$dir" ]]; then
[[ $VERBOSE -eq 1 ]] && echo "Removing file: $dir"
rm -f "$dir"
else
[[ $VERBOSE -eq 1 ]] && echo "Nothing to remove at: $dir"
fi
return 0 # A missing path is not an error; keep 'set -e' from aborting the run
}
# Function to prompt user with timeout
prompt_with_timeout() {
local prompt="$1"
local timeout=$TIMEOUT_DURATION
local result="" # Pre-initialized so 'set -u' cannot trip if the read times out
# Skip if force is enabled
if [[ $FORCE -eq 1 ]]; then
return 0
fi
echo -e -n "$prompt"
read -t "$timeout" -r result || true
if [[ -z "$result" ]]; then
echo "Timed out, assuming 'n'"
return 1
fi
[[ "$result" =~ ^[Yy]$ ]]
}
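# Usage sketch: the function succeeds only on an explicit 'y'/'Y' (or when -f is set),
# so callers can gate destructive steps, e.g. (hypothetical path):
#   prompt_with_timeout "Proceed? (y/n): " && safe_remove "$HOME/.cache/example"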
# Function to clean specific cache with more detailed reporting
clean_cache() {
local description="$1"
local directory="$2"
# Security: Sanitize path
directory=$(secure_path "$directory")
echo "Cleaning $description..."
if [[ -d "$directory" ]]; then
local size_before=$(get_dir_size_mb "$directory")
[[ $VERBOSE -eq 1 ]] && echo "Found $description at $directory (${size_before}MB)"
if backup_cache "$directory"; then
echo "✓ Backup created for $description"
else
echo "⚠ Backup not created for $description"
fi
safe_remove "$directory"
echo "✓ $description cleaned (${size_before}MB freed)"
elif [[ -f "$directory" ]]; then
# Some entries (e.g. the Node REPL history) are single files rather than directories
safe_remove "$directory"
echo "✓ $description removed"
else
[[ $VERBOSE -eq 1 ]] && echo "No $description found at $directory"
fi
return 0 # Missing caches are not an error; keep 'set -e' from aborting the run
}
# Function to run tasks in parallel if enabled
run_parallel() {
if [[ $PARALLEL -eq 1 ]] && command -v parallel &>/dev/null; then
parallel --will-cite "$@"
else
for cmd in "$@"; do
eval "$cmd"
done
fi
}
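# Usage sketch (hypothetical commands): run_parallel "echo one" "echo two"
# Note: GNU parallel executes each command in a fresh shell, so any shell function
# named in a command must first be made visible with 'export -f'.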
# Function to remove old backups
cleanup_old_backups() {
echo "Checking for old backups..."
local old_backups
old_backups=$(find "$SCRIPT_DIR" -maxdepth 1 -type d -name "cache_backup_*" -mtime +"$BACKUP_RETENTION_DAYS" 2>/dev/null)
if [[ -n "$old_backups" ]]; then
echo "Found old backup directories to remove:"
echo "$old_backups"
if [[ $DRY_RUN -eq 1 ]]; then
echo "[DRY RUN] Would remove old backups"
else
if [[ $FORCE -eq 1 ]] || prompt_with_timeout "Remove old backups? (y/n): "; then
echo "$old_backups" | while read -r old_backup; do
echo "Removing old backup: $old_backup"
rm -rf "$old_backup"
done
echo "✓ Old backups removed"
else
echo "Keeping old backups"
fi
fi
else
echo "No old backups found to clean up"
fi
}
# Parse command line options
while getopts "dvfpnh" opt; do
case $opt in
d) DRY_RUN=1 ;;
v) VERBOSE=1 ;;
f) FORCE=1 ;;
p) PARALLEL=1 ;;
n) BACKUP_ENABLED=0 ;;
h) show_help ;;
*) show_help ;;
esac
done
# Output script configuration
echo "Cache Cleanup Script for Ubuntu 24.04"
echo "-----------------------------------"
echo "Started at: $(date)"
echo "Run by: $USER"
[[ $DRY_RUN -eq 1 ]] && echo "MODE: DRY RUN (no changes will be made)"
[[ $VERBOSE -eq 1 ]] && echo "MODE: VERBOSE"
[[ $FORCE -eq 1 ]] && echo "MODE: FORCE (no confirmations)"
[[ $PARALLEL -eq 1 ]] && echo "MODE: PARALLEL execution enabled"
[[ $BACKUP_ENABLED -eq 0 ]] && echo "MODE: BACKUP DISABLED"
echo "-----------------------------------"
# Initial space check
echo "Initial disk space usage:"
df -h /home
home_free_before=$(df -h /home | awk 'NR==2 {print $4}') # Saved for the end-of-run comparison
# Clean up old backups first
cleanup_old_backups
# Create backup directory if needed
if [[ $BACKUP_ENABLED -eq 1 ]] && [[ $DRY_RUN -eq 0 ]]; then
mkdir -p "$BACKUP_DIR"
echo "Backup will be created in: $BACKUP_DIR"
fi
# Confirmation prompt
if [[ $DRY_RUN -eq 0 ]] && [[ $FORCE -eq 0 ]]; then
echo "WARNING: This script will remove cache files and directories."
if ! prompt_with_timeout "Are you sure you want to clean up caches? (y/n): "; then
echo "Aborted."
exit 1
fi
fi
# Check for required tools
echo "Checking for required tools..."
for tool in rsync find du; do
if ! command -v "$tool" &>/dev/null; then
echo "Error: Required tool '$tool' not found."
echo "Please install the package that provides it (rsync, findutils, or coreutils)."
exit 1
fi
done
# Check for parallel if enabled
if [[ $PARALLEL -eq 1 ]] && ! command -v parallel &>/dev/null; then
echo "Warning: 'parallel' not found. Installing..."
if [[ $DRY_RUN -eq 0 ]]; then
sudo apt-get update && sudo apt-get install -y parallel
else
echo "[DRY RUN] Would install 'parallel'"
fi
fi
# Define cache directories to clean
declare -A cache_dirs=(
["pip cache"]="$HOME/.cache/pip"
["CMake cache"]="./build"
["Hugging Face cache"]="$HOME/.cache/huggingface"
["Hugging Face user directory"]="$HOME/.huggingface"
["PyTorch cache"]="$HOME/.cache/torch"
["TensorFlow/Keras cache"]="$HOME/.keras"
["NVIDIA compute cache"]="$HOME/.nv"
["Jupyter cache"]="$HOME/.jupyter/runtime"
["Python bytecode cache"]="$HOME/.config/__pycache__"
["VSCode cache"]="$HOME/.config/Code/Cache"
["VSCode CachedData"]="$HOME/.config/Code/CachedData"
["VSCode CachedExtensions"]="$HOME/.config/Code/CachedExtensions"
["Node cache"]="$HOME/.node_repl_history"
["Mozilla cache"]="$HOME/.mozilla/firefox/*/cache2"
["Chrome cache"]="$HOME/.config/google-chrome/Default/Cache"
["Chromium cache"]="$HOME/.config/chromium/Default/Cache"
["Thumbnails cache"]="$HOME/.cache/thumbnails"
["Font cache"]="$HOME/.cache/fontconfig"
)
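# To target another cache, add an entry above, e.g. (hypothetical, for Rust toolchains):
#   ["Cargo registry cache"]="$HOME/.cargo/registry/cache"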
# Specialized cleanups that need different handling
echo "Running specialized cleanups..."
# Conda cleanup
if command -v conda &>/dev/null; then
echo "Cleaning Conda cache..."
if [[ $DRY_RUN -eq 0 ]]; then
conda clean --all -y
echo "✓ Conda cache cleaned"
else
echo "[DRY RUN] Would clean Conda cache"
fi
fi
# System package cache cleanup
echo "Cleaning system cache..."
if [[ $DRY_RUN -eq 0 ]]; then
sudo apt-get clean
sudo apt-get autoclean
echo "✓ System package cache cleaned"
else
echo "[DRY RUN] Would clean system package cache"
fi
# Find and clean CMake build directories recursively with improved handling
echo "Cleaning CMake build directories..."
if [[ $DRY_RUN -eq 0 ]]; then
# Export the settings the child shell needs ('bash -c' below starts with a fresh environment)
export BACKUP_ENABLED BACKUP_DIR
# Use a more targeted approach to find only CMake build directories
find . -maxdepth 3 -type d -name "build" -exec bash -c '
dir="$1"
# Check if it looks like a CMake build directory
if [[ -f "$dir/CMakeCache.txt" ]] || [[ -d "$dir/CMakeFiles" ]]; then
echo "Found CMake build directory: $dir"
# Get size before cleanup
size=$(du -sh "$dir" 2>/dev/null | cut -f1)
if [[ $BACKUP_ENABLED -eq 1 ]]; then
backup_path="$BACKUP_DIR$dir"
mkdir -p "$(dirname "$backup_path")"
rsync -a "$dir/" "$backup_path/" 2>/dev/null || true
fi
find "$dir" -type f -delete 2>/dev/null || true
find "$dir" -type d -empty -delete 2>/dev/null || true
echo "Cleaned CMake build directory: $dir (was $size)"
fi
' bash {} \;
echo "✓ CMake build directories cleaned"
else
echo "[DRY RUN] Would clean CMake build directories"
fi
# npm cache (if npm is installed)
if command -v npm &>/dev/null; then
echo "Cleaning npm cache..."
if [[ $DRY_RUN -eq 0 ]]; then
# Get cache size before cleaning
npm_cache_dir=$(npm config get cache)
npm_cache_size=$(get_dir_size_mb "$npm_cache_dir")
npm cache clean --force
echo "✓ npm cache cleaned (${npm_cache_size}MB freed)"
else
echo "[DRY RUN] Would clean npm cache"
fi
fi
# yarn cache (if yarn is installed)
if command -v yarn &>/dev/null; then
echo "Cleaning yarn cache..."
if [[ $DRY_RUN -eq 0 ]]; then
# Get cache info before cleaning
yarn_cache_dir=$(yarn cache dir)
yarn_cache_size=$(get_dir_size_mb "$yarn_cache_dir")
yarn cache clean
echo "✓ yarn cache cleaned (${yarn_cache_size}MB freed)"
else
echo "[DRY RUN] Would clean yarn cache"
fi
fi
# Docker cleanup (if installed) with more comprehensive options
if command -v docker &>/dev/null; then
echo "Cleaning Docker cache..."
if [[ $DRY_RUN -eq 0 ]]; then
# Get Docker disk usage before cleaning
docker_before=$(docker system df 2>/dev/null || echo "Not available")
# Ask for confirmation for more aggressive cleanup
if [[ $FORCE -eq 1 ]] || prompt_with_timeout "Perform aggressive Docker cleanup (removes all unused images and volumes)? (y/n): "; then
echo "Performing aggressive Docker cleanup..."
docker system prune -a -f --volumes
else
echo "Performing standard Docker cleanup..."
docker system prune -f
fi
# Get Docker disk usage after cleaning
docker_after=$(docker system df 2>/dev/null || echo "Not available")
echo "✓ Docker cache cleaned"
echo "Docker disk usage before: $docker_before"
echo "Docker disk usage after: $docker_after"
else
echo "[DRY RUN] Would clean Docker cache"
fi
fi
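# Note: 'docker system prune -a --volumes' also removes tagged images that have no
# container and all unused volumes, so later builds may need to re-pull or rebuild.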
# Clean standard cache directories
echo "Cleaning standard cache directories..."
if [[ $PARALLEL -eq 1 ]] && command -v parallel &>/dev/null; then
echo "Using parallel processing for cache cleanup..."
# parallel runs each command in a fresh shell, so export the helper functions
# and the settings they depend on first
export -f clean_cache backup_cache safe_remove secure_path get_dir_size_mb is_too_large_for_backup
export BACKUP_ENABLED BACKUP_DIR DRY_RUN VERBOSE MAX_BACKUP_SIZE_MB
# Prepare commands for parallel execution (entries left unquoted so glob patterns expand)
commands=()
for desc in "${!cache_dirs[@]}"; do
for dir in ${cache_dirs[$desc]}; do
commands+=("clean_cache \"$desc\" \"$dir\"")
done
done
# Run commands in parallel
printf "%s\n" "${commands[@]}" | parallel -j 4
else
# Sequential processing (entries left unquoted so glob patterns expand)
for desc in "${!cache_dirs[@]}"; do
for dir in ${cache_dirs[$desc]}; do
clean_cache "$desc" "$dir"
done
done
fi
# Clean apt cache
if [[ $DRY_RUN -eq 0 ]]; then
echo "Cleaning APT lists cache..."
sudo rm -rf /var/lib/apt/lists/*
echo "✓ APT lists cache cleaned"
fi
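# Note: the removed package lists are regenerated by the next 'sudo apt-get update'.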
# Final space check
echo -e "\nFinal disk space usage:"
df -h /home
# Calculate space saved (the 'before' value was captured at startup)
home_free_after=$(df -h /home | awk 'NR==2 {print $4}')
echo -e "\nHome directory space: $home_free_before -> $home_free_after"
# Report largest remaining cache directories
echo -e "\nLargest remaining cache directories:"
find "$HOME/.cache" -type d -exec du -sm {} \; 2>/dev/null | sort -nr | head -10
# Summary
echo -e "\nCleanup Summary:"
echo "Started at: $(date -d@"$(stat -c %Y "$LOG_FILE")")"
echo "Finished at: $(date)"
echo "Log file: $LOG_FILE"
if [[ $BACKUP_ENABLED -eq 1 ]] && [[ $DRY_RUN -eq 0 ]]; then
echo "Backup directory: $BACKUP_DIR"
echo "Backup files will be automatically removed after $BACKUP_RETENTION_DAYS days"
fi
# Set secure permissions for backup directory and log file
if [[ $DRY_RUN -eq 0 ]]; then
if [[ $BACKUP_ENABLED -eq 1 ]]; then
chmod -R 700 "$BACKUP_DIR" # Only the owner can access the backup
fi
chmod 600 "$LOG_FILE" # Only the owner can read the log
fi
echo "Cleanup completed successfully!"