cleanup_cache.sh: A specialized cache cleanup utility for Ubuntu 24.04 that targets development and application caches. The script removes cache files from various sources including pip, npm, yarn, Conda, Docker, CMake build directories, Hugging Face, PyTorch, TensorFlow, and browser caches. Features parallel processing, backup capab…
#!/bin/bash
# Enhanced cleanup_cache.sh for Ubuntu 24.04
# Security and efficiency improvements
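#
# Example invocations (flags are documented in show_help below):
#   ./cleanup_cache.sh -d -v   # dry run with verbose output
#   ./cleanup_cache.sh -f -p   # no prompts, parallel cleanup
#   ./cleanup_cache.sh -n      # clean without creating backups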
# Enable error handling and safety features
set -e          # Exit on error
set -u          # Exit on undefined variables
set -o pipefail # Exit on pipe failures

# Additional security hardening
readonly SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
readonly TIME_STAMP=$(date +%Y%m%d_%H%M%S)
readonly START_TIME=$(date)  # Recorded for the end-of-run summary
# Script configuration (constants are readonly; BACKUP_ENABLED stays writable
# so the -n option can disable backups during option parsing)
BACKUP_ENABLED=${BACKUP_ENABLED:-1}
readonly BACKUP_DIR="${SCRIPT_DIR}/cache_backup_${TIME_STAMP}"
readonly LOG_FILE="${SCRIPT_DIR}/cleanup_${TIME_STAMP}.log"
readonly BACKUP_RETENTION_DAYS=7  # Auto-delete backups older than this
readonly TIMEOUT_DURATION=30      # Seconds to wait for user input
readonly MAX_BACKUP_SIZE_MB=1000  # Don't back up directories larger than this
# Runtime configuration
DRY_RUN=0
VERBOSE=0
FORCE=0
PARALLEL=0

# Security: Ensure we're running as the correct user (not root unless necessary)
if [[ $EUID -eq 0 ]]; then
    echo "Warning: Running this script as root. Only individual operations will be elevated as needed."
fi

# Setup logging (all output is tee'd into the timestamped log file)
mkdir -p "$(dirname "$LOG_FILE")"
exec > >(tee -a "$LOG_FILE") 2>&1

# Function to show help
show_help() {
    echo "Cache Cleanup Script for Ubuntu 24.04"
    echo "-------------------------------------"
    echo "Usage: $SCRIPT_NAME [OPTIONS]"
    echo ""
    echo "Options:"
    echo "  -d  Dry run (show what would be done)"
    echo "  -v  Verbose output"
    echo "  -f  Force cleanup (no confirmation prompts)"
    echo "  -p  Enable parallel processing where possible"
    echo "  -n  No backup (skip creating backups)"
    echo "  -h  Show this help message"
    echo ""
    echo "The script will clean various caches, including pip, CMake, Hugging Face,"
    echo "PyTorch, TensorFlow, NVIDIA, Jupyter, npm, yarn, and Docker."
    exit 0
}
# Function for secure directory and file handling
secure_path() {
    local path="$1"
    # Replace any instances of '..' in the path
    path="${path//\.\./}"
    # Ensure path doesn't end in / (except for root)
    [[ "$path" != "/" ]] && path="${path%/}"
    # Ensure path is absolute if it starts with /
    if [[ "$path" == /* ]]; then
        path="$(readlink -f "$path" 2>/dev/null || echo "$path")"
    fi
    echo "$path"
}
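# Illustration (not executed): secure_path "/home/user/../.cache/pip/" strips
# the ".." and the trailing slash, then canonicalizes via readlink -f,
# yielding "/home/user/.cache/pip" when that path exists.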
# Function to estimate directory size in MB
get_dir_size_mb() {
    local dir="$1"
    if [[ -d "$dir" ]]; then
        local size=$(du -sm "$dir" 2>/dev/null | cut -f1)
        echo "${size:-0}"
    else
        echo "0"
    fi
}

# Function to check if directory is too large for backup
is_too_large_for_backup() {
    local dir="$1"
    local size_mb=$(get_dir_size_mb "$dir")
    [[ $size_mb -gt $MAX_BACKUP_SIZE_MB ]]
}
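# Illustration: is_too_large_for_backup succeeds (exit 0) only when the
# directory exceeds MAX_BACKUP_SIZE_MB, so callers can write e.g.:
#   is_too_large_for_backup "$HOME/.cache/pip" && echo "backup will be skipped"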
# Function to backup directories before deletion with size check
backup_cache() {
    local dir="$1"
    # Security: Sanitize path
    dir=$(secure_path "$dir")
    # Skip if backup is disabled
    [[ $BACKUP_ENABLED -eq 0 ]] && return 0
    # Skip if the directory doesn't exist
    [[ ! -d "$dir" ]] && return 0
    local backup_path="${BACKUP_DIR}${dir}"
    # Check directory size before backup
    if is_too_large_for_backup "$dir"; then
        echo "Warning: Directory '$dir' is too large (>$MAX_BACKUP_SIZE_MB MB). Skipping backup."
        return 1
    fi
    # Create the target itself (not just its parent) so the metadata file
    # below can always be written, even if rsync fails
    mkdir -p "$backup_path"
    # Use rsync instead of cp for more control and efficiency
    if [[ $VERBOSE -eq 1 ]]; then
        rsync -a --info=progress2 "$dir/" "$backup_path/" 2>/dev/null || true
    else
        rsync -a "$dir/" "$backup_path/" 2>/dev/null || true
    fi
    # Add backup metadata
    echo "Backed up from: $dir" > "${backup_path}/.backup_source"
    echo "Backup date: $(date)" >> "${backup_path}/.backup_source"
    echo "Backed up by: ${USER:-$(id -un)}" >> "${backup_path}/.backup_source"
    return 0
}
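# Restoring is a manual step; a minimal sketch (assumes the run's backup
# directory still exists and $dir is the original cache path):
#   rsync -a "${BACKUP_DIR}${dir}/" "${dir}/"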
# Function to safely remove directories
safe_remove() {
    local dir="$1"
    # Security: Sanitize path
    dir=$(secure_path "$dir")
    # Safety check for critical paths ($HOME itself is guarded as well)
    for critical_path in "/" "/bin" "/boot" "/dev" "/etc" "/home" "/lib" "/media" "/mnt" "/opt" "/proc" "/root" "/run" "/sbin" "/srv" "/sys" "/tmp" "/usr" "/var" "$HOME"; do
        if [[ "$dir" == "$critical_path" ]]; then
            echo "ERROR: Refusing to remove critical system path: $dir"
            return 1
        fi
    done
    if [[ $DRY_RUN -eq 1 ]]; then
        echo "[DRY RUN] Would remove: $dir"
        return 0
    fi
    if [[ -d "$dir" ]]; then
        [[ $VERBOSE -eq 1 ]] && echo "Removing directory: $dir"
        # Get size before removal for reporting
        local size_before=$(get_dir_size_mb "$dir")
        # Use find to remove files first, which is more controlled than rm -rf
        if [[ $VERBOSE -eq 1 ]]; then
            find "$dir" -type f -print -delete
            find "$dir" -type l -print -delete
            find "$dir" -type d -empty -print -delete
            rm -rf "$dir"
        else
            find "$dir" -type f -delete 2>/dev/null || true
            find "$dir" -type l -delete 2>/dev/null || true
            find "$dir" -type d -empty -delete 2>/dev/null || true
            rm -rf "$dir" 2>/dev/null || true
        fi
        echo "Freed approximately ${size_before}MB from $dir"
    elif [[ -f "$dir" ]]; then
        [[ $VERBOSE -eq 1 ]] && echo "Removing file: $dir"
        rm -f "$dir"
    else
        [[ $VERBOSE -eq 1 ]] && echo "Nothing to remove at: $dir"
    fi
    # Explicit success so a quiet "nothing to remove" branch can't trip set -e
    return 0
}
# Function to prompt user with timeout
prompt_with_timeout() {
    local prompt="$1"
    local timeout=$TIMEOUT_DURATION
    local result=""  # Pre-initialize so set -u is safe if read times out
    # Skip if force is enabled
    if [[ $FORCE -eq 1 ]]; then
        return 0
    fi
    echo -e -n "$prompt"
    read -t "$timeout" -r result || true
    if [[ -z "$result" ]]; then
        echo "Timed out, assuming 'n'"
        return 1
    fi
    [[ "$result" =~ ^[Yy]$ ]]
}
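# Illustration: prompt_with_timeout "Proceed? (y/n): " returns 0 on an explicit
# "y"/"Y" (or immediately when -f was given) and 1 otherwise, including on
# timeout, so it slots directly into an if-condition.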
# Function to clean a specific cache with more detailed reporting.
# The second argument may be a glob pattern (e.g. the Firefox profile path in
# cache_dirs); it is expanded here so each match is handled individually.
# This assumes cache paths contain no whitespace.
clean_cache() {
    local description="$1"
    local pattern="$2"
    local directory
    for directory in $pattern; do
        # Security: Sanitize path
        directory=$(secure_path "$directory")
        echo "Cleaning $description..."
        if [[ -d "$directory" ]]; then
            local size_before=$(get_dir_size_mb "$directory")
            [[ $VERBOSE -eq 1 ]] && echo "Found $description at $directory (${size_before}MB)"
            if backup_cache "$directory"; then
                echo "✓ Backup created for $description"
            else
                echo "⚠ Backup not created for $description"
            fi
            safe_remove "$directory"
            echo "✓ $description cleaned (${size_before}MB freed)"
        else
            [[ $VERBOSE -eq 1 ]] && echo "No $description found at $directory"
        fi
    done
    return 0
}
# Function to run tasks in parallel if enabled
# (jobs are fed to GNU parallel via stdin, which is how it expects a job list)
run_parallel() {
    if [[ $PARALLEL -eq 1 ]] && command -v parallel &>/dev/null; then
        printf '%s\n' "$@" | parallel --will-cite
    else
        for cmd in "$@"; do
            eval "$cmd"
        done
    fi
}
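# Illustration: run_parallel "echo one" "echo two" runs both jobs through GNU
# parallel when -p was given and parallel is installed, otherwise sequentially
# via eval. (The main cache loop further below invokes parallel directly.)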
# Function to remove old backups
cleanup_old_backups() {
    echo "Checking for old backups..."
    # Match by name with find itself; a bare glob would be passed literally
    # (and error out) when no backup directories exist yet
    local old_backups=$(find "$SCRIPT_DIR" -maxdepth 1 -type d -name "cache_backup_*" -mtime +"$BACKUP_RETENTION_DAYS" 2>/dev/null)
    if [[ -n "$old_backups" ]]; then
        echo "Found old backup directories to remove:"
        echo "$old_backups"
        if [[ $DRY_RUN -eq 1 ]]; then
            echo "[DRY RUN] Would remove old backups"
        else
            if [[ $FORCE -eq 1 ]] || prompt_with_timeout "Remove old backups? (y/n): "; then
                echo "$old_backups" | while read -r old_backup; do
                    echo "Removing old backup: $old_backup"
                    rm -rf "$old_backup"
                done
                echo "✓ Old backups removed"
            else
                echo "Keeping old backups"
            fi
        fi
    else
        echo "No old backups found to clean up"
    fi
}
# Parse command line options
while getopts "dvfpnh" opt; do
    case $opt in
        d) DRY_RUN=1 ;;
        v) VERBOSE=1 ;;
        f) FORCE=1 ;;
        p) PARALLEL=1 ;;
        n) BACKUP_ENABLED=0 ;;
        h) show_help ;;
        *) show_help ;;
    esac
done
# Output script configuration
echo "Cache Cleanup Script for Ubuntu 24.04"
echo "-------------------------------------"
echo "Started at: $(date)"
echo "Run by: $USER"
[[ $DRY_RUN -eq 1 ]] && echo "MODE: DRY RUN (no changes will be made)"
[[ $VERBOSE -eq 1 ]] && echo "MODE: VERBOSE"
[[ $FORCE -eq 1 ]] && echo "MODE: FORCE (no confirmations)"
[[ $PARALLEL -eq 1 ]] && echo "MODE: PARALLEL execution enabled"
[[ $BACKUP_ENABLED -eq 0 ]] && echo "MODE: BACKUP DISABLED"
echo "-------------------------------------"
# Initial space check (free space is recorded for the end-of-run summary)
echo "Initial disk space usage:"
df -h /home
home_free_before=$(df -h /home | awk 'NR==2 {print $4}')
# Clean up old backups first
cleanup_old_backups

# Create backup directory if needed
if [[ $BACKUP_ENABLED -eq 1 ]] && [[ $DRY_RUN -eq 0 ]]; then
    mkdir -p "$BACKUP_DIR"
    echo "Backup will be created in: $BACKUP_DIR"
fi

# Confirmation prompt
if [[ $DRY_RUN -eq 0 ]] && [[ $FORCE -eq 0 ]]; then
    echo "WARNING: This script will remove cache files and directories."
    if ! prompt_with_timeout "Are you sure you want to clean up caches? (y/n): "; then
        echo "Aborted."
        exit 1
    fi
fi

# Check for required tools
echo "Checking for required tools..."
for tool in rsync find du; do
    if ! command -v "$tool" &>/dev/null; then
        echo "Error: Required tool '$tool' not found."
        echo "Please install it with: sudo apt-get install $tool"
        exit 1
    fi
done

# Check for parallel if enabled
if [[ $PARALLEL -eq 1 ]] && ! command -v parallel &>/dev/null; then
    echo "Warning: 'parallel' not found. Installing..."
    if [[ $DRY_RUN -eq 0 ]]; then
        sudo apt-get update && sudo apt-get install -y parallel
    else
        echo "[DRY RUN] Would install 'parallel'"
    fi
fi
# Define cache directories to clean
declare -A cache_dirs=(
    ["pip cache"]="$HOME/.cache/pip"
    ["CMake cache"]="./build"
    ["Hugging Face cache"]="$HOME/.cache/huggingface"
    ["Hugging Face user directory"]="$HOME/.huggingface"
    ["PyTorch cache"]="$HOME/.cache/torch"
    ["TensorFlow/Keras cache"]="$HOME/.keras"
    ["NVIDIA compute cache"]="$HOME/.nv"
    ["Jupyter cache"]="$HOME/.jupyter/runtime"
    ["Python bytecode cache"]="$HOME/.config/__pycache__"
    ["VSCode cache"]="$HOME/.config/Code/Cache"
    ["VSCode CachedData"]="$HOME/.config/Code/CachedData"
    ["VSCode CachedExtensions"]="$HOME/.config/Code/CachedExtensions"
    ["Node cache"]="$HOME/.node_repl_history"
    ["Mozilla cache"]="$HOME/.mozilla/firefox/*/cache2"  # glob; expanded by clean_cache
    ["Chrome cache"]="$HOME/.config/google-chrome/Default/Cache"
    ["Chromium cache"]="$HOME/.config/chromium/Default/Cache"
    ["Thumbnails cache"]="$HOME/.cache/thumbnails"
    ["Font cache"]="$HOME/.cache/fontconfig"
)
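# To target another cache, append an entry to the map above; for example (a
# hypothetical addition, not part of the default set):
#   cache_dirs["Gradle cache"]="$HOME/.gradle/caches"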
# Specialized cleanups that need different handling
echo "Running specialized cleanups..."

# Conda cleanup
if command -v conda &>/dev/null; then
    echo "Cleaning Conda cache..."
    if [[ $DRY_RUN -eq 0 ]]; then
        conda clean --all -y
        echo "✓ Conda cache cleaned"
    else
        echo "[DRY RUN] Would clean Conda cache"
    fi
fi

# System package cache cleanup
echo "Cleaning system cache..."
if [[ $DRY_RUN -eq 0 ]]; then
    sudo apt-get clean
    sudo apt-get autoclean
    echo "✓ System package cache cleaned"
else
    echo "[DRY RUN] Would clean system package cache"
fi
# Find and clean CMake build directories recursively with improved handling
echo "Cleaning CMake build directories..."
if [[ $DRY_RUN -eq 0 ]]; then
    # Use a more targeted approach to find only CMake build directories.
    # BACKUP_ENABLED and BACKUP_DIR are passed in as arguments because the
    # bash -c child shell cannot see unexported parent variables.
    find . -maxdepth 3 -type d -name "build" -exec bash -c '
        dir="$1" backup_enabled="$2" backup_dir="$3"
        # Check if it looks like a CMake build directory
        if [[ -f "$dir/CMakeCache.txt" ]] || [[ -d "$dir/CMakeFiles" ]]; then
            echo "Found CMake build directory: $dir"
            # Get size before cleanup
            size=$(du -sh "$dir" 2>/dev/null | cut -f1)
            if [[ $backup_enabled -eq 1 ]]; then
                # Strip the leading "." so the backup mirrors the absolute layout
                backup_path="${backup_dir}${dir#.}"
                mkdir -p "$(dirname "$backup_path")"
                rsync -a "$dir/" "$backup_path/" 2>/dev/null || true
            fi
            find "$dir" -type f -delete 2>/dev/null || true
            find "$dir" -type d -empty -delete 2>/dev/null || true
            echo "Cleaned CMake build directory: $dir (was $size)"
        fi
    ' bash {} "$BACKUP_ENABLED" "$BACKUP_DIR" \;
    echo "✓ CMake build directories cleaned"
else
    echo "[DRY RUN] Would clean CMake build directories"
fi
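# One way to preview which directories the block above targets, without
# deleting anything:
#   find . -maxdepth 3 -type d -name build | while read -r d; do
#       [[ -f "$d/CMakeCache.txt" || -d "$d/CMakeFiles" ]] && echo "$d"
#   done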
# npm cache (if npm is installed)
if command -v npm &>/dev/null; then
    echo "Cleaning npm cache..."
    if [[ $DRY_RUN -eq 0 ]]; then
        # Get cache size before cleaning
        npm_cache_dir=$(npm config get cache)
        npm_cache_size=$(get_dir_size_mb "$npm_cache_dir")
        npm cache clean --force
        echo "✓ npm cache cleaned (${npm_cache_size}MB freed)"
    else
        echo "[DRY RUN] Would clean npm cache"
    fi
fi

# yarn cache (if yarn is installed)
if command -v yarn &>/dev/null; then
    echo "Cleaning yarn cache..."
    if [[ $DRY_RUN -eq 0 ]]; then
        # Get cache info before cleaning
        yarn_cache_dir=$(yarn cache dir)
        yarn_cache_size=$(get_dir_size_mb "$yarn_cache_dir")
        yarn cache clean
        echo "✓ yarn cache cleaned (${yarn_cache_size}MB freed)"
    else
        echo "[DRY RUN] Would clean yarn cache"
    fi
fi
# Docker cleanup (if installed) with more comprehensive options
if command -v docker &>/dev/null; then
    echo "Cleaning Docker cache..."
    if [[ $DRY_RUN -eq 0 ]]; then
        # Get Docker disk usage before cleaning
        docker_before=$(docker system df 2>/dev/null || echo "Not available")
        # Ask for confirmation for more aggressive cleanup
        if [[ $FORCE -eq 1 ]] || prompt_with_timeout "Perform aggressive Docker cleanup (removes all unused images and volumes)? (y/n): "; then
            echo "Performing aggressive Docker cleanup..."
            docker system prune -a -f --volumes
        else
            echo "Performing standard Docker cleanup..."
            docker system prune -f
        fi
        # Get Docker disk usage after cleaning
        docker_after=$(docker system df 2>/dev/null || echo "Not available")
        echo "✓ Docker cache cleaned"
        echo "Docker disk usage before: $docker_before"
        echo "Docker disk usage after: $docker_after"
    else
        echo "[DRY RUN] Would clean Docker cache"
    fi
fi
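# Tip: "docker system df -v" gives a per-image and per-volume breakdown if you
# want to inspect usage in more detail before or after the prune.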
# Clean standard cache directories
echo "Cleaning standard cache directories..."
if [[ $PARALLEL -eq 1 ]] && command -v parallel &>/dev/null; then
    echo "Using parallel processing for cache cleanup..."
    # GNU parallel runs each job in a child shell, so the functions involved
    # (and the variables they read) must be exported to be visible there
    export -f clean_cache secure_path get_dir_size_mb is_too_large_for_backup backup_cache safe_remove
    export DRY_RUN VERBOSE BACKUP_ENABLED BACKUP_DIR MAX_BACKUP_SIZE_MB
    # Prepare commands for parallel execution
    commands=()
    for desc in "${!cache_dirs[@]}"; do
        dir="${cache_dirs[$desc]}"
        commands+=("clean_cache \"$desc\" \"$dir\"")
    done
    # Run commands in parallel
    printf "%s\n" "${commands[@]}" | parallel -j 4
else
    # Sequential processing
    for desc in "${!cache_dirs[@]}"; do
        dir="${cache_dirs[$desc]}"
        clean_cache "$desc" "$dir"
    done
fi
# Clean APT lists cache
if [[ $DRY_RUN -eq 0 ]]; then
    echo "Cleaning APT lists cache..."
    sudo rm -rf /var/lib/apt/lists/*
    echo "✓ APT lists cache cleaned"
fi
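# Note: once the lists are removed, "sudo apt-get update" must repopulate them
# before the next package install or upgrade.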
# Final space check
echo -e "\nFinal disk space usage:"
df -h /home

# Report space saved (the before-value was captured at the initial space check)
home_free_after=$(df -h /home | awk 'NR==2 {print $4}')
echo -e "\nHome directory free space: $home_free_before -> $home_free_after"

# Report largest remaining cache directories
echo -e "\nLargest remaining cache directories:"
find "$HOME/.cache" -type d -exec du -sm {} \; 2>/dev/null | sort -nr | head -10
# Summary
echo -e "\nCleanup Summary:"
echo "Started at: $START_TIME"
| echo "Finished at: $(date)" | |
| echo "Log file: $LOG_FILE" | |
| if [[ $BACKUP_ENABLED -eq 1 ]] && [[ $DRY_RUN -eq 0 ]]; then | |
| echo "Backup directory: $BACKUP_DIR" | |
| echo "Backup files will be automatically removed after $BACKUP_RETENTION_DAYS days" | |
| fi | |
| # Set secure permissions for backup directory and log file | |
| if [[ $DRY_RUN -eq 0 ]]; then | |
| if [[ $BACKUP_ENABLED -eq 1 ]]; then | |
| chmod -R 700 "$BACKUP_DIR" # Only the owner can access the backup | |
| fi | |
| chmod 600 "$LOG_FILE" # Only the owner can read the log | |
| fi | |
| echo "Cleanup completed successfully!" |