Last active
May 24, 2023 01:30
-
-
Save arivictor/3c03acb38c21fd958776c0d966533af9 to your computer and use it in GitHub Desktop.
Iterate over a list of GCS buckets and log the object count of each top-level folder that reaches a threshold
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Scan a list of GCS buckets and append a JSON log entry for every top-level
# folder whose recursive object count is at least THRESHOLD.
#
# Configuration:
#   BUCKET_NAMES  bucket names to scan (edit the array below)
#   LOG_FILE      file that log entries are appended to (env-overridable)
#   THRESHOLD     minimum object count that triggers a log entry
#                 (env-overridable, non-negative integer)
#
# Requires: gsutil on PATH.
# Exit status: 0 if every bucket was scanned, non-zero otherwise.

# NOTE: `set -e` is deliberately not used. A failure on one bucket or folder
# is reported on stderr and must not prevent the remaining ones from being
# scanned; every critical command is checked explicitly instead.
set -uo pipefail

BUCKET_NAMES=("bucket1" "bucket2" "bucket3")
LOG_FILE="${LOG_FILE:-/path/to/log_file.json}"
THRESHOLD="${THRESHOLD:-10}"

#######################################
# Escape a string so it can be embedded inside a JSON string literal.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape() {
  local s=$1
  s=${s//\\/\\\\} # backslash first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

#######################################
# Count the objects stored under a GCS prefix, recursively.
# Arguments: $1 - prefix URL ending in "/", e.g. gs://bucket/folder/
# Outputs:   the object count on stdout
# Returns:   non-zero if the gsutil listing fails
#######################################
count_objects() {
  local prefix=$1
  local listing

  # "**" yields a flat recursive listing (one object URL per line). Unlike
  # `ls -r`, it emits no "dir/:" headers or blank separator lines, which
  # would otherwise be counted as objects.
  listing=$(gsutil ls "${prefix}**" </dev/null) || return

  # grep -c exits 1 when the count is 0; that is still a valid result here.
  grep -c '^gs://' <<<"$listing" || true
}

#######################################
# Render one log entry as a JSON object.
# Arguments: $1 - bucket name, $2 - folder name, $3 - object count
# Outputs:   the JSON object on stdout, followed by a newline
#######################################
build_log_entry() {
  local bucket=$1 folder=$2 count=$3
  local timestamp

  # -u: the trailing "Z" designates UTC, so the time must be UTC as well.
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  printf '{\n  "timestamp": "%s",\n  "bucket_name": "%s",\n  "subfolder_path": "%s",\n  "object_count": %d\n}\n' \
    "$timestamp" "$(json_escape "$bucket")" "$(json_escape "$folder")" "$count"
}

#######################################
# Scan the top-level folders of one bucket and log those whose object count
# is at least THRESHOLD.
# Globals:   LOG_FILE (read), THRESHOLD (read)
# Arguments: $1 - bucket name
# Returns:   0 on success, 1 if any listing or log write failed
#######################################
scan_bucket() {
  local bucket=$1
  local subfolders subfolder folder_name count
  local status=0

  # -d lists the matched prefixes themselves rather than their contents.
  # The URL is fully quoted so the local shell never expands the wildcard.
  if ! subfolders=$(gsutil ls -d "gs://${bucket}/*/" </dev/null); then
    printf 'warning: could not list top-level folders of bucket "%s" (missing bucket, no access, or no folders)\n' \
      "$bucket" >&2
    return 1
  fi

  while IFS= read -r subfolder; do
    # Only entries ending in "/" are folders; this also skips blank lines.
    [[ "$subfolder" == */ ]] || continue

    # gs://bucket/folder/ -> folder
    folder_name=${subfolder%/}
    folder_name=${folder_name##*/}

    if ! count=$(count_objects "$subfolder"); then
      printf 'warning: could not count objects under "%s"\n' "$subfolder" >&2
      status=1
      continue
    fi

    # Log folders whose object count reaches (>=) the threshold.
    if ((count >= THRESHOLD)); then
      if ! build_log_entry "$bucket" "$folder_name" "$count" >>"$LOG_FILE"; then
        printf 'warning: could not write to log file "%s"\n' "$LOG_FILE" >&2
        status=1
      fi
    fi
  done <<<"$subfolders"

  return "$status"
}

#######################################
# Entry point: validate the configuration, then scan every configured bucket.
# Globals:   BUCKET_NAMES (read), THRESHOLD (read)
# Returns:   0 if all buckets were scanned successfully, non-zero otherwise
#######################################
main() {
  local bucket
  local status=0

  if ! [[ "$THRESHOLD" =~ ^[0-9]+$ ]]; then
    printf 'error: THRESHOLD must be a non-negative integer, got "%s"\n' \
      "$THRESHOLD" >&2
    return 2
  fi

  if ! command -v gsutil >/dev/null; then
    printf 'error: gsutil not found on PATH\n' >&2
    return 127
  fi

  for bucket in "${BUCKET_NAMES[@]}"; do
    scan_bucket "$bucket" || status=1
  done

  return "$status"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment