Created
March 12, 2026 10:43
-
-
Save nicolaracco/296dcc1b97366c1171fb66ff613a470c to your computer and use it in GitHub Desktop.
When you don't have any monitoring solution available...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# pod-metrics.sh - Compact rolling metrics for a Kubernetes pod
# Usage: ./pod-metrics.sh <pod-name> [namespace] [container] [interval_seconds]
#
# Positional arguments:
#   pod-name          pod to watch (required)
#   namespace         namespace of the pod            (default: default)
#   container         container to exec into          (default: kubectl's choice)
#   interval_seconds  pause between two samples       (default: 30)
set -euo pipefail

POD="${1:-}"
NAMESPACE="${2:-default}"
CONTAINER="${3:-}"
INTERVAL="${4:-30}"

if [[ -z "$POD" ]]; then
  # A missing pod name is an error, so the usage text goes to stderr.
  {
    echo "Usage: $0 <pod-name> [namespace] [container] [interval_seconds]"
    echo " interval_seconds refresh interval in seconds (default: 30)"
  } >&2
  exit 1
fi

# Require at least one non-zero digit so that "0" is rejected, matching the
# "positive integer" wording of the message (leading zeros are tolerated).
if ! [[ "$INTERVAL" =~ ^0*[1-9][0-9]*$ ]]; then
  echo "Error: interval must be a positive integer (seconds)" >&2
  exit 1
fi

# Every kubectl call further down is best-effort, so without this check a
# missing binary would only show up as a table full of placeholders.
if ! command -v kubectl > /dev/null 2>&1; then
  echo "Error: kubectl not found in PATH" >&2
  exit 1
fi

# Kept as a plain string because it is expanded unquoted (and word-split) on
# the kubectl exec command line; it stays empty when no container was given.
CONTAINER_FLAG=""
if [[ -n "$CONTAINER" ]]; then
  CONTAINER_FLAG="-c $CONTAINER"
fi

# Maximum number of samples kept in the rolling history.
MAX_ROWS=20

# Rolling history arrays — one element per sample, all indexed in parallel.
declare -a H_TIME=() H_CPU=() H_MEM=() H_RX=() H_TX=() H_FDS=()
declare -a H_RESTARTS=() H_TOPCPU=() H_TOPMEM=()
#######################################
# Render a byte count with a binary-unit suffix.
# Arguments: $1 - byte count; anything non-numeric (e.g. the "-" placeholder
#                 of a failed sample, or an empty string) is accepted
# Outputs:   "<n>B" below 1024, otherwise one decimal place followed by
#            K, M, G or T (powers of 1024); "-" for non-numeric input
#######################################
fmt_bytes() {
  LC_ALL=C awk -v b="${1-}" 'BEGIN {
    # A value that does not survive numeric coercion is not a number.
    if (b + 0 != b) { print "-"; exit }
    if (b < 1024) { printf "%dB\n", b; exit }
    n = split("K M G T", unit, " ")
    # Step up one unit per division; stop at the largest known unit so that
    # anything beyond it is still expressed in T.
    for (i = 1; i < n && b >= 1048576; i++) b /= 1024
    printf "%.1f%s\n", b / 1024, unit[i]
  }'
}
#######################################
# Take one sample of the pod, append it to the rolling history and redraw
# the whole table.
# Globals read:    POD NAMESPACE CONTAINER CONTAINER_FLAG INTERVAL MAX_ROWS
# Globals written: H_TIME H_CPU H_MEM H_RX H_TX H_FDS H_RESTARTS H_TOPCPU
#                  H_TOPMEM (oldest entry dropped once MAX_ROWS is exceeded)
# Outputs:         clears the screen, then prints header and history to stdout
# Every kubectl / python3 call is best-effort: a failure shows up as '?' or
# '-' in the table instead of aborting the monitoring loop.
#######################################
collect() {
  local ts
  ts=$(date '+%H:%M:%S')

  # Pod JSON — used for status, limits, restart count
  local pod_json
  pod_json=$(kubectl get pod "$POD" -n "$NAMESPACE" -o json 2>/dev/null || echo '{}')

  # Parse the JSON once and emit the four header fields on a single line.
  # Requests/limits are taken from the container named by CONTAINER when it
  # exists in the spec, otherwise from the first container.
  local pod_status restarts cpu_req mem_lim pod_fields
  pod_fields=$(printf '%s' "$pod_json" | WANT_CONTAINER="$CONTAINER" python3 -c '
import json, os, sys
pod = json.load(sys.stdin)
status = pod.get("status", {})
containers = pod.get("spec", {}).get("containers") or [{}]
wanted = os.environ.get("WANT_CONTAINER", "")
chosen = next((c for c in containers if c.get("name") == wanted), containers[0])
resources = chosen.get("resources", {})
restarts = sum(c.get("restartCount", 0) for c in status.get("containerStatuses", []))
print(status.get("phase", "?"), restarts,
      resources.get("requests", {}).get("cpu", "<none>"),
      resources.get("limits", {}).get("memory", "<none>"))
' 2>/dev/null) || pod_fields=''
  IFS=' ' read -r pod_status restarts cpu_req mem_lim <<< "$pod_fields" || true
  pod_status=${pod_status:-?}
  restarts=${restarts:-?}
  cpu_req=${cpu_req:-?}
  mem_lim=${mem_lim:-?}

  # kubectl top (best-effort): columns are NAME CPU MEMORY
  local top_cpu='-' top_mem='-' top_line _ignored
  top_line=$(kubectl top pod "$POD" -n "$NAMESPACE" --no-headers 2>/dev/null || true)
  if [[ -n "$top_line" ]]; then
    read -r _ignored top_cpu top_mem _ignored <<< "$top_line" || true
  fi

  # /proc snapshot from inside the container
  # Use -i + heredoc so the inner sh script can use unescaped single quotes.
  # The heredoc delimiter is quoted, so nothing below is expanded locally.
  # CONTAINER_FLAG is intentionally unquoted: it must split into "-c <name>".
  local proc_out
  # shellcheck disable=SC2086
  proc_out=$(kubectl exec -i "$POD" -n "$NAMESPACE" $CONTAINER_FLAG -- sh << 'SHELLEOF' 2>/dev/null || echo "- - - - - -"
# CPU — prefer cgroup accounting (container-local, no 32-bit overflow risk).
# $(()) in busybox/dash is 32-bit; /proc/stat cumulative ticks overflow it on
# hosts running more than a few days, producing garbage percentages.
cpu=0
if [ -f /sys/fs/cgroup/cpu.stat ]; then
  # cgroup v2: usage_usec is cumulative microseconds of CPU time
  u1=$(awk '/^usage_usec/{print $2}' /sys/fs/cgroup/cpu.stat)
  sleep 1
  u2=$(awk '/^usage_usec/{print $2}' /sys/fs/cgroup/cpu.stat)
  cpu=$(awk -v u1="$u1" -v u2="$u2" 'BEGIN { printf "%.1f", (u2-u1)/10000 }')
elif [ -f /sys/fs/cgroup/cpuacct/cpuacct.usage ]; then
  # cgroup v1: usage is cumulative nanoseconds
  u1=$(cat /sys/fs/cgroup/cpuacct/cpuacct.usage)
  sleep 1
  u2=$(cat /sys/fs/cgroup/cpuacct/cpuacct.usage)
  cpu=$(awk -v u1="$u1" -v u2="$u2" 'BEGIN { printf "%.1f", (u2-u1)/10000000 }')
else
  # Fallback: /proc/stat — use awk floats for the delta to avoid sh integer overflow
  line1=$(grep '^cpu ' /proc/stat)
  sleep 1
  line2=$(grep '^cpu ' /proc/stat)
  cpu=$(awk -v l1="$line1" -v l2="$line2" 'BEGIN {
    n = split(l1, a, " "); split(l2, b, " ")
    for (i=2; i<=8; i++) { t1+=a[i]; t2+=b[i] }
    dt = t2-t1; di = b[5]-a[5]
    printf "%.1f", dt>0 ? (dt-di)/dt*100 : 0
  }')
fi
# Memory (kB). /proc/meminfo is only the fallback: inside a container it
# normally describes the whole node. The subtraction is done in awk so that
# a missing MemAvailable line cannot abort the script with a syntax error.
mt=$(awk '/^MemTotal:/{print $2}' /proc/meminfo)
mf=$(awk '/^MemAvailable:/{print $2}' /proc/meminfo)
mu=$(awk -v t="$mt" -v f="$mf" 'BEGIN { printf "%d", t - f }')
usage=; limit=; inactive=
if [ -f /sys/fs/cgroup/memory.current ]; then
  # cgroup v2: values are in bytes, memory.max is the literal "max" when unlimited
  usage=$(cat /sys/fs/cgroup/memory.current)
  limit=$(cat /sys/fs/cgroup/memory.max 2>/dev/null)
  inactive=$(awk '$1 == "inactive_file" { print $2 }' /sys/fs/cgroup/memory.stat 2>/dev/null)
elif [ -f /sys/fs/cgroup/memory/memory.usage_in_bytes ]; then
  # cgroup v1: values are in bytes, the limit is a huge number when unlimited
  usage=$(cat /sys/fs/cgroup/memory/memory.usage_in_bytes)
  limit=$(cat /sys/fs/cgroup/memory/memory.limit_in_bytes 2>/dev/null)
  inactive=$(awk '$1 == "total_inactive_file" { print $2 }' /sys/fs/cgroup/memory/memory.stat 2>/dev/null)
fi
if [ -n "$usage" ]; then
  # used = usage minus inactive file cache (reclaimable), floored at zero
  mu=$(awk -v u="$usage" -v i="$inactive" 'BEGIN {
    w = u - i; if (w < 0) w = 0
    printf "%d", w / 1024
  }')
  # total = container limit when one is set and smaller than the node total
  mt=$(awk -v l="$limit" -v t="$mt" 'BEGIN {
    k = l / 1024; tt = t + 0
    if (l ~ /^[0-9]+$/ && k > 0 && (tt == 0 || k < tt)) printf "%d", k
    else printf "%d", tt
  }')
fi
# Network — cumulative byte counters summed over all interfaces except lo
rx=$(awk 'NR>2 && $1 !~ /^lo:/ { s += $2 } END { print s+0 }' /proc/net/dev)
tx=$(awk 'NR>2 && $1 !~ /^lo:/ { s += $10 } END { print s+0 }' /proc/net/dev)
# FDs — use glob, no grep needed
fds=0
for d in /proc/[0-9]*/fd; do
  n=$(ls "$d" 2>/dev/null | wc -l)
  fds=$(( fds + n ))
done
printf '%s %s %s %s %s %s\n' "$cpu" "$mu" "$mt" "$rx" "$tx" "$fds"
SHELLEOF
  )

  # Split the first line of the snapshot; missing fields become "-".
  local cpu mem_used mem_total rx tx fds
  IFS=' ' read -r cpu mem_used mem_total rx tx fds _ignored <<< "$proc_out" || true
  fds=${fds:--}

  # Only a numeric reading gets the percent sign, so a failed sample is shown
  # as "-" rather than "-%".
  local cpu_cell='-'
  if [[ "$cpu" =~ ^-?[0-9]+([.][0-9]+)?$ ]]; then
    cpu_cell="${cpu}%"
  fi

  # Human-readable memory (kB → MiB)
  local mem_fmt='-'
  if [[ "$mem_used" =~ ^[0-9]+$ && "$mem_total" =~ ^[0-9]+$ ]]; then
    mem_fmt="$(( mem_used / 1024 ))Mi/$(( mem_total / 1024 ))Mi"
  fi

  local rx_fmt tx_fmt
  rx_fmt=$(fmt_bytes "$rx")
  tx_fmt=$(fmt_bytes "$tx")

  # Append to history
  H_TIME+=("$ts")
  H_CPU+=("$cpu_cell")
  H_MEM+=("$mem_fmt")
  H_RX+=("$rx_fmt")
  H_TX+=("$tx_fmt")
  H_FDS+=("$fds")
  H_RESTARTS+=("$restarts")
  H_TOPCPU+=("$top_cpu")
  H_TOPMEM+=("$top_mem")

  # Trim oldest entry when over limit
  if (( ${#H_TIME[@]} > MAX_ROWS )); then
    H_TIME=("${H_TIME[@]:1}")
    H_CPU=("${H_CPU[@]:1}")
    H_MEM=("${H_MEM[@]:1}")
    H_RX=("${H_RX[@]:1}")
    H_TX=("${H_TX[@]:1}")
    H_FDS=("${H_FDS[@]:1}")
    H_RESTARTS=("${H_RESTARTS[@]:1}")
    H_TOPCPU=("${H_TOPCPU[@]:1}")
    H_TOPMEM=("${H_TOPMEM[@]:1}")
  fi

  # ── Render ──────────────────────────────────────────────────────────────────
  # Clearing is cosmetic; it must not kill the loop (e.g. when TERM is unset).
  clear 2>/dev/null || true
  printf 'pod=%-25s ns=%-15s status=%-10s restarts=%s\n' \
    "$POD" "$NAMESPACE" "$pod_status" "$restarts"
  printf 'cpu_req=%-8s mem_lim=%-8s interval=%ss updated=%s\n' \
    "$cpu_req" "$mem_lim" "$INTERVAL" "$ts"
  echo

  # One format string shared by the column header and every history row.
  local row_fmt='%-10s %-8s %-20s %-10s %-10s %-6s %-8s %-8s\n'
  # shellcheck disable=SC2059
  printf "$row_fmt" TIME "CPU%" "MEM(used/total)" NET_RX NET_TX FDs TOP_CPU TOP_MEM
  printf '%.0s─' {1..88}; echo

  local i last=$(( ${#H_TIME[@]} - 1 ))
  local -a row
  for i in "${!H_TIME[@]}"; do
    row=(
      "${H_TIME[$i]}" "${H_CPU[$i]}" "${H_MEM[$i]}"
      "${H_RX[$i]}" "${H_TX[$i]}" "${H_FDS[$i]}"
      "${H_TOPCPU[$i]}" "${H_TOPMEM[$i]}"
    )
    if (( i == last )); then
      # Highlight the latest row (bold on / bold off)
      # shellcheck disable=SC2059
      printf '\033[1m'"$row_fmt"'\033[0m' "${row[@]}"
    else
      # shellcheck disable=SC2059
      printf "$row_fmt" "${row[@]}"
    fi
  done
}
# Main loop: take a sample and redraw the table, then pause for INTERVAL
# seconds; repeats until the script is interrupted.
while :; do
  collect
  sleep "$INTERVAL"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment