Skip to content

Instantly share code, notes, and snippets.

@nicolaracco
Created March 12, 2026 10:43
Show Gist options
  • Select an option

  • Save nicolaracco/296dcc1b97366c1171fb66ff613a470c to your computer and use it in GitHub Desktop.

Select an option

Save nicolaracco/296dcc1b97366c1171fb66ff613a470c to your computer and use it in GitHub Desktop.
When you don't have any monitoring solution available...
#!/usr/bin/env bash
# pod-metrics.sh - Compact rolling metrics for a Kubernetes pod
# Usage: ./pod-metrics.sh <pod-name> [namespace] [container] [interval_seconds]
#
# Positional arguments:
#   pod-name          pod to sample (required)
#   namespace         namespace of the pod (default: "default")
#   container         container to exec into (default: let kubectl choose)
#   interval_seconds  pause between samples, a whole number >= 1 (default: 30)
set -euo pipefail

POD="${1:-}"
NAMESPACE="${2:-default}"
CONTAINER="${3:-}"
INTERVAL="${4:-30}"

if [[ -z "$POD" ]]; then
  # Usage shown because of a bad invocation is a diagnostic, so it goes to stderr.
  {
    echo "Usage: $0 <pod-name> [namespace] [container] [interval_seconds]"
    echo " interval_seconds refresh interval in seconds (default: 30)"
  } >&2
  exit 1
fi

# Digits only, with at least one non-zero digit: "0" and "000" are rejected so
# the value really is positive, as the error message promises. Leading zeros
# ("007") are still accepted.
if ! [[ "$INTERVAL" =~ ^0*[1-9][0-9]*$ ]]; then
  echo "Error: interval must be a positive integer (seconds)" >&2
  exit 1
fi

# Kept as a plain string because it is expanded unquoted (word-split) where
# kubectl exec is invoked; empty means "no -c flag".
CONTAINER_FLAG=""
if [[ -n "$CONTAINER" ]]; then
  CONTAINER_FLAG="-c $CONTAINER"
fi

# Maximum number of samples kept in the rolling table.
MAX_ROWS=20

# Rolling history arrays (one element per sample, oldest first). Initialised
# to empty so expanding them is safe under `set -u` before the first sample.
declare -a H_TIME=() H_CPU=() H_MEM=() H_RX=() H_TX=() H_FDS=()
declare -a H_RESTARTS=() H_TOPCPU=() H_TOPMEM=()
#######################################
# Format a byte count with a binary-unit suffix.
# Arguments: $1 - byte count; anything non-numeric is rendered as "-"
# Outputs:   e.g. "512B", "1.5K", "20.0M", "3.2G" (no trailing newline for
#            numeric input; "-" is followed by a newline)
#######################################
fmt_bytes() {
  local amount=$1
  LC_ALL=C awk -v raw="$amount" '
    BEGIN {
      # A value that does not survive numeric coercion is not a number.
      if (raw + 0 != raw) {
        print "-"
        exit
      }

      kib = 1024
      mib = kib * 1024
      gib = mib * 1024

      # Smallest unit first; each guard leaves as soon as it matches.
      if (raw < kib) { printf "%dB", raw; exit }
      if (raw < mib) { printf "%.1fK", raw / kib; exit }
      if (raw < gib) { printf "%.1fM", raw / mib; exit }
      printf "%.1fG", raw / gib
    }'
}
#######################################
# Print four header fields for the pod, one per line:
# phase, total restart count, CPU request, memory limit.
# The request/limit are taken from the container named by CONTAINER when it
# matches a spec container, otherwise from the first container.
# Globals:   POD, NAMESPACE, CONTAINER (read)
# Outputs:   four lines on stdout; nothing if python3 is unavailable or fails
# Returns:   non-zero when the JSON could not be processed
#######################################
_pod_summary() {
  local pod_json
  # An unreachable pod degrades to an empty object so the defaults below apply.
  pod_json=$(kubectl get pod "$POD" -n "$NAMESPACE" -o json 2>/dev/null || echo '{}')
  # One interpreter start-up for all four fields; values are printed only
  # after every field has been computed, so output is all-or-nothing.
  printf '%s' "$pod_json" | python3 -c '
import json, sys

pod = json.load(sys.stdin)
wanted = sys.argv[1] if len(sys.argv) > 1 else ""
status = pod.get("status", {})
containers = pod.get("spec", {}).get("containers") or [{}]
chosen = next((c for c in containers if c.get("name") == wanted), containers[0])
resources = chosen.get("resources", {})
fields = [
    status.get("phase", "?"),
    sum(c.get("restartCount", 0) for c in status.get("containerStatuses", [])),
    resources.get("requests", {}).get("cpu", "<none>"),
    resources.get("limits", {}).get("memory", "<none>"),
]
print("\n".join(str(f) for f in fields))
' "${CONTAINER:-}" 2>/dev/null
}

#######################################
# Run a one-second probe inside the container and print one line:
#   "<cpu%> <mem_used_kB> <mem_total_kB> <rx_bytes> <tx_bytes> <open_fds>"
# Prints "- - - - - -" when kubectl exec fails.
# The probe script is fed on stdin (-i + quoted heredoc) so it can use
# unescaped single quotes and is not expanded by the local shell.
# NOTE(review): /proc/meminfo is often not cgroup-aware, so the memory figures
# may describe the node rather than the container — confirm for your runtime.
# Globals:   POD, NAMESPACE, CONTAINER_FLAG (read)
#######################################
_proc_snapshot() {
  # CONTAINER_FLAG is deliberately unquoted: it is either empty or "-c NAME"
  # and must split into two words.
  # shellcheck disable=SC2086
  kubectl exec -i "$POD" -n "$NAMESPACE" ${CONTAINER_FLAG:-} -- sh << 'SHELLEOF' 2>/dev/null || echo "- - - - - -"
# CPU — prefer cgroup accounting (container-local, no 32-bit overflow risk).
# $(()) in busybox/dash is 32-bit; /proc/stat cumulative ticks overflow it on
# hosts running more than a few days, producing garbage percentages.
cpu=0
if [ -f /sys/fs/cgroup/cpu.stat ]; then
# cgroup v2: usage_usec is cumulative microseconds of CPU time
u1=$(awk '/^usage_usec/{print $2}' /sys/fs/cgroup/cpu.stat)
sleep 1
u2=$(awk '/^usage_usec/{print $2}' /sys/fs/cgroup/cpu.stat)
cpu=$(awk -v u1="$u1" -v u2="$u2" 'BEGIN { printf "%.1f", (u2-u1)/10000 }')
elif [ -f /sys/fs/cgroup/cpuacct/cpuacct.usage ]; then
# cgroup v1: usage is cumulative nanoseconds
u1=$(cat /sys/fs/cgroup/cpuacct/cpuacct.usage)
sleep 1
u2=$(cat /sys/fs/cgroup/cpuacct/cpuacct.usage)
cpu=$(awk -v u1="$u1" -v u2="$u2" 'BEGIN { printf "%.1f", (u2-u1)/10000000 }')
else
# Fallback: /proc/stat — use awk floats for the delta to avoid sh integer overflow
line1=$(grep '^cpu ' /proc/stat)
sleep 1
line2=$(grep '^cpu ' /proc/stat)
cpu=$(awk -v l1="$line1" -v l2="$line2" 'BEGIN {
n = split(l1, a, " "); split(l2, b, " ")
for (i=2; i<=8; i++) { t1+=a[i]; t2+=b[i] }
dt = t2-t1; di = b[5]-a[5]
printf "%.1f", dt>0 ? (dt-di)/dt*100 : 0
}')
fi
# Memory (kB)
mt=$(awk '/^MemTotal:/{print $2}' /proc/meminfo)
mf=$(awk '/^MemAvailable:/{print $2}' /proc/meminfo)
mu=$(( mt - mf ))
# Network
rx=$(awk 'NR>2 && $1 !~ /^lo:/ { s += $2 } END { print s+0 }' /proc/net/dev)
tx=$(awk 'NR>2 && $1 !~ /^lo:/ { s += $10 } END { print s+0 }' /proc/net/dev)
# FDs — use glob, no grep needed
fds=0
for d in /proc/[0-9]*/fd; do
n=$(ls "$d" 2>/dev/null | wc -l)
fds=$(( fds + n ))
done
printf '%s %s %s %s %s %s\n' "$cpu" "$mu" "$mt" "$rx" "$tx" "$fds"
SHELLEOF
}

#######################################
# Clear the screen and draw the header plus the rolling history table,
# with the newest row in bold.
# Globals:   POD, NAMESPACE, INTERVAL, H_* history arrays (read)
# Arguments: $1 pod phase, $2 restart count, $3 CPU request,
#            $4 memory limit, $5 timestamp of this sample
#######################################
_render_history() {
  local pod_status=$1 restarts=$2 cpu_req=$3 mem_lim=$4 ts=$5
  local -r row_fmt='%-10s %-8s %-20s %-10s %-10s %-6s %-8s %-8s\n'
  local i last

  # A failing `clear` (e.g. TERM unset) must not abort the monitor under set -e.
  clear 2>/dev/null || true

  printf 'pod=%-25s ns=%-15s status=%-10s restarts=%s\n' \
    "$POD" "$NAMESPACE" "$pod_status" "$restarts"
  printf 'cpu_req=%-8s mem_lim=%-8s interval=%ss updated=%s\n' \
    "$cpu_req" "$mem_lim" "$INTERVAL" "$ts"
  echo
  # row_fmt is a constant defined above, never user data.
  # shellcheck disable=SC2059
  printf "$row_fmt" \
    TIME "CPU%" "MEM(used/total)" NET_RX NET_TX FDs TOP_CPU TOP_MEM
  printf '%.0s─' {1..88}; echo

  last=$(( ${#H_TIME[@]} - 1 ))
  for i in "${!H_TIME[@]}"; do
    # Highlight the latest row.
    if (( i == last )); then
      printf '\033[1m'
    fi
    # shellcheck disable=SC2059
    printf "$row_fmt" \
      "${H_TIME[$i]}" "${H_CPU[$i]}" "${H_MEM[$i]}" \
      "${H_RX[$i]}" "${H_TX[$i]}" "${H_FDS[$i]}" \
      "${H_TOPCPU[$i]}" "${H_TOPMEM[$i]}"
    if (( i == last )); then
      printf '\033[0m'
    fi
  done
}

#######################################
# Take one sample of the pod, append it to the rolling history (dropping the
# oldest row once MAX_ROWS is exceeded) and redraw the table.
# Every data source is best-effort: a failing kubectl/python3 call yields
# placeholder values ("?" or "-") instead of aborting the loop.
# Globals:   POD, NAMESPACE, CONTAINER, CONTAINER_FLAG, INTERVAL, MAX_ROWS (read)
#            H_TIME H_CPU H_MEM H_RX H_TX H_FDS H_RESTARTS H_TOPCPU H_TOPMEM
#            (appended to / trimmed)
# Outputs:   the rendered screen on stdout
#######################################
collect() {
  local ts pod_status restarts cpu_req mem_lim
  ts=$(date '+%H:%M:%S')

  # Pod header fields — status, restart count, request/limit.
  local summary
  summary=$(_pod_summary) || summary=''
  pod_status='' restarts='' cpu_req='' mem_lim=''
  if [[ -n "$summary" ]]; then
    {
      IFS= read -r pod_status
      IFS= read -r restarts
      IFS= read -r cpu_req
      IFS= read -r mem_lim
    } <<< "$summary" || true
  fi
  pod_status=${pod_status:-?}
  restarts=${restarts:-?}
  cpu_req=${cpu_req:-?}
  mem_lim=${mem_lim:-?}

  # kubectl top (best-effort; pod-wide figures, needs metrics-server).
  local top_cpu='-' top_mem='-' top_line top_name top_rest
  top_line=$(kubectl top pod "$POD" -n "$NAMESPACE" --no-headers 2>/dev/null || true)
  if [[ -n "$top_line" ]]; then
    read -r top_name top_cpu top_mem top_rest <<< "$top_line" || true
    top_cpu=${top_cpu:--}
    top_mem=${top_mem:--}
  fi

  # /proc snapshot from inside the container. Only the first line is parsed,
  # so a partial result followed by the fallback line cannot produce
  # multi-line cell values.
  local proc_out cpu mem_used mem_total rx tx fds proc_rest
  proc_out=$(_proc_snapshot) || proc_out='- - - - - -'
  read -r cpu mem_used mem_total rx tx fds proc_rest <<< "$proc_out" || true
  cpu=${cpu:--}
  rx=${rx:--}
  tx=${tx:--}
  fds=${fds:--}

  # Only decorate real numbers with "%"; a failed probe shows "-" not "-%".
  local cpu_cell='-'
  if [[ "$cpu" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    cpu_cell="${cpu}%"
  fi

  # Human-readable memory (kB → MiB).
  local mem_fmt='-'
  if [[ "$mem_used" =~ ^[0-9]+$ && "$mem_total" =~ ^[0-9]+$ ]]; then
    mem_fmt="$(( mem_used / 1024 ))Mi/$(( mem_total / 1024 ))Mi"
  fi

  local rx_fmt tx_fmt
  rx_fmt=$(fmt_bytes "$rx")
  tx_fmt=$(fmt_bytes "$tx")

  # Append to history.
  H_TIME+=("$ts")
  H_CPU+=("$cpu_cell")
  H_MEM+=("$mem_fmt")
  H_RX+=("$rx_fmt")
  H_TX+=("$tx_fmt")
  H_FDS+=("$fds")
  H_RESTARTS+=("$restarts")
  H_TOPCPU+=("$top_cpu")
  H_TOPMEM+=("$top_mem")

  # Trim oldest entry when over limit (at most one row is added per call).
  if (( ${#H_TIME[@]} > MAX_ROWS )); then
    H_TIME=("${H_TIME[@]:1}")
    H_CPU=("${H_CPU[@]:1}")
    H_MEM=("${H_MEM[@]:1}")
    H_RX=("${H_RX[@]:1}")
    H_TX=("${H_TX[@]:1}")
    H_FDS=("${H_FDS[@]:1}")
    H_RESTARTS=("${H_RESTARTS[@]:1}")
    H_TOPCPU=("${H_TOPCPU[@]:1}")
    H_TOPMEM=("${H_TOPMEM[@]:1}")
  fi

  _render_history "$pod_status" "$restarts" "$cpu_req" "$mem_lim" "$ts"
}
# Refresh forever: take one sample and redraw, then wait out the interval.
# Stops on Ctrl-C, or (under set -e) as soon as collect or sleep fails.
while :; do
  collect
  sleep "$INTERVAL"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment