Kube Tricks
#!/bin/bash
set -eux -o pipefail
# Find ConfigMaps older than 90 days that are not referenced by any workload; they are candidates for deletion.
# There are a number of shell and kubectl tricks in use here:
# -o jsonpath='{.items[*]}' outputs the items as a stream of JSON objects, rather than a single object with an .items array.
# This allows jq to stream the output (kubectl still needs to buffer it), speeding up the process a lot.
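# For comparison, an assumed equivalent that does not stream: with -o json, jq has to parse the
# whole list before .items[] can emit anything:
#   kubectl get configmap -o json | jq -c '.items[]'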
tmpdir="$(mktemp -d)"
all_configmaps="${tmpdir}/all-configmaps.json"
find_candidates() {
  kubectl get configmap -o jsonpath='{.items[*]}' > "${all_configmaps}"
  candidate_configmaps="${tmpdir}/candidate_configmaps.json"
  # List all items that match our filters;
  # Greater than 90 days old:
  # (sorted, since comm(1) requires sorted input)
  <"${all_configmaps}" \
    jq -r --argjson "date" "$(date +%s)" '
      select((.metadata.creationTimestamp | fromdateiso8601) < ($date - (60 * 60 * 24 * 90))) |
      .metadata.name
    ' | sort > "${candidate_configmaps}"
}
obj_dir="${tmpdir}/objs/"
mkdir "${obj_dir}"
find_refs() {
  objtype="$1"
  local all_objs="${obj_dir}/all-${objtype}.json"
  kubectl get "${objtype}" -o jsonpath='{.items[*]}' > "${all_objs}"
  local all_obj_refs="${obj_dir}/all-${objtype}-refs.txt"
  <"${all_objs}" all_configmap_refs > "${all_obj_refs}"
}
all_referenced="${tmpdir}/all-referenced-configmaps.txt"
find_all_used() {
  find_refs deployments
  find_refs replicasets
  find_refs cronjobs
  find_refs jobs
  find_refs pods
  # Sponge (from moreutils) rather than >> to ensure we always get complete output
  sort -m "${obj_dir}"/*.txt | uniq | sponge "${all_referenced}"
}
# all_configmap_refs is a filter function that takes a stream of Kubernetes objects as JSON and
# outputs the names of the configmaps those objects reference.
all_configmap_refs() {
  # Pods can reference configmaps in one of several ways. The most common are pod.spec.volumes[].configMap,
  # pod.spec.containers[].envFrom[].configMapRef, and pod.spec.containers[].env[].valueFrom.configMapKeyRef.
  # Other references to configmaps in pods will also be caught by this recursive descent.
  jq -r '.. | .configMap?, .configMapRef?, .configMapKeyRef? | select(. != null) | .name' |
    sort |
    uniq
}
to_delete="${tmpdir}/to_delete.txt"
all_to_delete() {
  # Candidates that are not referenced by any workload
  comm -23 "${candidate_configmaps}" "${all_referenced}" > "${to_delete}"
}
main() {
  find_all_used
  find_candidates
  all_to_delete
  wc -l "${candidate_configmaps}" "${all_referenced}" "${to_delete}"
  # Summarize the configmaps to delete, grouped by name with the trailing hash suffix stripped.
  # Anything with a count of 1 is probably a bad idea to delete.
  < "${to_delete}" sed 's/-[[:alnum:]]*$//' | uniq -c
}
main "$@"
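# A possible follow-up, not part of the original script: once the summary above has been reviewed
# by hand, the unreferenced configmaps could be removed with something like:
#   <"${to_delete}" xargs -r kubectl delete configmap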
# The 'trap : TERM INT; sleep infinity & wait' idiom keeps a pod alive indefinitely while still letting it exit promptly on SIGTERM.
# Long-running debug pod with the postgres client installed:
kubectl --context CLUSTER run --image ubuntu pgtool -- /bin/bash -c "apt-get update; apt-get install -y postgresql-client; trap : TERM INT; sleep infinity & wait"
# Long-running redis debug pod:
kubectl --context gke_lido-staging_us-east1_lido-staging-us-east1 run --image redis redis -- /bin/bash -c "trap : TERM INT; sleep infinity & wait"
# Debug pod with the aws-cli, running as a specific service account:
kubectl --context CLUSTER run --image amazon/aws-cli --command --overrides='{"spec": { "serviceAccountName": "default"}}' cli -- /bin/bash -c "trap : TERM INT;sleep infinity"
# Spawn a one-off debug pod from a deployment's pod template, with probes disabled and the command replaced by a long sleep:
yq -o json '.spec.template | .metadata.name="bc-test" | .kind = "Pod" | .apiVersion = "v1" | .spec.containers[].command = ["/bin/bash", "-c", "sleep 86400"] | .spec.containers[].livenessProbe=null | .spec.containers[].readinessProbe=null' deployment.yaml | kubectl apply -f -
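# Assumed follow-ups for the debug pods above (pod names taken from the commands; HOST/USER are placeholders):
#   kubectl --context CLUSTER exec -it pgtool -- psql -h HOST -U USER
#   kubectl --context CLUSTER delete pod pgtool redis cli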
# Decode every value in a secret:
kubectl get secret $SECRET -o json | jq -r '.data[] | @base64d'
# Decode a secret as "key: value" lines:
kubectl --context gke_lido-staging_us-east1_lido-staging-us-east1 get secrets lido-app -o json | jq -r '.data | to_entries | .[] | .key +": " +(.value|@base64d)'
# As .env file
kubectl get configmap CONFIGMAP -o json |jq -r '.data |to_entries[] | .key + "=\"" + .value + "\"" '
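# An assumed variant (the key name "password" is a placeholder): decode a single key without jq:
#   kubectl get secret $SECRET -o jsonpath='{.data.password}' | base64 -d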
# This daemonset does nothing and uses no resources, but it causes each node to download the specified image as soon as the node starts up. This is useful to reduce cold-start times on batch jobs or highly variable workloads, though if you're autoscaling just one deployment it won't help.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: prefetch
spec:
  selector:
    matchLabels:
      instance: prefetch
  updateStrategy:
    rollingUpdate:
      maxUnavailable: "100%"
  # Pod template
  template:
    metadata:
      labels:
        app: YOUR_APP
        role: prefetch
        instance: prefetch
    spec:
      containers:
        - image: YOUR_IMAGE:GITHUB_SHA
          name: prefetch
          command:
            - /bin/bash
            - -c
            # Sleep forever, but wake up immediately on signal
            - "trap : TERM INT; sleep infinity & wait"
          resources:
            requests:
              cpu: "0"
              memory: "0"
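# Applying and checking the prefetch daemonset; the filename here is an assumption:
kubectl apply -f prefetch.yaml
kubectl rollout status daemonset/prefetch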
# jq filter: names of items older than 30 days
(now - 86400 * 30) as $cutoffdate | .items[] | select((.metadata.creationTimestamp | fromdate) < $cutoffdate) | .metadata.name
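# For example, wired into kubectl (the resource type is an assumption):
kubectl get configmap -o json | jq -r '(now - 86400 * 30) as $cutoffdate | .items[] | select((.metadata.creationTimestamp | fromdate) < $cutoffdate) | .metadata.name'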
# Scale down PR deployments older than ${SCALEDOWN_DAYS} days (e.g. 7 for one week).
kubectl -n mergerequests get deploy -o json |jq -r ".items[] | select((now - (.metadata.creationTimestamp|fromdate)) > 86400 * ${SCALEDOWN_DAYS}) | select(.metadata.name | test(\".*pr.*\")) | .metadata.name" | xargs -r kubectl scale -n mergerequests --replicas 0 deploy
# Delete PR deployments and services older than ${DELETE_DAYS} days (e.g. 14 for two weeks).
kubectl -n mergerequests get deploy -o json |jq -r ".items[] | select((now - (.metadata.creationTimestamp|fromdate)) > 86400 * ${DELETE_DAYS}) | select(.metadata.name | test(\".*pr.*\")) | .metadata.name" | xargs -r kubectl -n mergerequests delete deploy
kubectl -n mergerequests get service -o json |jq -r ".items[] | select((now - (.metadata.creationTimestamp|fromdate)) > 86400 * ${DELETE_DAYS}) |select(.metadata.name | test(\".*pr.*\")) | .metadata.name" | xargs -r kubectl -n mergerequests delete service
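# An assumed preview step: list the deployments oldest-first before scaling down or deleting anything.
kubectl -n mergerequests get deploy --sort-by=.metadata.creationTimestamp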
  • Check for upgrade issues
    • Specifically, EKS "Add-Ons" can cause issues
  • Upgrade Cluster (rough commands sketched below)
  • Upgrade Node Groups
  • Upgrade Kube-state-metrics
  • Check monitoring/metrics
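A rough sketch of the cluster and node group upgrades, assuming an EKS cluster managed with eksctl; the cluster name, nodegroup name, add-on name, and versions are all placeholders:

# Check which add-on versions are compatible with the target Kubernetes version
aws eks describe-addon-versions --kubernetes-version 1.30 --addon-name vpc-cni
# Upgrade the control plane one minor version
eksctl upgrade cluster --name CLUSTER --version 1.30 --approve
# Upgrade a managed add-on
aws eks update-addon --cluster-name CLUSTER --addon-name vpc-cni --addon-version ADDON_VERSION
# Upgrade a managed node group to match the control plane
eksctl upgrade nodegroup --cluster CLUSTER --name NODEGROUP --kubernetes-version 1.30
# Sanity-check node versions afterwards
kubectl get nodes -o wide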