Skip to content

Instantly share code, notes, and snippets.

@jakefhyde
Created February 20, 2024 22:43
Show Gist options
  • Select an option

  • Save jakefhyde/ede05b3f8d491d4d4bf0ed9781413fed to your computer and use it in GitHub Desktop.

Select an option

Save jakefhyde/ede05b3f8d491d4d4bf0ed9781413fed to your computer and use it in GitHub Desktop.
Validates all Rancher and AWS infrastructure resources required for provisioning an RKE2/K3s cluster using the AWS out-of-tree external cloud-provider
#!/bin/bash
# shellcheck disable=SC2016
# (SC2016 is disabled because the usage and error text below print a literal
# '$RANCHER_TOKEN' from single-quoted strings on purpose.)
# Exit as soon as a command fails outside of a condition.
set -e
# Validates all Rancher and AWS infrastructure resources required for provisioning an RKE2/K3s cluster using the AWS
# out-of-tree external cloud-provider
# Prints the usage text (one line per option) to stdout.
display_help() {
  local script
  script="$(basename "$0")"
  printf '%s\n' \
    "Usage: ${script} --server-url [server url] --cluster [cluster name] --region [region]" \
    '' \
    ' $RANCHER_TOKEN [Required] environment variable containing rancher admin token' \
    ' -s, --server-url [Required] rancher server url' \
    " -c, --cluster [Required] target cluster name (provisioning.cattle.io)" \
    " -r, --region [Required] AWS Region" \
    " -n, --namespace [Optional] namespace cluster & machines live in (default: fleet-default)" \
    " -d, --debug [Optional] calls 'set -x'" \
    " -h, --help print this message"
}
# Writes "<prefix> <message>" to stdout, interpreting backslash escapes
# (such as the ANSI colour codes passed by the level helpers).
# Arguments: $1 - prefix, $2 - message
log() {
  local prefix="$1"
  local message="$2"
  printf '%b\n' "${prefix} ${message}"
}
# Logs a cyan [DEBUG] message, but only when $DEBUG is exactly "true".
# Arguments: $1 - message
debug() {
  # Nothing to do (and nothing to fail) unless debugging was requested.
  [[ $DEBUG == true ]] || return 0
  log "\033[0;36m[DEBUG]\033[0m" "$1"
}
# Logs a red [FATAL] message and terminates the script with status 1.
# Arguments: $1 - message
fatal() {
  local -r prefix="\033[0;31m[FATAL]\033[0m"
  log "${prefix}" "$1"
  exit 1
}
# Logs a red [ERROR] message; unlike fatal, execution continues afterwards.
# Arguments: $1 - message
error() {
  local -r prefix="\033[0;31m[ERROR]\033[0m"
  log "${prefix}" "$1"
}
# Logs a bold yellow [WARN] message.
# Arguments: $1 - message
warning() {
  local -r prefix="\033[1;33m[WARN]\033[0m"
  log "${prefix}" "$1"
}
# Logs a green [INFO] message.
# Arguments: $1 - message
info() {
  local -r prefix="\033[0;32m[INFO]\033[0m"
  log "${prefix}" "$1"
}
# Aborts via fatal when the given command cannot be resolved by the shell.
# Arguments: $1 - command name
commandExists() {
  local tool="$1"
  command -v "${tool}" >/dev/null 2>&1 || fatal "Missing ${tool}"
}
# External tools this script shells out to. Checking all of them up front gives
# a clear "Missing <tool>" error instead of a failure halfway through a run.
# curl and yq are needed to fetch the kubeconfig; grep, awk, tr and cut are
# used while inspecting machine objects.
cmds=(
  "kubectl"
  "jq"
  "yq"
  "aws"
  "curl"
  "cut"
  "grep"
  "awk"
  "tr"
)
for cmd in "${cmds[@]}"; do
  commandExists "${cmd}"
done
# Arguments that are not options are collected here by parseArgs.
POSITIONAL_ARGS=()

# Aborts with the usage text when an option that takes a value is the last
# argument. Without this check the second `shift` fails and `set -e` ends the
# script without any message.
# Arguments: $1 - option name, $2 - number of arguments left (option included)
requireOptionValue() {
  if [[ "$2" -lt 2 ]]; then
    echo "Option $1 requires a value"
    display_help
    exit 1
  fi
}

# Parses the command line into the globals SERVER_URL, CLUSTER, REGION,
# NAMESPACE and DEBUG; everything else is appended to POSITIONAL_ARGS.
# Exits 0 after printing help, and 1 on an unknown option or a missing value.
# Arguments: the script's command-line arguments
parseArgs() {
  while [[ $# -gt 0 ]]; do
    case $1 in
      -s | --server-url)
        requireOptionValue "$1" "$#"
        SERVER_URL="$2"
        shift 2 # past argument and value
        ;;
      -c | --cluster)
        requireOptionValue "$1" "$#"
        CLUSTER="$2"
        shift 2 # past argument and value
        ;;
      -r | --region)
        requireOptionValue "$1" "$#"
        REGION="$2"
        shift 2 # past argument and value
        ;;
      -n | --namespace)
        requireOptionValue "$1" "$#"
        NAMESPACE="$2"
        shift 2 # past argument and value
        ;;
      -d | --debug)
        # NOTE: tracing prints every expanded command, including the curl call
        # that carries $RANCHER_TOKEN. Treat debug output as sensitive.
        set -x
        DEBUG=true
        shift # past argument
        ;;
      -h | --help)
        display_help
        # Asking for help is not an error.
        exit 0
        ;;
      -*)
        echo "Unknown option $1"
        display_help
        exit 1
        ;;
      *)
        POSITIONAL_ARGS+=("$1") # save positional arg
        shift                   # past argument
        ;;
    esac
  done
}

parseArgs "$@"
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
# Required inputs: report the first one that is missing, show the usage text
# and stop with status 1.
[[ -n "$RANCHER_TOKEN" ]] || {
  echo '$RANCHER_TOKEN is unset'
  display_help
  exit 1
}
[[ -n "$SERVER_URL" ]] || {
  echo '--server-url is unset'
  display_help
  exit 1
}
[[ -n "$CLUSTER" ]] || {
  echo '--cluster is unset'
  display_help
  exit 1
}
[[ -n "$REGION" ]] || {
  echo '--region is unset'
  display_help
  exit 1
}
# Use fleet-default when no namespace was given (or it was given empty).
: "${NAMESPACE:=fleet-default}"
# Asks the Rancher server for a kubeconfig of its "local" cluster and writes it
# to .kube/config, relative to the current directory.
# Globals: SERVER_URL (read), RANCHER_TOKEN (read)
# Exits via fatal when the request fails or the response holds no kubeconfig.
getLocalKubeConfig() {
  echo "Getting local cluster kubeconfig"
  # Accept the server with or without a scheme and trailing slash, so that
  # "https://rancher.example.com/" does not become "https://https://...".
  local host="${SERVER_URL#*://}"
  host="${host%/}"
  local response
  # --fail turns HTTP errors into a non-zero exit status. Capturing the body
  # first keeps that status visible; piping straight into yq would hide it.
  if ! response="$(curl --fail --silent --show-error \
    "https://${host}/v3/clusters/local?action=generateKubeconfig" \
    -X 'POST' \
    -H 'accept: application/yaml' \
    -H "cookie: R_SESS=${RANCHER_TOKEN}" \
    --compressed)"; then
    fatal "Unable to generate a kubeconfig from ${SERVER_URL}"
  fi
  local kubeconfig
  kubeconfig="$(yq -r '.config' <<<"${response}")"
  if [[ -z "${kubeconfig}" || "${kubeconfig}" == "null" ]]; then
    fatal "Response from ${SERVER_URL} did not contain a kubeconfig"
  fi
  printf '%s\n' "${kubeconfig}" >.kube/config
}
# Looks up the names of all CAPI machines labelled as belonging to ${CLUSTER}
# and stores them, whitespace separated, in the global MACHINES.
# Globals: CLUSTER, NAMESPACE (read); LABEL_SELECTOR, MACHINES (written)
# Exits via fatal when no machine matches: with nothing to check, the run
# would otherwise end by reporting the cluster as successfully validated.
getAllMachines() {
  echo "Getting local cluster CAPI machines belonging to cluster ${CLUSTER}"
  LABEL_SELECTOR="cluster.x-k8s.io/cluster-name=${CLUSTER}"
  MACHINES=$(kubectl --kubeconfig .kube/config get machines -n "${NAMESPACE}" -l "${LABEL_SELECTOR}" -o jsonpath='{.items[*].metadata.name}')
  if [[ -z "${MACHINES//[[:space:]]/}" ]]; then
    fatal "No machines found for cluster ${CLUSTER} in namespace ${NAMESPACE}"
  fi
}
# Fails the run unless a collection holds exactly one element.
# Arguments:
#   $1 - element count, as printed by jq's `length`
#   $2 - description used in the error message, e.g. "vpcs for vpc vpc-123"
expectExactlyOne() {
  local count="$1"
  local description="$2"
  # String comparison on purpose: 0, more than 1, and an empty count (jq
  # failed) are all rejected. The former `((i = count)) && [[ i -ne 1 ]]`
  # never fired for 0 because the arithmetic assignment itself was "false".
  if [[ "${count}" != "1" ]]; then
    fatal "Unexpected number of ${description}: expected 1, got ${count}"
  fi
}

# Fails the run unless the AWS resource JSON carries a tag with the given key.
# Arguments:
#   $1 - resource description for the error message, e.g. "vpc vpc-123"
#   $2 - tag key to look for
#   $3 - resource JSON containing a Tags array of {Key, Value} objects
requireTagKey() {
  local resource="$1"
  local tagKey="$2"
  local json="$3"
  if ! jq -e --arg key "${tagKey}" '(.Tags // []) | any(.Key == $key)' <<<"${json}" >/dev/null; then
    fatal "${resource} did not have tag ${tagKey}"
  fi
}

# Validates one CAPI machine and the AWS resources behind it: the
# infrastructure machine object and its cluster tag, the EC2 instance, the VPC
# and subnet tags, and the IAM policy attached to the instance profile role.
# Globals: NAMESPACE, REGION (read); reads .kube/config relative to the cwd
# Arguments: $1 - name of the CAPI machine
# Exits via fatal on the first failed check.
validateMachine() {
  local machineName
  machineName="$1"
  info "Validating machine ${machineName}"

  local machineJson
  machineJson="$(kubectl --kubeconfig .kube/config get machine -n "${NAMESPACE}" "${machineName}" -o json)"
  validateKey "spec.infrastructureRef" "${machineJson}"
  local infraRefJson
  infraRefJson="$(jq '.spec.infrastructureRef' <<<"${machineJson}")"
  validateKey "apiVersion" "${infraRefJson}"
  validateKey "kind" "${infraRefJson}"
  validateKey "name" "${infraRefJson}"
  validateKey "namespace" "${infraRefJson}"
  local name
  name="$(jq -r '.name' <<<"${infraRefJson}")"
  local namespace
  namespace="$(jq -r '.namespace' <<<"${infraRefJson}")"
  local apiVersion
  apiVersion="$(jq -r '.apiVersion' <<<"${infraRefJson}")"
  local kind
  kind="$(jq -r '.kind' <<<"${infraRefJson}")"

  info "Validating infrastructure machine object ${name}"
  # kubectl resource argument in "<lowercased kind>.<api group>" form.
  local resourceType
  resourceType="$(tr '[:upper:]' '[:lower:]' <<<"${kind}").$(cut -d / -f1 <<<"${apiVersion}")"
  local infraJson
  infraJson="$(kubectl --kubeconfig .kube/config get "${resourceType}" -n "${namespace}" "${name}" -o json)"

  info "Validating infrastructure machine object ${name} tags"
  validateKey "spec.tags" "${infraJson}"
  local tags
  tags="$(jq -r '.spec.tags' <<<"${infraJson}")"
  # Extract "<cluster>,<owned|shared>" from the first matching tag. -E is used
  # instead of -P so the pattern also works with greps that lack PCRE support.
  local clusterTag
  clusterTag="$(grep -Eio 'kubernetes\.io/cluster/[A-Za-z0-9-]+,(owned|shared)' <<<"${tags}" | awk -F/ 'NR == 1 { print $NF }')"
  if [[ -z "${clusterTag}" ]]; then
    fatal "could not find kubernetes.io/cluster/ tag on machine"
  fi
  # Tag key the VPC and subnet must carry, built from the machine's own tag
  # rather than a hard-coded cluster name.
  local clusterTagKey
  clusterTagKey="kubernetes.io/cluster/${clusterTag%%,*}"

  info "Validating aws instance ${name}"
  local instanceJson
  instanceJson="$(aws ec2 describe-instances --region "${REGION}" --filters "Name=tag:Name,Values=${name}")"
  expectExactlyOne "$(jq '.Reservations | length' <<<"${instanceJson}")" "reservations for ${name}"
  instanceJson="$(jq '.Reservations[0]' <<<"${instanceJson}")"
  expectExactlyOne "$(jq '.Instances | length' <<<"${instanceJson}")" "instances for ${name}"

  local vpcId
  vpcId="$(jq -r '.spec.vpcId' <<<"${infraJson}")"
  info "Validating vpc ${vpcId}"
  local vpcJson
  vpcJson="$(aws ec2 describe-vpcs --region "${REGION}" --filters "Name=vpc-id,Values=${vpcId}")"
  expectExactlyOne "$(jq '.Vpcs | length' <<<"${vpcJson}")" "vpcs for vpc ${vpcId}"
  vpcJson="$(jq '.Vpcs[0]' <<<"${vpcJson}")"
  requireTagKey "vpc $(jq -r '.VpcId' <<<"${vpcJson}")" "${clusterTagKey}" "${vpcJson}"

  local subnetId
  subnetId="$(jq -r '.spec.subnetId' <<<"${infraJson}")"
  info "Validating subnet ${subnetId}"
  local subnetJson
  subnetJson="$(aws ec2 describe-subnets --region "${REGION}" --filters "Name=subnet-id,Values=${subnetId}")"
  expectExactlyOne "$(jq '.Subnets | length' <<<"${subnetJson}")" "subnets for subnet ${subnetId}"
  subnetJson="$(jq '.Subnets[0]' <<<"${subnetJson}")"
  requireTagKey "subnet $(jq -r '.SubnetId' <<<"${subnetJson}")" "${clusterTagKey}" "${subnetJson}"

  local iamInstanceProfileName
  iamInstanceProfileName="$(jq -r '.spec.iamInstanceProfile' <<<"${infraJson}")"
  info "Validating iam instance profile ${iamInstanceProfileName}"
  local instanceProfileJson
  instanceProfileJson="$(aws iam get-instance-profile --instance-profile-name "${iamInstanceProfileName}")"
  expectExactlyOne "$(jq '.InstanceProfile.Roles | length' <<<"${instanceProfileJson}")" \
    "roles for instance profile ${iamInstanceProfileName}"
  local roleName
  roleName="$(jq -r '.InstanceProfile.Roles[0].RoleName' <<<"${instanceProfileJson}")"
  local policyArnJson
  policyArnJson="$(aws iam list-attached-role-policies --role-name "${roleName}")"
  expectExactlyOne "$(jq '.AttachedPolicies | length' <<<"${policyArnJson}")" \
    "attached policies for role ${roleName}"
  local policyArn
  policyArn="$(jq -r '.AttachedPolicies[0].PolicyArn' <<<"${policyArnJson}")"
  local policyVersionJson
  policyVersionJson="$(aws iam get-policy --policy-arn "${policyArn}")"
  local policyDocumentJson
  policyDocumentJson="$(aws iam get-policy-version --policy-arn "${policyArn}" --version-id "$(jq -r '.Policy.DefaultVersionId' <<<"${policyVersionJson}")")"
  # The AWS CLI returns the document under "Document" (capital D); the
  # lower-case spelling always resolved to null and disabled the checks below.
  validateKey "PolicyVersion.Document.Statement" "${policyDocumentJson}"
  # IAM allows Statement and Action to be a single value instead of a list;
  # normalise both to arrays before inspecting them.
  local statementsJson
  statementsJson="$(jq '.PolicyVersion.Document.Statement | if type == "array" then . elif . == null then [] else [.] end' <<<"${policyDocumentJson}")"
  expectExactlyOne "$(jq 'length' <<<"${statementsJson}")" "statements for policy ${policyArn}"
  local grantedActionsJson
  grantedActionsJson="$(jq '.[0].Action | if type == "array" then . elif . == null then [] else [.] end' <<<"${statementsJson}")"

  local -a actions
  if [[ "$(jq -r '.metadata.labels."cluster.x-k8s.io/control-plane"' <<<"${machineJson}")" == "true" ]]; then
    actions=(
      "kms:DescribeKey"
      "iam:CreateServiceLinkedRole"
      "elasticloadbalancing:SetLoadBalancerPoliciesOfListener"
      "elasticloadbalancing:SetLoadBalancerPoliciesForBackendServer"
      "elasticloadbalancing:RegisterTargets"
      "elasticloadbalancing:RegisterInstancesWithLoadBalancer"
      "elasticloadbalancing:ModifyTargetGroup"
      "elasticloadbalancing:ModifyLoadBalancerAttributes"
      "elasticloadbalancing:ModifyListener"
      "elasticloadbalancing:DetachLoadBalancerFromSubnets"
      "elasticloadbalancing:DescribeTargetHealth"
      "elasticloadbalancing:DescribeTargetGroups"
      "elasticloadbalancing:DescribeLoadBalancers"
      "elasticloadbalancing:DescribeLoadBalancerPolicies"
      "elasticloadbalancing:DescribeLoadBalancerAttributes"
      "elasticloadbalancing:DescribeListeners"
      "elasticloadbalancing:DeregisterInstancesFromLoadBalancer"
      "elasticloadbalancing:DeleteTargetGroup"
      "elasticloadbalancing:DeleteLoadBalancerListeners"
      "elasticloadbalancing:DeleteLoadBalancer"
      "elasticloadbalancing:DeleteListener"
      "elasticloadbalancing:CreateTargetGroup"
      "elasticloadbalancing:CreateLoadBalancerPolicy"
      "elasticloadbalancing:CreateLoadBalancerListeners"
      "elasticloadbalancing:CreateLoadBalancer"
      "elasticloadbalancing:CreateListener"
      "elasticloadbalancing:ConfigureHealthCheck"
      "elasticloadbalancing:AttachLoadBalancerToSubnets"
      "elasticloadbalancing:ApplySecurityGroupsToLoadBalancer"
      "elasticloadbalancing:AddTags"
      "ec2:RevokeSecurityGroupIngress"
      "ec2:ModifyVolume"
      "ec2:ModifyInstanceAttribute"
      "ec2:DetachVolume"
      "ec2:DescribeVpcs"
      "ec2:DescribeVolumes"
      "ec2:DescribeSubnets"
      "ec2:DescribeSecurityGroups"
      "ec2:DescribeRouteTables"
      "ec2:DescribeRegions"
      "ec2:DescribeInstances"
      "ec2:DeleteVolume"
      "ec2:DeleteSecurityGroup"
      "ec2:DeleteRoute"
      "ec2:CreateVolume"
      "ec2:CreateTags"
      "ec2:CreateSecurityGroup"
      "ec2:CreateRoute"
      "ec2:AuthorizeSecurityGroupIngress"
      "ec2:AttachVolume"
      "autoscaling:DescribeTags"
      "autoscaling:DescribeLaunchConfigurations"
      "autoscaling:DescribeAutoScalingGroups"
    )
  else
    actions=(
      "ec2:DescribeInstances"
      "ec2:DescribeRegions"
      "ecr:GetAuthorizationToken"
      "ecr:BatchCheckLayerAvailability"
      "ecr:GetDownloadUrlForLayer"
      "ecr:GetRepositoryPolicy"
      "ecr:DescribeRepositories"
      "ecr:ListImages"
      "ecr:BatchGetImage"
    )
  fi

  info "Validating actions in policy document"
  # Report every missing action before failing, instead of letting `set -e`
  # end the run silently on the first one.
  # NOTE(review): only exact action names are matched; wildcard grants such as
  # "ec2:*" and the statement's Effect are not evaluated.
  local action
  local missingCount=0
  for action in "${actions[@]}"; do
    debug "Validating action $action in policy document"
    if ! jq -e --arg action "${action}" 'any(. == $action)' <<<"${grantedActionsJson}" >/dev/null; then
      error "Policy ${policyArn} is missing action ${action}"
      missingCount=$((missingCount + 1))
    fi
  done
  if [[ "${missingCount}" -gt 0 ]]; then
    fatal "Policy ${policyArn} is missing ${missingCount} required action(s)"
  fi
  info "Successfully validated machine ${machineName}"
}

# Verifies that a key, or a dot-separated path of nested keys, exists in a
# JSON document. Each segment is checked from the outermost inwards.
# Arguments:
#   $1 - key or dotted path, e.g. "spec.infrastructureRef"
#   $2 - JSON document
# Exits via fatal when a segment is missing or its parent is not an object.
validateKey() {
  local path="$1"
  local json="$2"
  local key
  local -a segments
  IFS='.' read -r -a segments <<<"${path}"
  for key in "${segments[@]}"; do
    # Decide on jq's exit status (-e). The previous test on jq's printed output
    # could never fail, because "false" is a non-empty string.
    if ! jq -e --arg key "${key}" 'type == "object" and has($key)' <<<"${json}" >/dev/null 2>&1; then
      fatal "$json is missing $key field"
    fi
    # Descend into the value so the next segment is checked against it.
    json="$(jq --arg key "${key}" '.[$key]' <<<"${json}")"
  done
}
# Validates every CAPI machine of ${CLUSTER} from inside a throw-away working
# directory that holds the downloaded kubeconfig.
# Globals: CLUSTER, MACHINES (read); TMP_DIR (written)
main() {
  info "Validating cluster ${CLUSTER}"
  TMP_DIR="$(mktemp -d)" || fatal "Unable to create a temporary working directory"
  # The directory holds a kubeconfig for the local cluster, so remove it on
  # every exit path, including aborts caused by `set -e` or fatal.
  trap 'rm -rf -- "${TMP_DIR}"' EXIT
  # Silence pushd/popd: they would otherwise print the directory stack.
  pushd "${TMP_DIR}" >/dev/null
  info "Created working directory ${TMP_DIR}"
  mkdir -p .kube
  getLocalKubeConfig
  getAllMachines
  # MACHINES is a whitespace-separated list; split it into an array explicitly.
  # read returns non-zero at end of input with -d '', hence the `|| true`.
  local machine
  local -a machines=()
  read -r -d '' -a machines <<<"${MACHINES}" || true
  for machine in "${machines[@]}"; do
    validateMachine "${machine}"
  done
  popd >/dev/null
  rm -rf -- "${TMP_DIR}"
  info "Successfully validated cluster ${CLUSTER}"
}
# Script entry point; main reads its inputs from the globals set above, so it
# is called without arguments.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment