Created
February 20, 2024 22:43
-
-
Save jakefhyde/ede05b3f8d491d4d4bf0ed9781413fed to your computer and use it in GitHub Desktop.
Validates all Rancher and AWS infrastructure resources required for provisioning an RKE2/K3s cluster using the AWS out-of-tree external cloud-provider
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # shellcheck disable=SC2016 | |
| set -e | |
| # Validates all Rancher and AWS infrastructure resources required for provisioning an RKE2/K3s cluster using the AWS | |
| # out-of-tree external cloud-provider | |
#######################################
# Prints the usage summary for this script.
# Arguments: none
# Outputs:   usage line, a blank line, then one line per input, on stdout
#######################################
display_help() {
  local script
  script="$(basename "$0")"
  printf 'Usage: %s --server-url [server url] --cluster [cluster name] --region [region]\n' "${script}"
  printf '\n'
  # The $RANCHER_TOKEN entry is single-quoted on purpose: the variable name is
  # shown literally, not expanded.
  printf '%s\n' \
    ' $RANCHER_TOKEN [Required] environment variable containing rancher admin token' \
    ' -s, --server-url [Required] rancher server url' \
    " -c, --cluster [Required] target cluster name (provisioning.cattle.io)" \
    " -r, --region [Required] AWS Region" \
    " -n, --namespace [Optional] namespace cluster & machines live in (default: fleet-default)" \
    " -d, --debug [Optional] calls 'set -x'" \
    " -h, --help print this message"
}
#######################################
# Writes one log line of the form "<prefix> <message>" to stdout.
# Arguments: $1 - prefix; backslash escapes (e.g. \033 colour codes) are
#                 expanded
#            $2 - message; printed verbatim
# Outputs:   the formatted line on stdout
#######################################
log() {
  # %b expands escapes in the prefix only. The message goes through %s so that
  # text containing backslashes (JSON dumps, paths) is not rewritten the way
  # `echo -e "$1 $2"` would rewrite it.
  printf '%b %s\n' "$1" "$2"
}
#######################################
# Logs a cyan [DEBUG]-tagged message, but only when DEBUG is "true".
# Globals:   DEBUG (read)
# Arguments: $1 - message
#######################################
debug() {
  # Guard clause: stay silent (and succeed) unless debugging was requested.
  [[ $DEBUG == true ]] || return 0
  log "\033[0;36m[DEBUG]\033[0m" "$1"
}
#######################################
# Logs a red [FATAL]-tagged message and terminates the script.
# Arguments: $1 - message
# Exits:     always, with status 1
#######################################
fatal() {
  local -r label="\033[0;31m[FATAL]\033[0m"
  log "${label}" "$1"
  exit 1
}
#######################################
# Logs a red [ERROR]-tagged message; unlike fatal, it does not exit.
# Arguments: $1 - message
#######################################
error() {
  local -r label="\033[0;31m[ERROR]\033[0m"
  log "${label}" "$1"
}
#######################################
# Logs a yellow [WARN]-tagged message.
# Arguments: $1 - message
#######################################
warning() {
  local -r label="\033[1;33m[WARN]\033[0m"
  log "${label}" "$1"
}
#######################################
# Logs a green [INFO]-tagged message.
# Arguments: $1 - message
#######################################
info() {
  local -r label="\033[0;32m[INFO]\033[0m"
  log "${label}" "$1"
}
#######################################
# Aborts via fatal when a required command cannot be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   0 when the command is available
#######################################
commandExists() {
  local tool="$1"
  # Nothing to report when the lookup succeeds.
  command -v "${tool}" >/dev/null 2>&1 && return 0
  fatal "Missing ${tool}"
}
# External commands this script invokes. All of them are verified up front so
# a missing tool is reported by name instead of failing halfway through a run.
cmds=(
  "kubectl"
  "jq"
  "aws"
  "cut"
  # Used to download and extract the local cluster kubeconfig.
  "curl"
  "yq"
  # Used while parsing the infrastructure machine's tags and kind.
  "grep"
  "awk"
  "tr"
)
for cmd in "${cmds[@]}"; do
  commandExists "${cmd}"
done
#######################################
# Parses command-line options into the script's global settings.
# Globals:   SERVER_URL, CLUSTER, REGION, NAMESPACE, DEBUG (written)
#            POSITIONAL_ARGS (written; non-option arguments, in order)
# Arguments: the script's command-line arguments
# Exits:     0 after printing usage for -h/--help;
#            1 on an unknown option, or when an option that takes a value is
#            the last argument
#######################################
parseArgs() {
  POSITIONAL_ARGS=()
  while [[ $# -gt 0 ]]; do
    # Options that consume a value must be followed by one. Without this
    # check the second shift fails and `set -e` ends the script silently.
    case "$1" in
    -s | --server-url | -c | --cluster | -r | --region | -n | --namespace)
      if [[ $# -lt 2 ]]; then
        echo "Option $1 requires a value"
        display_help
        exit 1
      fi
      ;;
    esac

    case "$1" in
    -s | --server-url)
      SERVER_URL="$2"
      shift 2 # past argument and value
      ;;
    -c | --cluster)
      CLUSTER="$2"
      shift 2 # past argument and value
      ;;
    -r | --region)
      REGION="$2"
      shift 2 # past argument and value
      ;;
    -n | --namespace)
      NAMESPACE="$2"
      shift 2 # past argument and value
      ;;
    -d | --debug)
      # NOTE(review): xtrace prints every command line, including the curl
      # header that carries $RANCHER_TOKEN; treat debug output as sensitive.
      set -x
      DEBUG=true
      shift # past argument
      ;;
    -h | --help)
      display_help
      # Asking for help is not an error.
      exit 0
      ;;
    -*)
      echo "Unknown option $1"
      display_help
      exit 1
      ;;
    *)
      POSITIONAL_ARGS+=("$1") # save positional arg
      shift                   # past argument
      ;;
    esac
  done
}

parseArgs "$@"
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
#######################################
# Stops the script with the usage text when a mandatory input is empty.
# Arguments: $1 - value to check
#            $2 - message printed when the value is empty
# Exits:     1 when the value is empty
#######################################
requireInput() {
  local value="$1"
  local message="$2"
  if [[ -n "${value}" ]]; then
    return 0
  fi
  echo "${message}"
  display_help
  exit 1
}

# Checked in this order, so the first missing input is the one reported.
requireInput "$RANCHER_TOKEN" '$RANCHER_TOKEN is unset'
requireInput "$SERVER_URL" '--server-url is unset'
requireInput "$CLUSTER" '--cluster is unset'
requireInput "$REGION" '--region is unset'

# The namespace is optional; fall back to the default when unset or empty.
: "${NAMESPACE:=fleet-default}"
#######################################
# Downloads a kubeconfig for Rancher's "local" cluster into .kube/config
# (relative to the current directory).
# Globals:   SERVER_URL (read; with or without an http(s):// scheme),
#            RANCHER_TOKEN (read)
# Outputs:   a progress line on stdout; writes .kube/config
# Exits:     via fatal when the request fails or yields no kubeconfig
#######################################
getLocalKubeConfig() {
  echo "Getting local cluster kubeconfig"

  # Accept both "rancher.example.com" and "https://rancher.example.com/".
  # The request always goes out over https, as before.
  local host
  host="${SERVER_URL#http://}"
  host="${host#https://}"
  host="${host%/}"

  # --fail turns HTTP errors into a non-zero exit. Capturing the body instead
  # of piping it means a failed request is never written out as a kubeconfig.
  local response
  if ! response="$(curl --fail --silent --show-error \
    "https://${host}/v3/clusters/local?action=generateKubeconfig" \
    -X 'POST' \
    -H 'accept: application/yaml' \
    -H "cookie: R_SESS=${RANCHER_TOKEN}" \
    --compressed)"; then
    fatal "Failed to generate kubeconfig from https://${host}"
  fi

  local kubeconfig
  if ! kubeconfig="$(yq -r '.config' <<<"${response}")"; then
    fatal "Could not parse kubeconfig response from https://${host}"
  fi
  if [[ -z "${kubeconfig}" || "${kubeconfig}" == "null" ]]; then
    fatal "Response from https://${host} did not contain a kubeconfig"
  fi

  printf '%s\n' "${kubeconfig}" >.kube/config
}
#######################################
# Collects the names of the CAPI machines labelled as belonging to CLUSTER.
# Globals:   CLUSTER, NAMESPACE (read)
#            LABEL_SELECTOR (written), MACHINES (written; kubectl's jsonpath
#            output for the machine names)
# Outputs:   a progress line on stdout
#######################################
getAllMachines() {
  echo "Getting local cluster CAPI machines belonging to cluster ${CLUSTER}"
  LABEL_SELECTOR="cluster.x-k8s.io/cluster-name=${CLUSTER}"
  local -a query=(
    get machines
    -n "${NAMESPACE}"
    -l "${LABEL_SELECTOR}"
    -o jsonpath='{.items[*].metadata.name}'
  )
  MACHINES=$(kubectl --kubeconfig .kube/config "${query[@]}")
}
#######################################
# Aborts via fatal unless a resource count is exactly one.
# Arguments: $1 - the count that was found (may be empty)
#            $2 - description used in the message, e.g. "vpcs for vpc vpc-1"
#######################################
requireExactlyOne() {
  local count="$1"
  local what="$2"
  # Compared as a string so that 0 and an empty result are rejected as well.
  if [[ "${count}" != "1" ]]; then
    fatal "Unexpected number of ${what}: expected 1, got ${count}"
  fi
}

#######################################
# Tests whether an AWS resource description carries a tag with a given key.
# Arguments: $1 - tag key to look for
#            $2 - resource JSON with a .Tags array of {Key, Value} objects
# Returns:   0 when the tag is present, non-zero otherwise (also when the
#            resource has no Tags at all)
#######################################
hasTagKey() {
  local tagKey="$1"
  local json="$2"
  jq -e --arg key "${tagKey}" \
    '[.Tags[]? | select(.Key == $key)] | length > 0' <<<"${json}" >/dev/null
}

#######################################
# Validates one CAPI machine end to end: its infrastructure machine object
# and cluster tag, the EC2 instance, the VPC and subnet tags, and the IAM
# instance profile's policy actions.
# Globals:   NAMESPACE, REGION (read)
# Arguments: $1 - name of the CAPI machine
# Exits:     via fatal on the first failed check
#######################################
validateMachine() {
  local machineName
  machineName="$1"
  info "Validating machine ${machineName}"

  local machineJson
  machineJson="$(kubectl --kubeconfig .kube/config get machine -n "${NAMESPACE}" "${machineName}" -o json)"
  validateKey "spec.infrastructureRef" "${machineJson}"

  local infraRefJson
  infraRefJson="$(jq '.spec.infrastructureRef' <<<"${machineJson}")"
  validateKey "apiVersion" "${infraRefJson}"
  validateKey "kind" "${infraRefJson}"
  validateKey "name" "${infraRefJson}"
  validateKey "namespace" "${infraRefJson}"

  local name
  name="$(jq -r '.name' <<<"${infraRefJson}")"
  local namespace
  namespace="$(jq -r '.namespace' <<<"${infraRefJson}")"
  local apiVersion
  apiVersion="$(jq -r '.apiVersion' <<<"${infraRefJson}")"
  local kind
  kind="$(jq -r '.kind' <<<"${infraRefJson}")"

  info "Validating infrastructure machine object ${name}"
  # kubectl resource name: "<lower-cased kind>.<api group>".
  local infraResource
  infraResource="$(tr '[:upper:]' '[:lower:]' <<<"${kind}").${apiVersion%%/*}"
  local infraJson
  infraJson="$(kubectl --kubeconfig .kube/config get "${infraResource}" -n "${namespace}" "${name}" -o json)"

  info "Validating infrastructure machine object ${name} tags"
  validateKey "spec.tags" "${infraJson}"
  local tags
  tags="$(jq -r '.spec.tags' <<<"${infraJson}")"

  # Pull "<cluster id>,<owned|shared>" out of the first
  # kubernetes.io/cluster/<cluster id> entry in the tag string.
  local clusterTag
  clusterTag="$(grep -Eio 'kubernetes\.io/cluster/[A-Za-z0-9-]+,(owned|shared)' <<<"${tags}" |
    awk -F/ 'NR == 1 {print $NF}')" || true
  local clusterTagPrefix
  clusterTagPrefix="${clusterTag%%,*}"
  if [[ -z "${clusterTag}" ]]; then
    fatal "could not find kubernetes.io/cluster/ tag on machine"
  fi
  # The VPC and subnet must carry the tag of this machine's own cluster.
  local clusterTagKey
  clusterTagKey="kubernetes.io/cluster/${clusterTagPrefix}"

  info "Validating aws instance ${name}"
  local instanceJson
  instanceJson="$(aws ec2 describe-instances --region "${REGION}" --filters "Name=tag:Name,Values=${name}")"
  requireExactlyOne "$(jq '.Reservations | length' <<<"${instanceJson}")" "reservations for ${name}"
  instanceJson="$(jq '.Reservations[0]' <<<"${instanceJson}")"
  requireExactlyOne "$(jq '.Instances | length' <<<"${instanceJson}")" "instances for ${name}"
  instanceJson="$(jq '.Instances[0]' <<<"${instanceJson}")"

  local vpcId
  vpcId="$(jq -r '.spec.vpcId' <<<"${infraJson}")"
  info "Validating vpc ${vpcId}"
  local vpcJson
  vpcJson="$(aws ec2 describe-vpcs --region "${REGION}" --filters "Name=vpc-id,Values=${vpcId}")"
  requireExactlyOne "$(jq '.Vpcs | length' <<<"${vpcJson}")" "vpcs for vpc ${vpcId}"
  vpcJson="$(jq '.Vpcs[0]' <<<"${vpcJson}")"
  if ! hasTagKey "${clusterTagKey}" "${vpcJson}"; then
    fatal "vpc $(jq -r '.VpcId' <<<"${vpcJson}") did not have ${clusterTagPrefix}"
  fi

  local subnetId
  subnetId="$(jq -r '.spec.subnetId' <<<"${infraJson}")"
  info "Validating subnet ${subnetId}"
  local subnetJson
  subnetJson="$(aws ec2 describe-subnets --region "${REGION}" --filters "Name=subnet-id,Values=${subnetId}")"
  requireExactlyOne "$(jq '.Subnets | length' <<<"${subnetJson}")" "subnets for subnet ${subnetId}"
  subnetJson="$(jq '.Subnets[0]' <<<"${subnetJson}")"
  if ! hasTagKey "${clusterTagKey}" "${subnetJson}"; then
    fatal "subnet $(jq -r '.SubnetId' <<<"${subnetJson}") did not have ${clusterTagPrefix}"
  fi

  local iamInstanceProfileName
  iamInstanceProfileName="$(jq -r '.spec.iamInstanceProfile' <<<"${infraJson}")"
  info "Validating iam instance profile ${iamInstanceProfileName}"
  local instanceProfileJson
  instanceProfileJson="$(aws iam get-instance-profile --instance-profile-name "${iamInstanceProfileName}")"
  requireExactlyOne "$(jq '.InstanceProfile.Roles | length' <<<"${instanceProfileJson}")" \
    "roles for instance profile ${iamInstanceProfileName}"

  local roleName
  roleName="$(jq -r '.InstanceProfile.Roles[0].RoleName' <<<"${instanceProfileJson}")"
  local policyArnJson
  policyArnJson="$(aws iam list-attached-role-policies --role-name "${roleName}")"
  requireExactlyOne "$(jq '.AttachedPolicies | length' <<<"${policyArnJson}")" \
    "attached policies for role ${roleName}"

  local policyArn
  policyArn="$(jq -r '.AttachedPolicies[0].PolicyArn' <<<"${policyArnJson}")"
  local policyVersionJson
  policyVersionJson="$(aws iam get-policy --policy-arn "${policyArn}")"
  local policyDocumentJson
  policyDocumentJson="$(aws iam get-policy-version --policy-arn "${policyArn}" --version-id "$(jq -r '.Policy.DefaultVersionId' <<<"${policyVersionJson}")")"
  # The AWS CLI returns the policy under "Document" (capital D).
  validateKey "PolicyVersion.Document.Statement" "${policyDocumentJson}"
  requireExactlyOne "$(jq '.PolicyVersion.Document.Statement | length' <<<"${policyDocumentJson}")" \
    "statements for policy ${policyArn}"

  # Control-plane machines need the full cloud-provider permission set;
  # every other machine needs only the node subset.
  local actions
  if [[ "$(jq -r '.metadata.labels."cluster.x-k8s.io/control-plane"' <<<"${machineJson}")" == "true" ]]; then
    actions=(
      "kms:DescribeKey"
      "iam:CreateServiceLinkedRole"
      "elasticloadbalancing:SetLoadBalancerPoliciesOfListener"
      "elasticloadbalancing:SetLoadBalancerPoliciesForBackendServer"
      "elasticloadbalancing:RegisterTargets"
      "elasticloadbalancing:RegisterInstancesWithLoadBalancer"
      "elasticloadbalancing:ModifyTargetGroup"
      "elasticloadbalancing:ModifyLoadBalancerAttributes"
      "elasticloadbalancing:ModifyListener"
      "elasticloadbalancing:DetachLoadBalancerFromSubnets"
      "elasticloadbalancing:DescribeTargetHealth"
      "elasticloadbalancing:DescribeTargetGroups"
      "elasticloadbalancing:DescribeLoadBalancers"
      "elasticloadbalancing:DescribeLoadBalancerPolicies"
      "elasticloadbalancing:DescribeLoadBalancerAttributes"
      "elasticloadbalancing:DescribeListeners"
      "elasticloadbalancing:DeregisterInstancesFromLoadBalancer"
      "elasticloadbalancing:DeleteTargetGroup"
      "elasticloadbalancing:DeleteLoadBalancerListeners"
      "elasticloadbalancing:DeleteLoadBalancer"
      "elasticloadbalancing:DeleteListener"
      "elasticloadbalancing:CreateTargetGroup"
      "elasticloadbalancing:CreateLoadBalancerPolicy"
      "elasticloadbalancing:CreateLoadBalancerListeners"
      "elasticloadbalancing:CreateLoadBalancer"
      "elasticloadbalancing:CreateListener"
      "elasticloadbalancing:ConfigureHealthCheck"
      "elasticloadbalancing:AttachLoadBalancerToSubnets"
      "elasticloadbalancing:ApplySecurityGroupsToLoadBalancer"
      "elasticloadbalancing:AddTags"
      "ec2:RevokeSecurityGroupIngress"
      "ec2:ModifyVolume"
      "ec2:ModifyInstanceAttribute"
      "ec2:DetachVolume"
      "ec2:DescribeVpcs"
      "ec2:DescribeVolumes"
      "ec2:DescribeSubnets"
      "ec2:DescribeSecurityGroups"
      "ec2:DescribeRouteTables"
      "ec2:DescribeRegions"
      "ec2:DescribeInstances"
      "ec2:DeleteVolume"
      "ec2:DeleteSecurityGroup"
      "ec2:DeleteRoute"
      "ec2:CreateVolume"
      "ec2:CreateTags"
      "ec2:CreateSecurityGroup"
      "ec2:CreateRoute"
      "ec2:AuthorizeSecurityGroupIngress"
      "ec2:AttachVolume"
      "autoscaling:DescribeTags"
      "autoscaling:DescribeLaunchConfigurations"
      "autoscaling:DescribeAutoScalingGroups"
    )
  else
    actions=(
      "ec2:DescribeInstances"
      "ec2:DescribeRegions"
      "ecr:GetAuthorizationToken"
      "ecr:BatchCheckLayerAvailability"
      "ecr:GetDownloadUrlForLayer"
      "ecr:GetRepositoryPolicy"
      "ecr:DescribeRepositories"
      "ecr:ListImages"
      "ecr:BatchGetImage"
    )
  fi

  info "Validating actions in policy document"
  local action
  for action in "${actions[@]}"; do
    debug "Validating action $action in policy document"
    # Action may be a single string or a list. Only literal action names are
    # matched; wildcard entries such as "ec2:*" are not expanded.
    if ! jq -e --arg action "${action}" \
      '.PolicyVersion.Document.Statement[0].Action
        | if type == "array" then any(. == $action) else . == $action end' \
      <<<"${policyDocumentJson}" >/dev/null; then
      fatal "policy ${policyArn} is missing action ${action}"
    fi
  done
  info "Successfully validated machine ${machineName}"
}

#######################################
# Aborts via fatal unless a (possibly dotted) key path exists in a JSON
# object.
# Arguments: $1 - key, or dot-separated path such as "spec.tags"
#            $2 - JSON document to inspect
#######################################
validateKey() {
  local key
  key="$1"
  local json
  json="$2"
  local head
  head="${key%%.*}"

  # jq -e maps has()'s true/false onto the exit status. Testing the printed
  # text would always pass, because "false" is a non-empty string.
  if ! jq -e --arg k "${head}" 'type == "object" and has($k)' <<<"${json}" >/dev/null 2>&1; then
    fatal "$json is missing $head field"
  fi

  # The parent is checked before descending, so a missing parent is reported
  # under its own name.
  if [[ "${key}" == *.* ]]; then
    validateKey "${key#*.}" "$(jq --arg k "${head}" '.[$k]' <<<"${json}")"
  fi
}
#######################################
# Removes the temporary working directory, if one was created.
# Globals:   TMP_DIR (read)
#######################################
cleanupWorkDir() {
  if [[ -n "${TMP_DIR:-}" && -d "${TMP_DIR}" ]]; then
    rm -rf -- "${TMP_DIR}"
  fi
}

#######################################
# Entry point: fetches the local cluster kubeconfig into a temporary working
# directory, then validates every machine found for CLUSTER.
# Globals:   CLUSTER, NAMESPACE (read); TMP_DIR (written);
#            MACHINES (read, as set by getAllMachines)
# Exits:     via fatal when the working directory cannot be set up, when no
#            machines are found, or when any validation step fails
#######################################
main() {
  info "Validating cluster ${CLUSTER}"

  # Checked explicitly: a failure on the left of `&&` is not caught by
  # `set -e`, and the kubeconfig would then land in the caller's directory.
  TMP_DIR="$(mktemp -d)" || fatal "Could not create a temporary working directory"
  # The directory will hold a kubeconfig for the Rancher local cluster, so it
  # is removed on every exit path, including fatal errors.
  trap cleanupWorkDir EXIT
  pushd "${TMP_DIR}" >/dev/null || fatal "Could not enter working directory ${TMP_DIR}"
  info "Created working directory ${TMP_DIR}"

  mkdir -p .kube
  getLocalKubeConfig
  getAllMachines

  # An empty list usually means a wrong cluster name or namespace; reporting
  # success after validating nothing would be misleading.
  if [[ -z "${MACHINES//[[:space:]]/}" ]]; then
    fatal "No machines found for cluster ${CLUSTER} in namespace ${NAMESPACE}"
  fi

  local machine
  # MACHINES is a whitespace-separated list, so word splitting is intended.
  for machine in $MACHINES; do
    validateMachine "$machine"
  done

  popd >/dev/null
  cleanupWorkDir
  trap - EXIT
  info "Successfully validated cluster ${CLUSTER}"
}

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment