Last active
February 27, 2026 13:35
-
-
Save sdboyer/87dac49d1e9387f4372a909d6c5eb91c to your computer and use it in GitHub Desktop.
Poolside preflight check script for on-premises installation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ ./preflight-check.sh | |
| Preflight checks starting... (log: preflight-20260224-180212.log) | |
| [PASS] os: Ubuntu 22.04.5 LTS | |
| [PASS] kernel: 6.10.14-linuxkit | |
| [WARN] cpu: 14 cores (min: 128) | |
| [FAIL] ram: 7GB (min: 512GB) | |
| [WARN] disk /: 823GB free (min: 1000GB) | |
| [WARN] disk /var: 823GB free (min: 1000GB) | |
| [WARN] disk /var/lib: 823GB free (min: 1000GB) | |
| [WARN] disk /opt: 823GB free (min: 1000GB) | |
| [PASS] fstab: no noexec on critical paths | |
| [WARN] gpu: no NVIDIA devices in lspci | |
| [PASS] gpu: no nvidia driver loaded (good for GPU operator) | |
| [WARN] nouveau: not blacklisted (may need to before GPU setup) | |
| [WARN] sysctl: BPF settings may need adjustment (bpf_disabled=0, jit_harden=N/A) | |
| [PASS] selinux: not installed | |
| [PASS] network: outbound OK | |
| [PASS] tools: jq | |
| [FAIL] tools missing: kubectl helm terraform yq | |
| Result: 2 failed, 8 warnings | |
| Details: preflight-20260224-180212.log |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
set -euo pipefail

# Preflight Check Script for Poolside Installation
#
# Prints a one-line pass/warn/fail summary per check to stdout and appends
# the detailed findings to a log file.
#
# Settings are taken from one of two sources:
#   * POOLIO_COMPONENT is non-empty: it is treated as a JSON document and the
#     values are read from .fileConfig.schema.* with jq.
#   * otherwise: same-named environment variables are used.
# In both modes a missing value falls back to the same default.

CONFIG="${POOLIO_COMPONENT:-}"
LOG_FILE="${LOG_FILE:-preflight-$(date +%Y%m%d-%H%M%S).log}"

if [[ -z "$CONFIG" ]]; then
  # Standalone mode: keep any value already in the environment.
  : "${TARGET_DISTRO:=}"
  : "${MIN_CPU_CORES:=128}"
  : "${MIN_RAM_GB:=512}"
  : "${MIN_DISK_GB:=1000}"
  : "${DISK_PATHS:=[\"/\", \"/var\", \"/var/lib\", \"/opt\"]}"
  : "${AIR_GAPPED:=false}"
  : "${REQUIRED_SOFTWARE:=[\"kubectl\",\"helm\",\"terraform\",\"jq\",\"yq\"]}"
else
  # Component mode: evaluate a jq expression relative to .fileConfig.schema.
  fc() {
    jq -r ".fileConfig.schema$1" <<<"$CONFIG"
  }
  TARGET_DISTRO=$(fc '.targetDistro // ""')
  MIN_CPU_CORES=$(fc '.minCpuCores // 128')
  MIN_RAM_GB=$(fc '.minRamGB // 512')
  MIN_DISK_GB=$(fc '.minDiskGB // 1000')
  DISK_PATHS=$(fc '.diskPaths // ["/", "/var", "/var/lib", "/opt"] | @json')
  AIR_GAPPED=$(fc '.airGapped // false')
  REQUIRED_SOFTWARE=$(fc '.requiredSoftware // ["kubectl","helm","terraform","jq","yq"] | @json')
fi

# Running state shared by all checks.
CHECKS='[]'   # JSON array of check results, grown by add_check
FAIL_COUNT=0
WARN_COUNT=0

# Values collected along the way for the final JSON summary.
OS_VERSION=""
CPU_CORES=0
RAM_GB=0
GPU_COUNT=0
# Append one line to the detail log ($LOG_FILE).
# Arguments: any number of words; they are joined with single spaces.
# printf is used instead of echo so that a message such as "-n" or "-e ..."
# is written literally rather than being interpreted as an echo option.
log() {
  local IFS=' '
  printf '%s\n' "$*" >> "$LOG_FILE"
}
# Open a new titled section in the detail log: an empty separator line
# followed by "=== <title> ===".
# Arguments: $1 - section title
log_section() {
  local title="$1"
  log ""
  log "=== ${title} ==="
}
#######################################
# Record the outcome of a single check.
# Globals:
#   CHECKS     (read/written) JSON array of results, only when jq is usable
#   FAIL_COUNT (written) incremented when status is "fail"
#   WARN_COUNT (written) incremented when status is "warn"
# Arguments:
#   $1 - machine-readable check name
#   $2 - status: "pass", "warn" or "fail" (anything else is shown as PASS)
#   $3 - one-line human-readable message
#   $4 - optional extra detail (log and JSON only)
# Outputs:
#   "[ICON]   message" on stdout; message and detail via log
#######################################
add_check() {
  local name="$1" status="$2" message="$3" detail="${4:-}"

  local icon
  case "$status" in
    warn) icon="[WARN]" ;;
    fail) icon="[FAIL]" ;;
    *) icon="[PASS]" ;;
  esac

  # stdout: simple one-liner
  printf "%-8s %s\n" "$icon" "$message"

  # log: include detail
  log "$icon $message"
  if [[ -n "$detail" ]]; then
    log " $detail"
  fi

  # Accumulate for JSON. jq is itself one of the tools this script checks
  # for, so its absence must not abort the run or wipe CHECKS; the JSON list
  # is simply left untouched in that case.
  if command -v jq &>/dev/null; then
    local updated
    if updated=$(jq --arg n "$name" --arg s "$status" --arg m "$message" --arg d "$detail" \
      '. + [{"name": $n, "status": $s, "message": $m}
            + (if $d == "" then {} else {"detail": $d} end)]' <<<"$CHECKS"); then
      CHECKS="$updated"
    fi
  fi

  # Plain assignments: (( x++ )) returns non-zero when x was 0, which would
  # trip errexit.
  case "$status" in
    fail) FAIL_COUNT=$((FAIL_COUNT + 1)) ;;
    warn) WARN_COUNT=$((WARN_COUNT + 1)) ;;
  esac
}
| # --- OS --- | |
#######################################
# Identify the distribution and compare it with TARGET_DISTRO.
# Globals:
#   TARGET_DISTRO   (read) "" = accept anything; "<id>" = match ID only;
#                   "<id>-<version-prefix>" = match ID and VERSION_ID prefix
#   OS_RELEASE_FILE (read, optional) alternative os-release file to inspect
#   OS_VERSION      (written) human-readable distribution name
# Outputs: one "os" result via add_check; file contents via log
#######################################
check_os() {
  log_section "OS"

  # os-release(5): /etc/os-release first, /usr/lib/os-release as fallback.
  local release_file="${OS_RELEASE_FILE:-}"
  if [[ -z "$release_file" ]]; then
    release_file=/etc/os-release
    if [[ ! -f "$release_file" ]]; then
      release_file=/usr/lib/os-release
    fi
  fi
  if [[ ! -f "$release_file" ]]; then
    add_check "os" "fail" "os: unknown (no /etc/os-release)"
    return
  fi

  # Source the file in a subshell so its variables (NAME, VERSION, ...) do
  # not leak into this script, and default every field: ID, VERSION_ID and
  # PRETTY_NAME are all optional, and a bare reference would abort the run
  # under `set -u`.
  local fields
  fields=$(
    unset ID VERSION_ID PRETTY_NAME NAME
    # shellcheck disable=SC1090
    . "$release_file" >/dev/null 2>&1 || true
    printf '%s\n' "${ID:-}" "${VERSION_ID:-}" "${PRETTY_NAME:-${NAME:-Linux}}"
  ) || fields=""

  local os_id="" os_version_id="" os_pretty=""
  {
    IFS= read -r os_id
    IFS= read -r os_version_id
    IFS= read -r os_pretty
  } <<<"$fields" || true
  os_pretty="${os_pretty:-Linux}"

  OS_VERSION="$os_pretty"
  log "$(cat "$release_file")"

  if [[ -z "$TARGET_DISTRO" ]]; then
    add_check "os" "pass" "os: $os_pretty"
  elif [[ "$TARGET_DISTRO" == "$os_id" ]]; then
    # Target names a distribution without pinning a version.
    add_check "os" "pass" "os: $os_pretty"
  elif [[ "$TARGET_DISTRO" == "$os_id-"* &&
    "$os_version_id" == "${TARGET_DISTRO#"$os_id-"}"* ]]; then
    # Anchoring on the detected ID keeps IDs that contain a dash working
    # (e.g. "opensuse-leap-15").
    add_check "os" "pass" "os: $os_pretty"
  else
    add_check "os" "fail" "os: $os_pretty (expected $TARGET_DISTRO)"
  fi
}
| # --- Kernel --- | |
#######################################
# Report the running kernel release (always a pass) and log supporting
# details: `uname -a`, selected CONFIG_* lines from the matching /boot config
# file if it exists, and whether an initramfs image is at a usual location.
# Globals: LOG_FILE (appended to)
#######################################
check_kernel() {
  log_section "Kernel"

  local release boot_config
  release=$(uname -r)
  boot_config="/boot/config-${release}"

  add_check "kernel" "pass" "kernel: $release"
  log "uname -a: $(uname -a)"

  # Boot kernel config: at most the first 20 matching settings.
  if [[ -f "$boot_config" ]]; then
    log ""
    log "Kernel config ($boot_config) - key settings:"
    grep -E "^CONFIG_(PREEMPT|HZ|MODULES|BPF|CGROUP)" "$boot_config" 2>/dev/null \
      | head -20 >> "$LOG_FILE" || true
  fi

  # initramfs: two file-name conventions are checked.
  local initramfs_note="initramfs: not found at expected path"
  local candidate
  for candidate in "/boot/initramfs-${release}.img" "/boot/initrd.img-${release}"; do
    if [[ -f "$candidate" ]]; then
      initramfs_note="initramfs: present"
      break
    fi
  done
  log "$initramfs_note"
}
| # --- Hardware --- | |
#######################################
# Count logical CPUs and compare with MIN_CPU_CORES (a shortfall is a
# warning, not a failure).
# Globals:
#   MIN_CPU_CORES (read)
#   CPU_CORES     (written) detected count, 0 if it cannot be determined
#######################################
check_cpu() {
  log_section "CPU"

  local count=0
  if [[ -f /proc/cpuinfo ]]; then
    # grep -c exits 1 when it counts zero lines; treat that as "unknown"
    # rather than letting errexit kill the script before nproc is tried.
    count=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null) || count=0
    log "$(head -30 /proc/cpuinfo)"
  fi
  if [[ ! "$count" =~ ^[0-9]+$ ]] || ((count == 0)); then
    count=$(nproc 2>/dev/null) || count=0
  fi
  if [[ ! "$count" =~ ^[0-9]+$ ]]; then
    count=0
  fi
  CPU_CORES=$count

  if [[ $CPU_CORES -ge $MIN_CPU_CORES ]]; then
    add_check "cpu" "pass" "cpu: $CPU_CORES cores"
  else
    add_check "cpu" "warn" "cpu: $CPU_CORES cores (min: $MIN_CPU_CORES)"
  fi
}
#######################################
# Read MemTotal from /proc/meminfo, convert it to whole GB (integer
# division by 1024 twice) and compare with MIN_RAM_GB. Too little memory is
# a failure. When /proc/meminfo is absent the size is taken as 0.
# Globals:
#   MIN_RAM_GB (read)
#   RAM_GB     (written)
#######################################
check_ram() {
  log_section "Memory"

  RAM_GB=0
  if [[ -f /proc/meminfo ]]; then
    local total_kb
    total_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    RAM_GB=$((total_kb / 1024 / 1024))
    log "$(cat /proc/meminfo)"
  fi

  local verdict="fail"
  local summary="ram: ${RAM_GB}GB (min: ${MIN_RAM_GB}GB)"
  if [[ $RAM_GB -ge $MIN_RAM_GB ]]; then
    verdict="pass"
    summary="ram: ${RAM_GB}GB"
  fi
  add_check "ram" "$verdict" "$summary"
}
#######################################
# Check free space on every configured path that exists as a directory.
# Globals:
#   DISK_PATHS  (read) JSON array of paths; a loose "a,b,c" / bracketed list
#               is accepted when jq is missing or the value is not JSON
#   MIN_DISK_GB (read) required free space in GB (shortfall = warning)
#   LOG_FILE    (appended to) `df -h` output
# Outputs: one "disk_<path>" result per existing path via add_check
#######################################
check_disk() {
  log_section "Disk"
  log "df -h:"
  df -h >> "$LOG_FILE" 2>/dev/null || true

  local paths
  if ! paths=$(jq -r '.[]' <<<"$DISK_PATHS" 2>/dev/null); then
    # Fallback parser for the no-jq case; it cannot represent paths that
    # contain spaces, so whitespace is stripped only here.
    paths=$(printf '%s\n' "$DISK_PATHS" | tr -d '[]" ' | tr ',' '\n')
  fi

  local path avail_kb avail_gb
  while IFS= read -r path; do
    [[ -n "$path" ]] || continue
    [[ -d "$path" ]] || continue # skip non-existent paths silently

    # POSIX `df -Pk`: one line per filesystem (no wrapping on long device
    # names) and 1024-byte units on GNU, BSD and busybox alike. Dividing
    # ourselves rounds down, so a disk just under the minimum is not
    # reported as meeting it.
    avail_kb=$(df -Pk -- "$path" 2>/dev/null | awk 'NR==2 {print $4}') || avail_kb=""
    [[ "$avail_kb" =~ ^[0-9]+$ ]] || continue
    avail_gb=$((avail_kb / 1024 / 1024))

    if [[ $avail_gb -ge $MIN_DISK_GB ]]; then
      add_check "disk_$path" "pass" "disk $path: ${avail_gb}GB free"
    else
      add_check "disk_$path" "warn" "disk $path: ${avail_gb}GB free (min: ${MIN_DISK_GB}GB)"
    fi
  done <<<"$paths"
}
#######################################
# Log /etc/fstab and the current mount table, then warn if any of
# / /var /var/lib /opt /tmp is itself a mount point mounted with noexec.
# Only exact mount points are examined; a path that merely lives on another
# filesystem produces no entry and is not flagged.
#######################################
check_fstab() {
  log_section "Fstab & Mounts"
  if [[ -f /etc/fstab ]]; then
    log "/etc/fstab:"
    log "$(cat /etc/fstab)"
  fi

  # Take one snapshot of the mount table and reuse it for the log and for
  # every path below.
  local mounts
  mounts=$(mount 2>/dev/null) || mounts=""
  log ""
  log "mount:"
  if [[ -n "$mounts" ]]; then
    log "$mounts"
  fi

  # Check for noexec on critical paths
  local bad_mounts="" mount_point
  for mount_point in / /var /var/lib /opt /tmp; do
    # Single grep on a here-string: no pipeline, so an early exit from
    # `grep -q` cannot surface as a pipefail failure.
    if grep -Eq "^[^ ]+ on ${mount_point} .*noexec" <<<"$mounts"; then
      bad_mounts="$bad_mounts $mount_point"
    fi
  done

  if [[ -n "$bad_mounts" ]]; then
    add_check "fstab" "warn" "fstab: noexec on$bad_mounts"
  else
    add_check "fstab" "pass" "fstab: no noexec on critical paths"
  fi
}
| # --- GPU --- | |
#######################################
# Detect NVIDIA GPUs and report whether a host NVIDIA driver is loaded.
# Globals:
#   GPU_COUNT (written) number of NVIDIA VGA/3D/Display controllers seen by
#             lspci; left unchanged when lspci is unavailable
#   LOG_FILE  (appended to) nvidia-smi output when the tool exists
# Outputs: "gpu_detected" and "gpu_driver" results via add_check
#######################################
check_gpu() {
  log_section "GPU"

  # Check via lspci (doesn't require drivers)
  if command -v lspci &>/dev/null; then
    local pci_lines
    pci_lines=$(lspci 2>/dev/null | grep -iE "vga|3d|display|nvidia" || true)

    # Count only display-class functions. A GPU's companion audio function
    # or an NVIDIA bridge also carries the vendor name and would otherwise
    # inflate the count. `|| true` (not `|| echo 0`): grep -c already prints
    # 0 when nothing matches, so echoing another 0 would yield "00".
    GPU_COUNT=$(grep -ciE "(vga compatible|3d|display) controller.*nvidia" <<<"$pci_lines" || true)
    if [[ ! "$GPU_COUNT" =~ ^[0-9]+$ ]]; then
      GPU_COUNT=0
    fi

    log "lspci GPU entries:"
    log "$pci_lines"

    if [[ $GPU_COUNT -gt 0 ]]; then
      add_check "gpu_detected" "pass" "gpu: $GPU_COUNT NVIDIA device(s) via lspci"
    else
      add_check "gpu_detected" "warn" "gpu: no NVIDIA devices in lspci"
    fi
  else
    add_check "gpu_detected" "warn" "gpu: lspci not available"
  fi

  # Check if nvidia driver is loaded (this can be a problem - GPU operator
  # should install). lsmod is captured first: with pipefail,
  # `lsmod | grep -q` can report failure when grep exits early and lsmod is
  # killed by SIGPIPE, which would wrongly read as "no driver".
  local modules
  modules=$(lsmod 2>/dev/null) || modules=""
  if grep -q "^nvidia" <<<"$modules"; then
    log ""
    log "nvidia kernel modules loaded:"
    log "$(grep nvidia <<<"$modules" || true)"
    add_check "gpu_driver" "warn" "gpu: nvidia driver already loaded (GPU operator prefers no host drivers)"
  else
    add_check "gpu_driver" "pass" "gpu: no nvidia driver loaded (good for GPU operator)"
  fi

  # If nvidia-smi exists, gather info but don't require it
  if command -v nvidia-smi &>/dev/null; then
    log ""
    log "nvidia-smi output:"
    nvidia-smi >> "$LOG_FILE" 2>&1 || log "nvidia-smi failed"
  fi
}
#######################################
# Report the state of the open-source nouveau driver:
#   fail - the module is currently loaded
#   pass - an active "blacklist nouveau" directive exists in modprobe.d
#   warn - neither
#######################################
check_nouveau() {
  # Capture lsmod before searching it. Under pipefail, `lsmod | grep -q`
  # can fail when grep exits on the first match and lsmod then dies of
  # SIGPIPE, which would hide a loaded module.
  local modules
  modules=$(lsmod 2>/dev/null) || modules=""

  if grep -q "^nouveau[[:space:]]" <<<"$modules"; then
    add_check "nouveau" "fail" "nouveau: driver loaded (must blacklist)"
  # Anchor the directive to the start of the line so a commented-out
  # "# blacklist nouveau" is not mistaken for an active one.
  elif grep -rqE '^[[:space:]]*blacklist[[:space:]]+nouveau([[:space:]]|$)' \
    /etc/modprobe.d /run/modprobe.d /usr/lib/modprobe.d /lib/modprobe.d 2>/dev/null; then
    add_check "nouveau" "pass" "nouveau: blacklisted"
  else
    add_check "nouveau" "warn" "nouveau: not blacklisted (may need to before GPU setup)"
  fi
}
| # --- Sysctl --- | |
#######################################
# Log a fixed set of runtime sysctl values and the non-comment lines of the
# persisted sysctl configuration, then evaluate the two BPF settings:
#   pass - unprivileged_bpf_disabled and bpf_jit_harden are both 0
#   pass - unprivileged_bpf_disabled cannot be read ("not applicable")
#   warn - anything else
# Globals: LOG_FILE (appended to)
#######################################
check_sysctl() {
  log_section "Sysctl"

  # Known sysctls we care about
  local -a watched=(
    "kernel.unprivileged_bpf_disabled"
    "net.core.bpf_jit_harden"
    "kernel.dmesg_restrict"
    "kernel.perf_event_paranoid"
    "vm.swappiness"
    "net.ipv4.ip_forward"
    "net.bridge.bridge-nf-call-iptables"
  )

  # Loop variables are declared local so they do not leak into the rest of
  # the script.
  local key val
  log "Runtime sysctl values:"
  for key in "${watched[@]}"; do
    val=$(sysctl -n "$key" 2>/dev/null) || val="N/A"
    log " $key = $val"
  done

  # Persisted sysctls
  local conf
  log ""
  log "Persisted sysctl configs:"
  for conf in /etc/sysctl.conf /etc/sysctl.d/*; do
    # Also skips the literal pattern left behind by an unmatched glob.
    [[ -f "$conf" ]] || continue
    log ""
    log "$conf:"
    # One grep drops both comment and empty lines; its stderr (for example
    # an unreadable file) is discarded and a no-match exit is not an error.
    grep -Ev '^(#|$)' "$conf" >> "$LOG_FILE" 2>/dev/null || true
  done

  # Check BPF settings specifically (GPU compatibility)
  local bpf_disabled bpf_harden
  bpf_disabled=$(sysctl -n kernel.unprivileged_bpf_disabled 2>/dev/null) || bpf_disabled="N/A"
  bpf_harden=$(sysctl -n net.core.bpf_jit_harden 2>/dev/null) || bpf_harden="N/A"

  if [[ "$bpf_disabled" == "0" && "$bpf_harden" == "0" ]]; then
    add_check "sysctl_bpf" "pass" "sysctl: BPF settings OK for GPU"
  elif [[ "$bpf_disabled" == "N/A" ]]; then
    add_check "sysctl_bpf" "pass" "sysctl: BPF settings not applicable"
  else
    add_check "sysctl_bpf" "warn" "sysctl: BPF settings may need adjustment (bpf_disabled=$bpf_disabled, jit_harden=$bpf_harden)"
  fi
}
| # --- SELinux --- | |
#######################################
# Record the SELinux mode. Purely informational: the result is always a
# pass, whether getenforce is missing or reports any mode. When getenforce
# exists, its output and /etc/selinux/config (if present) are logged.
#######################################
check_selinux() {
  log_section "SELinux"

  if ! command -v getenforce &>/dev/null; then
    add_check "selinux" "pass" "selinux: not installed"
    return
  fi

  local enforce_mode
  enforce_mode=$(getenforce 2>/dev/null || echo "unknown")
  log "getenforce: $enforce_mode"

  local selinux_conf=/etc/selinux/config
  if [[ -f "$selinux_conf" ]]; then
    log ""
    log "$selinux_conf:"
    log "$(cat "$selinux_conf")"
  fi

  add_check "selinux" "pass" "selinux: $enforce_mode"
}
| # --- Network --- | |
#######################################
# Verify outbound HTTPS connectivity unless the install is air-gapped.
# Globals:
#   AIR_GAPPED (read) "true" skips the check with a pass
#   http_proxy / https_proxy / HTTP_PROXY / HTTPS_PROXY (read)
# Outputs:
#   optional "network_proxy" warning when any proxy variable is set, then
#   one "network" result (pass / fail / warn when curl is missing)
#######################################
check_network() {
  log_section "Network"

  if [[ "$AIR_GAPPED" == "true" ]]; then
    add_check "network" "pass" "network: skipped (air-gapped)"
    return
  fi

  # Check for proxy. All four spellings are honoured; HTTPS_PROXY used to
  # be logged but never triggered detection on its own.
  local http_proxy_value="${http_proxy:-${HTTP_PROXY:-}}"
  local https_proxy_value="${https_proxy:-${HTTPS_PROXY:-}}"
  if [[ -n "$http_proxy_value" || -n "$https_proxy_value" ]]; then
    log "Proxy detected:"
    log " http_proxy=$http_proxy_value"
    log " https_proxy=$https_proxy_value"
    add_check "network_proxy" "warn" "network: proxy configured"
  fi

  if ! command -v curl &>/dev/null; then
    add_check "network" "warn" "network: curl not available to test"
  # --max-time bounds the whole transfer; --connect-timeout alone does not
  # protect against a peer that accepts the connection and then stalls.
  elif curl -s --connect-timeout 5 --max-time 15 https://google.com &>/dev/null; then
    add_check "network" "pass" "network: outbound OK"
  else
    add_check "network" "fail" "network: no outbound connectivity"
  fi
}
| # --- Software --- | |
#######################################
# Check that every required command-line tool is on PATH.
# Globals:
#   REQUIRED_SOFTWARE (read) JSON array of command names; a loose
#                     "a,b,c" / bracketed list is accepted when jq is
#                     missing or the value is not JSON
# Outputs:
#   "software_found" (pass) listing the tools found, if any, and
#   "software_missing" (fail) listing the tools not found, if any;
#   a per-tool line with a best-effort version via log
#######################################
check_software() {
  log_section "Software"

  local tools
  if ! tools=$(jq -r '.[]' <<<"$REQUIRED_SOFTWARE" 2>/dev/null); then
    tools=$(printf '%s\n' "$REQUIRED_SOFTWARE" | tr -d '[]" ' | tr ',' '\n')
  fi

  local found="" missing="" tool raw ver
  while IFS= read -r tool; do
    tool="${tool//[[:space:]]/}"
    [[ -n "$tool" ]] || continue

    if command -v "$tool" &>/dev/null; then
      # stdin comes from /dev/null: the loop itself reads the tool list
      # from stdin, so a probed tool that reads its input would otherwise
      # swallow the remaining names and they would never be checked.
      raw=$("$tool" version --short 2>/dev/null </dev/null) \
        || raw=$("$tool" --version 2>/dev/null </dev/null) \
        || raw=""

      # First line only, from the first digit up to the next space, capped
      # at 20 characters.
      ver="${raw%%$'\n'*}"
      if [[ "$ver" =~ [0-9].*$ ]]; then
        ver="${BASH_REMATCH[0]}"
        ver="${ver%% *}"
        ver="${ver:0:20}"
      else
        ver=""
      fi

      found+="${found:+ }$tool"
      log " $tool: ${ver:-installed}"
    else
      missing+="${missing:+ }$tool"
      log " $tool: not found"
    fi
  done <<<"$tools"

  if [[ -n "$found" ]]; then
    add_check "software_found" "pass" "tools: $found"
  fi
  if [[ -n "$missing" ]]; then
    add_check "software_missing" "fail" "tools missing: $missing"
  fi
}
| # --- dmesg --- | |
#######################################
# Append the last 100 lines of the kernel ring buffer to the log under
# their own section; if the pipeline fails, log a note instead.
# Globals: LOG_FILE (appended to)
#######################################
capture_dmesg() {
  log_section "dmesg (last 100 lines)"
  if ! dmesg 2>/dev/null | tail -n 100 >> "$LOG_FILE"; then
    log "dmesg not accessible"
  fi
}
| # --- Main --- | |
#######################################
# Entry point: run every check in a fixed order, print the summary and, in
# component mode (CONFIG non-empty), emit a JSON result document on stdout.
# Globals:
#   LOG_FILE, CONFIG (read)
#   FAIL_COUNT, WARN_COUNT, OS_VERSION, CPU_CORES, RAM_GB, GPU_COUNT,
#   CHECKS (read, after the checks have filled them in)
# Returns: always exits with status 0; failures are reported through the
#   summary line and the JSON "passed" field, not the exit code.
#######################################
main() {
  echo "Preflight checks starting... (log: $LOG_FILE)"
  echo ""
  log "Preflight check: $(date)"
  log "Host: $(hostname)"

  local -a steps=(
    check_os
    check_kernel
    check_cpu
    check_ram
    check_disk
    check_fstab
    check_gpu
    check_nouveau
    check_sysctl
    check_selinux
    check_network
    check_software
    capture_dmesg
  )
  local step
  for step in "${steps[@]}"; do
    "$step"
  done

  echo ""
  local summary="Result: OK ($WARN_COUNT warnings)"
  if [[ $FAIL_COUNT -ne 0 ]]; then
    summary="Result: $FAIL_COUNT failed, $WARN_COUNT warnings"
  fi
  echo "$summary"
  echo "Details: $LOG_FILE"

  # JSON output for component mode
  if [[ -n "$CONFIG" ]]; then
    local passed
    if [[ $FAIL_COUNT -gt 0 ]]; then
      passed=false
    else
      passed=true
    fi
    jq -n \
      --argjson passed "$passed" \
      --arg os_version "$OS_VERSION" \
      --argjson cpu_cores "$CPU_CORES" \
      --argjson ram_gb "$RAM_GB" \
      --argjson gpu_count "$GPU_COUNT" \
      --argjson checks "$CHECKS" \
      '{passed: $passed, os_version: $os_version, cpu_cores: $cpu_cores, ram_gb: $ram_gb, gpu_count: $gpu_count, checks: $checks}'
  fi

  exit 0
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment