Created
September 4, 2025 18:22
-
-
Save wasertech/8acbdb9b12ac610cc9e04c8b5c4fafa1 to your computer and use it in GitHub Desktop.
Multi-WAN control + health monitor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Multi-WAN control + health monitor (zsh)
# - mw-status, mw-activate, mw-deactivate, mw-debug
# - mw-monitor-start, mw-monitor-stop, mw-monitor-status
# - mw-probe-once: one-shot probe (ICMP, then TCP) for debugging
# Distributed under the MIT license
# Locations of the persistent state. Both variables keep any non-empty value
# already present in the environment; otherwise the defaults below are used.
if [[ -z "$MW_STATE_DIR" ]]; then
  MW_STATE_DIR="$HOME/.cache/multiwan"
fi
if [[ -z "$MW_ACTIVE_LINKS" ]]; then
  MW_ACTIVE_LINKS="$MW_STATE_DIR/active_links.tsv"
fi
# The state directory must exist before any helper writes into it.
mkdir -p "$MW_STATE_DIR"
# Write all arguments to stdout on a single line, space-separated, without
# interpreting backslash escapes.
_mw_log() {
  print -r -- "$@"
}
# Resolve the IPv4 default gateway reachable through an interface.
# Arguments: $1 - interface name
# Outputs:   the gateway address on stdout, or an empty line when none is found
_resolve_gw() {
  local dev="$1" gw
  # Look for the "via <gw> dev <dev>" keyword sequence instead of a fixed
  # column: this handles both single-path defaults and multipath defaults
  # (one "nexthop via ... dev ..." group per link), and it never mistakes the
  # device name of a gateway-less default ("default dev ppp0") for a gateway.
  gw=$(ip -o route show default dev "$dev" 2>/dev/null \
    | awk -v dev="$dev" '$1 == "default" {
        for (i = 1; i + 3 <= NF; i++)
          if ($i == "via" && $(i + 2) == "dev" && $(i + 3) == dev) { print $(i + 1); exit }
      }')
  # Fallback: ask the kernel which next hop it would use through this device.
  if [[ -z "$gw" ]]; then
    gw=$(ip route get 1.1.1.1 oif "$dev" 2>/dev/null \
      | awk '{ for (i = 1; i < NF; i++) if ($i == "via") { print $(i + 1); exit } }')
  fi
  # Defensive: never hand back a keyword as if it were an address.
  if [[ "$gw" == "nexthop" || "$gw" == "none" ]]; then
    gw=""
  fi
  echo "$gw"
}
| _mw_detect_wans() { | |
| local all=( $(ip -o link show | awk -F': ' '{print $2}' | grep -Ev '^(lo|docker|veth|br-|virbr|tun|wg|lxc)') ) | |
| local if oper ipcidr ip gw net | |
| for if in "${all[@]}"; do | |
| oper=$(cat "/sys/class/net/$if/operstate" 2>/dev/null || echo unknown) | |
| [[ "$oper" != "up" && "$oper" != "unknown" ]] && continue | |
| ipcidr=$(ip -4 -o addr show dev "$if" 2>/dev/null | awk '{print $4}' | head -n1) | |
| [[ -z "$ipcidr" ]] && continue | |
| ip="${ipcidr%/*}" | |
| gw=$(_resolve_gw "$if") | |
| [[ -z "$gw" ]] && continue | |
| net=$(ip -4 route list dev "$if" 2>/dev/null | awk '$1 ~ /([0-9]+\.){3}[0-9]+\/[0-9]+/ && /scope link/ {print $1; exit}') | |
| print -r -- "$if $ip ${net:-none} $gw" | |
| done | |
| } | |
# Snapshot the current default route(s) into the state directory and start
# with empty bookkeeping lists for added rules and adjusted sysctls.
_mw_save_previous_default() {
  local snapshot="$MW_STATE_DIR/previous_default.route"
  local list
  # A missing default route (grep finds nothing) is not an error.
  ip -o route show default | grep -E '^default ' > "$snapshot" 2>/dev/null || true
  for list in added_rules.list adjusted_sysctl.list; do
    : > "$MW_STATE_DIR/$list"
  done
}
# Re-install the default route(s) saved in previous_default.route.
# Does nothing when the snapshot is missing or empty. Otherwise every current
# default route is removed first, then each saved line is re-added using its
# first "via", "dev" and "metric" values.
_mw_restore_previous_default() {
  local snapshot="$MW_STATE_DIR/previous_default.route"
  # awk program: print the word following the keyword passed as "key".
  local word_after='{ for (i = 1; i < NF; i++) if ($i == key) { print $(i + 1); exit } }'
  # Declared once, outside the loop: a bare "local" on an already-set
  # variable makes zsh print its value.
  local line via dev metric
  local -a cmd

  [[ -s "$snapshot" ]] || return 0

  while ip route show default | grep -q '^default '; do
    sudo ip route del default || break
  done

  while IFS= read -r line; do
    [[ -z "$line" ]] && continue
    via=$(awk -v key=via "$word_after" <<< "$line")
    dev=$(awk -v key=dev "$word_after" <<< "$line")
    metric=$(awk -v key=metric "$word_after" <<< "$line")
    # Only the device is mandatory: point-to-point defaults ("default dev
    # ppp0") have no gateway and must still be restored.
    [[ -z "$dev" ]] && continue
    cmd=(ip route add default)
    [[ -n "$via" ]] && cmd+=(via "$via")
    cmd+=(dev "$dev")
    [[ -n "$metric" ]] && cmd+=(metric "$metric")
    # Best effort: one failing route must not prevent restoring the others.
    sudo "${cmd[@]}" || true
  done < "$snapshot"
}
# Set a sysctl after recording its current value for a later restore.
# Arguments: $1 - sysctl key, $2 - new value
# Returns:   1 when the key cannot be read, otherwise the status of the write
_mw_sysctl_set() {
  local name="$1" wanted="$2"
  local previous
  if ! previous=$(sysctl -n "$name" 2>/dev/null); then
    return 1
  fi
  # Remember "<key> <old value>" so _mw_sysctl_restore can undo the change.
  print -r -- "$name $previous" >> "$MW_STATE_DIR/adjusted_sysctl.list"
  sudo sysctl -w "${name}=${wanted}" >/dev/null
}
# Write back every sysctl value recorded in adjusted_sysctl.list
# (one "<key> <value>" pair per line). Failures are ignored (best effort).
_mw_sysctl_restore() {
  local list="$MW_STATE_DIR/adjusted_sysctl.list"
  # Loop variables are local so they do not leak into the calling shell.
  local name saved
  [[ -s "$list" ]] || return 0
  while read -r name saved; do
    [[ -z "$name" ]] && continue
    sudo sysctl -w "$name=$saved" >/dev/null || true
  done < "$list"
}
# Remove every policy-routing artefact managed by this script:
#   - IPv4 rules with a priority in 18000..19999,
#   - any remaining rule pointing at tables 101..140,
#   - the contents of tables 101..140.
_mw_purge_rules_tables() {
  local prio tbl
  local -a prios
  prios=( $(ip -o rule | awk '{gsub(":","",$1); p=$1+0; if(p>=18000 && p<=19999) print p}' | sort -nr) )
  for prio in "${prios[@]}"; do
    sudo ip -4 rule del priority "$prio" 2>/dev/null || true
  done
  for tbl in {101..140}; do
    while true; do
      # Priority of the first rule that looks up this table, if any.
      prio=$(ip -o rule | awk -v t="$tbl" '{
        for (i = 1; i < NF; i++)
          if ($i == "lookup" && $(i + 1) == t) { gsub(":", "", $1); print $1; exit }
      }')
      [[ -z "$prio" ]] && break
      # Stop on failure: retrying a deletion that cannot succeed (for example
      # when sudo is refused) would loop forever on the same rule.
      sudo ip -4 rule del priority "$prio" 2>/dev/null || break
    done
    sudo ip route flush table "$tbl" 2>/dev/null || true
  done
}
# Read-only report: current default route(s), candidate WAN interfaces and
# the policy rules in the managed priority range. Changes nothing.
mw-status() {
  local candidates
  _mw_log ""
  _mw_log "Multi-WAN status (lecture seule)"
  _mw_log "Route(s) par défaut actuelle(s):"
  ip -o route show default || true
  _mw_log ""
  _mw_log "Interfaces candidates (IF IP NETCIDR GW):"
  candidates=$(_mw_detect_wans)
  if [[ -z "$candidates" ]]; then
    _mw_log "Aucune interface WAN pleinement exploitable détectée."
    return 0
  fi
  # One aligned row per candidate; a missing connected network shows as "(auto) none".
  awk '{printf " - %-12s %-15s %-18s GW=%s\n",$1,$2,($3=="none"?"(auto) none":$3),$4}' <<< "$candidates"
  _mw_log ""
  _mw_log "Règles gérées (18000..19999):"
  ip -o rule | awk '{gsub(":","",$1); if($1+0>=18000 && $1+0<=19999) print}'
  _mw_log ""
  _mw_log "Aucune modification effectuée."
}
# Activate multi-WAN routing over the detected (or requested) interfaces.
# Usage: mw-activate [--dry-run] [--if "dev dev"] [--weights "dev:weight,..."]
#   --dry-run   print the plan and the commands, change nothing
#   --if        space-separated list of interfaces to use (default: all detected)
#   --weights   comma-separated dev:weight pairs for the ECMP route (default 1)
# For each link it sets up a per-source routing table (100+index) and rule
# (priority 18000+index), loosens rp_filter, enables L4 multipath hashing,
# records the links in $MW_ACTIVE_LINKS and finally installs one multipath
# default route.
# Returns: 0 on success or dry-run; 1 on bad arguments, when no usable
#          interface is found, or when the multipath route cannot be installed.
mw-activate() {
  local dry=0 req_ifs="" weights=""
  # All working variables are declared once here: loop variables would
  # otherwise leak into the calling shell, and re-declaring an already-set
  # variable with a bare "local" makes zsh print its value.
  local lines sel ifname addr net gw kv k v i idx dev prio t c r w
  local -a ifs ips nets gws valid_indices apply_table_cmds apply_rules
  local -A weight_map
  local route_cmd="ip route replace default scope global"
  local base=18000

  while (( $# )); do
    case "$1" in
      --dry-run) dry=1 ;;
      --if|--weights)
        if (( $# < 2 )); then
          _mw_log "Option $1 requires a value."
          return 1
        fi
        if [[ "$1" == "--if" ]]; then req_ifs="$2"; else weights="$2"; fi
        shift ;;
      --help|-h) _mw_log "Usage: mw-activate [--dry-run] [--if \"dev dev\"] [--weights \"dev:weight,...\"]"; return 0 ;;
      *) _mw_log "Arg inconnu: $1"; return 1 ;;
    esac
    shift
  done

  lines=$(_mw_detect_wans)
  [[ -z "$lines" ]] && { _mw_log "Rien à activer: aucune interface avec IP+GW."; return 1; }

  # Keep only the requested interfaces, in the order they were requested.
  if [[ -n "$req_ifs" ]]; then
    sel=""
    for ifname in ${(s: :)req_ifs}; do
      sel+=$(print -r -- "$lines" | awk -v dev="$ifname" '$1==dev{print}')$'\n'
    done
    lines="$sel"
  fi
  lines=$(print -r -- "$lines" | awk 'NF>0')

  while read -r ifname addr net gw; do
    [[ -z "$ifname" ]] && continue
    ifs+=("$ifname"); ips+=("$addr"); nets+=("$net"); gws+=("$gw")
  done <<< "$lines"
  (( ${#ifs[@]} == 0 )) && { _mw_log "Aucune interface après filtrage."; return 1; }

  # Parse "dev:weight,dev:weight" into a lookup table.
  for kv in ${(s:,:)weights}; do
    k="${kv%%:*}"; v="${kv##*:}"
    [[ -n "$k" && -n "$v" ]] && weight_map[$k]="$v"
  done

  _mw_log ""
  _mw_log "Plan Multi-WAN:"
  for (( i = 1; i <= ${#ifs[@]}; i++ )); do
    _mw_log " - ${ifs[i]} src=${ips[i]} net=${nets[i]} gw=${gws[i]} weight=${weight_map[${ifs[i]}]:-1}"
  done

  # Build the multipath default route from the links that have a gateway.
  for (( i = 1; i <= ${#ifs[@]}; i++ )); do
    dev=${ifs[i]} gw=${gws[i]}
    if [[ -z "$gw" || "$gw" == "nexthop" ]]; then
      gw=$(_resolve_gw "$dev")
    fi
    if [[ -n "$gw" ]]; then
      gws[i]="$gw"
      route_cmd+=" nexthop via $gw dev $dev weight ${weight_map[$dev]:-1}"
      valid_indices+=($i)
    else
      _mw_log "Warning: no valid GW for $dev, skipping from ECMP and active list."
    fi
  done
  if (( ${#valid_indices[@]} == 0 )); then
    _mw_log "No valid nexthops available — aborting activation."
    return 1
  fi
  _mw_log ""
  _mw_log "Commande ECMP:"
  _mw_log " sudo $route_cmd"

  # Per-link routing table and source rule, so that replies leave through
  # the link whose address they come from.
  for idx in "${valid_indices[@]}"; do
    prio=$((base + idx))
    t=$((100 + idx))
    dev="${ifs[idx]}" addr="${ips[idx]}" net="${nets[idx]}"
    gw=$(_resolve_gw "$dev")
    [[ -z "$gw" ]] && gw="${gws[idx]}"
    apply_table_cmds+=("ip route flush table $t")
    [[ "$net" != "none" && -n "$net" ]] && apply_table_cmds+=("ip route replace $net dev $dev src $addr table $t")
    apply_table_cmds+=("ip route replace default via $gw dev $dev table $t")
    apply_rules+=("ip -4 rule add from $addr/32 table $t priority $prio")
  done
  _mw_log ""
  _mw_log "Règles source-based et tables envisagées:"
  for c in "${apply_table_cmds[@]}"; do _mw_log " sudo $c"; done
  for r in "${apply_rules[@]}"; do _mw_log " sudo $r"; done
  _mw_log ""
  _mw_log "Sysctl: rp_filter loose + ECMP L4 hashing (sauvegarde):"
  _mw_log " sudo sysctl -w net.ipv4.conf.all.rp_filter=2"
  _mw_log " sudo sysctl -w net.ipv4.conf.default.rp_filter=2"
  for dev in "${ifs[@]}"; do _mw_log " sudo sysctl -w net.ipv4.conf.$dev.rp_filter=2"; done
  _mw_log " sudo sysctl -w net.ipv4.fib_multipath_hash_policy=1"

  (( dry )) && { _mw_log ""; _mw_log "Dry-run: aucune modification appliquée."; return 0; }

  # ---- From here on the system is modified ----
  _mw_save_previous_default
  _mw_purge_rules_tables
  _mw_sysctl_set "net.ipv4.conf.all.rp_filter" 2 || true
  _mw_sysctl_set "net.ipv4.conf.default.rp_filter" 2 || true
  for dev in "${ifs[@]}"; do _mw_sysctl_set "net.ipv4.conf.$dev.rp_filter" 2 || true; done
  _mw_sysctl_set "net.ipv4.fib_multipath_hash_policy" 1 || true

  # $=var is zsh word splitting: each stored command string becomes argv.
  for c in "${apply_table_cmds[@]}"; do sudo $=c 2>/dev/null || true; done

  # Record the active links for the health monitor: "dev ip net gw weight".
  : > "$MW_ACTIVE_LINKS"
  for idx in "${valid_indices[@]}"; do
    dev="${ifs[idx]}" addr="${ips[idx]}" net="${nets[idx]}"
    gw=$(_resolve_gw "$dev")
    [[ -z "$gw" ]] && gw="${gws[idx]}"
    [[ -z "$gw" || "$gw" == "nexthop" ]] && continue
    w="${weight_map[$dev]:-1}"
    print -r -- "$dev $addr $net $gw $w" >> "$MW_ACTIVE_LINKS"
  done

  for r in "${apply_rules[@]}"; do
    sudo $=r && print -r -- "$r" >> "$MW_STATE_DIR/added_rules.list"
  done

  # Do not claim success when the multipath default route was rejected.
  if ! sudo $=route_cmd; then
    _mw_log ""
    _mw_log "ECMP default route could not be installed; run mw-deactivate to roll back."
    return 1
  fi

  _mw_log ""
  _mw_log "Multi-WAN ACTIVÉ. Vérifs:"
  ip -o route show default
  _mw_log "Règles gérées:"
  ip -o rule | awk '{gsub(":","",$1); if($1+0>=18000 && $1+0<=19999) print}'
}
# Undo mw-activate: drop the managed rules and tables, put back the saved
# default route(s) and restore the recorded sysctl values.
mw-deactivate() {
  local snapshot="$MW_STATE_DIR/previous_default.route"
  _mw_log "Désactivation Multi-WAN…"
  if [[ -s "$snapshot" ]]; then
    while ip route show default | grep -q '^default '; do
      sudo ip route del default || break
    done
  else
    # Without a snapshot nothing could be restored afterwards: deleting the
    # default route(s) here would leave the host with no default route at all.
    _mw_log "No saved default route ($snapshot); leaving current default route(s) untouched."
  fi
  _mw_purge_rules_tables
  _mw_restore_previous_default
  _mw_sysctl_restore
  _mw_log "Multi-WAN DÉSACTIVÉ. État actuel:"
  ip -o route show default
  _mw_log "Fini."
}
# Quick diagnostic dump: default route(s), managed rules, candidate links and,
# for each candidate, the route the kernel picks towards 1.1.1.1 from its
# source address.
mw-debug() {
  # Declared outside the loop: a bare "local" on an already-set variable
  # makes zsh print its value on every later iteration.
  local ifname addr rest route
  _mw_log "Debug rapide:"
  _mw_log "- ip -o route show default"
  ip -o route show default
  _mw_log "- ip rule (managed range):"
  ip -o rule | awk '{gsub(":","",$1); if($1+0>=18000 && $1+0<=19999) print}'
  _mw_log "- Candidates:"
  _mw_detect_wans | awk '{printf " * %-10s src=%-15s net=%-18s gw=%s\n",$1,$2,$3,$4}'
  _mw_log "- Route test from each source to 1.1.1.1:"
  while read -r ifname addr rest; do
    [[ -z "$ifname" || -z "$addr" ]] && continue
    printf " from %-15s -> " "$addr"
    # Test the lookup itself: in "ip ... | sed ... || echo", the "||" would
    # only see sed's exit status and the failure message would never print.
    if route=$(ip route get 1.1.1.1 from "$addr" oif "$ifname" 2>/dev/null) && [[ -n "$route" ]]; then
      printf '%s\n' "$route" | sed 's/^/ /'
    else
      echo "NO ROUTE"
    fi
  done < <(_mw_detect_wans)
}
# ========== Health Monitor ==========
# Endless health-check loop (meant to run in the background; see mw-monitor-start).
# Arguments: $1 interval  - seconds between cycles
#            $2 samples   - ICMP echo requests per probe
#            $3 loss_thr  - maximum accepted packet loss (percent)
#            $4 rtt_thr   - maximum accepted average RTT (ms)
#            $5 down_hys  - consecutive failed cycles before a link is "down"
#            $6 up_hys    - consecutive good cycles before a down link is "up" again
# Each cycle probes every link listed in $MW_ACTIVE_LINKS (ICMP to the gateway,
# then a curl TCP fallback) and, when the set of healthy links changes,
# replaces the multipath default route with one made of the healthy links.
# When no link is healthy the current route is left in place.
mw__monitor_loop() {
  local interval="$1" samples="$2" loss_thr="$3" rtt_thr="$4" down_hys="$5" up_hys="$6"
  typeset -A fail pass state weight gw_map
  # Everything is declared once, outside the loops: re-running a bare "local"
  # on an already-set variable makes zsh print it, which would spam the log.
  local last_set="" healthy_set cmd d
  local dev src net gw w out loss rtt ok
  local -a healthy
  echo "$(date -Is) MONITOR started interval=$interval samples=$samples loss<=$loss_thr rtt<=$rtt_thr down=$down_hys up=$up_hys"
  while true; do
    if [[ ! -s "$MW_ACTIVE_LINKS" ]]; then
      echo "$(date -Is) No active links file ($MW_ACTIVE_LINKS). Sleeping…"
      sleep "$interval"
      continue
    fi
    healthy=()
    while read -r dev src net gw w; do
      [[ -z "$dev" ]] && continue
      if [[ -z "$gw" || "$gw" == "nexthop" ]]; then
        echo "$(date -Is) SKIP dev=$dev no valid gw ($gw)"; continue
      fi
      # 1) ICMP probe
      ok=0
      out=$(/usr/bin/ping -n -q -I "$dev" -c "$samples" -W 1 "$gw" 2>/dev/null)
      # Take the comma-separated field that actually contains "packet loss":
      # its position shifts when ping inserts a "+N errors" field.
      loss=$(echo "$out" | awk -F',' '/packet loss/ {
        for (i = 1; i <= NF; i++) if ($i ~ /packet loss/) { sub(/%.*/, "", $i); print $i + 0; exit }
      }')
      # Average RTT: 5th "/"-separated field of the "rtt min/avg/max/mdev" line.
      rtt=$(echo "$out" | awk -F'/' '/rtt/ {print $5+0}')
      [[ -z "$loss" ]] && loss=100
      [[ -z "$rtt" ]] && rtt=9999
      if (( loss <= loss_thr && rtt <= rtt_thr )); then
        ok=1
      # 2) Fallback TCP probe via curl (connect only)
      elif curl --interface "$src" --connect-timeout 3 --max-time 5 -sS -o /dev/null https://1.1.1.1 --insecure 2>/dev/null; then
        # The link works even though ICMP failed; log placeholder figures.
        ok=1; loss=0; rtt=200
      fi
      if (( ok == 1 )); then
        pass[$dev]=$(( ${pass[$dev]:-0} + 1 )); fail[$dev]=0
      else
        fail[$dev]=$(( ${fail[$dev]:-0} + 1 )); pass[$dev]=0
      fi
      # Hysteresis: a link starts "up" and changes state only after enough
      # consecutive failures (down) or successes (up).
      if [[ "${state[$dev]}" == "down" ]]; then
        (( ${pass[$dev]:-0} >= up_hys )) && state[$dev]="up"
      else
        [[ -z "${state[$dev]}" ]] && state[$dev]="up"
        (( ${fail[$dev]:-0} >= down_hys )) && state[$dev]="down"
      fi
      weight[$dev]="$w"; gw_map[$dev]="$gw"
      [[ "${state[$dev]}" == "up" ]] && healthy+=("$dev")
      echo "$(date -Is) dev=$dev gw=$gw loss=$loss rtt=$rtt ms state=${state[$dev]} pass=${pass[$dev]:-0} fail=${fail[$dev]:-0}"
    done < "$MW_ACTIVE_LINKS"
    healthy_set="${healthy[*]}"
    if [[ -n "$healthy_set" && "$healthy_set" != "$last_set" ]]; then
      cmd="ip route replace default scope global"
      for d in "${healthy[@]}"; do
        cmd+=" nexthop via ${gw_map[$d]} dev $d weight ${weight[$d]:-1}"
      done
      echo "$(date -Is) APPLY: $cmd"
      # Remember the set only once it is really installed, so that a failed
      # replace is retried on the next cycle instead of being forgotten.
      if sudo $=cmd; then
        last_set="$healthy_set"
      else
        echo "$(date -Is) APPLY failed; will retry next cycle"
      fi
    fi
    sleep "$interval"
  done
}
# Start the health monitor in the background.
# Usage: mw-monitor-start [--interval S] [--samples N] [--loss PCT] [--rtt MS]
#                         [--down N] [--up N]
# Writes the monitor PID to $MW_STATE_DIR/monitor.pid and appends its output
# to $MW_STATE_DIR/monitor.log.
# Returns: 0 when the monitor is started or already running; 1 on bad
#          arguments or when $MW_ACTIVE_LINKS is missing or empty.
mw-monitor-start() {
  local interval=5 samples=3 loss=30 rtt=150 down=2 up=3
  local opt pid
  local pidfile="$MW_STATE_DIR/monitor.pid"
  local num_re='^[0-9]+([.][0-9]+)?$' int_re='^[1-9][0-9]*$'
  while (( $# )); do
    opt="$1"
    case "$opt" in
      --interval|--loss|--rtt)
        # A missing or non-numeric value would reach sleep or the threshold
        # comparison and turn the monitor into a busy loop.
        if (( $# < 2 )) || [[ ! "$2" =~ $num_re ]]; then
          _mw_log "Option $opt requires a numeric value."
          return 1
        fi
        ;;
      --samples|--down|--up)
        if (( $# < 2 )) || [[ ! "$2" =~ $int_re ]]; then
          _mw_log "Option $opt requires a positive integer value."
          return 1
        fi
        ;;
      *) _mw_log "Arg inconnu: $1"; return 1 ;;
    esac
    case "$opt" in
      --interval) interval="$2" ;;
      --samples) samples="$2" ;;
      --loss) loss="$2" ;;
      --rtt) rtt="$2" ;;
      --down) down="$2" ;;
      --up) up="$2" ;;
    esac
    shift 2
  done
  [[ ! -s "$MW_ACTIVE_LINKS" ]] && { _mw_log "Run mw-activate first to create $MW_ACTIVE_LINKS"; return 1; }
  # Refuse to start a second monitor while the recorded PID is still alive.
  if [[ -s "$pidfile" ]]; then
    pid=$(cat "$pidfile")
    if kill -0 "$pid" 2>/dev/null; then
      _mw_log "Monitor already running (pid $pid)."
      return 0
    fi
  fi
  ( mw__monitor_loop "$interval" "$samples" "$loss" "$rtt" "$down" "$up" ) >> "$MW_STATE_DIR/monitor.log" 2>&1 &
  echo $! > "$pidfile"
  _mw_log "Monitor started (pid $(cat "$pidfile")). Log: $MW_STATE_DIR/monitor.log"
}
# Stop the background monitor recorded in monitor.pid and remove the pid file.
# The process gets a plain kill first, then kill -9 after a short pause.
mw-monitor-stop() {
  local pidfile="$MW_STATE_DIR/monitor.pid"
  local pid
  if [[ ! -s "$pidfile" ]]; then
    _mw_log "No monitor pid file; nothing to stop."
    return
  fi
  pid=$(cat "$pidfile")
  if kill -0 "$pid" 2>/dev/null; then
    kill "$pid" 2>/dev/null || true
    sleep 0.2
    kill -9 "$pid" 2>/dev/null || true
    _mw_log "Monitor stopped."
  fi
  # The pid file goes away even when the recorded process was already gone.
  rm -f "$pidfile"
}
# One-shot health check (ICMP then TCP fallback) - no route changes
# Usage: mw-monitor-status [--samples N] [--loss PCT] [--rtt MS]
# Probes every link listed in $MW_ACTIVE_LINKS once, prints UP/DOWN per link,
# then shows the current default route(s) and whether the monitor is running.
# Returns: 1 on bad arguments or when $MW_ACTIVE_LINKS is missing or empty.
mw-monitor-status() {
  local samples=3 loss=30 rtt=150
  # Loop and read variables are local (they would otherwise leak into the
  # calling shell) and declared once, outside the loop, because a bare "local"
  # on an already-set variable makes zsh print its value.
  local dev src net gw w out lossv rttv ok health
  while (( $# )); do
    case "$1" in
      --samples|--loss|--rtt)
        if (( $# < 2 )); then
          _mw_log "Option $1 requires a value."
          return 1
        fi
        case "$1" in
          --samples) samples="$2" ;;
          --loss) loss="$2" ;;
          --rtt) rtt="$2" ;;
        esac
        shift ;;
      *) _mw_log "Arg inconnu: $1"; return 1 ;;
    esac
    shift
  done
  [[ ! -s "$MW_ACTIVE_LINKS" ]] && { _mw_log "No $MW_ACTIVE_LINKS. Run mw-activate first."; return 1; }
  _mw_log "One-off health check (ICMP then TCP fallback):"
  while read -r dev src net gw w; do
    [[ -z "$dev" ]] && continue
    if [[ -z "$gw" || "$gw" == "nexthop" ]]; then
      printf " - %-10s gw=%-15s weight=%-3s -> SKIP (no gw)\n" "$dev" "$gw" "$w"
      continue
    fi
    ok=0
    out=$(/usr/bin/ping -n -q -I "$dev" -c "$samples" -W 1 "$gw" 2>/dev/null)
    # Take the comma-separated field that actually contains "packet loss":
    # its position shifts when ping inserts a "+N errors" field.
    lossv=$(echo "$out" | awk -F',' '/packet loss/ {
      for (i = 1; i <= NF; i++) if ($i ~ /packet loss/) { sub(/%.*/, "", $i); print $i + 0; exit }
    }')
    # Average RTT: 5th "/"-separated field of the "rtt min/avg/max/mdev" line.
    rttv=$(echo "$out" | awk -F'/' '/rtt/ {print $5+0}')
    [[ -z "$lossv" ]] && lossv=100
    [[ -z "$rttv" ]] && rttv=9999
    if (( lossv <= loss && rttv <= rtt )); then
      ok=1
    elif curl --interface "$src" --connect-timeout 3 --max-time 5 -sS -o /dev/null https://1.1.1.1 --insecure 2>/dev/null; then
      # TCP fallback succeeded although ICMP did not; show placeholder figures.
      ok=1; lossv=0; rttv=200
    fi
    health="DOWN"
    (( ok == 1 )) && health="UP"
    printf " - %-10s gw=%-15s weight=%-3s loss=%-3s rtt=%-4s ms -> %s\n" "$dev" "$gw" "$w" "$lossv" "$rttv" "$health"
  done < "$MW_ACTIVE_LINKS"
  _mw_log "Current defaults:"
  ip -o route show default
  if [[ -s "$MW_STATE_DIR/monitor.pid" ]] && kill -0 $(cat "$MW_STATE_DIR/monitor.pid") 2>/dev/null; then
    _mw_log "Monitor is running (pid $(cat "$MW_STATE_DIR/monitor.pid"))."
  else
    _mw_log "Monitor is not running."
  fi
}
# One-shot probe (full verbose) for debugging single cycle
# For every link in $MW_ACTIVE_LINKS, shows the route lookup towards 1.1.1.1,
# the neighbour table of the device, an ICMP probe of the gateway and a TCP
# connect test through the link's source address. Changes nothing.
# Returns: 1 when $MW_ACTIVE_LINKS is missing or empty.
mw-probe-once() {
  # Keep the read variables local so they do not leak into the calling shell.
  local dev src net gw w
  [[ ! -s "$MW_ACTIVE_LINKS" ]] && { _mw_log "No $MW_ACTIVE_LINKS. Run mw-activate first."; return 1; }
  _mw_log "Verbose one-shot probe (ICMP then TCP fallback):"
  while read -r dev src net gw w; do
    [[ -z "$dev" ]] && continue
    _mw_log "==== $dev ($src) gw=$gw weight=$w ===="
    ip route get 1.1.1.1 from "$src" oif "$dev" 2>/dev/null || _mw_log " ip route get: FAIL"
    _mw_log " ARP:"
    ip neigh show dev "$dev" || _mw_log " ip neigh: FAIL"
    _mw_log " ICMP -> $gw:"
    /usr/bin/ping -c 3 -I "$dev" -W 1 "$gw" || _mw_log " ping: maybe blocked"
    _mw_log " TCP connect -> 1.1.1.1:"
    curl --interface "$src" --connect-timeout 3 --max-time 5 -sS -o /dev/null -w " HTTP ok\n" https://1.1.1.1 --insecure 2>/dev/null || _mw_log " curl: FAIL"
  done < "$MW_ACTIVE_LINKS"
  _mw_log "Done."
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.