Skip to content

Instantly share code, notes, and snippets.

@wasertech
Created September 4, 2025 18:22
Show Gist options
  • Select an option

  • Save wasertech/8acbdb9b12ac610cc9e04c8b5c4fafa1 to your computer and use it in GitHub Desktop.

Select an option

Save wasertech/8acbdb9b12ac610cc9e04c8b5c4fafa1 to your computer and use it in GitHub Desktop.
Multi-WAN control + health monitor
# Multi-WAN control + health monitor
# - mw-status, mw-activate, mw-deactivate, mw-debug
# - mw-monitor-start/stop/status
# - mw-probe-once : one-shot probe (ICMP then TCP) for debugging
# Distributed under MIT
# Locations of the runtime state. Both can be preset in the environment;
# the defaults live under the user's cache directory.
: "${MW_STATE_DIR:=$HOME/.cache/multiwan}"
: "${MW_ACTIVE_LINKS:=$MW_STATE_DIR/active_links.tsv}"
mkdir -p -- "$MW_STATE_DIR"
# Write the arguments to stdout as a single raw line (no escape processing).
_mw_log() {
  print -r -- "$@"
}
# Print the IPv4 default gateway used through interface $1, or an empty line
# when none can be determined.
#
# Arguments: $1 - interface name
# Outputs:   one line on stdout: the gateway address, or empty
#
# The gateway is taken from the token following "via", never from a fixed
# column: a device-only default ("default dev ppp0 scope link") or a multipath
# default ("default ... nexthop via A dev X ...") has something else in
# column 3. Only dotted-quad IPv4 values are accepted.
_resolve_gw() {
  local dev="$1" gw
  local pick_via='
    {
      for (i = 1; i < NF; i++) {
        if ($i != "via") continue
        # A multipath route lists one "via A dev D" pair per nexthop: keep ours.
        if ($(i + 2) == "dev" && $(i + 3) != dev) continue
        if ($(i + 1) ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { print $(i + 1); exit }
      }
    }'
  gw=$(ip -o route show default dev "$dev" 2>/dev/null | awk -v dev="$dev" "$pick_via")
  if [[ -z "$gw" ]]; then
    # No default route on this device: ask the kernel which nexthop it would use.
    gw=$(ip route get 1.1.1.1 oif "$dev" 2>/dev/null | awk -v dev="$dev" "$pick_via")
  fi
  printf '%s\n' "$gw"
}
_mw_detect_wans() {
local all=( $(ip -o link show | awk -F': ' '{print $2}' | grep -Ev '^(lo|docker|veth|br-|virbr|tun|wg|lxc)') )
local if oper ipcidr ip gw net
for if in "${all[@]}"; do
oper=$(cat "/sys/class/net/$if/operstate" 2>/dev/null || echo unknown)
[[ "$oper" != "up" && "$oper" != "unknown" ]] && continue
ipcidr=$(ip -4 -o addr show dev "$if" 2>/dev/null | awk '{print $4}' | head -n1)
[[ -z "$ipcidr" ]] && continue
ip="${ipcidr%/*}"
gw=$(_resolve_gw "$if")
[[ -z "$gw" ]] && continue
net=$(ip -4 route list dev "$if" 2>/dev/null | awk '$1 ~ /([0-9]+\.){3}[0-9]+\/[0-9]+/ && /scope link/ {print $1; exit}')
print -r -- "$if $ip ${net:-none} $gw"
done
}
# Snapshot the current default route(s) and reset the bookkeeping lists so
# that a later deactivation can undo the activation.
#
# If a snapshot already exists and managed rules (priority 18000..19999) are
# still installed, an earlier activation is in effect: the current default is
# then the ECMP route, not the user's original one. The existing snapshot and
# sysctl backup are kept untouched in that case.
_mw_save_previous_default() {
  local snapshot="$MW_STATE_DIR/previous_default.route"
  if [[ -s "$snapshot" ]] &&
    ip -o rule 2>/dev/null | awk '
      { gsub(":", "", $1); p = $1 + 0; if (p >= 18000 && p <= 19999) found = 1 }
      END { exit !found }'; then
    return 0
  fi
  ip -o route show default | grep -E '^default ' > "$snapshot" 2>/dev/null || true
  : > "$MW_STATE_DIR/added_rules.list"
  : > "$MW_STATE_DIR/adjusted_sysctl.list"
}
# Re-install the default route(s) recorded by _mw_save_previous_default.
#
# Does nothing when no snapshot exists. Otherwise every current default route
# is removed, then each recorded line is replayed as
#   ip route add default [via GW] dev DEV [metric N]
# A line without "via" (device-only default, e.g. a point-to-point link) is
# restored too; only lines without a device are skipped.
_mw_restore_previous_default() {
  local snapshot="$MW_STATE_DIR/previous_default.route"
  [[ -s "$snapshot" ]] || return 0
  while ip route show default | grep -q '^default '; do
    sudo ip route del default || break
  done
  # awk program: print the token that follows the keyword given as "key".
  local after_key='{ for (i = 1; i < NF; i++) if ($i == key) { print $(i + 1); exit } }'
  # Declared once, outside the loop: re-declaring a set variable with a bare
  # "local" makes zsh print it.
  local line via dev metric
  local -a cmd
  while IFS= read -r line; do
    [[ -z "$line" ]] && continue
    via=$(awk -v key=via "$after_key" <<< "$line")
    dev=$(awk -v key=dev "$after_key" <<< "$line")
    metric=$(awk -v key=metric "$after_key" <<< "$line")
    [[ -z "$dev" ]] && continue
    cmd=(ip route add default)
    [[ -n "$via" ]] && cmd+=(via "$via")
    cmd+=(dev "$dev")
    [[ -n "$metric" ]] && cmd+=(metric "$metric")
    sudo "${cmd[@]}" || true
  done < "$snapshot"
}
# Set sysctl $1 to $2 after appending "<key> <current value>" to the backup
# list. Returns 1 without changing anything when the key cannot be read.
_mw_sysctl_set() {
  local name="$1" wanted="$2" previous
  if ! previous=$(sysctl -n "$name" 2>/dev/null); then
    return 1
  fi
  printf '%s %s\n' "$name" "$previous" >> "$MW_STATE_DIR/adjusted_sysctl.list"
  sudo sysctl -w "$name=$wanted" >/dev/null
}
# Write back the sysctl values recorded in adjusted_sysctl.list.
#
# When a key was recorded more than once, only its first record is applied:
# later records were taken after the key had already been adjusted, so they do
# not hold the original value.
_mw_sysctl_restore() {
  local list="$MW_STATE_DIR/adjusted_sysctl.list"
  [[ -s "$list" ]] || return 0
  local key cur
  while read -r key cur; do
    [[ -z "$key" ]] && continue
    sudo sysctl -w "$key=$cur" >/dev/null || true
  done < <(awk '!seen[$1]++' "$list")
}
# Remove everything this tool may have installed in policy routing:
#   1. every rule whose priority is within 18000..19999,
#   2. every remaining rule that looks up one of tables 101..140,
#   3. the contents of tables 101..140.
# All deletions are best-effort.
_mw_purge_rules_tables() {
  local p tbl prio
  local -a prios
  prios=( $(ip -o rule \
    | awk '{gsub(":","",$1); p=$1+0; if(p>=18000 && p<=19999) print p}' \
    | sort -nr) )
  for p in "${prios[@]}"; do
    sudo ip -4 rule del priority "$p" 2>/dev/null || true
  done
  for tbl in {101..140}; do
    while true; do
      prio=$(ip -o rule | awk -v t="$tbl" '
        { for (i = 2; i < NF; i++)
            if ($i == "lookup" && $(i + 1) == t) { gsub(":", "", $1); print $1; exit } }')
      [[ -z "$prio" ]] && break
      # Stop on failure: the rule would still be listed and the loop would
      # otherwise never terminate.
      sudo ip -4 rule del priority "$prio" 2>/dev/null || break
    done
    sudo ip route flush table "$tbl" 2>/dev/null || true
  done
}
# Read-only overview: current default route(s), detected WAN candidates and
# the policy rules in the managed priority range. Changes nothing.
mw-status() {
  local candidates
  _mw_log ""
  _mw_log "Multi-WAN status (lecture seule)"
  _mw_log "Route(s) par défaut actuelle(s):"
  ip -o route show default || true
  _mw_log ""
  _mw_log "Interfaces candidates (IF IP NETCIDR GW):"
  candidates=$(_mw_detect_wans)
  if [[ -n "$candidates" ]]; then
    awk '{printf " - %-12s %-15s %-18s GW=%s\n",$1,$2,($3=="none"?"(auto) none":$3),$4}' <<< "$candidates"
    _mw_log ""
    _mw_log "Règles gérées (18000..19999):"
    ip -o rule | awk '{gsub(":","",$1); if($1+0>=18000 && $1+0<=19999) print}'
    _mw_log ""
    _mw_log "Aucune modification effectuée."
  else
    _mw_log "Aucune interface WAN pleinement exploitable détectée."
    return 0
  fi
}
# Activate Multi-WAN: one ECMP default route over all selected links, plus a
# per-link routing table and source-based rule so replies leave through the
# link they arrived on.
#
# Usage: mw-activate [--dry-run] [--if "dev dev"] [--weights "dev:weight,..."]
#   --dry-run   print the plan and the commands, change nothing
#   --if        restrict to the listed interfaces (space separated)
#   --weights   per-interface ECMP weight (default 1)
# Returns 0 on success or dry-run, 1 on bad arguments, when no usable link
# remains, or when the ECMP route could not be installed.
#
# Link N (1-based, in detection order) uses table 100+N and rule priority
# 18000+N. The applied links are written to $MW_ACTIVE_LINKS as
# "dev ip net gw weight".
mw-activate() {
  local dry=0 req_ifs="" weights=""
  # Everything is declared once here: loop variables must not leak into the
  # interactive shell, and a second bare "local" on a set variable makes zsh
  # print it.
  local lines sel wanted ifname ip net gw dev kv k v w c r
  local -i i idx prio t base=18000
  local -a ifs ips nets gws final_gws valid_indices apply_table_cmds apply_rules
  local -A weight_map
  local route_cmd="ip route replace default scope global"

  while (( $# )); do
    case "$1" in
      --dry-run) dry=1 ;;
      --if|--weights)
        if (( $# < 2 )); then
          _mw_log "Valeur manquante pour $1"
          return 1
        fi
        if [[ "$1" == "--if" ]]; then req_ifs="$2"; else weights="$2"; fi
        shift
        ;;
      --help|-h) _mw_log "Usage: mw-activate [--dry-run] [--if \"dev dev\"] [--weights \"dev:weight,...\"]"; return 0 ;;
      *) _mw_log "Arg inconnu: $1"; return 1 ;;
    esac
    shift
  done

  lines=$(_mw_detect_wans)
  [[ -z "$lines" ]] && { _mw_log "Rien à activer: aucune interface avec IP+GW."; return 1; }
  if [[ -n "$req_ifs" ]]; then
    # Keep only the requested devices, in the order they were requested.
    sel=""
    for wanted in ${(s: :)req_ifs}; do
      sel+=$(print -r -- "$lines" | awk -v dev="$wanted" '$1==dev{print}')$'\n'
    done
    lines="$sel"
  fi
  lines=$(print -r -- "$lines" | awk 'NF>0')

  while read -r ifname ip net gw; do
    [[ -z "$ifname" ]] && continue
    ifs+=("$ifname"); ips+=("$ip"); nets+=("$net"); gws+=("$gw")
  done <<< "$lines"
  (( ${#ifs[@]} == 0 )) && { _mw_log "Aucune interface après filtrage."; return 1; }

  for kv in ${(s:,:)weights}; do
    k="${kv%%:*}"; v="${kv##*:}"
    [[ -z "$k" || -z "$v" ]] && continue
    # A non-numeric weight would end up verbatim in the route command.
    if [[ "$v" == *[^0-9]* ]] || (( v < 1 )); then
      _mw_log "Warning: ignoring invalid weight '$v' for $k (using 1)."
      continue
    fi
    weight_map[$k]="$v"
  done

  _mw_log ""
  _mw_log "Plan Multi-WAN:"
  for (( i = 1; i <= ${#ifs[@]}; i++ )); do
    _mw_log " - ${ifs[i]} src=${ips[i]} net=${nets[i]} gw=${gws[i]} weight=${weight_map[${ifs[i]}]:-1}"
  done

  # Build the ECMP route; remember the gateway chosen for each usable link so
  # the per-link tables and the active-links file use the same value.
  for (( i = 1; i <= ${#ifs[@]}; i++ )); do
    dev="${ifs[i]}"; gw="${gws[i]}"
    if [[ -z "$gw" || "$gw" == "nexthop" ]]; then
      gw=$(_resolve_gw "$dev")
    fi
    if [[ -n "$gw" ]]; then
      final_gws[i]="$gw"
      route_cmd+=" nexthop via $gw dev $dev weight ${weight_map[$dev]:-1}"
      valid_indices+=($i)
    else
      _mw_log "Warning: no valid GW for $dev, skipping from ECMP and active list."
    fi
  done
  if (( ${#valid_indices[@]} == 0 )); then
    _mw_log "No valid nexthops available — aborting activation."
    return 1
  fi
  _mw_log ""
  _mw_log "Commande ECMP:"
  _mw_log " sudo $route_cmd"

  for idx in "${valid_indices[@]}"; do
    prio=$(( base + idx ))
    t=$(( 100 + idx ))
    dev="${ifs[idx]}"; ip="${ips[idx]}"; net="${nets[idx]}"; gw="${final_gws[idx]}"
    apply_table_cmds+=("ip route flush table $t")
    [[ "$net" != "none" && -n "$net" ]] && apply_table_cmds+=("ip route replace $net dev $dev src $ip table $t")
    apply_table_cmds+=("ip route replace default via $gw dev $dev table $t")
    apply_rules+=("ip -4 rule add from $ip/32 table $t priority $prio")
  done
  _mw_log ""
  _mw_log "Règles source-based et tables envisagées:"
  for c in "${apply_table_cmds[@]}"; do _mw_log " sudo $c"; done
  for r in "${apply_rules[@]}"; do _mw_log " sudo $r"; done
  _mw_log ""
  _mw_log "Sysctl: rp_filter loose + ECMP L4 hashing (sauvegarde):"
  _mw_log " sudo sysctl -w net.ipv4.conf.all.rp_filter=2"
  _mw_log " sudo sysctl -w net.ipv4.conf.default.rp_filter=2"
  for dev in "${ifs[@]}"; do _mw_log " sudo sysctl -w net.ipv4.conf.$dev.rp_filter=2"; done
  _mw_log " sudo sysctl -w net.ipv4.fib_multipath_hash_policy=1"
  (( dry )) && { _mw_log ""; _mw_log "Dry-run: aucune modification appliquée."; return 0; }

  # ---- From here on the system is modified ----
  _mw_save_previous_default
  _mw_purge_rules_tables
  _mw_sysctl_set "net.ipv4.conf.all.rp_filter" 2 || true
  _mw_sysctl_set "net.ipv4.conf.default.rp_filter" 2 || true
  for dev in "${ifs[@]}"; do _mw_sysctl_set "net.ipv4.conf.$dev.rp_filter" 2 || true; done
  _mw_sysctl_set "net.ipv4.fib_multipath_hash_policy" 1 || true
  for c in "${apply_table_cmds[@]}"; do sudo $=c 2>/dev/null || true; done

  : > "$MW_ACTIVE_LINKS"
  for idx in "${valid_indices[@]}"; do
    dev="${ifs[idx]}"; ip="${ips[idx]}"; net="${nets[idx]}"; gw="${final_gws[idx]}"
    w="${weight_map[$dev]:-1}"
    print -r -- "$dev $ip $net $gw $w" >> "$MW_ACTIVE_LINKS"
  done
  for r in "${apply_rules[@]}"; do
    sudo $=r && print -r -- "$r" >> "$MW_STATE_DIR/added_rules.list"
  done
  if ! sudo $=route_cmd; then
    # Do not claim success when the ECMP default could not be installed.
    _mw_log "Erreur: la route ECMP n'a pas pu être installée (mw-deactivate pour revenir en arrière)."
    return 1
  fi

  _mw_log ""
  _mw_log "Multi-WAN ACTIVÉ. Vérifs:"
  ip -o route show default
  _mw_log "Règles gérées:"
  ip -o rule | awk '{gsub(":","",$1); if($1+0>=18000 && $1+0<=19999) print}'
}
# Undo mw-activate: stop the health monitor, drop the current default
# route(s), purge managed rules and tables, then restore the saved default
# route(s) and sysctl values.
mw-deactivate() {
  _mw_log "Désactivation Multi-WAN…"
  # A monitor left running would re-install the ECMP default route as soon as
  # the set of healthy links changes, undoing the deactivation.
  if [[ -s "$MW_STATE_DIR/monitor.pid" ]]; then
    mw-monitor-stop
  fi
  while ip route show default | grep -q '^default '; do
    sudo ip route del default || break
  done
  _mw_purge_rules_tables
  _mw_restore_previous_default
  _mw_sysctl_restore
  _mw_log "Multi-WAN DÉSACTIVÉ. État actuel:"
  ip -o route show default
  _mw_log "Fini."
}
# Quick diagnostics: default route(s), managed rules, detected candidates and,
# for each candidate, the route the kernel picks towards 1.1.1.1 from that
# source address and interface ("NO ROUTE" when the lookup fails).
mw-debug() {
  local ifname src rest route
  _mw_log "Debug rapide:"
  _mw_log "- ip -o route show default"
  ip -o route show default
  _mw_log "- ip rule (managed range):"
  ip -o rule | awk '{gsub(":","",$1); if($1+0>=18000 && $1+0<=19999) print}'
  _mw_log "- Candidates:"
  _mw_detect_wans | awk '{printf " * %-10s src=%-15s net=%-18s gw=%s\n",$1,$2,$3,$4}'
  _mw_log "- Route test from each source to 1.1.1.1:"
  while read -r ifname src rest; do
    [[ -z "$ifname" || -z "$src" ]] && continue
    printf " from %-15s -> " "$src"
    # Check the lookup itself: when it is piped straight into sed, the
    # pipeline status is sed's and a failure goes unnoticed.
    if route=$(ip route get 1.1.1.1 from "$src" oif "$ifname" 2>/dev/null) && [[ -n "$route" ]]; then
      printf '%s\n' "$route" | sed 's/^/ /'
    else
      echo "NO ROUTE"
    fi
  done < <(_mw_detect_wans)
}
# ========== Health Monitor ==========
# Health-monitor main loop (runs until killed).
#
# Arguments:
#   $1 interval  seconds between probe cycles
#   $2 samples   ICMP echo requests per probe
#   $3 loss_thr  maximum acceptable packet loss (%)
#   $4 rtt_thr   maximum acceptable average RTT (ms)
#   $5 down_hys  consecutive failed probes before a link is marked down
#   $6 up_hys    consecutive good probes before a down link is marked up
#
# Each cycle reads $MW_ACTIVE_LINKS ("dev ip net gw weight"), pings the
# gateway through the device and, if that does not meet the thresholds, tries
# a TCP/TLS connect to 1.1.1.1 from the link's source address. When the set of
# "up" links changes and is not empty, the ECMP default route is replaced with
# the healthy nexthops only. Output goes to stdout.
mw__monitor_loop() {
  local interval="$1" samples="$2" loss_thr="$3" rtt_thr="$4" down_hys="$5" up_hys="$6"
  typeset -A fail pass state weight gw_map
  # Declared once, outside the loops: a bare "local" on an already-set
  # variable makes zsh print it, which would end up in the log every cycle.
  local last_set="" healthy_set cmd d
  local dev ip net gw w out loss rtt ok
  local -a healthy
  echo "$(date -Is) MONITOR started interval=$interval samples=$samples loss<=$loss_thr rtt<=$rtt_thr down=$down_hys up=$up_hys"
  while true; do
    if [[ ! -s "$MW_ACTIVE_LINKS" ]]; then
      echo "$(date -Is) No active links file ($MW_ACTIVE_LINKS). Sleeping…"
      sleep "$interval"
      continue
    fi
    healthy=()
    while read -r dev ip net gw w; do
      [[ -z "$dev" ]] && continue
      if [[ -z "$gw" || "$gw" == "nexthop" ]]; then
        echo "$(date -Is) SKIP dev=$dev no valid gw ($gw)"; continue
      fi
      # 1) ICMP probe
      ok=0
      out=$(/usr/bin/ping -n -q -I "$dev" -c "$samples" -W 1 "$gw" 2>/dev/null)
      # Take the "N%" token itself: its comma-separated position shifts when
      # ping inserts a "+N errors" field into the summary line.
      loss=$(echo "$out" | awk '/packet loss/ { for (i = 1; i <= NF; i++) if ($i ~ /%$/) { sub(/%/, "", $i); print $i + 0; exit } }')
      rtt=$(echo "$out" | awk -F'/' '/rtt/ {print $5+0}')
      [[ -z "$loss" ]] && loss=100
      [[ -z "$rtt" ]] && rtt=9999
      if (( loss <= loss_thr && rtt <= rtt_thr )); then
        ok=1
      # 2) Fallback TCP probe via curl (connect only)
      elif curl --interface "$ip" --connect-timeout 3 --max-time 5 -sS -o /dev/null https://1.1.1.1 --insecure 2>/dev/null; then
        # Placeholder figures for the log line: the link works, ICMP does not.
        ok=1; loss=0; rtt=200
      fi
      if (( ok == 1 )); then
        pass[$dev]=$(( ${pass[$dev]:-0} + 1 )); fail[$dev]=0
      else
        fail[$dev]=$(( ${fail[$dev]:-0} + 1 )); pass[$dev]=0
      fi
      # Hysteresis: a link changes state only after enough consecutive results.
      if [[ "${state[$dev]}" == "down" ]]; then
        (( ${pass[$dev]:-0} >= up_hys )) && state[$dev]="up"
      else
        [[ -z "${state[$dev]}" ]] && state[$dev]="up"
        (( ${fail[$dev]:-0} >= down_hys )) && state[$dev]="down"
      fi
      weight[$dev]="$w"; gw_map[$dev]="$gw"
      [[ "${state[$dev]}" == "up" ]] && healthy+=("$dev")
      echo "$(date -Is) dev=$dev gw=$gw loss=$loss rtt=$rtt ms state=${state[$dev]} pass=${pass[$dev]:-0} fail=${fail[$dev]:-0}"
    done < "$MW_ACTIVE_LINKS"
    healthy_set="${healthy[*]}"
    if [[ -n "$healthy_set" && "$healthy_set" != "$last_set" ]]; then
      cmd="ip route replace default scope global"
      for d in "${healthy[@]}"; do
        cmd+=" nexthop via ${gw_map[$d]} dev $d weight ${weight[$d]:-1}"
      done
      echo "$(date -Is) APPLY: $cmd"
      if sudo $=cmd; then
        last_set="$healthy_set"
      else
        # Leave last_set unchanged so the change is retried next cycle.
        echo "$(date -Is) APPLY failed; will retry"
      fi
    fi
    sleep "$interval"
  done
}
# True when $1 is a non-negative decimal number (digits, at most one dot).
_mw_is_number() {
  case "$1" in
    ''|.|*[^0-9.]*|*.*.*) return 1 ;;
  esac
  return 0
}
# True when $1 is an integer >= 1.
_mw_is_posint() {
  case "$1" in
    ''|*[^0-9]*) return 1 ;;
    *[1-9]*) return 0 ;;
  esac
  return 1
}
# Start the health monitor in the background.
#
# Usage: mw-monitor-start [--interval S] [--samples N] [--loss PCT]
#                         [--rtt MS] [--down N] [--up N]
# Defaults: interval=5 samples=3 loss=30 rtt=150 down=2 up=3.
# Returns 1 on invalid arguments or when $MW_ACTIVE_LINKS is missing or empty;
# returns 0 when the monitor was started or is already running. The pid is
# stored in $MW_STATE_DIR/monitor.pid, output is appended to
# $MW_STATE_DIR/monitor.log.
mw-monitor-start() {
  local interval=5 samples=3 loss=30 rtt=150 down=2 up=3
  while (( $# )); do
    case "$1" in
      --interval|--samples|--loss|--rtt|--down|--up)
        if (( $# < 2 )); then
          _mw_log "Valeur manquante pour $1"
          return 1
        fi
        case "$1" in
          --interval) interval="$2" ;;
          --samples) samples="$2" ;;
          --loss) loss="$2" ;;
          --rtt) rtt="$2" ;;
          --down) down="$2" ;;
          --up) up="$2" ;;
        esac
        shift
        ;;
      *) _mw_log "Arg inconnu: $1"; return 1 ;;
    esac
    shift
  done
  # Reject values the loop cannot use: with an empty or non-numeric interval
  # or sample count, sleep and ping fail every cycle and the loop never pauses.
  if ! _mw_is_number "$interval" || [[ "$interval" != *[1-9]* ]] ||
    ! _mw_is_number "$loss" || ! _mw_is_number "$rtt" ||
    ! _mw_is_posint "$samples" || ! _mw_is_posint "$down" || ! _mw_is_posint "$up"; then
    _mw_log "Invalid value: --interval (> 0), --loss and --rtt take numbers; --samples, --down and --up take integers >= 1."
    return 1
  fi
  [[ ! -s "$MW_ACTIVE_LINKS" ]] && { _mw_log "Run mw-activate first to create $MW_ACTIVE_LINKS"; return 1; }
  if [[ -s "$MW_STATE_DIR/monitor.pid" ]] && kill -0 "$(cat "$MW_STATE_DIR/monitor.pid")" 2>/dev/null; then
    _mw_log "Monitor already running (pid $(cat "$MW_STATE_DIR/monitor.pid"))."
    return 0
  fi
  ( mw__monitor_loop "$interval" "$samples" "$loss" "$rtt" "$down" "$up" ) >> "$MW_STATE_DIR/monitor.log" 2>&1 &
  echo $! > "$MW_STATE_DIR/monitor.pid"
  _mw_log "Monitor started (pid $(cat "$MW_STATE_DIR/monitor.pid")). Log: $MW_STATE_DIR/monitor.log"
}
# Stop the background monitor recorded in $MW_STATE_DIR/monitor.pid.
# A live process gets SIGTERM, then SIGKILL 0.2 s later; the pid file is
# removed whether or not the process was still alive.
mw-monitor-stop() {
  local pidfile="$MW_STATE_DIR/monitor.pid" target
  if [[ ! -s "$pidfile" ]]; then
    _mw_log "No monitor pid file; nothing to stop."
    return
  fi
  target=$(<"$pidfile")
  if kill -0 "$target" 2>/dev/null; then
    kill "$target" 2>/dev/null || true
    sleep 0.2
    kill -9 "$target" 2>/dev/null || true
    _mw_log "Monitor stopped."
  fi
  rm -f "$pidfile"
}
# One-shot health check (ICMP then TCP fallback) - no route changes
# Probe every link in $MW_ACTIVE_LINKS once and print UP/DOWN per link, then
# the current default route(s) and whether the monitor is running.
# Routes are not modified.
#
# Usage: mw-monitor-status [--samples N] [--loss PCT] [--rtt MS]
# Defaults: samples=3 loss=30 rtt=150.
# Returns 1 on an unknown argument or when $MW_ACTIVE_LINKS is missing/empty.
mw-monitor-status() {
  local samples=3 loss=30 rtt=150
  # All loop variables are local and declared once: they must not overwrite
  # variables of the interactive shell, and a bare "local" repeated inside the
  # loop would make zsh print them.
  local dev ip net gw w out lossv rttv ok health
  while (( $# )); do
    case "$1" in
      --samples) samples="$2"; shift ;;
      --loss) loss="$2"; shift ;;
      --rtt) rtt="$2"; shift ;;
      *) _mw_log "Arg inconnu: $1"; return 1 ;;
    esac; shift
  done
  [[ ! -s "$MW_ACTIVE_LINKS" ]] && { _mw_log "No $MW_ACTIVE_LINKS. Run mw-activate first."; return 1; }
  _mw_log "One-off health check (ICMP then TCP fallback):"
  while read -r dev ip net gw w; do
    [[ -z "$dev" ]] && continue
    if [[ -z "$gw" || "$gw" == "nexthop" ]]; then
      printf " - %-10s gw=%-15s weight=%-3s -> SKIP (no gw)\n" "$dev" "$gw" "$w"
      continue
    fi
    ok=0
    out=$(/usr/bin/ping -n -q -I "$dev" -c "$samples" -W 1 "$gw" 2>/dev/null)
    # Take the "N%" token itself: its comma-separated position shifts when
    # ping inserts a "+N errors" field into the summary line.
    lossv=$(echo "$out" | awk '/packet loss/ { for (i = 1; i <= NF; i++) if ($i ~ /%$/) { sub(/%/, "", $i); print $i + 0; exit } }')
    rttv=$(echo "$out" | awk -F'/' '/rtt/ {print $5+0}')
    [[ -z "$lossv" ]] && lossv=100
    [[ -z "$rttv" ]] && rttv=9999
    if (( lossv <= loss && rttv <= rtt )); then
      ok=1
    elif curl --interface "$ip" --connect-timeout 3 --max-time 5 -sS -o /dev/null https://1.1.1.1 --insecure 2>/dev/null; then
      # Placeholder figures: the TCP fallback succeeded although ICMP did not.
      ok=1; lossv=0; rttv=200
    fi
    health="DOWN"
    (( ok == 1 )) && health="UP"
    printf " - %-10s gw=%-15s weight=%-3s loss=%-3s rtt=%-4s ms -> %s\n" "$dev" "$gw" "$w" "$lossv" "$rttv" "$health"
  done < "$MW_ACTIVE_LINKS"
  _mw_log "Current defaults:"
  ip -o route show default
  if [[ -s "$MW_STATE_DIR/monitor.pid" ]] && kill -0 "$(cat "$MW_STATE_DIR/monitor.pid")" 2>/dev/null; then
    _mw_log "Monitor is running (pid $(cat "$MW_STATE_DIR/monitor.pid"))."
  else
    _mw_log "Monitor is not running."
  fi
}
# One-shot probe (full verbose) for debugging single cycle
# Verbose single probe of every link in $MW_ACTIVE_LINKS, for debugging:
# kernel route lookup, neighbour table, ICMP to the gateway and a TCP/TLS
# connect to 1.1.1.1 from the link's source address. Changes nothing.
# Returns 1 when $MW_ACTIVE_LINKS is missing or empty.
mw-probe-once() {
  # Keep the loop variables local so they do not overwrite same-named
  # variables of the interactive shell this file is sourced into.
  local dev ip net gw w
  [[ ! -s "$MW_ACTIVE_LINKS" ]] && { _mw_log "No $MW_ACTIVE_LINKS. Run mw-activate first."; return 1; }
  _mw_log "Verbose one-shot probe (ICMP then TCP fallback):"
  while read -r dev ip net gw w; do
    [[ -z "$dev" ]] && continue
    _mw_log "==== $dev ($ip) gw=$gw weight=$w ===="
    ip route get 1.1.1.1 from "$ip" oif "$dev" 2>/dev/null || _mw_log " ip route get: FAIL"
    _mw_log " ARP:"
    ip neigh show dev "$dev" || _mw_log " ip neigh: FAIL"
    _mw_log " ICMP -> $gw:"
    /usr/bin/ping -c 3 -I "$dev" -W 1 "$gw" || _mw_log " ping: maybe blocked"
    _mw_log " TCP connect -> 1.1.1.1:"
    curl --interface "$ip" --connect-timeout 3 --max-time 5 -sS -o /dev/null -w " HTTP ok\n" https://1.1.1.1 --insecure 2>/dev/null || _mw_log " curl: FAIL"
  done < "$MW_ACTIVE_LINKS"
  _mw_log "Done."
}
@wasertech
Copy link
Author

wasertech commented Sep 4, 2025

# Check Multi-WAN status
mw-status

# Dry-run with example interface
mw-activate --if "enp1s0f0u9 enp6s0 enp4s0 wlp5s0" --weights "enp1s0f0u9:6,enp6s0:3,enp4s0:5,wlp5s0:4" --dry-run

# If it looks good, omit the --dry-run flag to apply this config
mw-activate --if "enp1s0f0u9 enp6s0 enp4s0 wlp5s0" --weights "enp1s0f0u9:6,enp6s0:3,enp4s0:5,wlp5s0:4"

# Check the status again
mw-status

# Start the monitor (1h check)
mw-monitor-start --interval 3600 --samples 3 --loss 30 --rtt 250 --down 2 --up 3

# Check cached active links
cat ~/.cache/multiwan/active_links.tsv

# Open the monitor feed (in a new shell session preferably)
tail -f ~/.cache/multiwan/monitor.log

# Stop the monitor
mw-monitor-stop

# Disable Multi-WAN
mw-deactivate

# Verify your config is back to normal
mw-status

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment