Skip to content

Instantly share code, notes, and snippets.

@FrankRuns
Created September 12, 2025 11:46
Show Gist options
  • Select an option

  • Save FrankRuns/4d03c5d5be5d6d37153df849cbd995ed to your computer and use it in GitHub Desktop.

Select an option

Save FrankRuns/4d03c5d5be5d6d37153df849cbd995ed to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Decision-preserving dimensionality reduction for supply-chain network design.
What this script does:
• Strict 1,000-mile lane cap (no k-nearest fallback).
• Supply-aware clustering: only merge demand points sharing the same TOP-2 nearest DC signature.
• Demand-weighted clustering guardrails:
- Mean distance to centroid ≤ CLUSTER_MEAN_MILES
- Max demand per cluster ≤ MAX_DEMAND_PER_CLUSTER
- Max cluster diameter ≤ MAX_CLUSTER_DIAMETER
• Auto-refine if clusters are too few (tightens threshold a bit and re-clusters).
• Facility-location MILP (PuLP/CBC): shipping unit-miles + fixed open cost.
• Visuals, including DC→cluster “transportation matrix on a map”.
• Restored solution report: Objective (unit-miles), Arcs, Time, etc.
Outputs (PNG files):
- fig_all_lanes.png
- fig_pruned_lanes.png
- fig_cluster_links.png
- fig_reduced_lanes.png
- fig_dc_cluster_matrix_all.png
- fig_dc_cluster_matrix_top3.png
- fig_base_solution.png
- fig_reduced_solution.png
"""
import math
import time
import random
from dataclasses import dataclass
from typing import Dict, Tuple, List
import numpy as np
import matplotlib.pyplot as plt
try:
import pulp
PULP_AVAILABLE = True
except Exception:
PULP_AVAILABLE = False
# ---------------- Settings ----------------
SEED = 42
random.seed(SEED); np.random.seed(SEED)
# Problem size
N_DEMAND, N_DCS = 500, 10
DEMAND_PER_POINT, DC_CAPACITY = 100, 12_500
TOTAL_DEMAND = N_DEMAND * DEMAND_PER_POINT
EXPECTED_DCS_OPEN = math.ceil(TOTAL_DEMAND / DC_CAPACITY) # ~4
# Lanes (STRICT)
MAX_LANE_MILES = 1000 # bump if any demand/cluster has no DC within this cap
# Supply-aware clustering parameters
SIG_K = 2 # top-2 nearest DCs define a signature bucket
CLUSTER_MEAN_MILES = 180 # demand-weighted mean distance to centroid guardrail
MAX_DEMAND_PER_CLUSTER = 2_000 # units (≈20 points at 100 each)
MAX_CLUSTER_DIAMETER = 500 # miles (approx guardrail)
TARGET_MIN_CLUSTERS = 60 # if fewer formed, auto-tighten thresholds
AUTO_REFINE_STEPS = 2 # max refinement iterations
TIGHTEN_FACTOR = 0.9 # multiply mean threshold by this when refining
# Economics
OPEN_COST_UNITS = 2.0e5
# ALLOW_AT_MOST = EXPECTED_DCS_OPEN + 1 # optional; comment out to remove the cap
# Figures
FIG_ALL_LANES = "fig_all_lanes.png"
FIG_PRUNED_LANES = "fig_pruned_lanes.png"
FIG_CLUSTER_LINKS = "fig_cluster_links.png"
FIG_REDUCED_LANES = "fig_reduced_lanes.png"
FIG_DC_CLUSTER_ALL = "fig_dc_cluster_matrix_all.png"
FIG_DC_CLUSTER_TOP3 = "fig_dc_cluster_matrix_top3.png"
FIG_BASE_SOLUTION = "fig_base_solution.png"
FIG_REDUCED_SOLUTION = "fig_reduced_solution.png"
# ---------------- Utils ----------------
def haversine_miles(lat1, lon1, lat2, lon2):
R = 3958.8
phi1, phi2 = math.radians(lat1), math.radians(lat2)
dphi = math.radians(lat2 - lat1)
dlambda = math.radians(lon2 - lon1)
a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2)**2
return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
def us_lower48_sampler(n, jitter=0.0):
rng = np.random.default_rng(SEED)
LAT_MIN, LAT_MAX = 25.0, 49.5
LON_MIN, LON_MAX = -124.8, -66.9
pts = []
attempts = 0
while len(pts) < n:
attempts += 1
lat = rng.uniform(LAT_MIN, LAT_MAX)
lon = rng.uniform(LON_MIN, LON_MAX)
in_gulf = (lat < 26.0) and (-97.0 < lon < -81.0)
if in_gulf:
continue
pts.append((lat + rng.normal(0, jitter), lon + rng.normal(0, jitter)))
if attempts > n * 10000:
raise RuntimeError("Sampler struggling; relax rules.")
return np.array(pts)
def build_distance_matrix(A, B):
return np.array([[haversine_miles(a[0], a[1], b[0], b[1]) for b in B] for a in A])
def plot_points_and_lanes(dcs, demands, mask, title, outfile, max_lines=6000):
plt.figure(figsize=(10, 6))
plt.scatter(demands[:,1], demands[:,0], s=8, alpha=0.6, label="Demand/Clusters")
plt.scatter(dcs[:,1], dcs[:,0], marker="^", s=90, label="DCs")
nI, nJ = mask.shape
drawn = 0
for i in range(nI):
if drawn >= max_lines: break
for j in range(nJ):
if mask[i,j]:
plt.plot([demands[i,1], dcs[j,1]],[demands[i,0], dcs[j,0]], lw=0.25, alpha=0.25)
drawn += 1
if drawn >= max_lines: break
plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=180); plt.close()
def plot_cluster_links(points, centroids, labels, title, outfile, max_lines=100000):
plt.figure(figsize=(10, 6))
plt.scatter(points[:,1], points[:,0], s=8, alpha=0.5, label="Demand")
plt.scatter(centroids[:,1], centroids[:,0], marker="X", s=120, alpha=0.9, label="Cluster centers", color="blue")
drawn = 0
for i in range(points.shape[0]):
c = labels[i]
plt.plot([points[i,1], centroids[c,1]],[points[i,0], centroids[c,0]], lw=0.25, alpha=0.25)
drawn += 1
if drawn >= max_lines: break
plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=200); plt.close()
def plot_dominant_assignments(dcs, points, X, open_dcs, title, outfile, max_lines=6000):
plt.figure(figsize=(10, 6))
plt.scatter(points[:,1], points[:,0], s=8, alpha=0.6, label="Demand/Clusters")
closed = [j for j in range(dcs.shape[0]) if j not in open_dcs]
if closed: plt.scatter(dcs[closed,1], dcs[closed,0], marker="^", s=70, alpha=0.9, label="DCs (closed)")
if open_dcs: plt.scatter(dcs[open_dcs,1], dcs[open_dcs,0], marker="*", s=180, alpha=0.95, label="DCs (open)")
if X.size > 0:
chosen = np.argmax(X, axis=1)
drawn = 0
for i, j in enumerate(chosen):
plt.plot([points[i,1], dcs[j,1]],[points[i,0], dcs[j,0]], lw=0.35, alpha=0.35)
drawn += 1
if drawn >= max_lines: break
plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=200); plt.close()
def plot_dc_cluster_matrix_map(dcs, cents, totals, D2, mask,
title, outfile,
mode="topk", k=3,
style="fixed", # "fixed" or "scaled"
lw_fixed=0.25, # match ALL-lanes plot
alpha_fixed=0.25, # match ALL-lanes plot
linewidth_min=0.4, linewidth_max=2.5):
"""
DC→cluster 'matrix on a map'.
style="fixed": every arc uses lw_fixed/alpha_fixed (best for A/B visual comparability).
style="scaled": line width scales with cluster demand (previous behavior).
"""
import numpy as np
import matplotlib.pyplot as plt
I, J = mask.shape
# --- choose linewidths/alpha ---
if style == "fixed":
linewidths = np.full(I, float(lw_fixed))
def alpha_for(i, j): return float(alpha_fixed)
else:
# demand-scaled (NumPy 2-safe)
w = np.asarray(totals, dtype=float).reshape(-1)
rng = float(np.ptp(w))
s = np.zeros_like(w) if rng == 0.0 else (w - float(np.min(w))) / rng
linewidths = linewidth_min + (linewidth_max - linewidth_min) * s
def alpha_for(i, j):
return 0.25 + 0.6 * (1.0 - D2[i, j] / (MAX_LANE_MILES + 1e-9))
# --- plot ---
plt.figure(figsize=(10, 6))
plt.scatter(cents[:,1], cents[:,0], marker="X", s=120, alpha=0.9, label="Clusters")
plt.scatter(dcs[:,1], dcs[:,0], marker="^", s=90, alpha=0.9, label="DCs")
for i in range(I):
allowed = np.where(mask[i])[0]
if allowed.size == 0:
continue
if mode == "topk" and allowed.size > k:
ord_idx = np.argsort(D2[i, allowed])[:k]
cols = allowed[ord_idx]
else:
cols = allowed
for j in cols:
plt.plot([cents[i,1], dcs[j,1]],
[cents[i,0], dcs[j,0]],
lw=float(linewidths[i]), alpha=alpha_for(i, j))
plt.title(title)
plt.xlabel("Longitude"); plt.ylabel("Latitude")
plt.legend(loc="upper right")
plt.tight_layout()
plt.savefig(outfile, dpi=200)
plt.close()
# --------------- Supply-aware clustering ---------------
def topk_dc_signature(D_row: np.ndarray, k: int = 2) -> Tuple[int, ...]:
"""Return indices of top-k nearest DCs by distance (regardless of cap)."""
order = np.argsort(D_row)[:k]
return tuple(int(x) for x in order)
def cluster_bucket(points: np.ndarray, qty: np.ndarray, mean_threshold: float,
max_demand: int, max_diam: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[float]]:
"""Greedy, demand-weighted clustering inside a prefiltered bucket of points."""
n = points.shape[0]
remaining = set(range(n))
labels = -np.ones(n, dtype=int)
cents, totals, means = [], [], []
k = 0
# Precompute pairwise distances for diameter checks
pair = np.zeros((n, n), dtype=float)
for i in range(n):
for j in range(i+1, n):
d = haversine_miles(points[i,0], points[i,1], points[j,0], points[j,1])
pair[i,j] = pair[j,i] = d
def weighted_centroid(ids):
w = qty[np.array(ids)].astype(float)
lat = float(np.average(points[np.array(ids),0], weights=w))
lon = float(np.average(points[np.array(ids),1], weights=w))
return np.array([lat, lon], dtype=float)
def mean_dist(ids, center):
w = qty[np.array(ids)].astype(float)
dists = np.array([haversine_miles(points[t,0], points[t,1], center[0], center[1]) for t in ids])
return float(np.sum(w * dists) / np.sum(w))
def diameter(ids):
if len(ids) <= 1:
return 0.0
idx = np.array(ids, dtype=int)
return float(np.max(pair[np.ix_(idx, idx)]))
while remaining:
seed = max(remaining, key=lambda i: qty[i])
cluster = [seed]
centroid = weighted_centroid(cluster)
mean_d = 0.0
demand_sum = int(qty[seed])
candidates = sorted(list(remaining - set(cluster)), key=lambda i: pair[seed, i])
improved = True
while improved and candidates:
improved = False
for i in list(candidates):
trial = cluster + [i]
if demand_sum + int(qty[i]) > max_demand:
candidates.remove(i); continue
c_new = weighted_centroid(trial)
mean_new = mean_dist(trial, c_new)
if mean_new > mean_threshold:
candidates.remove(i); continue
diam_new = diameter(trial)
if diam_new > max_diam:
candidates.remove(i); continue
# accept
cluster.append(i)
centroid = c_new
mean_d = mean_new
demand_sum += int(qty[i])
candidates.remove(i)
candidates = sorted(candidates, key=lambda idx: haversine_miles(points[idx,0], points[idx,1], centroid[0], centroid[1]))
improved = True
break
for i in cluster:
labels[i] = k
remaining.discard(i)
cents.append((centroid[0], centroid[1]))
totals.append(demand_sum)
means.append(mean_d)
k += 1
return np.array(cents), np.array(totals, dtype=int), labels, means
def supply_aware_clustering(points: np.ndarray, qty: np.ndarray, D_to_dcs: np.ndarray,
mean_threshold: float, max_demand: int, max_diam: float,
sig_k: int = 2, target_min_clusters: int = 0, refine_steps: int = 0, tighten_factor: float = 0.9):
"""Cluster inside top-k nearest DC signature buckets; optionally auto-refine if too few clusters."""
n = points.shape[0]
# 1) Build signature buckets
sigs = [topk_dc_signature(D_to_dcs[i], sig_k) for i in range(n)]
buckets = {}
for i, sig in enumerate(sigs):
buckets.setdefault(sig, []).append(i)
def run_once(threshold):
all_cents = []; all_totals = []; all_labels = -np.ones(n, dtype=int)
all_means = []
next_cluster_id = 0
for sig, idxs in buckets.items():
sub_pts = points[np.array(idxs)]
sub_qty = qty[np.array(idxs)]
cents, totals, labels, means = cluster_bucket(sub_pts, sub_qty, threshold, max_demand, max_diam)
# remap labels to global ids
for local, gidx in enumerate(idxs):
all_labels[gidx] = next_cluster_id + labels[local]
next_cluster_id += int(labels.max()) + 1
all_cents.extend(list(map(tuple, cents)))
all_totals.extend(list(totals))
all_means.extend(list(means))
return np.array(all_cents), np.array(all_totals, dtype=int), all_labels, all_means
# initial pass
curr_thr = float(mean_threshold)
cents, totals, labels, means = run_once(curr_thr)
# auto-refine if too few clusters
steps = 0
while target_min_clusters and (cents.shape[0] < target_min_clusters) and (steps < refine_steps):
curr_thr *= float(tighten_factor)
cents, totals, labels, means = run_once(curr_thr)
steps += 1
return cents, totals, labels, means, curr_thr
# ---------------- Model ----------------
@dataclass
class ProblemData:
dcs: np.ndarray
demands: np.ndarray
demand_qty: np.ndarray
dc_capacity: np.ndarray
dist_miles: np.ndarray
lane_mask: np.ndarray
def solve_facility_location(data: ProblemData, verbose: bool=False) -> Dict:
I, J = data.demands.shape[0], data.dcs.shape[0]
demand, capacity, D, mask = data.demand_qty, data.dc_capacity, data.dist_miles, data.lane_mask
cost = D * demand[:,None]
arcs = [(i,j) for i in range(I) for j in range(J) if mask[i,j]]
if PULP_AVAILABLE and arcs:
x = pulp.LpVariable.dicts("x",(range(I),range(J)),0,1,cat=pulp.LpContinuous)
y = pulp.LpVariable.dicts("y",range(J),0,1,cat=pulp.LpBinary)
model = pulp.LpProblem("FLP", pulp.LpMinimize)
model += pulp.lpSum(cost[i,j]*x[i][j] for (i,j) in arcs) + OPEN_COST_UNITS*pulp.lpSum(y[j] for j in range(J))
for i in range(I):
model += pulp.lpSum(x[i][j] for j in range(J) if mask[i,j]) == 1.0
for j in range(J):
model += pulp.lpSum(demand[i]*x[i][j] for i in range(I) if mask[i,j]) <= capacity[j]*y[j]
try:
model += pulp.lpSum(y[j] for j in range(J)) <= ALLOW_AT_MOST
except NameError:
pass
for i in range(I):
for j in range(J):
if not mask[i,j]:
model += x[i][j] == 0.0
t0 = time.perf_counter()
model.solve(pulp.PULP_CBC_CMD(msg=verbose))
solve_time = time.perf_counter() - t0
status = pulp.LpStatus[model.status]
obj = pulp.value(model.objective) if pulp.value(model.objective) is not None else float("nan")
X = np.zeros((I,J), dtype=float)
for i in range(I):
for j in range(J):
val = x[i][j].value()
X[i,j] = 0.0 if val is None else float(val)
Y = np.array([y[j].value() or 0.0 for j in range(J)])
open_dcs = [j for j in range(J) if Y[j] > 0.5]
return dict(solver="PuLP-CBC", status=status, objective=obj, X=X, Y=Y,
open_dcs=open_dcs, n_arcs=len(arcs), n_x_vars=len(arcs),
n_y_vars=J, solve_time=solve_time)
return dict(solver="None", status="Skipped", objective=float("nan"),
X=np.zeros((I,J)), Y=np.zeros(J), open_dcs=[],
n_arcs=len(arcs), n_x_vars=len(arcs), n_y_vars=J, solve_time=float("nan"))
# ---------------- Reporter ----------------
def report_solution_block(label, sol, D, qty, open_cost_units):
"""
Pretty-print the exact stats you wanted, including:
- Solver/Status
- Objective (total): shipping unit-miles + open-DC cost
- Objective (unit-miles): shipping-only
- Open-cost component
- Open DCs list + count
- Arcs and **Solve time (s)**
- Avg assignment distance (mi/unit)
"""
shipping_um = float(np.sum(D * sol["X"] * qty[:, None])) if sol["X"].size else float("nan")
open_cost = open_cost_units * len(sol["open_dcs"])
avg_assign = shipping_um / float(qty.sum()) if qty.sum() > 0 else float("nan")
print(f"{label}:")
print(f" Solver: {sol['solver']} Status: {sol['status']}")
print(f" Objective (total): {sol['objective']:.2f}")
print(f" Objective (unit-miles): {shipping_um:.2f} | Open-cost: {open_cost:.2f}")
print(f" Avg assignment distance: {avg_assign:.1f} mi/unit")
print(f" Open DCs: {sol['open_dcs']} (count={len(sol['open_dcs'])})")
print(f" Arcs: {sol['n_arcs']} | Solve time (s): {sol['solve_time']:.3f}")
# ---------------- Main ----------------
def main():
print("=== Decision-Preserving Reduction — strict lanes + supply-aware clustering ===")
dcs = us_lower48_sampler(N_DCS, jitter=0.3)
demands = us_lower48_sampler(N_DEMAND, jitter=0.0)
demand_qty = np.full(N_DEMAND, DEMAND_PER_POINT, dtype=int)
dc_capacity = np.full(N_DCS, DC_CAPACITY, dtype=int)
# Distances + strict masks (NO fallback)
D = build_distance_matrix(demands, dcs)
strict_mask = (D <= MAX_LANE_MILES)
orphan_rows = np.where(~strict_mask.any(axis=1))[0]
if orphan_rows.size > 0:
raise RuntimeError(f"{orphan_rows.size} demand nodes have NO DC within {MAX_LANE_MILES} miles. Increase MAX_LANE_MILES.")
# Visuals (pre-solve)
plot_points_and_lanes(dcs, demands, np.ones_like(D, bool), "All DC→Demand lanes (subset drawn)", FIG_ALL_LANES)
plot_points_and_lanes(dcs, demands, strict_mask, f"Pruned lanes (≤ {MAX_LANE_MILES} miles)", FIG_PRUNED_LANES)
orig_arcs = N_DEMAND * N_DCS
pruned_arcs = int(strict_mask.sum())
print(f"[ARCS] original={orig_arcs:,} after_pruning={pruned_arcs:,}")
# Solve BASE and report
print("Solving BASE...")
base = ProblemData(dcs, demands, demand_qty, dc_capacity, D, strict_mask)
base_sol = solve_facility_location(base, verbose=False)
report_solution_block("BASE", base_sol, D, demand_qty, OPEN_COST_UNITS)
plot_dominant_assignments(dcs, demands, base_sol['X'], base_sol['open_dcs'], "BASE — dominant assignments", FIG_BASE_SOLUTION)
# Supply-aware, demand-weighted clustering
print(f"Clustering (supply-aware): mean≤{CLUSTER_MEAN_MILES} mi, max_demand≤{MAX_DEMAND_PER_CLUSTER}, diam≤{MAX_CLUSTER_DIAMETER} mi")
cents, totals, labels, mean_dists, eff_thr = supply_aware_clustering(
demands, demand_qty, D,
mean_threshold=CLUSTER_MEAN_MILES,
max_demand=MAX_DEMAND_PER_CLUSTER,
max_diam=MAX_CLUSTER_DIAMETER,
sig_k=SIG_K,
target_min_clusters=TARGET_MIN_CLUSTERS,
refine_steps=AUTO_REFINE_STEPS,
tighten_factor=TIGHTEN_FACTOR
)
print(f" Clusters formed: {cents.shape[0]} (from {N_DEMAND}) | eff_mean_threshold={eff_thr:.1f} mi | "
f"median mean={np.median(mean_dists):.1f} mi, p90={np.percentile(mean_dists,90):.1f} mi, max={np.max(mean_dists):.1f} mi")
plot_cluster_links(demands, cents, labels, "Demand → Cluster centroid (membership links)", FIG_CLUSTER_LINKS)
# Reduced problem (DC ↔ clusters)
D2 = build_distance_matrix(cents, dcs)
strict_mask_2 = (D2 <= MAX_LANE_MILES)
orphan_clusters = np.where(~strict_mask_2.any(axis=1))[0]
if orphan_clusters.size > 0:
raise RuntimeError(f"{orphan_clusters.size} clusters have NO DC within {MAX_LANE_MILES} miles. Increase MAX_LANE_MILES or tighten clustering.")
# Visuals: DC→cluster lanes and "matrix on a map"
plot_points_and_lanes(dcs, cents, strict_mask_2, f"Reduced lanes (clusters, ≤ {MAX_LANE_MILES} miles)", FIG_REDUCED_LANES)
# all allowed arcs, skinny like the ALL-lanes figure
plot_dc_cluster_matrix_map(
dcs, cents, totals, D2, strict_mask_2,
title=f"DC → Cluster matrix (all allowed, skinny, ≤ {MAX_LANE_MILES} mi)",
outfile="fig_dc_cluster_matrix_all.png",
mode="all",
style="fixed", lw_fixed=0.25, alpha_fixed=0.25
)
# top-3 per cluster, same skinny style for apples-to-apples comparison
plot_dc_cluster_matrix_map(
dcs, cents, totals, D2, strict_mask_2,
title=f"DC → Cluster matrix (top-3, skinny, ≤ {MAX_LANE_MILES} mi)",
outfile="fig_dc_cluster_matrix_top3.png",
mode="topk", k=3,
style="fixed", lw_fixed=0.25, alpha_fixed=0.25
)
clustered_arcs = int(strict_mask_2.sum())
print(f"[ARCS] after_clustering={clustered_arcs:,}")
# Solve REDUCED and report
print("Solving REDUCED...")
red = ProblemData(dcs, cents, totals, dc_capacity, D2, strict_mask_2)
red_sol = solve_facility_location(red, verbose=False)
report_solution_block("REDUCED", red_sol, D2, totals, OPEN_COST_UNITS)
plot_dominant_assignments(dcs, cents, red_sol['X'], red_sol['open_dcs'], "REDUCED — dominant assignments", FIG_REDUCED_SOLUTION)
# Decision stability
base_set, red_set = set(base_sol['open_dcs']), set(red_sol['open_dcs'])
jaccard = len(base_set & red_set) / max(1, len(base_set | red_set))
print(f"[Stability] Jaccard(base_open, reduced_open) = {jaccard:.3f} | base={sorted(base_set)} reduced={sorted(red_set)}")
# Summary
lane_reduction = 1.0 - (red_sol['n_arcs'] / max(1, base_sol['n_arcs']))
obj_gap_pct = 100.0 * (red_sol['objective'] - base_sol['objective']) / (abs(base_sol['objective']) + 1e-9)
print("=== BEFORE vs AFTER (summary) ===")
print(f"Nodes: {N_DEMAND} → {cents.shape[0]}")
print(f"Arcs: {base_sol['n_arcs']} → {red_sol['n_arcs']} (Δ ≈ {lane_reduction*100:.1f}% fewer arcs)")
print(f"Open DCs: {len(base_sol['open_dcs'])} → {len(red_sol['open_dcs'])}")
print(f"Objective: {base_sol['objective']:.0f} → {red_sol['objective']:.0f} (gap ≈ {obj_gap_pct:.2f}%)")
print(f"[ARCS SUMMARY] original={orig_arcs:,} pruned={pruned_arcs:,} clustered={clustered_arcs:,}")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment