FrankRuns · September 12, 2025 11:46
diff --git a/supply_chain_dimension_reduction.py b/supply_chain_dimension_reduction.py
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Decision-preserving dimensionality reduction for supply-chain network design.

 What this script does:
  • Strict 1,000-mile lane cap (no k-nearest fallback).
  • Supply-aware clustering: only merge demand points sharing the same TOP-2 nearest DC signature.
  • Demand-weighted clustering guardrails:
      - Mean distance to centroid ≤ CLUSTER_MEAN_MILES
      - Max demand per cluster ≤ MAX_DEMAND_PER_CLUSTER
      - Max cluster diameter ≤ MAX_CLUSTER_DIAMETER
  • Auto-refine if clusters are too few (tightens threshold a bit and re-clusters).
  • Facility-location MILP (PuLP/CBC): shipping unit-miles + fixed open cost.
  • Visuals, including DC→cluster “transportation matrix on a map”.
  • Restored solution report: Objective (unit-miles), Arcs, Time, etc.

 Outputs (PNG files):
  - fig_all_lanes.png
  - fig_pruned_lanes.png
  - fig_cluster_links.png
  - fig_reduced_lanes.png
  - fig_dc_cluster_matrix_all.png
  - fig_dc_cluster_matrix_top3.png
  - fig_base_solution.png
  - fig_reduced_solution.png
 """

 import math
 import time
 import random
 from dataclasses import dataclass
 from typing import Dict, Tuple, List

 import numpy as np
 import matplotlib.pyplot as plt

 try:
    import pulp
    PULP_AVAILABLE = True
 except Exception:
    PULP_AVAILABLE = False

 # ---------------- Settings ----------------
 SEED = 42
 random.seed(SEED); np.random.seed(SEED)

 # Problem size
 N_DEMAND, N_DCS = 500, 10
 DEMAND_PER_POINT, DC_CAPACITY = 100, 12_500
 TOTAL_DEMAND = N_DEMAND * DEMAND_PER_POINT
 EXPECTED_DCS_OPEN = math.ceil(TOTAL_DEMAND / DC_CAPACITY)  # ~4

 # Lanes (STRICT)
 MAX_LANE_MILES = 1000  # bump if any demand/cluster has no DC within this cap

 # Supply-aware clustering parameters
 SIG_K = 2                         # top-2 nearest DCs define a signature bucket
 CLUSTER_MEAN_MILES = 180          # demand-weighted mean distance to centroid guardrail
 MAX_DEMAND_PER_CLUSTER = 2_000    # units (≈20 points at 100 each)
 MAX_CLUSTER_DIAMETER = 500        # miles (approx guardrail)
 TARGET_MIN_CLUSTERS = 60          # if fewer formed, auto-tighten thresholds
 AUTO_REFINE_STEPS = 2             # max refinement iterations
 TIGHTEN_FACTOR = 0.9              # multiply mean threshold by this when refining

 # Economics
 OPEN_COST_UNITS = 2.0e5
 # ALLOW_AT_MOST = EXPECTED_DCS_OPEN + 1  # optional; comment out to remove the cap

 # Figures
 FIG_ALL_LANES               = "fig_all_lanes.png"
 FIG_PRUNED_LANES            = "fig_pruned_lanes.png"
 FIG_CLUSTER_LINKS           = "fig_cluster_links.png"
 FIG_REDUCED_LANES           = "fig_reduced_lanes.png"
 FIG_DC_CLUSTER_ALL          = "fig_dc_cluster_matrix_all.png"
 FIG_DC_CLUSTER_TOP3         = "fig_dc_cluster_matrix_top3.png"
 FIG_BASE_SOLUTION           = "fig_base_solution.png"
 FIG_REDUCED_SOLUTION        = "fig_reduced_solution.png"


 # ---------------- Utils ----------------
 def haversine_miles(lat1, lon1, lat2, lon2):
    R = 3958.8
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2)**2
    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))

 def us_lower48_sampler(n, jitter=0.0):
    rng = np.random.default_rng(SEED)
    LAT_MIN, LAT_MAX = 25.0, 49.5
    LON_MIN, LON_MAX = -124.8, -66.9
    pts = []
    attempts = 0
    while len(pts) < n:
        attempts += 1
        lat = rng.uniform(LAT_MIN, LAT_MAX)
        lon = rng.uniform(LON_MIN, LON_MAX)
        in_gulf = (lat < 26.0) and (-97.0 < lon < -81.0)
        if in_gulf:
            continue
        pts.append((lat + rng.normal(0, jitter), lon + rng.normal(0, jitter)))
        if attempts > n * 10000:
            raise RuntimeError("Sampler struggling; relax rules.")
    return np.array(pts)

 def build_distance_matrix(A, B):
    return np.array([[haversine_miles(a[0], a[1], b[0], b[1]) for b in B] for a in A])

 def plot_points_and_lanes(dcs, demands, mask, title, outfile, max_lines=6000):
    plt.figure(figsize=(10, 6))
    plt.scatter(demands[:,1], demands[:,0], s=8, alpha=0.6, label="Demand/Clusters")
    plt.scatter(dcs[:,1], dcs[:,0], marker="^", s=90, label="DCs")
    nI, nJ = mask.shape
    drawn = 0
    for i in range(nI):
        if drawn >= max_lines: break
        for j in range(nJ):
            if mask[i,j]:
                plt.plot([demands[i,1], dcs[j,1]],[demands[i,0], dcs[j,0]], lw=0.25, alpha=0.25)
                drawn += 1
                if drawn >= max_lines: break
    plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
    plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=180); plt.close()

 def plot_cluster_links(points, centroids, labels, title, outfile, max_lines=100000):
    plt.figure(figsize=(10, 6))
    plt.scatter(points[:,1], points[:,0], s=8, alpha=0.5, label="Demand")
    plt.scatter(centroids[:,1], centroids[:,0], marker="X", s=120, alpha=0.9, label="Cluster centers", color="blue")
    drawn = 0
    for i in range(points.shape[0]):
        c = labels[i]
        plt.plot([points[i,1], centroids[c,1]],[points[i,0], centroids[c,0]], lw=0.25, alpha=0.25)
        drawn += 1
        if drawn >= max_lines: break
    plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
    plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=200); plt.close()

 def plot_dominant_assignments(dcs, points, X, open_dcs, title, outfile, max_lines=6000):
    plt.figure(figsize=(10, 6))
    plt.scatter(points[:,1], points[:,0], s=8, alpha=0.6, label="Demand/Clusters")
    closed = [j for j in range(dcs.shape[0]) if j not in open_dcs]
    if closed:   plt.scatter(dcs[closed,1], dcs[closed,0], marker="^", s=70, alpha=0.9, label="DCs (closed)")
    if open_dcs: plt.scatter(dcs[open_dcs,1], dcs[open_dcs,0], marker="*", s=180, alpha=0.95, label="DCs (open)")
    if X.size > 0:
        chosen = np.argmax(X, axis=1)
        drawn = 0
        for i, j in enumerate(chosen):
            plt.plot([points[i,1], dcs[j,1]],[points[i,0], dcs[j,0]], lw=0.35, alpha=0.35)
            drawn += 1
            if drawn >= max_lines: break
    plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
    plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=200); plt.close()

 def plot_dc_cluster_matrix_map(dcs, cents, totals, D2, mask,
                               title, outfile,
                               mode="topk", k=3,
                               style="fixed",            # "fixed" or "scaled"
                               lw_fixed=0.25,           # match ALL-lanes plot
                               alpha_fixed=0.25,        # match ALL-lanes plot
                               linewidth_min=0.4, linewidth_max=2.5):
    """
    DC→cluster 'matrix on a map'.
    style="fixed":  every arc uses lw_fixed/alpha_fixed (best for A/B visual comparability).
    style="scaled": line width scales with cluster demand (previous behavior).
    """
    import numpy as np
    import matplotlib.pyplot as plt

    I, J = mask.shape

    # --- choose linewidths/alpha ---
    if style == "fixed":
        linewidths = np.full(I, float(lw_fixed))
        def alpha_for(i, j): return float(alpha_fixed)
    else:
        # demand-scaled (NumPy 2-safe)
        w = np.asarray(totals, dtype=float).reshape(-1)
        rng = float(np.ptp(w))
        s = np.zeros_like(w) if rng == 0.0 else (w - float(np.min(w))) / rng
        linewidths = linewidth_min + (linewidth_max - linewidth_min) * s
        def alpha_for(i, j):
            return 0.25 + 0.6 * (1.0 - D2[i, j] / (MAX_LANE_MILES + 1e-9))

    # --- plot ---
    plt.figure(figsize=(10, 6))
    plt.scatter(cents[:,1], cents[:,0], marker="X", s=120, alpha=0.9, label="Clusters")
    plt.scatter(dcs[:,1],  dcs[:,0],  marker="^", s=90,  alpha=0.9, label="DCs")

    for i in range(I):
        allowed = np.where(mask[i])[0]
        if allowed.size == 0:
            continue
        if mode == "topk" and allowed.size > k:
            ord_idx = np.argsort(D2[i, allowed])[:k]
            cols = allowed[ord_idx]
        else:
            cols = allowed
        for j in cols:
            plt.plot([cents[i,1], dcs[j,1]],
                     [cents[i,0], dcs[j,0]],
                     lw=float(linewidths[i]), alpha=alpha_for(i, j))

    plt.title(title)
    plt.xlabel("Longitude"); plt.ylabel("Latitude")
    plt.legend(loc="upper right")
    plt.tight_layout()
    plt.savefig(outfile, dpi=200)
    plt.close()


 # --------------- Supply-aware clustering ---------------
 def topk_dc_signature(D_row: np.ndarray, k: int = 2) -> Tuple[int, ...]:
    """Return indices of top-k nearest DCs by distance (regardless of cap)."""
    order = np.argsort(D_row)[:k]
    return tuple(int(x) for x in order)

 def cluster_bucket(points: np.ndarray, qty: np.ndarray, mean_threshold: float,
                   max_demand: int, max_diam: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[float]]:
    """Greedy, demand-weighted clustering inside a prefiltered bucket of points."""
    n = points.shape[0]
    remaining = set(range(n))
    labels = -np.ones(n, dtype=int)
    cents, totals, means = [], [], []
    k = 0

    # Precompute pairwise distances for diameter checks
    pair = np.zeros((n, n), dtype=float)
    for i in range(n):
        for j in range(i+1, n):
            d = haversine_miles(points[i,0], points[i,1], points[j,0], points[j,1])
            pair[i,j] = pair[j,i] = d

    def weighted_centroid(ids):
        w = qty[np.array(ids)].astype(float)
        lat = float(np.average(points[np.array(ids),0], weights=w))
        lon = float(np.average(points[np.array(ids),1], weights=w))
        return np.array([lat, lon], dtype=float)

    def mean_dist(ids, center):
        w = qty[np.array(ids)].astype(float)
        dists = np.array([haversine_miles(points[t,0], points[t,1], center[0], center[1]) for t in ids])
        return float(np.sum(w * dists) / np.sum(w))

    def diameter(ids):
        if len(ids) <= 1:
            return 0.0
        idx = np.array(ids, dtype=int)
        return float(np.max(pair[np.ix_(idx, idx)]))

    while remaining:
        seed = max(remaining, key=lambda i: qty[i])
        cluster = [seed]
        centroid = weighted_centroid(cluster)
        mean_d = 0.0
        demand_sum = int(qty[seed])
        candidates = sorted(list(remaining - set(cluster)), key=lambda i: pair[seed, i])
        improved = True
        while improved and candidates:
            improved = False
            for i in list(candidates):
                trial = cluster + [i]
                if demand_sum + int(qty[i]) > max_demand:
                    candidates.remove(i); continue
                c_new = weighted_centroid(trial)
                mean_new = mean_dist(trial, c_new)
                if mean_new > mean_threshold:
                    candidates.remove(i); continue
                diam_new = diameter(trial)
                if diam_new > max_diam:
                    candidates.remove(i); continue
                # accept
                cluster.append(i)
                centroid = c_new
                mean_d = mean_new
                demand_sum += int(qty[i])
                candidates.remove(i)
                candidates = sorted(candidates, key=lambda idx: haversine_miles(points[idx,0], points[idx,1], centroid[0], centroid[1]))
                improved = True
                break

        for i in cluster:
            labels[i] = k
            remaining.discard(i)
        cents.append((centroid[0], centroid[1]))
        totals.append(demand_sum)
        means.append(mean_d)
        k += 1

    return np.array(cents), np.array(totals, dtype=int), labels, means

 def supply_aware_clustering(points: np.ndarray, qty: np.ndarray, D_to_dcs: np.ndarray,
                            mean_threshold: float, max_demand: int, max_diam: float,
                            sig_k: int = 2, target_min_clusters: int = 0, refine_steps: int = 0, tighten_factor: float = 0.9):
    """Cluster inside top-k nearest DC signature buckets; optionally auto-refine if too few clusters."""
    n = points.shape[0]
    # 1) Build signature buckets
    sigs = [topk_dc_signature(D_to_dcs[i], sig_k) for i in range(n)]
    buckets = {}
    for i, sig in enumerate(sigs):
        buckets.setdefault(sig, []).append(i)

    def run_once(threshold):
        all_cents = []; all_totals = []; all_labels = -np.ones(n, dtype=int)
        all_means = []
        next_cluster_id = 0
        for sig, idxs in buckets.items():
            sub_pts = points[np.array(idxs)]
            sub_qty = qty[np.array(idxs)]
            cents, totals, labels, means = cluster_bucket(sub_pts, sub_qty, threshold, max_demand, max_diam)
            # remap labels to global ids
            for local, gidx in enumerate(idxs):
                all_labels[gidx] = next_cluster_id + labels[local]
            next_cluster_id += int(labels.max()) + 1
            all_cents.extend(list(map(tuple, cents)))
            all_totals.extend(list(totals))
            all_means.extend(list(means))
        return np.array(all_cents), np.array(all_totals, dtype=int), all_labels, all_means

    # initial pass
    curr_thr = float(mean_threshold)
    cents, totals, labels, means = run_once(curr_thr)

    # auto-refine if too few clusters
    steps = 0
    while target_min_clusters and (cents.shape[0] < target_min_clusters) and (steps < refine_steps):
        curr_thr *= float(tighten_factor)
        cents, totals, labels, means = run_once(curr_thr)
        steps += 1

    return cents, totals, labels, means, curr_thr


 # ---------------- Model ----------------
 @dataclass
 class ProblemData:
    dcs: np.ndarray
    demands: np.ndarray
    demand_qty: np.ndarray
    dc_capacity: np.ndarray
    dist_miles: np.ndarray
    lane_mask: np.ndarray

 def solve_facility_location(data: ProblemData, verbose: bool=False) -> Dict:
    I, J = data.demands.shape[0], data.dcs.shape[0]
    demand, capacity, D, mask = data.demand_qty, data.dc_capacity, data.dist_miles, data.lane_mask
    cost = D * demand[:,None]
    arcs = [(i,j) for i in range(I) for j in range(J) if mask[i,j]]

    if PULP_AVAILABLE and arcs:
        x = pulp.LpVariable.dicts("x",(range(I),range(J)),0,1,cat=pulp.LpContinuous)
        y = pulp.LpVariable.dicts("y",range(J),0,1,cat=pulp.LpBinary)
        model = pulp.LpProblem("FLP", pulp.LpMinimize)
        model += pulp.lpSum(cost[i,j]*x[i][j] for (i,j) in arcs) + OPEN_COST_UNITS*pulp.lpSum(y[j] for j in range(J))
        for i in range(I):
            model += pulp.lpSum(x[i][j] for j in range(J) if mask[i,j]) == 1.0
        for j in range(J):
            model += pulp.lpSum(demand[i]*x[i][j] for i in range(I) if mask[i,j]) <= capacity[j]*y[j]
        try:
            model += pulp.lpSum(y[j] for j in range(J)) <= ALLOW_AT_MOST
        except NameError:
            pass
        for i in range(I):
            for j in range(J):
                if not mask[i,j]:
                    model += x[i][j] == 0.0
        t0 = time.perf_counter()
        model.solve(pulp.PULP_CBC_CMD(msg=verbose))
        solve_time = time.perf_counter() - t0
        status = pulp.LpStatus[model.status]
        obj = pulp.value(model.objective) if pulp.value(model.objective) is not None else float("nan")
        X = np.zeros((I,J), dtype=float)
        for i in range(I):
            for j in range(J):
                val = x[i][j].value()
                X[i,j] = 0.0 if val is None else float(val)
        Y = np.array([y[j].value() or 0.0 for j in range(J)])
        open_dcs = [j for j in range(J) if Y[j] > 0.5]
        return dict(solver="PuLP-CBC", status=status, objective=obj, X=X, Y=Y,
                    open_dcs=open_dcs, n_arcs=len(arcs), n_x_vars=len(arcs),
                    n_y_vars=J, solve_time=solve_time)

    return dict(solver="None", status="Skipped", objective=float("nan"),
                X=np.zeros((I,J)), Y=np.zeros(J), open_dcs=[],
                n_arcs=len(arcs), n_x_vars=len(arcs), n_y_vars=J, solve_time=float("nan"))


 # ---------------- Reporter ----------------
 def report_solution_block(label, sol, D, qty, open_cost_units):
    """
    Pretty-print the exact stats you wanted, including:
      - Solver/Status
      - Objective (total): shipping unit-miles + open-DC cost
      - Objective (unit-miles): shipping-only
      - Open-cost component
      - Open DCs list + count
      - Arcs and **Solve time (s)**
      - Avg assignment distance (mi/unit)
    """
    shipping_um = float(np.sum(D * sol["X"] * qty[:, None])) if sol["X"].size else float("nan")
    open_cost = open_cost_units * len(sol["open_dcs"])
    avg_assign = shipping_um / float(qty.sum()) if qty.sum() > 0 else float("nan")
    print(f"{label}:")
    print(f"  Solver: {sol['solver']}  Status: {sol['status']}")
    print(f"  Objective (total): {sol['objective']:.2f}")
    print(f"  Objective (unit-miles): {shipping_um:.2f}  |  Open-cost: {open_cost:.2f}")
    print(f"  Avg assignment distance: {avg_assign:.1f} mi/unit")
    print(f"  Open DCs: {sol['open_dcs']} (count={len(sol['open_dcs'])})")
    print(f"  Arcs: {sol['n_arcs']}  |  Solve time (s): {sol['solve_time']:.3f}")


 # ---------------- Main ----------------
 def main():
    print("=== Decision-Preserving Reduction — strict lanes + supply-aware clustering ===")
    dcs = us_lower48_sampler(N_DCS, jitter=0.3)
    demands = us_lower48_sampler(N_DEMAND, jitter=0.0)
    demand_qty = np.full(N_DEMAND, DEMAND_PER_POINT, dtype=int)
    dc_capacity = np.full(N_DCS, DC_CAPACITY, dtype=int)

    # Distances + strict masks (NO fallback)
    D = build_distance_matrix(demands, dcs)
    strict_mask = (D <= MAX_LANE_MILES)
    orphan_rows = np.where(~strict_mask.any(axis=1))[0]
    if orphan_rows.size > 0:
        raise RuntimeError(f"{orphan_rows.size} demand nodes have NO DC within {MAX_LANE_MILES} miles. Increase MAX_LANE_MILES.")

    # Visuals (pre-solve)
    plot_points_and_lanes(dcs, demands, np.ones_like(D, bool), "All DC→Demand lanes (subset drawn)", FIG_ALL_LANES)
    plot_points_and_lanes(dcs, demands, strict_mask, f"Pruned lanes (≤ {MAX_LANE_MILES} miles)", FIG_PRUNED_LANES)

    orig_arcs = N_DEMAND * N_DCS
    pruned_arcs = int(strict_mask.sum())
    print(f"[ARCS] original={orig_arcs:,}  after_pruning={pruned_arcs:,}")

    # Solve BASE and report
    print("Solving BASE...")
    base = ProblemData(dcs, demands, demand_qty, dc_capacity, D, strict_mask)
    base_sol = solve_facility_location(base, verbose=False)
    report_solution_block("BASE", base_sol, D, demand_qty, OPEN_COST_UNITS)
    plot_dominant_assignments(dcs, demands, base_sol['X'], base_sol['open_dcs'], "BASE — dominant assignments", FIG_BASE_SOLUTION)

    # Supply-aware, demand-weighted clustering
    print(f"Clustering (supply-aware): mean≤{CLUSTER_MEAN_MILES} mi, max_demand≤{MAX_DEMAND_PER_CLUSTER}, diam≤{MAX_CLUSTER_DIAMETER} mi")
    cents, totals, labels, mean_dists, eff_thr = supply_aware_clustering(
        demands, demand_qty, D,
        mean_threshold=CLUSTER_MEAN_MILES,
        max_demand=MAX_DEMAND_PER_CLUSTER,
        max_diam=MAX_CLUSTER_DIAMETER,
        sig_k=SIG_K,
        target_min_clusters=TARGET_MIN_CLUSTERS,
        refine_steps=AUTO_REFINE_STEPS,
        tighten_factor=TIGHTEN_FACTOR
    )
    print(f"  Clusters formed: {cents.shape[0]} (from {N_DEMAND})  |  eff_mean_threshold={eff_thr:.1f} mi  |  "
          f"median mean={np.median(mean_dists):.1f} mi, p90={np.percentile(mean_dists,90):.1f} mi, max={np.max(mean_dists):.1f} mi")
    plot_cluster_links(demands, cents, labels, "Demand → Cluster centroid (membership links)", FIG_CLUSTER_LINKS)

    # Reduced problem (DC ↔ clusters)
    D2 = build_distance_matrix(cents, dcs)
    strict_mask_2 = (D2 <= MAX_LANE_MILES)
    orphan_clusters = np.where(~strict_mask_2.any(axis=1))[0]
    if orphan_clusters.size > 0:
        raise RuntimeError(f"{orphan_clusters.size} clusters have NO DC within {MAX_LANE_MILES} miles. Increase MAX_LANE_MILES or tighten clustering.")

    # Visuals: DC→cluster lanes and "matrix on a map"
    plot_points_and_lanes(dcs, cents, strict_mask_2, f"Reduced lanes (clusters, ≤ {MAX_LANE_MILES} miles)", FIG_REDUCED_LANES)
    # all allowed arcs, skinny like the ALL-lanes figure
    plot_dc_cluster_matrix_map(
        dcs, cents, totals, D2, strict_mask_2,
        title=f"DC → Cluster matrix (all allowed, skinny, ≤ {MAX_LANE_MILES} mi)",
        outfile="fig_dc_cluster_matrix_all.png",
        mode="all",
        style="fixed", lw_fixed=0.25, alpha_fixed=0.25
    )

    # top-3 per cluster, same skinny style for apples-to-apples comparison
    plot_dc_cluster_matrix_map(
        dcs, cents, totals, D2, strict_mask_2,
        title=f"DC → Cluster matrix (top-3, skinny, ≤ {MAX_LANE_MILES} mi)",
        outfile="fig_dc_cluster_matrix_top3.png",
        mode="topk", k=3,
        style="fixed", lw_fixed=0.25, alpha_fixed=0.25
    )

    clustered_arcs = int(strict_mask_2.sum())
    print(f"[ARCS] after_clustering={clustered_arcs:,}")

    # Solve REDUCED and report
    print("Solving REDUCED...")
    red = ProblemData(dcs, cents, totals, dc_capacity, D2, strict_mask_2)
    red_sol = solve_facility_location(red, verbose=False)
    report_solution_block("REDUCED", red_sol, D2, totals, OPEN_COST_UNITS)
    plot_dominant_assignments(dcs, cents, red_sol['X'], red_sol['open_dcs'], "REDUCED — dominant assignments", FIG_REDUCED_SOLUTION)

    # Decision stability
    base_set, red_set = set(base_sol['open_dcs']), set(red_sol['open_dcs'])
    jaccard = len(base_set & red_set) / max(1, len(base_set | red_set))
    print(f"[Stability] Jaccard(base_open, reduced_open) = {jaccard:.3f}  | base={sorted(base_set)}  reduced={sorted(red_set)}")

    # Summary
    lane_reduction = 1.0 - (red_sol['n_arcs'] / max(1, base_sol['n_arcs']))
    obj_gap_pct = 100.0 * (red_sol['objective'] - base_sol['objective']) / (abs(base_sol['objective']) + 1e-9)
    print("=== BEFORE vs AFTER (summary) ===")
    print(f"Nodes:     {N_DEMAND} → {cents.shape[0]}")
    print(f"Arcs:      {base_sol['n_arcs']} → {red_sol['n_arcs']}  (Δ ≈ {lane_reduction*100:.1f}% fewer arcs)")
    print(f"Open DCs:  {len(base_sol['open_dcs'])} → {len(red_sol['open_dcs'])}")
    print(f"Objective: {base_sol['objective']:.0f} → {red_sol['objective']:.0f}  (gap ≈ {obj_gap_pct:.2f}%)")
    print(f"[ARCS SUMMARY] original={orig_arcs:,}  pruned={pruned_arcs:,}  clustered={clustered_arcs:,}")

 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	# -- coding: utf-8 --
	"""
	Decision-preserving dimensionality reduction for supply-chain network design.

	What this script does:
	• Strict 1,000-mile lane cap (no k-nearest fallback).
	• Supply-aware clustering: only merge demand points sharing the same TOP-2 nearest DC signature.
	• Demand-weighted clustering guardrails:
	- Mean distance to centroid ≤ CLUSTER_MEAN_MILES
	- Max demand per cluster ≤ MAX_DEMAND_PER_CLUSTER
	- Max cluster diameter ≤ MAX_CLUSTER_DIAMETER
	• Auto-refine if clusters are too few (tightens threshold a bit and re-clusters).
	• Facility-location MILP (PuLP/CBC): shipping unit-miles + fixed open cost.
	• Visuals, including DC→cluster “transportation matrix on a map”.
	• Restored solution report: Objective (unit-miles), Arcs, Time, etc.

	Outputs (PNG files):
	- fig_all_lanes.png
	- fig_pruned_lanes.png
	- fig_cluster_links.png
	- fig_reduced_lanes.png
	- fig_dc_cluster_matrix_all.png
	- fig_dc_cluster_matrix_top3.png
	- fig_base_solution.png
	- fig_reduced_solution.png
	"""

	import math
	import time
	import random
	from dataclasses import dataclass
	from typing import Dict, Tuple, List

	import numpy as np
	import matplotlib.pyplot as plt

	try:
	import pulp
	PULP_AVAILABLE = True
	except Exception:
	PULP_AVAILABLE = False

	# ---------------- Settings ----------------
	SEED = 42
	random.seed(SEED); np.random.seed(SEED)

	# Problem size
	N_DEMAND, N_DCS = 500, 10
	DEMAND_PER_POINT, DC_CAPACITY = 100, 12_500
	TOTAL_DEMAND = N_DEMAND * DEMAND_PER_POINT
	EXPECTED_DCS_OPEN = math.ceil(TOTAL_DEMAND / DC_CAPACITY) # ~4

	# Lanes (STRICT)
	MAX_LANE_MILES = 1000 # bump if any demand/cluster has no DC within this cap

	# Supply-aware clustering parameters
	SIG_K = 2 # top-2 nearest DCs define a signature bucket
	CLUSTER_MEAN_MILES = 180 # demand-weighted mean distance to centroid guardrail
	MAX_DEMAND_PER_CLUSTER = 2_000 # units (≈20 points at 100 each)
	MAX_CLUSTER_DIAMETER = 500 # miles (approx guardrail)
	TARGET_MIN_CLUSTERS = 60 # if fewer formed, auto-tighten thresholds
	AUTO_REFINE_STEPS = 2 # max refinement iterations
	TIGHTEN_FACTOR = 0.9 # multiply mean threshold by this when refining

	# Economics
	OPEN_COST_UNITS = 2.0e5
	# ALLOW_AT_MOST = EXPECTED_DCS_OPEN + 1 # optional; comment out to remove the cap

	# Figures
	FIG_ALL_LANES = "fig_all_lanes.png"
	FIG_PRUNED_LANES = "fig_pruned_lanes.png"
	FIG_CLUSTER_LINKS = "fig_cluster_links.png"
	FIG_REDUCED_LANES = "fig_reduced_lanes.png"
	FIG_DC_CLUSTER_ALL = "fig_dc_cluster_matrix_all.png"
	FIG_DC_CLUSTER_TOP3 = "fig_dc_cluster_matrix_top3.png"
	FIG_BASE_SOLUTION = "fig_base_solution.png"
	FIG_REDUCED_SOLUTION = "fig_reduced_solution.png"


	# ---------------- Utils ----------------
	def haversine_miles(lat1, lon1, lat2, lon2):
	R = 3958.8
	phi1, phi2 = math.radians(lat1), math.radians(lat2)
	dphi = math.radians(lat2 - lat1)
	dlambda = math.radians(lon2 - lon1)
	a = math.sin(dphi/2)*2 + math.cos(phi1)math.cos(phi2)math.sin(dlambda/2)*2
	return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))

	def us_lower48_sampler(n, jitter=0.0):
	rng = np.random.default_rng(SEED)
	LAT_MIN, LAT_MAX = 25.0, 49.5
	LON_MIN, LON_MAX = -124.8, -66.9
	pts = []
	attempts = 0
	while len(pts) < n:
	attempts += 1
	lat = rng.uniform(LAT_MIN, LAT_MAX)
	lon = rng.uniform(LON_MIN, LON_MAX)
	in_gulf = (lat < 26.0) and (-97.0 < lon < -81.0)
	if in_gulf:
	continue
	pts.append((lat + rng.normal(0, jitter), lon + rng.normal(0, jitter)))
	if attempts > n * 10000:
	raise RuntimeError("Sampler struggling; relax rules.")
	return np.array(pts)

	def build_distance_matrix(A, B):
	return np.array([[haversine_miles(a[0], a[1], b[0], b[1]) for b in B] for a in A])

	def plot_points_and_lanes(dcs, demands, mask, title, outfile, max_lines=6000):
	plt.figure(figsize=(10, 6))
	plt.scatter(demands[:,1], demands[:,0], s=8, alpha=0.6, label="Demand/Clusters")
	plt.scatter(dcs[:,1], dcs[:,0], marker="^", s=90, label="DCs")
	nI, nJ = mask.shape
	drawn = 0
	for i in range(nI):
	if drawn >= max_lines: break
	for j in range(nJ):
	if mask[i,j]:
	plt.plot([demands[i,1], dcs[j,1]],[demands[i,0], dcs[j,0]], lw=0.25, alpha=0.25)
	drawn += 1
	if drawn >= max_lines: break
	plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
	plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=180); plt.close()

	def plot_cluster_links(points, centroids, labels, title, outfile, max_lines=100000):
	plt.figure(figsize=(10, 6))
	plt.scatter(points[:,1], points[:,0], s=8, alpha=0.5, label="Demand")
	plt.scatter(centroids[:,1], centroids[:,0], marker="X", s=120, alpha=0.9, label="Cluster centers", color="blue")
	drawn = 0
	for i in range(points.shape[0]):
	c = labels[i]
	plt.plot([points[i,1], centroids[c,1]],[points[i,0], centroids[c,0]], lw=0.25, alpha=0.25)
	drawn += 1
	if drawn >= max_lines: break
	plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
	plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=200); plt.close()

	def plot_dominant_assignments(dcs, points, X, open_dcs, title, outfile, max_lines=6000):
	plt.figure(figsize=(10, 6))
	plt.scatter(points[:,1], points[:,0], s=8, alpha=0.6, label="Demand/Clusters")
	closed = [j for j in range(dcs.shape[0]) if j not in open_dcs]
	if closed: plt.scatter(dcs[closed,1], dcs[closed,0], marker="^", s=70, alpha=0.9, label="DCs (closed)")
	if open_dcs: plt.scatter(dcs[open_dcs,1], dcs[open_dcs,0], marker="*", s=180, alpha=0.95, label="DCs (open)")
	if X.size > 0:
	chosen = np.argmax(X, axis=1)
	drawn = 0
	for i, j in enumerate(chosen):
	plt.plot([points[i,1], dcs[j,1]],[points[i,0], dcs[j,0]], lw=0.35, alpha=0.35)
	drawn += 1
	if drawn >= max_lines: break
	plt.title(title); plt.xlabel("Longitude"); plt.ylabel("Latitude")
	plt.legend(loc="upper right"); plt.tight_layout(); plt.savefig(outfile, dpi=200); plt.close()

	def plot_dc_cluster_matrix_map(dcs, cents, totals, D2, mask,
	title, outfile,
	mode="topk", k=3,
	style="fixed", # "fixed" or "scaled"
	lw_fixed=0.25, # match ALL-lanes plot
	alpha_fixed=0.25, # match ALL-lanes plot
	linewidth_min=0.4, linewidth_max=2.5):
	"""
	DC→cluster 'matrix on a map'.
	style="fixed": every arc uses lw_fixed/alpha_fixed (best for A/B visual comparability).
	style="scaled": line width scales with cluster demand (previous behavior).
	"""
	import numpy as np
	import matplotlib.pyplot as plt

	I, J = mask.shape

	# --- choose linewidths/alpha ---
	if style == "fixed":
	linewidths = np.full(I, float(lw_fixed))
	def alpha_for(i, j): return float(alpha_fixed)
	else:
	# demand-scaled (NumPy 2-safe)
	w = np.asarray(totals, dtype=float).reshape(-1)
	rng = float(np.ptp(w))
	s = np.zeros_like(w) if rng == 0.0 else (w - float(np.min(w))) / rng
	linewidths = linewidth_min + (linewidth_max - linewidth_min) * s
	def alpha_for(i, j):
	return 0.25 + 0.6 * (1.0 - D2[i, j] / (MAX_LANE_MILES + 1e-9))

	# --- plot ---
	plt.figure(figsize=(10, 6))
	plt.scatter(cents[:,1], cents[:,0], marker="X", s=120, alpha=0.9, label="Clusters")
	plt.scatter(dcs[:,1], dcs[:,0], marker="^", s=90, alpha=0.9, label="DCs")

	for i in range(I):
	allowed = np.where(mask[i])[0]
	if allowed.size == 0:
	continue
	if mode == "topk" and allowed.size > k:
	ord_idx = np.argsort(D2[i, allowed])[:k]
	cols = allowed[ord_idx]
	else:
	cols = allowed
	for j in cols:
	plt.plot([cents[i,1], dcs[j,1]],
	[cents[i,0], dcs[j,0]],
	lw=float(linewidths[i]), alpha=alpha_for(i, j))

	plt.title(title)
	plt.xlabel("Longitude"); plt.ylabel("Latitude")
	plt.legend(loc="upper right")
	plt.tight_layout()
	plt.savefig(outfile, dpi=200)
	plt.close()


	# --------------- Supply-aware clustering ---------------
	def topk_dc_signature(D_row: np.ndarray, k: int = 2) -> Tuple[int, ...]:
	"""Return indices of top-k nearest DCs by distance (regardless of cap)."""
	order = np.argsort(D_row)[:k]
	return tuple(int(x) for x in order)

	def cluster_bucket(points: np.ndarray, qty: np.ndarray, mean_threshold: float,
	max_demand: int, max_diam: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[float]]:
	"""Greedy, demand-weighted clustering inside a prefiltered bucket of points."""
	n = points.shape[0]
	remaining = set(range(n))
	labels = -np.ones(n, dtype=int)
	cents, totals, means = [], [], []
	k = 0

	# Precompute pairwise distances for diameter checks
	pair = np.zeros((n, n), dtype=float)
	for i in range(n):
	for j in range(i+1, n):
	d = haversine_miles(points[i,0], points[i,1], points[j,0], points[j,1])
	pair[i,j] = pair[j,i] = d

	def weighted_centroid(ids):
	w = qty[np.array(ids)].astype(float)
	lat = float(np.average(points[np.array(ids),0], weights=w))
	lon = float(np.average(points[np.array(ids),1], weights=w))
	return np.array([lat, lon], dtype=float)

	def mean_dist(ids, center):
	w = qty[np.array(ids)].astype(float)
	dists = np.array([haversine_miles(points[t,0], points[t,1], center[0], center[1]) for t in ids])
	return float(np.sum(w * dists) / np.sum(w))

	def diameter(ids):
	if len(ids) <= 1:
	return 0.0
	idx = np.array(ids, dtype=int)
	return float(np.max(pair[np.ix_(idx, idx)]))

	while remaining:
	seed = max(remaining, key=lambda i: qty[i])
	cluster = [seed]
	centroid = weighted_centroid(cluster)
	mean_d = 0.0
	demand_sum = int(qty[seed])
	candidates = sorted(list(remaining - set(cluster)), key=lambda i: pair[seed, i])
	improved = True
	while improved and candidates:
	improved = False
	for i in list(candidates):
	trial = cluster + [i]
	if demand_sum + int(qty[i]) > max_demand:
	candidates.remove(i); continue
	c_new = weighted_centroid(trial)
	mean_new = mean_dist(trial, c_new)
	if mean_new > mean_threshold:
	candidates.remove(i); continue
	diam_new = diameter(trial)
	if diam_new > max_diam:
	candidates.remove(i); continue
	# accept
	cluster.append(i)
	centroid = c_new
	mean_d = mean_new
	demand_sum += int(qty[i])
	candidates.remove(i)
	candidates = sorted(candidates, key=lambda idx: haversine_miles(points[idx,0], points[idx,1], centroid[0], centroid[1]))
	improved = True
	break

	for i in cluster:
	labels[i] = k
	remaining.discard(i)
	cents.append((centroid[0], centroid[1]))
	totals.append(demand_sum)
	means.append(mean_d)
	k += 1

	return np.array(cents), np.array(totals, dtype=int), labels, means

	def supply_aware_clustering(points: np.ndarray, qty: np.ndarray, D_to_dcs: np.ndarray,
	mean_threshold: float, max_demand: int, max_diam: float,
	sig_k: int = 2, target_min_clusters: int = 0, refine_steps: int = 0, tighten_factor: float = 0.9):
	"""Cluster inside top-k nearest DC signature buckets; optionally auto-refine if too few clusters."""
	n = points.shape[0]
	# 1) Build signature buckets
	sigs = [topk_dc_signature(D_to_dcs[i], sig_k) for i in range(n)]
	buckets = {}
	for i, sig in enumerate(sigs):
	buckets.setdefault(sig, []).append(i)

	def run_once(threshold):
	all_cents = []; all_totals = []; all_labels = -np.ones(n, dtype=int)
	all_means = []
	next_cluster_id = 0
	for sig, idxs in buckets.items():
	sub_pts = points[np.array(idxs)]
	sub_qty = qty[np.array(idxs)]
	cents, totals, labels, means = cluster_bucket(sub_pts, sub_qty, threshold, max_demand, max_diam)
	# remap labels to global ids
	for local, gidx in enumerate(idxs):
	all_labels[gidx] = next_cluster_id + labels[local]
	next_cluster_id += int(labels.max()) + 1
	all_cents.extend(list(map(tuple, cents)))
	all_totals.extend(list(totals))
	all_means.extend(list(means))
	return np.array(all_cents), np.array(all_totals, dtype=int), all_labels, all_means

	# initial pass
	curr_thr = float(mean_threshold)
	cents, totals, labels, means = run_once(curr_thr)

	# auto-refine if too few clusters
	steps = 0
	while target_min_clusters and (cents.shape[0] < target_min_clusters) and (steps < refine_steps):
	curr_thr *= float(tighten_factor)
	cents, totals, labels, means = run_once(curr_thr)
	steps += 1

	return cents, totals, labels, means, curr_thr


	# ---------------- Model ----------------
	@dataclass
	class ProblemData:
	dcs: np.ndarray
	demands: np.ndarray
	demand_qty: np.ndarray
	dc_capacity: np.ndarray
	dist_miles: np.ndarray
	lane_mask: np.ndarray

	def solve_facility_location(data: ProblemData, verbose: bool=False) -> Dict:
	I, J = data.demands.shape[0], data.dcs.shape[0]
	demand, capacity, D, mask = data.demand_qty, data.dc_capacity, data.dist_miles, data.lane_mask
	cost = D * demand[:,None]
	arcs = [(i,j) for i in range(I) for j in range(J) if mask[i,j]]

	if PULP_AVAILABLE and arcs:
	x = pulp.LpVariable.dicts("x",(range(I),range(J)),0,1,cat=pulp.LpContinuous)
	y = pulp.LpVariable.dicts("y",range(J),0,1,cat=pulp.LpBinary)
	model = pulp.LpProblem("FLP", pulp.LpMinimize)
	model += pulp.lpSum(cost[i,j]x[i][j] for (i,j) in arcs) + OPEN_COST_UNITSpulp.lpSum(y[j] for j in range(J))
	for i in range(I):
	model += pulp.lpSum(x[i][j] for j in range(J) if mask[i,j]) == 1.0
	for j in range(J):
	model += pulp.lpSum(demand[i]x[i][j] for i in range(I) if mask[i,j]) <= capacity[j]y[j]
	try:
	model += pulp.lpSum(y[j] for j in range(J)) <= ALLOW_AT_MOST
	except NameError:
	pass
	for i in range(I):
	for j in range(J):
	if not mask[i,j]:
	model += x[i][j] == 0.0
	t0 = time.perf_counter()
	model.solve(pulp.PULP_CBC_CMD(msg=verbose))
	solve_time = time.perf_counter() - t0
	status = pulp.LpStatus[model.status]
	obj = pulp.value(model.objective) if pulp.value(model.objective) is not None else float("nan")
	X = np.zeros((I,J), dtype=float)
	for i in range(I):
	for j in range(J):
	val = x[i][j].value()
	X[i,j] = 0.0 if val is None else float(val)
	Y = np.array([y[j].value() or 0.0 for j in range(J)])
	open_dcs = [j for j in range(J) if Y[j] > 0.5]
	return dict(solver="PuLP-CBC", status=status, objective=obj, X=X, Y=Y,
	open_dcs=open_dcs, n_arcs=len(arcs), n_x_vars=len(arcs),
	n_y_vars=J, solve_time=solve_time)

	return dict(solver="None", status="Skipped", objective=float("nan"),
	X=np.zeros((I,J)), Y=np.zeros(J), open_dcs=[],
	n_arcs=len(arcs), n_x_vars=len(arcs), n_y_vars=J, solve_time=float("nan"))


	# ---------------- Reporter ----------------
	def report_solution_block(label, sol, D, qty, open_cost_units):
	"""
	Pretty-print the exact stats you wanted, including:
	- Solver/Status
	- Objective (total): shipping unit-miles + open-DC cost
	- Objective (unit-miles): shipping-only
	- Open-cost component
	- Open DCs list + count
	- Arcs and Solve time (s)
	- Avg assignment distance (mi/unit)
	"""
	shipping_um = float(np.sum(D * sol["X"] * qty[:, None])) if sol["X"].size else float("nan")
	open_cost = open_cost_units * len(sol["open_dcs"])
	avg_assign = shipping_um / float(qty.sum()) if qty.sum() > 0 else float("nan")
	print(f"{label}:")
	print(f" Solver: {sol['solver']} Status: {sol['status']}")
	print(f" Objective (total): {sol['objective']:.2f}")
	print(f" Objective (unit-miles): {shipping_um:.2f} \| Open-cost: {open_cost:.2f}")
	print(f" Avg assignment distance: {avg_assign:.1f} mi/unit")
	print(f" Open DCs: {sol['open_dcs']} (count={len(sol['open_dcs'])})")
	print(f" Arcs: {sol['n_arcs']} \| Solve time (s): {sol['solve_time']:.3f}")


	# ---------------- Main ----------------
	def main():
	print("=== Decision-Preserving Reduction — strict lanes + supply-aware clustering ===")
	dcs = us_lower48_sampler(N_DCS, jitter=0.3)
	demands = us_lower48_sampler(N_DEMAND, jitter=0.0)
	demand_qty = np.full(N_DEMAND, DEMAND_PER_POINT, dtype=int)
	dc_capacity = np.full(N_DCS, DC_CAPACITY, dtype=int)

	# Distances + strict masks (NO fallback)
	D = build_distance_matrix(demands, dcs)
	strict_mask = (D <= MAX_LANE_MILES)
	orphan_rows = np.where(~strict_mask.any(axis=1))[0]
	if orphan_rows.size > 0:
	raise RuntimeError(f"{orphan_rows.size} demand nodes have NO DC within {MAX_LANE_MILES} miles. Increase MAX_LANE_MILES.")

	# Visuals (pre-solve)
	plot_points_and_lanes(dcs, demands, np.ones_like(D, bool), "All DC→Demand lanes (subset drawn)", FIG_ALL_LANES)
	plot_points_and_lanes(dcs, demands, strict_mask, f"Pruned lanes (≤ {MAX_LANE_MILES} miles)", FIG_PRUNED_LANES)

	orig_arcs = N_DEMAND * N_DCS
	pruned_arcs = int(strict_mask.sum())
	print(f"[ARCS] original={orig_arcs:,} after_pruning={pruned_arcs:,}")

	# Solve BASE and report
	print("Solving BASE...")
	base = ProblemData(dcs, demands, demand_qty, dc_capacity, D, strict_mask)
	base_sol = solve_facility_location(base, verbose=False)
	report_solution_block("BASE", base_sol, D, demand_qty, OPEN_COST_UNITS)
	plot_dominant_assignments(dcs, demands, base_sol['X'], base_sol['open_dcs'], "BASE — dominant assignments", FIG_BASE_SOLUTION)

	# Supply-aware, demand-weighted clustering
	print(f"Clustering (supply-aware): mean≤{CLUSTER_MEAN_MILES} mi, max_demand≤{MAX_DEMAND_PER_CLUSTER}, diam≤{MAX_CLUSTER_DIAMETER} mi")
	cents, totals, labels, mean_dists, eff_thr = supply_aware_clustering(
	demands, demand_qty, D,
	mean_threshold=CLUSTER_MEAN_MILES,
	max_demand=MAX_DEMAND_PER_CLUSTER,
	max_diam=MAX_CLUSTER_DIAMETER,
	sig_k=SIG_K,
	target_min_clusters=TARGET_MIN_CLUSTERS,
	refine_steps=AUTO_REFINE_STEPS,
	tighten_factor=TIGHTEN_FACTOR
	)
	print(f" Clusters formed: {cents.shape[0]} (from {N_DEMAND}) \| eff_mean_threshold={eff_thr:.1f} mi \| "
	f"median mean={np.median(mean_dists):.1f} mi, p90={np.percentile(mean_dists,90):.1f} mi, max={np.max(mean_dists):.1f} mi")
	plot_cluster_links(demands, cents, labels, "Demand → Cluster centroid (membership links)", FIG_CLUSTER_LINKS)

	# Reduced problem (DC ↔ clusters)
	D2 = build_distance_matrix(cents, dcs)
	strict_mask_2 = (D2 <= MAX_LANE_MILES)
	orphan_clusters = np.where(~strict_mask_2.any(axis=1))[0]
	if orphan_clusters.size > 0:
	raise RuntimeError(f"{orphan_clusters.size} clusters have NO DC within {MAX_LANE_MILES} miles. Increase MAX_LANE_MILES or tighten clustering.")

	# Visuals: DC→cluster lanes and "matrix on a map"
	plot_points_and_lanes(dcs, cents, strict_mask_2, f"Reduced lanes (clusters, ≤ {MAX_LANE_MILES} miles)", FIG_REDUCED_LANES)
	# all allowed arcs, skinny like the ALL-lanes figure
	plot_dc_cluster_matrix_map(
	dcs, cents, totals, D2, strict_mask_2,
	title=f"DC → Cluster matrix (all allowed, skinny, ≤ {MAX_LANE_MILES} mi)",
	outfile="fig_dc_cluster_matrix_all.png",
	mode="all",
	style="fixed", lw_fixed=0.25, alpha_fixed=0.25
	)

	# top-3 per cluster, same skinny style for apples-to-apples comparison
	plot_dc_cluster_matrix_map(
	dcs, cents, totals, D2, strict_mask_2,
	title=f"DC → Cluster matrix (top-3, skinny, ≤ {MAX_LANE_MILES} mi)",
	outfile="fig_dc_cluster_matrix_top3.png",
	mode="topk", k=3,
	style="fixed", lw_fixed=0.25, alpha_fixed=0.25
	)

	clustered_arcs = int(strict_mask_2.sum())
	print(f"[ARCS] after_clustering={clustered_arcs:,}")

	# Solve REDUCED and report
	print("Solving REDUCED...")
	red = ProblemData(dcs, cents, totals, dc_capacity, D2, strict_mask_2)
	red_sol = solve_facility_location(red, verbose=False)
	report_solution_block("REDUCED", red_sol, D2, totals, OPEN_COST_UNITS)
	plot_dominant_assignments(dcs, cents, red_sol['X'], red_sol['open_dcs'], "REDUCED — dominant assignments", FIG_REDUCED_SOLUTION)

	# Decision stability
	base_set, red_set = set(base_sol['open_dcs']), set(red_sol['open_dcs'])
	jaccard = len(base_set & red_set) / max(1, len(base_set \| red_set))
	print(f"[Stability] Jaccard(base_open, reduced_open) = {jaccard:.3f} \| base={sorted(base_set)} reduced={sorted(red_set)}")

	# Summary
	lane_reduction = 1.0 - (red_sol['n_arcs'] / max(1, base_sol['n_arcs']))
	obj_gap_pct = 100.0 * (red_sol['objective'] - base_sol['objective']) / (abs(base_sol['objective']) + 1e-9)
	print("=== BEFORE vs AFTER (summary) ===")
	print(f"Nodes: {N_DEMAND} → {cents.shape[0]}")
	print(f"Arcs: {base_sol['n_arcs']} → {red_sol['n_arcs']} (Δ ≈ {lane_reduction*100:.1f}% fewer arcs)")
	print(f"Open DCs: {len(base_sol['open_dcs'])} → {len(red_sol['open_dcs'])}")
	print(f"Objective: {base_sol['objective']:.0f} → {red_sol['objective']:.0f} (gap ≈ {obj_gap_pct:.2f}%)")
	print(f"[ARCS SUMMARY] original={orig_arcs:,} pruned={pruned_arcs:,} clustered={clustered_arcs:,}")

	if __name__ == "__main__":
	main()
No results found