Last active
November 29, 2025 18:16
-
-
Save mattt/c854c34d014307aa93962da8a8cc5561 to your computer and use it in GitHub Desktop.
A survey of various compactness metrics for counties in the United States
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "geopandas>=1.1.1", | |
| # "numpy>=2.3.5", | |
| # "scipy>=1.16.3", | |
| # "shapely>=2.1.2", | |
| # ] | |
| # /// | |
| import geopandas as gpd | |
| import numpy as np | |
| import pandas as pd | |
| from shapely.geometry import Point | |
| # Load US county geometries from data folder | |
| # https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2025&layergroup=Counties+%28and+equivalent%29 | |
| counties = gpd.read_file("data/tl_2025_us_county/tl_2025_us_county.shp") | |
| # FIPS code to state name mapping | |
| FIPS_TO_STATE = { | |
| "01": "Alabama", | |
| "02": "Alaska", | |
| "04": "Arizona", | |
| "05": "Arkansas", | |
| "06": "California", | |
| "08": "Colorado", | |
| "09": "Connecticut", | |
| "10": "Delaware", | |
| "11": "District of Columbia", | |
| "12": "Florida", | |
| "13": "Georgia", | |
| "15": "Hawaii", | |
| "16": "Idaho", | |
| "17": "Illinois", | |
| "18": "Indiana", | |
| "19": "Iowa", | |
| "20": "Kansas", | |
| "21": "Kentucky", | |
| "22": "Louisiana", | |
| "23": "Maine", | |
| "24": "Maryland", | |
| "25": "Massachusetts", | |
| "26": "Michigan", | |
| "27": "Minnesota", | |
| "28": "Mississippi", | |
| "29": "Missouri", | |
| "30": "Montana", | |
| "31": "Nebraska", | |
| "32": "Nevada", | |
| "33": "New Hampshire", | |
| "34": "New Jersey", | |
| "35": "New Mexico", | |
| "36": "New York", | |
| "37": "North Carolina", | |
| "38": "North Dakota", | |
| "39": "Ohio", | |
| "40": "Oklahoma", | |
| "41": "Oregon", | |
| "42": "Pennsylvania", | |
| "44": "Rhode Island", | |
| "45": "South Carolina", | |
| "46": "South Dakota", | |
| "47": "Tennessee", | |
| "48": "Texas", | |
| "49": "Utah", | |
| "50": "Vermont", | |
| "51": "Virginia", | |
| "53": "Washington", | |
| "54": "West Virginia", | |
| "55": "Wisconsin", | |
| "56": "Wyoming", | |
| "60": "American Samoa", | |
| "66": "Guam", | |
| "69": "Northern Mariana Islands", | |
| "72": "Puerto Rico", | |
| "78": "U.S. Virgin Islands", | |
| } | |
| # Add state name column if it doesn't exist | |
| if "STATENAME" not in counties.columns or counties["STATENAME"].isna().all(): | |
| counties["STATENAME"] = counties["STATEFP"].map(FIPS_TO_STATE) | |
| def polsby_popper(geometry): | |
| """Polsby-Popper: 4π × (Area / Perimeter²)""" | |
| area = geometry.area | |
| perimeter = geometry.length | |
| return (4 * np.pi * area) / (perimeter**2) | |
| def schwartzberg(geometry): | |
| """Schwartzberg: 1 / (Perimeter / Circumference of equal-area circle)""" | |
| area = geometry.area | |
| perimeter = geometry.length | |
| circumference = 2 * np.pi * np.sqrt(area / np.pi) | |
| return circumference / perimeter | |
| def reock(geometry): | |
| """Reock: Area / Area of minimum bounding circle""" | |
| # Get minimum bounding circle using the minimum_rotated_rectangle approach | |
| # For a more accurate MBC, we use the convex hull and find the smallest circle | |
| # that contains all points | |
| coords = list( | |
| geometry.convex_hull.exterior.coords[:-1] | |
| ) # Remove duplicate last point | |
| if len(coords) < 2: | |
| return 1.0 | |
| # Find the two points farthest apart (diameter of minimum bounding circle) | |
| max_dist = 0 | |
| for i, p1 in enumerate(coords): | |
| for p2 in coords[i + 1 :]: | |
| dist = Point(p1).distance(Point(p2)) | |
| if dist > max_dist: | |
| max_dist = dist | |
| mbc_area = np.pi * (max_dist / 2) ** 2 | |
| return geometry.area / mbc_area if mbc_area > 0 else 0 | |
| def convex_hull_score(geometry): | |
| """Convex Hull: Area / Area of convex hull""" | |
| hull = geometry.convex_hull | |
| return geometry.area / hull.area | |
| # Apply to all counties | |
| counties["polsby_popper"] = counties.geometry.apply(polsby_popper) | |
| counties["schwartzberg"] = counties.geometry.apply(schwartzberg) | |
| counties["reock"] = counties.geometry.apply(reock) | |
| counties["convex_hull"] = counties.geometry.apply(convex_hull_score) | |
| # Display summary statistics | |
| print(f"Loaded {len(counties)} counties") | |
| print("\nCompactness Statistics:") | |
| print( | |
| f"Polsby-Popper: mean={counties['polsby_popper'].mean():.4f}, median={counties['polsby_popper'].median():.4f}" | |
| ) | |
| print( | |
| f"Schwartzberg: mean={counties['schwartzberg'].mean():.4f}, median={counties['schwartzberg'].median():.4f}" | |
| ) | |
| print( | |
| f"Reock: mean={counties['reock'].mean():.4f}, median={counties['reock'].median():.4f}" | |
| ) | |
| print( | |
| f"Convex Hull: mean={counties['convex_hull'].mean():.4f}, median={counties['convex_hull'].median():.4f}" | |
| ) | |
| # Top 50 most irregular counties (lowest compactness scores) | |
| # Sort by Polsby-Popper (ascending - lower = more irregular) | |
| most_irregular = counties.nsmallest(50, "polsby_popper") | |
| print("\n" + "=" * 80) | |
| print("Top 50 Most Irregular Counties (by Polsby-Popper score)") | |
| print("=" * 80) | |
| print( | |
| f"{'Rank':<6} {'County':<30} {'State':<15} {'Polsby-Popper':<15} {'Schwartzberg':<15} {'Reock':<15} {'Convex Hull':<15}" | |
| ) | |
| print("-" * 80) | |
| for idx, (_, county) in enumerate(most_irregular.iterrows(), 1): | |
| county_name = county.get("NAME", "Unknown") | |
| state_fp = county.get("STATEFP", "") | |
| state_name = county.get("STATENAME", "") | |
| # Use state name from mapping if not in data | |
| if not state_name or pd.isna(state_name): | |
| state_name = FIPS_TO_STATE.get( | |
| state_fp, f"FIPS {state_fp}" if state_fp else "Unknown" | |
| ) | |
| state_display = state_name | |
| print( | |
| f"{idx:<6} {county_name:<30} {state_display:<15} " | |
| f"{county['polsby_popper']:<15.6f} {county['schwartzberg']:<15.6f} " | |
| f"{county['reock']:<15.6f} {county['convex_hull']:<15.6f}" | |
| ) | |
| # Print results for Multnomah County, OR | |
| multnomah = counties[ | |
| (counties["NAME"].str.contains("Multnomah", case=False, na=False)) | |
| & (counties["STATEFP"] == "41") # Oregon FIPS code | |
| ] | |
| if len(multnomah) == 0: | |
| # Try alternative field names | |
| multnomah = counties[ | |
| (counties["NAME"].str.contains("Multnomah", case=False, na=False)) | |
| | (counties["NAMELSAD"].str.contains("Multnomah", case=False, na=False)) | |
| ] | |
| if len(multnomah) > 0: | |
| county = multnomah.iloc[0] | |
| total_counties = len(counties) | |
| # Calculate rankings (1 = most irregular, total_counties = most compact) | |
| pp_rank = (counties["polsby_popper"] < county["polsby_popper"]).sum() + 1 | |
| sch_rank = (counties["schwartzberg"] < county["schwartzberg"]).sum() + 1 | |
| reock_rank = (counties["reock"] < county["reock"]).sum() + 1 | |
| ch_rank = (counties["convex_hull"] < county["convex_hull"]).sum() + 1 | |
| # Calculate percentiles (lower percentile = more irregular) | |
| pp_percentile = (pp_rank / total_counties) * 100 | |
| sch_percentile = (sch_rank / total_counties) * 100 | |
| reock_percentile = (reock_rank / total_counties) * 100 | |
| ch_percentile = (ch_rank / total_counties) * 100 | |
| # Get medians and means for comparison | |
| pp_median = counties["polsby_popper"].median() | |
| pp_mean = counties["polsby_popper"].mean() | |
| sch_median = counties["schwartzberg"].median() | |
| sch_mean = counties["schwartzberg"].mean() | |
| reock_median = counties["reock"].median() | |
| reock_mean = counties["reock"].mean() | |
| ch_median = counties["convex_hull"].median() | |
| ch_mean = counties["convex_hull"].mean() | |
| print("\n" + "=" * 80) | |
| print("Multnomah County, OR - Compactness Analysis") | |
| print("=" * 80) | |
| print( | |
| f"\n{'Metric':<20} {'Score':<15} {'Rank':<10} {'Percentile':<12} {'vs Median':<15} {'vs Mean':<15}" | |
| ) | |
| print("-" * 80) | |
| print( | |
| f"{'Polsby-Popper':<20} {county['polsby_popper']:<15.6f} " | |
| f"{pp_rank}/{total_counties:<9} {pp_percentile:<11.2f}% " | |
| f"{county['polsby_popper'] / pp_median:<14.3f}x " | |
| f"{county['polsby_popper'] / pp_mean:<14.3f}x" | |
| ) | |
| print( | |
| f"{'Schwartzberg':<20} {county['schwartzberg']:<15.6f} " | |
| f"{sch_rank}/{total_counties:<9} {sch_percentile:<11.2f}% " | |
| f"{county['schwartzberg'] / sch_median:<14.3f}x " | |
| f"{county['schwartzberg'] / sch_mean:<14.3f}x" | |
| ) | |
| print( | |
| f"{'Reock':<20} {county['reock']:<15.6f} " | |
| f"{reock_rank}/{total_counties:<9} {reock_percentile:<11.2f}% " | |
| f"{county['reock'] / reock_median:<14.3f}x " | |
| f"{county['reock'] / reock_mean:<14.3f}x" | |
| ) | |
| print( | |
| f"{'Convex Hull':<20} {county['convex_hull']:<15.6f} " | |
| f"{ch_rank}/{total_counties:<9} {ch_percentile:<11.2f}% " | |
| f"{county['convex_hull'] / ch_median:<14.3f}x " | |
| f"{county['convex_hull'] / ch_mean:<14.3f}x" | |
| ) | |
| # Overall irregularity assessment | |
| avg_percentile = ( | |
| pp_percentile + sch_percentile + reock_percentile + ch_percentile | |
| ) / 4 | |
| avg_rank = (pp_rank + sch_rank + reock_rank + ch_rank) / 4 | |
| print("\n" + "-" * 80) | |
| print("Summary:") | |
| print(f" Average Rank: {avg_rank:.1f} out of {total_counties} counties") | |
| print(f" Average Percentile: {avg_percentile:.1f}%") | |
| print("=" * 80) | |
| else: | |
| print("\nMultnomah County, OR not found. Available columns:") | |
| print(counties.columns.tolist()) | |
| # All Oregon counties | |
| oregon_counties = counties[counties["STATEFP"] == "41"].copy() | |
| oregon_counties = oregon_counties.sort_values("polsby_popper", ascending=True) | |
| print("\n" + "=" * 80) | |
| print("All Oregon Counties - Compactness Scores") | |
| print("=" * 80) | |
| print( | |
| f"{'Rank':<6} {'County':<30} {'Polsby-Popper':<15} {'Schwartzberg':<15} {'Reock':<15} {'Convex Hull':<15}" | |
| ) | |
| print("-" * 80) | |
| for idx, (_, county) in enumerate(oregon_counties.iterrows(), 1): | |
| county_name = county.get("NAME", "Unknown") | |
| print( | |
| f"{idx:<6} {county_name:<30} " | |
| f"{county['polsby_popper']:<15.6f} {county['schwartzberg']:<15.6f} " | |
| f"{county['reock']:<15.6f} {county['convex_hull']:<15.6f}" | |
| ) | |
| # Summary statistics for Oregon | |
| print("\n" + "-" * 80) | |
| print("Oregon Summary Statistics:") | |
| print(f" Total Counties: {len(oregon_counties)}") | |
| print( | |
| f" Polsby-Popper: mean={oregon_counties['polsby_popper'].mean():.4f}, " | |
| f"median={oregon_counties['polsby_popper'].median():.4f}" | |
| ) | |
| print( | |
| f" Schwartzberg: mean={oregon_counties['schwartzberg'].mean():.4f}, " | |
| f"median={oregon_counties['schwartzberg'].median():.4f}" | |
| ) | |
| print( | |
| f" Reock: mean={oregon_counties['reock'].mean():.4f}, " | |
| f"median={oregon_counties['reock'].median():.4f}" | |
| ) | |
| print( | |
| f" Convex Hull: mean={oregon_counties['convex_hull'].mean():.4f}, " | |
| f"median={oregon_counties['convex_hull'].median():.4f}" | |
| ) | |
| print("=" * 80) | |
| # Summary statistics by state | |
| state_summaries = [] | |
| for state_fp, state_name in FIPS_TO_STATE.items(): | |
| state_counties = counties[counties["STATEFP"] == state_fp] | |
| if len(state_counties) > 0: | |
| state_summaries.append( | |
| { | |
| "STATEFP": state_fp, | |
| "State": state_name, | |
| "Count": len(state_counties), | |
| "PP_Mean": state_counties["polsby_popper"].mean(), | |
| "PP_Median": state_counties["polsby_popper"].median(), | |
| "Sch_Mean": state_counties["schwartzberg"].mean(), | |
| "Sch_Median": state_counties["schwartzberg"].median(), | |
| "Reock_Mean": state_counties["reock"].mean(), | |
| "Reock_Median": state_counties["reock"].median(), | |
| "CH_Mean": state_counties["convex_hull"].mean(), | |
| "CH_Median": state_counties["convex_hull"].median(), | |
| } | |
| ) | |
| # Convert to DataFrame and sort by Polsby-Popper mean | |
| state_df = pd.DataFrame(state_summaries) | |
| state_df = state_df.sort_values("PP_Mean", ascending=True) | |
| print("\n" + "=" * 100) | |
| print("State Summary Statistics (sorted by Polsby-Popper mean)") | |
| print("=" * 100) | |
| print( | |
| f"{'State':<25} {'Counties':<10} {'PP Mean':<12} {'PP Median':<12} " | |
| f"{'Sch Mean':<12} {'Sch Median':<12} {'Reock Mean':<12} {'Reock Median':<12} " | |
| f"{'CH Mean':<12} {'CH Median':<12}" | |
| ) | |
| print("-" * 100) | |
| for _, row in state_df.iterrows(): | |
| print( | |
| f"{row['State']:<25} {int(row['Count']):<10} " | |
| f"{row['PP_Mean']:<12.6f} {row['PP_Median']:<12.6f} " | |
| f"{row['Sch_Mean']:<12.6f} {row['Sch_Median']:<12.6f} " | |
| f"{row['Reock_Mean']:<12.6f} {row['Reock_Median']:<12.6f} " | |
| f"{row['CH_Mean']:<12.6f} {row['CH_Median']:<12.6f}" | |
| ) | |
| print("=" * 100) |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output: