mattt · November 29, 2025 18:16 · mattt · Nov 29, 2025
diff --git a/us-county-compactness.py b/us-county-compactness.py
 # /// script
 # requires-python = ">=3.12"
 # dependencies = [
 #     "geopandas>=1.1.1",
 #     "numpy>=2.3.5",
 #     "scipy>=1.16.3",
 #     "shapely>=2.1.2",
 # ]
 # ///

 import geopandas as gpd
 import numpy as np
 import pandas as pd
 from shapely.geometry import Point

 # Load US county geometries from data folder
 # https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2025&layergroup=Counties+%28and+equivalent%29
 counties = gpd.read_file("data/tl_2025_us_county/tl_2025_us_county.shp")

 # FIPS code to state name mapping
 FIPS_TO_STATE = {
    "01": "Alabama",
    "02": "Alaska",
    "04": "Arizona",
    "05": "Arkansas",
    "06": "California",
    "08": "Colorado",
    "09": "Connecticut",
    "10": "Delaware",
    "11": "District of Columbia",
    "12": "Florida",
    "13": "Georgia",
    "15": "Hawaii",
    "16": "Idaho",
    "17": "Illinois",
    "18": "Indiana",
    "19": "Iowa",
    "20": "Kansas",
    "21": "Kentucky",
    "22": "Louisiana",
    "23": "Maine",
    "24": "Maryland",
    "25": "Massachusetts",
    "26": "Michigan",
    "27": "Minnesota",
    "28": "Mississippi",
    "29": "Missouri",
    "30": "Montana",
    "31": "Nebraska",
    "32": "Nevada",
    "33": "New Hampshire",
    "34": "New Jersey",
    "35": "New Mexico",
    "36": "New York",
    "37": "North Carolina",
    "38": "North Dakota",
    "39": "Ohio",
    "40": "Oklahoma",
    "41": "Oregon",
    "42": "Pennsylvania",
    "44": "Rhode Island",
    "45": "South Carolina",
    "46": "South Dakota",
    "47": "Tennessee",
    "48": "Texas",
    "49": "Utah",
    "50": "Vermont",
    "51": "Virginia",
    "53": "Washington",
    "54": "West Virginia",
    "55": "Wisconsin",
    "56": "Wyoming",
    "60": "American Samoa",
    "66": "Guam",
    "69": "Northern Mariana Islands",
    "72": "Puerto Rico",
    "78": "U.S. Virgin Islands",
 }

 # Add state name column if it doesn't exist
 if "STATENAME" not in counties.columns or counties["STATENAME"].isna().all():
    counties["STATENAME"] = counties["STATEFP"].map(FIPS_TO_STATE)


 def polsby_popper(geometry):
    """Polsby-Popper: 4π × (Area / Perimeter²)"""
    area = geometry.area
    perimeter = geometry.length
    return (4 * np.pi * area) / (perimeter**2)


 def schwartzberg(geometry):
    """Schwartzberg: 1 / (Perimeter / Circumference of equal-area circle)"""
    area = geometry.area
    perimeter = geometry.length
    circumference = 2 * np.pi * np.sqrt(area / np.pi)
    return circumference / perimeter


 def reock(geometry):
    """Reock: Area / Area of minimum bounding circle"""
    # Get minimum bounding circle using the minimum_rotated_rectangle approach
    # For a more accurate MBC, we use the convex hull and find the smallest circle
    # that contains all points
    coords = list(
        geometry.convex_hull.exterior.coords[:-1]
    )  # Remove duplicate last point
    if len(coords) < 2:
        return 1.0

    # Find the two points farthest apart (diameter of minimum bounding circle)
    max_dist = 0
    for i, p1 in enumerate(coords):
        for p2 in coords[i + 1 :]:
            dist = Point(p1).distance(Point(p2))
            if dist > max_dist:
                max_dist = dist

    mbc_area = np.pi * (max_dist / 2) ** 2
    return geometry.area / mbc_area if mbc_area > 0 else 0


 def convex_hull_score(geometry):
    """Convex Hull: Area / Area of convex hull"""
    hull = geometry.convex_hull
    return geometry.area / hull.area


 # Apply to all counties
 counties["polsby_popper"] = counties.geometry.apply(polsby_popper)
 counties["schwartzberg"] = counties.geometry.apply(schwartzberg)
 counties["reock"] = counties.geometry.apply(reock)
 counties["convex_hull"] = counties.geometry.apply(convex_hull_score)

 # Display summary statistics
 print(f"Loaded {len(counties)} counties")
 print("\nCompactness Statistics:")
 print(
    f"Polsby-Popper: mean={counties['polsby_popper'].mean():.4f}, median={counties['polsby_popper'].median():.4f}"
 )
 print(
    f"Schwartzberg: mean={counties['schwartzberg'].mean():.4f}, median={counties['schwartzberg'].median():.4f}"
 )
 print(
    f"Reock: mean={counties['reock'].mean():.4f}, median={counties['reock'].median():.4f}"
 )
 print(
    f"Convex Hull: mean={counties['convex_hull'].mean():.4f}, median={counties['convex_hull'].median():.4f}"
 )

 # Top 50 most irregular counties (lowest compactness scores)
 # Sort by Polsby-Popper (ascending - lower = more irregular)
 most_irregular = counties.nsmallest(50, "polsby_popper")

 print("\n" + "=" * 80)
 print("Top 50 Most Irregular Counties (by Polsby-Popper score)")
 print("=" * 80)
 print(
    f"{'Rank':<6} {'County':<30} {'State':<15} {'Polsby-Popper':<15} {'Schwartzberg':<15} {'Reock':<15} {'Convex Hull':<15}"
 )
 print("-" * 80)

 for idx, (_, county) in enumerate(most_irregular.iterrows(), 1):
    county_name = county.get("NAME", "Unknown")
    state_fp = county.get("STATEFP", "")
    state_name = county.get("STATENAME", "")

    # Use state name from mapping if not in data
    if not state_name or pd.isna(state_name):
        state_name = FIPS_TO_STATE.get(
            state_fp, f"FIPS {state_fp}" if state_fp else "Unknown"
        )

    state_display = state_name

    print(
        f"{idx:<6} {county_name:<30} {state_display:<15} "
        f"{county['polsby_popper']:<15.6f} {county['schwartzberg']:<15.6f} "
        f"{county['reock']:<15.6f} {county['convex_hull']:<15.6f}"
    )


 # Print results for Multnomah County, OR
 multnomah = counties[
    (counties["NAME"].str.contains("Multnomah", case=False, na=False))
    & (counties["STATEFP"] == "41")  # Oregon FIPS code
 ]
 if len(multnomah) == 0:
    # Try alternative field names
    multnomah = counties[
        (counties["NAME"].str.contains("Multnomah", case=False, na=False))
        | (counties["NAMELSAD"].str.contains("Multnomah", case=False, na=False))
    ]

 if len(multnomah) > 0:
    county = multnomah.iloc[0]
    total_counties = len(counties)

    # Calculate rankings (1 = most irregular, total_counties = most compact)
    pp_rank = (counties["polsby_popper"] < county["polsby_popper"]).sum() + 1
    sch_rank = (counties["schwartzberg"] < county["schwartzberg"]).sum() + 1
    reock_rank = (counties["reock"] < county["reock"]).sum() + 1
    ch_rank = (counties["convex_hull"] < county["convex_hull"]).sum() + 1

    # Calculate percentiles (lower percentile = more irregular)
    pp_percentile = (pp_rank / total_counties) * 100
    sch_percentile = (sch_rank / total_counties) * 100
    reock_percentile = (reock_rank / total_counties) * 100
    ch_percentile = (ch_rank / total_counties) * 100

    # Get medians and means for comparison
    pp_median = counties["polsby_popper"].median()
    pp_mean = counties["polsby_popper"].mean()
    sch_median = counties["schwartzberg"].median()
    sch_mean = counties["schwartzberg"].mean()
    reock_median = counties["reock"].median()
    reock_mean = counties["reock"].mean()
    ch_median = counties["convex_hull"].median()
    ch_mean = counties["convex_hull"].mean()

    print("\n" + "=" * 80)
    print("Multnomah County, OR - Compactness Analysis")
    print("=" * 80)
    print(
        f"\n{'Metric':<20} {'Score':<15} {'Rank':<10} {'Percentile':<12} {'vs Median':<15} {'vs Mean':<15}"
    )
    print("-" * 80)

    print(
        f"{'Polsby-Popper':<20} {county['polsby_popper']:<15.6f} "
        f"{pp_rank}/{total_counties:<9} {pp_percentile:<11.2f}% "
        f"{county['polsby_popper'] / pp_median:<14.3f}x "
        f"{county['polsby_popper'] / pp_mean:<14.3f}x"
    )
    print(
        f"{'Schwartzberg':<20} {county['schwartzberg']:<15.6f} "
        f"{sch_rank}/{total_counties:<9} {sch_percentile:<11.2f}% "
        f"{county['schwartzberg'] / sch_median:<14.3f}x "
        f"{county['schwartzberg'] / sch_mean:<14.3f}x"
    )
    print(
        f"{'Reock':<20} {county['reock']:<15.6f} "
        f"{reock_rank}/{total_counties:<9} {reock_percentile:<11.2f}% "
        f"{county['reock'] / reock_median:<14.3f}x "
        f"{county['reock'] / reock_mean:<14.3f}x"
    )
    print(
        f"{'Convex Hull':<20} {county['convex_hull']:<15.6f} "
        f"{ch_rank}/{total_counties:<9} {ch_percentile:<11.2f}% "
        f"{county['convex_hull'] / ch_median:<14.3f}x "
        f"{county['convex_hull'] / ch_mean:<14.3f}x"
    )

    # Overall irregularity assessment
    avg_percentile = (
        pp_percentile + sch_percentile + reock_percentile + ch_percentile
    ) / 4
    avg_rank = (pp_rank + sch_rank + reock_rank + ch_rank) / 4

    print("\n" + "-" * 80)
    print("Summary:")
    print(f"  Average Rank: {avg_rank:.1f} out of {total_counties} counties")
    print(f"  Average Percentile: {avg_percentile:.1f}%")
    print("=" * 80)
 else:
    print("\nMultnomah County, OR not found. Available columns:")
    print(counties.columns.tolist())

 # All Oregon counties
 oregon_counties = counties[counties["STATEFP"] == "41"].copy()
 oregon_counties = oregon_counties.sort_values("polsby_popper", ascending=True)

 print("\n" + "=" * 80)
 print("All Oregon Counties - Compactness Scores")
 print("=" * 80)
 print(
    f"{'Rank':<6} {'County':<30} {'Polsby-Popper':<15} {'Schwartzberg':<15} {'Reock':<15} {'Convex Hull':<15}"
 )
 print("-" * 80)

 for idx, (_, county) in enumerate(oregon_counties.iterrows(), 1):
    county_name = county.get("NAME", "Unknown")
    print(
        f"{idx:<6} {county_name:<30} "
        f"{county['polsby_popper']:<15.6f} {county['schwartzberg']:<15.6f} "
        f"{county['reock']:<15.6f} {county['convex_hull']:<15.6f}"
    )

 # Summary statistics for Oregon
 print("\n" + "-" * 80)
 print("Oregon Summary Statistics:")
 print(f"  Total Counties: {len(oregon_counties)}")
 print(
    f"  Polsby-Popper: mean={oregon_counties['polsby_popper'].mean():.4f}, "
    f"median={oregon_counties['polsby_popper'].median():.4f}"
 )
 print(
    f"  Schwartzberg: mean={oregon_counties['schwartzberg'].mean():.4f}, "
    f"median={oregon_counties['schwartzberg'].median():.4f}"
 )
 print(
    f"  Reock: mean={oregon_counties['reock'].mean():.4f}, "
    f"median={oregon_counties['reock'].median():.4f}"
 )
 print(
    f"  Convex Hull: mean={oregon_counties['convex_hull'].mean():.4f}, "
    f"median={oregon_counties['convex_hull'].median():.4f}"
 )
 print("=" * 80)

 # Summary statistics by state
 state_summaries = []
 for state_fp, state_name in FIPS_TO_STATE.items():
    state_counties = counties[counties["STATEFP"] == state_fp]
    if len(state_counties) > 0:
        state_summaries.append(
            {
                "STATEFP": state_fp,
                "State": state_name,
                "Count": len(state_counties),
                "PP_Mean": state_counties["polsby_popper"].mean(),
                "PP_Median": state_counties["polsby_popper"].median(),
                "Sch_Mean": state_counties["schwartzberg"].mean(),
                "Sch_Median": state_counties["schwartzberg"].median(),
                "Reock_Mean": state_counties["reock"].mean(),
                "Reock_Median": state_counties["reock"].median(),
                "CH_Mean": state_counties["convex_hull"].mean(),
                "CH_Median": state_counties["convex_hull"].median(),
            }
        )

 # Convert to DataFrame and sort by Polsby-Popper mean
 state_df = pd.DataFrame(state_summaries)
 state_df = state_df.sort_values("PP_Mean", ascending=True)

 print("\n" + "=" * 100)
 print("State Summary Statistics (sorted by Polsby-Popper mean)")
 print("=" * 100)
 print(
    f"{'State':<25} {'Counties':<10} {'PP Mean':<12} {'PP Median':<12} "
    f"{'Sch Mean':<12} {'Sch Median':<12} {'Reock Mean':<12} {'Reock Median':<12} "
    f"{'CH Mean':<12} {'CH Median':<12}"
 )
 print("-" * 100)

 for _, row in state_df.iterrows():
    print(
        f"{row['State']:<25} {int(row['Count']):<10} "
        f"{row['PP_Mean']:<12.6f} {row['PP_Median']:<12.6f} "
        f"{row['Sch_Mean']:<12.6f} {row['Sch_Median']:<12.6f} "
        f"{row['Reock_Mean']:<12.6f} {row['Reock_Median']:<12.6f} "
        f"{row['CH_Mean']:<12.6f} {row['CH_Median']:<12.6f}"
    )

 print("=" * 100)
	# /// script
	# requires-python = ">=3.12"
	# dependencies = [
	# "geopandas>=1.1.1",
	# "numpy>=2.3.5",
	# "scipy>=1.16.3",
	# "shapely>=2.1.2",
	# ]
	# ///

	import geopandas as gpd
	import numpy as np
	import pandas as pd
	from shapely.geometry import Point

	# Load US county geometries from data folder
	# https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2025&layergroup=Counties+%28and+equivalent%29
	counties = gpd.read_file("data/tl_2025_us_county/tl_2025_us_county.shp")

	# FIPS code to state name mapping
	FIPS_TO_STATE = {
	"01": "Alabama",
	"02": "Alaska",
	"04": "Arizona",
	"05": "Arkansas",
	"06": "California",
	"08": "Colorado",
	"09": "Connecticut",
	"10": "Delaware",
	"11": "District of Columbia",
	"12": "Florida",
	"13": "Georgia",
	"15": "Hawaii",
	"16": "Idaho",
	"17": "Illinois",
	"18": "Indiana",
	"19": "Iowa",
	"20": "Kansas",
	"21": "Kentucky",
	"22": "Louisiana",
	"23": "Maine",
	"24": "Maryland",
	"25": "Massachusetts",
	"26": "Michigan",
	"27": "Minnesota",
	"28": "Mississippi",
	"29": "Missouri",
	"30": "Montana",
	"31": "Nebraska",
	"32": "Nevada",
	"33": "New Hampshire",
	"34": "New Jersey",
	"35": "New Mexico",
	"36": "New York",
	"37": "North Carolina",
	"38": "North Dakota",
	"39": "Ohio",
	"40": "Oklahoma",
	"41": "Oregon",
	"42": "Pennsylvania",
	"44": "Rhode Island",
	"45": "South Carolina",
	"46": "South Dakota",
	"47": "Tennessee",
	"48": "Texas",
	"49": "Utah",
	"50": "Vermont",
	"51": "Virginia",
	"53": "Washington",
	"54": "West Virginia",
	"55": "Wisconsin",
	"56": "Wyoming",
	"60": "American Samoa",
	"66": "Guam",
	"69": "Northern Mariana Islands",
	"72": "Puerto Rico",
	"78": "U.S. Virgin Islands",
	}

	# Add state name column if it doesn't exist
	if "STATENAME" not in counties.columns or counties["STATENAME"].isna().all():
	counties["STATENAME"] = counties["STATEFP"].map(FIPS_TO_STATE)


	def polsby_popper(geometry):
	"""Polsby-Popper: 4π × (Area / Perimeter²)"""
	area = geometry.area
	perimeter = geometry.length
	return (4 * np.pi * area) / (perimeter**2)


	def schwartzberg(geometry):
	"""Schwartzberg: 1 / (Perimeter / Circumference of equal-area circle)"""
	area = geometry.area
	perimeter = geometry.length
	circumference = 2 * np.pi * np.sqrt(area / np.pi)
	return circumference / perimeter


	def reock(geometry):
	"""Reock: Area / Area of minimum bounding circle"""
	# Get minimum bounding circle using the minimum_rotated_rectangle approach
	# For a more accurate MBC, we use the convex hull and find the smallest circle
	# that contains all points
	coords = list(
	geometry.convex_hull.exterior.coords[:-1]
	) # Remove duplicate last point
	if len(coords) < 2:
	return 1.0

	# Find the two points farthest apart (diameter of minimum bounding circle)
	max_dist = 0
	for i, p1 in enumerate(coords):
	for p2 in coords[i + 1 :]:
	dist = Point(p1).distance(Point(p2))
	if dist > max_dist:
	max_dist = dist

	mbc_area = np.pi * (max_dist / 2) ** 2
	return geometry.area / mbc_area if mbc_area > 0 else 0


	def convex_hull_score(geometry):
	"""Convex Hull: Area / Area of convex hull"""
	hull = geometry.convex_hull
	return geometry.area / hull.area


	# Apply to all counties
	counties["polsby_popper"] = counties.geometry.apply(polsby_popper)
	counties["schwartzberg"] = counties.geometry.apply(schwartzberg)
	counties["reock"] = counties.geometry.apply(reock)
	counties["convex_hull"] = counties.geometry.apply(convex_hull_score)

	# Display summary statistics
	print(f"Loaded {len(counties)} counties")
	print("\nCompactness Statistics:")
	print(
	f"Polsby-Popper: mean={counties['polsby_popper'].mean():.4f}, median={counties['polsby_popper'].median():.4f}"
	)
	print(
	f"Schwartzberg: mean={counties['schwartzberg'].mean():.4f}, median={counties['schwartzberg'].median():.4f}"
	)
	print(
	f"Reock: mean={counties['reock'].mean():.4f}, median={counties['reock'].median():.4f}"
	)
	print(
	f"Convex Hull: mean={counties['convex_hull'].mean():.4f}, median={counties['convex_hull'].median():.4f}"
	)

	# Top 50 most irregular counties (lowest compactness scores)
	# Sort by Polsby-Popper (ascending - lower = more irregular)
	most_irregular = counties.nsmallest(50, "polsby_popper")

	print("\n" + "=" * 80)
	print("Top 50 Most Irregular Counties (by Polsby-Popper score)")
	print("=" * 80)
	print(
	f"{'Rank':<6} {'County':<30} {'State':<15} {'Polsby-Popper':<15} {'Schwartzberg':<15} {'Reock':<15} {'Convex Hull':<15}"
	)
	print("-" * 80)

	for idx, (_, county) in enumerate(most_irregular.iterrows(), 1):
	county_name = county.get("NAME", "Unknown")
	state_fp = county.get("STATEFP", "")
	state_name = county.get("STATENAME", "")

	# Use state name from mapping if not in data
	if not state_name or pd.isna(state_name):
	state_name = FIPS_TO_STATE.get(
	state_fp, f"FIPS {state_fp}" if state_fp else "Unknown"
	)

	state_display = state_name

	print(
	f"{idx:<6} {county_name:<30} {state_display:<15} "
	f"{county['polsby_popper']:<15.6f} {county['schwartzberg']:<15.6f} "
	f"{county['reock']:<15.6f} {county['convex_hull']:<15.6f}"
	)


	# Print results for Multnomah County, OR
	multnomah = counties[
	(counties["NAME"].str.contains("Multnomah", case=False, na=False))
	& (counties["STATEFP"] == "41") # Oregon FIPS code
	]
	if len(multnomah) == 0:
	# Try alternative field names
	multnomah = counties[
	(counties["NAME"].str.contains("Multnomah", case=False, na=False))
	\| (counties["NAMELSAD"].str.contains("Multnomah", case=False, na=False))
	]

	if len(multnomah) > 0:
	county = multnomah.iloc[0]
	total_counties = len(counties)

	# Calculate rankings (1 = most irregular, total_counties = most compact)
	pp_rank = (counties["polsby_popper"] < county["polsby_popper"]).sum() + 1
	sch_rank = (counties["schwartzberg"] < county["schwartzberg"]).sum() + 1
	reock_rank = (counties["reock"] < county["reock"]).sum() + 1
	ch_rank = (counties["convex_hull"] < county["convex_hull"]).sum() + 1

	# Calculate percentiles (lower percentile = more irregular)
	pp_percentile = (pp_rank / total_counties) * 100
	sch_percentile = (sch_rank / total_counties) * 100
	reock_percentile = (reock_rank / total_counties) * 100
	ch_percentile = (ch_rank / total_counties) * 100

	# Get medians and means for comparison
	pp_median = counties["polsby_popper"].median()
	pp_mean = counties["polsby_popper"].mean()
	sch_median = counties["schwartzberg"].median()
	sch_mean = counties["schwartzberg"].mean()
	reock_median = counties["reock"].median()
	reock_mean = counties["reock"].mean()
	ch_median = counties["convex_hull"].median()
	ch_mean = counties["convex_hull"].mean()

	print("\n" + "=" * 80)
	print("Multnomah County, OR - Compactness Analysis")
	print("=" * 80)
	print(
	f"\n{'Metric':<20} {'Score':<15} {'Rank':<10} {'Percentile':<12} {'vs Median':<15} {'vs Mean':<15}"
	)
	print("-" * 80)

	print(
	f"{'Polsby-Popper':<20} {county['polsby_popper']:<15.6f} "
	f"{pp_rank}/{total_counties:<9} {pp_percentile:<11.2f}% "
	f"{county['polsby_popper'] / pp_median:<14.3f}x "
	f"{county['polsby_popper'] / pp_mean:<14.3f}x"
	)
	print(
	f"{'Schwartzberg':<20} {county['schwartzberg']:<15.6f} "
	f"{sch_rank}/{total_counties:<9} {sch_percentile:<11.2f}% "
	f"{county['schwartzberg'] / sch_median:<14.3f}x "
	f"{county['schwartzberg'] / sch_mean:<14.3f}x"
	)
	print(
	f"{'Reock':<20} {county['reock']:<15.6f} "
	f"{reock_rank}/{total_counties:<9} {reock_percentile:<11.2f}% "
	f"{county['reock'] / reock_median:<14.3f}x "
	f"{county['reock'] / reock_mean:<14.3f}x"
	)
	print(
	f"{'Convex Hull':<20} {county['convex_hull']:<15.6f} "
	f"{ch_rank}/{total_counties:<9} {ch_percentile:<11.2f}% "
	f"{county['convex_hull'] / ch_median:<14.3f}x "
	f"{county['convex_hull'] / ch_mean:<14.3f}x"
	)

	# Overall irregularity assessment
	avg_percentile = (
	pp_percentile + sch_percentile + reock_percentile + ch_percentile
	) / 4
	avg_rank = (pp_rank + sch_rank + reock_rank + ch_rank) / 4

	print("\n" + "-" * 80)
	print("Summary:")
	print(f" Average Rank: {avg_rank:.1f} out of {total_counties} counties")
	print(f" Average Percentile: {avg_percentile:.1f}%")
	print("=" * 80)
	else:
	print("\nMultnomah County, OR not found. Available columns:")
	print(counties.columns.tolist())

	# All Oregon counties
	oregon_counties = counties[counties["STATEFP"] == "41"].copy()
	oregon_counties = oregon_counties.sort_values("polsby_popper", ascending=True)

	print("\n" + "=" * 80)
	print("All Oregon Counties - Compactness Scores")
	print("=" * 80)
	print(
	f"{'Rank':<6} {'County':<30} {'Polsby-Popper':<15} {'Schwartzberg':<15} {'Reock':<15} {'Convex Hull':<15}"
	)
	print("-" * 80)

	for idx, (_, county) in enumerate(oregon_counties.iterrows(), 1):
	county_name = county.get("NAME", "Unknown")
	print(
	f"{idx:<6} {county_name:<30} "
	f"{county['polsby_popper']:<15.6f} {county['schwartzberg']:<15.6f} "
	f"{county['reock']:<15.6f} {county['convex_hull']:<15.6f}"
	)

	# Summary statistics for Oregon
	print("\n" + "-" * 80)
	print("Oregon Summary Statistics:")
	print(f" Total Counties: {len(oregon_counties)}")
	print(
	f" Polsby-Popper: mean={oregon_counties['polsby_popper'].mean():.4f}, "
	f"median={oregon_counties['polsby_popper'].median():.4f}"
	)
	print(
	f" Schwartzberg: mean={oregon_counties['schwartzberg'].mean():.4f}, "
	f"median={oregon_counties['schwartzberg'].median():.4f}"
	)
	print(
	f" Reock: mean={oregon_counties['reock'].mean():.4f}, "
	f"median={oregon_counties['reock'].median():.4f}"
	)
	print(
	f" Convex Hull: mean={oregon_counties['convex_hull'].mean():.4f}, "
	f"median={oregon_counties['convex_hull'].median():.4f}"
	)
	print("=" * 80)

	# Summary statistics by state
	state_summaries = []
	for state_fp, state_name in FIPS_TO_STATE.items():
	state_counties = counties[counties["STATEFP"] == state_fp]
	if len(state_counties) > 0:
	state_summaries.append(
	{
	"STATEFP": state_fp,
	"State": state_name,
	"Count": len(state_counties),
	"PP_Mean": state_counties["polsby_popper"].mean(),
	"PP_Median": state_counties["polsby_popper"].median(),
	"Sch_Mean": state_counties["schwartzberg"].mean(),
	"Sch_Median": state_counties["schwartzberg"].median(),
	"Reock_Mean": state_counties["reock"].mean(),
	"Reock_Median": state_counties["reock"].median(),
	"CH_Mean": state_counties["convex_hull"].mean(),
	"CH_Median": state_counties["convex_hull"].median(),
	}
	)

	# Convert to DataFrame and sort by Polsby-Popper mean
	state_df = pd.DataFrame(state_summaries)
	state_df = state_df.sort_values("PP_Mean", ascending=True)

	print("\n" + "=" * 100)
	print("State Summary Statistics (sorted by Polsby-Popper mean)")
	print("=" * 100)
	print(
	f"{'State':<25} {'Counties':<10} {'PP Mean':<12} {'PP Median':<12} "
	f"{'Sch Mean':<12} {'Sch Median':<12} {'Reock Mean':<12} {'Reock Median':<12} "
	f"{'CH Mean':<12} {'CH Median':<12}"
	)
	print("-" * 100)

	for _, row in state_df.iterrows():
	print(
	f"{row['State']:<25} {int(row['Count']):<10} "
	f"{row['PP_Mean']:<12.6f} {row['PP_Median']:<12.6f} "
	f"{row['Sch_Mean']:<12.6f} {row['Sch_Median']:<12.6f} "
	f"{row['Reock_Mean']:<12.6f} {row['Reock_Median']:<12.6f} "
	f"{row['CH_Mean']:<12.6f} {row['CH_Median']:<12.6f}"
	)

	print("=" * 100)
No results found