soodoku · September 13, 2025 22:22
diff --git a/readme.md b/readme.md
diff --git a/sim.py b/sim.py
 import numpy as np
 from scipy.stats import multivariate_normal

 def simulate_personality_change(stability_scenario='baseline', years=30, n=10000):
    """
    Simulate personality change over specified years.
    
    Parameters:
    - stability_scenario: 'baseline' (no true change, r=1.0),
                         'low' (Soldz & Vaillant), 'moderate' (Roberts & DelVecchio),
                         'high' (optimistic), or 'time_decay' (realistic decay function)
    - years: time span (relevant for time_decay scenario)
    - n: number of simulated individuals
    
    Returns:
    - Dictionary with percentages for each number of traits changed
    """
    
    # Trait intercorrelations matrix (E, N, O, C, A) from literature
    trait_corr = np.array([
        [1.00, -0.22,  0.17,  0.12,  0.12],  # Extraversion
        [-0.22, 1.00,  0.05, -0.27, -0.22],  # Neuroticism
        [0.17,  0.05,  1.00,  0.05,  0.12],  # Openness
        [0.12, -0.27,  0.05,  1.00,  0.22],  # Conscientiousness
        [0.12, -0.22,  0.12,  0.22,  1.00]   # Agreeableness
    ])
    
    # Time-based stability decay function
    def get_stability(years):
        """Based on meta-analyses: ~.80 at 2 years, ~.65 at 10 years, ~.45 at 30 years"""
        if years <= 2:
            return 0.85 - 0.025 * years
        elif years <= 10:
            return 0.80 - 0.015 * years  
        else:
            return 0.65 - 0.007 * years
    
    # Set stability correlations based on scenario
    if stability_scenario == 'baseline':
        stability = [1.00, 1.00, 1.00, 1.00, 1.00]  # No true change
    elif stability_scenario == 'time_decay':
        base_stability = [get_stability(years) for _ in range(5)]
        # Trait-specific adjustments based on literature
        stability = [
            base_stability[0] * 1.05,  # E slightly more stable
            base_stability[1],         # N normal
            base_stability[2] * 0.90,  # O slightly less stable  
            base_stability[3],         # C normal
            base_stability[4]          # A normal
        ]
    elif stability_scenario == 'low':
        stability = [0.19, 0.20, 0.38, 0.12, 0.07]  # Soldz & Vaillant 1999
    elif stability_scenario == 'moderate':
        stability = [0.40, 0.40, 0.40, 0.40, 0.40]  # Roberts & DelVecchio 2000
    elif stability_scenario == 'high':
        stability = [0.50, 0.50, 0.50, 0.50, 0.50]  # Optimistic
    
    measurement_reliability = 0.82  # Gnambs 2014 meta-analysis
    
    changes = []
    for _ in range(n):
        # Generate correlated traits at Time 1 (true scores)
        true_t1 = multivariate_normal.rvs(mean=[0, 0, 0, 0, 0], cov=trait_corr)
        
        # Add measurement error for observed Time 1 scores
        error_t1 = np.random.normal(0, np.sqrt(1 - measurement_reliability), 5)
        observed_t1 = true_t1 * np.sqrt(measurement_reliability) + error_t1
        
        # Generate Time 2 true scores using stability correlations
        true_t2 = []
        for i, r in enumerate(stability):
            change_error = np.random.normal(0, 1)
            new_true = true_t1[i] * r + change_error * np.sqrt(1 - r**2)
            true_t2.append(new_true)
        true_t2 = np.array(true_t2)
        
        # Add measurement error for observed Time 2 scores
        error_t2 = np.random.normal(0, np.sqrt(1 - measurement_reliability), 5)
        observed_t2 = true_t2 * np.sqrt(measurement_reliability) + error_t2
        
        # Calculate absolute changes in observed scores
        trait_changes = [abs(o2 - o1) for o2, o1 in zip(observed_t2, observed_t1)]
        changes.append(trait_changes)
    
    changes = np.array(changes)
    
    # Calculate distribution of changes
    distribution = {}
    for i in range(6):
        distribution[i] = np.mean(np.sum(changes > 0.5, axis=1) == i) * 100
    
    prop_3plus = np.mean(np.sum(changes > 0.5, axis=1) >= 3) * 100
    
    return {'distribution': distribution, 'prop_3plus': prop_3plus, 'stability': stability}

 # Run baseline first to establish measurement error rates
 print("="*60)
 print("PERSONALITY CHANGE OVER 30 YEARS")
 print("10,000 simulated individuals per scenario")
 print("="*60)

 baseline_results = simulate_personality_change('baseline', n=10000)
 baseline_3plus = baseline_results['prop_3plus']

 print("\nBASELINE: Perfect stability (r=1.0)")
 print("Shows change due to measurement error alone")
 print(f"→ {baseline_3plus:.0f}% show apparent change on 3+ traits")

 # Now run substantive scenarios and calculate true change
 print("\n" + "="*60)
 print("TRUE PERSONALITY CHANGE (accounting for measurement error)")
 print("="*60)

 scenarios = ['low', 'moderate', 'high', 'time_decay']
 labels = {
    'low': 'Soldz & Vaillant (1999)',
    'moderate': 'Roberts & DelVecchio (2000)', 
    'high': 'Optimistic (r=0.50)',
    'time_decay': 'Time-decay model'
 }

 results = {}
 for scenario in scenarios:
    results[scenario] = simulate_personality_change(scenario, years=30, n=10000)
    observed = results[scenario]['prop_3plus']
    true_change = observed - baseline_3plus
    
    print(f"\n{labels[scenario]}")
    print(f"Stability: {results[scenario]['stability']}")
    print(f"Observed change: {observed:.0f}%")
    print(f"Measurement error: {baseline_3plus:.0f}%")
    print(f"TRUE CHANGE: {true_change:.0f}% of people genuinely change on 3+ traits")

 # Calculate adjusted percentages for true changers
 print("\n" + "="*60)
 print("SUMMARY: Who Truly Changes?")
 print("="*60)
 print(f"Measurement error alone affects {baseline_3plus:.0f}% (baseline)")
 print("\nPercentage with TRUE personality change on 3+ traits:")
 for scenario in scenarios:
    true_change = results[scenario]['prop_3plus'] - baseline_3plus
    print(f"  {labels[scenario]:35} {true_change:.0f}%")

 print("\n" + "="*60)
 print("INTERPRETATION")
 print("="*60)
 print("After accounting for measurement error:")
 true_changes = [results[s]['prop_3plus'] - baseline_3plus for s in scenarios]
 min_change = min(true_changes)
 max_change = max(true_changes)
 print(f"• Between {min_change:.0f}% and {max_change:.0f}% of people show TRUE personality change")
	import numpy as np
	from scipy.stats import multivariate_normal

	def simulate_personality_change(stability_scenario='baseline', years=30, n=10000):
	"""
	Simulate personality change over specified years.

	Parameters:
	- stability_scenario: 'baseline' (no true change, r=1.0),
	'low' (Soldz & Vaillant), 'moderate' (Roberts & DelVecchio),
	'high' (optimistic), or 'time_decay' (realistic decay function)
	- years: time span (relevant for time_decay scenario)
	- n: number of simulated individuals

	Returns:
	- Dictionary with percentages for each number of traits changed
	"""

	# Trait intercorrelations matrix (E, N, O, C, A) from literature
	trait_corr = np.array([
	[1.00, -0.22, 0.17, 0.12, 0.12], # Extraversion
	[-0.22, 1.00, 0.05, -0.27, -0.22], # Neuroticism
	[0.17, 0.05, 1.00, 0.05, 0.12], # Openness
	[0.12, -0.27, 0.05, 1.00, 0.22], # Conscientiousness
	[0.12, -0.22, 0.12, 0.22, 1.00] # Agreeableness
	])

	# Time-based stability decay function
	def get_stability(years):
	"""Based on meta-analyses: ~.80 at 2 years, ~.65 at 10 years, ~.45 at 30 years"""
	if years <= 2:
	return 0.85 - 0.025 * years
	elif years <= 10:
	return 0.80 - 0.015 * years
	else:
	return 0.65 - 0.007 * years

	# Set stability correlations based on scenario
	if stability_scenario == 'baseline':
	stability = [1.00, 1.00, 1.00, 1.00, 1.00] # No true change
	elif stability_scenario == 'time_decay':
	base_stability = [get_stability(years) for _ in range(5)]
	# Trait-specific adjustments based on literature
	stability = [
	base_stability[0] * 1.05, # E slightly more stable
	base_stability[1], # N normal
	base_stability[2] * 0.90, # O slightly less stable
	base_stability[3], # C normal
	base_stability[4] # A normal
	]
	elif stability_scenario == 'low':
	stability = [0.19, 0.20, 0.38, 0.12, 0.07] # Soldz & Vaillant 1999
	elif stability_scenario == 'moderate':
	stability = [0.40, 0.40, 0.40, 0.40, 0.40] # Roberts & DelVecchio 2000
	elif stability_scenario == 'high':
	stability = [0.50, 0.50, 0.50, 0.50, 0.50] # Optimistic

	measurement_reliability = 0.82 # Gnambs 2014 meta-analysis

	changes = []
	for _ in range(n):
	# Generate correlated traits at Time 1 (true scores)
	true_t1 = multivariate_normal.rvs(mean=[0, 0, 0, 0, 0], cov=trait_corr)

	# Add measurement error for observed Time 1 scores
	error_t1 = np.random.normal(0, np.sqrt(1 - measurement_reliability), 5)
	observed_t1 = true_t1 * np.sqrt(measurement_reliability) + error_t1

	# Generate Time 2 true scores using stability correlations
	true_t2 = []
	for i, r in enumerate(stability):
	change_error = np.random.normal(0, 1)
	new_true = true_t1[i] * r + change_error * np.sqrt(1 - r**2)
	true_t2.append(new_true)
	true_t2 = np.array(true_t2)

	# Add measurement error for observed Time 2 scores
	error_t2 = np.random.normal(0, np.sqrt(1 - measurement_reliability), 5)
	observed_t2 = true_t2 * np.sqrt(measurement_reliability) + error_t2

	# Calculate absolute changes in observed scores
	trait_changes = [abs(o2 - o1) for o2, o1 in zip(observed_t2, observed_t1)]
	changes.append(trait_changes)

	changes = np.array(changes)

	# Calculate distribution of changes
	distribution = {}
	for i in range(6):
	distribution[i] = np.mean(np.sum(changes > 0.5, axis=1) == i) * 100

	prop_3plus = np.mean(np.sum(changes > 0.5, axis=1) >= 3) * 100

	return {'distribution': distribution, 'prop_3plus': prop_3plus, 'stability': stability}

	# Run baseline first to establish measurement error rates
	print("="*60)
	print("PERSONALITY CHANGE OVER 30 YEARS")
	print("10,000 simulated individuals per scenario")
	print("="*60)

	baseline_results = simulate_personality_change('baseline', n=10000)
	baseline_3plus = baseline_results['prop_3plus']

	print("\nBASELINE: Perfect stability (r=1.0)")
	print("Shows change due to measurement error alone")
	print(f"→ {baseline_3plus:.0f}% show apparent change on 3+ traits")

	# Now run substantive scenarios and calculate true change
	print("\n" + "="*60)
	print("TRUE PERSONALITY CHANGE (accounting for measurement error)")
	print("="*60)

	scenarios = ['low', 'moderate', 'high', 'time_decay']
	labels = {
	'low': 'Soldz & Vaillant (1999)',
	'moderate': 'Roberts & DelVecchio (2000)',
	'high': 'Optimistic (r=0.50)',
	'time_decay': 'Time-decay model'
	}

	results = {}
	for scenario in scenarios:
	results[scenario] = simulate_personality_change(scenario, years=30, n=10000)
	observed = results[scenario]['prop_3plus']
	true_change = observed - baseline_3plus

	print(f"\n{labels[scenario]}")
	print(f"Stability: {results[scenario]['stability']}")
	print(f"Observed change: {observed:.0f}%")
	print(f"Measurement error: {baseline_3plus:.0f}%")
	print(f"TRUE CHANGE: {true_change:.0f}% of people genuinely change on 3+ traits")

	# Calculate adjusted percentages for true changers
	print("\n" + "="*60)
	print("SUMMARY: Who Truly Changes?")
	print("="*60)
	print(f"Measurement error alone affects {baseline_3plus:.0f}% (baseline)")
	print("\nPercentage with TRUE personality change on 3+ traits:")
	for scenario in scenarios:
	true_change = results[scenario]['prop_3plus'] - baseline_3plus
	print(f" {labels[scenario]:35} {true_change:.0f}%")

	print("\n" + "="*60)
	print("INTERPRETATION")
	print("="*60)
	print("After accounting for measurement error:")
	true_changes = [results[s]['prop_3plus'] - baseline_3plus for s in scenarios]
	min_change = min(true_changes)
	max_change = max(true_changes)
	print(f"• Between {min_change:.0f}% and {max_change:.0f}% of people show TRUE personality change")
No results found