Created: August 28, 2025 11:54
# Accumulating Risk Analysis: "It's there, you just can't see it"
# Demonstrates how risk accumulates over time and the statistical challenges in detecting it

# Load required libraries
library(ggplot2)
library(dplyr)
library(broom)

# Set seed for reproducibility
set.seed(42)

# =============================================================================
# PART 1: Define the Risk Models
# =============================================================================
# Function to generate synthetic data based on the accumulating risk model
# P(incident tomorrow) = 1 - exp(-exp(lambda0 + kappa * a))   (cloglog inverse link)
# where:
#   - a = days since last shock (incident, audit, or training)
#   - lambda0 = risk level right after everyone is paying attention
#   - kappa = how much more likely trouble is tomorrow if we go one more day
generate_accumulating_risk_data <- function(n_periods = 1000,
                                             lambda0 = -4,    # baseline on cloglog scale
                                             kappa = 0.02,    # per-day accumulation on cloglog scale
                                             max_days = 365) {
  data <- data.frame(
    period = seq_len(n_periods),
    days_since_last_event = numeric(n_periods),
    incident = logical(n_periods)
  )
  days_since <- 0
  for (i in seq_len(n_periods)) {
    data$days_since_last_event[i] <- days_since
    eta <- lambda0 + kappa * days_since
    p <- 1 - exp(-exp(eta))  # cloglog inverse link
    incident_occurs <- runif(1) < p
    data$incident[i] <- incident_occurs
    days_since <- if (incident_occurs) 0 else min(days_since + 1, max_days)
  }
  data
}
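
# --- Illustrative check (not part of the original analysis): evaluate the hazard
# --- formula at a few elapsed-day values to make the cloglog curve concrete.
# --- Assumes lambda0 = -4 and kappa = 0.02, matching the defaults above.
example_days <- c(0, 30, 90, 180)
example_eta <- -4 + 0.02 * example_days
round(1 - exp(-exp(example_eta)), 4)  # daily incident probability at 0, 30, 90, 180 days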
# Function to generate constant risk data (null model)
generate_constant_risk_data <- function(n_periods = 1000,
                                        base_prob = 0.05,   # Constant 5% daily risk
                                        max_days = 365) {
  data <- data.frame(
    period = seq_len(n_periods),
    days_since_last_event = numeric(n_periods),
    incident = logical(n_periods)
  )
  days_since <- 0
  for (i in seq_len(n_periods)) {
    # Store current days_since value
    data$days_since_last_event[i] <- days_since
    # Constant probability regardless of days_since
    prob_incident <- base_prob
    # Determine if incident occurs
    incident_occurs <- runif(1) < prob_incident
    data$incident[i] <- incident_occurs
    # Update days_since for next period
    if (incident_occurs) {
      days_since <- 0
    } else {
      days_since <- min(days_since + 1, max_days)
    }
  }
  return(data)
}
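
# --- Illustrative note (an assumption, not in the original script): under a constant
# --- hazard, gaps between incidents follow a geometric distribution, so with
# --- base_prob = 0.05 an incident is expected roughly once every 1 / 0.05 = 20 periods.
expected_gap_periods <- 1 / 0.05  # mean number of periods per incident under the null model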
# =============================================================================
# PART 2: Generate Initial Datasets and Test for Significance
# =============================================================================

cat("=== PART 2: Initial Analysis with Limited Data ===\n")

# Generate datasets
n_initial <- 500  # Relatively small sample
accumulating_data <- generate_accumulating_risk_data(n_initial,
                                                     lambda0 = -4,  # Baseline on cloglog scale
                                                     kappa = 0.01)  # Accumulation rate
constant_data <- generate_constant_risk_data(n_initial,
                                             base_prob = 0.02)      # 2% daily risk

# Fit logistic regression models
model_accumulating <- glm(incident ~ days_since_last_event,
                          data = accumulating_data,
                          family = binomial())
model_constant <- glm(incident ~ days_since_last_event,
                      data = constant_data,
                      family = binomial())

# Display results
cat("\nAccumulating Risk Model Results:\n")
print(summary(model_accumulating))
cat("\nConstant Risk Model Results:\n")
print(summary(model_constant))

# Extract p-values
p_val_accumulating <- summary(model_accumulating)$coefficients[2, 4]
p_val_constant <- summary(model_constant)$coefficients[2, 4]
cat(sprintf("\nP-value for 'days_since_last_event' in accumulating model: %.4f\n", p_val_accumulating))
cat(sprintf("P-value for 'days_since_last_event' in constant model: %.4f\n", p_val_constant))
# =============================================================================
# PART 3: Power Analysis - How Sample Size Affects Detection
# =============================================================================

cat("\n=== PART 3: Power Analysis - The Detection Problem ===\n")

# Function to test significance at different sample sizes
power_analysis <- function(sample_sizes = seq(100, 3000, by = 100),
                           lambda0 = -3.5,
                           kappa = 0.02,
                           alpha = 0.05,
                           n_simulations = 20) {
  results <- data.frame(
    sample_size = numeric(),
    p_value = numeric(),
    significant = logical(),
    coefficient = numeric()
  )
  for (n in sample_sizes) {
    cat(sprintf("Testing sample size: %d\n", n))
    sim_results <- replicate(n_simulations, {
      # Generate data
      data <- generate_accumulating_risk_data(n, lambda0, kappa)
      # Fit model (with error handling)
      tryCatch({
        model <- glm(incident ~ days_since_last_event,
                     data = data,
                     family = binomial())
        # Extract results
        coef_summary <- summary(model)$coefficients
        if (nrow(coef_summary) >= 2) {
          p_val <- coef_summary[2, 4]
          coef_val <- coef_summary[2, 1]
          return(c(p_val, coef_val))
        } else {
          return(c(NA, NA))
        }
      }, error = function(e) {
        return(c(NA, NA))
      })
    }, simplify = TRUE)
    # Calculate average p-value and coefficient (excluding NAs)
    valid_results <- !is.na(sim_results[1, ])
    if (sum(valid_results) > 0) {
      avg_p_val <- mean(sim_results[1, valid_results])
      avg_coef <- mean(sim_results[2, valid_results])
      results <- rbind(results, data.frame(
        sample_size = n,
        p_value = avg_p_val,
        significant = avg_p_val < alpha,
        coefficient = avg_coef
      ))
    }
  }
  return(results)
}

# Run power analysis with cloglog parameters
cat("Running power analysis (this may take a moment)...\n")
power_results <- power_analysis(sample_sizes = seq(500, 5000, by = 500),
                                lambda0 = -4,       # Baseline on cloglog scale
                                kappa = 0.015,      # Meaningful accumulation rate
                                n_simulations = 10)
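
# --- Alternative summary (illustrative, not the original approach): rather than
# --- averaging p-values across simulations, estimate empirical power as the share
# --- of simulations that reach significance at a given sample size. Assumes every
# --- simulated dataset contains enough incidents for the model to estimate a slope.
empirical_power_at <- function(n, lambda0 = -4, kappa = 0.015,
                               alpha = 0.05, n_simulations = 20) {
  hits <- replicate(n_simulations, {
    sim_data <- generate_accumulating_risk_data(n, lambda0, kappa)
    fit <- glm(incident ~ days_since_last_event, data = sim_data, family = binomial())
    summary(fit)$coefficients[2, 4] < alpha
  })
  mean(hits, na.rm = TRUE)
}
cat(sprintf("Estimated empirical power at n = 2000: %.2f\n", empirical_power_at(2000)))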
# Create the key visualization
power_plot <- ggplot(power_results, aes(x = sample_size, y = p_value)) +
  geom_line(color = "darkred", linewidth = 1.2) +
  geom_point(aes(color = significant), size = 3) +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "blue", alpha = 0.7) +
  scale_color_manual(values = c("TRUE" = "green", "FALSE" = "red"),
                     labels = c("Not Significant", "Significant")) +
  labs(
    title = "The Risk Effect Emerges Only Once the Sample Is Large Enough",
    subtitle = "The accumulating risk signal becomes detectable with sufficient data",
    x = "Number of Observations",
    y = "P-value for Days Since Last Event",
    color = "Statistical\nSignificance",
    caption = "Horizontal line shows the α = 0.05 threshold"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray40"),
    legend.position = "bottom"
  ) +
  ylim(0, max(power_results$p_value, na.rm = TRUE) * 1.1)

print(power_plot)
# =============================================================================
# PART 4: Visualization of Risk Accumulation
# =============================================================================

cat("\n=== PART 4: Visualizing Risk Accumulation ===\n")

# Create theoretical risk curves with cloglog link
days_sequence <- 0:365                     # Full year
lambda0 <- -4                              # Baseline on cloglog scale
kappa_values <- c(0, 0.005, 0.01, 0.02)    # Accumulation rates

risk_curves <- expand.grid(
  days = days_sequence,
  kappa = kappa_values
) %>%
  mutate(
    eta = lambda0 + kappa * days,
    risk_probability = 1 - exp(-exp(eta)),  # cloglog inverse link
    model_type = ifelse(kappa == 0, "Constant Risk", "Accumulating Risk"),
    kappa_label = paste("κ =", kappa)
  )

risk_plot <- ggplot(risk_curves, aes(x = days, y = risk_probability, color = kappa_label)) +
  geom_line(linewidth = 1.2) +
  labs(
    title = "Theoretical Risk Accumulation Over Time",
    subtitle = "How incident probability changes with days since last event",
    x = "Days Since Last Event",
    y = "Probability of Incident Tomorrow",
    color = "Risk Parameter"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    legend.position = "bottom"
  ) +
  scale_y_continuous(labels = scales::percent_format())

print(risk_plot)
# =============================================================================
# PART 5: Summary and Key Insights
# =============================================================================

cat("\n=== SUMMARY AND KEY INSIGHTS ===\n")

# Find the threshold where significance emerges
significant_threshold <- power_results$sample_size[which(power_results$significant)[1]]
if (is.na(significant_threshold)) {
  significant_threshold <- "Not reached in this analysis"
}
cat(sprintf("Threshold for statistical significance: %s observations\n", significant_threshold))
cat(sprintf("Initial sample size tested: %d observations\n", n_initial))

final_significant <- tail(power_results$significant, 1)
if (!is.na(final_significant) && final_significant) {
  cat("Result: With sufficient data, the accumulating risk effect becomes detectable.\n")
} else {
  cat("Result: Even with larger samples, detection remains challenging.\n")
}

cat("\nKey Takeaways:\n")
cat("1. Rare events make statistical detection difficult\n")
cat("2. Absence of significance ≠ absence of effect\n")
cat("3. The 'it's there, you just can't see it' principle applies\n")
cat("4. Risk may be accumulating even when data suggests otherwise\n")

# Save the workspace for further analysis
cat("\nAnalysis complete. Plots and data objects are ready for use.\n")
# Ultra-compact incident timeline visualization
# Perfect for showing rare binary events with minimal visual clutter
create_compact_timeline <- function(accumulating_data, constant_data) {
  # Set up ultra-flat layout
  par(mfrow = c(2, 1),
      mar = c(1.5, 3, 1.5, 1),   # Minimal margins
      oma = c(2, 0, 2, 0),       # Outer margins for title
      bg = "white")

  # Elegant color palette
  acc_color <- "#E74C3C"    # Red for accumulating risk
  const_color <- "#3498DB"  # Blue for constant risk

  # Plot 1: Accumulating Risk Model
  plot(accumulating_data$period, rep(1, nrow(accumulating_data)),
       type = "n",                 # Don't plot anything yet
       ylim = c(0.5, 1.5),         # Narrow vertical range
       xlim = c(0, max(accumulating_data$period)),
       xlab = "",
       ylab = "",
       main = sprintf("Accumulating Risk (%d incidents)", sum(accumulating_data$incident)),
       axes = FALSE,
       cex.main = 1.1,
       font.main = 2)

  # Add incident markers as clean vertical lines
  incident_periods_acc <- accumulating_data$period[accumulating_data$incident]
  if (length(incident_periods_acc) > 0) {
    segments(incident_periods_acc, rep(0.7, length(incident_periods_acc)),
             incident_periods_acc, rep(1.3, length(incident_periods_acc)),
             col = acc_color, lwd = 2.5, lend = 2)  # Square line ends
  }

  # Minimal x-axis (just tick marks, no labels on top plot)
  axis(1, at = seq(0, max(accumulating_data$period), by = 100),
       labels = FALSE, tck = -0.02, col = "gray60")

  # Plot 2: Constant Risk Model
  plot(constant_data$period, rep(1, nrow(constant_data)),
       type = "n",
       ylim = c(0.5, 1.5),
       xlim = c(0, max(constant_data$period)),
       xlab = "Time Period",
       ylab = "",
       main = sprintf("Constant Risk (%d incidents)", sum(constant_data$incident)),
       axes = FALSE,
       cex.main = 1.1,
       font.main = 2)

  # Add incident markers
  incident_periods_const <- constant_data$period[constant_data$incident]
  if (length(incident_periods_const) > 0) {
    segments(incident_periods_const, rep(0.7, length(incident_periods_const)),
             incident_periods_const, rep(1.3, length(incident_periods_const)),
             col = const_color, lwd = 2.5, lend = 2)
  }

  # Full x-axis with labels on bottom plot
  axis(1, at = seq(0, max(constant_data$period), by = 100),
       labels = seq(0, max(constant_data$period), by = 100),
       cex.axis = 0.9, col = "gray60")

  # Clean overall title
  mtext("When Do Incidents Occur?",
        outer = TRUE, cex = 1.3, font = 2, line = 0.5, col = "gray20")

  # Reset plotting parameters
  par(mfrow = c(1, 1), mar = c(5, 4, 4, 2) + 0.1, oma = c(0, 0, 0, 0))
}

# Usage:
create_compact_timeline(accumulating_data, constant_data)

cat("Ultra-compact timeline visualization created!\n")
cat("Usage: create_compact_timeline(accumulating_data, constant_data)\n")
cat("\nFeatures:\n")
cat("- Maximally flat layout\n")
cat("- Shows only incident occurrences\n")
cat("- Clean, minimal design\n")
cat("- Perfect for rare events\n")
cat("- No visual clutter\n")