@FrankRuns
Created August 17, 2025 11:51
constant_risk_updating.R
###############################################################
# TUTORIAL: Estimating “chance of an incident next week”
# when you’ve seen zero incidents so far
#
# Audience: Curious operators and analysts. No Bayesian background needed.
#
# Big idea in plain English:
# - You start with a reasonable guess about the weekly incident rate
#   (call this your PRIOR belief, based on history/industry norms).
# - You observe some weeks with no incidents.
# - You gently update your belief (not overreacting), and use that to
#   estimate the chance of an incident next week.
#
# Why this matters:
# - “No incidents lately” does NOT mean risk is zero.
# - This code shows how a clean streak nudges your estimated risk DOWN
#   under a constant-risk model (Binomial + Beta prior).
# - (In a separate article, you’ll contrast this with a “complacency” model
#   where risk creeps UP with time since the last shock.)
###############################################################
# ---------------------------
# 1) Choose your starting belief (“prior”)
# ---------------------------
# PRIOR MEAN INCIDENT RATE PER WEEK (your best baseline guess).
# Example: industry says ~5% of weeks have an incident.
prior_mean <- 0.05
# PRIOR STRENGTH (“how confident are you in that 5%?”).
# Think of this as “pseudo-weeks” of experience backing up your prior mean.
# - S = 20 means: “I’m treating this prior like it’s based on ~20 weeks of experience.”
# - Bigger S = more stubborn (new data moves you less)
# - Smaller S = more flexible (new data moves you more)
S <- 20
# Convert the prior mean + strength into Beta distribution parameters.
# (You don’t need to know Beta math; this is just how we encode the prior cleanly.)
alpha0 <- prior_mean * S      # pseudo-count of prior incident weeks
beta0 <- (1 - prior_mean) * S # pseudo-count of prior incident-free weeks
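# (Optional sanity check -- a minimal sketch, not required for the tutorial:
#  confirm the encoded Beta prior really has mean prior_mean, and see how
#  wide your uncertainty is. qbeta() gives quantiles of a Beta distribution.)
prior_check_mean <- alpha0 / (alpha0 + beta0)           # should equal prior_mean
prior_interval <- qbeta(c(0.025, 0.975), alpha0, beta0) # central 95% prior interval
cat("Prior mean check:", round(prior_check_mean, 4),
    "| 95% prior interval:", round(prior_interval, 4), "\n")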
# ---------------------------
# 2) Observe data: clean weeks with zero incidents
# ---------------------------
# Imagine you’ve watched N weeks and saw ZERO incidents.
# We’ll look at N from 0 to 26 to show how “longer clean streak => lower estimated risk”
weeks <- 0:26 # 0 to 26 clean weeks observed
y <- 0 # y = number of incidents observed (still zero)
# ---------------------------
# 3) Update belief with the data (Bayesian “posterior”)
# ---------------------------
# AFTER observing n clean weeks, the updated belief is still a Beta distribution.
# Posterior Beta parameters are simply “prior + data” for this model.
# - With zero incidents, only the “non-incident” count grows.
post_alpha <- alpha0 + y # stays alpha0, since y=0
post_beta <- beta0 + weeks # grows as you see more clean weeks
# The POSTERIOR MEAN is your updated best-guess incident probability per week.
# For a Beta posterior it is exactly: post_alpha / (post_alpha + post_beta)
post_mean <- post_alpha / (post_alpha + post_beta)
# For a simple “what’s the chance next week?” forecast,
# the best single-number prediction is the posterior mean.
pred_next <- post_mean
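# (Worked example, by hand: with prior_mean = 0.05 and S = 20, the prior is
#  Beta(1, 19). After 2 clean weeks the posterior is Beta(1, 21), so the
#  updated estimate is 1 / (1 + 21) = 1/22, about 0.0455 -- a gentle nudge
#  down from the 0.05 you started with.)
hand_alpha <- alpha0 + 0 # incident pseudo-count, unchanged (y = 0)
hand_beta <- beta0 + 2   # 19 prior clean "pseudo-weeks" + 2 observed = 21
hand_mean <- hand_alpha / (hand_alpha + hand_beta) # 1/22, about 0.0455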
# Bundle results into a friendly table
out <- data.frame(
  weeks_clean = weeks,
  posterior_mean = round(post_mean, 4),
  predictive_next_week = round(pred_next, 4)
)
# ---------------------------
# 4) Read the table
# ---------------------------
# What to look for:
# - The row with weeks_clean = 0 is your starting belief (= prior_mean).
# - As weeks_clean increases (still zero incidents), posterior_mean goes DOWN.
#   That’s because this model assumes a *constant* underlying risk:
#   “No incidents lately” nudges you to believe the constant rate is lower.
# - This is DIFFERENT from a human-behavior hazard model where risk might
#   creep UP due to complacency. (That’s your contrasting piece.)
print(out, row.names = FALSE)
# Optional: also print a short summary for the two-week example from the narrative.
two_week_row <- subset(out, weeks_clean == 2)
cat("\n--- Two-week example ---\n")
cat("Prior mean:", prior_mean, "\n")
cat("Posterior mean after 2 clean weeks:", two_week_row$posterior_mean, "\n")
cat("Predictive P(incident next week):", two_week_row$predictive_next_week, "\n")
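# (Optional -- a minimal sketch: the posterior mean is a single number, but the
#  Beta posterior also gives an uncertainty band for the weekly rate.
#  qbeta() returns Beta quantiles; this adds a central 95% interval per row.)
out$lower_95 <- round(qbeta(0.025, post_alpha, post_beta), 4)
out$upper_95 <- round(qbeta(0.975, post_alpha, post_beta), 4)
cat("\n--- With 95% credible intervals ---\n")
print(out, row.names = FALSE)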
# ---------------------------
# 5) Visualize the effect
# ---------------------------
# If you don’t have ggplot2 installed, uncomment the next line:
# install.packages("ggplot2")
library(ggplot2)
ggplot(out, aes(weeks_clean, posterior_mean)) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = prior_mean, linetype = "dashed") +
  labs(
    x = "Clean weeks observed (y = 0)",
    y = "Estimated incident probability per week",
    title = "Binomial + Beta prior: Clean streak lowers estimated risk"
  ) +
  annotate(
    "text",
    x = 2, y = prior_mean, vjust = -1,
    label = "Your starting belief (prior mean)"
  )
###############################################################
# Key takeaway (say it out loud):
# Under a constant-risk model (Binomial + Beta prior),
# seeing more clean weeks makes us estimate the *constant* rate is lower.
# This is reasonable if risk is truly constant.
#
# But if human behavior makes risk RISE the longer we go incident-free,
# this model will MISLEAD you. That’s where your hazard/complacency model
# comes in (risk as a function of time since last shock).
###############################################################
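# (Optional cross-check -- a minimal sketch: simulate draws from the two-week
#  posterior with rbeta() and confirm the sample mean matches the closed-form
#  posterior mean. set.seed() makes the simulation reproducible.)
set.seed(42)
draws <- rbeta(1e5, alpha0, beta0 + 2) # posterior after 2 clean weeks, y = 0
cat("Simulated posterior mean:", round(mean(draws), 4),
    "vs closed-form:", round(alpha0 / (alpha0 + beta0 + 2), 4), "\n")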