Skip to content

Instantly share code, notes, and snippets.

@imadmali
Last active December 8, 2018 03:54
Show Gist options
  • Select an option

  • Save imadmali/725d6bfed5377c8da87cdaaed0a9aea7 to your computer and use it in GitHub Desktop.

Select an option

Save imadmali/725d6bfed5377c8da87cdaaed0a9aea7 to your computer and use it in GitHub Desktop.
ab-testing
# data
x1 <- rnorm(10, 1.1, 3)
x2 <- rnorm(50, 0.7, 3)
v_x1 <- var(x1)
v_x2 <- var(x2)
n_x1 <- length(x1)
n_x2 <- length(x2)
# welch's t-stat
num <- mean(x1) - mean(x2)
denom <- sqrt(v_x1/n_x1 + v_x2/n_x2)
t_stat <- num/denom
# degrees of freedom
num <- denom^4
denom1 <- ((v_x1/n_x1)^2)/(n_x1-1)
denom2 <- ((v_x2/n_x2)^2)/(n_x2-1)
deg_free <- num / (denom1 + denom2)
# replicate the data generation process under the null hypothesis
# and compute the test statistic
t_stat_samples <- c()
for (i in 1:1e4) {
y1 <- rnorm(1e3, mean(x1), 3)
y2 <- rnorm(500, mean(x1), 3)
num <- mean(y1) - mean(y2)
denom <- sqrt((var(y1)/length(y1)) + (var(y2)/length(y2)))
t_stat_samples[i] <- num/denom
}
# empirical p-value
p_val_emp <- length(which(t_stat_samples >= abs(t_stat))) +
length(which(t_stat_samples <= -abs(t_stat)))
p_val_emp <- p_val_emp/1e4
# true p_value
p_val <- pt(-abs(t_stat), deg_free) * 2
# check
t.test(x1, x2)
# visualize
hist(t_stat_samples, breaks = 50, col = "#808080", border = FALSE, freq = FALSE,
main = "Distribution of t-statistic under null",
xlab = "t-statistic samples")
crit_val <- qt(1-0.05, deg_free)
lines(seq(-5,5,length.out = 1e3),
dt(seq(-5,5,length.out = 1e3), deg_free), col = "#7DBAF5", lwd = 3)
abline(v = crit_val, lwd = 1, lty = 2)
abline(v = -crit_val, lwd = 1, lty = 2)
abline(v = abs(t_stat), col = "#FF6688", lwd = 3)
abline(v = -abs(t_stat), col = "#FF6688", lwd = 3)
legend("topright", c("t-stat", "crit value", "t-stat dist"),
lty = c(1,2,1), lwd = c(2,2,2), col = c("#FF6688","#000000","#7DBAF5"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment