Last active
December 8, 2018 03:54
-
-
Save imadmali/725d6bfed5377c8da87cdaaed0a9aea7 to your computer and use it in GitHub Desktop.
ab-testing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # data | |
| x1 <- rnorm(10, 1.1, 3) | |
| x2 <- rnorm(50, 0.7, 3) | |
| v_x1 <- var(x1) | |
| v_x2 <- var(x2) | |
| n_x1 <- length(x1) | |
| n_x2 <- length(x2) | |
| # welch's t-stat | |
| num <- mean(x1) - mean(x2) | |
| denom <- sqrt(v_x1/n_x1 + v_x2/n_x2) | |
| t_stat <- num/denom | |
| # degrees of freedom | |
| num <- denom^4 | |
| denom1 <- ((v_x1/n_x1)^2)/(n_x1-1) | |
| denom2 <- ((v_x2/n_x2)^2)/(n_x2-1) | |
| deg_free <- num / (denom1 + denom2) | |
| # replicate the data generation process under the null hypothesis | |
| # and compute the test statistic | |
| t_stat_samples <- c() | |
| for (i in 1:1e4) { | |
| y1 <- rnorm(1e3, mean(x1), 3) | |
| y2 <- rnorm(500, mean(x1), 3) | |
| num <- mean(y1) - mean(y2) | |
| denom <- sqrt((var(y1)/length(y1)) + (var(y2)/length(y2))) | |
| t_stat_samples[i] <- num/denom | |
| } | |
| # empirical p-value | |
| p_val_emp <- length(which(t_stat_samples >= abs(t_stat))) + | |
| length(which(t_stat_samples <= -abs(t_stat))) | |
| p_val_emp <- p_val_emp/1e4 | |
| # true p_value | |
| p_val <- pt(-abs(t_stat), deg_free) * 2 | |
| # check | |
| t.test(x1, x2) | |
| # visualize | |
| hist(t_stat_samples, breaks = 50, col = "#808080", border = FALSE, freq = FALSE, | |
| main = "Distribution of t-statistic under null", | |
| xlab = "t-statistic samples") | |
| crit_val <- qt(1-0.05, deg_free) | |
| lines(seq(-5,5,length.out = 1e3), | |
| dt(seq(-5,5,length.out = 1e3), deg_free), col = "#7DBAF5", lwd = 3) | |
| abline(v = crit_val, lwd = 1, lty = 2) | |
| abline(v = -crit_val, lwd = 1, lty = 2) | |
| abline(v = abs(t_stat), col = "#FF6688", lwd = 3) | |
| abline(v = -abs(t_stat), col = "#FF6688", lwd = 3) | |
| legend("topright", c("t-stat", "crit value", "t-stat dist"), | |
| lty = c(1,2,1), lwd = c(2,2,2), col = c("#FF6688","#000000","#7DBAF5")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment