Last active
March 24, 2018 17:26
-
-
Save owstron/137852cb5d1b726dba905ae045b0bfee to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| rm(list=ls()) | |
| library('Matching') | |
| library('MASS') | |
| library('quantreg') | |
| nswre74_control <- read.table("nswre74_control.txt") | |
| nswre74_treated <- read.table("nswre74_treated.txt") | |
| names(nswre74_treated) <- c("treat", "age", "educ", "black", "hisp", "married", "nodegr", "re74", "re75", "re78") | |
| names(nswre74_control) <- c("treat", "age", "educ", "black", "hisp", "married", "nodegr", "re74", "re75", "re78") | |
| # | |
| # Finding point estimate of treatment effect | |
| # | |
| mean_re78_control = mean(nswre74_control$re78) | |
| mean_re78_treated = mean(nswre74_treated$re78) | |
| diff_mean = mean_re78_treated - mean_re78_control | |
| cat("Point estimate of treatment effect with is ", diff_mean) | |
| #### Finding confidence interval using Linear regression on experimental values #### | |
| nswre74_data <- rbind(nswre74_treated, nswre74_control) | |
| lm.exp <- lm(nswre74_data$re78 ~ nswre74_data$treat, data = nswre74_data) | |
| summary(lm.exp) | |
| cat("Confidence Interval of point estimate is ") | |
| print(confint(lm.exp, level=0.95)) | |
| #### Point estimate and confidence interval with cps control #### | |
| # Combining with cps_control | |
| cps_control <- read.table("cps_controls.txt") | |
| names(cps_control) <- c("treat", "age", "educ", "black", "hisp", "married", "nodegr", "re74", "re75", "re78") | |
| nswre74_cps_data <- rbind(nswre74_treated, cps_control) | |
| mean_treated <- mean(nswre74_cps_data[which(nswre74_cps_data$treat == 1),]$re78) | |
| mean_control <- mean(nswre74_cps_data[which(nswre74_cps_data$treat == 0),]$re78) | |
| diff_mean_cps = mean_treated - mean_control | |
| cat("Point Estimate with CPS control data is ", diff_mean_cps) | |
| lm.cps <- lm(nswre74_cps_data$re78 ~ nswre74_cps_data$treat, data = nswre74_cps_data) | |
| summary(lm.cps) | |
| cat("Confidence Interval of point estimate is ") | |
| print(confint(lm.cps, level=0.95)) | |
| #### Propensity Score Matching and finding treatment effect #### | |
| glm_psm <- glm(treat ~ age + educ + black + | |
| hisp + married + nodegr + re74 + re75, | |
| family=binomial, data=nswre74_cps_data) | |
| psm_score <- glm_psm$fitted | |
| Y <- nswre74_cps_data$re78 | |
| Tr <- nswre74_cps_data$treat | |
| # | |
| # Matching based on propensity score | |
| # | |
| psm_match <- Match(Y=Y, Tr=Tr, X=psm_score, M=1) | |
| psm_data <- psm_match$mdata | |
| summary(psm_match) | |
| # Checking balance of data | |
| mb.psm <- MatchBalance(treat~age + educ + black + | |
| hisp + married + nodegr + re74 + re75 | |
| , data=nswre74_cps_data, match.out=psm_match, nboots=500) | |
| # Calculating estimate using linear regression | |
| psm_data <- psm_match$mdata | |
| lm.psm <- lm(psm_data$Y ~ psm_data$Tr, data=psm_data) | |
| summary(lm.psm) | |
| cat("Point estimate is ", coef(lm.psm)[2]) | |
| cat("Confidence Interval is ") | |
| print(confint(psm_lm, level = 0.95)) | |
| #### Matching with multiple covariates and Propensity Score #### | |
| Y <- nswre74_cps_data$re78 | |
| Tr <- nswre74_cps_data$treat | |
| X <- cbind(nswre74_cps_data$age, | |
| nswre74_cps_data$educ, | |
| nswre74_cps_data$black == 1, | |
| nswre74_cps_data$black == 0, | |
| nswre74_cps_data$hisp == 1, | |
| nswre74_cps_data$hisp == 0, | |
| nswre74_cps_data$married == 1, | |
| nswre74_cps_data$married == 0, | |
| nswre74_cps_data$nodegr == 1, | |
| nswre74_cps_data$nodegr == 0, | |
| nswre74_cps_data$re74, | |
| nswre74_cps_data$re75, | |
| psm_score) | |
| mout.mv <- Match(Y=Y, Tr=Tr, X=X, M=1) | |
| summary(mout.mv) | |
| # Finding Match Balance of multivariate matching | |
| mb.mv <- MatchBalance(treat~age + educ + black + | |
| hisp + married + nodegr + re74 + re75, | |
| data=nswre74_cps_data, match.out=mout.mv, nboots=500) | |
| # Estimating estimates with linear regression | |
| mv_data <- mout.mv$mdata | |
| lm.mv <- lm(mv_data$Y ~ mv_data$Tr, data = mv_data) | |
| summary(lm.mv) | |
| cat("Point estimate is ", coef(lm.mv)[2]) | |
| cat("Confidence Interval is ") | |
| print(confint(lm.mv, level = 0.95)) | |
| #### Genetic Matching with Propensity Score #### | |
| Y <- nswre74_cps_data$re78 | |
| Tr <- nswre74_cps_data$treat | |
| X <- psm_score | |
| gout.psm <- GenMatch(Tr=Tr, X=X, M=1, estimand="ATT", pop.size = 100, max.generations = 20, wait.generations = 7) | |
| mout.gen.psm <- Match(Y=Y, Tr=Tr, X=X, M=1, estimand="ATT", Weight.matrix = gout.psm) | |
| # Finding Match Balance of multivariate matching | |
| mb.gen.psm <- MatchBalance(treat~age + educ + black + | |
| hisp + married + nodegr + re74 + re75, | |
| data=nswre74_cps_data, match.out=mout.gen.psm, nboots=500) | |
| # Estimating estimates with linear regression | |
| gen_psm_data <- mout.gen.psm$mdata | |
| lm.gen.psm <- lm(gen_psm_data$Y ~ gen_psm_data$Tr, data=gen_psm_data) | |
| summary(lm.gen.psm) | |
| cat("Point estimate is ", coef(lm.gen.psm)[2]) | |
| cat("Confidence Interval is ") | |
| print(confint(lm.gen.psm, level = 0.95)) | |
| #### Genetic Matching with all covariates #### | |
| Y <- nswre74_cps_data$re78 | |
| Tr <- nswre74_cps_data$treat | |
| X_all <- cbind(nswre74_cps_data$age, | |
| nswre74_cps_data$educ, | |
| nswre74_cps_data$black == 1, | |
| nswre74_cps_data$black == 0, | |
| nswre74_cps_data$hisp == 1, | |
| nswre74_cps_data$hisp == 0, | |
| nswre74_cps_data$married == 1, | |
| nswre74_cps_data$married == 0, | |
| nswre74_cps_data$nodegr == 1, | |
| nswre74_cps_data$nodegr == 0, | |
| nswre74_cps_data$re74, | |
| nswre74_cps_data$re75, | |
| psm_score) | |
| gout.mv <- GenMatch(Tr=Tr, X=X_all, M=1, estimand = "ATT", pop.size = 100, max.generations = 20, wait.generations = 7) | |
| mout.gen.mv <- Match(Y=Y, Tr=Tr, X=X_all, estimand="ATT", M=1, Weight.matrix = gout.mv) | |
| # Finding Match Balance of multivariate matching | |
| mb.gen.mv <- MatchBalance(treat ~ age + educ + black + | |
| hisp + married + nodegr + re74 + re75, | |
| data=nswre74_cps_data, match.out=mout.gen.mv, nboots=500) | |
| # Estimating the effect with linear regression | |
| gen_mv_data <- mout.gen.mv$mdata | |
| lm.gen.mv <- lm(gen_mv_data$Y ~ gen_mv_data$Tr, data=gen_mv_data) | |
| summary(lm.gen.mv) | |
| cat("Point estimate is ", coef(lm.gen.mv)[2]) | |
| cat("Confidence Interval is ") | |
| print(confint(lm.gen.mv, level = 0.95)) | |
| #### Genetic Matching with Propensity Score with caliper = 0.25 #### | |
| Y <- nswre74_cps_data$re78 | |
| Tr <- nswre74_cps_data$treat | |
| X <- psm_score | |
| gout.cal.psm <- GenMatch(Tr=Tr, X=X, M=1, estimand = "ATT", pop.size = 100, | |
| max.generations = 20, wait.generations = 7, caliper = 0.25) | |
| mout.gen.cal.psm <- Match(Y=Y, Tr=Tr, X=X, M=1, estimand="ATT", | |
| caliper = 0.25, Weight.matrix = gout.cal.psm) | |
| # Finding Match Balance of multivariate matching | |
| mb.gen.cal.psm <- MatchBalance(treat ~ age + educ + black + | |
| hisp + married + nodegr + re74 + re75, | |
| data=nswre74_cps_data, match.out=mout.gen.cal.psm, nboots=500) | |
| # Estimating the effect with linear regression | |
| gen_cal_psm_data <- mout.gen.cal.psm$mdata | |
| lm.gen.cal.psm <- lm(gen_cal_psm_data$Y ~ gen_cal_psm_data$Tr, data=gen_cal_psm_data) | |
| summary(lm.gen.cal.psm) | |
| cat("Point estimate is ", coef(lm.gen.cal.psm)[2]) | |
| cat("Confidence Interval is ") | |
| print(confint(lm.gen.cal.psm, level = 0.95)) | |
| #### Genetic Matching with Mulitvariate Score with caliper = 0.25 #### | |
| Y <- nswre74_cps_data$re78 | |
| Tr <- nswre74_cps_data$treat | |
| X_all <- cbind(nswre74_cps_data$age, | |
| nswre74_cps_data$educ, | |
| nswre74_cps_data$black == 1, | |
| nswre74_cps_data$black == 0, | |
| nswre74_cps_data$hisp == 1, | |
| nswre74_cps_data$hisp == 0, | |
| nswre74_cps_data$married == 1, | |
| nswre74_cps_data$married == 0, | |
| nswre74_cps_data$nodegr == 1, | |
| nswre74_cps_data$nodegr == 0, | |
| nswre74_cps_data$re74, | |
| nswre74_cps_data$re75, | |
| psm_score) | |
| gout.cal.mv <- GenMatch(Tr=Tr, X=X_all, M=1, estimand = "ATT", pop.size = 100, max.generations = 20, wait.generations = 7, caliper = 0.25) | |
| mout.gen.cal.mv <- Match(Y=Y, Tr=Tr, X=X_all, estimand="ATT", M=1, caliper = 0.25, Weight.matrix = gout.cal.mv) | |
| # Finding Match Balance of multivariate matching | |
| mb.gen.cal.mv <- MatchBalance(treat ~ age + educ + black + | |
| hisp + married + nodegr + re74 + re75, | |
| data=nswre74_cps_data, match.out=mout.gen.cal.mv, nboots=500) | |
| # Estimating the effect with linear regression | |
| gen_cal_mv_data <- mout.gen.cal.mv$mdata | |
| lm.gen.cal.mv <- lm(gen_cal_mv_data$Y ~ gen_cal_mv_data$Tr, data=gen_cal_mv_data) | |
| summary(lm.gen.cal.mv) | |
| cat("Point estimate is ", coef(lm.gen.cal.mv)[2]) | |
| cat("Confidence Interval is ") | |
| print(confint(lm.gen.cal.mv, level = 0.95)) | |
| #### Genetic Matching with Mulitvariate Score with M=2 and exact = TRUE #### | |
| Y <- nswre74_cps_data$re78 | |
| Tr <- nswre74_cps_data$treat | |
| X_all <- cbind(nswre74_cps_data$age, | |
| nswre74_cps_data$educ, | |
| nswre74_cps_data$black == 1, | |
| nswre74_cps_data$black == 0, | |
| nswre74_cps_data$hisp == 1, | |
| nswre74_cps_data$hisp == 0, | |
| nswre74_cps_data$married == 1, | |
| nswre74_cps_data$married == 0, | |
| nswre74_cps_data$nodegr == 1, | |
| nswre74_cps_data$nodegr == 0, | |
| nswre74_cps_data$re74, | |
| nswre74_cps_data$re75, | |
| psm_score) | |
| gout.3 <- GenMatch(Tr=Tr, X=X_all, M=2, estimand = "ATT", pop.size = 100, max.generations = 20, wait.generations = 7, exact = TRUE) | |
| mout.gen.3 <- Match(Y=Y, Tr=Tr, X=X_all, estimand="ATT", M=2, Weight.matrix = gout.3, exact = TRUE) | |
| # Finding Match Balance of multivariate matching | |
| mb.gen.3 <- MatchBalance(treat ~ age + educ + black + | |
| hisp + married + nodegr + re74 + re75, | |
| data=nswre74_cps_data, match.out=mout.gen.3, nboots=500) | |
| # Estimating the effect with linear regression | |
| gen3_data <- mout.gen.3$mdata | |
| lm.gen.3 <- lm(gen3_data$Y ~ gen3_data$Tr, data=gen3_data) | |
| summary(lm.gen.3) | |
| cat("Point estimate is ", coef(lm.gen.3)[2]) | |
| cat("Confidence Interval is ") | |
| print(confint(lm.gen.3, level = 0.95)) | |
| #### Printing out estimates #### | |
| coefs <- c(coef(lm.exp)[2], coef(lm.cps)[2], coef(lm.psm)[2], | |
| coef(lm.mv)[2], coef(lm.gen.psm)[2], coef(lm.gen.mv)[2], | |
| coef(lm.gen.cal.psm)[2], coef(lm.gen.cal.mv)[2], coef(lm.gen.3)[2]) | |
| lbounds <- c(confint(lm.exp)[2,1], | |
| confint(lm.cps)[2,1], | |
| confint(lm.psm)[2,1], | |
| confint(lm.mv)[2,1], | |
| confint(lm.gen.psm)[2,1], | |
| confint(lm.gen.mv)[2,1], | |
| confint(lm.gen.cal.psm)[2,1], | |
| confint(lm.gen.cal.mv)[2,1], | |
| confint(lm.gen.3)[2,1]) | |
| ubounds <- c(confint(lm.exp)[2,2], | |
| confint(lm.cps)[2,2], | |
| confint(lm.psm)[2,2], | |
| confint(lm.mv)[2,2], | |
| confint(lm.gen.psm)[2,2], | |
| confint(lm.gen.mv)[2,2], | |
| confint(lm.gen.cal.psm)[2,2], | |
| confint(lm.gen.cal.mv)[2,2], | |
| confint(lm.gen.3)[2,2]) | |
| estimates <- data.frame(coefs, lbounds, ubounds) | |
| row.names(estimates) <- c("With only Experimental Data", "With CPS Control", | |
| "After Propensity Score Matching", "After Multivariate Matching", | |
| "After GenMatch with Propensity Score", | |
| "After GenMatch with all Covariates", | |
| "GenMatch Propensity Score with Caliper = 0.25", | |
| "GenMatch all Covariates with Caliper = 0.25", | |
| "GenMatch on all Covariates with M = 2 and exact = TRUE") | |
| names(estimates) <- c("Estimate", "Lower Bound(2.5%)", "Upper Bound(97.5%)") | |
| estimates |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment