Skip to content

Instantly share code, notes, and snippets.

@owstron
Last active March 24, 2018 17:26
Show Gist options
  • Select an option

  • Save owstron/137852cb5d1b726dba905ae045b0bfee to your computer and use it in GitHub Desktop.

Select an option

Save owstron/137852cb5d1b726dba905ae045b0bfee to your computer and use it in GitHub Desktop.
rm(list=ls())
library('Matching')
library('MASS')
library('quantreg')
nswre74_control <- read.table("nswre74_control.txt")
nswre74_treated <- read.table("nswre74_treated.txt")
names(nswre74_treated) <- c("treat", "age", "educ", "black", "hisp", "married", "nodegr", "re74", "re75", "re78")
names(nswre74_control) <- c("treat", "age", "educ", "black", "hisp", "married", "nodegr", "re74", "re75", "re78")
#
# Finding point estimate of treatment effect
#
mean_re78_control = mean(nswre74_control$re78)
mean_re78_treated = mean(nswre74_treated$re78)
diff_mean = mean_re78_treated - mean_re78_control
cat("Point estimate of treatment effect with is ", diff_mean)
#### Finding confidence interval using Linear regression on experimental values ####
nswre74_data <- rbind(nswre74_treated, nswre74_control)
lm.exp <- lm(nswre74_data$re78 ~ nswre74_data$treat, data = nswre74_data)
summary(lm.exp)
cat("Confidence Interval of point estimate is ")
print(confint(lm.exp, level=0.95))
#### Point estimate and confidence interval with cps control ####
# Combining with cps_control
cps_control <- read.table("cps_controls.txt")
names(cps_control) <- c("treat", "age", "educ", "black", "hisp", "married", "nodegr", "re74", "re75", "re78")
nswre74_cps_data <- rbind(nswre74_treated, cps_control)
mean_treated <- mean(nswre74_cps_data[which(nswre74_cps_data$treat == 1),]$re78)
mean_control <- mean(nswre74_cps_data[which(nswre74_cps_data$treat == 0),]$re78)
diff_mean_cps = mean_treated - mean_control
cat("Point Estimate with CPS control data is ", diff_mean_cps)
lm.cps <- lm(nswre74_cps_data$re78 ~ nswre74_cps_data$treat, data = nswre74_cps_data)
summary(lm.cps)
cat("Confidence Interval of point estimate is ")
print(confint(lm.cps, level=0.95))
#### Propensity Score Matching and finding treatment effect ####
glm_psm <- glm(treat ~ age + educ + black +
hisp + married + nodegr + re74 + re75,
family=binomial, data=nswre74_cps_data)
psm_score <- glm_psm$fitted
Y <- nswre74_cps_data$re78
Tr <- nswre74_cps_data$treat
#
# Matching based on propensity score
#
psm_match <- Match(Y=Y, Tr=Tr, X=psm_score, M=1)
psm_data <- psm_match$mdata
summary(psm_match)
# Checking balance of data
mb.psm <- MatchBalance(treat~age + educ + black +
hisp + married + nodegr + re74 + re75
, data=nswre74_cps_data, match.out=psm_match, nboots=500)
# Calculating estimate using linear regression
psm_data <- psm_match$mdata
lm.psm <- lm(psm_data$Y ~ psm_data$Tr, data=psm_data)
summary(lm.psm)
cat("Point estimate is ", coef(lm.psm)[2])
cat("Confidence Interval is ")
print(confint(psm_lm, level = 0.95))
#### Matching with multiple covariates and Propensity Score ####
Y <- nswre74_cps_data$re78
Tr <- nswre74_cps_data$treat
X <- cbind(nswre74_cps_data$age,
nswre74_cps_data$educ,
nswre74_cps_data$black == 1,
nswre74_cps_data$black == 0,
nswre74_cps_data$hisp == 1,
nswre74_cps_data$hisp == 0,
nswre74_cps_data$married == 1,
nswre74_cps_data$married == 0,
nswre74_cps_data$nodegr == 1,
nswre74_cps_data$nodegr == 0,
nswre74_cps_data$re74,
nswre74_cps_data$re75,
psm_score)
mout.mv <- Match(Y=Y, Tr=Tr, X=X, M=1)
summary(mout.mv)
# Finding Match Balance of multivariate matching
mb.mv <- MatchBalance(treat~age + educ + black +
hisp + married + nodegr + re74 + re75,
data=nswre74_cps_data, match.out=mout.mv, nboots=500)
# Estimating estimates with linear regression
mv_data <- mout.mv$mdata
lm.mv <- lm(mv_data$Y ~ mv_data$Tr, data = mv_data)
summary(lm.mv)
cat("Point estimate is ", coef(lm.mv)[2])
cat("Confidence Interval is ")
print(confint(lm.mv, level = 0.95))
#### Genetic Matching with Propensity Score ####
Y <- nswre74_cps_data$re78
Tr <- nswre74_cps_data$treat
X <- psm_score
gout.psm <- GenMatch(Tr=Tr, X=X, M=1, estimand="ATT", pop.size = 100, max.generations = 20, wait.generations = 7)
mout.gen.psm <- Match(Y=Y, Tr=Tr, X=X, M=1, estimand="ATT", Weight.matrix = gout.psm)
# Finding Match Balance of multivariate matching
mb.gen.psm <- MatchBalance(treat~age + educ + black +
hisp + married + nodegr + re74 + re75,
data=nswre74_cps_data, match.out=mout.gen.psm, nboots=500)
# Estimating estimates with linear regression
gen_psm_data <- mout.gen.psm$mdata
lm.gen.psm <- lm(gen_psm_data$Y ~ gen_psm_data$Tr, data=gen_psm_data)
summary(lm.gen.psm)
cat("Point estimate is ", coef(lm.gen.psm)[2])
cat("Confidence Interval is ")
print(confint(lm.gen.psm, level = 0.95))
#### Genetic Matching with all covariates ####
Y <- nswre74_cps_data$re78
Tr <- nswre74_cps_data$treat
X_all <- cbind(nswre74_cps_data$age,
nswre74_cps_data$educ,
nswre74_cps_data$black == 1,
nswre74_cps_data$black == 0,
nswre74_cps_data$hisp == 1,
nswre74_cps_data$hisp == 0,
nswre74_cps_data$married == 1,
nswre74_cps_data$married == 0,
nswre74_cps_data$nodegr == 1,
nswre74_cps_data$nodegr == 0,
nswre74_cps_data$re74,
nswre74_cps_data$re75,
psm_score)
gout.mv <- GenMatch(Tr=Tr, X=X_all, M=1, estimand = "ATT", pop.size = 100, max.generations = 20, wait.generations = 7)
mout.gen.mv <- Match(Y=Y, Tr=Tr, X=X_all, estimand="ATT", M=1, Weight.matrix = gout.mv)
# Finding Match Balance of multivariate matching
mb.gen.mv <- MatchBalance(treat ~ age + educ + black +
hisp + married + nodegr + re74 + re75,
data=nswre74_cps_data, match.out=mout.gen.mv, nboots=500)
# Estimating the effect with linear regression
gen_mv_data <- mout.gen.mv$mdata
lm.gen.mv <- lm(gen_mv_data$Y ~ gen_mv_data$Tr, data=gen_mv_data)
summary(lm.gen.mv)
cat("Point estimate is ", coef(lm.gen.mv)[2])
cat("Confidence Interval is ")
print(confint(lm.gen.mv, level = 0.95))
#### Genetic Matching with Propensity Score with caliper = 0.25 ####
Y <- nswre74_cps_data$re78
Tr <- nswre74_cps_data$treat
X <- psm_score
gout.cal.psm <- GenMatch(Tr=Tr, X=X, M=1, estimand = "ATT", pop.size = 100,
max.generations = 20, wait.generations = 7, caliper = 0.25)
mout.gen.cal.psm <- Match(Y=Y, Tr=Tr, X=X, M=1, estimand="ATT",
caliper = 0.25, Weight.matrix = gout.cal.psm)
# Finding Match Balance of multivariate matching
mb.gen.cal.psm <- MatchBalance(treat ~ age + educ + black +
hisp + married + nodegr + re74 + re75,
data=nswre74_cps_data, match.out=mout.gen.cal.psm, nboots=500)
# Estimating the effect with linear regression
gen_cal_psm_data <- mout.gen.cal.psm$mdata
lm.gen.cal.psm <- lm(gen_cal_psm_data$Y ~ gen_cal_psm_data$Tr, data=gen_cal_psm_data)
summary(lm.gen.cal.psm)
cat("Point estimate is ", coef(lm.gen.cal.psm)[2])
cat("Confidence Interval is ")
print(confint(lm.gen.cal.psm, level = 0.95))
#### Genetic Matching with Mulitvariate Score with caliper = 0.25 ####
Y <- nswre74_cps_data$re78
Tr <- nswre74_cps_data$treat
X_all <- cbind(nswre74_cps_data$age,
nswre74_cps_data$educ,
nswre74_cps_data$black == 1,
nswre74_cps_data$black == 0,
nswre74_cps_data$hisp == 1,
nswre74_cps_data$hisp == 0,
nswre74_cps_data$married == 1,
nswre74_cps_data$married == 0,
nswre74_cps_data$nodegr == 1,
nswre74_cps_data$nodegr == 0,
nswre74_cps_data$re74,
nswre74_cps_data$re75,
psm_score)
gout.cal.mv <- GenMatch(Tr=Tr, X=X_all, M=1, estimand = "ATT", pop.size = 100, max.generations = 20, wait.generations = 7, caliper = 0.25)
mout.gen.cal.mv <- Match(Y=Y, Tr=Tr, X=X_all, estimand="ATT", M=1, caliper = 0.25, Weight.matrix = gout.cal.mv)
# Finding Match Balance of multivariate matching
mb.gen.cal.mv <- MatchBalance(treat ~ age + educ + black +
hisp + married + nodegr + re74 + re75,
data=nswre74_cps_data, match.out=mout.gen.cal.mv, nboots=500)
# Estimating the effect with linear regression
gen_cal_mv_data <- mout.gen.cal.mv$mdata
lm.gen.cal.mv <- lm(gen_cal_mv_data$Y ~ gen_cal_mv_data$Tr, data=gen_cal_mv_data)
summary(lm.gen.cal.mv)
cat("Point estimate is ", coef(lm.gen.cal.mv)[2])
cat("Confidence Interval is ")
print(confint(lm.gen.cal.mv, level = 0.95))
#### Genetic Matching with Mulitvariate Score with M=2 and exact = TRUE ####
Y <- nswre74_cps_data$re78
Tr <- nswre74_cps_data$treat
X_all <- cbind(nswre74_cps_data$age,
nswre74_cps_data$educ,
nswre74_cps_data$black == 1,
nswre74_cps_data$black == 0,
nswre74_cps_data$hisp == 1,
nswre74_cps_data$hisp == 0,
nswre74_cps_data$married == 1,
nswre74_cps_data$married == 0,
nswre74_cps_data$nodegr == 1,
nswre74_cps_data$nodegr == 0,
nswre74_cps_data$re74,
nswre74_cps_data$re75,
psm_score)
gout.3 <- GenMatch(Tr=Tr, X=X_all, M=2, estimand = "ATT", pop.size = 100, max.generations = 20, wait.generations = 7, exact = TRUE)
mout.gen.3 <- Match(Y=Y, Tr=Tr, X=X_all, estimand="ATT", M=2, Weight.matrix = gout.3, exact = TRUE)
# Finding Match Balance of multivariate matching
mb.gen.3 <- MatchBalance(treat ~ age + educ + black +
hisp + married + nodegr + re74 + re75,
data=nswre74_cps_data, match.out=mout.gen.3, nboots=500)
# Estimating the effect with linear regression
gen3_data <- mout.gen.3$mdata
lm.gen.3 <- lm(gen3_data$Y ~ gen3_data$Tr, data=gen3_data)
summary(lm.gen.3)
cat("Point estimate is ", coef(lm.gen.3)[2])
cat("Confidence Interval is ")
print(confint(lm.gen.3, level = 0.95))
#### Printing out estimates ####
coefs <- c(coef(lm.exp)[2], coef(lm.cps)[2], coef(lm.psm)[2],
coef(lm.mv)[2], coef(lm.gen.psm)[2], coef(lm.gen.mv)[2],
coef(lm.gen.cal.psm)[2], coef(lm.gen.cal.mv)[2], coef(lm.gen.3)[2])
lbounds <- c(confint(lm.exp)[2,1],
confint(lm.cps)[2,1],
confint(lm.psm)[2,1],
confint(lm.mv)[2,1],
confint(lm.gen.psm)[2,1],
confint(lm.gen.mv)[2,1],
confint(lm.gen.cal.psm)[2,1],
confint(lm.gen.cal.mv)[2,1],
confint(lm.gen.3)[2,1])
ubounds <- c(confint(lm.exp)[2,2],
confint(lm.cps)[2,2],
confint(lm.psm)[2,2],
confint(lm.mv)[2,2],
confint(lm.gen.psm)[2,2],
confint(lm.gen.mv)[2,2],
confint(lm.gen.cal.psm)[2,2],
confint(lm.gen.cal.mv)[2,2],
confint(lm.gen.3)[2,2])
estimates <- data.frame(coefs, lbounds, ubounds)
row.names(estimates) <- c("With only Experimental Data", "With CPS Control",
"After Propensity Score Matching", "After Multivariate Matching",
"After GenMatch with Propensity Score",
"After GenMatch with all Covariates",
"GenMatch Propensity Score with Caliper = 0.25",
"GenMatch all Covariates with Caliper = 0.25",
"GenMatch on all Covariates with M = 2 and exact = TRUE")
names(estimates) <- c("Estimate", "Lower Bound(2.5%)", "Upper Bound(97.5%)")
estimates
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment