Skip to content

Instantly share code, notes, and snippets.

#LINES 3-22 ARE EXAMPLES OF BIVARIATE ANALYSIS AND INTERPRETATION FOR 2 CATEGORICAL VARIABLES; LINES 25-49 ARE FOR QUANTITATIVE DEPENDENT AND CATEGORICAL INDEPENDENT.
#### CHI SQUARE: categorical independent variable and categorical dependent variable
# To reuse this example, only change the two variable names.
data.chisq1 <- chisq.test(
  x = wave5addhealth$H5OD2A,
  y = wave5addhealth$H5HR2
)
# Typing the object's name on its own line displays the test result.
data.chisq1
## INTERPRETATION OF CHI SQUARE: The chi square test of independence shows that there is a statistically significant
# association between sex at birth and living arrangements among adults in the U.S. (chi squared=14.715; p<.05).
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
data(wave5addhealth)
########DATA MANAGEMENT#######
# Install geepack only when it is not already installed, so re-running the
# script does not re-download the package every time.
if (!requireNamespace("geepack", quietly = TRUE)) {
  install.packages("geepack")
}
# Load package
library(geepack)
# Make sure 'state' is a factor for clustering
# NOTE(review): `df` is not created anywhere in the visible script -- replace
# it with the name of your own data frame before running this line.
df$state <- as.factor(df$state)
# Fit GEE model
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
data(wave5addhealth)
# One-way ANOVA: quantitative dependent variable (paeduc) compared across the
# categories of the independent variable (race).
# NOTE(review): gss123 must already be loaded (e.g. data(gss123)) before this
# runs; the setup lines directly above load wave5addhealth instead.
# The columns are named without the gss123$ prefix because data = gss123
# already tells aov() where to find them.
data.aov1 <- aov(paeduc ~ race, data = gss123)
summary(data.aov1)
# Group means: the variable being averaged (paeduc) goes first and the
# grouping variable (race) second; missing values are ignored.
by(gss123$paeduc, gss123$race, mean, na.rm = TRUE)
# Pairwise comparisons between the race categories for the fitted model.
TukeyHSD(data.aov1)
## CHANGE VARNAME TO YOUR VARIABLE NAME; CHANGE TITLES AND X-AXIS LABELS
# Frequency table for the variable.
frequency(wave5addhealth$VARNAME, title = "Frequency Distribution of YOUR VARIABLE DESCRIPTION")
# INTERPRET THE RESULT OF THE LINE ABOVE
# Same table with a cumulative percent column added.
frequency(wave5addhealth$VARNAME, cumulative.percent = TRUE, title = "YOUR VARIABLE DESCRIPTION")
# INTERPRET THE RESULT OF THE LINE ABOVE
# Bar graph showing each category as a share of all non-missing cases.
# Rows where VARNAME is NA are removed first so they do not get their own bar.
# The last layer must NOT end with "+": a trailing "+" makes R treat the next
# statement in the script as part of the plot and the script stops with an error.
ggplot(data = subset(wave5addhealth, !is.na(VARNAME)), aes(x = VARNAME)) +
  geom_bar(color = "blue", fill = "yellow", aes(y = ((..count..) / sum(..count..)))) +
  scale_y_continuous(labels = scales::percent) +
  ggtitle("Bar Graph of YOUR VARIABLE DESCRIPTION, Wave 5 Add Health")
# Recode any variable values BEFORE you label your dummy codes.
# Example: change the old dummy code 6 to a dummy code of 5, which combines
# the two groups into one.
wave5addhealth$H5HR2[which(wave5addhealth$H5HR2 == 6)] <- 5
# To code out missing data (R calls it NA), follow the example below. The
# number being compared must be the dummy code for missing data on your variable.
wave5addhealth$VARNAME[which(wave5addhealth$VARNAME == 97)] <- NA
# the two lines below tell R to treat your categorical variable as a categorical variable (factor) and label the dummy codes
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
data(wave5addhealth)
# Keep only the rows whose H5TO6 code is below 97 (codes of 97 and above are
# presumably missing-data codes -- confirm against the codebook).
# which() is used so that rows where H5TO6 is NA are dropped; indexing with
# the bare logical test would instead return those rows with EVERY column
# set to NA. drop = FALSE keeps the result a data frame.
wave5addhealth <- wave5addhealth[which(wave5addhealth$H5TO6 < 97), , drop = FALSE]
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
data(wave5addhealth)
#Lab Report 5: Making and Comparing Confidence Intervals
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
data(gss123)
options(scipen = 999)