Last active
September 24, 2025 15:34
-
-
Save naomispence/78b6f37ffaf652bf06ba830ab6a52d3d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Install if not already installed | |
| install.packages("geepack") | |
| # Load package | |
| library(geepack) | |
| # Make sure 'state' is a factor for clustering | |
| df$state <- as.factor(df$state) | |
| # Fit GEE model | |
| gee_model <- geeglm( | |
| y ~ x1 + x2, | |
| id = state, | |
| data = df, | |
| family = gaussian, | |
| corstr = "exchangeable" # or "ar1", "independence", etc. | |
| ) | |
| # Summary of model | |
| summary(gee_model) | |
| Great question! Residual diagnostics in GEE (geeglm from the geepack package) are a bit more limited than in ordinary linear models (lm), but you can still extract and plot residuals to check basic assumptions like: | |
| Normality | |
| Homoscedasticity (equal variance) | |
| Outliers | |
| ✅ Step-by-Step: How to Check Residuals from a geeglm Model | |
| # Fit GEE model | |
| library(geepack) | |
| gee_model <- geeglm( | |
| y ~ x1 + x2, | |
| id = state, | |
| data = df, | |
| family = gaussian, | |
| corstr = "exchangeable" | |
| ) | |
| # Extract residuals | |
| res <- residuals(gee_model, type = "response") # or "pearson", see below | |
| # Plot residuals vs fitted values | |
| plot(fitted(gee_model), res, | |
| xlab = "Fitted values", | |
| ylab = "Residuals", | |
| main = "Residuals vs Fitted") | |
| abline(h = 0, col = "red", lty = 2) | |
| # Histogram of residuals | |
| hist(res, breaks = 30, main = "Histogram of Residuals", xlab = "Residuals") | |
| # QQ plot to check normality | |
| qqnorm(res) | |
| qqline(res, col = "red") | |
| ##FIXED EFFECTS, NOT USING | |
| # Load necessary packages | |
| library(plm) | |
| library(dplyr) | |
| # Prepare data as panel data | |
| data_panel <- pdata.frame(data, index = c("state", "time_period")) | |
| # Fixed effects model | |
| fe_model <- plm(outcome ~ predictor1 + predictor2, data = data_panel, model = "within") | |
| summary(fe_model) | |
| # Random effects model | |
| re_model <- plm(outcome ~ predictor1 + predictor2, data = data_panel, model = "random") | |
| summary(re_model) | |
| # Hausman test to choose between fixed and random effects | |
| hausman_test <- phtest(fe_model, re_model) | |
| # Load dplyr package | |
| library(dplyr) | |
| # Group by state and time_period and then display outcome variable | |
| result <- data %>% | |
| group_by(state, time_period) %>% | |
| select(state, time_period, outcome) %>% | |
| arrange(state, time_period) | |
| # View the result | |
| print(result) | |
| hausman_test | |
| # Fixed effects model with time dummies | |
| data_panel$time_factor <- as.factor(data_panel$time_period) | |
| fe_time_model <- plm(outcome ~ predictor1 + predictor2 + time_factor, data = data_panel, model = "within") | |
| summary(fe_time_model) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment