Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created December 5, 2025 07:58
Show Gist options
  • Select an option

  • Save explodecomputer/d8d5fd4c24c830eca78f0ca964d4f209 to your computer and use it in GitHub Desktop.

Select an option

Save explodecomputer/d8d5fd4c24c830eca78f0ca964d4f209 to your computer and use it in GitHub Desktop.
BMI-alcohol-ancestry interaction
library(dplyr)
library(data.table)
library(seqminer)
library(stringr)
dat <- fread("/mnt/project/analysis/bmi-alcohol-ancestry/phen_extract.csv")
str(dat)
anc <- fread("/mnt/project/data/ancestry/ukb_InferredAncestry.txt")
str(anc)
sampleinfo_colnames <- stringr::str_replace_all(
tolower(c(
"x1",
"x2",
"genotyping.array",
"Batch",
"Plate.Name",
"Well",
"Cluster.CR",
"dQC",
"Internal.Pico..ng.uL.",
"Submitted.Gender",
"Inferred.Gender",
"X.intensity",
"Y.intensity",
"Submitted.Plate.Name",
"Submitted.Well",
"sample.qc.missing.rate",
"heterozygosity",
"heterozygosity.pc.corrected",
"het.missing.outliers",
"putative.sex.chromosome.aneuploidy",
"in.kinship.table",
"excluded.from.kinship.inference",
"excess.relatives",
"in.white.British.ancestry.subset",
"used.in.pca.calculation",
paste("pc", 1:40, sep = ""),
"in.Phasing.Input.chr1_22",
"in.Phasing.Input.chrX",
"in.Phasing.Input.chrXY"
)),
"[[:punct:]]", "_"
)
sampleinfo <- fread("/mnt/project/Bulk/Genotype Results/Genotype calls/ukb_sqc_v2.txt")
colnames(sampleinfo) <- sampleinfo_colnames
str(sampleinfo)
bg.ref.file <- "/mnt/project/Bulk/Imputation/Imputation from genotype (TOPmed)/ukb21007_c4_b0_v1.bgen"
file.exists(bg.ref.file)
bg.range <- "chr4:99318162-99318162"
geno.mat <- seqminer::readBGENToMatrixByRange(bg.ref.file, bg.range)
samplefile <- fread("/mnt/project/Bulk/Imputation/Imputation from genotype (TOPmed)/ukb21007_c4_b0_v1.sample", skip=2, header=FALSE)
str(samplefile)
str(geno.mat)
dim(geno.mat[[1]])
str(geno.mat[[1]])
# Combine everything
# genotype
# phenotypes
# ancestry
# PCs
table(samplefile$V1 %in% sampleinfo$x2)
table(samplefile$V1 %in% dat$eid)
table(samplefile$V1 %in% anc$IID)
dim(sampleinfo)
dim(samplefile)
str(dat)
d <- inner_join(dat, anc %>% select(IID, PC1, PC2, ANC=Anc_1st), by=c("eid"="IID"))
g <- tibble(eid=samplefile$V2, geno=drop(t(geno.mat[[1]])))
d <- inner_join(d, g)
str(d)
summary(lm(p21001_i0 ~ geno + PC1 + PC2, d))
summary(lm(p21001_i0 ~ geno + PC1 + PC2, d))
summary(lm(geno ~ p1558_i0 + PC1 + PC2, d))
d <- mutate(d,
alcohol = case_when(
p1558_i0 == "Never" ~ 0,
p1558_i0 == "Special occasions only" ~ 1,
p1558_i0 == "One to three times a month" ~ 2,
p1558_i0 == "Once or twice a week" ~ 3,
p1558_i0 == "Three or four times a week" ~ 4,
p1558_i0 == "Daily or almost daily" ~ 5,
TRUE ~ NA
))
table(d$alcohol)
table(d$alcohol, d$ANC)
summary(lm(p21001_i0 ~ geno * ANC, d))
summary(lm(p21001_i0 ~ geno * alcohol, d %>% filter(ANC == "EUR")))
summary(lm(p21001_i0 ~ geno * alcohol + geno * ANC, d))
summary(lm(p21001_i0 ~ geno * alcohol, d %>% filter(ANC == "SAS")))
summary(lm(alcohol ~ geno, d %>% filter(ANC == "EUR")))
summary(lm(alcohol ~ geno, d %>% filter(ANC == "EUR")))
summary(lm(p21001_i0 ~ geno, d %>% filter(ANC == "EUR")))
summary(lm(p21001_i0 ~ geno + alcohol, d %>% filter(ANC == "EUR")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment