Skip to content

Instantly share code, notes, and snippets.

@nickp60
Last active May 2, 2023 13:51
Show Gist options
  • Select an option

  • Save nickp60/6321b365aa6ee5602ef40b856644ef9c to your computer and use it in GitHub Desktop.

Select an option

Save nickp60/6321b365aa6ee5602ef40b856644ef9c to your computer and use it in GitHub Desktop.
Cumulative Microbiome Domination
library(tidyverse)
if (!"speedyseq" %in% installed.packages()) remotes::install_github("mikemc/speedyseq")
library(speedyseq)
download.file("https://figshare.com/ndownloader/files/33076496", "samples.tsv")
#this file has duplicate rows, and has multiple rows per pool
samples <- read.csv("samples.tsv") %>% distinct() %>%
select(-Pool, -Run, -ShotgunBatchID) %>% distinct()
#download.file("https://figshare.com/ndownloader/files/26770931", "metadata.tsv")
#metadata <- read.csv("metadata.tsv")
download.file("https://figshare.com/ndownloader/files/26393788", "asv_counts_w.tsv")
counts <- read.csv("asv_counts_w.tsv")
download.file("https://figshare.com/ndownloader/files/26770997", "taxonomy.tsv")
taxonomy <- read.csv("taxonomy.tsv")
phy <- phyloseq(
sample_data(samples %>% column_to_rownames("SampleID")),
tax_table(taxonomy %>% select(ASV, Kingdom:Genus) %>% column_to_rownames("ASV") %>% as.matrix()),
otu_table(counts %>% pivot_wider(names_from = "SampleID", values_from = "Count", values_fill = 0), taxa_are_rows = TRUE)
) %>% subset_samples(DayRelativeToNearestHCT > -30 & DayRelativeToNearestHCT < 30) %>%
transform_sample_counts(~./sum(.))
phy_to_df <- function(phy){
left_join(
tax_table(phy) %>% as_tibble(),
otu_table(phy) %>% as_tibble(),
) %>%
left_join(
sample_data(phy) %>% as_tibble()
)
}
make_cumu_domination_plot <- function(phy, taxa_of_interest, tax_level, domination_threshold=.3, metadata_group=NULL, any_taxa_of_interest=FALSE){
taxa_absent <- taxa_of_interest[!taxa_of_interest %in% unique(tax_table(phy_genus)[, tax_level])]
if (length(taxa_absent) > 0) warning(paste("Some", tax_level, "-level taxa of interest are absent from your dataset: ", paste(taxa_absent, collapse = ",", sep = " ")))
print(paste("Aggregating to ", tax_level, "level"))
phyglom <- speedyseq::tax_glom(phy, taxrank = tax_level)
phydf <- phy_to_df(phyglom) %>% mutate(dominated = .abundance >= domination_threshold)
print("Calculating patients per group")
if (length(metadata_group) == 0) {
phydf$group = NA
} else{
phydf$group = phydf[[ metadata_group]]
}
phydf$taxlev = phydf[[tax_level]]
phydf <- phydf %>%
group_by(group) %>%
mutate(n_patients_in_group = n_distinct(PatientID)) %>%
ungroup()
# do an inner join to subset to the taxa of interest
taxdf <- data.frame(taxa_of_interest)
colnames(taxdf) <- tax_level
print("Subsetting to taxa of interest")
phydf_of_interest <- phydf %>% inner_join(taxdf)
print("Calculating cumulative domination; this can take a little while")
if (any_taxa_of_interest){
print("Aggregating domination across all taxa of interest")
phydf_of_interest$taxlev <- "Any"
}
cumulative_domination <- purrr::map(.x = sort(unique(phydf_of_interest$DayRelativeToNearestHCT)), .f = function(x) {
phydf_of_interest %>%
filter(DayRelativeToNearestHCT <= x) %>%
group_by(group, n_patients_in_group, PatientID, taxlev) %>%
summarize(any_dominated = any(dominated), .groups="drop") %>%
group_by(group, taxlev) %>%
summarize(collection_day = x, pct_any_dominated = sum(any_dominated) / n_patients_in_group * 100, .groups="drop") %>% distinct()
}) %>%
bind_rows() %>%
ungroup()
print("generating plot")
p_cumu_dom <- ggplot(cumulative_domination, aes(x = collection_day, y = pct_any_dominated, color = taxlev)) +
geom_point() +
geom_line() +
theme_bw() +
labs(x = "Date of collection", y = "Cumulative Domination", color=tax_level)
if (length(metadata_group) != 0) {
p_cumu_dom <- p_cumu_dom + facet_grid(~group)
}
return(list("df" = cumulative_domination, "plot" = p_cumu_dom))
}
eskape_results <- make_cumu_domination_plot(
phy = phy,
taxa_of_interest = c("Enterococcus", "Staphylococcus", "Klebsiella", "Acinetobacter", "Pseudomonas", "Enterobacter"),
tax_level = "Genus",domination_threshold = .3, any_taxa_of_interest=TRUE
)
eskape_results_ungrouped <- make_cumu_domination_plot(
phy = phy,
taxa_of_interest = c("Enterococcus", "Staphylococcus", "Klebsiella", "Acinetobacter", "Pseudomonas", "Enterobacter"),
tax_level = "Genus",domination_threshold = .3,
)
make_cumu_domination_plot(
phy = phy,
taxa_of_interest = c("Enterococcaceae", "Enterobacteriaceae", "Staphylococcaceae", "Streptococcaceae"),
tax_level = "Family",domination_threshold = .3,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment