Last active
May 2, 2023 13:51
-
-
Save nickp60/6321b365aa6ee5602ef40b856644ef9c to your computer and use it in GitHub Desktop.
Cumulative Microbiome Domination
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(tidyverse) | |
| if (!"speedyseq" %in% installed.packages()) remotes::install_github("mikemc/speedyseq") | |
| library(speedyseq) | |
| download.file("https://figshare.com/ndownloader/files/33076496", "samples.tsv") | |
| #this file has duplicate rows, and has multiple rows per pool | |
| samples <- read.csv("samples.tsv") %>% distinct() %>% | |
| select(-Pool, -Run, -ShotgunBatchID) %>% distinct() | |
| #download.file("https://figshare.com/ndownloader/files/26770931", "metadata.tsv") | |
| #metadata <- read.csv("metadata.tsv") | |
| download.file("https://figshare.com/ndownloader/files/26393788", "asv_counts_w.tsv") | |
| counts <- read.csv("asv_counts_w.tsv") | |
| download.file("https://figshare.com/ndownloader/files/26770997", "taxonomy.tsv") | |
| taxonomy <- read.csv("taxonomy.tsv") | |
| phy <- phyloseq( | |
| sample_data(samples %>% column_to_rownames("SampleID")), | |
| tax_table(taxonomy %>% select(ASV, Kingdom:Genus) %>% column_to_rownames("ASV") %>% as.matrix()), | |
| otu_table(counts %>% pivot_wider(names_from = "SampleID", values_from = "Count", values_fill = 0), taxa_are_rows = TRUE) | |
| ) %>% subset_samples(DayRelativeToNearestHCT > -30 & DayRelativeToNearestHCT < 30) %>% | |
| transform_sample_counts(~./sum(.)) | |
| phy_to_df <- function(phy){ | |
| left_join( | |
| tax_table(phy) %>% as_tibble(), | |
| otu_table(phy) %>% as_tibble(), | |
| ) %>% | |
| left_join( | |
| sample_data(phy) %>% as_tibble() | |
| ) | |
| } | |
| make_cumu_domination_plot <- function(phy, taxa_of_interest, tax_level, domination_threshold=.3, metadata_group=NULL, any_taxa_of_interest=FALSE){ | |
| taxa_absent <- taxa_of_interest[!taxa_of_interest %in% unique(tax_table(phy_genus)[, tax_level])] | |
| if (length(taxa_absent) > 0) warning(paste("Some", tax_level, "-level taxa of interest are absent from your dataset: ", paste(taxa_absent, collapse = ",", sep = " "))) | |
| print(paste("Aggregating to ", tax_level, "level")) | |
| phyglom <- speedyseq::tax_glom(phy, taxrank = tax_level) | |
| phydf <- phy_to_df(phyglom) %>% mutate(dominated = .abundance >= domination_threshold) | |
| print("Calculating patients per group") | |
| if (length(metadata_group) == 0) { | |
| phydf$group = NA | |
| } else{ | |
| phydf$group = phydf[[ metadata_group]] | |
| } | |
| phydf$taxlev = phydf[[tax_level]] | |
| phydf <- phydf %>% | |
| group_by(group) %>% | |
| mutate(n_patients_in_group = n_distinct(PatientID)) %>% | |
| ungroup() | |
| # do an inner join to subset to the taxa of interest | |
| taxdf <- data.frame(taxa_of_interest) | |
| colnames(taxdf) <- tax_level | |
| print("Subsetting to taxa of interest") | |
| phydf_of_interest <- phydf %>% inner_join(taxdf) | |
| print("Calculating cumulative domination; this can take a little while") | |
| if (any_taxa_of_interest){ | |
| print("Aggregating domination across all taxa of interest") | |
| phydf_of_interest$taxlev <- "Any" | |
| } | |
| cumulative_domination <- purrr::map(.x = sort(unique(phydf_of_interest$DayRelativeToNearestHCT)), .f = function(x) { | |
| phydf_of_interest %>% | |
| filter(DayRelativeToNearestHCT <= x) %>% | |
| group_by(group, n_patients_in_group, PatientID, taxlev) %>% | |
| summarize(any_dominated = any(dominated), .groups="drop") %>% | |
| group_by(group, taxlev) %>% | |
| summarize(collection_day = x, pct_any_dominated = sum(any_dominated) / n_patients_in_group * 100, .groups="drop") %>% distinct() | |
| }) %>% | |
| bind_rows() %>% | |
| ungroup() | |
| print("generating plot") | |
| p_cumu_dom <- ggplot(cumulative_domination, aes(x = collection_day, y = pct_any_dominated, color = taxlev)) + | |
| geom_point() + | |
| geom_line() + | |
| theme_bw() + | |
| labs(x = "Date of collection", y = "Cumulative Domination", color=tax_level) | |
| if (length(metadata_group) != 0) { | |
| p_cumu_dom <- p_cumu_dom + facet_grid(~group) | |
| } | |
| return(list("df" = cumulative_domination, "plot" = p_cumu_dom)) | |
| } | |
| eskape_results <- make_cumu_domination_plot( | |
| phy = phy, | |
| taxa_of_interest = c("Enterococcus", "Staphylococcus", "Klebsiella", "Acinetobacter", "Pseudomonas", "Enterobacter"), | |
| tax_level = "Genus",domination_threshold = .3, any_taxa_of_interest=TRUE | |
| ) | |
| eskape_results_ungrouped <- make_cumu_domination_plot( | |
| phy = phy, | |
| taxa_of_interest = c("Enterococcus", "Staphylococcus", "Klebsiella", "Acinetobacter", "Pseudomonas", "Enterobacter"), | |
| tax_level = "Genus",domination_threshold = .3, | |
| ) | |
| make_cumu_domination_plot( | |
| phy = phy, | |
| taxa_of_interest = c("Enterococcaceae", "Enterobacteriaceae", "Staphylococcaceae", "Streptococcaceae"), | |
| tax_level = "Family",domination_threshold = .3, | |
| ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment