Created
December 22, 2020 04:46
-
-
Save SamBuckberry/e215b822c5f4694333b4eb723f06dc17 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(magrittr) | |
| library(GenomicRanges) | |
| library(BSgenome.Mmusculus.UCSC.mm10) | |
| library(rtracklayer) | |
| library(reshape2) | |
| library(ggplot2) | |
| library(dplyr) | |
| library(JASPAR2016) | |
| library(TFBSTools) | |
| library(stringr) | |
| source("~/polo_iPSC/R_functions/project_functions.R") | |
| # Get the PWM data from the database and reformat | |
| opts <- list() | |
| opts[["all_versions"]] <- TRUE | |
| opts[["collection"]] <- "CORE" | |
| opts[["matrixtype"]] <- "PFM" | |
| opts[["tax_group"]] <- "vertebrates" | |
| # pwm_data <- getMatrixSet(JASPAR2014, opts) | |
| # pwm_matrix_list_pfm <- Matrix(pwm_data) | |
| # | |
| # opts <- list() | |
| # opts[["all_versions"]] <- TRUE | |
| # opts[["collection"]] <- "PBM" | |
| # opts[["matrixtype"]] <- "PFM" | |
| # opts[["tax_group"]] <- "vertebrates" | |
| # pwm_data <- getMatrixSet(JASPAR2014, opts) | |
| # pwm_matrix_list_pbm <- Matrix(pwm_data) | |
| #test <- getMatrixByName(JASPAR2016, name=c("T")) | |
| pfmList <- getMatrixSet(JASPAR2016, opts) | |
| pfmList <- Matrix(pfmList) | |
| olig1 <- pfmList$MA0826.1 | |
| # Combine databases | |
| pwm_matrix_list <- c(pwm_matrix_list_pfm, pwm_matrix_list_pbm) | |
| save(all_pwm, file = "~/polo_iPSC/resources/pwm_matrix_list.Rda") | |
| load(file = "~/polo_iPSC/resources/pwm_matrix_list.Rda") | |
| ### Set the clusters into 4 groups | |
| c1 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_1.bed") | |
| c2 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_2.bed") | |
| c3 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_3.bed") | |
| c4 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_4.bed") | |
| c5 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_5.bed") | |
| c6 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_6.bed") | |
| c7 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_7.bed") | |
| c8 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_8.bed") | |
| early <- c5 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed") | |
| late <- GRangesList(c3, c7) %>% unlist() %>% | |
| gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed") | |
| trans <- GRangesList(c1, c6) %>% unlist() %>% | |
| gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed") | |
| loss <- GRangesList(c2, c4) %>% unlist() %>% | |
| gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed") | |
| stable <- c8 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed") | |
| # Get mofif PWMs | |
| #os <- pwm_matrix_list$MA0142.1 | |
| #klf4 <- pwm_matrix_list$MA0039.2 | |
| zfp691 <- pwm_matrix_list$PB0098.1 | |
| olig1 <- pfmList$MA0826.1 | |
| ctcf <- pfmList$MA0139.1 | |
| klf4 <- pfmList$MA0039.2 | |
| os <- pfmList$MA0142.1 | |
| brachyury <- pfmList$MA0009.2 | |
| eomes <- pfmList$MA0800.1 | |
| meox1 <- pfmList$MA0661.1 | |
| nrf1 <- pfmList$MA0506.1 | |
| tead4 <- pfmList$MA0809.1 | |
| klf5 <- pfmList$MA0599.1 | |
| yy1 <- pfmList$MA0095.2 | |
| sp1 <- pfmList$MA0079.2 | |
| tcf4 <- pfmList$MA0830.1 | |
| myc <- pfmList$MA0147.2 | |
| ap1 <- pfmList$MA0099.1 | |
| nfy <- pfmList$MA0060.1 | |
| # Set bed file for footprinting | |
| early_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed" | |
| late_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed" | |
| trans_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed" | |
| loss_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed" | |
| stable_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed" | |
| ips_occ <- "/Volumes/Datasets/atac_iPSC_combined_replicates.occ.bigwig" | |
| d9_occ <- "/Volumes/Datasets/atac_d9_combined_replicates.occ.bigwig" | |
| mef_occ <- "/Volumes/Datasets/atac_MEF_combined_replicates.occ.bigwig" | |
| # PLot footprint series function | |
| plot_footprint_series <- function(pwm, bed_file, occ_bigwig, min.score="75%", title=""){ | |
| ### ATAC-seq data import | |
| ins_fls <- list.files("/Volumes/Datasets/", | |
| pattern = "_combined_replicates.ins.bigwig", | |
| full.names = TRUE)[c(6,2:4,1,5)] | |
| norm_factors <- read.table("~/polo_iPSC/ATACseq/mapping_stats.txt")[c(6,2:4,1,5), 6] | |
| atac_bias <- "/Volumes/Datasets/mm10_bias.Scores.bigwig" | |
| # NucleoATAC | |
| nuc_fls <- list.files("/Volumes/Datasets/", | |
| pattern = ".nucleoatac_signal.bigwig", | |
| full.names = TRUE)[c(6,2:4,1,5)] | |
| #occ_bigwig <- "/Volumes/Datasets/atac_iPSC_combined_replicates.occ.bigwig" | |
| ### Get regions with PWM match | |
| regions <- read_bed(bed_file) | |
| regions_pwm <- motif_gr(regions, pwm = pwm, min.score = min.score) | |
| # Split by nucleosome occupancy class | |
| occ <- range_summary(bigwig = occ_bigwig, | |
| gr = regions_pwm, range = 20, | |
| log = FALSE, aggregate = FALSE) | |
| occ_means <- rowMeans(occ) | |
| set_occ_class <- function(x){ | |
| dat <- NULL | |
| if (x < 0.1) { | |
| dat <- "A" | |
| } else if (x >= 0.1 & x < 0.3) { | |
| dat <- "B" | |
| } else if (x >= 0.3 & x < 0.5) { | |
| dat <- "C" | |
| } else if (x > 0.5) { | |
| dat <- "D" | |
| } | |
| return(dat) | |
| } | |
| occ_class <- lapply(occ_means, set_occ_class) %>% unlist() | |
| ## Calc ins / bias | |
| calc_ins_over_bias <- function(x, bias, occ_bigwig, gr, range=150){ | |
| # Get the bigwig file and lib size | |
| bigwig <- ins_fls[x] | |
| lib_size <- norm_factors[x] / 1e6 | |
| # Extract ATAC signal | |
| ins <- range_summary(bigwig = bigwig, gr = gr, range = range, | |
| log = FALSE, aggregate = FALSE) | |
| # Count normalise | |
| ins <- ins / lib_size | |
| # Get signal bias | |
| bias_scores <- range_summary(bigwig = bias, gr = gr, range = range, | |
| log = TRUE, aggregate = FALSE) | |
| ## INSERTION / BIAS calc on individual positions | |
| # Normalise | |
| ins_over_bias <- ins / bias_scores | |
| ins_over_bias$class <- occ_class | |
| dat_melt <- melt(ins_over_bias) | |
| dat_mean <- dat_melt %>% | |
| group_by(variable, class) %>% | |
| summarise(yy=mean(value)) | |
| # ######## AGGREGATE insertions / bias | |
| # # Average for each class for ins and bias seperately calculation | |
| # ins$class <- occ_class | |
| # dat_melt <- melt(ins) | |
| # dat_mean <- dat_melt %>% | |
| # group_by(variable, class) %>% | |
| # summarise(yy=mean(value)) | |
| # | |
| # bias_scores$class <- occ_class | |
| # bias_melt <- melt(bias_scores) | |
| # bias_mean <- bias_melt %>% | |
| # group_by(variable, class) %>% | |
| # summarise(yy=mean(value)) | |
| # | |
| # all(dat_mean$class == bias_mean$class) | |
| # all(dat_mean$variable == bias_mean$variable) | |
| # | |
| # dat_mean$yy <- dat_mean$yy / bias_mean$yy | |
| ###### | |
| # Mean calculation for each postion | |
| #agg <- colMeans(ins_over_bias) | |
| # Reformat | |
| #df <- data.frame(x=-range:range, y=agg) | |
| # Get the sample ID | |
| id <- basename(bigwig) %>% str_split(pattern = "_") %>% unlist() | |
| #df$id <- id[2] | |
| dat_mean$id <- id[2] | |
| dat_mean$variable <- as.character(dat_mean$variable) %>% as.numeric() | |
| return(dat_mean) | |
| } | |
| footprints <- lapply(1:length(ins_fls), calc_ins_over_bias, | |
| bias=atac_bias, gr=regions_pwm, | |
| occ_bigwig=occ_bigwig) | |
| footprints <- do.call(rbind, footprints) | |
| footprints$id <- factor(footprints$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| # Set the title with group sizes | |
| sz <- table(occ_class) | |
| sz <- str_c(names(sz), "=", sz) %>% | |
| paste(sep="", collapse=", ") | |
| main <- str_c(title, sz, sep = " ") | |
| gg <- ggplot(footprints, aes(x = variable, y = yy, fill = 'blue')) + | |
| #stat_smooth(span = 10) + | |
| #geom_area(aes(fill = 'blue', stat = "bin")) + | |
| geom_line(size=0.2) + | |
| facet_grid(id~class) + | |
| xlab("Distance from motif") + | |
| ylab("Normalised insertions / insertion bias") + | |
| ggtitle(main) + | |
| theme_bw() + | |
| theme(plot.background = element_blank(), | |
| panel.border = element_blank(), | |
| strip.background = element_rect(size = 0), | |
| panel.grid.minor = element_blank(), | |
| axis.line = element_line(), | |
| axis.text = element_text(size=8)) | |
| return(gg) | |
| } | |
| # Early footprints -------------------------------------------------------- | |
| pdf("~/polo_iPSC/ATACseq/plots/footprinting/early_cluster_footprint_series.pdf", width = 5, height = 5) | |
| plot_footprint_series(pwm = os, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Oct4-Sox2") | |
| plot_footprint_series(pwm = klf4, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Klf4") | |
| plot_footprint_series(pwm = ctcf, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "CTCF") | |
| plot_footprint_series(pwm = tead4, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "TEAD4") | |
| plot_footprint_series(pwm = tcf4, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Tcf4") | |
| dev.off() | |
| pdf("~/polo_iPSC/ATACseq/plots/footprinting/stable_cluster_footprint_series.pdf") | |
| plot_footprint_series(pwm = os, bed_file = stable_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Oct4-Sox2") | |
| plot_footprint_series(pwm = klf4, bed_file = stable_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Klf4") | |
| plot_footprint_series(pwm = ctcf, bed_file = stable_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "CTCF") | |
| plot_footprint_series(pwm = sp1, bed_file = stable_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "SP1") | |
| dev.off() | |
| pdf("~/polo_iPSC/ATACseq/plots/footprinting/late_cluster_footprint_series.pdf") | |
| plot_footprint_series(pwm = yy1, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Yy1") | |
| plot_footprint_series(pwm = nfy, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "80%", title = "Nfy") | |
| dev.off() | |
| pdf("~/polo_iPSC/ATACseq/plots/footprinting/transient_cluster_footprint_series.pdf") | |
| plot_footprint_series(pwm = os, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "75%", title = "Oct4-Sox2") | |
| plot_footprint_series(pwm = klf4, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "Klf4") | |
| plot_footprint_series(pwm = olig1, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "Olig1") | |
| plot_footprint_series(pwm = ctcf, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "CTCF") | |
| plot_footprint_series(pwm = brachyury, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "T (Brachyury)") | |
| plot_footprint_series(pwm = eomes, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "Eomes") | |
| plot_footprint_series(pwm = nrf1, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "Nrf1") | |
| plot_footprint_series(pwm = tead4, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "Tead4") | |
| plot_footprint_series(pwm = klf5, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "Klf5") | |
| plot_footprint_series(pwm = myc, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "c-Myc") | |
| plot_footprint_series(pwm = ap1, bed_file = trans_bed, occ_bigwig = d9_occ, | |
| min.score = "85%", title = "AP-1") | |
| dev.off() | |
| plot_footprint_series(pwm = olig1, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Olig1") | |
| plot_footprint_series(pwm = ctcf, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "CTCF") | |
| plot_footprint_series(pwm = brachyury, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "T (Brachyury)") | |
| plot_footprint_series(pwm = eomes, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Eomes") | |
| plot_footprint_series(pwm = nrf1, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Nrf1") | |
| plot_footprint_series(pwm = tead4, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Tead4") | |
| plot_footprint_series(pwm = klf5, bed_file = early_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Klf5") | |
| dev.off() | |
| # Late footprints --------------------------------------------------------- | |
| pdf("~/polo_iPSC/ATACseq/plots/footprinting/late_cluster_footprint_series.pdf") | |
| plot_footprint_series(pwm = os, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Oct4-Sox2") | |
| plot_footprint_series(pwm = klf4, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Klf4") | |
| plot_footprint_series(pwm = olig1, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Olig1") | |
| plot_footprint_series(pwm = ctcf, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "CTCF") | |
| plot_footprint_series(pwm = brachyury, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "T (Brachyury)") | |
| plot_footprint_series(pwm = eomes, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Eomes") | |
| plot_footprint_series(pwm = nrf1, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "75%", title = "Nrf1") | |
| plot_footprint_series(pwm = tead4, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Tead4") | |
| plot_footprint_series(pwm = klf5, bed_file = late_bed, occ_bigwig = ips_occ, | |
| min.score = "85%", title = "Klf5") | |
| dev.off() | |
| # Transient footprints ---------------------------------------------------- | |
| # Loss footprints | |
| pdf("~/polo_iPSC/ATACseq/plots/footprinting/loss_cluster_footprint_series.pdf") | |
| plot_footprint_series(pwm = meox1, bed_file = loss_bed, occ_bigwig = mef_occ, | |
| min.score = "85%", title = "Meox1") | |
| plot_footprint_series(pwm = ap1, bed_file = loss_bed, occ_bigwig = mef_occ, | |
| min.score = "85%", title = "AP1") | |
| dev.off() | |
| # Plot footprint heatmaps ------------------------------------------------- | |
| calc_position_average <- function(bigwig_list, gr, name_list, pwm, range=500, | |
| min.score="75%"){ | |
| # Set the | |
| gr <- motif_gr(gr = gr, pwm = pwm, min.score = min.score) | |
| range_average <- function(x){ | |
| df <- range_summary(bigwig = bigwig_list[x], gr = gr, range = range, | |
| log = FALSE, aggregate = FALSE) %>% melt() | |
| df$variable <- as.character(df$variable) | |
| df <- df %>% dplyr::group_by(variable) %>% dplyr::summarise(yy=mean(value)) | |
| df$variable <- as.character(df$variable) %>% as.numeric() | |
| df$id <- name_list[x] | |
| return(df) | |
| } | |
| dat <- lapply(1:length(bigwig_list), range_average) | |
| dat <- do.call(rbind, dat) | |
| return(dat) | |
| } | |
| ## Heatmap footprints | |
| name_list <- c("MEF", "d3", "d6", "d9", "d12", "iPSC") | |
| heatmap_footprints <- function(bigwig_list, gr, name_list, pwm, title="", range=500, | |
| min.score="75%", heat_cols=topo.colors(100)){ | |
| df <- calc_position_average(bigwig_list = bigwig_list, gr = gr, | |
| name_list = name_list, pwm = pwm, range = range, | |
| min.score = min.score) | |
| df$id <- factor(df$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| colnames(df) <- c("variable", "Density", "id") | |
| gg <- ggplot(df, aes(x = variable, y = 1, fill = Density)) + | |
| facet_grid(id~.) + | |
| geom_line() + | |
| geom_raster(interpolate=TRUE) + | |
| scale_fill_gradientn(colours = heat_cols) + | |
| ylab("") + | |
| xlab("Distance to motif") + | |
| ggtitle(title) + | |
| theme_bw() + | |
| scale_x_continuous(expand = c(0, 0)) + | |
| scale_y_continuous(expand = c(0, 0)) + | |
| theme(axis.text = element_text(size=8), | |
| axis.text.y = element_blank(), | |
| axis.ticks.y = element_blank(), | |
| panel.grid = element_blank(), | |
| #panel.border = element_blank(), | |
| strip.text.y = element_text(angle = 0), | |
| strip.background = element_rect(fill = "white", | |
| color = "white")) | |
| gg | |
| } | |
| heatmap_normalised_insertions <- function(bigwig_list, gr, name_list, pwm, | |
| heat_cols=topo.colors(100), | |
| title="", range=range, min.score="75%"){ | |
| calc_normalised_average <- function(bigwig_list, gr, name_list, pwm, | |
| range=500, min.score="75%"){ | |
| # Set the | |
| gr <- motif_gr(gr = gr, pwm = pwm, min.score = min.score) | |
| range_average <- function(x){ | |
| df <- range_summary(bigwig = bigwig_list[x], gr = gr, range = range, | |
| log = FALSE, aggregate = FALSE) %>% melt() | |
| df <- df %>% group_by(variable) %>% summarise(yy=mean(value)) | |
| df$variable <- as.character(df$variable) %>% as.numeric() | |
| df$id <- name_list[x] | |
| # Normalise insertion count by library size | |
| norm_factors <- read.table("ATACseq/mapping_stats.txt")[c(6,2:4,1,5), 6] | |
| lib_size <- norm_factors[x] / 1e6 | |
| df$yy <- df$yy / lib_size | |
| return(df) | |
| } | |
| dat <- lapply(1:length(bigwig_list), range_average) | |
| dat <- do.call(rbind, dat) | |
| return(dat) | |
| } | |
| df <- calc_normalised_average(bigwig_list = bigwig_list, gr = gr, | |
| name_list = name_list, pwm = pwm, range = range, | |
| min.score = min.score) | |
| df$id <- factor(df$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| colnames(df) <- c("variable", "Density", "id") | |
| gg <- ggplot(df, aes(x = variable, y = 1, fill = Density)) + | |
| facet_grid(id~.) + | |
| #geom_line() + | |
| geom_raster(interpolate=TRUE) + | |
| scale_fill_gradientn(colours = heat_cols) + | |
| ylab("") + | |
| xlab("Distance to motif") + | |
| ggtitle(title) + | |
| theme_bw() + | |
| theme(axis.text = element_text(size=8), | |
| axis.text.y = element_blank(), | |
| axis.ticks.y = element_blank(), | |
| panel.grid = element_blank(), | |
| panel.border = element_blank(), | |
| panel.spacing = unit(0, "lines"), | |
| strip.text.y = element_text(angle = 0), | |
| strip.background = element_rect(fill = "white", | |
| color = "white")) | |
| gg | |
| } | |
| calc_meth_average <- function(bigwig_list, gr, name_list, pwm, range=500, | |
| min.score="75%"){ | |
| # Set the | |
| gr <- motif_gr(gr = gr, pwm = pwm, min.score = min.score) | |
| range_average <- function(x){ | |
| df <- range_summary_meth(bigwig = bigwig_list[x], gr = gr, | |
| range = range, log = FALSE, | |
| aggregate = FALSE) %>% melt() | |
| ## Mean with NA's removed | |
| df <- df %>% group_by(variable) %>% | |
| summarise(yy=mean(value, na.rm = TRUE)) | |
| df$variable <- as.character(df$variable) %>% as.numeric() | |
| df$id <- name_list[x] | |
| return(df) | |
| } | |
| dat <- lapply(1:length(bigwig_list), range_average) | |
| dat <- do.call(rbind, dat) | |
| return(dat) | |
| } | |
| heatmap_footprints_methylation <- function(bigwig_list, gr, name_list, pwm, title="", range=500, | |
| min.score="75%", heat_cols=topo.colors(100)){ | |
| df <- calc_meth_average(bigwig_list = bigwig_list, gr = gr, | |
| name_list = name_list, pwm = pwm, range = range, | |
| min.score = min.score) | |
| df$id <- factor(df$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| colnames(df) <- c("variable", "Density", "id") | |
| gg <- ggplot(df, aes(x = variable, y = 1, fill = Density)) + | |
| facet_grid(id~.) + | |
| #geom_line() + | |
| geom_raster(interpolate=TRUE) + | |
| scale_fill_gradientn(colours = heat_cols) + | |
| scale_x_continuous(expand = c(0, 0)) + | |
| scale_y_continuous(expand = c(0, 0)) + | |
| ylab("") + | |
| xlab("Distance to motif") + | |
| ggtitle(title) + | |
| theme_bw() + | |
| theme(axis.text = element_text(size=8), | |
| axis.text.y = element_blank(), | |
| axis.ticks.y = element_blank(), | |
| panel.grid = element_blank(), | |
| #panel.border = element_blank(), | |
| strip.text.y = element_text(angle = 0), | |
| strip.background = element_rect(fill = "white", | |
| color = "white")) | |
| gg | |
| } | |
| early <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed") | |
| trans <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed") | |
| late <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed") | |
| loss <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed") | |
| stable <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed") | |
| # ChIP-seq | |
| oct_fls <- list.files(path = "/Volumes/Datasets", pattern = "_oct_subtract.bw", | |
| full.names = TRUE)[c(10,2,4,6,1,9)] | |
| sox_fls <- list.files(path = "/Volumes/Datasets", pattern = "_sox_subtract.bw", | |
| full.names = TRUE)[c(10,3,5,6,1,9)] | |
| # ATAC-seq | |
| nuc_fls <- list.files("/Volumes/Datasets/", | |
| pattern = ".nucleoatac_signal.bigwig", | |
| full.names = TRUE)[c(6,2:4,1,5)] | |
| occ_fls <- list.files("/Volumes/Datasets/", | |
| pattern = "_combined_replicates.occ.bigwig", | |
| full.names = TRUE)[c(6,2:4,1,5)] | |
| ins_fls <- list.files("/Volumes/Datasets/", | |
| pattern = "_combined_replicates.ins.bigwig", | |
| full.names = TRUE)[c(6,2:4,1,5)] | |
| # Methylation files | |
| mc_fls <- list.files("/Volumes/Datasets/", pattern = "mmiPS", full.names = TRUE) | |
| # Generate the plots | |
| heatmap_footprints(bigwig_list = occ_fls, gr = early, range = 750, | |
| title = "Early Oct4-Sox2 Nucleosome occupancy", | |
| name_list = name_list, pwm = os) | |
| heatmap_footprints(bigwig_list = nuc_fls, gr = early, range = 750, | |
| title = "C1 Oct4-Sox2 NucleoATAC signal", | |
| name_list = name_list, pwm = os_pwm) | |
| reds <- colorRampPalette(colors = c('white', 'orange', 'red')) | |
| blues <- colorRampPalette(colors = c('white', 'green', 'blue')) | |
| cool_warm <- colorRampPalette(c('blue4', 'green4', 'yellow1')) | |
| black_red <- colorRampPalette(c('black', 'orange', 'red')) | |
| pdf(file = "~/polo_iPSC/ATACseq/plots/footprinting/OS_early_cluster_oct_sox_atac_heatmap.pdf", | |
| height = 2.5, width = 3) | |
| heatmap_normalised_insertions(bigwig_list = ins_fls, gr = early, range = 500, | |
| title = "Early Oct4-Sox2 Tn5 insertions", | |
| name_list = name_list, pwm = os) | |
| heatmap_footprints(bigwig_list = oct_fls, gr = early, name_list = name_list, range = 1000, | |
| heat_cols = reds(25), pwm = os, title = "Early Oct4 ChIP-seq") | |
| heatmap_footprints(bigwig_list = sox_fls, gr = early, name_list = name_list, range = 1000, | |
| heat_cols = reds(25), pwm = os, title = "Early Sox2 ChIP-seq") | |
| heatmap_footprints_methylation(bigwig_list = mc_fls, gr = early, name_list = name_list, range = 4000, | |
| heat_cols = black_red(25), pwm = os, title = "Early DNA methylation") | |
| dev.off() | |
| heatmap_footprints_methylation(bigwig_list = mc_fls, gr = early, name_list = name_list, range = 2000, | |
| heat_cols = reds(25), pwm = os, title = "C1 DNA methylation") | |
| pdf(file = "~/polo_iPSC/ATACseq/plots/footprinting/OS_early_cluster_insertions_heatmap.pdf", | |
| height = 2, width = 3) | |
| heatmap_normalised_insertions(bigwig_list = ins_fls, gr = early, range = 500, | |
| title = "Early Oct4-Sox2 Tn5 insertions", | |
| name_list = name_list, pwm = os) | |
| heatmap_normalised_insertions(bigwig_list = ins_fls, gr = stable, range = 500, | |
| title = "Stable Oct4-Sox2 Tn5 insertions", | |
| name_list = name_list, pwm = os) | |
| heatmap_normalised_insertions(bigwig_list = ins_fls, gr = stable, range = 800, | |
| title = "Stable CTCF Tn5 insertions", | |
| name_list = name_list, pwm = ctcf) | |
| heatma | |
| dev.off() | |
| heatmap_normalised_insertions(bigwig_list = ins_fls, gr = early, range = 500, | |
| title = "C1 Klf4 Tn5 insertions", | |
| name_list = name_list, pwm = klf4) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment