Created
December 22, 2020 04:48
-
-
Save SamBuckberry/ee5c88c390cc6cbb75a52fb64b62d83f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(magrittr) | |
| library(GenomicRanges) | |
| library(BSgenome.Mmusculus.UCSC.mm10) | |
| library(rtracklayer) | |
| library(reshape2) | |
| library(ggplot2) | |
| library(dplyr) | |
| library(JASPAR2016) | |
| library(TFBSTools) | |
| library(stringr) | |
| source("~/polo_iPSC/R_functions/project_functions.R") | |
| # Get the PWM data from the database and reformat | |
| opts <- list() | |
| opts[["all_versions"]] <- TRUE | |
| opts[["collection"]] <- "CORE" | |
| opts[["matrixtype"]] <- "PFM" | |
| opts[["tax_group"]] <- "vertebrates" | |
| # pwm_data <- getMatrixSet(JASPAR2014, opts) | |
| # pwm_matrix_list_pfm <- Matrix(pwm_data) | |
| # | |
| # opts <- list() | |
| # opts[["all_versions"]] <- TRUE | |
| # opts[["collection"]] <- "PBM" | |
| # opts[["matrixtype"]] <- "PFM" | |
| # opts[["tax_group"]] <- "vertebrates" | |
| # pwm_data <- getMatrixSet(JASPAR2014, opts) | |
| # pwm_matrix_list_pbm <- Matrix(pwm_data) | |
| #test <- getMatrixByName(JASPAR2016, name=c("T")) | |
| pfmList <- getMatrixSet(JASPAR2016, opts) | |
| #pfmList <- Matrix(pfmList) | |
| tf_name <- pfmList@listData$MA0002.1@name | |
| tf <- pfmList[names(pfmList) == "MA0002.1"] | |
| save(pfmList, file = "~/polo_iPSC/resources/pfmList.Rda") | |
| load(file = "~/polo_iPSC/resources/pfmList.Rda") | |
| ### Set the clusters into 4 groups | |
| c1 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_1.bed") | |
| c2 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_2.bed") | |
| c3 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_3.bed") | |
| c4 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_4.bed") | |
| c5 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_5.bed") | |
| c6 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_6.bed") | |
| c7 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_7.bed") | |
| c8 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_8.bed") | |
| early <- c5 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed") | |
| late <- GRangesList(c3, c7) %>% unlist() %>% | |
| gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed") | |
| trans <- GRangesList(c1, c6) %>% unlist() %>% | |
| gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed") | |
| loss <- GRangesList(c2, c4) %>% unlist() %>% | |
| gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed") | |
| stable <- c8 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed") | |
| # Get mofif PWMs | |
| olig1 <- pfmList$MA0826.1 | |
| ctcf <- pfmList$MA0139.1 | |
| klf4 <- pfmList$MA0039.2 | |
| os <- pfmList$MA0142.1 | |
| brachyury <- pfmList$MA0009.2 | |
| eomes <- pfmList$MA0800.1 | |
| meox1 <- pfmList$MA0661.1 | |
| nrf1 <- pfmList$MA0506.1 | |
| tead4 <- pfmList$MA0809.1 | |
| klf5 <- pfmList$MA0599.1 | |
| yy1 <- pfmList$MA0095.2 | |
| sp1 <- pfmList$MA0079.2 | |
| tcf4 <- pfmList$MA0830.1 | |
| myc <- pfmList$MA0147.2 | |
| ap1 <- pfmList$MA0099.1 | |
| nfy <- pfmList$MA0060.1 | |
| ## Insertion files | |
| ins_fls <- list.files("/Volumes/Datasets/", | |
| pattern = "_combined_replicates.ins.bigwig", | |
| full.names = TRUE)[c(6,2:4,1,5)] | |
| mC_fls <- list.files("/Volumes/Datasets/", | |
| pattern = ".CG.level.unstranded.bigwig", | |
| full.names = TRUE) | |
| ## Library sizes | |
| lib_sizes <- read.table("~/polo_iPSC/ATACseq/mapping_stats.txt")[c(6,2:4,1,5), ] | |
| ####****** Make sure ins_fls and lib sizes are in correct order*******########### | |
| ## Cluster bed files | |
| cluster_beds <- c("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_1.bed", | |
| "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_2.bed", | |
| "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_3.bed", | |
| "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_4.bed", | |
| "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_5.bed", | |
| "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_6.bed", | |
| "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_7.bed", | |
| "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_8.bed") | |
| ## Nucleosome occupancy files | |
| ips_occ <- "/Volumes/Datasets/atac_iPSC_combined_replicates.occ.bigwig" | |
| d9_occ <- "/Volumes/Datasets/atac_d9_combined_replicates.occ.bigwig" | |
| mef_occ <- "/Volumes/Datasets/atac_MEF_combined_replicates.occ.bigwig" | |
| ## Bias track | |
| bias_bw <- "/Volumes/Datasets/mm10_bias.Scores.bigwig" | |
| ### Footprint analysis function | |
| cluster <- 5 | |
| pwm <- pfmList$MA0139.1 | |
| occ_bigwig <- ips_occ | |
| title <- "test" | |
| min.score="85%" | |
| width=45 | |
| mC_bw <- "/Volumes/Datasets/mmiPS_1__0Day.CG.level.unstranded.bigwig" | |
| run_footprint <- function(pwm, cluster, occ_bigwig, title="", | |
| min.score="85%", width=300){ | |
| # Get the cluster bed file to GRanges | |
| cluster_gr <- bed_to_gr(cluster_beds[cluster]) | |
| # Find the motif positions in the ranges | |
| regions_pwm <- motif_gr(gr = cluster_gr, pwm = pwm, | |
| min.score = min.score) | |
| # Calculate footprint metrics | |
| calc_footprint <- function(ins_bw, bias_bw, mC_bw, regions_pwm, occ_bigwig, | |
| width=width, lib_size=1){ | |
| # Split by nucleosome occupancy class | |
| occ <- range_summary(bigwig = occ_bigwig, | |
| gr = regions_pwm, range = 20, | |
| log = FALSE, aggregate = FALSE) | |
| occ_means <- rowMeans(occ) | |
| set_occ_class <- function(x){ | |
| dat <- NULL | |
| if (x < 0.1) { | |
| dat <- "A" | |
| } else if (x >= 0.1 & x < 0.3) { | |
| dat <- "B" | |
| } else if (x >= 0.3 & x < 0.5) { | |
| dat <- "C" | |
| } else if (x > 0.5) { | |
| dat <- "D" | |
| } | |
| return(dat) | |
| } | |
| occ_class <- lapply(occ_means, set_occ_class) %>% unlist() | |
| # Get data frame of region and class for export | |
| loci_id <- str_c(seqnames(regions_pwm), start(regions_pwm), sep = ":") %>% | |
| str_c(end(regions_pwm), sep = "-") | |
| region_class <- data.frame(loci=loci_id, occ_class=occ_class) | |
| ## Scale 0-1 function | |
| range01 <- function(x){(x-min(x))/(max(x)-min(x))} | |
| ## Calculate footprint stats for each class | |
| calc_range_summary <- function(class="A"){ | |
| # insertion sumary | |
| ins <- range_summary(bigwig = ins_bw, | |
| gr = regions_pwm[occ_class == class], | |
| range = width, log = FALSE, aggregate = FALSE) | |
| # Bias summary | |
| bias <- range_summary(bigwig = bias_bw, | |
| gr = regions_pwm[occ_class == class], | |
| range = width, log = TRUE, aggregate = FALSE) | |
| mC <- range_summary_meth(bigwig = mC_bw, | |
| gr = regions_pwm[occ_class == class], | |
| range = width, log = FALSE, aggregate = FALSE) | |
| mC_mean <- colMeans(mC, na.rm = TRUE) | |
| # Bias correction | |
| ins_over_bias <- ins / bias | |
| ins_over_bias_cpm <- (ins / lib_size) / bias | |
| df <- data.frame(position=as.numeric(colnames(ins)), | |
| signal=colSums(ins), | |
| bias=colSums(bias), | |
| sob=colSums(ins_over_bias), | |
| sob_cpm=colSums(ins_over_bias_cpm), | |
| sob_scaled=range01(colSums(ins_over_bias) / lib_size), | |
| bias_scaled=range01(colSums(bias)), | |
| class=class, | |
| mCG_mean=mC_mean) | |
| return(df) | |
| } | |
| dat <- lapply(X = unique(occ_class), calc_range_summary) | |
| dat <- do.call(rbind, dat) | |
| ## Calculate the footprint and flank average scores | |
| motif_flank <- round(x = ncol(pwm) / 2, digits = 0) - 2 | |
| dat$motif_flank <- NA | |
| dat$motif_flank[dat$position <= -motif_flank] <- "UP" | |
| dat$motif_flank[dat$position >= motif_flank] <- "DOWN" | |
| dat$motif_flank[abs(dat$position) < motif_flank] <- "MOTIF" | |
| calc_fp_scores <- function(class){ | |
| ## Calculate footprint quality 0-1 | |
| flank_up_avg <- dat$sob_scaled[dat$motif_flank == "UP" & | |
| dat$class == class] %>% mean() | |
| flank_down_avg <- dat$sob_scaled[dat$motif_flank == "DOWN" & | |
| dat$class == class] %>% mean() | |
| flank_avg <- (flank_up_avg + flank_down_avg) / 2 | |
| motif_avg <- dat$sob_scaled[dat$motif_flank == "MOTIF" & | |
| dat$class == class] %>% mean() | |
| fp_score <- flank_avg - motif_avg | |
| ## Calculate fp to flank ration | |
| flank_unscaled_up_avg <- dat$sob[dat$motif_flank == "UP" & | |
| dat$class == class] %>% mean() | |
| flank_unscaled_down_avg <- dat$sob[dat$motif_flank == "DOWN" & | |
| dat$class == class] %>% mean() | |
| flank_unscaled_avg <- (flank_unscaled_up_avg + flank_unscaled_down_avg) / 2 | |
| motif_unscaled_avg <- dat$sob[dat$motif_flank == "MOTIF" & | |
| dat$class == class] %>% mean() | |
| fp_ratio <- flank_unscaled_avg / motif_unscaled_avg | |
| fp_diff <- flank_unscaled_avg - motif_unscaled_avg | |
| # Number of reagions per class | |
| n_regions <- sum(occ_class == class) | |
| return(c(class=class, flank_avg=flank_avg, | |
| motif_avg=motif_avg, fp_score=fp_score, | |
| flank_unscaled_avg=flank_unscaled_avg, | |
| motif_unscaled_avg=motif_unscaled_avg, | |
| fp_ratio=fp_ratio, | |
| fp_diff=fp_diff, | |
| n_regions=n_regions)) | |
| } | |
| fp_scores <- lapply(unique(occ_class), calc_fp_scores) | |
| fp_scores <- do.call(rbind, fp_scores) %>% data.frame() | |
| gg <- ggplot(dat, aes(x = position, y = sob, group=class)) + | |
| geom_line() + | |
| facet_grid(class~.) | |
| return(list(fp_scores=fp_scores, dat=dat, | |
| region_class=region_class, | |
| ins_bw=ins_bw, ggplot=gg)) | |
| } | |
| calc_series <- function(x, regions_pwm, occ_bigwig){ | |
| series_dat <- lapply(ins_fls[x], FUN = calc_footprint, | |
| mC_bw = mC_fls[x], | |
| regions_pwm = regions_pwm, | |
| occ_bigwig = occ_bigwig, | |
| bias_bw = bias_bw, width = width, | |
| lib_size=lib_sizes$records[x]) | |
| return(series_dat) | |
| } | |
| series <- lapply(1:length(ins_fls), FUN = calc_series, | |
| regions_pwm=regions_pwm, occ_bigwig=occ_bigwig) | |
| names(series) <- c("MEF", "d3", "d6", "d9", "d12", "iPSC") | |
| ## Summaries data series | |
| summarise_series <- function(x){ | |
| d1 <- series[x][[1]] | |
| d1 <- d1[[1]]$dat | |
| d1$timepoint <- names(series[x]) | |
| return(d1) | |
| } | |
| series_summary <- lapply(1:length(series), summarise_series) | |
| series_summary <- do.call(rbind, series_summary) | |
| ## Get occupancy class numbers | |
| occ_table <- table(series$MEF[[1]]$region_class$occ_class) %>% | |
| data.frame() | |
| ## Summaries TF scores | |
| summarise_score <- function(x){ | |
| d1 <- series[x][[1]] | |
| d1 <- d1[[1]]$fp_scores | |
| d1$timepoint <- names(series[x]) | |
| return(d1) | |
| } | |
| score_summary <- lapply(1:length(series), summarise_score) | |
| score_summary <- do.call(rbind, score_summary) | |
| ## Generate plots | |
| plot_fp_scores <- function(fp, title){ | |
| get_fp_score <- function(x){ | |
| dat <- fp[x] | |
| dat <- dat[[1]][[1]]$fp_scores | |
| dat <- dat[ ,c(1, 8, 9)] | |
| dat$timepoint <- names(fp)[x] | |
| return(dat) | |
| } | |
| fp_scores <- lapply(1:length(fp), get_fp_score) | |
| fp_scores <- do.call(rbind, fp_scores) | |
| fp_scores$fp_diff <- as.character(fp_scores$fp_diff) %>% | |
| as.numeric() | |
| fp_scores$timepoint <- factor(fp_scores$timepoint, | |
| levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| gg <- ggplot(fp_scores, aes(x = timepoint, y = fp_diff, | |
| group=class, colour=class)) + | |
| geom_line(size=2) + | |
| ylab("Footprint score") + | |
| xlab("Timepoint") + | |
| theme_bw() + | |
| ggtitle(title) | |
| return(gg) | |
| } | |
| plot_footprints <- function(fp, title){ | |
| # Get the data for each timepoint | |
| get_fp_data <- function(fl_name){ | |
| dat <-fp[names(fp) == fl_name] | |
| dat <- dat[[1]][[1]]$dat | |
| dat$name <- str_replace(fl_name, | |
| pattern = "_combined_replicates.ins.bigwig", | |
| replacement = "") %>% | |
| str_replace(pattern = "atac_", replacement = "") | |
| return(dat) | |
| } | |
| fp_dat <- lapply(names(fp), get_fp_data) | |
| fp_dat <- do.call(rbind, fp_dat) | |
| # set levels fo classes | |
| fp_dat$class <- factor(fp_dat$class, levels=c("A", "B", "C", "D")) | |
| #fp_dat <- fp_dat[fp_dat$class == "C", ] | |
| fp_dat$name <- factor(fp_dat$name, | |
| levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| gg <- ggplot(fp_dat, aes(x = position, y = sob_cpm, | |
| group = name, colour=class)) + | |
| geom_line() + | |
| facet_grid(name~class) + | |
| ylab("Normalised insertions / bias") + | |
| xlab("Relative position to motif centre") + | |
| theme_bw() + | |
| theme(panel.grid = element_blank()) | |
| ggtitle(title) | |
| return(gg) | |
| } | |
| plot_fp_range <- function(fp, title){ | |
| get_fp_range <- function(x){ | |
| dat <- fp[x] | |
| dat <- dat[[1]][[1]]$fp_scores | |
| dat <- dat[ ,c(1, 5, 6, 9)] | |
| dat$timepoint <- names(fp)[x] | |
| return(dat) | |
| } | |
| fp_range <- lapply(1:length(fp), get_fp_range) | |
| fp_range <- do.call(rbind, fp_range) | |
| fp_range$flank<- as.character(fp_range$flank_unscaled_avg) %>% | |
| as.numeric() | |
| fp_range$motif <- as.character(fp_range$motif_unscaled_avg) %>% | |
| as.numeric() | |
| fp_range$timepoint <- factor(fp_range$timepoint, | |
| levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| fp_range$counts <- str_c(fp_range$class, "=", fp_range$n_regions) | |
| gg <- ggplot(fp_range, aes(timepoint, ymin = motif, | |
| ymax=flank, colour = counts)) + | |
| #geom_linerange() + | |
| geom_errorbar(width=0.5) + | |
| geom_ribbon(aes(x=timepoint, ymin = motif, ymax=flank, group=class), | |
| fill='grey80', colour=NA) + | |
| geom_errorbar(width=0.5) + | |
| facet_grid(class~.) + | |
| theme_bw() + | |
| xlab("Timepoint") + | |
| ylab("Footprint score") + | |
| theme(panel.grid = element_blank(), | |
| axis.text.x = element_text(angle = 45, vjust = 0.5)) + | |
| ggtitle(title) | |
| return(gg) | |
| } | |
| plot_fp_class_A <- function(fp, title){ | |
| # Get the data for each timepoint | |
| get_fp_data <- function(fl_name){ | |
| dat <-fp[names(fp) == fl_name] | |
| dat <- dat[[1]][[1]]$dat | |
| dat$name <- str_replace(fl_name, | |
| pattern = "_combined_replicates.ins.bigwig", | |
| replacement = "") %>% | |
| str_replace(pattern = "atac_", replacement = "") | |
| return(dat) | |
| } | |
| fp_dat <- lapply(names(fp), get_fp_data) | |
| fp_dat <- do.call(rbind, fp_dat) | |
| fp_dat <- fp_dat[fp_dat$class == "A", ] | |
| fp_dat$name <- factor(fp_dat$name, | |
| levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| gg <- ggplot(fp_dat, aes(x = position, y = sob_cpm, group = name)) + | |
| geom_line() + | |
| facet_grid(~name) + | |
| ylab("") + | |
| xlab("Position relative to motif centre") + | |
| theme_bw() + | |
| theme(panel.grid = element_blank(), | |
| strip.background = element_blank()) + | |
| ggtitle(str_c(title, " n=", occ_table[occ_table$Var1 == "A", 2])) | |
| return(gg) | |
| } | |
| plot_fp_mc_level <- function(fp, title){ | |
| # Get the data for each timepoint | |
| get_fp_data <- function(fl_name){ | |
| dat <-fp[names(fp) == fl_name] | |
| dat <- dat[[1]][[1]]$dat | |
| dat$name <- str_replace(fl_name, | |
| pattern = "_combined_replicates.ins.bigwig", | |
| replacement = "") %>% | |
| str_replace(pattern = "atac_", replacement = "") | |
| return(dat) | |
| } | |
| fp_dat <- lapply(names(fp), get_fp_data) | |
| fp_dat <- do.call(rbind, fp_dat) | |
| fp_dat <- fp_dat[fp_dat$class == "A", ] | |
| fp_dat$name <- factor(fp_dat$name, | |
| levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
| gg <- ggplot(fp_dat, aes(x = position, y = mCG_mean, group = name)) + | |
| #geom_line() + | |
| geom_point(alpha=0.1) + | |
| #geom_smooth() + | |
| facet_grid(~name) + | |
| ylab("mCG/CG") + | |
| xlab("Position relative to motif centre") + | |
| theme_bw() + | |
| theme(panel.grid = element_blank(), | |
| strip.background = element_blank()) + | |
| ggtitle(str_c(title, " n=", occ_table[occ_table$Var1 == "A", 2])) | |
| return(gg) | |
| } | |
| gg_scores <- plot_fp_scores(fp = series, title = title) | |
| gg_fp <- plot_footprints(fp = series, title = title) | |
| gg_range <- plot_fp_range(fp = series, title = title) | |
| gg_class_A <- plot_fp_class_A(fp = series, title = title) | |
| gg_mc_mean <- plot_fp_mc_level(fp = series, title = title) | |
| ## Output | |
| out <- list(summary=series_summary, scores=score_summary, | |
| occ_class=series$MEF[[1]]$region_class, | |
| gg_scores=gg_scores, gg_fp=gg_fp, gg_mc_mean=gg_mc_mean, | |
| gg_range=gg_range, gg_class_A=gg_class_A) | |
| return(out) | |
| } | |
| # CTCF footprinting ------------------------------------------------------- | |
| ctcf <- pfmList$MA0139.1 | |
| ctcf_c5 <- run_footprint(pwm = ctcf, cluster = 5, occ_bigwig = ips_occ, | |
| title = "C5 CTCF", min.score = "85%") | |
| # NRF1 foot printing ------------------------------------------------------ | |
| nrf1 <- pfmList$MA0506.1 | |
| nrf1_c8 <- run_footprint(pwm = nrf1, cluster = 8, occ_bigwig = ips_occ, | |
| title = "C8 NRF1", min.score = "85%") | |
| nrf1_c8$gg_fp | |
| nrf1_c8$gg_class_A + xlim(-50, 50) | |
| nrf1_c8$gg_mc_mean + | |
| geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| # Oct4-Sox2 footprinting ------------------------------------------------- | |
| os <- pfmList$MA0142.1 | |
| os_c1 <- run_footprint(pwm = os, cluster = 1, occ_bigwig = d9_occ, | |
| title = "C1 Oct4-Sox2", min.score = "75%") | |
| os_c2 <- run_footprint(pwm = os, cluster = 2, occ_bigwig = mef_occ, | |
| title = "C2 Oct4-Sox2", min.score = "75%") | |
| os_c3 <- run_footprint(pwm = os, cluster = 3, occ_bigwig = ips_occ, | |
| title = "C3 Oct4-Sox2", min.score = "75%") | |
| # os_c4 <- run_footprint(pwm = os, cluster = 4, occ_bigwig = mef_occ, | |
| # title = "C4 Oct4-Sox2", min.score = "75%") | |
| os_c5 <- run_footprint(pwm = os, cluster = 5, occ_bigwig = ips_occ, | |
| title = "C5 Oct4-Sox2", min.score = "75%") | |
| os_c6 <- run_footprint(pwm = os, cluster = 6, occ_bigwig = d9_occ, | |
| title = "C6 Oct4-Sox2", min.score = "75%") | |
| os_c7 <- run_footprint(pwm = os, cluster = 7, occ_bigwig = ips_occ, | |
| title = "C7 Oct4-Sox2", min.score = "75%") | |
| os_c8 <- run_footprint(pwm = os, cluster = 8, occ_bigwig = ips_occ, | |
| title = "C8 Oct4-Sox2", min.score = "75%") | |
| os_c1$gg_class_A + xlim(-100, 100) | |
| os_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c5$gg_class_A + xlim(-100, 100) | |
| os_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c6$gg_class_A + xlim(-40, 40) | |
| os_c6$gg_fp | |
| os_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| # Sox2 footprinting ------------------------------------------------------ | |
| # sox2 <- pfmList$MA0143.3 | |
| # | |
| # sox2_c1 <- run_footprint(pwm = sox2, cluster = 1, occ_bigwig = d9_occ, | |
| # title = "C1 Sox2", min.score = "85%") | |
| # sox2_c5 <- run_footprint(pwm = sox2, cluster = 5, occ_bigwig = ips_occ, | |
| # title = "C5 Sox2", min.score = "85%") | |
| # sox2_c6 <- run_footprint(pwm = sox2, cluster = 6, occ_bigwig = d9_occ, | |
| # title = "C6 Sox2", min.score = "85%") | |
| # sox2_c7 <- run_footprint(pwm = sox2, cluster = 7, occ_bigwig = ips_occ, | |
| # title = "C7 Sox2", min.score = "85%") | |
| # sox2_c8 <- run_footprint(pwm = sox2, cluster = 8, occ_bigwig = ips_occ, | |
| # title = "C8 Sox2", min.score = "85%") | |
| # Klf4 foot printing ------------------------------------------------------ | |
| klf4 <- pfmList$MA0039.2 | |
| klf4_c1 <- run_footprint(pwm = klf4, cluster = 1, occ_bigwig = d9_occ, | |
| title = "C1 klf4") | |
| klf4_c2 <- run_footprint(pwm = klf4, cluster = 2, occ_bigwig = mef_occ, | |
| title = "C2 klf4") | |
| klf4_c3 <- run_footprint(pwm = klf4, cluster = 3, occ_bigwig = ips_occ, | |
| title = "C3 klf4") | |
| klf4_c5 <- run_footprint(pwm = klf4, cluster = 5, occ_bigwig = ips_occ, | |
| title = "C5 klf4") | |
| klf4_c6 <- run_footprint(pwm = klf4, cluster = 6, occ_bigwig = d9_occ, | |
| title = "C6 klf4") | |
| klf4_c7 <- run_footprint(pwm = klf4, cluster = 7, occ_bigwig = ips_occ, | |
| title = "C7 klf4") | |
| klf4_c8 <- run_footprint(pwm = klf4, cluster = 8, occ_bigwig = ips_occ, | |
| title = "C8 klf4") | |
| # AP1 foot printing ------------------------------------------------------- | |
| ap1 <- pfmList$MA0099.1 | |
| ap1_c1 <- run_footprint(pwm = ap1, cluster = 1, occ_bigwig = d9_occ, | |
| title = "C1 AP-1") | |
| ap1_c2 <- run_footprint(pwm = ap1, cluster = 2, occ_bigwig = mef_occ, | |
| title = "C2 AP-1") | |
| ap1_c4 <- run_footprint(pwm = ap1, cluster = 4, occ_bigwig = mef_occ, | |
| title = "C4 AP-1") | |
| # Footprint plotting ------------------------------------------------------ | |
| save(ctcf_c5, nrf1_c8, | |
| os_c1, os_c2, os_c3, os_c5, os_c6, os_c7, os_c8, | |
| klf4_c1, klf4_c2, klf4_c3, klf4_c5, klf4_c6, klf4_c7, klf4_c8, | |
| ap1_c1, ap1_c2, ap1_c4, | |
| file = "ATACseq/processed_data/footprint_data.Rdata") | |
| load("ATACseq/processed_data/footprint_data.Rdata") | |
| pdf("ATACseq/plots/footprinting/footprints_all.pdf", width = 6, height = 2) | |
| ctcf_c5$gg_class_A + xlim(-40, 40) | |
| ctcf_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| nrf1_c8$gg_class_A + xlim(-40, 40) | |
| nrf1_c8$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c1$gg_class_A + xlim(-40, 40) | |
| os_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c2$gg_class_A + xlim(-40, 40) | |
| os_c2$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c3$gg_class_A + xlim(-40, 40) | |
| os_c3$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c5$gg_class_A + xlim(-40, 40) | |
| os_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c6$gg_class_A + xlim(-40, 40) | |
| os_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c7$gg_class_A + xlim(-40, 40) | |
| os_c7$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| os_c8$gg_class_A + xlim(-40, 40) | |
| os_c8$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| klf4_c1$gg_class_A + xlim(-40, 40) | |
| klf4_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| klf4_c2$gg_class_A + xlim(-40, 40) | |
| klf4_c2$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| klf4_c3$gg_class_A + xlim(-40, 40) | |
| klf4_c3$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| klf4_c5$gg_class_A + xlim(-40, 40) | |
| klf4_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| klf4_c6$gg_class_A + xlim(-40, 40) | |
| klf4_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| klf4_c7$gg_class_A + xlim(-40, 40) | |
| klf4_c7$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| klf4_c8$gg_class_A + xlim(-40, 40) | |
| klf4_c8$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| ap1_c1$gg_class_A + xlim(-40, 40) | |
| ap1_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| ap1_c2$gg_class_A + xlim(-40, 40) | |
| ap1_c2$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| ap1_c4$gg_class_A + xlim(-40, 40) | |
| ap1_c4$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| dev.off() | |
| # Atf3 footprinting ------------------------------------------------------- | |
| atf3 <- pfmList$MA0605.1 | |
| atf3_c1 <- run_footprint(pwm = atf3, cluster = 1, occ_bigwig = d9_occ, | |
| title = "C1 Atf3", width = 1000) | |
| atf3_c2 <- run_footprint(pwm = atf3, cluster = 2, occ_bigwig = mef_occ, | |
| title = "C2 Atf3", width = 1000) | |
| atf3_c4 <- run_footprint(pwm = atf3, cluster = 4, occ_bigwig = mef_occ, | |
| title = "C2 Atf3", width = 1000) | |
| atf3_c1$gg_class_A + xlim(-50, 50) | |
| atf3_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| atf3_c2$gg_class_A + xlim(-50, 50) | |
| atf3_c2$gg_mc_mean + | |
| geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| atf3_c4$gg_class_A + xlim(-50, 50) | |
| atf3_c4$gg_mc_mean + | |
| geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| # Nfy footprinting -------------------------------------------------------- | |
| nfy <- pfmList$MA0060.1 | |
| nfy_c3 <- run_footprint(pwm = nfy, cluster = 3, occ_bigwig = ips_occ, | |
| title = "C3 NFY", width = 250) | |
| nfy_c7 <- run_footprint(pwm = nfy, cluster = 7, occ_bigwig = ips_occ, | |
| title = "C7 NFY", width = 250) | |
| nfy_c3$gg_class_A + xlim(-40, 40) | |
| nfy_c3$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| nfy_c7$gg_class_A + xlim(-40, 40) | |
| nfy_c7$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| # AP-2 Tfap2c ------------------------------------------------------------- | |
| ap2_g <- pfmList$MA0815.1 | |
| ap2_g_c5 <- run_footprint(pwm = ap2_g, cluster = 5, occ_bigwig = ips_occ, | |
| title = "C5 AP-2 gamma") | |
| ap2_g_c6 <- run_footprint(pwm = ap2_g, cluster = 6, occ_bigwig = d9_occ, | |
| title = "C6 AP-2 gamma") | |
| ap2_g_c5$gg_class_A + xlim(-40, 40) | |
| ap2_g_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| ap2_g_c6$gg_class_A + xlim(-40, 40) | |
| ap2_g_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
| # Klf5 -------------------------------------------------------------------- | |
| klf5 <- pfmList$MA0599.1 | |
| klf5_c1 <- run_footprint(pwm = klf5, cluster = 1, occ_bigwig = d9_occ, | |
| title = "C1 klf5") | |
| klf5_c2 <- run_footprint(pwm = klf5, cluster = 2, occ_bigwig = mef_occ, | |
| title = "C2 klf5") | |
| klf5_c3 <- run_footprint(pwm = klf5, cluster = 3, occ_bigwig = ips_occ, | |
| title = "C3 klf5") | |
| klf5_c5 <- run_footprint(pwm = klf5, cluster = 5, occ_bigwig = ips_occ, | |
| title = "C5 klf5") | |
| klf5_c6 <- run_footprint(pwm = klf5, cluster = 6, occ_bigwig = d9_occ, | |
| title = "C6 klf5") | |
| klf5_c7 <- run_footprint(pwm = klf5, cluster = 7, occ_bigwig = ips_occ, | |
| title = "C7 klf5") | |
| klf5_c8 <- run_footprint(pwm = klf5, cluster = 8, occ_bigwig = ips_occ, | |
| title = "C8 klf5") | |
| # Yy1 --------------------------------------------------------------------- | |
| yy1 <- pfmList$MA0095.2 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment