Created
August 30, 2019 05:09
-
-
Save mbk0asis/aaee2021f2a58b609a8ee86094206f75 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| BiocUpgrade() | |
| source("http://bioconductor.org/biocLite.R") | |
| biocLite(c("DESeq2","gplots","pcaExplorer","bovine.db","calibrate","AnnotationFuncs","gage","Rtsne","ggrepel")) | |
| #library(TCGAbiolinks) | |
| library(DESeq2) | |
| library(pcaExplorer) | |
| library(ggfortify) | |
| library(ggplot2) | |
| library(gplots) | |
| library(ggrepel) | |
| library(gage) | |
| library(Rtsne) | |
| library(reshape2) | |
| library(tidyverse) | |
| library(org.Hs.eg.db) | |
| wdir<-"~/BIO0/00-NGS/SETDB1_TCGA/" | |
| setwd(wdir) | |
| luad_tumor <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUAD/exp.LUAD.TUMOR.SETDB1.Top100.Bot100.htseq-count.csv", header=T, row.names = 1) | |
| luad_normal <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUAD/exp.LUAD.NORMAL.htseq-counts.csv", header=T, row.names = 1) | |
| lusc_tumor <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUSC/exp.LUSC.TUMOR.Setdb1.Top100.Bot100.htseq-count.csv", header=T, row.names = 1) | |
| lusc_normal <- read.csv("~/BIO0/00-NGS/SETDB1_TCGA/SETDB1_Top100_Bot100_LUSC/exp.LUSC.NORMAL.htseq-counts.csv", header=T, row.names = 1) | |
| nLUAD_Tumor <- ncol(luad_tumor) | |
| nLUSC_Tumor <- ncol(lusc_tumor) | |
| nLUAD_Normal <- ncol(luad_normal) | |
| nLUSC_Normal <- ncol(lusc_normal) | |
| Normal <- nLUAD_Normal + nLUSC_Normal | |
| cnts <- data.frame(luad_tumor,lusc_tumor,luad_normal,lusc_normal) | |
| head(cnts[,1:5]) | |
| dim(cnts) | |
| # To remove all zero rows | |
| cnts_noZero <- cnts[rowMeans(abs(cnts))>0,] | |
| dim(cnts_noZero) | |
| conds <- c(rep("luad_top100",100),rep("luad_bot100",100),rep("lusc_top100",100),rep("lusc_bot100",100), | |
| rep("luad_normal",nLUAD_Normal),rep("lusc_normal",nLUSC_Normal)) | |
| conds <- c(rep("luad_top100",100),rep("luad_bot100",100), | |
| rep("lusc_top100",100),rep("lusc_bot100",100), | |
| rep("normal",108)) | |
| coldat=DataFrame(conds=factor(conds)) | |
| coldat | |
| dds <- DESeqDataSetFromMatrix(cnts_noZero, colData=coldat, design = ~ conds) | |
| dds <- DESeq(dds, fitType = "mean") | |
| dds | |
| # to obtain normalized count data | |
| cntNorm <- counts(dds, normalized=T) | |
| #write.csv(cntNorm,"cntNorm.LUSC.Setdb1.Top100.Bot100.csv") | |
| ############################################################################################################ | |
| # to select Setdb1 expression Top100/Bot100 | |
| ############################################## | |
| # ggplot | |
| setdb1 <- "ENSG00000143379" | |
| exp.setdb1 <- cntNorm[grep(setdb1,rownames(cntNorm)),] | |
| names(exp.setdb1) | |
| type1 <- c(rep("luad_tumor",nLUAD_Tumor), | |
| rep("lusc_tumor",nLUSC_Tumor), | |
| rep("luad_normal",nLUAD_Normal), | |
| rep("lusc_normal",nLUSC_Normal)) | |
| type2 <- c(rep("luad_tumor",nLUAD_Tumor), | |
| rep("lusc_tumor",nLUSC_Tumor), | |
| rep("normal",nLUAD_Normal + nLUSC_Normal)) | |
| m.exp.setdb1 <- melt(exp.setdb1) | |
| m.exp.setdb1$type1 <- type1 | |
| m.exp.setdb1$type2 <- type2 | |
| m.exp.setdb1$type1 <- factor(m.exp.setdb1$type1, levels = c("luad_tumor","lusc_tumor","luad_normal","lusc_normal")) | |
| m.exp.setdb1.sorted <- m.exp.setdb1[order(type1,-m.exp.setdb1$value),] | |
| aggregate(value ~ type1, data=m.exp.setdb1.sorted, FUN = function(x){NROW(x)}) | |
| ggplot(m.exp.setdb1.sorted, aes(x=type1, y=log2(value))) + | |
| theme_bw() + | |
| geom_jitter(width=.1, alpha=.8, aes(color=type1)) + | |
| #geom_violin(aes(fill=type2), alpha=.5) + | |
| geom_boxplot(width=.4, alpha=.5, outlier.shape = NA) + | |
| scale_color_brewer(palette="Set1") | |
| ############################################### | |
| # Generic boxplot | |
| nSamples=100 # number of samples to be subsetted | |
| nCol <- ncol(cntNorm) | |
| setdb1 <- "ENSG00000143379" | |
| exp.setdb1 <- cntNorm_Tumor[grep(setdb1,rownames(cntNorm)),] | |
| exp.setdb1.Tumor.sorted <- data.frame(exp.setdb1.Tumor[order(exp.setdb1.Tumor)],exp.setdb1.Tumor[order(exp.setdb1.Tumor)]) | |
| colnames(exp.setdb1.Tumor.sorted) <- rep("SETDB1",2) | |
| dim(exp.setdb1.Tumor.sorted) | |
| head(exp.setdb1.Tumor.sorted) | |
| # LUAD_Tumor | |
| meanHIGH = log2(colMeans(exp.setdb1.Tumor.sorted[topStart:nTumor,])) | |
| meanLOW = log2(colMeans(exp.setdb1.Tumor.sorted[1:nSamples,])) | |
| high <- tail(exp.setdb1.Tumor.sorted,nSamples) | |
| nHigh <- nrow(high) | |
| low <- head(exp.setdb1.Tumor.sorted,nSamples) | |
| nLow <- nrow(low) | |
| log2FC_LUAD_Tumor <- round(meanHIGH[1]-meanLOW[1],2) | |
| # LUSC_Tumor | |
| cntNorm_Tumor <- cntNorm[,1:nTumor] | |
| dim(cntNorm_Tumor) | |
| exp.setdb1.Tumor <- cntNorm_Tumor[grep(setdb1,rownames(cntNorm_Tumor)),] | |
| exp.setdb1.Tumor.sorted <- data.frame(exp.setdb1.Tumor[order(exp.setdb1.Tumor)],exp.setdb1.Tumor[order(exp.setdb1.Tumor)]) | |
| colnames(exp.setdb1.Tumor.sorted) <- rep("SETDB1",2) | |
| dim(exp.setdb1.Tumor.sorted) | |
| head(exp.setdb1.Tumor.sorted) | |
| meanHIGH = log2(colMeans(exp.setdb1.Tumor.sorted[topStart:nTumor,])) | |
| meanLOW = log2(colMeans(exp.setdb1.Tumor.sorted[1:nSamples,])) | |
| high <- tail(exp.setdb1.Tumor.sorted,nSamples) | |
| nHigh_lusc_tumor <- nrow(high) | |
| low <- head(exp.setdb1.Tumor.sorted,nSamples) | |
| nLow_lusc_tumor <- nrow(low) | |
| log2FC_LUSC_Tumor <- round(meanHIGH[1]-meanLOW[1],2) | |
| nLUAD_Tumor <- ncol(luad_tumor) | |
| nLUAD_Normal <- ncol(luad_normal) | |
| nLUSC_Tumor <- ncol(lusc_tumor) | |
| nLUSC_Normal <- ncol(lusc_normal) | |
| # Normal | |
| cntNorm_Normal <- cntNorm[,(nTumor+1):nCol] | |
| exp.setdb1.Normal <- cntNorm_Normal[grep(setdb1,rownames(cntNorm_Normal)),] | |
| exp.setdb1.Normal.sorted <- data.frame(exp.setdb1.Normal[order(exp.setdb1.Normal)],exp.setdb1.Normal[order(exp.setdb1.Normal)]) | |
| colnames(exp.setdb1.Normal.sorted) <- rep("SETDB1",2) | |
| mean_Normal <- round(log2(colMeans(exp.setdb1.Normal.sorted)),2) | |
| # Boxplots | |
| par(mfrow=c(1,2)) | |
| boxplot(log2(exp.setdb1.Tumor.sorted[,1]),outline=T,pars=list(outcol="white"),col=(rgb(1,1,1,0)), ylim=c(10,14),#axes=F, | |
| main=paste("LUSC Tumor\nSETDB1 Top",nSamples,"vs Bot",nSamples,"\nlog2FC = ",log2FC,"(~",round(2^log2FC,2),")"), | |
| ylab = "log2(cntNorm)") | |
| stripchart(log2(high),vertical=T, method = 'jitter', add=T, | |
| pch=20, cex=.9, col=(rgb(.8,.2,.2,.5))) | |
| #stripchart(log2(mid),vertical=T, method = 'jitter', add=T, | |
| # pch=20, cex=.9, col=(rgb(.8,.8,.8,.7))) | |
| stripchart(log2(low),vertical=T, method = 'jitter', add=T, | |
| pch=20, cex=.9, col=(rgb(.2,.2,.8,.5))) | |
| points(meanHIGH[1],pch=7,lwd=4) | |
| text(meanHIGH[1],paste("mean_HIGH\n= ",round(meanHIGH[1],2)),offset=2,pos=4) | |
| points(meanLOW[1],pch=7,lwd=4) | |
| text(meanLOW[1],paste("mean_LOW\n= ",round(meanLOW[1],2)),offset=2,pos=4) | |
| #write.csv(high[,1:2], paste("LUSC.SETDB1.Top",nSamples,".samples",sep="")) | |
| #write.csv(low[,1:2], paste("LUSC.SETDB1.Bot",nSamples,".samples",sep="")) | |
| boxplot(log2(exp.setdb1.Normal.sorted[,1]),outline=T,bty=F, | |
| pars=list(outcol="white"),col=(rgb(1,1,1,0)),ylim=c(10,14), | |
| main = "LUSC Normal") | |
| stripchart(log2(exp.setdb1.Normal.sorted),vertical=T, method = 'jitter', add=T, | |
| pch=20, cex=.9, col=(rgb(.3,.3,.3,.8))) | |
| points(mean_Normal,pch=7,lwd=4) | |
| text(mean_Normal,paste("mean_Norm\n= ",round(mean_Normal,2)), offset=3, pos=4) | |
| #dev.off() | |
| # | |
| ############################################################################################################ | |
| # | |
| # PCA plot | |
| rld <- rlogTransformation(dds,fitType = "mean") | |
| #pcaplot(rld,intgroup=c("conds"),ntop=50000, text_labels=F, title="PCA", | |
| # pcX=1,pcY=2,point_size=3, ellipse = TRUE) | |
| vsd <- varianceStabilizingTransformation(dds, fitType = "mean") | |
| X=1 | |
| Y=2 | |
| ntop=30000 | |
| pcaData <- pcaplot(vsd, intgroup=c("conds"), pcX=X, pcY=Y, returnData=TRUE, ntop = ntop) | |
| percentVar <- round(100 * attr(pcaData, "percentVar")) | |
| pcaData$conds <- factor(pcaData$conds, levels = c('luad_top100','luad_bot100','lusc_top100','lusc_bot100','normal')) | |
| ggplot(pcaData, aes(pcaData[,1], pcaData[,2], color=conds)) + theme_bw() + | |
| geom_point(size=3, alpha = .7) + #ggtitle("PCA - LUAD & LUSC\nSETDB1 Top100 vs Bot100 vs Normal") + | |
| xlab(paste0("PC",X,": ",percentVar[1],"% variance")) + | |
| ylab(paste0("PC",Y,": ",percentVar[2],"% variance")) + | |
| #coord_fixed() + | |
| scale_color_manual(values=c("blue","red","orange","#00BFC4","yellow4","yellow4")) + | |
| stat_ellipse(aes(x=pcaData[,1], y=pcaData[,2]),type = "t") | |
| ######################################################## | |
| # t-SNE | |
| dta <- assay(vsd) | |
| head(dta) | |
| dta_sorted <- dta[order(-rowMeans(dta)),] | |
| dta_sorted[5,1:5] | |
| conds <- c(rep("luad_top100",100),rep("luad_bot100",100), | |
| rep("lusc_top100",100),rep("lusc_bot100",100),rep("normal",108)) | |
| goi <- data.frame(dta[grepl("ENSG00000143379", rownames(dta)), ]) | |
| colnames(goi) <-"goi" # gene of interset | |
| mid<-mean(goi) | |
| head(goi) | |
| d <- dist(t(dta_sorted[1:30000,])) | |
| for ( p in seq(10,50,10)){ | |
| set.seed(50) # tsne has some stochastic steps (gradient descent) so need to set random | |
| perplexity = 30 | |
| tsne_out <- Rtsne(d, is_distance=T, perplexity=perplexity, | |
| num_threads=20, verbose=TRUE) | |
| ## | |
| dta2 <- data.frame(tsne_out$Y, conds, goi) | |
| ggplot(dta2, aes(x=dta2[,1],y=dta2[,2], color=goi)) +#, color=conds)) + | |
| theme_bw() + | |
| geom_point(alpha=.5, size = 3) + | |
| xlab(paste0("Dimension 1")) + ylab(paste0("Dimension 2")) + | |
| scale_color_gradient2(midpoint=mid,low="steelblue",mid="white",high="red",space ="Lab") | |
| #scale_color_manual(values=c("blue","red","#00BFC4","orange","yellow4")) | |
| #stat_ellipse(aes(x=dta2[,1], y=dta2[,2]),type = "t") | |
| ggsave(paste0("tSNE_LUAD_LUSC_SETDB1_Top.Bot100_Normal_perplexity_",perplexity,".png"), | |
| width = 6, height = 4.5, units = "in") | |
| } | |
| ''' | |
| ########################################################### | |
| # DE analysis | |
| Sample <- "top100" | |
| Control <- "bot100" | |
| mean_top100 <- rowMeans(cntNorm[,1:100]) | |
| mean_bot100 <- rowMeans(cntNorm[,101:200]) | |
| sd_top100 <- rowSds(cntNorm[,1:100]) | |
| sd_bot100 <- rowSds(cntNorm[,101:200]) | |
| res <- results(dds, contrast = c("conds",Sample,Control)) | |
| res$mean_top100 <- mean_top100 | |
| res$sd_top100 <- sd_top100 | |
| res$mean_bot100 <- mean_bot100 | |
| res$sd_bot100 <- sd_bot100 | |
| res$symbol = mapIds(org.Hs.eg.db, | |
| keys=row.names(res), | |
| column="SYMBOL", | |
| keytype="ENSEMBL", | |
| multiVals="first") | |
| # add entrez ID to res | |
| res$entrez = mapIds(org.Hs.eg.db, | |
| keys=row.names(res), | |
| column="ENTREZID", | |
| keytype="ENSEMBL", | |
| multiVals="first") | |
| head(res) | |
| #res2 <- res[,c(2,5:11)] | |
| #dim(res2) | |
| #write.csv(res2, "Whole_Gene_DE_results.csv") | |
| # sig DEGs | |
| res_sig <- subset(res, padj<=.00001 & abs(log2FoldChange)>=1) | |
| head(res_sig) | |
| dim(res_sig) | |
| #write.csv(res_sig,"sig_DEGs_LUSC_SETDB1_Top100_Bot100_q0.00001_fc2.csv") | |
| # volcano plot | |
| nTop100 <- nrow(subset(res, padj<=.00001 & log2FoldChange > 1)) | |
| nBot100 <- nrow(subset(res, padj<=.00001 & log2FoldChange < -1)) | |
| nTop100 | |
| nBot100 | |
| res$padj[res$padj <= 1e-20] <- 1e-20 | |
| res$log2FoldChange[abs(res$log2FoldChange) > 6] <- 6 # outlier cutoff | |
| with(res, plot(log2FoldChange,-log10(padj), | |
| pch=19,cex=0.5,main=paste("TCGA LUSC\nSETDB1 (q<0.00001, fc>2)\n Bot100 (",nBot100,") vs Top100 (",nTop100,")",sep=""), | |
| col="lightgrey")) | |
| with(subset(res, padj<=.00001 & log2FoldChange > 1), | |
| points(log2FoldChange,-log10(padj),pch=19,cex=0.7,col=rgb(1,.6,0,.7))) | |
| with(subset(res, padj<=.00001 & log2FoldChange < -1), | |
| points(log2FoldChange,-log10(padj),pch=19,cex=0.7,col=rgb(.2,.6,.2,.7))) | |
| abline(v=c(-1,1),col="blue",lty=2) | |
| abline(v=0,col="blue") | |
| abline(h=c(-log10(.00001)),col="blue",lty=2) | |
| ################################################################################## | |
| ################################################################################## | |
| # GAGE | |
| library(gage) | |
| data(korg) # available KEGG databases | |
| head(korg,50) | |
| data(bods) # available GO databases | |
| head(bods,50) | |
| ######################################## | |
| # KEGG | |
| # KEGG gene sets | |
| kg.bta=kegg.gsets("hsa") | |
| kegg.gs=kg.bta$kg.sets[kg.bta$sigmet.idx] | |
| # to export KEGG gene sets | |
| #kegg.gs.table <- plyr::ldply(kegg.gs, rbind) | |
| #write.csv(kegg.gs.table,"kegg.gs.table.csv") | |
| # | |
| foldchanges = res$log2FoldChange | |
| names(foldchanges) = res_sig$entrez | |
| kegg.p<-gage(foldchanges, gsets=kegg.gs, same.dir=T, compare='unpaired') | |
| head(kegg.p$greater[,c(3,4)]) | |
| head(kegg.p$less[,c(3,4)]) | |
| write.table(kegg.p$greater[,c(3,4)],"gage_KEGG_SETDB1_Top100.txt") | |
| write.table(kegg.p$less[,c(3,4)],"gage_KEGG_SETDB1_Bot100.txt") | |
| # PATHVIEW | |
| library(pathview) | |
| cnts.d = log2(rowMeans(cntNorm_ent[, ft.idx]))-log2(rowMeans(cntNorm_ent[, fi.idx])) | |
| sel <- kegg.p$greater[, "p.val"] < 0.05 & !is.na(kegg.p$greater[,"p.val"]) | |
| path.ids <- rownames(kegg.p$greater)[sel] | |
| sel.l <- kegg.p$less[, "p.val"] < 0.05 & !is.na(kegg.p$less[,"p.val"]) | |
| path.ids.l <- rownames(kegg.p$less)[sel.l] | |
| path.ids2 <- substr(c(path.ids, path.ids.l), 1, 8) | |
| pv.out.list <- sapply(path.ids2, function(pid) pathview(gene.data = cnts.d, pathway.id = pid, species = "bta", kegg.native = T)) | |
| ######################################## | |
| # Gene Ontology | |
| ## GO gene sets for Human | |
| go.gs=go.gsets(species="Human") | |
| go.bp=go.gs$go.sets[go.gs$go.subs$BP] | |
| go.cc=go.gs$go.sets[go.gs$go.subs$CC] | |
| go.mf=go.gs$go.sets[go.gs$go.subs$MF] | |
| foldchanges = res$log2FoldChange | |
| names(foldchanges) = res$entrez | |
| head(foldchanges) | |
| go.p<-gage(foldchanges, gsets=go.bp, same.dir=T, compare='unpaired') | |
| head(go.p$greater[,c(3,4)]) | |
| head(go.p$less[,c(3,4)]) | |
| go_ampl <- go.p$greater[,3:4] | |
| go_ampl_sig <- subset(go_ampl,go_ampl[,1] <= 0.05) | |
| dim(go_ampl_sig) | |
| head(go_ampl_sig) | |
| tail(go_ampl_sig) | |
| write.csv(go_ampl_sig,"gage_GO_BP_LUSC_SETDB1_Top100.csv") | |
| go_dipl <- go.p$less[,3:4] | |
| go_dipl_sig <- subset(go_dipl, go_dipl[,1] <= 0.05) | |
| dim(go_dipl_sig) | |
| head(go_dipl_sig) | |
| tail(go_dipl_sig) | |
| write.csv(go_dipl_sig,"gage_GO_BP_LUSC_SETDB1_Bot100.csv") | |
| ############## | |
| foldchanges = data.frame(res$entrez, res$log2FoldChange, res$padj) | |
| res.ampl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange >= 1) | |
| res.dipl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange <= -1) | |
| res.ampl <- subset(foldchanges,res.log2FoldChange >= 1) | |
| res.dipl <- subset(foldchanges,res.log2FoldChange <= -1) | |
| head(res.dipl) | |
| terms <- read.csv("terms", header = F) | |
| for (t in terms[,1]){ | |
| print(t) | |
| glist<-data.frame(unlist(go.bp[t])) | |
| colnames(glist) <- "entrez" | |
| dim(glist) | |
| dim(res.ampl) | |
| overlapGenes <- merge(glist, foldchanges, by.x="entrez",by.y="res.entrez") | |
| overlapGenes2 <- subset(overlapGenes, res.padj <= 0.05) | |
| genes <- mapIds(org.Hs.eg.db, | |
| keys=as.character(overlapGenes2[,1]), | |
| keytype="ENTREZID", | |
| column="SYMBOL") | |
| genes <- paste(genes,collapse=",") | |
| print(genes) | |
| # output[[t]] <- paste(t,genes, sep=";") | |
| } | |
| output | |
| ######################################## | |
| # MSigDB | |
| # MSigDB gene sets - GO_BP_symbol | |
| filename=("msigdb.v6.1.symbols.gmt") | |
| msig.gs=readList(filename) | |
| foldchanges = res$log2FoldChange | |
| names(foldchanges) = res$symbol | |
| head(foldchanges) | |
| ampl.LUSC <- cntNorm[,1:74] | |
| dipl.LUSC <- cntNorm[,75:206] | |
| msig.p<-gage(foldchanges, gsets=msig.gs, same.dir=T, compare='unpaired') | |
| msig.greater <- msig.p$greater[,c(3,4)] | |
| go.msig.greater <- msig.greater[grep("GO_", rownames(msig.greater)),] | |
| sig.go.msig.greater <- subset(go.msig.greater, go.msig.greater[,2] <= 0.05) | |
| dim(sig.go.msig.greater) | |
| head(sig.go.msig.greater,20) | |
| msig.less <- msig.p$less[,c(3,4)] | |
| go.msig.less <- msig.less[grep("GO_", rownames(msig.less)),] | |
| sig.go.msig.less <- subset(go.msig.less, go.msig.less [,2] <= 0.05) | |
| dim(sig.go.msig.less) | |
| head(sig.go.msig.less,20) | |
| write.csv(sig.go.msig.greater,"gage_MSig_BP_Setdb1_Top100.csv") | |
| write.csv(sig.go.msig.less,"gage_MSig_BP_Setdb1_Bot100.csv") | |
| ######################## | |
| foldchanges = data.frame(res$symbol, res$log2FoldChange, res$padj) | |
| head(foldchanges) | |
| #res.ampl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange >= 1) | |
| #res.dipl <- subset(foldchanges, res.padj<=.05 & res.log2FoldChange <= -1) | |
| res.ampl <- subset(foldchanges, res.padj<=.05) | |
| res.dipl <- subset(foldchanges, res.padj<=.05) | |
| #res.ampl <- subset(foldchanges,res.log2FoldChange >= 1) | |
| #res.dipl <- subset(foldchanges,res.log2FoldChange <= -1) | |
| terms <- row.names(sig.go.msig.greater) | |
| terms <- row.names(sig.go.msig.less) | |
| output <- list() | |
| for (t in terms){ | |
| # print(t) | |
| glist<-data.frame(unlist(msig.gs[t])) | |
| colnames(glist) <- "symbol" | |
| overlapGenes <- merge(glist, res.ampl, by.x="symbol",by.y="res.symbol") | |
| genes <- paste(overlapGenes$symbol,collapse=",") | |
| # print(genes) | |
| output[[t]] <- paste(genes, sep=";") | |
| } | |
| write.csv(data.frame(unlist(output)),"gage_MSig_ampl.csv") | |
| ############################################################################### | |
| # survival plot | |
| surv <- read.csv("Clinical.Setdb1.Top100.Bot100.csv", header = T) | |
| surv2 <- surv[,c(8,11,12)] | |
| setdb1 <- c(rep("top100",116), rep("bot100",118)) | |
| surv <- read.csv("Clinical.Setdb1.Top100.Bot100.uniq.csv", header = T) | |
| head(surv) | |
| tail(surv) | |
| surv2 <- surv[,c(2:4)] | |
| head(surv2) | |
| dim(surv2) | |
| setdb1 <- c(rep("top100",99), rep("bot100",100)) | |
| dta <- data.frame(surv2, setdb1) | |
| head(dta) | |
| tail(dta) | |
| TCGAanalyze_survival(dta, clusterCol="setdb1",risk.table = FALSE, | |
| height = 4, width = 6, dpi = 300, | |
| conf.int = T, xlim = 4000, | |
| color = c("blue","red")) | |
| ############################################################################### | |
| # TE / Epi-drivers - PCA/heatmaps | |
| te <- "ERVL.3kb" | |
| dta <- read.csv( paste("cntNorm.LUSC.Setdb1.Top100.Bot100.Ensembl_Genes.TSS.5kb.",te,".csv",sep=""), row.names = 1, header=F) | |
| dta <- read.csv("cntNorm.LUSC.Setdb1.Top100.Bot100.EpiDrivers.csv", row.names = 1, header=F) | |
| head(dta[,1:5]) | |
| nGenes <- nrow(dta) | |
| mean_bot100 <- rowMeans(dta[,101:200]) | |
| fc <- log2(dta + 1) - log2(mean_bot100 + 1) | |
| my_palette <- colorRampPalette(c("steelblue", "lightyellow", "red")) | |
| breaks = unique(c(seq(-2,0,length=75), seq(0,2,length=75))) | |
| heatmap.2(as.matrix(fc),density="none",trace="none", col="bluered", breaks=breaks, | |
| Rowv = T, Colv = T, dendrogram="row", margins = c(8,8), | |
| ColSideColors = c(rep("orange",100),rep("darkgreen",100)), | |
| RowSideColors = c(rep("white",95),"black",rep("white",63)), | |
| main = paste("EpiDrivers (",nGenes,")",sep="")) | |
| main = paste("TSS < 5kb,\n",te," (",nGenes,")",sep="")) | |
| # PCA | |
| dta <- read.csv("cntNorm.LUSC.Setdb1.Top100.Bot100.EpiDrivers.csv", row.names = 1, header=F) | |
| head(dta[,1:5]) | |
| dta.sorted <- dta[order(rowMeans(dta),decreasing=T), ] | |
| head(dta.sorted[,1:5]) | |
| pc <- prcomp(t(dta.sorted[1:100,])) | |
| autoplot(pc, data= sampleInfo, | |
| colour="conds", alpha=.5, | |
| size=2) + theme_bw() | |
| coord_cartesian(xlim = c(-0.05, 0.1), ylim = c(-0.1,0.1)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment