Created
February 27, 2020 02:52
-
-
Save mbk0asis/7e081d732cedc4cdac6f9027ad8b3482 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(reshape2) | |
| library(ggplot2) | |
| library(ggpubr) | |
| library(tidyverse) | |
| library(gridExtra) | |
| library(Biobase) | |
| library(GEOquery) | |
| library(limma) | |
| library(matrixStats) | |
| ################################################################################### | |
| setwd("~/Desktop/public_data/") | |
| #Schabath <- read.csv("~/Desktop/public_data/48_Schabath_2016.csv", row.names = 1, header = T) | |
| Shedden <- read.csv("~/Desktop/public_data/1_Shedden_2008.csv", row.names = 1, header = T) | |
| #Rousseaux <- read.csv("~/Desktop/public_data/31_Rousseaux_2013.csv", row.names = 1, header = T) | |
| Sato <- read.csv("~/Desktop/public_data/40_Sato_2013.csv", row.names = 1, header = T) | |
| #Okayama <- read.csv("~/Desktop/public_data/32_Okayama_2012.csv", row.names = 1, header = T) | |
| #Botling <- read.csv("~/Desktop/public_data/38_Botling_2013.csv", row.names = 1, header = T) | |
| ################################################################################### | |
| #setdb1 = 9869 | |
| Schabath2 <- data.frame(t(Schabath[grep("^9869$", rownames(Schabath)),])) | |
| Shedden2 <- data.frame(t(Shedden[grep("^9869$", rownames(Shedden)),])) | |
| Rousseaux2 <- data.frame(t(Rousseaux[grep("^9869$", rownames(Rousseaux)),])) | |
| Sato2 <- data.frame(t(Sato[grep("^9869$", rownames(Sato)),])) | |
| Okayama2 <- data.frame(t(Okayama[grep("^9869$", rownames(Okayama)),])) | |
| Botling2 <- data.frame(t(Botling[grep("^9869$", rownames(Botling)),])) | |
| setdb1 <- cbind(Sato2) | |
| colnames(setdb1) <- c("setdb1") | |
| head(setdb1) | |
| m_setdb1 <- setdb1 %>% | |
| rownames_to_column("id") %>% | |
| reshape2::melt(id.var = c("id")) | |
| head(m_setdb1) | |
| p1 <- ggplot(m_setdb1, aes(x=variable, y=value, fill=variable)) + theme_bw() + | |
| geom_jitter(width=.3) + | |
| geom_boxplot(outlier.shape = NA) + | |
| theme(legend.position = "none") | |
| high3 <- m_setdb1 %>% arrange(desc(value)) %>% head(50) | |
| low3 <- m_setdb1 %>% arrange(desc(value)) %>% tail(50) | |
| val <- data.frame(high3$value,low3$value) | |
| m_val <- reshape2::melt(val) | |
| head(m_val) | |
| p2 <- ggplot(m_val, aes(x=variable, y=value, fill=variable)) + theme_bw() + | |
| geom_jitter(width=.3) + | |
| geom_boxplot(outlier.shape = NA) + | |
| stat_summary(fun.y=mean, geom="point", color="red", size=3) + | |
| stat_summary(fun.y=mean, geom="text", aes(label=round(..y..,2), size =2)) + | |
| theme(legend.position = "none") + | |
| scale_fill_manual(values=c("orange", "green3")) | |
| grid.arrange(p1, p2, nrow = 1, widths = c(1.2, 2)) | |
| #ggsave("Botling.png", p3, width = 5, height = 5, units = "in") | |
| #################################################################################### | |
| #################################################################################### | |
| # DE analysis (limma) | |
| t_df <- data.frame(t(Sato)) | |
| high_matrix <- t_df %>% | |
| rownames_to_column("id") %>% | |
| inner_join(high3, by="id") | |
| low_matrix <- t_df %>% | |
| rownames_to_column("id") %>% | |
| inner_join(low3, by="id") | |
| matrix <- rbind(high_matrix, low_matrix) | |
| head(matrix[,1:5]) | |
| dim(matrix) | |
| matrix2 <- matrix[,-c(1,20358,20359)] | |
| matrix2 <- matrix[,-c(1,19619,19620)] | |
| matrix2 <- matrix[,-c(1,12261,12262)] | |
| rownames(matrix2) <- matrix$id | |
| matrix3 <- t(matrix2) | |
| head(matrix3) | |
| # PCA | |
| pca.data <- matrix3 | |
| grps <- c(rep("High",50),rep("low",50)) | |
| pca.data <- t(pca.data) | |
| colnames(pca.data) <- paste(grps, colnames(pca.data), sep="-") | |
| pca <- prcomp(pca.data, scale=TRUE) | |
| summary(pca) | |
| grpcol <- c(rep(rgb(0,0,1),50),rep(rgb(1,0,0),50)) | |
| plot(pca$x[,1], pca$x[,2], xlab="PCA1", ylab="PCA2", main="PCA", type="p", pch=19, cex=1, col=grpcol) | |
| #text(pca$x[,1], pca$x[,2], rownames(pca.data), cex=0.75) | |
| ######################################################################### | |
| design <- cbind(high=c(rep(1,50),rep(0,50)), | |
| low = c(rep(0,50), rep(1,50))) | |
| # Differential expression analysis | |
| ###################################### | |
| ###################################### | |
| ###################################### | |
| sample <- "high" | |
| control <- "low" | |
| #cont <- paste(sample,"-",control, sep="") | |
| #c <- makeContrasts(cont, levels=design) | |
| c <- makeContrasts(HIGH.vs.LOW=high-low, levels=design) | |
| # DE analysis | |
| eset <- ExpressionSet( | |
| assayData = matrix3 | |
| ) | |
| fit <- lmFit(eset,design) | |
| fit2<- contrasts.fit(fit,c) | |
| fit2<- eBayes(fit2) | |
| n <- length(featureNames(eset)) | |
| top <- topTable(fit2, coef="HIGH.vs.LOW", n=n, adjust = "fdr") | |
| head(top) | |
| # means | |
| mean_high <- 2^(fit$coefficients[,1]) | |
| mean_low <- 2^(fit$coefficients[,2]) | |
| means <- data.frame(mean_high, mean_low) | |
| head(means) | |
| # stdev | |
| matrix4 <- 2^(matrix3) | |
| sd_high <- rowSds(matrix4[,1:50]) | |
| sd_low <- rowSds(matrix4[,51:100]) | |
| sd <- data.frame(sd_high, sd_low) | |
| rownames(sd) <- rownames(matrix4) | |
| head(sd) | |
| # attach means, sd | |
| top2 <- merge(top,means, by=0) | |
| head(top2) | |
| top3 <- merge(top2,sd, by.x="Row.names", by.y=0) | |
| head(top3) | |
| dim(top3) | |
| write.csv(top3, "DE_LIMMA_WholGenes_Sato.csv") | |
| ######################################################################### | |
| pval=1e-3 | |
| log2FoldChange=log2(1) # "log2(1)" or "0" for not to consider fold-changes | |
| nrow1 <- nrow(subset(top, adj.P.Val <= pval & logFC >= log2FoldChange)) | |
| nrow2 <- nrow(subset(top, adj.P.Val <= pval & logFC <= -log2FoldChange)) | |
| #top$logFC[abs(top) >= 4] <- 4 # replace (foldChange > 4) to 4 | |
| top$adj.P.Val[top$adj.P.Val <= 1e-20] <- 1e-20 # replace (pvalue < 1e-15) to 1e-15 | |
| with(top, plot(logFC,-log10(adj.P.Val), pch=19,cex=.8, | |
| col="grey", | |
| main=paste("DEGs\npadj < ",pval,", log2FC > ",round(log2FoldChange,3),"\n",control," (",nrow2,") < > ",sample," (",nrow1,")",sep=""))) | |
| with(subset(top, adj.P.Val<=pval & logFC>=log2FoldChange), | |
| points(logFC,-log10(adj.P.Val),pch=19,cex=1,col=rgb(1,0,0,.5))) | |
| with(subset(top, adj.P.Val<=pval & logFC<=-log2FoldChange), | |
| points(logFC,-log10(adj.P.Val),pch=19,cex=1,col=rgb(0,0,1))) | |
| abline(h=c(-log10(pval)),col="grey30",lty=2) | |
| abline(v=c(-log2FoldChange,log2FoldChange),col="grey30",lty=2) | |
| abline(v=0,col="black") | |
| options(bitmapType="cairo") | |
| install.packages("Cairo") | |
| getOption("bitmapType") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment