Created
June 18, 2012 02:13
-
-
Save astatham/2946426 to your computer and use it in GitHub Desktop.
NOMe chart generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| GpCplot <- function(amplicon, reads, main="", minScore=150) { | |
| require(Biostrings) | |
| require(GenomicRanges) | |
| if (class(amplicon)=="character") amplicon <- DNAString(amplicon) | |
| amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon)))) | |
| reads <- read.DNAStringSet(reads) | |
| readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global")) | |
| readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-") | |
| reads2 <- reads | |
| reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"]) | |
| readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global") | |
| readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll)) | |
| Cs <- start(matchPattern("C", amplicon)) | |
| CpG <- start(matchPattern("CG", amplicon)) | |
| GpC <- start(matchPattern("GC", amplicon))+1 | |
| HCG <- setdiff(CpG, GpC) | |
| GCH <- setdiff(GpC, CpG) | |
| conversionCs <- setdiff(setdiff(Cs, CpG), GpC) | |
| values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T") | |
| readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore] | |
| readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore] | |
| #set up the plot | |
| plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main) | |
| lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3) | |
| #draw GpCs | |
| sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) { | |
| lines(c(x,x), y=c(3, 4), col="black", lwd=2) | |
| points(x, 4.5, cex=3, col="black", pch=19) | |
| points(x, 4.5, cex=2.8, col="white", pch=19) | |
| }) | |
| #draw CpGs | |
| sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) { | |
| lines(c(x,x), y=c(3, 2), col="black", lwd=2) | |
| points(x, 1.5, cex=3, col="black", pch=19) | |
| points(x, 1.5, cex=2.8, col="lightblue", pch=19) | |
| }) | |
| #create matrix of calls, sort by similarity | |
| calls.mat <- as.matrix(readsHit)[,GCH] | |
| reads.order <- hclust(dist(calls.mat=="T"))$order | |
| calls.mat <- calls.mat[reads.order,] | |
| #draw reads w/ calls | |
| for (i in 1:length(reads.order)) { | |
| lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue") | |
| sapply(1:length(GCH), function(x) { | |
| points(x=GCH[x]-1, y=-i, pch=19, col="#003366", cex=3) | |
| points(x=GCH[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "#00CC99", ifelse(calls.mat[i,x]=="T", "lightgrey", "white")), cex=2.8) | |
| }) | |
| } | |
| #add % conversion | |
| text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7) | |
| invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat)) | |
| } | |
| CpGplot <- function(amplicon, reads, main="", minScore=150) { | |
| require(Biostrings) | |
| require(GenomicRanges) | |
| if (class(amplicon)=="character") amplicon <- DNAString(amplicon) | |
| amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon)))) | |
| reads <- read.DNAStringSet(reads) | |
| readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global")) | |
| readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-") | |
| reads2 <- reads | |
| reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"]) | |
| readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global") | |
| readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll)) | |
| Cs <- start(matchPattern("C", amplicon)) | |
| CpG <- start(matchPattern("CG", amplicon)) | |
| GpC <- start(matchPattern("GC", amplicon))+1 | |
| HCG <- setdiff(CpG, GpC) | |
| GCH <- setdiff(GpC, CpG) | |
| conversionCs <- setdiff(setdiff(Cs, CpG), GpC) | |
| values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T") | |
| readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore] | |
| readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore] | |
| #set up the plot | |
| plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main) | |
| lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3) | |
| #draw GpCs | |
| sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) { | |
| lines(c(x,x), y=c(3, 4), col="black", lwd=2) | |
| points(x, 4.5, cex=3, col="black", pch=19) | |
| points(x, 4.5, cex=2.8, col="white", pch=19) | |
| }) | |
| #draw CpGs | |
| sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) { | |
| lines(c(x,x), y=c(3, 2), col="black", lwd=2) | |
| points(x, 1.5, cex=3, col="black", pch=19) | |
| points(x, 1.5, cex=2.8, col="lightblue", pch=19) | |
| }) | |
| #create matrix of calls, sort by similarity | |
| calls.mat <- as.matrix(readsHit)[,HCG] | |
| calls.mat2 <- as.matrix(readsHit)[,GCH] | |
| reads.order <- hclust(dist(calls.mat2=="T"))$order | |
| calls.mat <- calls.mat[reads.order,] | |
| #draw reads w/ calls | |
| for (i in 1:length(reads.order)) { | |
| lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue") | |
| sapply(1:length(HCG), function(x) { | |
| points(x=HCG[x]-1, y=-i, pch=19, col="black", cex=3) | |
| points(x=HCG[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "black", ifelse(calls.mat[i,x]=="T", "white", "white")), cex=2.8) | |
| }) | |
| } | |
| #add % conversion | |
| text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7) | |
| invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat)) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment