radaniba · October 13, 2015 08:48
diff --git a/enrichment.R b/enrichment.R
 sink("log",append=FALSE,type=c("output","message"),split=FALSE)
 library(GOstats)
 library(EMA)
 library(fdrtool)
 library(org.Pt.eg.db)

 Args <- commandArgs();

 UNIV <- Args[6]
 #GL <- Args[5]
 DIR <- Args[5]
 PVCUT <- Args[7]
 FDRcut <- Args[8]

 sprintf("%s","%s","PROCESSING PLEASE WAIT...","\n")

 ALLFILES <- list.files(toString(DIR)) 

 #,pattern="^H11_"

 i<-1
 universe <- read.table(toString(UNIV),sep="\t",header=FALSE)
 while(i <= length(ALLFILES)){
 inGenes <- read.table (paste(toString(DIR),"/",toString(ALLFILES[i]),sep="",collapse=NULL), sep="\t", header=FALSE)
 OUTFILE <- paste(toString(DIR),"/",toString(ALLFILES[i]),".ENR",sep="",collapse=NULL)
 selected <- unique(inGenes$V1)
 paramK <- new("KEGGHyperGParams", geneIds=selected, universeGeneIds=universe, annotation="org.Hs.eg.db", pvalueCutoff=as.numeric(PVCUT), testDirection="over")
 hypK <- hyperGTest(paramK)
 sumTableK <- summary(hypK)


 # subset the output table to get the columns of interest 

 # (GO ID, GO description, p-value)
 # (KEGG ID, KEGG description, p-value)

 outK <- subset(sumTableK, select=c(1,7,2))

 #cat(outK$KEGGID)

 # retrieve input genes associated witsourceh each KEGG identifier

 # use the org.Hs.eg data mapping to get KEGG terms for each ID


 if(length(outK$KEGGID)>=2){
 keggMaps <- lapply(outK$KEGGID, function(x) unlist(mget(x, org.Hs.egPATH2EG)))

 # subset the selected genes based on those in the mappings

 keggSelected <- lapply(keggMaps, function(x) selected[selected %in% x])

 # join together with a semicolon to make up the last column

 outK$inGenes <- unlist(lapply(keggSelected, function(x) paste(x, collapse=";")))

 #adjusted pvalues here we use FDR-BF

 pvalsK <- outK$Pvalue

 adjpvalsK <- multiple.correction(pvalsK,typeFDR="FDR-BH")
 fdr = fdrtool(adjpvalsK, statistic="pvalue",plot=FALSE)



 # Add the adjusted Pvalues to the data frame containing the results

 outK[,5]<-as.data.frame(adjpvalsK)
 outK[,6]<-as.data.frame(fdr$qval)

 # write the final data table as a tab separated file 

 write.table(outK, file=OUTFILE, sep="\t", row.names=FALSE)
 debugging_line <- paste(as.character(ALLFILES[i]),"... Done",sep=" ")
 cat(debugging_line)
 }
 i<-i+1
 }
diff --git a/genesetenrichment-KEGG.pl b/genesetenrichment-KEGG.pl
 #!/usr/bin/perl


    use strict;
    use warnings;
 my $num_args = $#ARGV + 1;
 if ($num_args != 4 ) {
 	print "usage: perl BulkEnrichment.pl <Folder containing the Gene List> <Universe> <pvalue_cutoff> <FDR_cutoff> \n";
 	exit;
 }

    my $dir = $ARGV[0];
    my $universe=$ARGV[1];
    my $pvalue_cutoff = $ARGV[2];
    my $FDR_cutoff = $ARGV[3];

   
   
 my $Enrichment_CMD = "R --slave --vanilla --args $dir $universe $pvalue_cutoff $FDR_cutoff < Enrichment.R > LOG";
 system($Enrichment_CMD);

 exit 0;
	sink("log",append=FALSE,type=c("output","message"),split=FALSE)
	library(GOstats)
	library(EMA)
	library(fdrtool)
	library(org.Pt.eg.db)

	Args <- commandArgs();

	UNIV <- Args[6]
	#GL <- Args[5]
	DIR <- Args[5]
	PVCUT <- Args[7]
	FDRcut <- Args[8]

	sprintf("%s","%s","PROCESSING PLEASE WAIT...","\n")

	ALLFILES <- list.files(toString(DIR))

	#,pattern="^H11_"

	i<-1
	universe <- read.table(toString(UNIV),sep="\t",header=FALSE)
	while(i <= length(ALLFILES)){
	inGenes <- read.table (paste(toString(DIR),"/",toString(ALLFILES[i]),sep="",collapse=NULL), sep="\t", header=FALSE)
	OUTFILE <- paste(toString(DIR),"/",toString(ALLFILES[i]),".ENR",sep="",collapse=NULL)
	selected <- unique(inGenes$V1)
	paramK <- new("KEGGHyperGParams", geneIds=selected, universeGeneIds=universe, annotation="org.Hs.eg.db", pvalueCutoff=as.numeric(PVCUT), testDirection="over")
	hypK <- hyperGTest(paramK)
	sumTableK <- summary(hypK)


	# subset the output table to get the columns of interest

	# (GO ID, GO description, p-value)
	# (KEGG ID, KEGG description, p-value)

	outK <- subset(sumTableK, select=c(1,7,2))

	#cat(outK$KEGGID)

	# retrieve input genes associated witsourceh each KEGG identifier

	# use the org.Hs.eg data mapping to get KEGG terms for each ID


	if(length(outK$KEGGID)>=2){
	keggMaps <- lapply(outK$KEGGID, function(x) unlist(mget(x, org.Hs.egPATH2EG)))

	# subset the selected genes based on those in the mappings

	keggSelected <- lapply(keggMaps, function(x) selected[selected %in% x])

	# join together with a semicolon to make up the last column

	outK$inGenes <- unlist(lapply(keggSelected, function(x) paste(x, collapse=";")))

	#adjusted pvalues here we use FDR-BF

	pvalsK <- outK$Pvalue

	adjpvalsK <- multiple.correction(pvalsK,typeFDR="FDR-BH")
	fdr = fdrtool(adjpvalsK, statistic="pvalue",plot=FALSE)



	# Add the adjusted Pvalues to the data frame containing the results

	outK[,5]<-as.data.frame(adjpvalsK)
	outK[,6]<-as.data.frame(fdr$qval)

	# write the final data table as a tab separated file

	write.table(outK, file=OUTFILE, sep="\t", row.names=FALSE)
	debugging_line <- paste(as.character(ALLFILES[i]),"... Done",sep=" ")
	cat(debugging_line)
	}
	i<-i+1
	}
	#!/usr/bin/perl


	use strict;
	use warnings;
	my $num_args = $#ARGV + 1;
	if ($num_args != 4 ) {
	print "usage: perl BulkEnrichment.pl <Folder containing the Gene List> <Universe> <pvalue_cutoff> <FDR_cutoff> \n";
	exit;
	}

	my $dir = $ARGV[0];
	my $universe=$ARGV[1];
	my $pvalue_cutoff = $ARGV[2];
	my $FDR_cutoff = $ARGV[3];



	my $Enrichment_CMD = "R --slave --vanilla --args $dir $universe $pvalue_cutoff $FDR_cutoff < Enrichment.R > LOG";
	system($Enrichment_CMD);

	exit 0;