Created
November 28, 2013 08:46
-
-
Save menugget/7688922 to your computer and use it in GitHub Desktop.
BVSTEP (Clarke and Ainsworth 1992). Requires package "vegan".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| bv.step <- function(fix.mat, var.mat, | |
| fix.dist.method="bray", var.dist.method="euclidean", | |
| scale.fix=FALSE, scale.var=TRUE, | |
| max.rho=0.95, | |
| min.delta.rho=0.001, | |
| random.selection=TRUE, | |
| prop.selected.var=0.2, | |
| num.restarts=10, | |
| var.always.include=NULL, | |
| var.exclude=NULL, | |
| output.best=10 | |
| ){ | |
| if(dim(fix.mat)[1] != dim(var.mat)[1]){stop("fixed and variable matrices must have the same number of rows")} | |
| if(sum(var.always.include %in% var.exclude) > 0){stop("var.always.include and var.exclude share a variable")} | |
| require(vegan) | |
| if(scale.fix){fix.mat<-scale(fix.mat)}else{fix.mat<-fix.mat} | |
| if(scale.var){var.mat<-scale(var.mat)}else{var.mat<-var.mat} | |
| fix.dist <- vegdist(as.matrix(fix.mat), method=fix.dist.method) | |
| #an initial removal phase | |
| var.dist.full <- vegdist(as.matrix(var.mat), method=var.dist.method) | |
| full.cor <- suppressWarnings(cor.test(fix.dist, var.dist.full, method="spearman"))$estimate | |
| var.comb <- combn(1:ncol(var.mat), ncol(var.mat)-1) | |
| RES <- data.frame(var.excl=rep(NA,ncol(var.comb)), n.var=ncol(var.mat)-1, rho=NA) | |
| for(i in 1:dim(var.comb)[2]){ | |
| var.dist <- vegdist(as.matrix(var.mat[,var.comb[,i]]), method=var.dist.method) | |
| temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
| RES$var.excl[i] <- c(1:ncol(var.mat))[-var.comb[,i]] | |
| RES$rho[i] <- temp$estimate | |
| } | |
| delta.rho <- RES$rho - full.cor | |
| exclude <- sort(unique(c(RES$var.excl[which(abs(delta.rho) < min.delta.rho)], var.exclude))) | |
| if(random.selection){ | |
| num.restarts=num.restarts | |
| prop.selected.var=prop.selected.var | |
| prob<-rep(1,ncol(var.mat)) | |
| if(prop.selected.var< 1){ | |
| prob[exclude]<-0 | |
| } | |
| n.selected.var <- min(sum(prob),prop.selected.var*dim(var.mat)[2]) | |
| } else { | |
| num.restarts=1 | |
| prop.selected.var=1 | |
| prob<-rep(1,ncol(var.mat)) | |
| n.selected.var <- min(sum(prob),prop.selected.var*dim(var.mat)[2]) | |
| } | |
| RES_TOT <- c() | |
| for(i in 1:num.restarts){ | |
| step=1 | |
| RES <- data.frame(step=step, step.dir="F", var.incl=NA, n.var=0, rho=0) | |
| attr(RES$step.dir, "levels") <- c("F","B") | |
| best.comb <- which.max(RES$rho) | |
| best.rho <- RES$rho[best.comb] | |
| delta.rho <- Inf | |
| selected.var <- sort(unique(c(sample(1:dim(var.mat)[2], n.selected.var, prob=prob), var.always.include))) | |
| while(best.rho < max.rho & delta.rho > min.delta.rho & RES$n.var[best.comb] < length(selected.var)){ | |
| #forward step | |
| step.dir="F" | |
| step=step+1 | |
| var.comb <- combn(selected.var, RES$n.var[best.comb]+1, simplify=FALSE) | |
| if(RES$n.var[best.comb] == 0){ | |
| var.comb.incl<-1:length(var.comb) | |
| } else { | |
| var.keep <- as.numeric(unlist(strsplit(RES$var.incl[best.comb], ","))) | |
| temp <- NA*1:length(var.comb) | |
| for(j in 1:length(temp)){ | |
| temp[j] <- all(var.keep %in% var.comb[[j]]) | |
| } | |
| var.comb.incl <- which(temp==1) | |
| } | |
| RES.f <- data.frame(step=rep(step, length(var.comb.incl)), step.dir=step.dir, var.incl=NA, n.var=RES$n.var[best.comb]+1, rho=NA) | |
| for(f in 1:length(var.comb.incl)){ | |
| var.incl <- var.comb[[var.comb.incl[f]]] | |
| var.incl <- var.incl[order(var.incl)] | |
| var.dist <- vegdist(as.matrix(var.mat[,var.incl]), method=var.dist.method) | |
| temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
| RES.f$var.incl[f] <- paste(var.incl, collapse=",") | |
| RES.f$rho[f] <- temp$estimate | |
| } | |
| last.F <- max(which(RES$step.dir=="F")) | |
| RES <- rbind(RES, RES.f[which.max(RES.f$rho),]) | |
| best.comb <- which.max(RES$rho) | |
| delta.rho <- RES$rho[best.comb] - best.rho | |
| best.rho <- RES$rho[best.comb] | |
| if(best.comb == step){ | |
| while(best.comb == step & RES$n.var[best.comb] > 1){ | |
| #backward step | |
| step.dir="B" | |
| step <- step+1 | |
| var.keep <- as.numeric(unlist(strsplit(RES$var.incl[best.comb], ","))) | |
| var.comb <- combn(var.keep, RES$n.var[best.comb]-1, simplify=FALSE) | |
| RES.b <- data.frame(step=rep(step, length(var.comb)), step.dir=step.dir, var.incl=NA, n.var=RES$n.var[best.comb]-1, rho=NA) | |
| for(b in 1:length(var.comb)){ | |
| var.incl <- var.comb[[b]] | |
| var.incl <- var.incl[order(var.incl)] | |
| var.dist <- vegdist(as.matrix(var.mat[,var.incl]), method=var.dist.method) | |
| temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
| RES.b$var.incl[b] <- paste(var.incl, collapse=",") | |
| RES.b$rho[b] <- temp$estimate | |
| } | |
| RES <- rbind(RES, RES.b[which.max(RES.b$rho),]) | |
| best.comb <- which.max(RES$rho) | |
| best.rho<- RES$rho[best.comb] | |
| } | |
| } else { | |
| break() | |
| } | |
| } | |
| RES_TOT <- rbind(RES_TOT, RES[2:dim(RES)[1],]) | |
| print(paste(round((i/num.restarts)*100,3), "% finished")) | |
| } | |
| RES_TOT <- unique(RES_TOT[,3:5]) | |
| if(dim(RES_TOT)[1] > output.best){ | |
| order.by.best <- RES_TOT[order(RES_TOT$rho, decreasing=TRUE)[1:output.best],] | |
| } else { | |
| order.by.best <- RES_TOT[order(RES_TOT$rho, decreasing=TRUE), ] | |
| } | |
| rownames(order.by.best)<-NULL | |
| order.by.i.comb <- c() | |
| for(i in 1:length(selected.var)){ | |
| f1 <- which(RES_TOT$n.var==i) | |
| f2 <- which.max(RES_TOT$rho[f1]) | |
| order.by.i.comb <- rbind(order.by.i.comb, RES_TOT[f1[f2],]) | |
| } | |
| rownames(order.by.i.comb)<-NULL | |
| if(length(exclude)<1){var.exclude=NULL} else {var.exclude=exclude} | |
| out <- list( | |
| order.by.best=order.by.best, | |
| order.by.i.comb=order.by.i.comb, | |
| best.model.vars=paste(colnames(var.mat)[as.numeric(unlist(strsplit(order.by.best$var.incl[1], ",")))], collapse=","), | |
| best.model.rho=order.by.best$rho[1], | |
| var.always.include=var.always.include, | |
| var.exclude=var.exclude | |
| ) | |
| out | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment