[med-svn] [pvclust] 01/04: Imported Upstream version 1.3-0

Charles Plessy plessy at moszumanska.debian.org
Thu Oct 9 01:49:24 UTC 2014


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch master
in repository pvclust.

commit b95003778ac0ce489d99ace6c95ce0458d552a56
Author: Charles Plessy <plessy at riken.jp>
Date:   Thu Oct 9 09:59:04 2014 +0900

    Imported Upstream version 1.3-0
---
 DESCRIPTION          |  15 +-
 MD5                  |  13 +
 R/pvclust.R          | 892 ++++++++++++++++++++++++++-------------------------
 man/lung.Rd          |   2 +
 man/msfit.Rd         |   2 +-
 man/msplot.Rd        |   2 +-
 man/plot.pvclust.Rd  |   2 +-
 man/print.pvclust.Rd |   2 +-
 man/pvclust.Rd       | 411 ++++++++++++------------
 man/pvpick.Rd        |   2 +-
 man/seplot.Rd        |   2 +-
 11 files changed, 689 insertions(+), 656 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 49fba41..77f54dd 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,19 +1,20 @@
 Package: pvclust
-Version: 1.2-2
-Date: 2011-04-13
+Version: 1.3-0
+Date: 2014-10-08
 Title: Hierarchical Clustering with P-Values via Multiscale Bootstrap
         Resampling
 Author: Ryota Suzuki <suzuki at ef-prime.com>, Hidetoshi Shimodaira
-        <shimo at is.titech.ac.jp>
+        <shimo at sigmath.es.osaka-u.ac.jp>
 Maintainer: Ryota Suzuki <suzuki at ef-prime.com>
 Depends: R (>= 2.10.0)
-Suggests: MASS, snow, Rmpi
+Suggests: MASS, parallel
 Description: pvclust is a package for assessing the uncertainty in
         hierarchical cluster analysis. It provides AU (approximately
         unbiased) p-values as well as BP (boostrap probability) values
         computed via multiscale bootstrap resampling.
 License: GPL (>= 2)
-URL: http://www.is.titech.ac.jp/~shimo/prog/pvclust/
-Packaged: 2011-04-13 17:44:46 UTC; suzuki
+URL: http://www.sigmath.es.osaka-u.ac.jp/shimo-lab/prog/pvclust/
+Packaged: 2014-10-08 11:57:10 UTC; suzuki
+NeedsCompilation: no
 Repository: CRAN
-Date/Publication: 2011-04-13 20:44:41
+Date/Publication: 2014-10-08 14:02:05
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..a1bdb57
--- /dev/null
+++ b/MD5
@@ -0,0 +1,13 @@
+751a6b3f35581b472f4e12f382c79822 *DESCRIPTION
+37c39c6efcd73c5db52c9f7d2d26301b *NAMESPACE
+b0aabf99eb6db36b0452ee07a612dbff *R/pvclust-internal.R
+73faece83dca117e5f5e7f58418d5c34 *R/pvclust.R
+4ce445fdf8be068ed0f770d3c7bafd17 *data/lung.RData
+d4137b4bb2fc7b26f40719391597a20a *man/lung.Rd
+305eedf26b855648f7456e9da01e5094 *man/msfit.Rd
+7fb48846f78962a79e748621b99ba738 *man/msplot.Rd
+9dfaa13e0b453115bd6191036f6a5473 *man/plot.pvclust.Rd
+3ccb94ceeb320278445c2be3fd41bcbb *man/print.pvclust.Rd
+10d182003a1d3ab8ab43b6a0f3759974 *man/pvclust.Rd
+cc1b3150c5eaedb0877afdfd2f385bed *man/pvpick.Rd
+c5bd20b8d802595502f2f6dc4ce989df *man/seplot.Rd
diff --git a/R/pvclust.R b/R/pvclust.R
index 5678e7c..a3f590a 100755
--- a/R/pvclust.R
+++ b/R/pvclust.R
@@ -1,440 +1,452 @@
-pvclust <- function(data, method.hclust="average",
-                    method.dist="correlation", use.cor="pairwise.complete.obs",
-                    nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE)
-  {
-    # data: (n,p) matrix, n-samples, p-variables
-    n <- nrow(data); p <- ncol(data)
-
-    # hclust for original data
-    METHODS <- c("ward", "single", "complete", "average", "mcquitty", 
-                 "median", "centroid")
-    method.hclust <- METHODS[pmatch(method.hclust, METHODS)]
-    distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
-    data.hclust <- hclust(distance, method=method.hclust)
-    
-    # multiscale bootstrap
-    size <- floor(n*r)
-    rl <- length(size)
-    
-    if(rl == 1) {
-      if(r != 1.0)
-        warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
-      
-      r <- list(1.0)
-    }
-    else
-      r <- as.list(size/n)
-    
-    mboot <- lapply(r, boot.hclust, data=data, object.hclust=data.hclust, nboot=nboot,
-                    method.dist=method.dist, use.cor=use.cor,
-                    method.hclust=method.hclust, store=store, weight=weight)
-    
-    result <- pvclust.merge(data=data, object.hclust=data.hclust, mboot=mboot)
-    
-    return(result)
-  }
-
-plot.pvclust <- function(x, print.pv=TRUE, print.num=TRUE, float=0.01,
-                         col.pv=c(2,3,8), cex.pv=0.8, font.pv=NULL,
-                         col=NULL, cex=NULL, font=NULL, lty=NULL, lwd=NULL,
-                         main=NULL, sub=NULL, xlab=NULL, ...)
-{
-  if(is.null(main))
-    main="Cluster dendrogram with AU/BP values (%)"
-  
-  if(is.null(sub))
-    sub=paste("Cluster method: ", x$hclust$method, sep="")
-  
-  if(is.null(xlab))
-    xlab=paste("Distance: ", x$hclust$dist.method)
-      
-  plot(x$hclust, main=main, sub=sub, xlab=xlab, col=col, cex=cex,
-       font=font, lty=lty, lwd=lwd, ...)
-
-  if(print.pv)
-    text(x, col=col.pv, cex=cex.pv, font=font.pv, float=float, print.num=print.num)
-}
-
-text.pvclust <- function(x, col=c(2,3,8), print.num=TRUE,  float=0.01, cex=NULL, font=NULL, ...)
-{
-  axes <- hc2axes(x$hclust)
-  usr  <- par()$usr; wid <- usr[4] - usr[3]
-  au <- as.character(round(x$edges[,"au"]*100))
-  bp <- as.character(round(x$edges[,"bp"]*100))
-  rn <- as.character(row.names(x$edges))
-  au[length(au)] <- "au"
-  bp[length(bp)] <- "bp"
-  rn[length(rn)] <- "edge #"
-  a <- text(x=axes[,1], y=axes[,2] + float * wid, au,
-            col=col[1], pos=2, offset=.3, cex=cex, font=font)
-  a <- text(x=axes[,1], y=axes[,2] + float * wid, bp,
-            col=col[2], pos=4, offset=.3, cex=cex, font=font)
-  if(print.num)
-    a <- text(x=axes[,1], y=axes[,2], rn,
-              col=col[3], pos=1, offset=.3, cex=cex, font=font)
-}
-
-print.pvclust <- function(x, which=NULL, digits=3, ...)
-{
-  if(is.null(which)) which <- 1:nrow(x$edges)
-  cat("\n")
-  cat(paste("Cluster method: ", x$hclust$method, "\n", sep=""))
-  cat(paste("Distance      : ", x$hclust$dist.method, "\n\n", sep=""))
-  cat("Estimates on edges:\n\n")
-  print(round(x$edges[which,], digits=digits))
-  cat("\n")
-}
-
-summary.pvclust <- function(object, ...){
-  class(object) <- "list"
-  summary(object, ...)
-}
-
-pvrect <- function(x, alpha=0.95, pv="au", type="geq", max.only=TRUE, border=2, ...)
-  {
-    len <- nrow(x$edges)
-    member <- hc2split(x$hclust)$member
-    order  <- x$hclust$order
-    usr <- par("usr")
-    xwd <- usr[2] - usr[1]
-    ywd <- usr[4] - usr[3]
-    cin <- par()$cin
-
-    ht <- c()
-    j <- 1
-
-    if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
-       stop("Invalid type argument: see help(pvrect)")
-    
-    for(i in (len - 1):1)
-      {
-        if     (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
-        else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
-        else if(pm==3) wh <- (x$edges[i,pv] >  alpha) # Greater Than
-        else if(pm==4) wh <- (x$edges[i,pv] >  alpha) # Lower Than
-
-        if(wh)
-          {
-            mi <- member[[i]]
-            ma <- match(mi, order)
-            
-            if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
-              {
-                xl <- min(ma)
-                xr <- max(ma)
-                yt <- x$hclust$height[i]
-                yb <- usr[3]
-                
-                mx <- xwd / length(member) / 3
-                my <- ywd / 200
-                
-                rect(xl - mx, yb + my, xr + mx, yt + my, border=border, shade=NULL, ...)
-                
-                j <- j + 1
-              }
-            ht <- c(ht, ma)
-          }
-      }
-  }
-
-msplot <- function(x, edges=NULL, ...)
-  {
-    if(is.null(edges)) edges <- 1:length(x$msfit)
-    d   <- length(edges)
-
-    mfrow.bak <- par()$mfrow
-    on.exit(par(mfrow=mfrow.bak))
-
-    par(mfrow=n2mfrow(d))
-
-    for(i in edges) {
-      if(i == 1 || (i %% 10 == 1 && i > 20))
-        main <- paste(i, "st edge", sep="")
-      else if(i == 2 || (i %% 10 == 2 && i > 20))
-        main <- paste(i, "nd edge", sep="")
-      else if(i == 3 || (i %% 10 == 3 && i > 20))
-        main <- paste(i, "rd edge", sep="")
-      else
-        main <- paste(i, "th edge", sep="")
-
-      plot(x$msfit[[i]], main=main, ...)
-    }
-  }
-
-lines.pvclust <- function(x, alpha=0.95, pv="au", type="geq", col=2, lwd=2, ...)
-  {
-    len <- nrow(x$edges)
-    member <- hc2split(x$hclust)$member
-    order  <- x$hclust$order
-    usr <- par("usr")
-    xwd <- usr[2] - usr[1]
-    ywd <- usr[4] - usr[3]
-    cin <- par()$cin
-
-    ht <- c()
-    j <- 1
-    
-    if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
-       stop("Invalid type argument: see help(lines.pvclust)")
-    
-    for(i in (len - 1):1)
-      {
-        if     (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
-        else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
-        else if(pm==3) wh <- (x$edges[i,pv] >  alpha) # Greater Than
-        else if(pm==4) wh <- (x$edges[i,pv] >  alpha) # Lower Than
-
-        if(wh)
-          {
-            mi <- member[[i]]
-            ma <- match(mi, order)
-            
-            if(sum(match(ma, ht, nomatch=0)) == 0)
-              {
-                xl <- min(ma)
-                xr <- max(ma)
-                yt <- x$hclust$height[i]
-                yb <- usr[3]
-                
-                mx <- xwd/length(member)/10
-                
-                segments(xl-mx, yb, xr+mx, yb, xpd=TRUE, col=col, lwd=lwd, ...)
-
-                j <- j + 1
-              }
-            ht <- c(ht, ma)
-          }
-      }
-  }
-
-pvpick <- function(x, alpha=0.95, pv="au", type="geq", max.only=TRUE)
-  {
-    len <- nrow(x$edges)
-    member <- hc2split(x$hclust)$member
-    order  <- x$hclust$order
-    
-    ht <- c()
-    a  <- list(clusters=list(), edges=c()); j <- 1
-
-    if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
-       stop("Invalid type argument: see help(pickup)")
-    
-    for(i in (len - 1):1)
-      {
-        if     (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or Equals
-        else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or Equals
-        else if(pm==3) wh <- (x$edges[i,pv] >  alpha) # Greater Than
-        else if(pm==4) wh <- (x$edges[i,pv] >  alpha) # Lower Than
-
-        if(wh)
-          {
-            mi <- member[[i]]
-            ma <- match(mi, order)
-
-            if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
-              {
-                a$clusters[[j]] <- x$hclust$labels[mi]
-                a$edges <- c(a$edges,i)
-                
-                j <- j + 1
-              }
-            ht <- c(ht, ma)
-          }
-      }
-    
-    a$edges <- a$edges[length(a$edges):1]
-    a$clusters <- a$clusters[length(a$edges):1]
-
-    return(a)
-  }
-
-parPvclust <- function(cl, data, method.hclust="average",
-                       method.dist="correlation", use.cor="pairwise.complete.obs",
-                       nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE,
-                       weight=FALSE,
-                       init.rand=TRUE, seed=NULL)
-  {
-    if(!(require(snow))) stop("Package snow is required for parPvclust.")
-    
-    if((ncl <- length(cl)) < 2 || ncl > nboot) {
-      warning("Too small value for nboot: non-parallel version is executed.")
-      return(pvclust(data,method.hclust,method.dist,use.cor,nboot,r,store))
-    }
-
-    if(init.rand) {
-      if(is.null(seed))
-        seed <- 1:length(cl)
-      else if(length(seed) != length(cl))
-        stop("seed and cl should have the same length.")
-      
-      # setting random seeds
-      parLapply(cl, as.list(seed), set.seed)
-    }
-
-    # data: (n,p) matrix, n-samples, p-variables
-    n <- nrow(data); p <- ncol(data)
-
-    # hclust for original data
-    METHODS <- c("ward", "single", "complete", "average", "mcquitty", 
-                 "median", "centroid")
-    method.hclust <- METHODS[pmatch(method.hclust, METHODS)]
-    distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
-    data.hclust <- hclust(distance, method=method.hclust)
-    
-    # multiscale bootstrap
-    size <- floor(n*r)
-    rl <- length(size)
-    
-    if(rl == 1) {
-      if(r != 1.0)
-        warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
-      
-      r <- list(1.0)
-    }
-    else
-      r <- as.list(size/n)
-
-    nbl <- as.list(rep(nboot %/% ncl,times=ncl))
-    
-    if((rem <- nboot %% ncl) > 0)
-    nbl[1:rem] <- lapply(nbl[1:rem], "+", 1)
-
-    cat("Multiscale bootstrap... ")
-    
-    mlist <- parLapply(cl, nbl, pvclust.node,
-                       r=r, data=data, object.hclust=data.hclust, method.dist=method.dist,
-                       use.cor=use.cor, method.hclust=method.hclust,
-                       store=store, weight=weight)
-    cat("Done.\n")
-    
-    mboot <- mlist[[1]]
-
-    for(i in 2:ncl) {
-      for(j in 1:rl) {
-        mboot[[j]]$edges.cnt <- mboot[[j]]$edges.cnt + mlist[[i]][[j]]$edges.cnt
-        mboot[[j]]$nboot <- mboot[[j]]$nboot + mlist[[i]][[j]]$nboot
-        mboot[[j]]$store <- c(mboot[[j]]$store, mlist[[i]][[j]]$store)
-      }
-    }
-
-    result <- pvclust.merge( data=data, object.hclust=data.hclust, mboot=mboot)
-    
-    return(result)
-  }
-
-msfit <- function(bp, r, nboot) {
-
-  if(length(bp) != length(r))
-    stop("bp and r should have the same length")
-
-  nboot <- rep(nboot, length=length(bp))
-
-  use <- bp > 0 & bp < 1
-
-  p <- se <- c(0,0); names(p) <- names(se) <- c("au", "bp")
-  coef <- c(0,0); names(coef) <- c("v", "c")
-
-  a <- list(p=p, se=se, coef=coef, df=0, rss=0, pchi=0); class(a) <- "msfit"
-
-  if(sum(use) < 2) {
-    # if(mean(bp) < .5) a$p[] <- c(0, 0) else a$p[] <- c(1, 1)
-    if(mean(bp) < .5) a$p[] <- c(0, bp[r==1.0]) else a$p[] <- c(1, bp[r==1.0])
-    return(a)
-  }
-
-  bp <- bp[use]; r <- r[use]; nboot <- nboot[use]
-  zz <- -qnorm(bp)
-  vv <- ((1 - bp) * bp) / (dnorm(zz)^2 * nboot)
-  a$use <- use; a$r <- r; a$zz <- zz
-
-  X   <- cbind(sqrt(r), 1/sqrt(r)); dimnames(X) <- list(NULL, c("v","c"))
-  fit <- lsfit(X, zz, 1/vv, intercept=FALSE)
-  a$coef <- coef <- fit$coef
-
-  h.au <- c(1, -1); h.bp <- c(1, 1)
-  
-  z.au <- drop(h.au %*% coef); z.bp <- drop(h.bp %*% coef)
-  a$p["au"] <- pnorm(-z.au); a$p["bp"] <- pnorm(-z.bp)
-  V <- solve(crossprod(X, X/vv))
-  vz.au <- drop(h.au %*% V %*% h.au); vz.bp <- drop(h.bp %*% V %*% h.bp)
-  a$se["au"] <- dnorm(z.au) * sqrt(vz.au); a$se["bp"] <- dnorm(z.bp) * sqrt(vz.bp)
-  a$rss <- sum(fit$residual^2/vv)
-  
-  if((a$df <- sum(use) - 2) > 0) {
-    a$pchi <- pchisq(a$rss, lower.tail=FALSE, df=a$df)
-  }
-  else a$pchi <- 1.0
-
-  return(a)
-}
-
-plot.msfit <- function(x, curve=TRUE, main=NULL, sub=NULL, xlab=NULL, ylab=NULL, ...)
-{
-  if(is.null(main)) main="Curve fitting for multiscale bootstrap resampling"
-  if(is.null(sub))
-    {
-      sub  <- paste("AU = ", round(x$p["au"], digits=2),
-                    ", BP = ", round(x$p["bp"], digits=2),
-                    ", v = ", round(x$coef["v"], digits=2),
-                    ", c = ", round(x$coef["c"], digits=2),
-                    ", pchi = ", round(x$pchi, digits=2))
-    }
-  if(is.null(xlab)) xlab=expression(sqrt(r))
-  if(is.null(ylab)) ylab=expression(z-value)
-  
-  a <- sqrt(x$r); b <- x$zz
-  
-  if(!is.null(a) && !is.null(b)) {
-    plot(a, b, main=main, sub=sub, xlab=xlab, ylab=ylab, ...)
-    if(curve) lines(x, ...)
-  }
-  else if (!is.null(a)){
-    plot(0, 0, main=main, sub=sub, xlab=xlab, ylab=ylab,
-         type="n", xaxt="n", yaxt="n", ...)
-    a <- text(mean(a), 0, "No fitting")
-  }
-}
-
-lines.msfit <- function(x, col=2, lty=1, ...) {
-  v <- x$coef["v"]; c <- x$coef["c"]
-  curve(v * x + c / x, add=TRUE, col=col, lty=lty)
-}
-
-summary.msfit <- function(object, digits=3, ...) {
-  cat("\nResult of curve fitting for multiscale bootstrap resampling:\n\n")
-
-  cat("Estimated p-values:\n")
-  pv <- data.frame(object$p, object$se)
-  names(pv) <- c("Estimate", "Std. Error"); row.names(pv) <- c("au", "bp")
-  print(pv, digits=digits); cat("\n")
-
-  cat("Estimated coefficients:\n")
-  coef <- object$coef
-  print(coef, digits=digits); cat("\n")
-
-  cat(paste("Residual sum of squares: ", round(object$rss,digits=digits)),
-      ",   p-value: ", round(object$pchi, digits=digits),
-      " on ", object$df, " DF\n\n", sep="")
-}
-
-seplot <- function(object, type=c("au", "bp"), identify=FALSE,
-                   main=NULL, xlab=NULL, ylab=NULL, ...)
-  {
-    if(!is.na(pm <- pmatch(type[1], c("au", "bp")))) {
-      wh <- c("au", "bp")[pm]
-      
-      if(is.null(main))
-        main <- "p-value vs standard error plot"
-      if(is.null(xlab))
-        xlab <- c("AU p-value", "BP value")[pm]
-      if(is.null(ylab))
-        ylab <- "Standard Error"
-      
-      plot(object$edges[,wh], object$edges[,paste("se", wh, sep=".")],
-           main=main, xlab=xlab, ylab=ylab, ...)
-      if(identify)
-        identify(x=object$edges[,wh], y=object$edges[,paste("se", wh, sep=".")],
-                 labels=row.names(object$edges))
-    }
-    else stop("'type' should be \"au\" or \"bp\".")
-  }
+pvclust <- function(data, method.hclust="average",
+                    method.dist="correlation", use.cor="pairwise.complete.obs",
+                    nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE)
+  {
+    # data: (n,p) matrix, n-samples, p-variables
+    n <- nrow(data); p <- ncol(data)
+
+    # hclust for original data
+#    METHODS <- c("ward", "single", "complete", "average", "mcquitty", 
+#                 "median", "centroid")
+#    method.hclust <- METHODS[pmatch(method.hclust, METHODS)]
+    distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
+    data.hclust <- hclust(distance, method=method.hclust)
+    
+    # ward -> ward.D
+    if(method.hclust == "ward") method.hclust <- "ward.D"
+
+    # multiscale bootstrap
+    size <- floor(n*r)
+    rl <- length(size)
+    
+    if(rl == 1) {
+      if(r != 1.0)
+        warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
+      
+      r <- list(1.0)
+    }
+    else
+      r <- as.list(size/n)
+    
+    mboot <- lapply(r, boot.hclust, data=data, object.hclust=data.hclust, nboot=nboot,
+                    method.dist=method.dist, use.cor=use.cor,
+                    method.hclust=method.hclust, store=store, weight=weight)
+    
+    result <- pvclust.merge(data=data, object.hclust=data.hclust, mboot=mboot)
+    
+    return(result)
+  }
+
+plot.pvclust <- function(x, print.pv=TRUE, print.num=TRUE, float=0.01,
+                         col.pv=c(2,3,8), cex.pv=0.8, font.pv=NULL,
+                         col=NULL, cex=NULL, font=NULL, lty=NULL, lwd=NULL,
+                         main=NULL, sub=NULL, xlab=NULL, ...)
+{
+  if(is.null(main))
+    main="Cluster dendrogram with AU/BP values (%)"
+  
+  if(is.null(sub))
+    sub=paste("Cluster method: ", x$hclust$method, sep="")
+  
+  if(is.null(xlab))
+    xlab=paste("Distance: ", x$hclust$dist.method)
+      
+  plot(x$hclust, main=main, sub=sub, xlab=xlab, col=col, cex=cex,
+       font=font, lty=lty, lwd=lwd, ...)
+
+  if(print.pv)
+    text(x, col=col.pv, cex=cex.pv, font=font.pv, float=float, print.num=print.num)
+}
+
+text.pvclust <- function(x, col=c(2,3,8), print.num=TRUE,  float=0.01, cex=NULL, font=NULL, ...)
+{
+  axes <- hc2axes(x$hclust)
+  usr  <- par()$usr; wid <- usr[4] - usr[3]
+  au <- as.character(round(x$edges[,"au"]*100))
+  bp <- as.character(round(x$edges[,"bp"]*100))
+  rn <- as.character(row.names(x$edges))
+  au[length(au)] <- "au"
+  bp[length(bp)] <- "bp"
+  rn[length(rn)] <- "edge #"
+  a <- text(x=axes[,1], y=axes[,2] + float * wid, au,
+            col=col[1], pos=2, offset=.3, cex=cex, font=font)
+  a <- text(x=axes[,1], y=axes[,2] + float * wid, bp,
+            col=col[2], pos=4, offset=.3, cex=cex, font=font)
+  if(print.num)
+    a <- text(x=axes[,1], y=axes[,2], rn,
+              col=col[3], pos=1, offset=.3, cex=cex, font=font)
+}
+
+print.pvclust <- function(x, which=NULL, digits=3, ...)
+{
+  if(is.null(which)) which <- 1:nrow(x$edges)
+  cat("\n")
+  cat(paste("Cluster method: ", x$hclust$method, "\n", sep=""))
+  cat(paste("Distance      : ", x$hclust$dist.method, "\n\n", sep=""))
+  cat("Estimates on edges:\n\n")
+  print(round(x$edges[which,], digits=digits))
+  cat("\n")
+}
+
+summary.pvclust <- function(object, ...){
+  class(object) <- "list"
+  summary(object, ...)
+}
+
+pvrect <- function(x, alpha=0.95, pv="au", type="geq", max.only=TRUE, border=2, ...)
+  {
+    len <- nrow(x$edges)
+    member <- hc2split(x$hclust)$member
+    order  <- x$hclust$order
+    usr <- par("usr")
+    xwd <- usr[2] - usr[1]
+    ywd <- usr[4] - usr[3]
+    cin <- par()$cin
+
+    ht <- c()
+    j <- 1
+
+    if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
+       stop("Invalid type argument: see help(pvrect)")
+    
+    for(i in (len - 1):1)
+      {
+        if     (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
+        else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
+        else if(pm==3) wh <- (x$edges[i,pv] >  alpha) # Greater Than
+        else if(pm==4) wh <- (x$edges[i,pv] >  alpha) # Lower Than
+
+        if(wh)
+          {
+            mi <- member[[i]]
+            ma <- match(mi, order)
+            
+            if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
+              {
+                xl <- min(ma)
+                xr <- max(ma)
+                yt <- x$hclust$height[i]
+                yb <- usr[3]
+                
+                mx <- xwd / length(member) / 3
+                my <- ywd / 200
+                
+                rect(xl - mx, yb + my, xr + mx, yt + my, border=border, shade=NULL, ...)
+                
+                j <- j + 1
+              }
+            ht <- c(ht, ma)
+          }
+      }
+  }
+
+msplot <- function(x, edges=NULL, ...)
+  {
+    if(is.null(edges)) edges <- 1:length(x$msfit)
+    d   <- length(edges)
+
+    mfrow.bak <- par()$mfrow
+    on.exit(par(mfrow=mfrow.bak))
+
+    par(mfrow=n2mfrow(d))
+
+    for(i in edges) {
+      if(i == 1 || (i %% 10 == 1 && i > 20))
+        main <- paste(i, "st edge", sep="")
+      else if(i == 2 || (i %% 10 == 2 && i > 20))
+        main <- paste(i, "nd edge", sep="")
+      else if(i == 3 || (i %% 10 == 3 && i > 20))
+        main <- paste(i, "rd edge", sep="")
+      else
+        main <- paste(i, "th edge", sep="")
+
+      plot(x$msfit[[i]], main=main, ...)
+    }
+  }
+
+lines.pvclust <- function(x, alpha=0.95, pv="au", type="geq", col=2, lwd=2, ...)
+  {
+    len <- nrow(x$edges)
+    member <- hc2split(x$hclust)$member
+    order  <- x$hclust$order
+    usr <- par("usr")
+    xwd <- usr[2] - usr[1]
+    ywd <- usr[4] - usr[3]
+    cin <- par()$cin
+
+    ht <- c()
+    j <- 1
+    
+    if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
+       stop("Invalid type argument: see help(lines.pvclust)")
+    
+    for(i in (len - 1):1)
+      {
+        if     (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
+        else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
+        else if(pm==3) wh <- (x$edges[i,pv] >  alpha) # Greater Than
+        else if(pm==4) wh <- (x$edges[i,pv] >  alpha) # Lower Than
+
+        if(wh)
+          {
+            mi <- member[[i]]
+            ma <- match(mi, order)
+            
+            if(sum(match(ma, ht, nomatch=0)) == 0)
+              {
+                xl <- min(ma)
+                xr <- max(ma)
+                yt <- x$hclust$height[i]
+                yb <- usr[3]
+                
+                mx <- xwd/length(member)/10
+                
+                segments(xl-mx, yb, xr+mx, yb, xpd=TRUE, col=col, lwd=lwd, ...)
+
+                j <- j + 1
+              }
+            ht <- c(ht, ma)
+          }
+      }
+  }
+
+pvpick <- function(x, alpha=0.95, pv="au", type="geq", max.only=TRUE)
+  {
+    len <- nrow(x$edges)
+    member <- hc2split(x$hclust)$member
+    order  <- x$hclust$order
+    
+    ht <- c()
+    a  <- list(clusters=list(), edges=c()); j <- 1
+
+    if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
+       stop("Invalid type argument: see help(pickup)")
+    
+    for(i in (len - 1):1)
+      {
+        if     (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or Equals
+        else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or Equals
+        else if(pm==3) wh <- (x$edges[i,pv] >  alpha) # Greater Than
+        else if(pm==4) wh <- (x$edges[i,pv] <  alpha) # Lower Than
+
+        if(wh)
+          {
+            mi <- member[[i]]
+            ma <- match(mi, order)
+
+            if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
+              {
+                a$clusters[[j]] <- x$hclust$labels[mi]
+                a$edges <- c(a$edges,i)
+                
+                j <- j + 1
+              }
+            ht <- c(ht, ma)
+          }
+      }
+    
+    a$edges <- a$edges[length(a$edges):1]
+    a$clusters <- a$clusters[length(a$edges):1]
+
+    return(a)
+  }
+
+parPvclust <- function(cl=NULL, data, method.hclust="average",
+                       method.dist="correlation", use.cor="pairwise.complete.obs",
+                       nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE,
+                       weight=FALSE,
+                       init.rand=TRUE, seed=NULL, iseed=NULL)
+  {
+    # if(!(require(snow))) stop("Package snow is required for parPvclust.")
+    if(!(require(parallel))) stop("Package parallel is required for parPvclust.")
+
+    if((ncl <- length(cl)) < 2 || ncl > nboot) {
+      if(ncl > nboot)
+        warning("Too small value for nboot: non-parallel version is executed.")
+      else
+        warning("Too small (or NULL) cluster: non-parallel version is executed.")
+      
+      return(pvclust(data,method.hclust,method.dist,use.cor,nboot,r,store))
+    }
+
+    if(init.rand) {
+      if(is.null(iseed) && !is.null(seed)) {
+        warning("\"seed\" option is deprecated. It is available for back compatibility but will be unavailable in the future.\nConsider using \"iseed\" instead.")
+        
+        if(length(seed) != length(cl))
+          stop("seed and cl should have the same length.")
+        
+        # setting random seeds
+        parallel::parLapply(cl, as.list(seed), set.seed)
+      } else {
+        parallel::clusterSetRNGStream(cl = cl, iseed = iseed)
+      }
+    }
+
+    # data: (n,p) matrix, n-samples, p-variables
+    n <- nrow(data); p <- ncol(data)
+
+    # hclust for original data
+    METHODS <- c("ward", "single", "complete", "average", "mcquitty", 
+                 "median", "centroid")
+    method.hclust <- METHODS[pmatch(method.hclust, METHODS)]
+    distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
+    data.hclust <- hclust(distance, method=method.hclust)
+    
+    # multiscale bootstrap
+    size <- floor(n*r)
+    rl <- length(size)
+    
+    if(rl == 1) {
+      if(r != 1.0)
+        warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
+      
+      r <- list(1.0)
+    }
+    else
+      r <- as.list(size/n)
+
+    nbl <- as.list(rep(nboot %/% ncl,times=ncl))
+    
+    if((rem <- nboot %% ncl) > 0)
+    nbl[1:rem] <- lapply(nbl[1:rem], "+", 1)
+
+    cat("Multiscale bootstrap... ")
+    
+    mlist <- parallel::parLapply(cl, nbl, pvclust.node,
+                       r=r, data=data, object.hclust=data.hclust, method.dist=method.dist,
+                       use.cor=use.cor, method.hclust=method.hclust,
+                       store=store, weight=weight)
+    cat("Done.\n")
+    
+    mboot <- mlist[[1]]
+
+    for(i in 2:ncl) {
+      for(j in 1:rl) {
+        mboot[[j]]$edges.cnt <- mboot[[j]]$edges.cnt + mlist[[i]][[j]]$edges.cnt
+        mboot[[j]]$nboot <- mboot[[j]]$nboot + mlist[[i]][[j]]$nboot
+        mboot[[j]]$store <- c(mboot[[j]]$store, mlist[[i]][[j]]$store)
+      }
+    }
+
+    result <- pvclust.merge( data=data, object.hclust=data.hclust, mboot=mboot)
+    
+    return(result)
+  }
+
+msfit <- function(bp, r, nboot) {
+
+  if(length(bp) != length(r))
+    stop("bp and r should have the same length")
+
+  nboot <- rep(nboot, length=length(bp))
+
+  use <- bp > 0 & bp < 1
+
+  p <- se <- c(0,0); names(p) <- names(se) <- c("au", "bp")
+  coef <- c(0,0); names(coef) <- c("v", "c")
+
+  a <- list(p=p, se=se, coef=coef, df=0, rss=0, pchi=0); class(a) <- "msfit"
+
+  if(sum(use) < 2) {
+    # if(mean(bp) < .5) a$p[] <- c(0, 0) else a$p[] <- c(1, 1)
+    if(mean(bp) < .5) a$p[] <- c(0, bp[r==1.0]) else a$p[] <- c(1, bp[r==1.0])
+    return(a)
+  }
+
+  bp <- bp[use]; r <- r[use]; nboot <- nboot[use]
+  zz <- -qnorm(bp)
+  vv <- ((1 - bp) * bp) / (dnorm(zz)^2 * nboot)
+  a$use <- use; a$r <- r; a$zz <- zz
+
+  X   <- cbind(sqrt(r), 1/sqrt(r)); dimnames(X) <- list(NULL, c("v","c"))
+  fit <- lsfit(X, zz, 1/vv, intercept=FALSE)
+  a$coef <- coef <- fit$coef
+
+  h.au <- c(1, -1); h.bp <- c(1, 1)
+  
+  z.au <- drop(h.au %*% coef); z.bp <- drop(h.bp %*% coef)
+  a$p["au"] <- pnorm(-z.au); a$p["bp"] <- pnorm(-z.bp)
+  V <- solve(crossprod(X, X/vv))
+  vz.au <- drop(h.au %*% V %*% h.au); vz.bp <- drop(h.bp %*% V %*% h.bp)
+  a$se["au"] <- dnorm(z.au) * sqrt(vz.au); a$se["bp"] <- dnorm(z.bp) * sqrt(vz.bp)
+  a$rss <- sum(fit$residual^2/vv)
+  
+  if((a$df <- sum(use) - 2) > 0) {
+    a$pchi <- pchisq(a$rss, lower.tail=FALSE, df=a$df)
+  }
+  else a$pchi <- 1.0
+
+  return(a)
+}
+
+plot.msfit <- function(x, curve=TRUE, main=NULL, sub=NULL, xlab=NULL, ylab=NULL, ...)
+{
+  if(is.null(main)) main="Curve fitting for multiscale bootstrap resampling"
+  if(is.null(sub))
+    {
+      sub  <- paste("AU = ", round(x$p["au"], digits=2),
+                    ", BP = ", round(x$p["bp"], digits=2),
+                    ", v = ", round(x$coef["v"], digits=2),
+                    ", c = ", round(x$coef["c"], digits=2),
+                    ", pchi = ", round(x$pchi, digits=2))
+    }
+  if(is.null(xlab)) xlab=expression(sqrt(r))
+  if(is.null(ylab)) ylab=expression(z-value)
+  
+  a <- sqrt(x$r); b <- x$zz
+  
+  if(!is.null(a) && !is.null(b)) {
+    plot(a, b, main=main, sub=sub, xlab=xlab, ylab=ylab, ...)
+    if(curve) lines(x, ...)
+  }
+  else if (!is.null(a)){
+    plot(0, 0, main=main, sub=sub, xlab=xlab, ylab=ylab,
+         type="n", xaxt="n", yaxt="n", ...)
+    a <- text(mean(a), 0, "No fitting")
+  }
+}
+
+lines.msfit <- function(x, col=2, lty=1, ...) {
+  v <- x$coef["v"]; c <- x$coef["c"]
+  curve(v * x + c / x, add=TRUE, col=col, lty=lty)
+}
+
+summary.msfit <- function(object, digits=3, ...) {
+  cat("\nResult of curve fitting for multiscale bootstrap resampling:\n\n")
+
+  cat("Estimated p-values:\n")
+  pv <- data.frame(object$p, object$se)
+  names(pv) <- c("Estimate", "Std. Error"); row.names(pv) <- c("au", "bp")
+  print(pv, digits=digits); cat("\n")
+
+  cat("Estimated coefficients:\n")
+  coef <- object$coef
+  print(coef, digits=digits); cat("\n")
+
+  cat(paste("Residual sum of squares: ", round(object$rss,digits=digits)),
+      ",   p-value: ", round(object$pchi, digits=digits),
+      " on ", object$df, " DF\n\n", sep="")
+}
+
+seplot <- function(object, type=c("au", "bp"), identify=FALSE,
+                   main=NULL, xlab=NULL, ylab=NULL, ...)
+  {
+    if(!is.na(pm <- pmatch(type[1], c("au", "bp")))) {
+      wh <- c("au", "bp")[pm]
+      
+      if(is.null(main))
+        main <- "p-value vs standard error plot"
+      if(is.null(xlab))
+        xlab <- c("AU p-value", "BP value")[pm]
+      if(is.null(ylab))
+        ylab <- "Standard Error"
+      
+      plot(object$edges[,wh], object$edges[,paste("se", wh, sep=".")],
+           main=main, xlab=xlab, ylab=ylab, ...)
+      if(identify)
+        identify(x=object$edges[,wh], y=object$edges[,paste("se", wh, sep=".")],
+                 labels=row.names(object$edges))
+    }
+    else stop("'type' should be \"au\" or \"bp\".")
+  }
diff --git a/man/lung.Rd b/man/lung.Rd
index 06d0462..7ebcdec 100755
--- a/man/lung.Rd
+++ b/man/lung.Rd
@@ -14,6 +14,7 @@ There are 916 observations of genes for each lung tissue.
   section in this help for original data source.
 }
 \examples{
+\donttest{
 ## Reading the data
 data(lung)
 
@@ -41,6 +42,7 @@ lung.pp$clusters[[2]]
 
 ## Print its edge number
 lung.pp$edges[2]
+}
 
 ## We recommend parallel computing for large dataset as this one
 \dontrun{
diff --git a/man/msfit.Rd b/man/msfit.Rd
index 2166d68..2f84619 100755
--- a/man/msfit.Rd
+++ b/man/msfit.Rd
@@ -66,5 +66,5 @@ msfit(bp, r, nboot)
   "An approximately unbiased test of phylogenetic tree selection",
   \emph{Systematic Biology}, 51, 492-508.
 }
-\author{Ryota Suzuki \email{ryota.suzuki at is.titech.ac.jp}}
+\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
 \keyword{htest}
\ No newline at end of file
diff --git a/man/msplot.Rd b/man/msplot.Rd
index 4b618ee..3713473 100755
--- a/man/msplot.Rd
+++ b/man/msplot.Rd
@@ -12,5 +12,5 @@ msplot(x, edges=NULL, ...)
   \item{...}{other parameters to be used in the function.}
 }
 \seealso{\code{\link{plot.msfit}}}
-\author{Ryota Suzuki \email{ryota.suzuki at is.titech.ac.jp}}
+\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
 \keyword{hplot}
diff --git a/man/plot.pvclust.Rd b/man/plot.pvclust.Rd
index 9dbaf97..b3296de 100755
--- a/man/plot.pvclust.Rd
+++ b/man/plot.pvclust.Rd
@@ -61,5 +61,5 @@
   "An approximately unbiased test of phylogenetic tree selection",
   \emph{Systematic Biology}, 51, 492-508.
 }
-\author{Ryota Suzuki \email{ryota.suzuki at is.titech.ac.jp}}
+\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
 \keyword{hplot}
diff --git a/man/print.pvclust.Rd b/man/print.pvclust.Rd
index 0212358..3314756 100755
--- a/man/print.pvclust.Rd
+++ b/man/print.pvclust.Rd
@@ -33,5 +33,5 @@
     the boundary.}
   \item{pchi}{\eqn{p}-values of chi-square test based on asymptotic theory.}
 }
-\author{Ryota Suzuki \email{ryota.suzuki at is.titech.ac.jp}}
+\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
 \keyword{print}
\ No newline at end of file
diff --git a/man/pvclust.Rd b/man/pvclust.Rd
index 00fefd4..a71d830 100755
--- a/man/pvclust.Rd
+++ b/man/pvclust.Rd
@@ -1,203 +1,208 @@
-\name{pvclust}
-\alias{pvclust}
-\alias{parPvclust}
-\title{Calculating P-values for Hierchical Clustering}
-\description{
-  calculates \eqn{p}-values for hierarchical clustering via
-  multiscale bootstrap resampling. Hierarchical clustering is done for
-  given data and \eqn{p}-values are computed for each of the clusters.
-}
-\usage{
-pvclust(data, method.hclust="average",
-        method.dist="correlation", use.cor="pairwise.complete.obs",
-        nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE)
-
-parPvclust(cl, data, method.hclust="average",
-           method.dist="correlation", use.cor="pairwise.complete.obs",
-           nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE,
-           init.rand=TRUE, seed=NULL)
-
-}
-\arguments{
-  \item{data}{numeric data matrix or data frame.}
-  \item{method.hclust}{
-    the agglomerative method used in hierarchical clustering. This
-    should be (an abbreviation of) one of \code{"average"}, \code{"ward"},
-    \code{"single"}, \code{"complete"}, \code{"mcquitty"},
-    \code{"median"} or \code{"centroid"}. The default is
-    \code{"average"}. See \code{method} argument in
-    \code{\link[stats]{hclust}}.
-  }
-  \item{method.dist}{the distance measure to be used. This should be (an
-    abbreviation of) one of \code{"correlation"}, \code{"uncentered"},
-    \code{"abscor"} or those which are allowed for \code{method}
-    argument in \code{dist} function. The default is
-    \code{"correlation"}. See \emph{details} section in this help and
-    \code{method} argument in \code{\link[stats]{dist}}.
-    }
-  \item{use.cor}{character string which specifies the method for
-    computing correlation with data including missing values. This
-    should be (an abbreviation of) one of \code{"all.obs"},
-    \code{"complete.obs"} or \code{"pairwise.complete.obs"}. See
-    the \code{use} argument in \code{\link[stats]{cor}} function.
-  }
-  \item{nboot}{the number of bootstrap replications. The default is
-    \code{1000}.}
-  \item{r}{numeric vector which specifies the relative sample sizes of
-    bootstrap replications. For original sample size \eqn{n} and
-    bootstrap sample size \eqn{n'}, this is defined as \eqn{r=n'/n}.}
-  \item{store}{locical. If \code{store=TRUE}, all bootstrap replications
-  are stored in the output object. The default is \code{FALSE}.}
-  \item{cl}{\pkg{snow} cluster object which may be generated by
-    function \code{makeCluster}. See \code{\link[snow]{snow-startstop}}
-    in \pkg{snow} package.}
-  \item{weight}{logical. If \code{weight=TRUE}, resampling is made by
-    weight vector instead of index vector. Useful for large \code{r}
-    value (\code{r>10}).  Currently, available only for distance
-    \code{"correlation"} and \code{"abscor"}.}
-  \item{init.rand}{logical. If \code{init.rand=TRUE}, random number
-    generators are initialized at child processes. Random seeds can be
-    set by \code{seed} argument.}
-  \item{seed}{integer vector of random seeds. It should have the same
-    length as \code{cl}. If \code{NULL} is specified,
-    \code{1:length(cl)} is used as seed vector. The default is \code{NULL}.}
-}
-\details{
-  Function \code{pvclust} conducts multiscale bootstrap resampling to calculate
-  \eqn{p}-values for each cluster in the result of hierarchical
-  clustering. \code{parPvclust} is the parallel version of this
-  procedure which depends on \pkg{snow} package for parallel
-  computation.
-
-  For data expressed as \eqn{(n \times p)}{(n, p)} matrix or data frame, we
-  assume that the data is \eqn{n} observations of \eqn{p} objects, which
-  are to be clustered. The \eqn{i}'th row vector corresponds to the
-  \eqn{i}'th observation of these objects and the \eqn{j}'th column
-  vector corresponds to a sample of \eqn{j}'th object with size \eqn{n}.
-
-  There are several methods to measure the dissimilarities between
-  objects. For data matrix \eqn{X=\{x_{ij}\}}{X},
-  \code{"correlation"}
-  method takes
-  \deqn{
-    1 - \frac{
-      \sum_{i=1}^n (x_{ij} - \bar{x}_j) (x_{ik} - \bar{x}_k)
-    }
-    {
-      \sqrt{\sum_{i=1}^n (x_{ij} - \bar{x}_j)^2}
-      \sqrt{\sum_{i=1}^n (x_{ik} - \bar{x}_k)^2}
-    }
-    }{%
-      1 - cor(X)[j,k]
-    }
-  for dissimilarity between \eqn{j}'th and \eqn{k}'th object, where
-  \eqn{\bar{x}_j = \frac{1}{n} \sum_{i=1}^n x_{ij} \mbox{and}
-  \bar{x}_k = \frac{1}{n} \sum_{i=1}^n x_{ik}}{cor is function \code{cor}}.
-
-  \code{"uncentered"} takes uncentered sample correlation
-  \deqn{
-    1 - \frac{
-      \sum_{i=1}^n x_{ij} x_{ik}
-    }
-    {
-      \sqrt{\sum_{i=1}^n x_{ij}^2}
-      \sqrt{\sum_{i=1}^n x_{ik}^2}
-    }
-    }{%
-      1 - sum(x[,j] * x[,k]) / (sqrt(sum(x[,j]^2)) * sqrt(sum(x[,k]^2)))
-    }
-  and \code{"abscor"} takes the absolute value of sample correlation
-  \deqn{
-    1 - \ \Biggl| \frac{
-      \sum_{i=1}^n (x_{ij} - \bar{x}_j) (x_{ik} - \bar{x}_k)
-    }
-    {
-      \sqrt{\sum_{i=1}^n (x_{ij} - \bar{x}_j)^2}
-      \sqrt{\sum_{i=1}^n (x_{ik} - \bar{x}_k)^2}
-    } \Biggl|.
-  }{%
-    1 - abs(cor(X)[j,k]).
-  }
-  }
-\value{
-  \item{hclust}{hierarchical clustering for original data generated by
-    function \code{hclust}. See \code{\link[stats]{hclust}} for details.}
-  \item{edges}{data frame object which contains \eqn{p}-values and
-    supporting informations such as standard errors.}
-  \item{count}{data frame object which contains primitive information
-    about the result of multiscale bootstrap resampling.}
-  \item{msfit}{list whose elements are results of curve fitting for
-    multiscale bootstrap resampling, of class \code{msfit}. See
-    \code{\link{msfit}} for details.}
-  \item{nboot}{numeric vector of number of bootstrap replications.}
-  \item{r}{numeric vector of the relative sample size for bootstrap
-    replications.}
-  \item{store}{list contains bootstrap replications if \code{store=TRUE}
-  was given for function \code{pvclust} or \code{parPvclust}.}
-}
-\seealso{\code{\link{lines.pvclust}}, \code{\link{print.pvclust}},
-  \code{\link{msfit}}, \code{\link{plot.pvclust}},
-  \code{\link{text.pvclust}}, \code{\link{pvrect}} and
-  \code{\link{pvpick}}.}
-\references{
-  Shimodaira, H. (2004)
-  "Approximately unbiased tests of regions using multistep-multiscale
-  bootstrap resampling",
-  \emph{Annals of Statistics}, 32, 2616-2641.
-
-  Shimodaira, H. (2002)
-  "An approximately unbiased test of phylogenetic tree selection",
-  \emph{Systematic Biology}, 51, 492-508.
-
-  Suzuki, R. and Shimodaira, H. (2004)
-  "An application of multiscale bootstrap resampling to hierarchical
-  clustering of microarray data: How accurate are these clusters?",
-  \emph{The Fifteenth International Conference on Genome Informatics 2004},
-  P034.
-
-  \url{http://www.is.titech.ac.jp/~shimo/prog/pvclust/}
-}
-\examples{
-## using Boston data in package MASS
-library(MASS)
-data(Boston)
-
-## multiscale bootstrap resampling
-boston.pv <- pvclust(Boston, nboot=100)
-
-## CAUTION: nboot=100 may be too small for actual use.
-##          We suggest nboot=1000 or larger.
-##          plot/print functions will be useful for diagnostics.
-
-## plot dendrogram with p-values
-plot(boston.pv)
-
-ask.bak <- par()$ask
-par(ask=TRUE)
-
-## highlight clusters with high au p-values
-pvrect(boston.pv)
-
-## print the result of multiscale bootstrap resampling
-print(boston.pv, digits=3)
-
-## plot diagnostic for curve fitting
-msplot(boston.pv, edges=c(2,4,6,7))
-
-par(ask=ask.bak)
-
-## Print clusters with high p-values
-boston.pp <- pvpick(boston.pv)
-boston.pp
-
-\dontrun{
-## parallel computation via snow package
-library(snow)
-cl <- makeCluster(10, type="MPI")
-
-## parallel version of pvclust
-boston.pv <- parPvclust(cl, Boston, nboot=1000)
-}
-}
-\author{Ryota Suzuki \email{ryota.suzuki at is.titech.ac.jp}}
-\keyword{cluster}
\ No newline at end of file
+\name{pvclust}
+\alias{pvclust}
+\alias{parPvclust}
+\title{Calculating P-values for Hierchical Clustering}
+\description{
+  calculates \eqn{p}-values for hierarchical clustering via
+  multiscale bootstrap resampling. Hierarchical clustering is done for
+  given data and \eqn{p}-values are computed for each of the clusters.
+}
+\usage{
+pvclust(data, method.hclust="average",
+        method.dist="correlation", use.cor="pairwise.complete.obs",
+        nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE)
+
+parPvclust(cl=NULL, data, method.hclust="average",
+           method.dist="correlation", use.cor="pairwise.complete.obs",
+           nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE,
+           init.rand=TRUE, seed=NULL, iseed=NULL)
+
+}
+\arguments{
+  \item{data}{numeric data matrix or data frame.}
+  \item{method.hclust}{
+    the agglomerative method used in hierarchical clustering. This
+    should be (an abbreviation of) one of \code{"average"}, \code{"ward"},
+    \code{"single"}, \code{"complete"}, \code{"mcquitty"},
+    \code{"median"} or \code{"centroid"}. The default is
+    \code{"average"}. See \code{method} argument in
+    \code{\link[stats]{hclust}}.
+  }
+  \item{method.dist}{the distance measure to be used. This should be (an
+    abbreviation of) one of \code{"correlation"}, \code{"uncentered"},
+    \code{"abscor"} or those which are allowed for \code{method}
+    argument in \code{dist} function. The default is
+    \code{"correlation"}. See \emph{details} section in this help and
+    \code{method} argument in \code{\link[stats]{dist}}.
+    }
+  \item{use.cor}{character string which specifies the method for
+    computing correlation with data including missing values. This
+    should be (an abbreviation of) one of \code{"all.obs"},
+    \code{"complete.obs"} or \code{"pairwise.complete.obs"}. See
+    the \code{use} argument in \code{\link[stats]{cor}} function.
+  }
+  \item{nboot}{the number of bootstrap replications. The default is
+    \code{1000}.}
+  \item{r}{numeric vector which specifies the relative sample sizes of
+    bootstrap replications. For original sample size \eqn{n} and
+    bootstrap sample size \eqn{n'}, this is defined as \eqn{r=n'/n}.}
+  \item{store}{locical. If \code{store=TRUE}, all bootstrap replications
+  are stored in the output object. The default is \code{FALSE}.}
+  \item{cl}{a cluster object created by package \pkg{parallel} or \pkg{snow}.
+    If NULL, use the registered default cluster.}
+  \item{weight}{logical. If \code{weight=TRUE}, resampling is made by
+    weight vector instead of index vector. Useful for large \code{r}
+    value (\code{r>10}).  Currently, available only for distance
+    \code{"correlation"} and \code{"abscor"}.}
+%  \item{init.rand}{logical. If \code{init.rand=TRUE}, random number
+%    generators are initialized at child processes. Random seeds can be
+%    set by \code{seed} argument.}
+%  \item{seed}{integer vector of random seeds. It should have the same
+%    length as \code{cl}. If \code{NULL} is specified,
+%    \code{1:length(cl)} is used as seed vector. The default is \code{NULL}.}
+  \item{init.rand}{logical. If \code{init.rand=TRUE}, random number generators are initialized.
+    Use \code{iseed} argument to achieve reproducible results.}
+  \item{seed}{integer vector of random seeds. It should have the same
+    length as \code{cl}. \strong{This argument is duplicated and will be unavailable in the future. Consider using \code{iseed} instead.} }
+  \item{iseed}{an integer to be passed to \code{clusterSetRNGStream} when \code{init.rand=TRUE}.}
+
+}
+\details{
+  Function \code{pvclust} conducts multiscale bootstrap resampling to calculate
+  \eqn{p}-values for each cluster in the result of hierarchical
+  clustering. \code{parPvclust} is the parallel version of this
+  procedure which depends on package \pkg{parallel} for parallel computation.
+
+  For data expressed as \eqn{(n \times p)}{(n, p)} matrix or data frame, we
+  assume that the data is \eqn{n} observations of \eqn{p} objects, which
+  are to be clustered. The \eqn{i}'th row vector corresponds to the
+  \eqn{i}'th observation of these objects and the \eqn{j}'th column
+  vector corresponds to a sample of \eqn{j}'th object with size \eqn{n}.
+
+  There are several methods to measure the dissimilarities between
+  objects. For data matrix \eqn{X=\{x_{ij}\}}{X},
+  \code{"correlation"}
+  method takes
+  \deqn{
+    1 - \frac{
+      \sum_{i=1}^n (x_{ij} - \bar{x}_j) (x_{ik} - \bar{x}_k)
+    }
+    {
+      \sqrt{\sum_{i=1}^n (x_{ij} - \bar{x}_j)^2}
+      \sqrt{\sum_{i=1}^n (x_{ik} - \bar{x}_k)^2}
+    }
+    }{%
+      1 - cor(X)[j,k]
+    }
+  for dissimilarity between \eqn{j}'th and \eqn{k}'th object, where
+  \eqn{\bar{x}_j = \frac{1}{n} \sum_{i=1}^n x_{ij} \mbox{and}
+  \bar{x}_k = \frac{1}{n} \sum_{i=1}^n x_{ik}}{cor is function \code{cor}}.
+
+  \code{"uncentered"} takes uncentered sample correlation
+  \deqn{
+    1 - \frac{
+      \sum_{i=1}^n x_{ij} x_{ik}
+    }
+    {
+      \sqrt{\sum_{i=1}^n x_{ij}^2}
+      \sqrt{\sum_{i=1}^n x_{ik}^2}
+    }
+    }{%
+      1 - sum(x[,j] * x[,k]) / (sqrt(sum(x[,j]^2)) * sqrt(sum(x[,k]^2)))
+    }
+  and \code{"abscor"} takes the absolute value of sample correlation
+  \deqn{
+    1 - \ \Biggl| \frac{
+      \sum_{i=1}^n (x_{ij} - \bar{x}_j) (x_{ik} - \bar{x}_k)
+    }
+    {
+      \sqrt{\sum_{i=1}^n (x_{ij} - \bar{x}_j)^2}
+      \sqrt{\sum_{i=1}^n (x_{ik} - \bar{x}_k)^2}
+    } \Biggl|.
+  }{%
+    1 - abs(cor(X)[j,k]).
+  }
+  }
+\value{
+  \item{hclust}{hierarchical clustering for original data generated by
+    function \code{hclust}. See \code{\link[stats]{hclust}} for details.}
+  \item{edges}{data frame object which contains \eqn{p}-values and
+    supporting informations such as standard errors.}
+  \item{count}{data frame object which contains primitive information
+    about the result of multiscale bootstrap resampling.}
+  \item{msfit}{list whose elements are results of curve fitting for
+    multiscale bootstrap resampling, of class \code{msfit}. See
+    \code{\link{msfit}} for details.}
+  \item{nboot}{numeric vector of number of bootstrap replications.}
+  \item{r}{numeric vector of the relative sample size for bootstrap
+    replications.}
+  \item{store}{list contains bootstrap replications if \code{store=TRUE}
+  was given for function \code{pvclust} or \code{parPvclust}.}
+}
+\seealso{\code{\link{lines.pvclust}}, \code{\link{print.pvclust}},
+  \code{\link{msfit}}, \code{\link{plot.pvclust}},
+  \code{\link{text.pvclust}}, \code{\link{pvrect}} and
+  \code{\link{pvpick}}.}
+\references{
+  Shimodaira, H. (2004)
+  "Approximately unbiased tests of regions using multistep-multiscale
+  bootstrap resampling",
+  \emph{Annals of Statistics}, 32, 2616-2641.
+
+  Shimodaira, H. (2002)
+  "An approximately unbiased test of phylogenetic tree selection",
+  \emph{Systematic Biology}, 51, 492-508.
+
+  Suzuki, R. and Shimodaira, H. (2004)
+  "An application of multiscale bootstrap resampling to hierarchical
+  clustering of microarray data: How accurate are these clusters?",
+  \emph{The Fifteenth International Conference on Genome Informatics 2004},
+  P034.
+
+  \url{http://www.is.titech.ac.jp/~shimo/prog/pvclust/}
+}
+\examples{
+## using Boston data in package MASS
+library(MASS)
+data(Boston)
+
+## multiscale bootstrap resampling
+boston.pv <- pvclust(Boston, nboot=100)
+
+## CAUTION: nboot=100 may be too small for actual use.
+##          We suggest nboot=1000 or larger.
+##          plot/print functions will be useful for diagnostics.
+
+## plot dendrogram with p-values
+plot(boston.pv)
+
+ask.bak <- par()$ask
+par(ask=TRUE)
+
+## highlight clusters with high au p-values
+pvrect(boston.pv)
+
+## print the result of multiscale bootstrap resampling
+print(boston.pv, digits=3)
+
+## plot diagnostic for curve fitting
+msplot(boston.pv, edges=c(2,4,6,7))
+
+par(ask=ask.bak)
+
+## Print clusters with high p-values
+boston.pp <- pvpick(boston.pv)
+boston.pp
+
+\dontrun{
+## parallel computation
+library(parallel)
+cl <- makeCluster(2, type = "PSOCK")
+
+## parallel version of pvclust
+boston.pv <- parPvclust(cl, Boston, nboot=1000)
+stopCluster(cl)
+}
+}
+\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
+\keyword{cluster}
diff --git a/man/pvpick.Rd b/man/pvpick.Rd
index 7c062e3..cad64a2 100755
--- a/man/pvpick.Rd
+++ b/man/pvpick.Rd
@@ -45,5 +45,5 @@ pvrect(x, alpha=0.95, pv="au", type="geq", max.only=TRUE, border=2, ...)
     (number) corresponds to the \eqn{i}'th name vector in
     \code{clusters}.}
   }
-\author{Ryota Suzuki \email{ryota.suzuki at is.titech.ac.jp}}
+\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
 \keyword{aplot}
\ No newline at end of file
diff --git a/man/seplot.Rd b/man/seplot.Rd
index a12b7fa..fe3bc4f 100755
--- a/man/seplot.Rd
+++ b/man/seplot.Rd
@@ -19,5 +19,5 @@ xlab=NULL, ylab=NULL, ...)
   \item{...}{other graphical parameters to be passed to generic
     \code{plot} or \code{identify} function.}
 }
-\author{Ryota Suzuki \email{ryota.suzuki at is.titech.ac.jp}}
+\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
 \keyword{hplot}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pvclust.git



More information about the debian-med-commit mailing list