[med-svn] [pvclust] 01/05: Imported Upstream version 2.0-0
Andreas Tille
tille at debian.org
Wed Nov 4 15:57:02 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository pvclust.
commit 7c70de8c2ac6d761308d246a9ad0ae68d00ed543
Author: Andreas Tille <tille at debian.org>
Date: Wed Nov 4 16:47:19 2015 +0100
Imported Upstream version 2.0-0
---
DESCRIPTION | 8 +-
MD5 | 10 +-
NAMESPACE | 6 +
R/pvclust-internal.R | 347 ++++++++++++++++++++++++++-------
R/pvclust.R | 536 +++++++++++++++++++++++----------------------------
man/pvclust.Rd | 79 +++++---
6 files changed, 590 insertions(+), 396 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index a6b4595..d387b17 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: pvclust
-Version: 1.3-2
-Date: 2014-12-19
+Version: 2.0-0
+Date: 2015-10-23
Title: Hierarchical Clustering with P-Values via Multiscale Bootstrap
Resampling
Author: Ryota Suzuki <suzuki at ef-prime.com>, Hidetoshi Shimodaira
@@ -14,7 +14,7 @@ Description: An implementation of multiscale bootstrap resampling for
BP (bootstrap probability) value for each cluster in a dendrogram.
License: GPL (>= 2)
URL: http://www.sigmath.es.osaka-u.ac.jp/shimo-lab/prog/pvclust/
-Packaged: 2014-12-19 05:24:46 UTC; suzuki
NeedsCompilation: no
+Packaged: 2015-10-23 11:28:03 UTC; suzuki
Repository: CRAN
-Date/Publication: 2014-12-22 06:28:55
+Date/Publication: 2015-10-23 16:23:16
diff --git a/MD5 b/MD5
index d3a054d..993a62a 100644
--- a/MD5
+++ b/MD5
@@ -1,13 +1,13 @@
-c8d270dc7740a01321089f869aa222d8 *DESCRIPTION
-97c5dfc9e62e9ba7cd789609bc5d7f99 *NAMESPACE
-78aaa368d0e21dfa99912aeb0e1031fc *R/pvclust-internal.R
-667584464eb31a68d58f274d440f01ad *R/pvclust.R
+a8cfdf20f013a1782f62540de8c31cc7 *DESCRIPTION
+5a968cf1c9f480e1f6ddf26248f54b56 *NAMESPACE
+7a00ed1667f341eaa681f8d9d3d058d9 *R/pvclust-internal.R
+75d16c5ca32ecd0a133b5764479989e1 *R/pvclust.R
4ce445fdf8be068ed0f770d3c7bafd17 *data/lung.RData
4dd897fecd4b0175cf3318af419bebab *man/lung.Rd
25915dafebbf8ed6fb9234776ae7349c *man/msfit.Rd
f2b43095e239bcd8edd83cb0d6b6352c *man/msplot.Rd
c2c1b6dbd2843eeccbee5ec24da11b49 *man/plot.pvclust.Rd
1a64c1ea3c377fad1a8d81c67c34f0a7 *man/print.pvclust.Rd
-10d182003a1d3ab8ab43b6a0f3759974 *man/pvclust.Rd
+4c4235b3d254542f783980bcde9b03d2 *man/pvclust.Rd
cfa83769e276f25bdb5c83b430553af5 *man/pvpick.Rd
3ab1fb9eebdc5f8b3915d71258bee39c *man/seplot.Rd
diff --git a/NAMESPACE b/NAMESPACE
index b276cec..56cb754 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,3 +16,9 @@ S3method(plot, msfit)
S3method(lines, msfit)
S3method(summary, msfit)
+importFrom("grDevices", "n2mfrow")
+importFrom("graphics", "curve", "lines", "par", "plot", "rect",
+ "segments", "text")
+importFrom("stats", "as.dist", "cor", "dist", "dnorm", "hclust",
+ "lsfit", "na.omit", "pchisq", "pnorm", "qnorm", "rmultinom")
+importFrom("utils", "capture.output", "head", "packageVersion")
diff --git a/R/pvclust-internal.R b/R/pvclust-internal.R
index 22d6417..e636aca 100644
--- a/R/pvclust-internal.R
+++ b/R/pvclust-internal.R
@@ -1,3 +1,176 @@
+### internal function for non-parallel pvclust
+pvclust.nonparallel <- function(data, method.hclust, method.dist, use.cor, nboot, r,
+ store, weight, iseed, quiet)
+{
+ # initialize random seed
+ if(!is.null(iseed))
+ set.seed(seed = iseed)
+
+ # data: (n,p) matrix, n-samples, p-variables
+ n <- nrow(data); p <- ncol(data)
+
+ # hclust for original data
+ # METHODS <- c("ward", "single", "complete", "average", "mcquitty",
+ # "median", "centroid")
+ # method.hclust <- METHODS[pmatch(method.hclust, METHODS)]
+
+ # Use custom distance function
+ if(is.function(method.dist)) {
+ distance <- method.dist(data)
+ } else {
+ distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
+ }
+
+ data.hclust <- hclust(distance, method=method.hclust)
+
+ # ward -> ward.D
+ # only if R >= 3.1.0
+ if(method.hclust == "ward" && getRversion() >= '3.1.0') {
+ method.hclust <- "ward.D"
+ }
+
+ # multiscale bootstrap
+ size <- floor(n*r)
+ rl <- length(size)
+
+ if(rl == 1) {
+ if(r != 1.0)
+ warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
+
+ r <- list(1.0)
+ }
+ else
+ r <- as.list(size/n)
+
+ mboot <- lapply(r, boot.hclust, data=data, object.hclust=data.hclust, nboot=nboot,
+ method.dist=method.dist, use.cor=use.cor,
+ method.hclust=method.hclust, store=store, weight=weight, quiet=quiet)
+
+ result <- pvclust.merge(data=data, object.hclust=data.hclust, mboot=mboot)
+
+ return(result)
+}
+
+
+### internal function for parallel pvclust
+pvclust.parallel <- function(cl, data, method.hclust, method.dist, use.cor,
+ nboot, r, store, weight, init.rand=NULL, iseed, quiet,
+ parallel.check)
+{
+ if(parallel.check) {
+ check.result <- check.parallel(cl=cl, nboot=nboot)
+ if(!check.result) {
+ msg <- paste(attr(check.result, "msg"), ". non-parallel version is executed", sep = "")
+ warning(msg)
+ return(pvclust.nonparallel(data=data, method.hclust=method.hclust, method.dist=method.dist,
+ use.cor=use.cor, nboot=nboot, r=r, store=store, weight=weight, iseed=iseed, quiet=quiet))
+ }
+ }
+
+ # check package versions
+ pkg.ver <-parallel::clusterCall(cl, packageVersion, pkg = "pvclust")
+ r.ver <- parallel::clusterCall(cl, getRversion)
+ if(length(unique(pkg.ver)) > 1 || length(unique(r.ver)) > 1) {
+
+ node.name <- parallel::clusterEvalQ(cl, Sys.info()["nodename"])
+ version.table <- data.frame(
+ node=seq_len(length(node.name)),
+ name=unlist(node.name),
+ R=unlist(lapply(r.ver, as.character)),
+ pvclust=unlist(lapply(pkg.ver, as.character)))
+
+ if(nrow(version.table) > 10)
+ table.out <- c(capture.output(print(head(version.table, n=10), row.names=FALSE)), " ...")
+ else
+ table.out <- capture.output(print(version.table, row.names=FALSE))
+
+ warning("R/pvclust versions are not unique:\n",
+ paste(table.out, collapse="\n"))
+ }
+
+ if(!is.null(init.rand))
+ warning("\"init.rand\" option is deprecated. It is available for back compatibility but will be unavailable in the future.\nSpecify a non-NULL value of \"iseed\" to initialize random seed.")
+
+ # if(init.rand) {
+ # if(is.null(iseed) && !is.null(seed)) {
+ # warning("\"seed\" option is deprecated. It is available for back compatibility but will be unavailable in the future.\nConsider using \"iseed\" instead.")
+ #
+ # if(length(seed) != length(cl))
+ # stop("seed and cl should have the same length.")
+ #
+ # # setting random seeds
+ # parallel::parLapply(cl, as.list(seed), set.seed)
+ # } else {
+ # parallel::clusterSetRNGStream(cl = cl, iseed = iseed)
+ # }
+ # }
+
+ if(!is.null(iseed) && (is.null(init.rand) || init.rand))
+ parallel::clusterSetRNGStream(cl = cl, iseed = iseed)
+
+ # data: (n,p) matrix, n-samples, p-variables
+ n <- nrow(data); p <- ncol(data)
+
+ # hclust for original data
+ if(is.function(method.dist)) {
+ # Use custom distance function
+ distance <- method.dist(data)
+ } else {
+ distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
+ }
+
+ data.hclust <- hclust(distance, method=method.hclust)
+
+ # ward -> ward.D
+ # only if R >= 3.1.0
+ if(method.hclust == "ward" && getRversion() >= '3.1.0') {
+ method.hclust <- "ward.D"
+ }
+
+ # multiscale bootstrap
+ size <- floor(n*r)
+ rl <- length(size)
+
+ if(rl == 1) {
+ if(r != 1.0)
+ warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
+
+ r <- list(1.0)
+ }
+ else
+ r <- as.list(size/n)
+
+ ncl <- length(cl)
+ nbl <- as.list(rep(nboot %/% ncl,times=ncl))
+
+ if((rem <- nboot %% ncl) > 0)
+ nbl[1:rem] <- lapply(nbl[1:rem], "+", 1)
+
+ if(!quiet)
+ cat("Multiscale bootstrap... ")
+
+ mlist <- parallel::parLapply(cl, nbl, pvclust.node,
+ r=r, data=data, object.hclust=data.hclust, method.dist=method.dist,
+ use.cor=use.cor, method.hclust=method.hclust,
+ store=store, weight=weight, quiet=quiet)
+ if(!quiet)
+ cat("Done.\n")
+
+ mboot <- mlist[[1]]
+
+ for(i in 2:ncl) {
+ for(j in 1:rl) {
+ mboot[[j]]$edges.cnt <- mboot[[j]]$edges.cnt + mlist[[i]][[j]]$edges.cnt
+ mboot[[j]]$nboot <- mboot[[j]]$nboot + mlist[[i]][[j]]$nboot
+ mboot[[j]]$store <- c(mboot[[j]]$store, mlist[[i]][[j]]$store)
+ }
+ }
+
+ result <- pvclust.merge( data=data, object.hclust=data.hclust, mboot=mboot)
+
+ return(result)
+}
+
hc2axes <- function(x)
{
A <- x$merge # (n,n-1) matrix
@@ -7,87 +180,88 @@ hc2axes <- function(x)
x.tmp <- rep(0,2)
zz <- match(1:length(x$order),x$order)
-
- for(i in 1:(n-1)) {
- ai <- A[i,1]
-
- if(ai < 0)
- x.tmp[1] <- zz[-ai]
- else
- x.tmp[1] <- x.axis[ai]
-
- ai <- A[i,2]
-
- if(ai < 0)
- x.tmp[2] <- zz[-ai]
- else
- x.tmp[2] <- x.axis[ai]
-
- x.axis[i] <- mean(x.tmp)
- }
+
+ for(i in 1:(n-1)) {
+ ai <- A[i,1]
+
+ if(ai < 0)
+ x.tmp[1] <- zz[-ai]
+ else
+ x.tmp[1] <- x.axis[ai]
+
+ ai <- A[i,2]
+
+ if(ai < 0)
+ x.tmp[2] <- zz[-ai]
+ else
+ x.tmp[2] <- x.axis[ai]
+
+ x.axis[i] <- mean(x.tmp)
+ }
return(data.frame(x.axis=x.axis,y.axis=y.axis))
}
hc2split <- function(x)
- {
- A <- x$merge # (n-1,n) matrix
- n <- nrow(A) + 1
- B <- list()
-
- for(i in 1:(n-1)){
- ai <- A[i,1]
-
- if(ai < 0)
- B[[i]] <- -ai
- else
- B[[i]] <- B[[ai]]
-
- ai <- A[i,2]
-
- if(ai < 0)
- B[[i]] <- sort(c(B[[i]],-ai))
- else
- B[[i]] <- sort(c(B[[i]],B[[ai]]))
- }
-
- CC <- matrix(rep(0,n*(n-1)),nrow=(n-1),ncol=n)
+{
+ A <- x$merge # (n-1,n) matrix
+ n <- nrow(A) + 1
+ B <- list()
+
+ for(i in 1:(n-1)){
+ ai <- A[i,1]
- for(i in 1:(n-1)){
- bi <- B[[i]]
- m <- length(bi)
- for(j in 1:m)
- CC[i,bi[j]] <- 1
- }
-
- split <- list(pattern=apply(CC,1,paste,collapse=""), member=B)
+ if(ai < 0)
+ B[[i]] <- -ai
+ else
+ B[[i]] <- B[[ai]]
+
+ ai <- A[i,2]
- return(split)
+ if(ai < 0)
+ B[[i]] <- sort(c(B[[i]],-ai))
+ else
+ B[[i]] <- sort(c(B[[i]],B[[ai]]))
}
-
-pvclust.node <- function(x, r,...)
- {
-# require(pvclust)
- mboot.node <- lapply(r, boot.hclust, nboot=x, ...)
- return(mboot.node)
+
+ CC <- matrix(rep(0,n*(n-1)),nrow=(n-1),ncol=n)
+
+ for(i in 1:(n-1)){
+ bi <- B[[i]]
+ m <- length(bi)
+ for(j in 1:m)
+ CC[i,bi[j]] <- 1
}
+
+ split <- list(pattern=apply(CC,1,paste,collapse=""), member=B)
+
+ return(split)
+}
+
+pvclust.node <- function(x, r, ...)
+{
+ # require(pvclust)
+ mboot.node <- lapply(r, boot.hclust, nboot=x, ...)
+ return(mboot.node)
+}
boot.hclust <- function(r, data, object.hclust, method.dist, use.cor,
- method.hclust, nboot, store, weight=F)
+ method.hclust, nboot, store, weight=FALSE, quiet=FALSE)
{
n <- nrow(data)
size <- round(n*r, digits=0)
if(size == 0)
stop("invalid scale parameter(r)")
r <- size/n
-
+
pattern <- hc2split(object.hclust)$pattern
edges.cnt <- table(factor(pattern)) - table(factor(pattern))
st <- list()
# bootstrap start
rp <- as.character(round(r,digits=2)); if(r == 1) rp <- paste(rp,".0",sep="")
- cat(paste("Bootstrap (r = ", rp, ")... ", sep=""))
+ if(!quiet)
+ cat(paste("Bootstrap (r = ", rp, ")... ", sep=""))
w0 <- rep(1,n) # equal weight
na.flag <- 0
@@ -97,7 +271,11 @@ boot.hclust <- function(r, data, object.hclust, method.dist, use.cor,
suppressWarnings(distance <- distw.pvclust(data,w1,method=method.dist,use.cor=use.cor))
} else {
smpl <- sample(1:n, size, replace=TRUE)
- suppressWarnings(distance <- dist.pvclust(data[smpl,],method=method.dist,use.cor=use.cor))
+ if(is.function(method.dist)) {
+ suppressWarnings(distance <- method.dist(data[smpl,]))
+ } else {
+ suppressWarnings(distance <- dist.pvclust(data[smpl,],method=method.dist,use.cor=use.cor))
+ }
}
if(all(is.finite(distance))) { # check if distance is valid
x.hclust <- hclust(distance,method=method.hclust)
@@ -105,18 +283,19 @@ boot.hclust <- function(r, data, object.hclust, method.dist, use.cor,
edges.cnt <- edges.cnt + table(factor(pattern.i, levels=pattern))
} else {
x.hclust <- NULL
- na.flag <- 1
+ na.flag <- 1
}
-
+
if(store)
st[[i]] <- x.hclust
}
- cat("Done.\n")
+ if(!quiet)
+ cat("Done.\n")
# bootstrap done
if(na.flag == 1)
- warning(paste("inappropriate distance matrices are omitted in computation: r = ", r), call.=FALSE)
-
+ warning(paste("inappropriate distance matrices are omitted in computation: r = ", r), call.=FALSE)
+
boot <- list(edges.cnt=edges.cnt, method.dist=method.dist, use.cor=use.cor,
method.hclust=method.hclust, nboot=nboot, size=size, r=r, store=st)
class(boot) <- "boot.hclust"
@@ -138,7 +317,7 @@ pvclust.merge <- function(data, object.hclust, mboot){
edges.bp <- edges.cnt <- data.frame(matrix(rep(0,ne*rl),nrow=ne,ncol=rl))
row.names(edges.bp) <- pattern
names(edges.cnt) <- paste("r", 1:rl, sep="")
-
+
for(j in 1:rl) {
edges.cnt[,j] <- as.vector(mboot[[j]]$edges.cnt)
edges.bp[,j] <- edges.cnt[,j] / nboot[j]
@@ -164,12 +343,12 @@ pvclust.merge <- function(data, object.hclust, mboot){
edges.pv <- data.frame(au=au, bp=bp, se.au=se.au, se.bp=se.bp,
v=v, c=cc, pchi=pchi)
-
+
row.names(edges.pv) <- row.names(edges.cnt) <- 1:ne
-
+
result <- list(hclust=object.hclust, edges=edges.pv, count=edges.cnt,
msfit=ms.fitted, nboot=nboot, r=r, store=store)
-
+
class(result) <- "pvclust"
return(result)
}
@@ -206,18 +385,18 @@ dist.pvclust <- function(x, method="euclidean", use.cor="pairwise.complete.obs")
corw <- function(x,w,
use=c("all.obs","complete.obs","pairwise.complete.obs")
- ) {
+) {
if(is.data.frame(x)) x <- as.matrix(x)
x <- x[w>0,,drop=F]
w <- w[w>0]
-
+
n <- nrow(x) # sample size
m <- ncol(x) # number of variables
if(missing(w)) w <- rep(1,n)
r <- matrix(0,m,m,dimnames=list(colnames(x),colnames(x)))
diag(r) <- 1
use <- match.arg(use)
-
+
pairu <- F
if(use=="all.obs") {
u <- rep(T,n)
@@ -268,3 +447,29 @@ distw.pvclust <- function(x,w,method="correlation", use.cor="pairwise.complete.o
}
stop("wrong method")
}
+
+### check whether parallel computation is appropriate
+check.parallel <- function(cl, nboot) {
+ res <- FALSE
+
+### will be used when defaultCluster(cl) becomes publicly available
+# # check whether cl is a cluster, or a default cluster is available
+# if(!inherits(cl, "cluster")) {
+# try_result <- try(cl <- parallel:::defaultCluster(cl), silent=TRUE)
+# if(class(try_result) == "try-error") {
+# attr(res, "msg" <- "cl is not a cluster")
+# return(res)
+# }
+# }
+
+ ncl <- length(cl)
+ if(ncl < 2) {
+ attr(res, "msg") <- "Cluster size is too small (or NULL)"
+ } else if (ncl > nboot) {
+ attr(res, "msg") <- "nboot is too small for cluster size"
+ } else {
+ res <- TRUE
+ }
+
+ return(res)
+}
\ No newline at end of file
diff --git a/R/pvclust.R b/R/pvclust.R
index 74bbc61..37e791e 100644
--- a/R/pvclust.R
+++ b/R/pvclust.R
@@ -1,41 +1,82 @@
-pvclust <- function(data, method.hclust="average",
- method.dist="correlation", use.cor="pairwise.complete.obs",
- nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE)
- {
- # data: (n,p) matrix, n-samples, p-variables
- n <- nrow(data); p <- ncol(data)
-
- # hclust for original data
-# METHODS <- c("ward", "single", "complete", "average", "mcquitty",
-# "median", "centroid")
-# method.hclust <- METHODS[pmatch(method.hclust, METHODS)]
- distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
- data.hclust <- hclust(distance, method=method.hclust)
-
- # ward -> ward.D
- if(method.hclust == "ward") method.hclust <- "ward.D"
-
- # multiscale bootstrap
- size <- floor(n*r)
- rl <- length(size)
+pvclust <- function(data, method.hclust="average", method.dist="correlation",
+ use.cor="pairwise.complete.obs", nboot=1000, parallel=FALSE,
+ r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE, iseed=NULL, quiet=FALSE)
+{
+ p <- parallel
+
+ if(is.null(p) || (!is.logical(p) && (!is.integer(p) || p <= 0) && !inherits(p, "cluster")))
+ stop("parallel should be a logical, an integer or a cluster object.")
+
+ if(is.logical(p)) {
+ par.flag <- p
+ par.size <- NULL
+ cl <- NULL
+ } else if(is.integer(p)) {
+ par.flag <- TRUE
+ par.size <- p
+ cl <- NULL
+ } else if(inherits(p, "cluster")) {
+ par.flag <- TRUE
+ cl <- p
+ }
+
+ if(par.flag && !requireNamespace("parallel", quietly=TRUE)) {
+ warning("Package parallel is required for parallel computation. Use non-parallel mode instead.")
+ par.flag <- FALSE
+ }
+
+ if(par.flag) {
- if(rl == 1) {
- if(r != 1.0)
- warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
+ if(is.null(cl)) {
+ if(is.null(par.size))
+ par.size <- parallel::detectCores() - 1
+
+ if(!quiet)
+ cat("Creating a temporary cluster...")
+ try_result <- try(cl <- parallel::makePSOCKcluster(par.size))
+
+ if(inherits(try_result, "try-error")) {
+ if(!quiet)
+ cat("failed to create a cluster. Use non-parallel mode instead.")
+ par.flag <- FALSE
+ } else {
+ if(!quiet) {
+ cat("done:\n")
+ print(cl)
+ }
+ on.exit(parallel::stopCluster(cl))
+ }
+
- r <- list(1.0)
}
- else
- r <- as.list(size/n)
-
- mboot <- lapply(r, boot.hclust, data=data, object.hclust=data.hclust, nboot=nboot,
- method.dist=method.dist, use.cor=use.cor,
- method.hclust=method.hclust, store=store, weight=weight)
- result <- pvclust.merge(data=data, object.hclust=data.hclust, mboot=mboot)
+ pvclust.parallel(cl=cl, data=data, method.hclust=method.hclust,
+ method.dist=method.dist, use.cor=use.cor,
+ nboot=nboot, r=r, store=store, weight=weight,
+ iseed=iseed, quiet=quiet, parallel.check=TRUE)
- return(result)
+ } else {
+ pvclust.nonparallel(data=data, method.hclust=method.hclust,
+ method.dist=method.dist, use.cor=use.cor,
+ nboot=nboot, r=r, store=store, weight=weight, iseed=iseed, quiet=quiet)
}
+}
+
+parPvclust <- function(cl=NULL, data, method.hclust="average",
+ method.dist="correlation", use.cor="pairwise.complete.obs",
+ nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE,
+ weight=FALSE, init.rand=NULL, iseed=NULL, quiet=FALSE) {
+ warning("\"parPvclust\" has been integrated into pvclust (with \"parallel\" option).\nIt is available for back compatibility but will be unavailable in the future.")
+
+ if(!requireNamespace("parallel", quietly=TRUE))
+ stop("Package parallel is required for parPvclust.")
+
+ pvclust.parallel(cl=cl, data=data, method.hclust=method.hclust,
+ method.dist=method.dist, use.cor=use.cor,
+ nboot=nboot, r=r, store=store, weight=weight,
+ init.rand=init.rand, iseed=iseed, quiet=quiet,
+ parallel.check=TRUE)
+}
plot.pvclust <- function(x, print.pv=TRUE, print.num=TRUE, float=0.01,
col.pv=c(2,3,8), cex.pv=0.8, font.pv=NULL,
@@ -50,10 +91,10 @@ plot.pvclust <- function(x, print.pv=TRUE, print.num=TRUE, float=0.01,
if(is.null(xlab))
xlab=paste("Distance: ", x$hclust$dist.method)
-
+
plot(x$hclust, main=main, sub=sub, xlab=xlab, col=col, cex=cex,
font=font, lty=lty, lwd=lwd, ...)
-
+
if(print.pv)
text(x, col=col.pv, cex=cex.pv, font=font.pv, float=float, print.num=print.num)
}
@@ -94,279 +135,192 @@ summary.pvclust <- function(object, ...){
}
pvrect <- function(x, alpha=0.95, pv="au", type="geq", max.only=TRUE, border=2, ...)
+{
+ len <- nrow(x$edges)
+ member <- hc2split(x$hclust)$member
+ order <- x$hclust$order
+ usr <- par("usr")
+ xwd <- usr[2] - usr[1]
+ ywd <- usr[4] - usr[3]
+ cin <- par()$cin
+
+ ht <- c()
+ j <- 1
+
+ if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
+ stop("Invalid type argument: see help(pvrect)")
+
+ for(i in (len - 1):1)
{
- len <- nrow(x$edges)
- member <- hc2split(x$hclust)$member
- order <- x$hclust$order
- usr <- par("usr")
- xwd <- usr[2] - usr[1]
- ywd <- usr[4] - usr[3]
- cin <- par()$cin
-
- ht <- c()
- j <- 1
-
- if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
- stop("Invalid type argument: see help(pvrect)")
+ if (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
+ else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
+ else if(pm==3) wh <- (x$edges[i,pv] > alpha) # Greater Than
+ else if(pm==4) wh <- (x$edges[i,pv] > alpha) # Lower Than
- for(i in (len - 1):1)
+ if(wh)
+ {
+ mi <- member[[i]]
+ ma <- match(mi, order)
+
+ if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
{
- if (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
- else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
- else if(pm==3) wh <- (x$edges[i,pv] > alpha) # Greater Than
- else if(pm==4) wh <- (x$edges[i,pv] > alpha) # Lower Than
-
- if(wh)
- {
- mi <- member[[i]]
- ma <- match(mi, order)
-
- if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
- {
- xl <- min(ma)
- xr <- max(ma)
- yt <- x$hclust$height[i]
- yb <- usr[3]
-
- mx <- xwd / length(member) / 3
- my <- ywd / 200
-
- rect(xl - mx, yb + my, xr + mx, yt + my, border=border, shade=NULL, ...)
-
- j <- j + 1
- }
- ht <- c(ht, ma)
- }
+ xl <- min(ma)
+ xr <- max(ma)
+ yt <- x$hclust$height[i]
+ yb <- usr[3]
+
+ mx <- xwd / length(member) / 3
+ my <- ywd / 200
+
+ rect(xl - mx, yb + my, xr + mx, yt + my, border=border, shade=NULL, ...)
+
+ j <- j + 1
}
+ ht <- c(ht, ma)
+ }
}
+}
msplot <- function(x, edges=NULL, ...)
- {
- if(is.null(edges)) edges <- 1:length(x$msfit)
- d <- length(edges)
-
- mfrow.bak <- par()$mfrow
- on.exit(par(mfrow=mfrow.bak))
-
- par(mfrow=n2mfrow(d))
-
- for(i in edges) {
- if(i == 1 || (i %% 10 == 1 && i > 20))
- main <- paste(i, "st edge", sep="")
- else if(i == 2 || (i %% 10 == 2 && i > 20))
- main <- paste(i, "nd edge", sep="")
- else if(i == 3 || (i %% 10 == 3 && i > 20))
- main <- paste(i, "rd edge", sep="")
- else
- main <- paste(i, "th edge", sep="")
-
- plot(x$msfit[[i]], main=main, ...)
- }
+{
+ if(is.null(edges)) edges <- 1:length(x$msfit)
+ d <- length(edges)
+
+ mfrow.bak <- par()$mfrow
+ on.exit(par(mfrow=mfrow.bak))
+
+ par(mfrow=n2mfrow(d))
+
+ for(i in edges) {
+ if(i == 1 || (i %% 10 == 1 && i > 20))
+ main <- paste(i, "st edge", sep="")
+ else if(i == 2 || (i %% 10 == 2 && i > 20))
+ main <- paste(i, "nd edge", sep="")
+ else if(i == 3 || (i %% 10 == 3 && i > 20))
+ main <- paste(i, "rd edge", sep="")
+ else
+ main <- paste(i, "th edge", sep="")
+
+ plot(x$msfit[[i]], main=main, ...)
}
+}
lines.pvclust <- function(x, alpha=0.95, pv="au", type="geq", col=2, lwd=2, ...)
+{
+ len <- nrow(x$edges)
+ member <- hc2split(x$hclust)$member
+ order <- x$hclust$order
+ usr <- par("usr")
+ xwd <- usr[2] - usr[1]
+ ywd <- usr[4] - usr[3]
+ cin <- par()$cin
+
+ ht <- c()
+ j <- 1
+
+ if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
+ stop("Invalid type argument: see help(lines.pvclust)")
+
+ for(i in (len - 1):1)
{
- len <- nrow(x$edges)
- member <- hc2split(x$hclust)$member
- order <- x$hclust$order
- usr <- par("usr")
- xwd <- usr[2] - usr[1]
- ywd <- usr[4] - usr[3]
- cin <- par()$cin
-
- ht <- c()
- j <- 1
-
- if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
- stop("Invalid type argument: see help(lines.pvclust)")
+ if (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
+ else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
+ else if(pm==3) wh <- (x$edges[i,pv] > alpha) # Greater Than
+ else if(pm==4) wh <- (x$edges[i,pv] > alpha) # Lower Than
- for(i in (len - 1):1)
+ if(wh)
+ {
+ mi <- member[[i]]
+ ma <- match(mi, order)
+
+ if(sum(match(ma, ht, nomatch=0)) == 0)
{
- if (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or EQuals
- else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or EQuals
- else if(pm==3) wh <- (x$edges[i,pv] > alpha) # Greater Than
- else if(pm==4) wh <- (x$edges[i,pv] > alpha) # Lower Than
-
- if(wh)
- {
- mi <- member[[i]]
- ma <- match(mi, order)
-
- if(sum(match(ma, ht, nomatch=0)) == 0)
- {
- xl <- min(ma)
- xr <- max(ma)
- yt <- x$hclust$height[i]
- yb <- usr[3]
-
- mx <- xwd/length(member)/10
-
- segments(xl-mx, yb, xr+mx, yb, xpd=TRUE, col=col, lwd=lwd, ...)
-
- j <- j + 1
- }
- ht <- c(ht, ma)
- }
+ xl <- min(ma)
+ xr <- max(ma)
+ yt <- x$hclust$height[i]
+ yb <- usr[3]
+
+ mx <- xwd/length(member)/10
+
+ segments(xl-mx, yb, xr+mx, yb, xpd=TRUE, col=col, lwd=lwd, ...)
+
+ j <- j + 1
}
+ ht <- c(ht, ma)
+ }
}
+}
pvpick <- function(x, alpha=0.95, pv="au", type="geq", max.only=TRUE)
+{
+ len <- nrow(x$edges)
+ member <- hc2split(x$hclust)$member
+ order <- x$hclust$order
+
+ ht <- c()
+ a <- list(clusters=list(), edges=c()); j <- 1
+
+ if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
+ stop("Invalid type argument: see help(pickup)")
+
+ for(i in (len - 1):1)
{
- len <- nrow(x$edges)
- member <- hc2split(x$hclust)$member
- order <- x$hclust$order
-
- ht <- c()
- a <- list(clusters=list(), edges=c()); j <- 1
-
- if(is.na(pm <- pmatch(type, c("geq", "leq", "gt", "lt"))))
- stop("Invalid type argument: see help(pickup)")
+ if (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or Equals
+ else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or Equals
+ else if(pm==3) wh <- (x$edges[i,pv] > alpha) # Greater Than
+ else if(pm==4) wh <- (x$edges[i,pv] < alpha) # Lower Than
- for(i in (len - 1):1)
- {
- if (pm==1) wh <- (x$edges[i,pv] >= alpha) # Greater than or Equals
- else if(pm==2) wh <- (x$edges[i,pv] <= alpha) # Lower than or Equals
- else if(pm==3) wh <- (x$edges[i,pv] > alpha) # Greater Than
- else if(pm==4) wh <- (x$edges[i,pv] < alpha) # Lower Than
-
- if(wh)
- {
- mi <- member[[i]]
- ma <- match(mi, order)
-
- if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
- {
- a$clusters[[j]] <- x$hclust$labels[mi]
- a$edges <- c(a$edges,i)
-
- j <- j + 1
- }
- ht <- c(ht, ma)
- }
- }
-
- a$edges <- a$edges[length(a$edges):1]
- a$clusters <- a$clusters[length(a$edges):1]
-
- return(a)
- }
-
-parPvclust <- function(cl=NULL, data, method.hclust="average",
- method.dist="correlation", use.cor="pairwise.complete.obs",
- nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE,
- weight=FALSE,
- init.rand=TRUE, seed=NULL, iseed=NULL)
- {
- # if(!(require(snow))) stop("Package snow is required for parPvclust.")
- if(!(require(parallel))) stop("Package parallel is required for parPvclust.")
-
- if((ncl <- length(cl)) < 2 || ncl > nboot) {
- if(ncl > nboot)
- warning("Too small value for nboot: non-parallel version is executed.")
- else
- warning("Too small (or NULL) cluster: non-parallel version is executed.")
+ if(wh)
+ {
+ mi <- member[[i]]
+ ma <- match(mi, order)
- return(pvclust(data,method.hclust,method.dist,use.cor,nboot,r,store))
- }
-
- if(init.rand) {
- if(is.null(iseed) && !is.null(seed)) {
- warning("\"seed\" option is deprecated. It is available for back compatibility but will be unavailable in the future.\nConsider using \"iseed\" instead.")
-
- if(length(seed) != length(cl))
- stop("seed and cl should have the same length.")
+ if(max.only == FALSE || (max.only && sum(match(ma, ht, nomatch=0)) == 0))
+ {
+ a$clusters[[j]] <- x$hclust$labels[mi]
+ a$edges <- c(a$edges,i)
- # setting random seeds
- parallel::parLapply(cl, as.list(seed), set.seed)
- } else {
- parallel::clusterSetRNGStream(cl = cl, iseed = iseed)
- }
- }
-
- # data: (n,p) matrix, n-samples, p-variables
- n <- nrow(data); p <- ncol(data)
-
- # hclust for original data
- #METHODS <- c("ward", "single", "complete", "average", "mcquitty",
- # "median", "centroid")
- #method.hclust <- METHODS[pmatch(method.hclust, METHODS)]
-
- distance <- dist.pvclust(data, method=method.dist, use.cor=use.cor)
- data.hclust <- hclust(distance, method=method.hclust)
-
- # ward -> ward.D
- if(method.hclust == "ward") method.hclust <- "ward.D"
-
- # multiscale bootstrap
- size <- floor(n*r)
- rl <- length(size)
-
- if(rl == 1) {
- if(r != 1.0)
- warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
-
- r <- list(1.0)
- }
- else
- r <- as.list(size/n)
-
- nbl <- as.list(rep(nboot %/% ncl,times=ncl))
-
- if((rem <- nboot %% ncl) > 0)
- nbl[1:rem] <- lapply(nbl[1:rem], "+", 1)
-
- cat("Multiscale bootstrap... ")
-
- mlist <- parallel::parLapply(cl, nbl, pvclust.node,
- r=r, data=data, object.hclust=data.hclust, method.dist=method.dist,
- use.cor=use.cor, method.hclust=method.hclust,
- store=store, weight=weight)
- cat("Done.\n")
-
- mboot <- mlist[[1]]
-
- for(i in 2:ncl) {
- for(j in 1:rl) {
- mboot[[j]]$edges.cnt <- mboot[[j]]$edges.cnt + mlist[[i]][[j]]$edges.cnt
- mboot[[j]]$nboot <- mboot[[j]]$nboot + mlist[[i]][[j]]$nboot
- mboot[[j]]$store <- c(mboot[[j]]$store, mlist[[i]][[j]]$store)
+ j <- j + 1
}
+ ht <- c(ht, ma)
}
-
- result <- pvclust.merge( data=data, object.hclust=data.hclust, mboot=mboot)
-
- return(result)
}
+
+ a$edges <- a$edges[length(a$edges):1]
+ a$clusters <- a$clusters[length(a$edges):1]
+
+ return(a)
+}
msfit <- function(bp, r, nboot) {
-
+
if(length(bp) != length(r))
stop("bp and r should have the same length")
-
+
nboot <- rep(nboot, length=length(bp))
-
+
use <- bp > 0 & bp < 1
-
+
p <- se <- c(0,0); names(p) <- names(se) <- c("au", "bp")
coef <- c(0,0); names(coef) <- c("v", "c")
-
+
a <- list(p=p, se=se, coef=coef, df=0, rss=0, pchi=0); class(a) <- "msfit"
-
+
if(sum(use) < 2) {
# if(mean(bp) < .5) a$p[] <- c(0, 0) else a$p[] <- c(1, 1)
if(mean(bp) < .5) a$p[] <- c(0, bp[r==1.0]) else a$p[] <- c(1, bp[r==1.0])
return(a)
}
-
+
bp <- bp[use]; r <- r[use]; nboot <- nboot[use]
zz <- -qnorm(bp)
vv <- ((1 - bp) * bp) / (dnorm(zz)^2 * nboot)
a$use <- use; a$r <- r; a$zz <- zz
-
+
X <- cbind(sqrt(r), 1/sqrt(r)); dimnames(X) <- list(NULL, c("v","c"))
fit <- lsfit(X, zz, 1/vv, intercept=FALSE)
a$coef <- coef <- fit$coef
-
+
h.au <- c(1, -1); h.bp <- c(1, 1)
z.au <- drop(h.au %*% coef); z.bp <- drop(h.bp %*% coef)
@@ -380,7 +334,7 @@ msfit <- function(bp, r, nboot) {
a$pchi <- pchisq(a$rss, lower.tail=FALSE, df=a$df)
}
else a$pchi <- 1.0
-
+
return(a)
}
@@ -388,13 +342,13 @@ plot.msfit <- function(x, curve=TRUE, main=NULL, sub=NULL, xlab=NULL, ylab=NULL,
{
if(is.null(main)) main="Curve fitting for multiscale bootstrap resampling"
if(is.null(sub))
- {
- sub <- paste("AU = ", round(x$p["au"], digits=2),
- ", BP = ", round(x$p["bp"], digits=2),
- ", v = ", round(x$coef["v"], digits=2),
- ", c = ", round(x$coef["c"], digits=2),
- ", pchi = ", round(x$pchi, digits=2))
- }
+ {
+ sub <- paste("AU = ", round(x$p["au"], digits=2),
+ ", BP = ", round(x$p["bp"], digits=2),
+ ", v = ", round(x$coef["v"], digits=2),
+ ", c = ", round(x$coef["c"], digits=2),
+ ", pchi = ", round(x$pchi, digits=2))
+ }
if(is.null(xlab)) xlab=expression(sqrt(r))
if(is.null(ylab)) ylab=expression(z-value)
@@ -418,16 +372,16 @@ lines.msfit <- function(x, col=2, lty=1, ...) {
summary.msfit <- function(object, digits=3, ...) {
cat("\nResult of curve fitting for multiscale bootstrap resampling:\n\n")
-
+
cat("Estimated p-values:\n")
pv <- data.frame(object$p, object$se)
names(pv) <- c("Estimate", "Std. Error"); row.names(pv) <- c("au", "bp")
print(pv, digits=digits); cat("\n")
-
+
cat("Estimated coefficients:\n")
coef <- object$coef
print(coef, digits=digits); cat("\n")
-
+
cat(paste("Residual sum of squares: ", round(object$rss,digits=digits)),
", p-value: ", round(object$pchi, digits=digits),
" on ", object$df, " DF\n\n", sep="")
@@ -435,22 +389,22 @@ summary.msfit <- function(object, digits=3, ...) {
seplot <- function(object, type=c("au", "bp"), identify=FALSE,
main=NULL, xlab=NULL, ylab=NULL, ...)
- {
- if(!is.na(pm <- pmatch(type[1], c("au", "bp")))) {
- wh <- c("au", "bp")[pm]
-
- if(is.null(main))
- main <- "p-value vs standard error plot"
- if(is.null(xlab))
- xlab <- c("AU p-value", "BP value")[pm]
- if(is.null(ylab))
- ylab <- "Standard Error"
-
- plot(object$edges[,wh], object$edges[,paste("se", wh, sep=".")],
- main=main, xlab=xlab, ylab=ylab, ...)
- if(identify)
- identify(x=object$edges[,wh], y=object$edges[,paste("se", wh, sep=".")],
- labels=row.names(object$edges))
- }
- else stop("'type' should be \"au\" or \"bp\".")
+{
+ if(!is.na(pm <- pmatch(type[1], c("au", "bp")))) {
+ wh <- c("au", "bp")[pm]
+
+ if(is.null(main))
+ main <- "p-value vs standard error plot"
+ if(is.null(xlab))
+ xlab <- c("AU p-value", "BP value")[pm]
+ if(is.null(ylab))
+ ylab <- "Standard Error"
+
+ plot(object$edges[,wh], object$edges[,paste("se", wh, sep=".")],
+ main=main, xlab=xlab, ylab=ylab, ...)
+ if(identify)
+ identify(x=object$edges[,wh], y=object$edges[,paste("se", wh, sep=".")],
+ labels=row.names(object$edges))
}
+ else stop("'type' should be \"au\" or \"bp\".")
+}
diff --git a/man/pvclust.Rd b/man/pvclust.Rd
index a71d830..fb6ac76 100644
--- a/man/pvclust.Rd
+++ b/man/pvclust.Rd
@@ -10,26 +10,29 @@
\usage{
pvclust(data, method.hclust="average",
method.dist="correlation", use.cor="pairwise.complete.obs",
- nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE)
+ nboot=1000, parallel=FALSE, r=seq(.5,1.4,by=.1),
+ store=FALSE, weight=FALSE, iseed=NULL, quiet=FALSE)
parPvclust(cl=NULL, data, method.hclust="average",
method.dist="correlation", use.cor="pairwise.complete.obs",
nboot=1000, r=seq(.5,1.4,by=.1), store=FALSE, weight=FALSE,
- init.rand=TRUE, seed=NULL, iseed=NULL)
+ init.rand=NULL, iseed=NULL, quiet=FALSE)
}
\arguments{
\item{data}{numeric data matrix or data frame.}
\item{method.hclust}{
the agglomerative method used in hierarchical clustering. This
- should be (an abbreviation of) one of \code{"average"}, \code{"ward"},
- \code{"single"}, \code{"complete"}, \code{"mcquitty"},
+ should be (an abbreviation of) one of \code{"average"}, \code{"ward.D"},
+ \code{"ward.D2"}, \code{"single"}, \code{"complete"}, \code{"mcquitty"},
\code{"median"} or \code{"centroid"}. The default is
\code{"average"}. See \code{method} argument in
\code{\link[stats]{hclust}}.
}
- \item{method.dist}{the distance measure to be used. This should be (an
- abbreviation of) one of \code{"correlation"}, \code{"uncentered"},
+ \item{method.dist}{the distance measure to be used. This should be
+ a character string, or a function which returns a \code{dist} object.
+ A character string should be (an abbreviation of)
+ one of \code{"correlation"}, \code{"uncentered"},
\code{"abscor"} or those which are allowed for \code{method}
argument in \code{dist} function. The default is
\code{"correlation"}. See \emph{details} section in this help and
@@ -43,6 +46,15 @@ parPvclust(cl=NULL, data, method.hclust="average",
}
\item{nboot}{the number of bootstrap replications. The default is
\code{1000}.}
+ \item{parallel}{switch for parallel computation.
+ If \code{FALSE} the computation is done in non-parallel mode.
+ If \code{TRUE} or a positive integer is supplied,
+ parallel computation is done with automatically generated PSOCK cluster.
+ Use \code{TRUE} for default cluster size (\code{parallel::detectCores() - 1}),
+ or specify the size by an integer.
+ If a \code{cluster} object is supplied the cluster is used for parallel computation.
+ Note that \code{NULL} is currently not allowed for using the default cluster.
+ }
\item{r}{numeric vector which specifies the relative sample sizes of
bootstrap replications. For original sample size \eqn{n} and
bootstrap sample size \eqn{n'}, this is defined as \eqn{r=n'/n}.}
@@ -61,11 +73,12 @@ parPvclust(cl=NULL, data, method.hclust="average",
% length as \code{cl}. If \code{NULL} is specified,
% \code{1:length(cl)} is used as seed vector. The default is \code{NULL}.}
\item{init.rand}{logical. If \code{init.rand=TRUE}, random number generators are initialized.
- Use \code{iseed} argument to achieve reproducible results.}
- \item{seed}{integer vector of random seeds. It should have the same
- length as \code{cl}. \strong{This argument is duplicated and will be unavailable in the future. Consider using \code{iseed} instead.} }
- \item{iseed}{an integer to be passed to \code{clusterSetRNGStream} when \code{init.rand=TRUE}.}
-
+ Use \code{iseed} argument to achieve reproducible results. \strong{This argument is duplicated and will be unavailable in the future.}}
+% \item{seed}{integer vector of random seeds. It should have the same
+% length as \code{cl}. \strong{This argument is duplicated and will be unavailable in the future. Consider using \code{iseed} instead.} }
+ \item{iseed}{An integer. If non-\code{NULL} value is supplied random number generators are initialized.
+ It is passed to \code{set.seed} or \code{clusterSetRNGStream}.}
+ \item{quiet}{logical. If \code{TRUE} it does not report the progress.}
}
\details{
Function \code{pvclust} conducts multiscale bootstrap resampling to calculate
@@ -144,6 +157,10 @@ parPvclust(cl=NULL, data, method.hclust="average",
\code{\link{text.pvclust}}, \code{\link{pvrect}} and
\code{\link{pvpick}}.}
\references{
+ Suzuki, R. and Shimodaira, H. (2006)
+ "Pvclust: an R package for assessing the uncertainty in hierarchical clustering",
+ \emph{Bioinformatics}, 22 (12): 1540-1542.
+
Shimodaira, H. (2004)
"Approximately unbiased tests of regions using multistep-multiscale
bootstrap resampling",
@@ -159,15 +176,14 @@ parPvclust(cl=NULL, data, method.hclust="average",
\emph{The Fifteenth International Conference on Genome Informatics 2004},
P034.
- \url{http://www.is.titech.ac.jp/~shimo/prog/pvclust/}
+ \url{http://www.sigmath.es.osaka-u.ac.jp/shimo-lab/prog/pvclust/}
}
\examples{
-## using Boston data in package MASS
-library(MASS)
-data(Boston)
+### example using Boston data in package MASS
+data(Boston, package = "MASS")
-## multiscale bootstrap resampling
-boston.pv <- pvclust(Boston, nboot=100)
+## multiscale bootstrap resampling (non-parallel)
+boston.pv <- pvclust(Boston, nboot=100, parallel=FALSE)
## CAUTION: nboot=100 may be too small for actual use.
## We suggest nboot=1000 or larger.
@@ -190,19 +206,32 @@ msplot(boston.pv, edges=c(2,4,6,7))
par(ask=ask.bak)
-## Print clusters with high p-values
+## print clusters with high p-values
boston.pp <- pvpick(boston.pv)
boston.pp
-\dontrun{
-## parallel computation
-library(parallel)
-cl <- makeCluster(2, type = "PSOCK")
+### Using a custom distance measure
+
+## Define a distance function which returns an object of class "dist".
+## The function must have only one argument "x" (data matrix or data.frame).
+cosine <- function(x) {
+ x <- as.matrix(x)
+ y <- t(x) \%*\% x
+ res <- 1 - y / (sqrt(diag(y)) \%*\% t(sqrt(diag(y))))
+ res <- as.dist(res)
+ attr(res, "method") <- "cosine"
+ return(res)
+}
+
+result <- pvclust(Boston, method.dist=cosine, nboot=100)
+plot(result)
-## parallel version of pvclust
-boston.pv <- parPvclust(cl, Boston, nboot=1000)
-stopCluster(cl)
+\dontrun{
+### parallel computation
+result.par <- pvclust(Boston, nboot=1000, parallel=TRUE)
+plot(result.par)
}
+
}
\author{Ryota Suzuki \email{suzuki at ef-prime.com}}
\keyword{cluster}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pvclust.git
More information about the debian-med-commit
mailing list