[med-svn] [r-bioc-limma] 01/03: New upstream version 3.30.6+dfsg

Kevin Murray daube-guest at moszumanska.debian.org
Sun Dec 11 01:49:59 UTC 2016


This is an automated email from the git hooks/post-receive script.

daube-guest pushed a commit to branch master
in repository r-bioc-limma.

commit 1a085a212f4eae8ee49ce99be028b08f7851056d
Author: Kevin Murray <kdmfoss at gmail.com>
Date:   Sun Dec 11 11:49:55 2016 +1100

    New upstream version 3.30.6+dfsg
---
 DESCRIPTION                   |  10 +--
 NAMESPACE                     |   6 +-
 R/arrayWeights.R              | 205 ++++++++++++++++++------------------------
 R/coolmap.R                   |  81 +++++++++++++++++
 R/fitFDist.R                  |   2 +-
 R/fitFDistRobustly.R          |  44 +++++----
 R/kegga.R                     |   4 +-
 R/plots-fit.R                 |  18 +++-
 build/vignette.rds            | Bin 230 -> 230 bytes
 inst/doc/changelog.txt        |  34 +++++++
 inst/doc/intro.pdf            | Bin 43301 -> 43301 bytes
 man/camera.Rd                 |   2 +-
 man/coolmap.Rd                |  60 +++++++++++++
 man/goana.Rd                  |   7 +-
 man/plotMDS.Rd                |   4 +-
 man/plotSA.Rd                 |   2 +-
 man/roast.Rd                  |   4 +-
 man/voomWithQualityWeights.Rd |  11 +--
 18 files changed, 329 insertions(+), 165 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 42108e5..195d092 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: limma
-Version: 3.30.0
-Date: 2016-10-17
+Version: 3.30.6
+Date: 2016-11-28
 Title: Linear Models for Microarray Data
 Description: Data analysis, linear models and differential expression for microarray data.
 Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb]
@@ -9,8 +9,8 @@ License: GPL (>=2)
 Depends: R (>= 2.3.0)
 Imports: grDevices, graphics, stats, utils, methods
 Suggests: affy, AnnotationDbi, BiasedUrn, Biobase, ellipse, GO.db,
-        illuminaio, locfit, MASS, org.Hs.eg.db, splines, statmod (>=
-        1.2.2), vsn
+        gplots, illuminaio, locfit, MASS, org.Hs.eg.db, splines,
+        statmod (>= 1.2.2), vsn
 URL: http://bioinf.wehi.edu.au/limma
 biocViews: ExonArray, GeneExpression, Transcription,
         AlternativeSplicing, DifferentialExpression,
@@ -21,4 +21,4 @@ biocViews: ExonArray, GeneExpression, Transcription,
         MultipleComparison, Normalization, Preprocessing,
         QualityControl
 NeedsCompilation: yes
-Packaged: 2016-10-17 22:07:54 UTC; biocbuild
+Packaged: 2016-11-28 23:13:27 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index 2a3dd2f..f119b94 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,10 +10,10 @@ importFrom("graphics", "abline", "axis", "barplot", "coplot", "image",
            "legend", "lines", "matplot", "mtext", "panel.smooth",
            "par", "plot", "plot.new", "points", "polygon", "rect",
            "segments", "text", "title")
-importFrom("stats", "approx", "approxfun", "as.dist", "cmdscale",
+importFrom("stats", "approx", "approxfun", "as.dendrogram", "as.dist", "cmdscale",
            "coef", "contr.sum", "contrasts<-", "cov", "cov2cor",
-           "density", "df", "dhyper", "dnorm", "filter", "fitted",
-           "hat", "integrate", "isoreg", "lm", "lm.fit", "lm.wfit", "loess",
+           "dist", "density", "df", "dhyper", "dnorm", "filter", "fitted",
+           "hat", "hclust", "integrate", "isoreg", "lm", "lm.fit", "lm.wfit", "loess",
            "lowess", "median", "model.matrix", "na.exclude", "na.omit",
            "nlminb", "nls", "nls.control", "optim", "p.adjust",
            "pbeta", "pchisq", "pexp", "pf", "pgamma", "phyper",
diff --git a/R/arrayWeights.R b/R/arrayWeights.R
index abf47d2..7c4d40b 100644
--- a/R/arrayWeights.R
+++ b/R/arrayWeights.R
@@ -1,54 +1,54 @@
-arrayWeights <- function(object, design=NULL, weights=NULL, var.design=NULL, method="genebygene", maxiter=50, tol = 1e-10, trace = FALSE)
-#	Compute array quality weights
-#	Matt Ritchie 7 Feb 2005.
+arrayWeights <- function(object, design=NULL, weights=NULL, var.design=NULL, method="genebygene", maxiter=50, tol=1e-10, trace=FALSE)
+#	Estimate array quality weights
+#	Created by Matt Ritchie 7 Feb 2005.
 #	Gordon Smyth simplified argument checking to use getEAWP, 9 Mar 2008.
 #	Cynthia Liu added var.design argument so that variance model can be modified by user, 22 Sep 2014
-#	Last modified 27 Oct 2014.
+#	Last modified 19 Oct 2016.
 {
-#	Check arguments
+#	Check object
 	y <- getEAWP(object)
-	if(is.null(design))
-		design <- matrix(1,ncol(y$exprs),1)
-	else
-		design <- as.matrix(design)
-	if(mode(design) != "numeric") stop("design must be a numeric matrix")
-	ne <- nonEstimable(design)
-	if(!is.null(ne)) cat("Coefficients not estimable:",paste(ne,collapse=" "),"\n")
-	if(missing(weights) && !is.null(y$weights)) weights <- y$weights
-	method <- match.arg(method,c("genebygene","reml"))
-
 	M <- y$exprs
-	p <- ncol(design)
-   	QR <- qr(design)
-   	nparams <- QR$rank # ncol(design)
 	ngenes <- nrow(M)
 	narrays <- ncol(M)
-	if(narrays < 3) stop("too few arrays")
+	if(narrays < 3L) stop("too few arrays")
 	if(ngenes < narrays) stop("too few probes")
 
-	Z <- var.design
-        if(!is.null(Z)){
-           if(ncol(Z)<2) stop("design matrix Z must have at least 2 columns")
-           Z=Z
-        }else{
-	   # Set up design matrix for array variance model
-	      Z <- contr.sum(narrays)
-        }
-   
-	# Intialise array variances to zero
-   	arraygammas <- rep(0, ncol(Z))
-
-	switch(method, genebygene = {  # Estimate array variances via gene-by-gene update algorithm
+#	Check design
+	if(is.null(design))
+		design <- matrix(1,narrays,1)
+	else {
+		design <- as.matrix(design)
+		if(mode(design) != "numeric") stop("design must be a numeric matrix")
+	}
+	nparams <- qr(design)$rank
+
+#	Check weights
+	if(is.null(weights) && !is.null(y$weights)) weights <- y$weights
+
+#	Check var.design
+	if(is.null(var.design)) {
+#		Set up design matrix for array variance model
+		Z <- contr.sum(narrays)
+	} else {
+		if(ncol(var.design)<2) stop("var.design must have at least 2 columns")
+		Z <- var.design
+	}
+
+#	Check method
+	method <- match.arg(method,c("genebygene","reml"))
+
+#	Initialise array variances to zero
+	arraygammas <- rep_len(0, ncol(Z))
+
+	if(method=="genebygene") {
+#		Estimate array variances via gene-by-gene update algorithm
 		Zinfo <- 10*(narrays-nparams)/narrays*crossprod(Z, Z)
 		for(i in 1:ngenes) {
-                        if(!all(is.finite(arraygammas)))
-                                stop("convergence problem at gene ", i, ": array weights not estimable")
+			if(!all(is.finite(arraygammas))) stop("convergence problem at gene ", i, ": array weights not estimable")
 		 	vary <- exp(Z%*%arraygammas)
-
-			if(!is.null(weights)) {  # combine spot weights with running weights 
-				if(max(weights[i,], na.rm=TRUE) > 1) {
-					weights[i,] <- weights[i,]/max(weights[i,])
-				}
+			if(!is.null(weights)) {
+#				combine spot weights with running weights 
+				if(max(weights[i,], na.rm=TRUE) > 1) weights[i,] <- weights[i,]/max(weights[i,])
 				w <- as.vector(1/vary*weights[i,])
 			} else {
 				w <- as.vector(1/vary)
@@ -58,28 +58,20 @@ arrayWeights <- function(object, design=NULL, weights=NULL, var.design=NULL, met
 			if (sum(obs) > 1) {
 				if(sum(obs) == narrays)	{
 					X <- design
-				} else {  # remove missing/infinite values
+				} else {
+#					remove missing/infinite values
 					X <- design[obs, , drop = FALSE]
 					y <- y[obs]
 					vary <- vary[obs]
 					Z2 <- Z[obs,]
 				}
-
 				out <- lm.wfit(X, y, w[obs])
 				d <- rep(0, narrays)
 				d[obs] <- w[obs]*out$residuals^2
 				s2 <- sum(d[obs])/out$df.residual
-                                Q <- qr.Q(out$qr)
-                                if(ncol(Q)!=out$rank)
-                                         Q <- Q[,-((out$rank+1):ncol(Q)),drop=FALSE]
-#                                if(p!=out$rank) {
-#                                        Q <- qr.Q(qr(X[,-cols[out$qr$pivot[(out$qr$rank + 1):p,drop=FALSE]]]))
-#                                        Q <- qr.Q(out$qr)   
-#                                        Q <- Q[,-cols[(out$rank+1):ncol(Q)],drop=FALSE]
-#                                }
-#				else
-#                                        Q <- qr.Q(out$qr)
-				h <- rep(1, narrays)
+				Q <- qr.Q(out$qr)
+				if(ncol(Q)!=out$rank) Q <- Q[,-((out$rank+1):ncol(Q)),drop=FALSE]
+				h <- rep_len(1, narrays)
 				h[obs] <- rowSums(Q^2)
 				Agam <- crossprod(Z, (1-h)*Z)
 				Agam.del <- crossprod(t(rep(h[narrays], length(arraygammas))-h[1:(length(narrays)-1)]))
@@ -112,31 +104,30 @@ arrayWeights <- function(object, design=NULL, weights=NULL, var.design=NULL, met
 					arraygammas <- Usgammas[1:(narrays-1)]
 				}
 
-                                if(trace && (i==1 || i%%1001==0)) {
-                                        x2 <- crossprod(Zzd, gammas.iter) / narrays
-                                        cat("Iter =", i, " X2 =", x2, " Array gammas", arraygammas, "\n")
+				if(trace && (i==1 || i%%1001==0)) {
+					x2 <- crossprod(Zzd, gammas.iter) / narrays
+					cat("Iter =", i, " X2 =", x2, " Array gammas", arraygammas, "\n")
 				}
 			}
 		}
-	}, reml = {  # Estimate array variances via reml
-#		const <- narrays * log(2 * pi)
+	}
+
+	if(method=="reml") {
+#		Estimate array variances via reml
 		iter <- 0
 		dev <- 0
 		repeat {
-#			devold <- dev
-#			dev <- 0
 			iter <- iter + 1
-			zd <- matrix(0, narrays, 1)
-			sum1minush <- matrix(0, narrays, 1)
+			zd <- matrix(0, narrays, 1L)
+			sum1minush <- matrix(0, narrays, 1L)
 			K <- matrix(0, ngenes, narrays)
 
 			for(i in 1:ngenes) {
 				vary <- exp(Z%*%arraygammas)
 
-				if(!is.null(weights)) {  # combine spot weights with running weights
-					if(max(weights[i,], na.rm=TRUE) > 1) {
-						weights[i,] <- weights[i,]/max(weights[i,])
-					}
+				if(!is.null(weights)) {
+#					combine spot weights with running weights
+					if(max(weights[i,], na.rm=TRUE) > 1) weights[i,] <- weights[i,]/max(weights[i,])
 					w <- as.vector(1/vary*weights[i,])
 				} else {
 					w <- as.vector(1/vary)
@@ -148,93 +139,71 @@ arrayWeights <- function(object, design=NULL, weights=NULL, var.design=NULL, met
 				if (n > 0) {
 					if(n == narrays)	{
 						X <- design
-						#Z2 <- Z
-					} else {  # remove missing/infinite values
+					} else {
+#						remove missing/infinite values
 						X <- design[obs, , drop = FALSE]
 						y <- y[obs]
 						vary <- vary[obs]
 						w <- w[obs]
 						const <- sum(obs) * log(2 * pi)
 					}
-					# cat(i)
 					out <- lm.wfit(X, y, w)
 					d <- rep(0, narrays)
 					d[obs] <- w*out$residuals^2
 					s2 <- sum(d[obs])/out$df.residual
-                                        Q <- qr.Q(out$qr)
-                                        if(ncol(Q)!=out$rank)
-                                                Q <- Q[,-((out$rank+1):ncol(Q)),drop=FALSE]
-#                                        if(p!=out$rank)
-#                                                Q <- qr.Q(qr(X[,-cols[out$qr$pivot[(out$qr$rank + 1):p,drop=FALSE]]]))
-#    				        else
-#                                                Q <- qr.Q(out$qr)
+					Q <- qr.Q(out$qr)
+					if(ncol(Q)!=out$rank) Q <- Q[,-((out$rank+1):ncol(Q)),drop=FALSE]
 					h <- rowSums(Q^2)
 					zd[obs] <- zd[obs] + d[obs]/s2 - 1 + h
 					sum1minush[obs,1] <- sum1minush[obs,1] + 1-h
 					K[i,][obs] <- as.vector(h[n]-h)
-#					dev <- dev + sum(d[obs]/vary) + sum(log(vary)) + const + 2 * log(prod(abs(diag(out$qr$qr))))
 				}
 			}
 			Zzd <- crossprod(Z, zd)
-#			Zinfo <- diag(sum1minush[1:(narrays-1)]) + sum1minush[narrays] - crossprod(K[,-narrays])/out$df.residual #(narrays-nparams)
-                        Zinfo <- diag(sum1minush[1:(length(arraygammas))]) + sum1minush[length(arraygammas)] - crossprod(K[,])[1:length(arraygammas),1:length(arraygammas)]/out$df.residual
+			Zinfo <- diag(sum1minush[1:(length(arraygammas))]) + sum1minush[length(arraygammas)] - crossprod(K[,])[1:length(arraygammas),1:length(arraygammas)]/out$df.residual
 
 			R <- chol(Zinfo)
 			Zinfoinv <- chol2inv(R)
 			gammas.iter <- Zinfoinv%*%Zzd
 			arraygammas <- arraygammas + gammas.iter
-#			arrayw <- drop(exp(Z %*% (-arraygammas)))
 			x2 <- crossprod(Zzd, gammas.iter) / narrays
 
-			if(trace)
-              			cat("Iter =", iter, " X2 =", x2, " Array gammas", arraygammas, "\n")
-
-                        if(!all(is.finite(arraygammas)))
-                                stop("convergence problem at iteration ", iter, ": array weights not estimable")
+			if(trace) cat("Iter =", iter, " X2 =", x2, " Array gammas", arraygammas, "\n")
+			if(!all(is.finite(arraygammas))) stop("convergence problem at iteration ", iter, ": array weights not estimable")
 
-#			if (dev < devold - 1e-50)
-#				break
-
-			if (x2  < tol)
-				break
+			if (x2 < tol) break
 
-			if (iter == maxiter)	{
+			if (iter == maxiter) {
 				warning("Maximum iterations ", maxiter, " reached", sep="")
 				break
 			}
 		}
+	}
 
-	})
-#	matrix(rep(1/exp(Z%*%arraygammas), each=ngenes), ngenes, narrays)
 	drop(exp(Z %*% (-arraygammas)))
 }
 
-voomWithQualityWeights <- function(counts, design=NULL, lib.size=NULL, normalize.method="none",
-                         plot=FALSE, span=0.5, var.design=NULL, method="genebygene", maxiter=50,
-                         tol=1e-10, trace=FALSE, replace.weights=TRUE, col=NULL, ...)
+voomWithQualityWeights <- function(counts, design=NULL, lib.size=NULL, normalize.method="none", plot=FALSE, span=0.5, var.design=NULL, method="genebygene", maxiter=50, tol=1e-10, trace=FALSE, col=NULL, ...)
 #	Combine voom weights with sample-specific weights estimated by arrayWeights() function for RNA-seq data
-#	Matt Ritchie and Cynthia Liu, 22 Sept 2014.
-#       Last modified 7 Oct 2014.
+#	Matt Ritchie and Cynthia Liu
+#	Created 22 Sept 2014. Last modified 26 Oct 2016.
 {
-    if(plot) {
-        oldpar <- par(mfrow=c(1,2))
-        on.exit(par(oldpar))
-    }
-    v <- voom(counts, design=design, lib.size=lib.size, normalize.method=normalize.method, plot=FALSE, span=span, ...)
-    aw <- arrayWeights(v, design=design, method=method, maxiter=maxiter, tol=tol, var.design=var.design)
-    v <- voom(counts, design=design, weights=aw, lib.size=lib.size, normalize.method=normalize.method, plot=plot, span=span, ...)
-    aw <- arrayWeights(v, design=design, method=method, maxiter=maxiter, tol=tol, trace=trace, var.design=var.design)
-    wts <- asMatrixWeights(aw, dim(v))*v$weights
-    attr(wts, "arrayweights") <- NULL
-    if(plot) {
-        barplot(aw, names=1:length(aw), main="Sample-specific weights", ylab="Weight", xlab="Sample", col=col)
-        abline(h=1, col=2, lty=2)
-    }
-    if(replace.weights) {
-      v$weights <- wts
-      v$sample.weights <- aw
-      return(v)
-    } else {
-        return(wts)
-    }       
+	if(plot) {
+		oldpar <- par(mfrow=c(1,2))
+		on.exit(par(oldpar))
+	}
+	v <- voom(counts, design=design, lib.size=lib.size, normalize.method=normalize.method, plot=FALSE, span=span, ...)
+	aw <- arrayWeights(v, design=design, method=method, maxiter=maxiter, tol=tol, var.design=var.design)
+	v <- voom(counts, design=design, weights=aw, lib.size=lib.size, normalize.method=normalize.method, plot=plot, span=span, ...)
+	aw <- arrayWeights(v, design=design, method=method, maxiter=maxiter, tol=tol, trace=trace, var.design=var.design)
+
+	v$weights <- t(aw * t(v$weights))
+	v$sample.weights <- aw
+
+	if(plot) {
+		barplot(aw, names=1:length(aw), main="Sample-specific weights", ylab="Weight", xlab="Sample", col=col)
+		abline(h=1, col=2, lty=2)
+	}
+
+	v		
 }
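
The voomWithQualityWeights() rewrite above replaces the removed asMatrixWeights(aw, dim(v)) * v$weights construction with t(aw * t(v$weights)). A minimal sketch (toy 3x2 weight matrix, not taken from the upstream sources) checking that the two forms agree:

    library(limma)                           # for asMatrixWeights()
    w   <- matrix(1:6, nrow=3, ncol=2)       # 3 genes x 2 samples
    aw  <- c(0.5, 2)                         # per-sample weights
    old <- asMatrixWeights(aw, dim(w)) * w   # old construction
    attr(old, "arrayweights") <- NULL        # as the removed code did
    new <- t(aw * t(w))                      # new construction
    stopifnot(all.equal(old, new))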
diff --git a/R/coolmap.R b/R/coolmap.R
new file mode 100644
index 0000000..bf723be
--- /dev/null
+++ b/R/coolmap.R
@@ -0,0 +1,81 @@
+coolmap <- function(x, cluster.by="de pattern", col=NULL, linkage.row="complete", linkage.col="complete", show.dendrogram="both", ...)
+#	Interface to heatmap.2 with useful defaults for log2-expression data
+#	Gordon Smyth
+#	Created 23 Sep 2016. Last modified 24 Sep 2016.
+{
+#	Check arguments
+	x <- as.matrix(x)
+	nsamples <- ncol(x)
+	if(nsamples < 2L) stop("Need at least 2 rows and 2 columns")
+	cluster.by <- match.arg(cluster.by, c("de pattern","expression level"))
+	if(is.null(col)) {
+		if(cluster.by=="de pattern") col <- "redblue" else col <- "yellowblue"
+	} else {
+		col <- match.arg(col, c("redblue","redgreen","yellowblue"))
+	}
+	show.dendrogram <- match.arg(show.dendrogram, c("both","row","column","none"))
+
+#	Require gplots package
+	suppressPackageStartupMessages(OK <- requireNamespace("gplots",quietly=TRUE))
+	if(!OK) stop("gplots package required but not installed (or can't be loaded)")
+
+#	Linkage method for rows (genes)
+	linkage.row <- as.character(linkage.row)
+	if(linkage.row %in% c("w","wa","war","ward")) linkage.row <- "ward.D2"
+	METHODS <- c("none", "ward.D", "single", "complete", "average", "mcquitty", "median", "centroid", "ward.D2")
+	linkage.row <- match.arg(linkage.row, METHODS)
+
+#	Linkage method for columns (samples)
+	linkage.col <- as.character(linkage.col)
+	if(linkage.col %in% c("w","wa","war","ward")) linkage.col <- "ward.D2"
+	linkage.col <- match.arg(linkage.col, METHODS)
+
+#	Dendrogram for columns (samples) uses Euclidean distance without scaling
+	if(linkage.col=="none") {
+		hc <- FALSE
+		if(show.dendrogram=="both") show.dendrogram <- "row"
+		if(show.dendrogram=="column") show.dendrogram <- "none"
+	} else {
+		hc <- as.dendrogram(hclust(dist(t(x), method="euclidean"), method=linkage.col))
+	}
+
+#	Standardize rows if clustering by patterns
+	if(cluster.by=="de pattern") {
+		M <- rowMeans(x, na.rm=TRUE)
+		DF <- nsamples - 1L
+		IsNA <- is.na(x)
+		if(any(IsNA)) {
+			mode(IsNA) <- "integer"
+			DF <-  DF - rowSums(IsNA)
+			DF[DF==0L] <- 1L
+		}
+		x <- x-M
+		V <- rowSums(x^2L, na.rm=TRUE) / DF
+		x <- x / sqrt(V+0.01)
+		sym <- TRUE
+		key.xlab <- "Z-Score"
+	} else {
+		sym <- FALSE
+		key.xlab <- "log2(expression)"
+	}
+
+#	Dendrogram for rows (genes) uses Euclidean distance
+#	If rows are scaled, then this is equivalent to clustering using distance = 1-correlation.
+	if(linkage.row=="none") {
+		hr <- FALSE
+		if(show.dendrogram=="both") show.dendrogram <- "column"
+		if(show.dendrogram=="row") show.dendrogram <- "none"
+	} else {
+		hr <- as.dendrogram(hclust(dist(x, method="euclidean"), method=linkage.row))
+	}
+
+#	Set color palette
+	col <- switch(col,
+		"redblue"=gplots::colorpanel(256,"blue","white","red"),
+		"redgreen"=gplots::colorpanel(256,"green","black","red"),
+		"yellowblue"=gplots::colorpanel(256,"blue","white","yellow")
+	)
+
+#	Plot heatmap
+	gplots::heatmap.2(x, Rowv=hr, Colv=hc, scale="none", density.info="none", trace="none", col=col, symbreaks=sym, symkey=sym, dendrogram=show.dendrogram, key.title=NA, key.xlab=key.xlab, ...)
+}
\ No newline at end of file
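
Under cluster.by="de pattern", the block above converts each row to moderated z-scores while handling NAs and near-zero variances. A self-contained sketch of just that step (toy input assumed; mirrors the M/DF/V logic in the new file):

    standardize_rows <- function(x) {
      M  <- rowMeans(x, na.rm=TRUE)            # row means, ignoring NAs
      DF <- ncol(x) - 1L - rowSums(is.na(x))   # residual df per row
      DF[DF == 0L] <- 1L                       # guard against zero df
      x <- x - M                               # mean-correct each row
      V <- rowSums(x^2, na.rm=TRUE) / DF       # row variances
      x / sqrt(V + 0.01)                       # 0.01 offset guards zero variances
    }
    standardize_rows(matrix(rnorm(12), 3, 4))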
diff --git a/R/fitFDist.R b/R/fitFDist.R
index db9bfa6..57ea394 100644
--- a/R/fitFDist.R
+++ b/R/fitFDist.R
@@ -42,7 +42,7 @@ fitFDist <- function(x,df1,covariate=NULL)
 		warning("More than half of residual variances are exactly zero: eBayes unreliable")
 		m <- 1
 	} else {
-		if(any(x==0)) warning("Zero sample variances detected, have been offset",call.=FALSE)
+		if(any(x==0)) warning("Zero sample variances detected, have been offset away from zero",call.=FALSE)
 	}
 	x <- pmax(x, 1e-5 * m)
 
diff --git a/R/fitFDistRobustly.R b/R/fitFDistRobustly.R
index 8ac2c2f..673a244 100644
--- a/R/fitFDistRobustly.R
+++ b/R/fitFDistRobustly.R
@@ -3,7 +3,7 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 #	given the first degrees of freedom, using first and second
 #	moments of Winsorized z-values
 #	Gordon Smyth and Belinda Phipson
-#	Created 7 Oct 2012.  Last revised 22 August 2016.
+#	Created 7 Oct 2012.  Last revised 25 November 2016.
 {
 #	Check x
 	n <- length(x)
@@ -48,10 +48,14 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 
 #	Avoid zero or negative x values
 	m <- median(x)
-	if(m<=0)	stop("x values are mostly <= 0")
+	if(m<=0)	stop("Variances are mostly <= 0")
 	i <- (x < m*1e-12)
 	if(any(i)) {
-		warning("small x values have been offset away from zero")
+		nzero <- sum(i)
+		if(nzero==1)
+			warning("One very small variance detected, has been offset away from zero", call.=FALSE)
+		else
+			warning(nzero," very small variances detected, have been offset away from zero", call.=FALSE)
 		x[i] <- m*1e-12
 	}
 
@@ -147,7 +151,7 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 			o <- order(TailP)
 			df2.shrunk[o] <- cummax(df2.shrunk[o])
 		}
-		return(list(scale=s20,df2=df2,df2.shrunk=df2.shrunk))
+		return(list(scale=s20,df2=df2,tail.p.value=TailP,df2.shrunk=df2.shrunk))
 	}
 
 #	Estimate df2 by matching variance of zwins
@@ -182,13 +186,15 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 #	Posterior df for outliers
 	zresid <- z-ztrendcorrected
 	Fstat <- exp(zresid)
-	TailP <- pf(Fstat,df1=df1,df2=df2,lower.tail=FALSE)
+	LogTailP <- pf(Fstat,df1=df1,df2=df2,lower.tail=FALSE,log.p=TRUE)
+	TailP <- exp(LogTailP)
 	r <- rank(Fstat)
-	EmpiricalTailProb <- (n-r+0.5)/n
-	ProbNotOutlier <- pmin(TailP/EmpiricalTailProb,1)
-	ProbOutlier <- 1-ProbNotOutlier
-	if(any(ProbNotOutlier<1)) {
-		o <- order(TailP)
+	LogEmpiricalTailProb <- log(n-r+0.5)-log(n)
+	LogProbNotOutlier <- pmin(LogTailP-LogEmpiricalTailProb,0)
+	ProbNotOutlier <- exp(LogProbNotOutlier)
+	ProbOutlier <- -expm1(LogProbNotOutlier)
+	if(any(LogProbNotOutlier < 0)) {
+		o <- order(LogTailP)
 
 #		Old calculation for df2.outlier
 #		VarOutlier <- max(zresid)^2
@@ -214,14 +220,20 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 #		Find df2.outlier to make maxFstat the median of the distribution
 #		Exploit fact that log(TailP) is nearly linearly with positive 2nd deriv as a function of df2
 #		Note that minTailP and NewTailP are always less than 0.5
-		df2.outlier <- log(0.5)/log(min(TailP))*df2
-#		Iterate for accuracy
-		NewTailP <- pf(max(Fstat),df1=df1,df2=df2.outlier,lower.tail=FALSE)
-		df2.outlier <- log(0.5)/log(NewTailP)*df2.outlier
-		df2.shrunk <- ProbNotOutlier*df2+ProbOutlier*df2.outlier
+		minLogTailP <- min(LogTailP)
+		if(minLogTailP == -Inf) {
+			df2.outlier <- 0
+			df2.shrunk <- ProbNotOutlier*df2
+		} else {
+			df2.outlier <- log(0.5)/minLogTailP*df2
+#			Iterate for accuracy
+			NewLogTailP <- pf(max(Fstat),df1=df1,df2=df2.outlier,lower.tail=FALSE,log.p=TRUE)
+			df2.outlier <- log(0.5)/NewLogTailP*df2.outlier
+			df2.shrunk <- ProbNotOutlier*df2+ProbOutlier*df2.outlier
+		}
 
 #		Force df2.shrunk to be monotonic in TailP
-		o <- order(TailP)
+		o <- order(LogTailP)
 		df2.ordered <- df2.shrunk[o]
 		m <- cumsum(df2.ordered)
 		m <- m/(1:n)
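
The log-scale rewrite above (log.p=TRUE throughout) matters because the tail probability of an extreme F statistic can underflow to zero in double precision, which previously broke the df2.outlier calculation, whereas the log of the tail probability stays finite. A toy illustration (arbitrary df values, not from limma):

    pf(1e40, df1=4, df2=20, lower.tail=FALSE)              # underflows to 0
    pf(1e40, df1=4, df2=20, lower.tail=FALSE, log.p=TRUE)  # finite large negative value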
diff --git a/R/kegga.R b/R/kegga.R
index ef4fcda..691b908 100644
--- a/R/kegga.R
+++ b/R/kegga.R
@@ -77,7 +77,7 @@ kegga.MArrayLM <- function(de, coef = ncol(de), geneid = rownames(de), FDR = 0.0
 kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, convert=FALSE, gene.pathway=NULL, pathway.names = NULL, prior.prob=NULL, covariate=NULL, plot=FALSE, ...)
 #	KEGG (Kyoto Encyclopedia of Genes and Genomes) pathway analysis of DE genes
 #	Gordon Smyth and Yifang Hu
-#	Created 18 May 2015.  Modified 23 June 2016.
+#	Created 18 May 2015.  Modified 16 Nov 2016.
 {
 #	Ensure de is a list
 	if(!is.list(de)) de <- list(DE = de)
@@ -159,7 +159,7 @@ kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, co
 		m <- match(EG.KEGG[,1], universe)
 		InUni <- !is.na(m)
 		m <- m[InUni]
-		universe <- universe[m]
+		universe <- unique(universe[m])
 		if(!is.null(prior.prob)) prior.prob <- prior.prob[m]
 
 #		Restrict annotation to genes in universe
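
The one-line kegga() fix above de-duplicates the universe: EG.KEGG has one row per gene-pathway pair, so a gene annotated to several pathways was previously counted several times. A toy illustration (hypothetical Entrez IDs):

    genes.in.pathways <- c("10", "20", "20", "30")  # gene 20 is in two pathways
    universe <- c("10", "20", "30", "40")           # user-supplied universe
    m <- match(genes.in.pathways, universe)
    m <- m[!is.na(m)]
    universe[m]          # "10" "20" "20" "30" - gene 20 counted twice (the bug)
    unique(universe[m])  # "10" "20" "30"      - each gene counted once (the fix)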
diff --git a/R/plots-fit.R b/R/plots-fit.R
index 6b2eec4..6ff7564 100755
--- a/R/plots-fit.R
+++ b/R/plots-fit.R
@@ -181,18 +181,28 @@ heatdiagram <- function(stat,coef,primary=1,names=NULL,treatments=colnames(stat)
 	invisible(out)
 }
 
-plotSA <- function(fit, xlab="Average log-expression", ylab="log2(sigma)", zero.weights=FALSE, pch=16, cex=0.2, ...)
+plotSA <- function(fit, xlab="Average log-expression", ylab="log2(sigma)", zero.weights=FALSE, pch=16, cex=0.3, ...)
 #	Plot log-residual variance vs intensity
 #	Gordon Smyth
-#	Created 14 Jan 2009. Last modified 26 June 2016.
+#	Created 14 Jan 2009. Last modified 28 November 2016.
 {
-	if(!is(fit,"MArrayLM")) stop("fit must be a MArrayLM object")
+	if(!is(fit,"MArrayLM")) stop("fit must be an MArrayLM object")
 	x <- fit$Amean
 	y <- log2(fit$sigma)
-	if(!is.null(fit$weights) && !zero.weights) {
+	if(!(is.null(fit$weights) || zero.weights)) {
 		allzero <- rowSums(fit$weights>0,na.rm=TRUE) == 0
 		y[allzero] <- NA
 	}
+
+#	Check for outlier variances as indicated by low prior.df
+	if(length(fit$df.prior)>1L) {
+		Outlier <- fit$df.prior < 0.75*max(fit$df.prior)
+		pch <- rep_len(pch,nrow(fit))
+		pch[Outlier] <- 1
+		cex <- rep_len(cex,nrow(fit))
+		cex[Outlier] <- 1
+	}
+
 	plot(x,y,xlab=xlab,ylab=ylab,pch=pch,cex=cex,...)
 	if(anyNA(x) || anyNA(y)) {
 		ok <- !(is.na(x) | is.na(y))
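
The new plotSA() block above marks probes whose robust df.prior falls below 0.75 of the maximum, i.e. the probes that a robust empirical Bayes fit treats as variance outliers. A usage sketch (assumes an existing lmFit result named fit):

    fit <- eBayes(fit, robust=TRUE)  # robust fit yields a genewise df.prior vector
    plotSA(fit)                      # outlier-variance probes drawn as open circles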
diff --git a/build/vignette.rds b/build/vignette.rds
index 96951c2..1b35650 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/doc/changelog.txt b/inst/doc/changelog.txt
index 526107d..7d774fa 100755
--- a/inst/doc/changelog.txt
+++ b/inst/doc/changelog.txt
@@ -1,3 +1,37 @@
+28 Nov 2016: limma 3.30.6
+
+- plotSA() now indicates, by way of an open plotting symbol, any
+  points that have low robust df.prior values.
+
+25 Nov 2016: limma 3.30.5
+
+- Clearer error message from fitFDistRobustly() when some variances
+  are zero.
+
+16 Nov 2016: limma 3.30.4
+
+- Bug fix for kegga() when the universe is explicitly specified.
+
+10 Nov 2016: limma 3.30.3
+
+- Bug fix for fitFDistRobustly() when there is an extreme outlier.
+  Previously floating point underflow for the outlier p-value could
+  cause an error.
+
+27 Oct 2016: limma 3.30.2
+
+- New function coolmap(). This is essentially a wrapper for the
+  heatmap.2() function in the gplots package, but with sensible
+  default settings for genomic log-expression data.
+
+26 Oct 2016: limma 3.30.1
+
+- Argument 'replace.weights' removed from voomWithQualityWeights().
+  The function now always produces an EList, similar to voom(). The
+  default behavior of the function is unchanged.
+
+18 Oct 2016: limma 3.30.0 (Bioconductor 3.4 Release Branch)
+
 17 Oct 2016: limma 3.29.25
 
 - Update help page of ids2indices() to use MSigDB version 5.2.
diff --git a/inst/doc/intro.pdf b/inst/doc/intro.pdf
index 5d726c6..64e6815 100644
Binary files a/inst/doc/intro.pdf and b/inst/doc/intro.pdf differ
diff --git a/man/camera.Rd b/man/camera.Rd
index ce35342..1a528c5 100644
--- a/man/camera.Rd
+++ b/man/camera.Rd
@@ -20,7 +20,7 @@ interGeneCorrelation(y, design)
   \item{index}{an index vector or a list of index vectors.  Can be any vector such that \code{y[index,]} selects the rows corresponding to the test set.  The list can be made using \code{\link{ids2indices}}.}
   \item{design}{design matrix.}
   \item{contrast}{contrast of the linear model coefficients for which the test is required. Can be an integer specifying a column of \code{design}, or else a numeric vector of same length as the number of columns of \code{design}.}
-  \item{weights}{can be a numeric matrix of individual weights, of same size as \code{y}, or a numeric vector of array weights with length equal to \code{ncol(y)}, or a numeric vector of gene weights with length equal to \code{nrow(y)}.}
+  \item{weights}{numeric matrix of observation weights of same size as \code{y}, or a numeric vector of array weights with length equal to \code{ncol(y)}, or a numeric vector of gene weights with length equal to \code{nrow(y)}.}
   \item{use.ranks}{do a rank-based test (\code{TRUE}) or a parametric test (\code{FALSE})?}
   \item{allow.neg.cor}{should reduced variance inflation factors be allowed for negative correlations?}
   \item{inter.gene.cor}{numeric, optional preset value for the inter-gene correlation within tested sets.  If \code{NA} or \code{NULL}, then an inter-gene correlation will be estimated for each tested set.}
diff --git a/man/coolmap.Rd b/man/coolmap.Rd
new file mode 100644
index 0000000..4f1d496
--- /dev/null
+++ b/man/coolmap.Rd
@@ -0,0 +1,60 @@
+\title{Heatmap of gene expression values}
+\name{coolmap}
+\alias{coolmap}
+
+\description{
+Create a heatmap of a matrix of log-expression values.
+}
+
+\usage{
+coolmap(x, cluster.by="de pattern", col=NULL,
+        linkage.row="complete", linkage.col="complete", show.dendrogram="both", ...)
+}
+
+\arguments{
+  \item{x}{any data object that can be coerced to a matrix of log-expression values, for example an \code{ExpressionSet} or \code{EList}. Rows represent genes and columns represent RNA samples.}
+  \item{cluster.by}{choices are \code{"de pattern"} or \code{"expression level"}.
+  In the former case, the intention is to cluster by relative changes in expression, so genes are clustered by Pearson correlation and log-expression values are mean-corrected by rows for the plot.
+  In the latter case, the intention is to cluster by absolute expression, so genes are clustered by Euclidean distance and log-expression values are not mean-corrected.}
+  \item{col}{choices are \code{"redblue"}, \code{"redgreen"} or \code{"yellowblue"}.
+  The default is \code{"redblue"} for \code{cluster.by="de pattern"} and \code{"yellowblue"} if \code{cluster.by="expression level"}.}
+  \item{linkage.row}{linkage criterion used to cluster the rows.
+  Choices are \code{"none"}, \code{"ward.D"}, \code{"single"}, \code{"complete"}, \code{"average"}, \code{"mcquitty"}, \code{"median"}, \code{"centroid"} or \code{"ward.D2"}, with \code{"ward"} treated as \code{"ward.D2"}.}
+  \item{linkage.col}{linkage criterion used to cluster the columns.
+  Choices are the same as for \code{linkage.row}.}
+  \item{show.dendrogram}{choices are \code{"row"}, \code{"column"}, \code{"both"} or \code{"none"}.}
+  \item{\dots}{any other arguments are passed to \code{heatmap.2}.}
+}
+
+\details{
+This function is essentially a wrapper for the \code{heatmap.2} function in the gplots package, with sensible settings for genomic log-expression data.
+Unfortunately, the default settings for \code{heatmap.2} are often not ideal for expression data, and overriding the defaults requires explicit calls to \code{hclust} and \code{as.dendrogram} as well as prior standardization of the data values.
+The \code{coolmap} function implements our preferred defaults for the two most common types of heatmaps.
+When clustering by relative expression (\code{cluster.by="de pattern"}), it implements a row standardization that takes account of \code{NA} values and standard deviations that might be zero.
+}
+
+\value{
+A plot is created on the current graphics device.
+A list is also invisibly returned, see \code{\link[gplots]{heatmap.2}} for details.
+}
+
+\author{Gordon Smyth}
+
+\seealso{
+\code{\link[gplots]{heatmap.2}}, \code{\link{hclust}}, \code{\link{dist}}.
+
+An overview of diagnostic functions available in LIMMA is given in \link{09.Diagnostics}.
+}
+
+\examples{
+# Simulate gene expression data for 50 genes and 6 microarrays.
+# Samples are in two groups.
+# All 50 probes are differentially expressed in the second group.
+ngenes <- 50
+sd <- 0.3*sqrt(4/rchisq(ngenes,df=4))
+x <- matrix(rnorm(ngenes*6,sd=sd),ngenes,6)
+rownames(x) <- paste("Gene",1:ngenes)
+x <- x + seq(from=0, to=16, length=ngenes)
+x[,4:6] <- x[,4:6] + 2
+coolmap(x)
+}
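
Two further calls (reusing the simulated x from the example above) that exercise the non-default options:

    coolmap(x, cluster.by="expression level")                 # absolute expression
    coolmap(x, linkage.row="ward", show.dendrogram="column")  # "ward" maps to "ward.D2"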
diff --git a/man/goana.Rd b/man/goana.Rd
index b4f500e..274fc38 100644
--- a/man/goana.Rd
+++ b/man/goana.Rd
@@ -90,8 +90,8 @@ If \code{trend=TRUE} or a covariate is supplied, then a trend is fitted to the d
 The statistical approach provided here is the same as that provided by the goseq package, with one methodological difference and a few restrictions.
 Unlike the goseq package, the gene identifiers here must be Entrez Gene IDs and the user is assumed to be able to supply gene lengths if necessary.
 The goseq package has additional functionality to convert gene identifiers and to provide gene lengths.
-The only methodological difference is that \code{goana} and \code{kegga} computes gene length or abundance bias using \code{tricubeMovingAverage} instead of monotonic regression.
-While \code{tricubeMovingAverage} does not enforce monotonicity, it has the advantage of numerical stability when \code{de} contains only a small number of genes.
+The only methodological difference is that \code{goana} and \code{kegga} compute gene length or abundance bias using \code{\link{tricubeMovingAverage}} instead of monotonic regression.
+While \code{\link{tricubeMovingAverage}} does not enforce monotonicity, it has the advantage of numerical stability when \code{de} contains only a small number of genes.
 }
 
 \note{
@@ -184,6 +184,9 @@ topGO(go.de, ontology = "MF", sort = "DE3")
 
 k <- kegga(fit, species="Hs")
 k <- kegga(fit, species.KEGG="hsa") # equivalent to previous
+topKEGG(k, sort = "up")
+topKEGG(k, sort = "down")
+
 }
 }
 
diff --git a/man/plotMDS.Rd b/man/plotMDS.Rd
index 37d9ed8..1706924 100644
--- a/man/plotMDS.Rd
+++ b/man/plotMDS.Rd
@@ -29,7 +29,7 @@ Plot samples on a two-dimensional scatterplot so that distances on the plot appr
   \item{gene.selection}{character, \code{"pairwise"} to choose the top genes separately for each pairwise comparison between the samples or \code{"common"} to select the same genes for all comparisons.}
   \item{xlab}{title for the x-axis.}
   \item{ylab}{title for the y-axis.}
-  \item{plot}{logical. If \code{TRUE} then a plot is produced. If not, an \code{"MDS"} object is returned.}
+  \item{plot}{logical. If \code{TRUE} then a plot is created on the current graphics device.}
   \item{\dots}{any other arguments are passed to \code{plot}, and also to \code{text} (if \code{pch} is \code{NULL}).}
 }
 
@@ -52,7 +52,7 @@ See \code{\link[graphics]{text}} for possible values for \code{col} and \code{ce
 \value{
 If \code{plot=TRUE}, a plot is created on the current graphics device.
 
-An object of class \code{"MDS"} is also returned (invisibly if \code{plot=TRUE}).
+An object of class \code{"MDS"} is also invisibly returned.
 This is a list containing the following components:
 \item{distance.matrix}{numeric matrix of pairwise distances between columns of \code{x}}
 \item{cmdscale.out}{output from the function \code{cmdscale} given the distance matrix}
diff --git a/man/plotSA.Rd b/man/plotSA.Rd
index 552cd04..31f45a2 100755
--- a/man/plotSA.Rd
+++ b/man/plotSA.Rd
@@ -6,7 +6,7 @@ Plot log residual standard deviation versus average log expression for a fitted
 }
 \usage{
 plotSA(fit, xlab="Average log-expression", ylab="log2(sigma)",
-       zero.weights=FALSE, pch=16, cex=0.2, \dots)
+       zero.weights=FALSE, pch=16, cex=0.3, \dots)
 }
 \arguments{
   \item{fit}{an \code{MArrayLM} object.}
diff --git a/man/roast.Rd b/man/roast.Rd
index fd336dc..aadf9a4 100644
--- a/man/roast.Rd
+++ b/man/roast.Rd
@@ -30,7 +30,7 @@ Rotation gene set testing for linear models.
         If either \code{var.prior} or \code{df.prior} are \code{NULL}, then \code{y} should contain values for all genes on the arrays. If both prior parameters are given, then only \code{y} values for the test set are required.}
   \item{index}{index vector specifying which rows (probes) of \code{y} are in the test set.
         Can be a vector of integer indices, or a logical vector of length \code{nrow(y)}, or a vector of gene IDs corresponding to entries in \code{geneid}.
-        Alternatively it can be a data.frame with the first column containing the index vector and the second column containing gene weights.
+        Alternatively it can be a data.frame with the first column containing the index vector and the second column containing directional gene weights.
         For \code{mroast} or \code{fry}, \code{index} is a list of index vectors or a list of data.frames. }
   \item{design}{design matrix}
   \item{contrast}{contrast for which the test is required.
@@ -39,7 +39,7 @@ Rotation gene set testing for linear models.
         Can be either a vector of length \code{nrow(y)} or the name of the column of \code{y$genes} containing the gene identifiers.
         Defaults to \code{rownames(y)}.}
   \item{set.statistic}{summary set statistic. Possibilities are \code{"mean"},\code{"floormean"},\code{"mean50"} or \code{"msq"}.}
-  \item{gene.weights}{numeric vector of (positive or negative) probewise weights.
+  \item{gene.weights}{numeric vector of directional (positive or negative) probewise weights.
         For \code{mroast} or \code{fry}, this vector must have length equal to \code{nrow(y)}.
         For \code{roast}, can be of length \code{nrow(y)} or of length equal to the number of genes in the test set.} 
   \item{var.prior}{prior value for residual variances. If not provided, this is estimated from all the data using \code{squeezeVar}.}
diff --git a/man/voomWithQualityWeights.Rd b/man/voomWithQualityWeights.Rd
index 4c07cc9..f71afb3 100644
--- a/man/voomWithQualityWeights.Rd
+++ b/man/voomWithQualityWeights.Rd
@@ -7,7 +7,7 @@ Combine voom observational-level weights with sample-specific quality weights in
 \usage{
 voomWithQualityWeights(counts, design=NULL, lib.size=NULL, normalize.method="none",
              plot=FALSE, span=0.5, var.design=NULL, method="genebygene", maxiter=50,
-             tol=1e-10, trace=FALSE, replace.weights=TRUE, col=NULL, ...) 
+             tol=1e-10, trace=FALSE, col=NULL, ...) 
 }
 \arguments{
  \item{counts}{a numeric \code{matrix} containing raw counts, or an \code{ExpressionSet} containing raw counts, or a \code{DGEList} object.}
@@ -28,9 +28,6 @@ voomWithQualityWeights(counts, design=NULL, lib.size=NULL, normalize.method="non
  \item{trace}{logical variable. If true then output diagnostic information
           at each iteration of the '"reml"' algorithm, or at every 1000th iteration of the 
           \code{"genebygene"} algorithm.}
- \item{replace.weights}{logical variable. If TRUE then the weights in the voom object will be replaced with 
-       the combined voom and sample-specific weights and the \code{\link[limma:EList]{EList}} object from voom is returned.
-       If FALSE, then a matrix of combined weights is returned.}
  \item{col}{colours to use in the barplot of sample-specific weights (only used if \code{plot=TRUE}). If \code{NULL}, bars are plotted in grey.}
  \item{\dots}{other arguments are passed to \code{lmFit}.}
 }
@@ -40,10 +37,8 @@ This function is intended to process RNA-Seq data prior to linear modelling in l
 It combines observational-level weights from \code{voom} with sample-specific weights estimated using the \code{arrayWeights} function.
 }
 \value{
-If \code{replace.weights=TRUE}, then an \code{\link[limma:EList]{EList}} object is returned similar to that from \code{\link{voom}}.
-The \code{sample.weights} component contains the vector of sample quality factors and the \code{weights} component contains the matrix of weights consolidating both the voom precision weights and the sample quality factors.
-
-If \code{replace.weights=FALSE} then just the matrix of weights is returned.
+An \code{\link[limma:EList]{EList}} object similar to that from \code{\link{voom}}, but with one extra component: \code{sample.weights}, containing the vector of sample quality factors.
+The \code{weights} component combines the sample weights and the usual voom precision weights.
 }
 
 \author{Matthew Ritchie and Cynthia Liu}
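
With replace.weights removed, voomWithQualityWeights() now always returns an EList. A minimal usage sketch (assumes counts and design objects already exist):

    v <- voomWithQualityWeights(counts, design, plot=TRUE)
    v$sample.weights          # vector of sample quality factors
    fit <- lmFit(v, design)   # combined weights are used automatically downstream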

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-limma.git


