[med-svn] [r-bioc-limma] 01/03: New upstream version 3.32.7+dfsg
Steffen Möller
moeller at moszumanska.debian.org
Mon Sep 25 21:14:31 UTC 2017
This is an automated email from the git hooks/post-receive script.
moeller pushed a commit to branch master
in repository r-bioc-limma.
commit 0ca9553cf1c457b600982d516c168bc07c5f33bd
Author: Steffen Moeller <moeller at debian.org>
Date: Mon Sep 25 23:03:59 2017 +0200
New upstream version 3.32.7+dfsg
---
DESCRIPTION | 6 +-
NAMESPACE | 8 +--
R/alias2Symbol.R | 30 ++++++++
R/barcodeplot.R | 22 +++---
R/classes.R | 8 +--
R/diffSplice.R | 8 ++-
R/fitFDist.R | 42 +++++++----
R/geneset-camera.R | 4 +-
R/geneset-cameraPR.R | 89 +++++++++++++++++++++++
R/geneset-fry.R | 2 +-
R/geneset-roast.R | 1 -
R/kegga.R | 171 +++++++++++++++++++++++---------------------
R/plotWithHighlights.R | 5 +-
R/plots-fit.R | 66 ++++++++++-------
R/read.idat.R | 35 ++++++---
R/toptable.R | 4 +-
R/wsva.R | 42 +++++++++++
build/vignette.rds | Bin 230 -> 231 bytes
inst/NEWS.Rd | 97 +++++++++++++++++++++++++
inst/doc/changelog.txt | 150 +++++++++++++++++++++++++++++++++++---
inst/doc/intro.pdf | Bin 43301 -> 43301 bytes
man/08Tests.Rd | 4 +-
man/alias2Symbol.Rd | 18 +++--
man/barcodeplot.Rd | 19 ++---
man/camera.Rd | 17 +++--
man/coolmap.Rd | 6 +-
man/detectionPValue.Rd | 4 +-
man/dim.Rd | 7 --
man/ebayes.Rd | 2 +-
man/geneSetTest.Rd | 10 +--
man/goana.Rd | 28 ++++++--
man/kooperberg.Rd | 4 +-
man/plotSA.Rd | 25 ++++---
man/plotWithHighlights.Rd | 2 +-
man/propTrueNull.Rd | 3 +-
man/propexpr.Rd | 4 +-
man/volcanoplot.Rd | 20 +++---
man/wsva.Rd | 40 +++++++++++
src/init.c | 32 +++++++++
tests/limma-Tests.R | 4 ++
tests/limma-Tests.Rout.save | 131 ++++++++++++++++++---------------
41 files changed, 874 insertions(+), 296 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 2ce7e60..b7a20e1 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: limma
-Version: 3.30.8
-Date: 2017-01-12
+Version: 3.32.7
+Date: 2017-09-21
Title: Linear Models for Microarray Data
Description: Data analysis, linear models and differential expression for microarray data.
Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb]
@@ -21,4 +21,4 @@ biocViews: ExonArray, GeneExpression, Transcription,
MultipleComparison, Normalization, Preprocessing,
QualityControl
NeedsCompilation: yes
-Packaged: 2017-01-12 23:15:02 UTC; biocbuild
+Packaged: 2017-09-21 22:27:18 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index c24c1b4..b886092 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,7 +20,7 @@ importFrom("stats", "approx", "approxfun", "as.dendrogram", "as.dist", "cmdscale
"pnorm", "ppoints", "predict", "pt", "qchisq", "qf",
"qnorm", "qt", "quantile", "residuals", "rnorm", "sd",
"uniroot", "var", "weighted.mean")
-importFrom("utils", "getFromNamespace", "read.table", "write.table")
+importFrom("utils", "getFromNamespace", "read.delim", "read.table", "write.table")
if( tools:::.OStype() == "windows" ) importFrom("utils", "winMenuAddItem")
S3method("[",MAList)
@@ -55,6 +55,7 @@ S3method(avearrays,default)
S3method(avearrays,MAList)
S3method(avearrays,EList)
S3method(camera,default)
+S3method(cameraPR,default)
S3method(cbind,MAList)
S3method(cbind,RGList)
S3method(cbind,EList)
@@ -83,11 +84,6 @@ S3method(goana,default)
S3method(goana,MArrayLM)
S3method(kegga,default)
S3method(kegga,MArrayLM)
-S3method(length,MAList)
-S3method(length,MArrayLM)
-S3method(length,RGList)
-S3method(length,EList)
-S3method(length,EListRaw)
S3method(merge,MAList)
S3method(merge,RGList)
S3method(merge,EList)
diff --git a/R/alias2Symbol.R b/R/alias2Symbol.R
index 6c55588..ceb82c5 100644
--- a/R/alias2Symbol.R
+++ b/R/alias2Symbol.R
@@ -88,3 +88,33 @@ alias2SymbolTable <- function(alias,species="Hs")
Symbol
}
+
+alias2SymbolUsingNCBI <- function(alias,gene.info.file,required.columns=c("GeneID","Symbol","description"))
+# Convert gene aliases to current symbols etc using a gene_info file downloaded from the NCBI
+# Gordon Smyth
+# Created 2 March 2017. Last modified 25 July 2017.
+{
+ alias <- as.character(alias)
+ if(is.data.frame(gene.info.file)) {
+ OK <- all(c("GeneID","Symbol","Synonyms") %in% names(gene.info.file))
+ if(!OK) stop("The gene.info.file must include columns GeneID, Symbol and Synonyms")
+ NCBI <- gene.info.file
+ NCBI$Symbol <- as.character(NCBI$Symbol)
+ } else {
+ gene.info.file <- as.character(gene.info.file)
+ NCBI <- read.delim(gene.info.file,comment.char="",quote="",colClasses="character")
+ }
+ m <- match(alias,NCBI$Symbol)
+ EntrezID <- NCBI$GeneID[m]
+ i <- which(is.na(EntrezID))
+ if(any(i)) {
+ S <- strsplit(NCBI$Synonyms,split="\\|")
+ N <- vapply(S,length,1)
+ Index <- rep(1:nrow(NCBI),N)
+ IS <- data.frame(Index=Index,Synonym=unlist(S),stringsAsFactors=FALSE)
+ m <- match(alias[i],IS$Synonym)
+ EntrezID[i] <- NCBI$GeneID[IS$Index[m]]
+ m <- match(EntrezID,NCBI$GeneID)
+ }
+ NCBI[m,required.columns,drop=FALSE]
+}
diff --git a/R/barcodeplot.R b/R/barcodeplot.R
index d90931d..308b5dc 100644
--- a/R/barcodeplot.R
+++ b/R/barcodeplot.R
@@ -1,9 +1,9 @@
## BARCODEPLOT.R
-barcodeplot <- function (statistics, index = NULL, index2 = NULL, gene.weights = NULL, weights.label = "Weight", labels = c("Up", "Down"), quantiles = c(-1,1)*sqrt(2), col.bars = NULL, alpha = 0.4, worm = TRUE, span.worm = 0.45,...)
+barcodeplot <- function (statistics, index = NULL, index2 = NULL, gene.weights = NULL, weights.label = "Weight", labels = c("Down", "Up"), quantiles = c(-1,1)*sqrt(2), col.bars = NULL, alpha = 0.4, worm = TRUE, span.worm = 0.45, xlab = "Statistic", ...)
# Barcode plot of one or two gene sets.
# Gordon Smyth, Di Wu and Yifang Hu
-# 20 October 2008. Last revised 26 October 2015.
+# 20 October 2008. Last revised 5 March 2017.
{
# Check statistics
if(!is.vector(statistics, mode = "numeric")) stop("statistics should be a numeric vector")
@@ -110,7 +110,7 @@ barcodeplot <- function (statistics, index = NULL, index2 = NULL, gene.weights =
}
# Sort statistics and indexes
- ostat <- order(statistics, na.last = TRUE, decreasing=TRUE)
+ ostat <- order(statistics, na.last = TRUE, decreasing=FALSE)
statistics <- statistics[ostat]
set1 <- set1[ostat,]
if(TWO) set2 <- set2[ostat,]
@@ -235,7 +235,7 @@ barcodeplot <- function (statistics, index = NULL, index2 = NULL, gene.weights =
# worm plot setting
ylim.worm <- ylim <- c(-1, 1)
ylab.worm <- ""
- xlab.worm <- "statistics"
+ xlab.worm <- xlab
if(!TWO) ylim.worm <- c(0, 1)
@@ -272,9 +272,9 @@ barcodeplot <- function (statistics, index = NULL, index2 = NULL, gene.weights =
rect.yb <- 0.5
rect.yt <- 1
- rect(npos + 0.5, rect.yb, n - nneg + 0.5, rect.yt, col = "lightgray", border = NA)
- if (npos) rect(0.5, rect.yb, npos + 0.5, rect.yt, col = "pink", border = NA)
- if (nneg) rect(n - nneg + 0.5, rect.yb, n + 0.5, rect.yt, col = "lightblue", border = NA)
+ rect(nneg + 0.5, rect.yb, n - npos + 0.5, rect.yt, col = "lightgray", border = NA)
+ if (nneg) rect(0.5, rect.yb, nneg + 0.5, rect.yt, col = "lightblue", border = NA)
+ if (npos) rect(n - npos + 0.5, rect.yb, n + 0.5, rect.yt, col = "pink", border = NA)
segments(r, barlim[2]/2, r, barlim[2], lwd = lwd, col = col.bars.alpha[1])
segments(r, barlim[2]/2-shift, r, barlim[2]/2*(1+len.up)-shift, lwd = lwd, col = col.bars[1])
@@ -286,9 +286,9 @@ barcodeplot <- function (statistics, index = NULL, index2 = NULL, gene.weights =
if(!TWO) rect.yb <- 0
rect.yt <- 0.5
- rect(npos + 0.5, rect.yb, n - nneg + 0.5, rect.yt, col = "lightgray", border = NA)
- if (npos) rect(0.5, rect.yb, npos + 0.5, rect.yt, col = "pink", border = NA)
- if (nneg) rect(n - nneg + 0.5, rect.yb, n + 0.5, rect.yt, col = "lightblue", border = NA)
+ rect(nneg + 0.5, rect.yb, n - npos + 0.5, rect.yt, col = "lightgray", border = NA)
+ if (nneg) rect(0.5, rect.yb, nneg + 0.5, rect.yt, col = "lightblue", border = NA)
+ if (npos) rect(n - npos + 0.5, rect.yb, n + 0.5, rect.yt, col = "pink", border = NA)
segments(r, barlim[1], r, barlim[2]/2, lwd = lwd, col = col.bars.alpha[1])
segments(r, barlim[2]/2+shift, r, barlim[2]/2*(1+len.up)+shift, lwd = lwd, col = col.bars[1])
@@ -308,7 +308,7 @@ barcodeplot <- function (statistics, index = NULL, index2 = NULL, gene.weights =
axis(side = 4, at = lab.at, padj = -3.8, cex.axis = 0.85, labels = labels[2], tick = FALSE)
# label statistics on x axis
- prob <- (10:0)/10
+ prob <- (0:10)/10
axis(at = seq(1,n,len=11), side = 1, cex.axis = 0.7, las = 2, labels = format(quantile(statistics, p = prob), digits = 1))
# create worm
diff --git a/R/classes.R b/R/classes.R
index 9f6b2db..3937a38 100755
--- a/R/classes.R
+++ b/R/classes.R
@@ -28,7 +28,7 @@ representation("list")
printHead <- function(x)
# Print leading 5 elements or rows of atomic object
# Gordon Smyth
-# May 2003. Last modified 14 April 2009.
+# May 2003. Last modified 4 March 2017.
{
if(is.atomic(x)) {
d <- dim(x)
@@ -62,7 +62,7 @@ printHead <- function(x)
TwoD={
n <- d[1]
if(n > 10) {
- print(x[1:5,])
+ print(x[1:5,,drop=FALSE])
cat(n-5,"more rows ...\n")
} else
print(x)
@@ -72,7 +72,7 @@ printHead <- function(x)
if(n > 10) {
dn <- dimnames(x)
dim(x) <- c(d[1],prod(d[-1]))
- x <- x[1:5,]
+ x <- x[1:5,,drop=FALSE]
dim(x) <- c(5,d[-1])
if(!is.null(dn[[1]])) dn[[1]] <- dn[[1]][1:5]
dimnames(x) <- dn
@@ -131,7 +131,7 @@ dim.RGList <- function(x) if(is.null(x$R)) c(0,0) else dim(as.matrix(x$R))
dim.MAList <- function(x) if(is.null(x$M)) c(0,0) else dim(as.matrix(x$M))
dim.EListRaw <- dim.EList <- function(x) if(is.null(x$E)) c(0,0) else dim(as.matrix(x$E))
dim.MArrayLM <- function(x) if(is.null(x$coefficients)) c(0,0) else dim(as.matrix(x$coefficients))
-length.RGList <- length.MAList <- length.EListRaw <- length.EList <- length.MArrayLM <- function(x) prod(dim(x))
+#length.RGList <- length.MAList <- length.EListRaw <- length.EList <- length.MArrayLM <- function(x) prod(dim(x))
dimnames.RGList <- function(x) dimnames(x$R)
dimnames.MAList <- function(x) dimnames(x$M)
diff --git a/R/diffSplice.R b/R/diffSplice.R
index 1dd77c5..95745f5 100644
--- a/R/diffSplice.R
+++ b/R/diffSplice.R
@@ -2,7 +2,7 @@ diffSplice <- function(fit,geneid,exonid=NULL,robust=FALSE,verbose=TRUE)
# Test for splicing variants between conditions
# using linear model fit of exon data.
# Gordon Smyth and Charity Law
-# Created 13 Dec 2013. Last modified 18 Jan 2016.
+# Created 13 Dec 2013. Last modified 20 April 2017.
{
# Make sure there is always an annotation frame
exon.genes <- fit$genes
@@ -28,6 +28,12 @@ diffSplice <- function(fit,geneid,exonid=NULL,robust=FALSE,verbose=TRUE)
}
}
+# Treat NA geneids as genes with one exon
+ if(anyNA(geneid)) {
+ isna <- which(is.na(geneid))
+ geneid[isna] <- paste0("NA",1:length(isna))
+ }
+
# Sort by geneid
if(is.null(exonid))
o <- order(geneid)
diff --git a/R/fitFDist.R b/R/fitFDist.R
index df9c43f..1ce8367 100644
--- a/R/fitFDist.R
+++ b/R/fitFDist.R
@@ -2,11 +2,11 @@ fitFDist <- function(x,df1,covariate=NULL)
# Moment estimation of the parameters of a scaled F-distribution.
# The numerator degrees of freedom are given, the denominator is to be estimated.
# Gordon Smyth and Belinda Phipson
-# 8 Sept 2002. Last revised 12 Jan 2017.
+# 8 Sept 2002. Last revised 25 Jan 2017.
{
# Check x
n <- length(x)
- if(n==0) return(list(scale=NA,df2=NA))
+ if(n <= 1L) return(list(scale=NA,df2=NA))
# Check df1
ok <- is.finite(df1) & df1 > 1e-15
@@ -28,16 +28,21 @@ fitFDist <- function(x,df1,covariate=NULL)
if(anyNA(covariate)) stop("NA covariate values not allowed")
isfin <- is.finite(covariate)
if(!all(isfin)) {
- if(!any(isfin))
- covariate <- sign(covariate)
- else {
+ if(any(isfin)) {
r <- range(covariate[isfin])
covariate[covariate == -Inf] <- r[1]-1
covariate[covariate == Inf] <- r[2]+1
+ } else {
+ covariate <- sign(covariate)
}
}
splinedf <- min(4L,length(unique(covariate)))
- if(splinedf < 2L) covariate <- NULL
+# If covariate takes only one value, recall with NULL covariate
+ if(splinedf < 2L) {
+ out <- Recall(x=x,df1=df1)
+ out$scale <- rep_len(out$scale,n)
+ return(out)
+ }
}
# Remove missing or infinite values and zero degrees of freedom
@@ -46,15 +51,19 @@ fitFDist <- function(x,df1,covariate=NULL)
notallok <- !all(ok)
if(notallok) {
x <- x[ok]
- if(length(df1)>1) df1 <- df1[ok]
+ if(length(df1)>1L) df1 <- df1[ok]
if(!is.null(covariate)) {
- covariate2 <- covariate[!ok]
+ covariate.notok <- covariate[!ok]
covariate <- covariate[ok]
}
}
-# Need enough observations to estimate variance around trend
- if(nok <= splinedf) return(list(scale=NA,df2=NA))
+# Check whether enough observations to estimate variance around trend
+ if(nok <= splinedf) {
+ s20 <- NA
+ if(!is.null(covariate)) s20 <- rep_len(s20,n)
+ return(list(scale=s20,df2=NA))
+ }
# Avoid exactly zero values
x <- pmax(x,0)
@@ -73,14 +82,14 @@ fitFDist <- function(x,df1,covariate=NULL)
if(is.null(covariate)) {
emean <- mean(e)
- evar <- sum((e-emean)^2)/(nok-1)
+ evar <- sum((e-emean)^2)/(nok-1L)
} else {
if(!requireNamespace("splines",quietly=TRUE)) stop("splines package required but is not available")
design <- try(splines::ns(covariate,df=splinedf,intercept=TRUE),silent=TRUE)
if(is(design,"try-error")) stop("Problem with covariate")
fit <- lm.fit(design,e)
if(notallok) {
- design2 <- predict(design,newx=covariate2)
+ design2 <- predict(design,newx=covariate.notok)
emean <- rep_len(0,n)
emean[ok] <- fit$fitted
emean[!ok] <- design2 %*% fit$coefficients
@@ -97,9 +106,12 @@ fitFDist <- function(x,df1,covariate=NULL)
s20 <- exp(emean+digamma(df2/2)-log(df2/2))
} else {
df2 <- Inf
-# Use simple pooled variance, which is MLE of the scale in this case.
-# Versions of limma before Jan 2017 returned the limiting value of the evar>0 estimate, which is larger.
- s20 <- mean(x)
+ if(is.null(covariate))
+# Use simple pooled variance, which is MLE of the scale in this case.
+# Versions of limma before Jan 2017 returned the limiting value of the evar>0 estimate, which is larger.
+ s20 <- mean(x)
+ else
+ s20 <- exp(emean)
}
list(scale=s20,df2=df2)
diff --git a/R/geneset-camera.R b/R/geneset-camera.R
index 4cbb49c..d8f69cc 100644
--- a/R/geneset-camera.R
+++ b/R/geneset-camera.R
@@ -24,7 +24,7 @@ camera <- function(y,...) UseMethod("camera")
camera.default <- function(y,index,design=NULL,contrast=ncol(design),weights=NULL,use.ranks=FALSE,allow.neg.cor=FALSE,inter.gene.cor=0.01,trend.var=FALSE,sort=TRUE,...)
# Competitive gene set test allowing for correlation between genes
# Gordon Smyth and Di Wu
-# Created 2007. Last modified 4 June 2016.
+# Created 2007. Last modified 30 July 2017.
{
# Issue warning if extra arguments found
dots <- names(list(...))
@@ -34,7 +34,7 @@ camera.default <- function(y,index,design=NULL,contrast=ncol(design),weights=NUL
y <- getEAWP(y)
G <- nrow(y$exprs)
n <- ncol(y$exprs)
- ID <- rownames(y)
+ ID <- rownames(y$exprs)
if(G<3) stop("Two few genes in dataset: need at least 3")
# Check index
diff --git a/R/geneset-cameraPR.R b/R/geneset-cameraPR.R
new file mode 100644
index 0000000..a854e74
--- /dev/null
+++ b/R/geneset-cameraPR.R
@@ -0,0 +1,89 @@
+cameraPR <- function(statistic,...) UseMethod("cameraPR")
+
+cameraPR.default <- function(statistic,index,use.ranks=FALSE,inter.gene.cor=0.01,sort=TRUE,...)
+# Competitive gene set test allowing for correlation between genes: pre-ranked statistic.
+# Gordon Smyth
+# Created 18 April 2017.
+{
+# Issue warning if extra arguments found
+ dots <- names(list(...))
+ if(length(dots)) warning("Extra arguments disregarded: ",sQuote(dots))
+
+# Check statistic
+ if(is.list(statistic)) stop("statistic should be a numeric vector")
+ storage.mode(statistic) <- "numeric"
+ if(anyNA(statistic)) stop("NA values for statistic not allowed")
+ G <- length(statistic)
+ ID <- names(statistic)
+ if(G<3) stop("Two few genes in dataset: need at least 3")
+
+# Check index
+ if(!is.list(index)) index <- list(set1=index)
+ nsets <- length(index)
+
+# Check inter.gene.cor
+ if(anyNA(inter.gene.cor)) stop("NA inter.gene.cor not allowed")
+ if(any(abs(inter.gene.cor) >= 1)) stop("inter.gene.cor too large or small")
+ if(length(inter.gene.cor) > 1L) {
+ if(length(inter.gene.cor) != nsets) stop("Length of inter.gene.cor doesn't match number of sets")
+ fixed.cor <- FALSE
+ } else {
+ fixed.cor <- TRUE
+ inter.gene.cor <- rep_len(inter.gene.cor,nsets)
+ }
+
+# Set df
+ if(use.ranks)
+ df.camera <- Inf
+ else
+ df.camera <- G-2L
+
+# Global statistics
+ meanStat <- mean(statistic)
+ varStat <- var(statistic)
+
+ NGenes <- Down <- Up <- rep_len(0,nsets)
+ for (i in 1:nsets) {
+ iset <- index[[i]]
+ if(is.character(iset)) iset <- which(ID %in% iset)
+ StatInSet <- statistic[iset]
+ m <- length(StatInSet)
+ NGenes[i] <- m
+ if(use.ranks) {
+ p.value <- rankSumTestWithCorrelation(iset,statistics=statistic,correlation=inter.gene.cor[i],df=df.camera)
+ Down[i] <- p.value[1]
+ Up[i] <- p.value[2]
+ } else {
+ vif <- 1+(m-1)*inter.gene.cor[i]
+ m2 <- G-m
+ meanStatInSet <- mean(StatInSet)
+ delta <- G/m2*(meanStatInSet-meanStat)
+ varStatPooled <- ( (G-1L)*varStat - delta^2*m*m2/G ) / (G-2L)
+ two.sample.t <- delta / sqrt( varStatPooled * (vif/m + 1/m2) )
+ Down[i] <- pt(two.sample.t,df=df.camera)
+ Up[i] <- pt(two.sample.t,df=df.camera,lower.tail=FALSE)
+ }
+ }
+ TwoSided <- 2*pmin(Down,Up)
+
+# Assemble into data.frame
+ D <- (Down < Up)
+ Direction <- rep_len("Up",nsets)
+ Direction[D] <- "Down"
+ if(fixed.cor)
+ tab <- data.frame(NGenes=NGenes,Direction=Direction,PValue=TwoSided,stringsAsFactors=FALSE)
+ else
+ tab <- data.frame(NGenes=NGenes,Correlation=inter.gene.cor,Direction=Direction,PValue=TwoSided,stringsAsFactors=FALSE)
+ rownames(tab) <- names(index)
+
+# Add FDR
+ if(nsets>1L) tab$FDR <- p.adjust(tab$PValue,method="BH")
+
+# Sort by p-value
+ if(sort && nsets>1L) {
+ o <- order(tab$PValue)
+ tab <- tab[o,]
+ }
+
+ tab
+}
diff --git a/R/geneset-fry.R b/R/geneset-fry.R
index 246d99b..ea82d86 100644
--- a/R/geneset-fry.R
+++ b/R/geneset-fry.R
@@ -38,7 +38,7 @@ fry.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid=NU
# Estimate genewise sds robustly
OK <- requireNamespace("statmod",quietly=TRUE)
- if(!OK) stop("statmod package required but not installed")
+ if(!OK) stop("statmod package required but isn't installed (or can't be loaded)")
gq <- statmod::gauss.quad.prob(128,"uniform")
df.residual <- ncol(Effects)-1
Eu2max <- sum( (df.residual+1)*gq$nodes^df.residual*qchisq(gq$nodes,df=1)*gq$weights )
diff --git a/R/geneset-roast.R b/R/geneset-roast.R
index 45912b6..7c611c3 100644
--- a/R/geneset-roast.R
+++ b/R/geneset-roast.R
@@ -123,7 +123,6 @@ mroast.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid
correlation=Dots$correlation)
ngenes <- nrow(Effects)
df.residual <- ncol(Effects)-1L
- geneid <- rownames(Effects)
# Empirical Bayes posterior variances
s2 <- rowMeans(Effects[,-1L,drop=FALSE]^2)
diff --git a/R/kegga.R b/R/kegga.R
index 691b908..5d04fca 100644
--- a/R/kegga.R
+++ b/R/kegga.R
@@ -74,10 +74,10 @@ kegga.MArrayLM <- function(de, coef = ncol(de), geneid = rownames(de), FDR = 0.0
kegga(de=DEGenes, universe = universe, ...)
}
-kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, convert=FALSE, gene.pathway=NULL, pathway.names = NULL, prior.prob=NULL, covariate=NULL, plot=FALSE, ...)
+kegga.default <- function(de, universe=NULL, restrict.universe=TRUE, species="Hs", species.KEGG=NULL, convert=FALSE, gene.pathway=NULL, pathway.names = NULL,prior.prob=NULL, covariate=NULL, plot=FALSE, ...)
# KEGG (Kyoto Encyclopedia of Genes and Genomes) pathway analysis of DE genes
# Gordon Smyth and Yifang Hu
-# Created 18 May 2015. Modified 16 Nov 2016.
+# Created 18 May 2015. Modified 14 Jul 2016.
{
# Ensure de is a list
if(!is.list(de)) de <- list(DE = de)
@@ -86,11 +86,10 @@ kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, co
# Stop if components of de are not vectors
if(!all(vapply(de,is.vector,TRUE))) stop("components of de should be vectors")
-# Ensure gene IDs are of character mode
+# Ensure de gene IDs are unique and of character mode
for (s in 1:nsets) de[[s]] <- unique(as.character(de[[s]]))
- if(!is.null(universe)) universe <- as.character(universe)
-# Ensure all gene sets have unique names
+# Ensure de components have unique names
names(de) <- trimWhiteSpace(names(de))
NAME <- names(de)
i <- which(NAME == "" | is.na(NAME))
@@ -104,68 +103,72 @@ kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, co
species.KEGG <- switch(species, "Ag"="aga", "At"="ath", "Bt"="bta", "Ce"="cel", "Cf"="cfa", "Dm"="dme", "Dr"="dre", "EcK12"="eco", "EcSakai"="ecs", "Gg"="gga", "Hs"="hsa", "Mm"="mmu", "Mmu"="mcc", "Pf"="pfa", "Pt"="ptr", "Rn"="rno", "Ss"="ssc", "Xl"="xla")
}
-# prior.prob and covariate must have same length as universe
- if(is.null(universe)) {
- if(!is.null(prior.prob)) stop("prior.prob ignored unless universe is specified.")
- if(!is.null(covariate)) stop("covariate ignored unless universe is specified.")
- prior.prob <- covariate <- NULL
- } else {
- lu <- length(universe)
- if(!lu) stop("No genes in universe")
- if(!is.null(prior.prob) && length(prior.prob)!=lu) stop("universe and prior.prob must have same length")
- if(!is.null(covariate) && length(covariate)!=lu) stop("universe and covariate must have same length")
- }
-
-# Fit trend in DE with respect to the covariate, combining all de lists
- if(!is.null(covariate)) {
- if(!is.null(prior.prob)) warning("prior.prob being recomputed from covariate")
- covariate <- as.numeric(covariate)
- isDE <- as.numeric(universe %in% unlist(de))
- o <- order(covariate)
- prior.prob <- covariate
- span <- approx(x=c(20,200),y=c(1,0.5),xout=sum(isDE),rule=2)$y
- prior.prob[o] <- tricubeMovingAverage(isDE[o],span=span)
- if(plot) barcodeplot(covariate, index=as.logical(isDE), worm=TRUE, span.worm=span, main="DE status vs covariate")
- }
-
# Get pathway annotation
if(is.null(gene.pathway))
- EG.KEGG <- getGeneKEGGLinks(species.KEGG, convert=convert)
+ GeneID.PathID <- getGeneKEGGLinks(species.KEGG, convert=convert)
else {
- EG.KEGG <- gene.pathway
- d <- dim(EG.KEGG)
+ GeneID.PathID <- gene.pathway
+ d <- dim(GeneID.PathID)
if(is.null(d)) stop("gene.pathway must be data.frame or matrix")
if(d[2] < 2) stop("gene.pathway must have at least 2 columns")
- isna <- rowSums(is.na(EG.KEGG[,1:2])) > 0.5
- EG.KEGG <- EG.KEGG[!isna,]
+ isna <- rowSums(is.na(GeneID.PathID[,1:2])) > 0.5
+ GeneID.PathID <- GeneID.PathID[!isna,]
+
+# Remove repeated rows
+ ID.ID <- paste(GeneID.PathID[,1],GeneID.PathID[,2],sep=".")
+ if(anyDuplicated(ID.ID)) {
+ d <- duplicated(ID.ID)
+ GeneID.PathID <- GeneID.PathID[!d,]
+ }
}
-# Make sure that universe matches annotated genes
+# Get pathway names
+ if(is.null(pathway.names))
+ PathID.PathName <- getKEGGPathwayNames(species.KEGG, remove.qualifier=TRUE)
+ else {
+ PathID.PathName <- pathway.names
+ d <- dim(PathID.PathName)
+ if(is.null(d)) stop("pathway.names must be data.frame or matrix")
+ if(d[2] < 2) stop("pathway.names must have at least 2 columns")
+ isna <- rowSums(is.na(PathID.PathName[,1:2])) > 0.5
+ PathID.PathName <- PathID.PathName[!isna,]
+ }
+
+# Universe defaults to all annotated genes
+# prior.prob and covariate must have same length as universe
+# Ensure universe unique
if(is.null(universe)) {
- universe <- as.character(unique(EG.KEGG[,1]))
+ universe <- unique(GeneID.PathID[,1])
+ prior.prob <- covariate <- NULL
} else {
-
-# Consolidate replicated entries in universe, averaging corresponding prior.probs
universe <- as.character(universe)
- dup <- duplicated(universe)
+ lu <- length(universe)
+ if(!lu) stop("No genes in universe")
+ if(!is.null(prior.prob) && length(prior.prob)!=lu) stop("universe and prior.prob must have same length")
+ if(!is.null(covariate) && length(covariate)!=lu) stop("universe and covariate must have same length")
+ if(restrict.universe) {
+ i <- universe %in% GeneID.PathID[,1]
+ universe <- universe[i]
+ if(!is.null(prior.prob)) prior.prob <- prior.prob[i]
+ if(!is.null(covariate)) covariate <- covariate[i]
+ }
+ }
+
+# Consolidate any replicated entries in universe, averaging corresponding prior.probs
+ if(anyDuplicated(universe)) {
+ d <- duplicated(universe)
+ if(!is.null(covariate)) {
+ covariate <- rowsum(covariate,group=universe,reorder=FALSE)
+ n <- rowsum(rep_len(1L,length(universe)),group=universe,reorder=FALSE)
+ covariate <- covariate/n
+ }
if(!is.null(prior.prob)) {
prior.prob <- rowsum(prior.prob,group=universe,reorder=FALSE)
n <- rowsum(rep_len(1L,length(universe)),group=universe,reorder=FALSE)
prior.prob <- prior.prob/n
}
- universe <- universe[!dup]
-
-# Restrict universe to genes with annotation
- m <- match(EG.KEGG[,1], universe)
- InUni <- !is.na(m)
- m <- m[InUni]
- universe <- unique(universe[m])
- if(!is.null(prior.prob)) prior.prob <- prior.prob[m]
-
-# Restrict annotation to genes in universe
- EG.KEGG <- EG.KEGG[InUni,]
+ universe <- universe[!d]
}
-
NGenes <- length(universe)
if(NGenes<1L) stop("No annotated genes found in universe")
@@ -176,33 +179,55 @@ kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, co
nde[s] <- length(de[[s]])
}
+# Restrict pathways to universe
+ i <- GeneID.PathID[,1] %in% universe
+ if(sum(i)==0L) stop("Pathways do not overlap with universe")
+ GeneID.PathID <- GeneID.PathID[i,]
+
+# Get prior.prob trend in DE with respect to the covariate, combining all de lists
+ if(!is.null(covariate)) {
+ if(!is.null(prior.prob)) message("prior.prob being recomputed from covariate")
+ covariate <- as.numeric(covariate)
+ isDE <- (universe %in% unlist(de))
+ o <- order(covariate)
+ prior.prob <- covariate
+ span <- approx(x=c(20,200),y=c(1,0.5),xout=sum(isDE),rule=2)$y
+ prior.prob[o] <- tricubeMovingAverage(isDE[o],span=span)
+ if(plot) barcodeplot(covariate, index=isDE, worm=TRUE, span.worm=span, main="DE status vs covariate")
+ }
+
# Overlap pathways with DE genes
+# Create incidence matrix (X) of gene.pathway by DE sets
if(is.null(prior.prob)) {
- X <- matrix(1,nrow(EG.KEGG),nsets+1)
+ X <- matrix(1,nrow(GeneID.PathID),nsets+1)
colnames(X) <- c("N",names(de))
} else {
- X <- matrix(1,nrow(EG.KEGG),nsets+2)
- X[,nsets+2] <- prior.prob
+ names(prior.prob) <- universe
+ X <- matrix(1,nrow(GeneID.PathID),nsets+2)
+ X[,nsets+2] <- prior.prob[GeneID.PathID[,1]]
colnames(X) <- c("N",names(de),"PP")
}
- for (s in 1:nsets) X[,s+1] <- (EG.KEGG[,1] %in% de[[s]])
+ for (s in 1:nsets) X[,s+1] <- (GeneID.PathID[,1] %in% de[[s]])
# Count #genes and #DE genes and sum prior.prob for each pathway
- S <- rowsum(X, group=EG.KEGG[,2], reorder=FALSE)
+ S <- rowsum(X, group=GeneID.PathID[,2], reorder=FALSE)
# Overlap tests
PValue <- matrix(0,nrow=nrow(S),ncol=nsets)
- if(length(prior.prob)) {
+ colnames(PValue) <- paste("P", names(de), sep=".")
+ if(!is.null(prior.prob)) {
-# Calculate average prior prob for each set
+# Probability ratio for each pathway vs rest of universe
SumPP <- sum(prior.prob)
- AvePP.set <- S[,"PP"]/S[,"N"]
- W <- AvePP.set*(NGenes-S[,"N"])/(SumPP-S[,"N"]*AvePP.set)
+ M2 <- NGenes-S[,"N"]
+ Odds <- S[,"PP"] / (SumPP-S[,"PP"]) * M2 / S[,"N"]
# Wallenius' noncentral hypergeometric test
- if(!requireNamespace("BiasedUrn",quietly=TRUE)) stop("BiasedUrn package required but is not available")
- for(j in 1:nsets) for(i in 1:nrow(S))
- PValue[i,j] <- BiasedUrn::pWNCHypergeo(S[i,1+j], S[i,"N"], NGenes-S[i,"N"], nde[j], W[i], lower.tail=FALSE) + BiasedUrn::dWNCHypergeo(S[i,1+j], S[i,"N"], NGenes-S[i,"N"], nde[j], W[i])
+# Note that dWNCHypergeo() is more accurate than pWNCHypergeo(), hence use of 2-terms
+ if(!requireNamespace("BiasedUrn",quietly=TRUE)) stop("BiasedUrn package required but is not installed (or can't be loaded)")
+ for(j in seq_len(nsets)) for(i in seq_len(nrow(S)))
+ PValue[i,j] <- BiasedUrn::pWNCHypergeo(S[i,1L+j], S[i,"N"], M2[i], nde[j], Odds[i], lower.tail=FALSE) +
+ BiasedUrn::dWNCHypergeo(S[i,1L+j], S[i,"N"], M2[i], nde[j], Odds[i])
# Remove sum of prob column, not needed for output
S <- S[,-ncol(S)]
@@ -210,21 +235,8 @@ kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, co
} else {
# Fisher's exact test
- for(j in 1:nsets)
- PValue[,j] <- phyper(q=S[,1+j]-0.5,m=nde[j],n=NGenes-nde[j], k=S[,"N"],lower.tail=FALSE)
-
- }
-
-# Get list of pathway names
- if(is.null(pathway.names))
- PathID.PathName <- getKEGGPathwayNames(species.KEGG, remove.qualifier=TRUE)
- else {
- PathID.PathName <- pathway.names
- d <- dim(PathID.PathName)
- if(is.null(d)) stop("pathway.names must be data.frame or matrix")
- if(d[2] < 2) stop("pathway.names must have at least 2 columns")
- isna <- rowSums(is.na(PathID.PathName[,1:2])) > 0.5
- PathID.PathName <- PathID.PathName[!isna,]
+ for(j in seq_len(nsets))
+ PValue[,j] <- phyper(S[,1L+j]-0.5, nde[j], NGenes-nde[j], S[,"N"], lower.tail=FALSE)
}
@@ -234,9 +246,6 @@ kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, co
Results <- data.frame(Pathway = PathID.PathName[m,2], S, PValue, stringsAsFactors=FALSE)
rownames(Results) <- g
-# Name p-value columns
- colnames(Results)[2+nsets+(1L:nsets)] <- paste0("P.", names(de))
-
Results
}
diff --git a/R/plotWithHighlights.R b/R/plotWithHighlights.R
index 9ad8a96..97013e5 100644
--- a/R/plotWithHighlights.R
+++ b/R/plotWithHighlights.R
@@ -1,11 +1,11 @@
-plotWithHighlights <- function(x, y, status=NULL, values=NULL, hl.pch=16, hl.col=NULL, hl.cex=1, legend="topleft", bg.pch=16, bg.col="black", bg.cex=0.3, pch=NULL, col=NULL, cex=NULL, ...)
+plotWithHighlights <- function(x, y, status=NULL, values=NULL, hl.pch=16, hl.col=NULL, hl.cex=1, legend="topright", bg.pch=16, bg.col="black", bg.cex=0.3, pch=NULL, col=NULL, cex=NULL, ...)
# Scatterplot with color coding for special points
# Replaces the earlier function .plotMAxy, which in turn was based on the original plotMA
# created by Gordon Smyth 7 April 2003 and modified by James Wettenhall 27 June 2003.
# Gordon Smyth
-# Last modified 20 May 2015.
+# Last modified 27 May 2017.
{
# If no status information, just plot all points normally
if(is.null(status) || all(is.na(status))) {
@@ -13,6 +13,7 @@ plotWithHighlights <- function(x, y, status=NULL, values=NULL, hl.pch=16, hl.col
return(invisible())
}
# From here, status is not NULL and not all NA
+ if(is.factor(status)) status <- as.character(status)
# Check values
if(is.null(values)) {
diff --git a/R/plots-fit.R b/R/plots-fit.R
index 6ff7564..04e5e83 100755
--- a/R/plots-fit.R
+++ b/R/plots-fit.R
@@ -1,14 +1,23 @@
# PRESENTATION PLOTS FROM FITTED MODEL OBJECTS
-volcanoplot <- function(fit,coef=1,highlight=0,names=fit$genes$ID,xlab="Log Fold Change",ylab="Log Odds",pch=16,cex=0.35, ...)
-# Volcano plot of log-fold-change and B-statistic
+volcanoplot <- function(fit,coef=1,style="p-value",highlight=0,names=fit$genes$ID,xlab="Log2 Fold Change",ylab=NULL,pch=16,cex=0.35, ...)
+# Volcano plot of log-fold-change vs significance (p-value or B-statistic)
# Gordon Smyth
-# 27 Oct 2006. Last modified 26 Sep 2007.
+# Created 27 Oct 2006. Last modified 21 Sep 2017.
{
if(!is(fit,"MArrayLM")) stop("fit must be an MArrayLM")
- if(is.null(fit$lods)) stop("No B-statistics found, perhaps eBayes() not yet run")
x <- as.matrix(fit$coef)[,coef]
- y <- as.matrix(fit$lods)[,coef]
+ style <- match.arg(tolower(style), c("p-value","b-statistic"))
+ if(style=="p-value") {
+ if(is.null(fit$p.value)) stop("No p-values found in linear model fit object")
+ y <- as.matrix(fit$p.value)[,coef]
+ y <- -log10(y)
+ if(is.null(ylab)) ylab="-log10(P-value)"
+ } else {
+ if(is.null(fit$lods)) stop("No B-statistics found in linear model fit object")
+ y <- as.matrix(fit$lods)[,coef]
+ if(is.null(ylab)) ylab="Log Odds of Differential Expression"
+ }
plot(x,y,xlab=xlab,ylab=ylab,pch=pch,cex=cex,...)
if(highlight>0) {
if(is.null(names)) names <- 1:length(x)
@@ -23,7 +32,7 @@ volcanoplot <- function(fit,coef=1,highlight=0,names=fit$genes$ID,xlab="Log Fold
heatDiagram <- function(results,coef,primary=1,names=NULL,treatments=colnames(coef),limit=NULL,orientation="landscape",low="green",high="red",cex=1,mar=NULL,ncolors=123,...) {
# Heat diagram to display fold changes of genes under different conditions
# Gordon Smyth
-# 27 Oct 2002. Last revised 11 Oct 2004.
+# Created 27 Oct 2002. Last revised 11 Oct 2004.
# Check input
results <- as.matrix(results)
@@ -181,43 +190,48 @@ heatdiagram <- function(stat,coef,primary=1,names=NULL,treatments=colnames(stat)
invisible(out)
}
-plotSA <- function(fit, xlab="Average log-expression", ylab="log2(sigma)", zero.weights=FALSE, pch=16, cex=0.3, ...)
+plotSA <- function(fit, xlab="Average log-expression", ylab="sqrt(sigma)", zero.weights=FALSE, pch=16, cex=0.3, col=c("black","red"),...)
# Plot log-residual variance vs intensity
# Gordon Smyth
-# Created 14 Jan 2009. Last modified 28 November 2016.
+# Created 14 Jan 2009. Last modified 12 April 2017.
{
if(!is(fit,"MArrayLM")) stop("fit must be an MArrayLM object")
x <- fit$Amean
- y <- log2(fit$sigma)
+ y <- sqrt(fit$sigma)
if(!(is.null(fit$weights) || zero.weights)) {
allzero <- rowSums(fit$weights>0,na.rm=TRUE) == 0
y[allzero] <- NA
}
+ colv <- rep_len(col[1],nrow(fit))
-# Check for outlier variances as indicated by low prior.df
+# Check for outlier variances
if(length(fit$df.prior)>1L) {
- Outlier <- fit$df.prior < 0.75*max(fit$df.prior)
- pch <- rep_len(pch,nrow(fit))
- pch[Outlier] <- 1
- cex <- rep_len(cex,nrow(fit))
- cex[Outlier] <- 1
+ df2 <- max(fit$df.prior)
+ s2 <- fit$sigma^2 / fit$s2.prior
+ pdn <- pf(s2, df1=fit$df.residual, df2=df2)
+ pup <- pf(s2, df1=fit$df.residual, df2=df2, lower.tail=FALSE)
+ FDR <- p.adjust(2*pmin(pdn,pup),method="BH")
+ colv[FDR <= 0.5] <- col[2]
}
- plot(x,y,xlab=xlab,ylab=ylab,pch=pch,cex=cex,...)
- if(anyNA(x) || anyNA(y)) {
- ok <- !(is.na(x) | is.na(y))
- lines(lowess(x[ok],y[ok],f=0.4),col="red")
- } else {
- lines(lowess(x,y,f=0.4),col="red")
- }
+ plot(x,y,xlab=xlab,ylab=ylab,pch=pch,cex=cex,col=colv,...)
+# if(anyNA(x) || anyNA(y)) {
+# ok <- !(is.na(x) | is.na(y))
+# lines(lowess(x[ok],y[ok],f=0.4),col="red")
+# } else {
+# lines(lowess(x,y,f=0.4),col="red")
+# }
if(!is.null(fit$s2.prior)) {
- if(length(fit$s2.prior)==1) {
- abline(h=log2(fit$s2.prior)/2,col="blue")
+ if(length(fit$s2.prior)==1L) {
+ abline(h=sqrt(sqrt(fit$s2.prior)),col="blue")
} else {
o <- order(x)
- lines(x[o],log2(fit$s2.prior[o])/2,col="blue")
- legend("topright",legend=c("lowess","prior"),col=c("red","blue"),lty=1)
+ lines(x[o],sqrt(sqrt(fit$s2.prior[o])),col="blue")
+# legend("topright",legend=c("lowess","prior"),col=c("red","blue"),lty=1)
}
}
+
+ if(length(fit$df.prior)>1L) legend("topright",legend=c("Normal","Outlier"),col=col,pch=pch)
+
invisible()
}
diff --git a/R/read.idat.R b/R/read.idat.R
index 4c5f607..065afa3 100644
--- a/R/read.idat.R
+++ b/R/read.idat.R
@@ -1,7 +1,7 @@
read.idat <- function(idatfiles, bgxfile, dateinfo=FALSE, annotation="Symbol", tolerance=0L, verbose=TRUE)
# Read GenomeStudio IDAT files for Illumina gene expression BeadChips
# Matt Ritchie and Gordon Smyth
-# Created 30 September 2013. Last modified 14 June 2016.
+# Created 30 September 2013. Last modified 9 May 2017.
{
# Need illuminaio package
OK <- requireNamespace("illuminaio",quietly=TRUE)
@@ -58,7 +58,11 @@ read.idat <- function(idatfiles, bgxfile, dateinfo=FALSE, annotation="Symbol", t
if(verbose) cat("\t", idatfiles[j], "... ")
tmp <- illuminaio::readIDAT(idatfiles[j])
if(verbose) cat("Done\n")
- ind <- match(elist$genes$Array_Address_Id, tmp$Quants$IllumicodeBinData)
+ if("IllumicodeBinData" %in% colnames(tmp$Quants)) {
+ ind <- match(elist$genes$Array_Address_Id, tmp$Quants$IllumicodeBinData)
+ } else {
+ ind <- match(elist$genes$Array_Address_Id, rownames(tmp$Quants))
+ }
# Check for whether values are available for all probes
if(anyNA(ind)) {
@@ -68,15 +72,26 @@ read.idat <- function(idatfiles, bgxfile, dateinfo=FALSE, annotation="Symbol", t
" missing - please check that you have the right files, or consider setting \'tolerance\'=", sum(is.na(ind)))
i <- which(!is.na(ind))
ind <- ind[i]
- elist$E[i,j] <- tmp$Quants$MeanBinData[ind]
- elist$other$STDEV[i,j] <- tmp$Quants$DevBinData[ind]
- elist$other$NumBeads[i,j] <- tmp$Quants$NumGoodBeadsBinData[ind]
- } else {
- elist$E[,j] <- tmp$Quants$MeanBinData[ind]
- elist$other$STDEV[,j] <- tmp$Quants$DevBinData[ind]
- elist$other$NumBeads[,j] <- tmp$Quants$NumGoodBeadsBinData[ind]
+ if("MeanBinData" %in% colnames(tmp$Quants) && "DevBinData" %in% colnames(tmp$Quants) && "NumGoodBeadsBinData" %in% colnames(tmp$Quants)) {
+ elist$E[i,j] <- tmp$Quants$MeanBinData[ind]
+ elist$other$STDEV[i,j] <- tmp$Quants$DevBinData[ind]
+ elist$other$NumBeads[i,j] <- tmp$Quants$NumGoodBeadsBinData[ind]
+ } else { # if idat is in SNP format, use different headings
+ elist$E[i,j] <- tmp$Quants[ind,"Mean"]
+ elist$other$STDEV[i,j] <- tmp$Quants[ind,"SD"]
+ elist$other$NumBeads[i,j] <- tmp$Quants[ind,"NBeads"]
+ }
+ } else { # When no data is missing...
+ if("MeanBinData" %in% colnames(tmp$Quants) && "DevBinData" %in% colnames(tmp$Quants) && "NumGoodBeadsBinData" %in% colnames(tmp$Quants)) {
+ elist$E[,j] <- tmp$Quants$MeanBinData[ind]
+ elist$other$STDEV[,j] <- tmp$Quants$DevBinData[ind]
+ elist$other$NumBeads[,j] <- tmp$Quants$NumGoodBeadsBinData[ind]
+ } else { # if idat is in SNP format, use different headings
+ elist$E[,j] <- tmp$Quants[ind,"Mean"]
+ elist$other$STDEV[,j] <- tmp$Quants[ind,"SD"]
+ elist$other$NumBeads[,j] <- tmp$Quants[ind,"NBeads"]
+ }
}
-
if(dateinfo) {
elist$targets$DecodeInfo[j] = paste(tmp$RunInfo[1,], collapse=" ")
elist$targets$ScanInfo[j] = paste(tmp$RunInfo[2,], collapse=" ")
diff --git a/R/toptable.R b/R/toptable.R
index 611c58b..47e8904 100644
--- a/R/toptable.R
+++ b/R/toptable.R
@@ -135,7 +135,7 @@ topTableF <- function(fit,number=10,genelist=fit$genes,adjust.method="BH",sort.b
toptable <- function(fit,coef=1,number=10,genelist=NULL,A=NULL,eb=NULL,adjust.method="BH",sort.by="B",resort.by=NULL,p.value=1,lfc=0,confint=FALSE,...)
# Summary table of top genes
# Gordon Smyth
-# 21 Nov 2002. Last revised 7 Dec 2013.
+# 21 Nov 2002. Last revised 2 March 2017.
{
# Check fit
fit$coefficients <- as.matrix(fit$coefficients)
@@ -216,7 +216,7 @@ toptable <- function(fit,coef=1,number=10,genelist=NULL,A=NULL,eb=NULL,adjust.me
# Thin out fit by p.value and lfc thresholds
if(p.value < 1 | lfc > 0) {
- sig <- (adj.P.Value < p.value) & (abs(M) > lfc)
+ sig <- (adj.P.Value <= p.value) & (abs(M) >= lfc)
if(any(is.na(sig))) sig[is.na(sig)] <- FALSE
if(all(!sig)) return(data.frame())
genelist <- genelist[sig,,drop=FALSE]
diff --git a/R/wsva.R b/R/wsva.R
new file mode 100644
index 0000000..d83237a
--- /dev/null
+++ b/R/wsva.R
@@ -0,0 +1,42 @@
+wsva <- function(y, design, n.sv=1L, weight.by.sd=FALSE, plot=FALSE, ...)
+# Weighted surrogate variable analysis
+# Yifang Hu and Gordon Smyth
+# Created 26 Nov 2015. Last modified 2 Mar 2017.
+{
+ y <- as.matrix(y)
+ ngenes <- nrow(y)
+ narrays <- ncol(y)
+ p <- ncol(design)
+ d <- narrays-p
+ n.sv <- max(n.sv,1L)
+ n.sv <- min(n.sv, d)
+ if(n.sv <= 0L) stop("No residual df")
+
+ if(weight.by.sd) {
+ if(plot) message("Plot not available with weight.by.sd=TRUE")
+ for(i in 1L:n.sv) {
+ Effects <- .lmEffects(y, design, ...)[,-1L]
+ s <- sqrt(rowMeans(Effects^2))
+ Effects <- s * Effects
+ u <- drop(svd(Effects,nu=1L,nv=0L)$u)
+ u <- u*s
+ sv <- colSums(u*y)
+ design <- cbind(design, sv)
+ }
+ SV <- t(design[,-(1:p),drop=FALSE])
+ } else {
+ Effects <- .lmEffects(y, design, ...)[,-1L]
+ SVD <- svd(Effects,nu=n.sv,nv=0L)
+ SV <- crossprod(SVD$u,y)
+ if(plot) {
+ lambda <- SVD$d^2
+ lambda <- lambda/sum(lambda)
+ plot(lambda,xlab="Surrogate variable number",ylab="Proportion variance explained")
+ }
+ }
+
+ A <- rowMeans(SV^2)
+ SV <- t( SV / sqrt(A) )
+ colnames(SV) <- paste0("SV",1L:n.sv)
+ SV
+}
diff --git a/build/vignette.rds b/build/vignette.rds
index 1b35650..18bda60 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index 144507b..a73fe0d 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -3,6 +3,103 @@
\encoding{UTF-8}
+\section{Version 3.32.0}{\itemize{
+
+\item
+New function cameraPR(), which implemented a pre-ranked version
+of camera().
+
+\item
+New function alias2SymbolUsingNCBI(), which converts gene aliases
+ or synonyms into official gene symbols using an NCBI gene-info
+ file.
+
+\item
+New function wsva() for weighted surrogate variable analysis.
+
+\item
+New function coolmap(). This is essentially a wrapper for the
+ heatmap.2() function in the ggplots package, but with sensible
+ default settings for genomic log-expression data.
+
+\item
+decideTests() is now an S3 generic function with a default method
+ and a method for MArrayLM objects. decideTests() now selects all
+ null hypotheses as rejected if p.value=1.
+
+\item
+length() methods removed all limma data objects (objects of class
+ EList, EListRaw, RGList, MAList or MArrayLM). length(x) will now
+ return the number of list components in the object rather than the
+ number of elements in the expression matrix.
+
+\item
+New argument 'style' for volcanoplot(). The default is now to use
+ -log10(p-value) for the y-axis instead of the B-statistic.
+
+\item
+New argument 'xlab' for barcodeplot().
+
+\item
+New argument 'col' for plotSA(). plotSA() now longer plots a
+ lowess curve trend, but if appropriate both high and low outlier
+ variances are highlighted in a different color.
+
+\item
+Argument 'replace.weights' removed from voomWithQualityWeights().
+ The function now always produces an EList, similar to voom(). The
+ default behavior of the function is unchanged.
+
+\item
+barcodeplot() now ranks statistics from low to high, instead of
+ from high to low, following the usual style of axes in R plots.
+ This means that left and right are now interchanged.
+
+\item
+plotSA() now plots quarter-root variances instead of
+ log2(variances).
+
+\item
+Default for 'legend' argument of plotWithHighlights() changed from
+ "topleft" to "topright".
+
+\item
+fitFDist() now estimates the scale by mean(x) when df2 is estimated
+ to be Inf. This will make the results from eBayes() less
+ conservative than before when df.prior=Inf.
+
+\item
+plotSA() now indicates, by way of an open plotting symbol, any
+ points that have low robust df.prior values.
+
+\item
+Clearer error message from fitFDistRobustly() when some variances
+ are zero.
+
+\item
+C functions are now registered using R_registerRoutines.
+
+\item
+Bug fix for contrastAsCoef() when there is more than one contrast.
+ Previously the coefficients for the transformed design matrix were
+ correct only for the first contrast.
+
+\item
+Bug fix for kegga() when the universe is explicitly specified.
+
+\item
+Bug fix for fitFDistRobustly() when there is an extreme outlier.
+ Previously floating point underflow for the outlier p-value could
+ cause an error.
+
+\item
+Bug fix to mroast(), which was ignoring 'geneid' argument.
+
+\item
+Bug fix to printHead() for arrays with 1 column.
+}}
+
+
\section{Version 3.30.0}{\itemize{
\item
diff --git a/inst/doc/changelog.txt b/inst/doc/changelog.txt
index 8ac66bf..9296fbd 100755
--- a/inst/doc/changelog.txt
+++ b/inst/doc/changelog.txt
@@ -1,4 +1,130 @@
-12 Jan 2016: limma 3.30.8
+21 Sep 2017: limma 3.32.7
+
+- volcanoplot() didn't work with MArrayLM objects produced by
+ treat(). Now fixed.
+
+10 Sep 2017: limma 3.32.6
+
+- read.idat() can now handle idat files in SNP format.
+
+- Bug fix to cameraPR() when 'index' contains character row names.
+
+30 Jul 2017: limma 3.32.5
+
+- Bug fix to camera() when 'index' contains character row names.
+
+25 Jul 2017: limma 3.32.4
+
+- alias2SymbolUsingNCBI() now always produces a data.frame.
+ Previously a single-column data.frame was returned as a vector.
+
+- Default value for 'style' in volcanoplot() changed to "p-value"
+ instead of "p". This doesn't change the behavior of the function.
+
+15 Jul 2017: limma 3.32.3
+
+- New argument 'restrict.universe' for the default kegga() method.
+ The default is now not to the restrict the universe to genes with
+ KEGG annotation.
+
+- Bug fix for kegga() when trend=TRUE or a prior.prob or covariate is
+ specified. Previously the Down p-values were substantially too small
+ and the Up p-values were too large.
+
+- plotWithHighlights() checks whether 'status' is a factor and
+ converts to character.
+
+ 1 May 2017: limma 3.32.2
+
+- The gene.info.file argument of alias2SymbolUsingNCBI() will now
+ optionally accept a data.frame instead of a file name.
+
+25 Apr 2017: limma 3.32.1
+
+- diffSplice() now treats any NA geneid as a separate gene with one
+ exon. Previously all NA geneids were treated as the same gene.
+
+25 Apr 2017: limma 3.32.0 (Bioconductor 3.5 Release Branch)
+
+22 Apr 2017: limma 3.31.22
+
+- Minor edits to coolmap.Rd.
+
+- New function cameraPR() giving a pre-ranked version of camera().
+
+12 Apr 2017: limma 3.31.21
+
+- plotSA() now plots quarter-root variances instead of
+ log2(variances).
+
+ 7 Apr 2017: limma 3.31.20
+
+- New argument 'style' for volcanoplot(). The default is now to use
+ -log10(p-value) for the y-axis instead of the B-statistic.
+
+- NEWS.Rd updated to contain changes since version 3.30.0.
+
+ 5 Mar 2017: limma 3.31.19
+
+- New argument 'xlab' for barcodeplot().
+
+- length() methods removed all limma data objects (objects of class
+ EList, EListRaw, RGList, MAList or MArrayLM). length(x) will now
+ return the number of list components in the object rather than the
+ number of elements in the expression matrix.
+
+ 4 Mar 2017: limma 3.31.18
+
+- Bug fix to mroast(), which was ignoring 'geneid' argument.
+
+ 4 Mar 2017: limma 3.31.17
+
+- Bug fix to printHead() for arrays with 1 column.
+
+ 2 Mar 2017: limma 3.31.16
+
+- New function alias2SymbolUsingNCBI(), which converts gene aliases
+ or synonyms into official gene symbols using an NCBI gene-info
+ file.
+
+27 Feb 2017: limma 3.31.15
+
+- C functions are now registered using R_registerRoutines.
+
+- New 'plot' argument for wsva(). Variables from wsva() are now
+ normalized in size.
+
+19 Feb 2017: limma 3.31.14
+
+- new function wsva() for weighted surrogate variable analysis.
+
+08 Feb 2017: limma 3.31.13
+
+- barcodeplot() now ranks statistics from low to high, instead of
+ from high to low, following the usual style of axes in R plots.
+ This means that left and right are now interchanged.
+
+08 Feb 2017: limma 3.31.12
+
+- Default for 'legend' argument of plotWithHighlights() changed from
+ "topleft" to "topright".
+
+02 Feb 2017: limma 3.31.11
+
+- New argument 'col' for plotSA(). plotSA() now longer plots a
+ lowess curve trend, but if appropriate both high and low outlier
+ variances are highlighted in a different color.
+
+25 Jan 2017: limma 3.31.10
+
+- Fix two bugs in fitFDist() introduced in version 3.31.9. If the
+ estimated 'df2' was infinite, and 'covariate' was not NULL, a
+ scalar 'scale' value was returned instead of a vector. Similarly,
+ if the 'covariate' vector took on only one unique value, a scalar
+ 'scale' was returned instead of a vector. The function now returns
+ a vector 'scale' whenever covariate is not NULL.
+
+12 Jan 2017: limma 3.31.9
- Bug fix to fitFDist() when 'covariate' is not NULL. The new
estimate for df2 will usually be slightly lower than before. This
@@ -15,47 +141,51 @@
an NA result without an error if there are too few observations to
do the estimation.
- - decideTests() is now an S3 generic function with a default method
- and a method for MArrayLM objects.
+ 8 Jan 2017: limma 3.31.8
+
+- decideTests() is now an S3 generic function with a default method
+ and a method for MArrayLM objects. decideTests() now selects all
+ null hypotheses as rejected if p.value=1.
-14 Dec 2016: limma 3.30.7
+14 Dec 2016: limma 3.31.7
- Bug fix for contrastAsCoef() when there is more than one contrast.
Previously the coefficients for the transformed design matrix were
correct only for the first contrast.
-28 Nov 2016: limma 3.30.6
+28 Nov 2016: limma 3.31.6
- plotSA() now indicates, by way of an open plotting symbol, any
points that have low robust df.prior values.
-25 Nov 2016: limma 3.30.5
+25 Nov 2016: limma 3.31.5
- Clearer error message from fitFDistRobustly() when some variances
are zero.
-16 Nov 2016: limma 3.30.4
+16 Nov 2016: limma 3.31.4
- Bug fix for kegga() when the universe is explicitly specified.
-10 Nov 2016: limma 3.30.3
+10 Nov 2016: limma 3.31.3
- Bug fix for fitFDistRobustly() when there is an extreme outlier.
Previously floating point underflow for the outlier p-value could
cause an error.
-27 Oct 2016: limma 3.30.2
+27 Oct 2016: limma 3.31.2
- New function coolmap(). This is essentially a wrapper for the
heatmap.2() function in the ggplots package, but with sensible
default settings for genomic log-expression data.
-26 Oct 2016: limma 3.30.1
+26 Oct 2016: limma 3.31.1
- Argument 'replace.weights' removed from voomWithQualityWeights().
The function now always produces an EList, similar to voom(). The
default behavior of the function is unchanged.
+18 Oct 2016: limma 3.31.0 (Bioconductor 3.5 Developmental Branch)
18 Oct 2016: limma 3.30.0 (Bioconductor 3.4 Release Branch)
17 Oct 2016: limma 3.29.25
diff --git a/inst/doc/intro.pdf b/inst/doc/intro.pdf
index 4a10eb4..aa43f6b 100644
Binary files a/inst/doc/intro.pdf and b/inst/doc/intro.pdf differ
diff --git a/man/08Tests.Rd b/man/08Tests.Rd
index 5d1d38e..7cfcff5 100644
--- a/man/08Tests.Rd
+++ b/man/08Tests.Rd
@@ -38,13 +38,13 @@ Self-contained gene set testing for an individual set is provided by \code{\link
Gene set enrichment analysis for a large database of gene sets is provided by \code{\link{romer}}.
\code{\link{topRomer}} is used to rank results from \code{romer}.
-The functions \code{\link{alias2Symbol}} and \code{\link{alias2SymbolTable}} are provided to help match gene sets with microarray probes by way of official gene symbols.
+The functions \code{\link{alias2Symbol}}, \code{\link{alias2SymbolTable}} and \code{\link{alias2SymbolUsingNCBI}} are provided to help match gene sets with microarray probes by way of official gene symbols.
}
\section{Global Tests}{
The function \code{\link{genas}} can test for associations between two contrasts in a linear model.
-Given a set of p-values, the function \code{\link{convest}} can be used to estimate the proportion of true null hypotheses.
+Given a set of p-values, the function \code{\link{propTrueNull}} can be used to estimate the proportion of true null hypotheses.
When evaluating test procedures with simulated or known results, the utility function \code{\link{auROC}} can be used to compute the area under the Receiver Operating Curve for the test results for a given probe.
}
diff --git a/man/alias2Symbol.Rd b/man/alias2Symbol.Rd
index acbbab9..d12a7a0 100644
--- a/man/alias2Symbol.Rd
+++ b/man/alias2Symbol.Rd
@@ -1,6 +1,7 @@
\name{alias2Symbol}
\alias{alias2Symbol}
\alias{alias2SymbolTable}
+\alias{alias2SymbolUsingNCBI}
\title{Convert Gene Aliases to Official Gene Symbols}
\description{
Maps gene alias names to official gene symbols.
@@ -8,6 +9,8 @@ Maps gene alias names to official gene symbols.
\usage{
alias2Symbol(alias, species = "Hs", expand.symbols = FALSE)
alias2SymbolTable(alias, species = "Hs")
+alias2SymbolUsingNCBI(alias, gene.info.file,
+ required.columns = c("GeneID","Symbol","description"))
}
\arguments{
\item{alias}{character vector of gene aliases}
@@ -16,7 +19,9 @@ alias2SymbolTable(alias, species = "Hs")
\item{expand.symbols}{logical.
This affects those elements of \code{alias} that are the official gene symbol for one gene and also an alias for another gene.
If \code{FALSE}, then these elements will just return themselves.
- If \code{TRUE}, then all the genes for which they are aliases will be returned.}
+ If \code{TRUE}, then all the genes for which they are aliases will also be returned.}
+ \item{gene.info.file}{either the name of a gene information file downloaded from the NCBI or a data.frame resulting from reading such a file.}
+ \item{required.columns}{character vector of columns from the gene information file that are required in the output.}
}
\details{
Aliases are mapped via NCBI Entrez Gene identity numbers using Bioconductor organism packages.
@@ -24,7 +29,7 @@ Aliases are mapped via NCBI Entrez Gene identity numbers using Bioconductor orga
\code{alias2Symbol} maps a set of aliases to a set of symbols, without necessarily preserving order.
The output vector may be longer or shorter than the original vector, because some aliases might not be found and some aliases may map to more than one symbol.
\code{alias2SymbolTable} maps each alias to a gene symbol and returns a table with one row for each alias.
-If an alias maps to more than one symbol, then the first one found is returned.
+If an alias maps to more than one symbol, then the one with the lowest Entrez ID number is returned.
\code{species} can be any character string XX for which an organism package org.XX.eg.db exists and is installed.
The only requirement of the organism package is that it contains objects \code{org.XX.egALIAS2EG} and \code{org.XX.egSYMBOL} linking the aliases and symbols to Entrez Gene Ids.
@@ -49,12 +54,17 @@ At the time of writing (June 2016), the following organism packages are availabl
\tab org.Xl.eg.db \tab Xenopus
}
+\code{alias2SymbolUsingNCBI} is analogous to \code{alias2SymbolTable} but uses a gene-info file from NCBI instead of a Bioconductor organism package.
+It also gives the option of returning multiple columns from the gene-info file.
+NCBI gene-info files can be downloaded from \url{ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/GENE_INFO}.
+For example, the human file is \url{ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz} and the mouse file is \url{ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/GENE_INFO/Mammalia/Mus_musculus.gene_info.gz}.
}
\value{
-Character vector of gene symbols.
-
+\code{alias2Symbol} and \code{alias2SymbolTable} produce a character vector of gene symbols.
\code{alias2SymbolTable} returns a vector of the same length and order as \code{alias}, including \code{NA} values where no gene symbol was found.
\code{alias2Symbol} returns an unordered vector that may be longer or shorter than \code{alias}.
+
+\code{alias2SymbolUsingNCBI} returns a data.frame with rows corresponding to the entries of \code{alias} and columns as specified by \code{required.columns}.
}
\author{Gordon Smyth and Yifang Hu}
\seealso{
diff --git a/man/barcodeplot.Rd b/man/barcodeplot.Rd
index a15d859..9a3cbc4 100644
--- a/man/barcodeplot.Rd
+++ b/man/barcodeplot.Rd
@@ -6,9 +6,9 @@ Display the enrichment of one or two gene sets in a ranked gene list.
}
\usage{
barcodeplot(statistics, index = NULL, index2 = NULL, gene.weights = NULL,
- weights.label = "Weight", labels = c("Up","Down"),
+ weights.label = "Weight", labels = c("Down","Up"),
quantiles = c(-1,1)*sqrt(2), col.bars = NULL, alpha = 0.4,
- worm = TRUE, span.worm=0.45, \dots)
+ worm = TRUE, span.worm = 0.45, xlab = "Statistic", \dots)
}
\arguments{
\item{statistics}{numeric vector giving the values of statistics to rank genes by.}
@@ -22,13 +22,14 @@ barcodeplot(statistics, index = NULL, index2 = NULL, gene.weights = NULL,
Positive and negative weights correspond to positive and negative genes.
Ignored if \code{index2} is non-null.}
\item{weights.label}{label describing the entries in \code{gene.weights}.}
- \item{labels}{character vector of labels for high and low statistics. First label is associated with high statistics and is displayed at the left end of the plot. Second label is associated with low or negative statistics and is displayed at the right end of the plot.}
+ \item{labels}{character vector of labels for low and high statistics. First label is associated with low statistics or negative statistics and is displayed at the left end of the plot. Second label is associated with high or positive statistics and is displayed at the right end of the plot.}
\item{quantiles}{numeric vector of length 2, giving cutoff values for \code{statistics} considered small or large respectively. Used to color the rectangle of the barcodeplot.}
\item{col.bars}{character vector of colors for the vertical bars of the barcodeplot showing the ranks of the gene set members.
Defaults to \code{"black"} for one set or \code{c("red","blue")} for two sets.}
\item{alpha}{transparency for vertical bars. When \code{gene.weights} are not \code{NULL}, values \code{0<alpha<1} give semitransparent colors for the vertical bars inside the rectangle. This helps distinguish position bars from the weighted bars and also helps to show the density of the bars when there are many bars. Ignored if \code{gene.weights=NULL}.}
\item{worm}{logical, should enrichment worms be plotted?}
\item{span.worm}{loess span for enrichment worms. Larger spans give smoother worms.}
+ \item{xlab}{x-axis label for \code{statistics}.}
\item{\ldots}{other arguments are passed to \code{plot}.}
}
@@ -42,7 +43,7 @@ If there are two sets, then one is considered to be the positive set and the oth
For example, the first set and second sets often correspond to genes that are expected to be up- or down-regulated respectively.
The function can optionally display varying weights for different genes, for example log-fold-changes from a previous experiment.
-The statistics are ranked left to right from largest to smallest.
+The statistics are ranked left to right from smallest to largest.
The ranked statistics are represented by a shaded bar or bed, and the positions of the specified subsets are marked by vertical bars, forming a pattern like a barcode.
An enrichment worm optionally shows the relative enrichment of the vertical bars in each part of the plot.
@@ -66,7 +67,7 @@ The function can be used with any of four different calling sequences:
There is a topic page on \link{10.GeneSetTests}.
}
-\author{Gordon Smyth, Di Wu and Yifang Hu}
+\author{Yifang Hu, Gordon Smyth and Di Wu}
\references{
Ng, AP, Hu, Y, Metcalf, D, Hyland, CD, Ierino, H, Phipson, B, Wu, D, Baldwin, TM, Kauppi, M, Kiu, H, Di, Rago, L, Hilton, DJ, Smyth, GK, Alexander, WS (2015).
@@ -76,7 +77,7 @@ Early lineage priming by trisomy of Erg leads to myeloproliferation in a down sy
Lim E, Vaillant F, Wu D, Forrest NC, Pal B, Hart AH, Asselin-Labat ML, Gyorki DE, Ward T, Partanen A, Feleppa F, Huschtscha LI, Thorne HJ; kConFab; Fox SB, Yan M, French JD, Brown MA, Smyth GK, Visvader JE, and Lindeman GJ (2009).
Aberrant luminal progenitors as the candidate target population for basal tumor development in BRCA1 mutation carriers.
-\emph{Nat Med}, 15, 907-913.
+\emph{Nature Medicine} 15, 907-913.
Liu, GJ, Cimmino, L, Jude, JG, Hu, Y, Witkowski, MT, McKenzie, MD, Kartal-Kaess, M, Best, SA, Tuohey, L, Liao, Y, Shi, W, Mullighan, CG, Farrar, MA, Nutt, SL, Smyth, GK, Zuber, J, and Dickins, RA (2014).
Pax5 loss imposes a reversible differentiation block in B progenitor acute lymphoblastic leukemia.
@@ -85,16 +86,16 @@ Pax5 loss imposes a reversible differentiation block in B progenitor acute lymph
Sheikh, B, Lee, S, El-saafin, F, Vanyai, H, Hu, Y, Pang, SHM, Grabow, S, Strasser, A, Nutt, SL, Alexander, WS, Smyth, GK, Voss, AK, and Thomas, T (2015).
MOZ regulates B cell progenitors in mice, consequently, Moz haploinsufficiency dramatically retards MYC-induced lymphoma development.
-\emph{Blood}, 125, 1910-1921.
+\emph{Blood} 125, 1910-1921.
\url{http://www.ncbi.nlm.nih.gov/pubmed/25605372}
Subramanian A, Tamayo P, Mootha VK, Mukherjee S, Ebert BL, Gillette MA, Paulovich A, Pomeroy SL, Golub TR, Lander ES, and Mesirov JP (2005).
Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles.
-\emph{Proc Natl Acad Sci USA}, 102, 15545-15550.
+\emph{Proc Natl Acad Sci USA} 102, 15545-15550.
Witkowski, MT, Cimmino, L, Hu, Y, Trimarchi, T, Tagoh, H, McKenzie, MD, Best, SA, Tuohey, L, Willson, TA, Nutt, SL, Meinrad Busslinger, M, Aifantis, I, Smyth, GK, and Dickins, RA (2015).
Activated Notch counteracts Ikaros tumor suppression in mouse and human T cell acute lymphoblastic leukemia.
-\emph{Leukemia}.
+\emph{Leukemia} 29, 1301-1311.
\url{http://www.ncbi.nlm.nih.gov/pubmed/25655195}
}
diff --git a/man/camera.Rd b/man/camera.Rd
index 1a528c5..ac23b0b 100644
--- a/man/camera.Rd
+++ b/man/camera.Rd
@@ -2,6 +2,8 @@
\alias{camera}
\alias{camera.default}
\alias{interGeneCorrelation}
+\alias{cameraPR}
+\alias{cameraPR.default}
\title{Competitive Gene Set Test Accounting for Inter-gene Correlation}
\description{
Test whether a set of genes is highly ranked relative to other genes in terms of differential expression, accounting for inter-gene correlation.
@@ -10,14 +12,15 @@ Test whether a set of genes is highly ranked relative to other genes in terms of
\method{camera}{default}(y, index, design, contrast = ncol(design), weights = NULL,
use.ranks = FALSE, allow.neg.cor=FALSE, inter.gene.cor=0.01, trend.var = FALSE,
sort = TRUE, \dots)
+\method{cameraPR}{default}(statistic, index, use.ranks = FALSE, inter.gene.cor=0.01, sort = TRUE, \dots)
interGeneCorrelation(y, design)
}
\arguments{
\item{y}{a numeric matrix of log-expression values or log-ratios of expression values, or any data object containing such a matrix.
Rows correspond to probes and columns to samples.
- Any type of object that can be processed by \code{\link{getEAWP}} is acceptable.
- }
- \item{index}{an index vector or a list of index vectors. Can be any vector such that \code{y[index,]} selects the rows corresponding to the test set. The list can be made using \code{\link{ids2indices}}.}
+ Any type of object that can be processed by \code{\link{getEAWP}} is acceptable.}
+ \item{statistic}{a numeric vector of genewise statistics. If \code{index} contains gene IDs, then \code{statistic} should be a named vector with the gene IDs as names.}
+ \item{index}{an index vector or a list of index vectors. Can be any vector such that \code{y[index,]} of \code{statistic[index]} selects the rows corresponding to the test set. The list can be made using \code{\link{ids2indices}}.}
\item{design}{design matrix.}
\item{contrast}{contrast of the linear model coefficients for which the test is required. Can be an integer specifying a column of \code{design}, or else a numeric vector of same length as the number of columns of \code{design}.}
\item{weights}{numeric matrix of observation weights of same size as \code{y}, or a numeric vector of array weights with length equal to \code{ncol(y)}, or a numeric vector of gene weights with length equal to \code{nrow(y)}.}
@@ -53,6 +56,8 @@ However, in this mode, highly co-regulated gene sets that are biological interpr
With \code{interGeneCorrelation=0.01}, \code{camera} will rank biologically interpetable sets more highly.
This gives a useful compromise between strict error rate control and interpretable gene set rankings.
+
+\code{cameraPR} is a "pre-ranked" version of \code{camera} where the genes are pre-ranked according to a pre-computed statistic.
}
\note{The default settings for \code{inter.gene.cor} and \code{allow.neg.cor} were changed to the current values in limma 3.29.6.
@@ -60,7 +65,7 @@ Previously, the default was to estimate an inter-gene correlation for each set.
To reproduce the earlier default, use \code{allow.neg.cor=TRUE} and \code{inter.gene.cor=NA}.}
\value{
-\code{camera} returns a data.frame with a row for each set and the following columns:
+\code{camera} and \code{cameraPR} return a data.frame with a row for each set and the following columns:
\item{NGenes}{number of genes in set.}
\item{Correlation}{inter-gene correlation (only included if the \code{inter.gene.cor} was not preset).}
\item{Direction}{direction of change (\code{"Up"} or \code{"Down"}).}
@@ -113,6 +118,10 @@ camera(y, index2, design)
camera(y, list(set1=index1,set2=index2), design, inter.gene.cor=NA)
camera(y, list(set1=index1,set2=index2), design, inter.gene.cor=0.01)
+
+# Pre-ranked version
+fit <- eBayes(lmFit(y, design))
+cameraPR(fit$t[,2], list(set1=index1,set2=index2))
}
\keyword{gene set test}
diff --git a/man/coolmap.Rd b/man/coolmap.Rd
index 4f1d496..35caeda 100644
--- a/man/coolmap.Rd
+++ b/man/coolmap.Rd
@@ -19,7 +19,7 @@ coolmap(x, cluster.by="de pattern", col=NULL,
\item{col}{choices are \code{"redblue"}, \code{"redgreen"} or \code{"yellowblue"}.
The default is \code{"redblue"} for \code{cluster.by="de pattern"} and \code{"yellowblue"} if \code{cluster.by="expression level"}.}
\item{linkage.row}{linkage criterion used to cluster the rows.
- Choices are \code{"none"}, \code{"ward.D"}, \code{"single"}, \code{"complete"}, \code{"average"}, \code{"mcquitty"}, \code{"median"}, \code{"centroid"} or \code{"ward.D2"}, with \code{"ward"} is treated as \code{"ward.D2"}.}
+ Choices are \code{"none"}, \code{"ward"}, \code{"ward.D"}, \code{"ward.D2"}, \code{"single"}, \code{"complete"}, \code{"average"}, \code{"mcquitty"}, \code{"median"} or \code{"centroid"}, with \code{"ward"} treated as \code{"ward.D2"}.}
\item{linkage.col}{linkage criterion used to cluster the columns.
Choices are the same as for \code{linkage.row}.}
\item{show.dendrogram}{choices are \code{"row"}, \code{"column"}, \code{"both"} or \code{"none"}.}
@@ -27,8 +27,8 @@ coolmap(x, cluster.by="de pattern", col=NULL,
}
\details{
-This function is essentially a wrapper for the \code{heatmap.2} function in the ggplots package, with sensible settings for genomic log-expression data.
-Unfortunately, the default settings for \code{heatmap.2} are often not ideal for expression data, and overriding the defaults requires explicit calls to \code{hclust} and \code{as.dendrogram} as well as prior standardization of the data values.
+This function calls the \code{heatmap.2} function in the ggplots package with sensible argument settings for genomic log-expression data.
+The default settings for \code{heatmap.2} are often not ideal for expression data, and overriding the defaults requires explicit calls to \code{hclust} and \code{as.dendrogram} as well as prior standardization of the data values.
The \code{coolmap} function implements our preferred defaults for the two most common types of heatmaps.
When clustering by relative expression (\code{cluster.by="de pattern"}), it implements a row standardization that takes account of \code{NA} values and standard deviations that might be zero.
}
diff --git a/man/detectionPValue.Rd b/man/detectionPValue.Rd
index 4280439..fe20cc8 100644
--- a/man/detectionPValue.Rd
+++ b/man/detectionPValue.Rd
@@ -33,8 +33,8 @@ When used on Illumina BeadChip data, this function produces essentially the same
\references{
Shi, W, de Graaf, C, Kinkel, S, Achtman, A, Baldwin, T, Schofield, L, Scott, H, Hilton, D, Smyth, GK (2010).
Estimating the proportion of microarray probes expressed in an RNA sample.
-\emph{Nucleic Acids Research} 38, 2168-2176.
-\url{http://nar.oxfordjournals.org/content/38/7/2168}
+\emph{Nucleic Acids Research} 38(7), 2168-2176.
+\url{https://www.ncbi.nlm.nih.gov/pubmed/20056656}
}
\author{Gordon Smyth}
diff --git a/man/dim.Rd b/man/dim.Rd
index 3297fb6..d507496 100755
--- a/man/dim.Rd
+++ b/man/dim.Rd
@@ -4,18 +4,12 @@
\alias{dim.EListRaw}
\alias{dim.EList}
\alias{dim.MArrayLM}
-\alias{length.RGList}
-\alias{length.MAList}
-\alias{length.EListRaw}
-\alias{length.EList}
-\alias{length.MArrayLM}
\title{Retrieve the Dimensions of an RGList, MAList or MArrayLM Object}
\description{
Retrieve the number of rows (genes) and columns (arrays) for an RGList, MAList or MArrayLM object.
}
\usage{
\method{dim}{RGList}(x)
-\method{length}{RGList}(x)
}
\arguments{
\item{x}{an object of class \code{RGList}, \code{MAList} or \code{MArrayLM}}
@@ -44,6 +38,5 @@ MA <- new("MAList",list(M=M,A=A))
dim(M)
ncol(M)
nrow(M)
-length(M)
}
\keyword{array}
diff --git a/man/ebayes.Rd b/man/ebayes.Rd
index 1998871..7959730 100755
--- a/man/ebayes.Rd
+++ b/man/ebayes.Rd
@@ -29,7 +29,7 @@ treat(fit, lfc=0, trend=FALSE, robust=FALSE, winsor.tail.p=c(0.05,0.1))
\item{df.prior}{degrees of freedom associated with \code{s2.prior}}
\item{df.total}{numeric vector of total degrees of freedom associated with t-statistics and p-values. Equal to \code{df.prior+df.residual} or \code{sum(df.residual)}, whichever is smaller.}
\item{s2.post}{numeric vector giving the posterior values for \code{sigma^2}}
- \item{lods}{numeric vector or matrix giving the log-odds of differential expression}
+ \item{lods}{numeric vector or matrix giving the log-odds of differential expression (natural log scale).}
\item{var.prior}{estimated prior value for the variance of the log2-fold-change for differentially expressed gene}
\item{F}{numeric vector of moderated F-statistics for testing all contrasts defined by the columns of \code{fit} simultaneously equal to zero}
\item{F.p.value}{numeric vector giving p-values corresponding to \code{F}}
diff --git a/man/geneSetTest.Rd b/man/geneSetTest.Rd
index f708c53..7e27cc7 100755
--- a/man/geneSetTest.Rd
+++ b/man/geneSetTest.Rd
@@ -44,7 +44,7 @@ By contrast, a \emph{self-contained} gene set test such as \code{\link{roast}} t
Because it is based on permuting genes, \code{geneSetTest} assumes that the different genes (or probes) are statistically independent.
(Strictly speaking, it assumes that the genes in the set are no more correlated on average than randomly chosen genes.)
If inter-gene correlations are present, then a statistically significant result from \code{geneSetTest} indicates either that the set is highly ranked or that the genes in the set are positively correlated on average (Wu and Smyth, 2012).
-Unless gene sets with positive correlations are particularly of interest, it may be advisable to use \code{\link{camera}} instead to adjust the test for inter-gene correlations.
+Unless gene sets with positive correlations are particularly of interest, it may be advisable to use \code{\link{camera}} or \code{\link{cameraPR}} instead to adjust the test for inter-gene correlations.
Inter-gene correlations are likely to be present in differential expression experiments with biologically heterogeneous experimental units.
On the other hand, the assumption of independence between genes should hold when the replicates are purely technical, i.e., when there is no biological variability between the replicate arrays in each experimental condition.
@@ -61,7 +61,7 @@ There are four possible alternatives to test for.
\code{alternative=="mixed"} test whether the genes in the set tend to be differentially expressed, without regard for direction.
In this case, the test will be significant if the set contains mostly large test statistics, even if some are positive and some are negative.
-The latter three alternatives are appropriate if you have a prior expection that all the genes in the set will react in the same direction.
+The latter three alternatives are appropriate when there is a prior expection that all the genes in the set will react in the same direction.
The \code{"mixed"} alternative is appropriate if you know only that the genes are involved in the relevant pathways, possibly in different directions.
The \code{"mixed"} is the only meaningful alternative with F-like statistics.
@@ -72,12 +72,12 @@ If \code{ranks.only} is \code{FALSE}, then the p-value is obtained by simulation
}
\note{
-This function does not does correct for inter-gene correlation, so it is more likely to assign small p-values to sets containing positive correlated genes.
-For this reason, the alternative \code{camera} is now recommended over \code{geneSetTest} in those contexts for which \code{camera} is applicable.
+Wu and Smyth (2012) show that \code{geneSetTest} does not does correct for inter-gene correlations and is more likely to assign small p-values to sets containing positive correlated genes.
+The function \code{\link{cameraPR}} is recommended as a alternative.
}
\seealso{
-\code{\link{camera}}, \code{\link{roast}}, \code{\link{romer}}, \code{\link{wilcox.test}}, \code{\link{barcodeplot}}
+\code{\link{cameraPR}}, \code{\link{camera}}, \code{\link{roast}}, \code{\link{barcodeplot}}, \code{\link{wilcox.test}}.
There is a topic page on \link{10.GeneSetTests}.
}
diff --git a/man/goana.Rd b/man/goana.Rd
index 274fc38..b48ac5e 100644
--- a/man/goana.Rd
+++ b/man/goana.Rd
@@ -15,11 +15,11 @@ Test for over-representation of gene ontology (GO) terms or KEGG pathways in one
\usage{
\method{goana}{MArrayLM}(de, coef = ncol(de), geneid = rownames(de), FDR = 0.05, trend = FALSE, \dots)
+\method{kegga}{MArrayLM}(de, coef = ncol(de), geneid = rownames(de), FDR = 0.05, trend = FALSE, \dots)
\method{goana}{default}(de, universe = NULL, species = "Hs", prior.prob = NULL, covariate=NULL,
plot=FALSE, \dots)
-\method{kegga}{MArrayLM}(de, coef = ncol(de), geneid = rownames(de), FDR = 0.05, trend = FALSE, \dots)
-\method{kegga}{default}(de, universe = NULL, species = "Hs", species.KEGG = NULL, convert = FALSE,
- gene.pathway = NULL, pathway.names = NULL,
+\method{kegga}{default}(de, universe = NULL, restrict.universe = TRUE, species = "Hs", species.KEGG = NULL,
+ convert = FALSE, gene.pathway = NULL, pathway.names = NULL,
prior.prob = NULL, covariate=NULL, plot=FALSE, \dots)
getGeneKEGGLinks(species.KEGG = "hsa", convert = FALSE)
getKEGGPathwayNames(species.KEGG = NULL, remove.qualifier = FALSE)
@@ -27,30 +27,47 @@ getKEGGPathwayNames(species.KEGG = NULL, remove.qualifier = FALSE)
\arguments{
\item{de}{a character vector of Entrez Gene IDs, or a list of such vectors, or an \code{MArrayLM} fit object.}
+
\item{coef}{column number or column name specifying for which coefficient or contrast differential expression should be assessed.}
+
\item{geneid}{Entrez Gene identifiers. Either a vector of length \code{nrow(de)} or the name of the column of \code{de$genes} containing the Entrez Gene IDs.}
+
\item{FDR}{false discovery rate cutoff for differentially expressed genes. Numeric value between 0 and 1.}
+
\item{species}{character string specifying the species.
Possible values include \code{"Hs"} (human), \code{"Mm"} (mouse), \code{"Rn"} (rat), \code{"Dm"} (fly) or \code{"Pt"} (chimpanzee), but other values are possible if the corresponding organism package is available.
See \code{\link{alias2Symbol}} for other possible values.
Ignored if \code{species.KEGG} or is not \code{NULL} or if \code{gene.pathway} and \code{pathway.names} are not \code{NULL}.}
+
\item{species.KEGG}{three-letter KEGG species identifier. See \url{http://www.kegg.jp/kegg/catalog/org_list.html} or \url{http://rest.kegg.jp/list/organism} for possible values. Ignored if \code{gene.pathway} and \code{pathway.names} are not \code{NULL}.}
+
\item{convert}{if \code{TRUE} then KEGG gene identifiers will be converted to NCBI Entrez Gene identifiers. Note that KEGG IDs are the same as Entrez Gene IDs for most species anyway.}
+
\item{gene.pathway}{data.frame linking genes to pathways. First column gives gene IDs, second column gives pathway IDs. By default this is obtained automatically by \code{getGeneKEGGLinks(species.KEGG)}.}
+
\item{remove.qualifier}{if \code{TRUE}, the species qualifier will be removed from the pathway names.}
+
\item{pathway.names}{data.frame giving full names of pathways. First column gives pathway IDs, second column gives pathway names. By default this is obtained automatically using \code{getKEGGPathwayNames(species.KEGG, remove=TRUE)}.}
+
\item{trend}{adjust analysis for gene length or abundance?
Can be logical, or a numeric vector of covariate values, or the name of the column of \code{de$genes} containing the covariate values.
If \code{TRUE}, then \code{de$Amean} is used as the covariate.}
+
\item{universe}{vector specifying the set of Entrez Gene identifiers to be the background universe.
If \code{NULL} then all Entrez Gene IDs associated with any gene ontology term will be used as the universe.}
+
+ \item{restrict.universe}{logical, should the \code{universe} be restricted to gene identifiers found in \code{gene.pathway}?}
+
\item{prior.prob}{optional numeric vector of the same length as \code{universe} giving the prior probability that each gene in the universe appears in a gene set.
Will be computed from \code{covariate} if the latter is provided.
Ignored if \code{universe} is \code{NULL}.}
+
\item{covariate}{optional numeric vector of the same length as \code{universe} giving a covariate against which \code{prior.prob} should be computed.
Ignored if \code{universe} is \code{NULL}.}
+
\item{plot}{logical, should the \code{prior.prob} vs \code{covariate} trend be plotted?}
- \item{\dots}{any other arguments in a call to the \code{MArrayLM} method are passed to the default method.}
+
+ \item{\dots}{any other arguments in a call to the \code{MArrayLM} methods are passed to the corresponding default method.}
}
\details{
@@ -99,6 +116,9 @@ While \code{\link{tricubeMovingAverage}} does not enforce monotonicity, it has t
The default for \code{kegga} with \code{species="Dm"} changed from \code{convert=TRUE} to \code{convert=FALSE} in limma 3.27.8.
Users wanting to use Entrez Gene IDs for Drosophila should set \code{convert=TRUE}, otherwise fly-base IDs are assumed.
+
+Bug fix: results from \code{kegga} with \code{trend=TRUE} or with non-NULL \code{covariate} were incorrect prior to limma 3.32.3.
+The results were biased towards significant Down p-values and against significant Up p-values.
}
\value{
diff --git a/man/kooperberg.Rd b/man/kooperberg.Rd
index e5ed572..4779d74 100755
--- a/man/kooperberg.Rd
+++ b/man/kooperberg.Rd
@@ -7,7 +7,7 @@ background correct GenePix microarray data.
}
\usage{
-kooperberg(RG, a=TRUE, layout=RG$printer, verbose=TRUE)
+kooperberg(RG, a = TRUE, layout = RG$printer, verbose = TRUE)
}
\arguments{
\item{RG}{an RGList of GenePix data, read in using \code{read.maimages}, with \code{other.columns=c("F635 SD","B635 SD","F532 SD","B532 SD","B532 Mean","B635 Mean","F Pixels","B Pixels")}.}
@@ -44,7 +44,7 @@ Improved background correction for spotted DNA microarrays.
Ritchie, M. E., Silver, J., Oshlack, A., Silver, J., Holmes, M., Diyagama, D., Holloway, A., and Smyth, G. K. (2007).
A comparison of background correction methods for two-colour microarrays.
\emph{Bioinformatics} 23, 2700-2707.
-\url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btm412}
+\url{https://www.ncbi.nlm.nih.gov/pubmed/17720982}
}
\seealso{
diff --git a/man/plotSA.Rd b/man/plotSA.Rd
index 31f45a2..3d79e8b 100755
--- a/man/plotSA.Rd
+++ b/man/plotSA.Rd
@@ -2,25 +2,32 @@
\name{plotSA}
\alias{plotSA}
\description{
-Plot log residual standard deviation versus average log expression for a fitted microarray linear model.
+Plot residual standard deviation versus average log expression for a fitted microarray linear model.
}
\usage{
-plotSA(fit, xlab="Average log-expression", ylab="log2(sigma)",
- zero.weights=FALSE, pch=16, cex=0.3, \dots)
+plotSA(fit, xlab = "Average log-expression", ylab = "sqrt(sigma)", zero.weights = FALSE,
+ pch = 16, cex = 0.3, col = c("black","red"), \dots)
}
\arguments{
\item{fit}{an \code{MArrayLM} object.}
- \item{xlab}{character string giving label for x-axis}
- \item{ylab}{character string giving label for y-axis}
- \item{pch}{vector or list of plotting characters. Default is integer code 16 which gives a solid circle.}
- \item{cex}{numeric expansion factor for plotting character.
- Defaults to 0.2.}
- \item{zero.weights}{logical, should spots with zero or negative weights be plotted?}
+ \item{xlab}{label for x-axis}
+ \item{ylab}{label for y-axis}
+ \item{zero.weights}{logical, should genes with all zero weights be plotted?}
+ \item{pch}{vector of codes for plotting characters.}
+ \item{cex}{numeric, vector of expansion factors for plotting characters.}
+ \item{col}{plotting colors for regular and outlier variances respectively.}
\item{\dots}{any other arguments are passed to \code{plot}}
}
\details{
This plot is used to check the mean-variance relationship of the expression data, after fitting a linear model.
+A scatterplot of residual-variances vs average log-expression is created.
+If robust empirical Bayes was used to create \code{fit}, then outlier variances are highlighted in the color given by \code{col[2]}.
+
+The y-axis is square-root \code{fit$sigma}, where \code{sigma} is the estimated residual standard deviation.
+The y-axis therefore corresponds to quarter-root variances.
+The y-axis was changed from log2-variance to quarter-root variance in limma version 3.31.21.
+The quarter-root scale matches the similar plot produced by the \code{voom} function and gives a better plot when some of the variances are close to zero.
See \code{\link[graphics]{points}} for possible values for \code{pch} and \code{cex}.
}
diff --git a/man/plotWithHighlights.Rd b/man/plotWithHighlights.Rd
index d43981c..bd4e631 100644
--- a/man/plotWithHighlights.Rd
+++ b/man/plotWithHighlights.Rd
@@ -7,7 +7,7 @@ This is the engine for \code{plotMD} and \code{plotMA}.
}
\usage{
plotWithHighlights(x, y, status = NULL, values = NULL,
- hl.pch = 16, hl.col = NULL, hl.cex = 1, legend = "topleft",
+ hl.pch = 16, hl.col = NULL, hl.cex = 1, legend = "topright",
bg.pch = 16, bg.col = "black", bg.cex = 0.3,
pch = NULL, col = NULL, cex = NULL, \dots)
}
diff --git a/man/propTrueNull.Rd b/man/propTrueNull.Rd
index d0355c2..f80f707 100644
--- a/man/propTrueNull.Rd
+++ b/man/propTrueNull.Rd
@@ -47,7 +47,6 @@ Numeric value in the interval [0,1] representing the estimated proportion of tru
Langaas, M, Ferkingstad, E, and Lindqvist, B (2005).
Estimating the proportion of true null hypotheses, with application to DNA microarray data.
\emph{Journal of the Royal Statistical Society Series} B 67, 555-572.
-Preprint at \url{http://www.math.ntnu.no/~mettela/pi0.imf}
Mosig MO, Lipkin E, Khutoreskaya G, Tchourzyna E, Soller M, Friedmann A (2001).
A whole genome scan for quantitative trait loci affecting milk protein percentage in Israeli-Holstein cattle, by means of selective milk DNA pooling in a daughter design, using an adjusted false discovery rate criterion.
@@ -68,7 +67,7 @@ limma powers differential expression analyses for RNA-sequencing and microarray
\url{http://nar.oxfordjournals.org/content/43/7/e47}
}
-\author{Belinda Phipson and Gordon Smyth for \code{propTrueNull}; Egil Ferkingstad, Mette Langaas and Marcus Davy for \code{convest}}
+\author{Belinda Phipson and Gordon Smyth for \code{propTrueNull}. Egil Ferkingstad, Mette Langaas and Marcus Davy for \code{convest}.}
\seealso{
See \link{08.Tests} for other functions for producing or interpreting p-values.
diff --git a/man/propexpr.Rd b/man/propexpr.Rd
index f3ce8b6..503ec54 100644
--- a/man/propexpr.Rd
+++ b/man/propexpr.Rd
@@ -34,8 +34,8 @@ Numeric vector giving the proportions of expressed probes in each array.
\references{
Shi, W, de Graaf, C, Kinkel, S, Achtman, A, Baldwin, T, Schofield, L, Scott, H, Hilton, D, Smyth, GK (2010).
Estimating the proportion of microarray probes expressed in an RNA sample.
-\emph{Nucleic Acids Research} 38, 2168-2176.
-\url{http://nar.oxfordjournals.org/content/38/7/2168}
+\emph{Nucleic Acids Research} 38(7), 2168-2176.
+\url{https://www.ncbi.nlm.nih.gov/pubmed/20056656}
}
\author{Wei Shi and Gordon Smyth}
diff --git a/man/volcanoplot.Rd b/man/volcanoplot.Rd
index 799c688..19892f0 100755
--- a/man/volcanoplot.Rd
+++ b/man/volcanoplot.Rd
@@ -2,16 +2,17 @@
\name{volcanoplot}
\alias{volcanoplot}
\description{
-Creates a volcano plot of log-fold changes versus log-odds of differential expression.
+Creates a volcano plot for a specified coefficient of a linear model.
}
\usage{
-volcanoplot(fit, coef=1, highlight=0, names=fit$genes$ID,
- xlab="Log Fold Change", ylab="Log Odds", pch=16, cex=0.35, \dots)
+volcanoplot(fit, coef = 1L, style = "p-value", highlight = 0, names = fit$genes$ID,
+ xlab = "Log2 Fold Change", ylab = NULL, pch=16, cex=0.35, \dots)
}
\arguments{
- \item{fit}{an \code{MArrayLM} fitted linear model object}
- \item{coef}{integer giving the coefficient}
- \item{highlight}{number of top genes to be highlighted}
+ \item{fit}{an \code{MArrayLM} fitted linear model object.}
+ \item{coef}{index indicating which coefficient of the linear model is to be plotted.}
+ \item{style}{character string indicating which significance statistic to plot on the y-axis. Possibilities are \code{"p-value"} or \code{"B-statistic"}.}
+ \item{highlight}{number of top genes to be highlighted by name.}
\item{names}{character vector giving text labels for the probes to be used in highlighting}
\item{xlab}{character string giving label for x-axis}
\item{ylab}{character string giving label for y-axis}
@@ -21,10 +22,13 @@ volcanoplot(fit, coef=1, highlight=0, names=fit$genes$ID,
}
\details{
-A volcano plot is any plot which displays fold changes versus a measure of statistical significance of the change.
+A volcano plot displays log fold changes on the x-axis versus a measure of statistical significance on the y-axis.
+Here the significance measure can be -log(p-value) or the B-statistics, which give the posterior log-odds of differential expression.
+
+The plot is optionally annotated with the names of the most significant genes.
}
-\value{A plot is created on the current graphics device.}
+\value{No value is returned but a plot is created on the current graphics device.}
\author{Gordon Smyth}
\seealso{
An overview of presentation plots following the fitting of a linear model in LIMMA is given in \link{06.LinearModels}.
diff --git a/man/wsva.Rd b/man/wsva.Rd
new file mode 100644
index 0000000..67beeee
--- /dev/null
+++ b/man/wsva.Rd
@@ -0,0 +1,40 @@
+\name{wsva}
+\alias{wsva}
+\title{Weighted Surrogate Variable Analysis}
+\description{
+Calculate surrogate variables from the singular vectors of the linear model residual space.
+}
+
+\usage{
+wsva(y, design, n.sv = 1L, weight.by.sd = FALSE, plot = FALSE, ...)
+}
+
+\arguments{
+ \item{y}{numeric matrix giving log-expression or log-ratio values for a series of microarrays, or any object that can coerced to a matrix including \code{ExpressionSet}, \code{MAList}, \code{EList} or \code{PLMSet} objects.
+ Rows correspond to genes and columns to samples.}
+ \item{design}{design matrix}
+ \item{n.sv}{number of surrogate variables required.}
+ \item{weight.by.sd}{logical, should the surrogate variables be especially tuned to the more variable genes?}
+ \item{plot}{logical. If \code{TRUE}, plots the proportion of variance explained by each surrogate variable.}
+ \item{\dots}{other arguments can be included that would be suitable for \code{lmFit}.}
+}
+
+\value{
+Numeric matrix, same as \code{design} but with \code{n.sv} columns added containing the surrogate variables.
+}
+
+\details{
+The function constructs surrogate variables that explain a high proportion of the residual variability for many of the genes.
+The surrogate variables can be included in the design matrix to remove unwanted variation.
+The surrogate variables are constructed from the singular vectors of a representation of the linear model residual space.
+
+If \code{weight.by.sd=FALSE}, then the method is a simplification of the approach by Leek and Storey (2007).
+}
+
+\author{Gordon Smyth and Yifang Hu}
+
+\references{
+Leek, JT, Storey, JD (2007).
+Capturing heterogeneity in gene expression studies by surrogate variable analysis.
+PLoS Genetics 3, 1724-1735.
+}
diff --git a/src/init.c b/src/init.c
new file mode 100644
index 0000000..5dc1ea1
--- /dev/null
+++ b/src/init.c
@@ -0,0 +1,32 @@
+#include <R.h>
+#include <Rinternals.h>
+#include <stdlib.h> // for NULL
+#include <R_ext/Rdynload.h>
+
+/* .C calls */
+extern void fit_saddle_nelder_mead(void *, void *, void *, void *, void *, void *);
+extern void normexp_gm2loglik(void *, void *, void *, void *, void *, void *);
+extern void normexp_hm2loglik(void *, void *, void *, void *, void *, void *);
+extern void normexp_m2loglik(void *, void *, void *, void *, void *, void *);
+
+/* .Call calls */
+extern SEXP weighted_lowess(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+
+static const R_CMethodDef CEntries[] = {
+ {"fit_saddle_nelder_mead", (DL_FUNC) &fit_saddle_nelder_mead, 6},
+ {"normexp_gm2loglik", (DL_FUNC) &normexp_gm2loglik, 6},
+ {"normexp_hm2loglik", (DL_FUNC) &normexp_hm2loglik, 6},
+ {"normexp_m2loglik", (DL_FUNC) &normexp_m2loglik, 6},
+ {NULL, NULL, 0}
+};
+
+static const R_CallMethodDef CallEntries[] = {
+ {"weighted_lowess", (DL_FUNC) &weighted_lowess, 6},
+ {NULL, NULL, 0}
+};
+
+void R_init_limma(DllInfo *dll)
+{
+ R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL);
+ R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/tests/limma-Tests.R b/tests/limma-Tests.R
index 974fff2..c0b0680 100755
--- a/tests/limma-Tests.R
+++ b/tests/limma-Tests.R
@@ -232,6 +232,9 @@ mroast(y=y,list(set1=iset1,set2=iset2),design,contrast=2,array.weights=c(0.5,1,0
mroast(y=y,list(set1=iset1,set2=iset2),design,contrast=2,weights=w)
mroast(y=y,list(set1=iset1,set2=iset2),design,contrast=2,weights=w,array.weights=c(0.5,1,0.5,1))
fry(y=y,list(set1=iset1,set2=iset2),design,contrast=2,weights=w,array.weights=c(0.5,1,0.5,1))
+rownames(y) <- paste0("Gene",1:100)
+iset1A <- rownames(y)[1:5]
+fry(y=y,index=iset1A,design,contrast=2,weights=w,array.weights=c(0.5,1,0.5,1))
### camera
@@ -239,6 +242,7 @@ camera(y=y,iset1,design,contrast=2,weights=c(0.5,1,0.5,1),allow.neg.cor=TRUE,int
camera(y=y,list(set1=iset1,set2=iset2),design,contrast=2,allow.neg.cor=TRUE,inter.gene.cor=NA)
camera(y=y,iset1,design,contrast=2,weights=c(0.5,1,0.5,1))
camera(y=y,list(set1=iset1,set2=iset2),design,contrast=2)
+camera(y=y,iset1A,design,contrast=2)
### with EList arg
diff --git a/tests/limma-Tests.Rout.save b/tests/limma-Tests.Rout.save
index 3c3c730..f0cc583 100755
--- a/tests/limma-Tests.Rout.save
+++ b/tests/limma-Tests.Rout.save
@@ -1,6 +1,6 @@
-R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
-Copyright (C) 2016 The R Foundation for Statistical Computing
+R version 3.4.1 (2017-06-30) -- "Single Candle"
+Copyright (C) 2017 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64 (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
@@ -1195,6 +1195,11 @@ set2 5 0 0 Down 0.791 0.791 0.146 0.146
NGenes Direction PValue FDR PValue.Mixed FDR.Mixed
set1 5 Up 0.0007432594 0.001486519 1.820548e-05 3.641096e-05
set2 5 Down 0.8208140511 0.820814051 2.211837e-01 2.211837e-01
+> rownames(y) <- paste0("Gene",1:100)
+> iset1A <- rownames(y)[1:5]
+> fry(y=y,index=iset1A,design,contrast=2,weights=w,array.weights=c(0.5,1,0.5,1))
+ NGenes Direction PValue PValue.Mixed
+set1 5 Up 0.0007432594 1.820548e-05
>
> ### camera
>
@@ -1212,6 +1217,9 @@ set1 5 Up 1.105329e-10
NGenes Direction PValue FDR
set1 5 Up 7.334400e-12 1.466880e-11
set2 5 Down 8.677115e-01 8.677115e-01
+> camera(y=y,iset1A,design,contrast=2)
+ NGenes Direction PValue
+set1 5 Up 7.3344e-12
>
> ### with EList arg
>
@@ -1234,35 +1242,46 @@ set1 5 Up 7.3344e-12
> fit <- lmFit(y,design)
> fit <- eBayes(fit,trend=TRUE)
> topTable(fit,coef=2)
- logFC AveExpr t P.Value adj.P.Val B
-2 3.729512 1.73488969 4.865697 0.0004854886 0.02902331 0.1596831
-3 3.488703 1.03931081 4.754954 0.0005804663 0.02902331 -0.0144071
-4 2.696676 1.74060725 3.356468 0.0063282637 0.21094212 -2.3434702
-1 2.391846 1.72305203 3.107124 0.0098781268 0.24695317 -2.7738874
-33 -1.492317 -0.07525287 -2.783817 0.0176475742 0.29965463 -3.3300835
-5 2.387967 1.63066783 2.773444 0.0179792778 0.29965463 -3.3478204
-80 -1.839760 -0.32802306 -2.503584 0.0291489863 0.37972679 -3.8049642
-39 1.366141 -0.27360750 2.451133 0.0320042242 0.37972679 -3.8925860
-95 -1.907074 1.26297763 -2.414217 0.0341754107 0.37972679 -3.9539571
-50 1.034777 0.01608433 2.054690 0.0642289403 0.59978803 -4.5350317
+ logFC AveExpr t P.Value adj.P.Val B
+Gene2 3.729512 1.73488969 4.865697 0.0004854886 0.02902331 0.1596831
+Gene3 3.488703 1.03931081 4.754954 0.0005804663 0.02902331 -0.0144071
+Gene4 2.696676 1.74060725 3.356468 0.0063282637 0.21094212 -2.3434702
+Gene1 2.391846 1.72305203 3.107124 0.0098781268 0.24695317 -2.7738874
+Gene33 -1.492317 -0.07525287 -2.783817 0.0176475742 0.29965463 -3.3300835
+Gene5 2.387967 1.63066783 2.773444 0.0179792778 0.29965463 -3.3478204
+Gene80 -1.839760 -0.32802306 -2.503584 0.0291489863 0.37972679 -3.8049642
+Gene39 1.366141 -0.27360750 2.451133 0.0320042242 0.37972679 -3.8925860
+Gene95 -1.907074 1.26297763 -2.414217 0.0341754107 0.37972679 -3.9539571
+Gene50 1.034777 0.01608433 2.054690 0.0642289403 0.59978803 -4.5350317
> fit$df.prior
[1] 9.098442
> fit$s2.prior
- [1] 0.6901845 0.6977354 0.3860494 0.7014122 0.6341068 0.2926337 0.3077620
- [8] 0.3058098 0.2985145 0.2832520 0.3232434 0.3279710 0.2816081 0.2943502
- [15] 0.3127994 0.2894802 0.2812758 0.2840051 0.2839124 0.2954261 0.2838592
- [22] 0.2812704 0.3157029 0.2844541 0.4778832 0.2818242 0.2930360 0.2940957
- [29] 0.2941862 0.3234399 0.3164779 0.2853510 0.2988244 0.3450090 0.3048596
- [36] 0.3089086 0.3104534 0.4551549 0.3220008 0.2813286 0.2826027 0.2822504
- [43] 0.2823330 0.3170673 0.3146173 0.3146793 0.2916540 0.2975003 0.3538946
- [50] 0.2907240 0.3199596 0.2816641 0.2814293 0.2996822 0.2812885 0.2896157
- [57] 0.2955317 0.2815907 0.2919420 0.2849675 0.3540805 0.3491713 0.2975019
- [64] 0.2939325 0.2986943 0.3265466 0.3402343 0.3394927 0.2813283 0.2814440
- [71] 0.3089669 0.3030850 0.2859286 0.2813216 0.3475231 0.3334419 0.2949550
- [78] 0.3108702 0.2959688 0.3295294 0.3413700 0.2946268 0.3029565 0.2920284
- [85] 0.2926205 0.2818046 0.3425116 0.2882936 0.2945459 0.3077919 0.2892134
- [92] 0.2823787 0.3048049 0.2961408 0.4590012 0.2812784 0.2846345 0.2819651
- [99] 0.3137551 0.2856081
+ Gene1 Gene2 Gene3 Gene4 Gene5 Gene6 Gene7 Gene8
+0.6901845 0.6977354 0.3860494 0.7014122 0.6341068 0.2926337 0.3077620 0.3058098
+ Gene9 Gene10 Gene11 Gene12 Gene13 Gene14 Gene15 Gene16
+0.2985145 0.2832520 0.3232434 0.3279710 0.2816081 0.2943502 0.3127994 0.2894802
+ Gene17 Gene18 Gene19 Gene20 Gene21 Gene22 Gene23 Gene24
+0.2812758 0.2840051 0.2839124 0.2954261 0.2838592 0.2812704 0.3157029 0.2844541
+ Gene25 Gene26 Gene27 Gene28 Gene29 Gene30 Gene31 Gene32
+0.4778832 0.2818242 0.2930360 0.2940957 0.2941862 0.3234399 0.3164779 0.2853510
+ Gene33 Gene34 Gene35 Gene36 Gene37 Gene38 Gene39 Gene40
+0.2988244 0.3450090 0.3048596 0.3089086 0.3104534 0.4551549 0.3220008 0.2813286
+ Gene41 Gene42 Gene43 Gene44 Gene45 Gene46 Gene47 Gene48
+0.2826027 0.2822504 0.2823330 0.3170673 0.3146173 0.3146793 0.2916540 0.2975003
+ Gene49 Gene50 Gene51 Gene52 Gene53 Gene54 Gene55 Gene56
+0.3538946 0.2907240 0.3199596 0.2816641 0.2814293 0.2996822 0.2812885 0.2896157
+ Gene57 Gene58 Gene59 Gene60 Gene61 Gene62 Gene63 Gene64
+0.2955317 0.2815907 0.2919420 0.2849675 0.3540805 0.3491713 0.2975019 0.2939325
+ Gene65 Gene66 Gene67 Gene68 Gene69 Gene70 Gene71 Gene72
+0.2986943 0.3265466 0.3402343 0.3394927 0.2813283 0.2814440 0.3089669 0.3030850
+ Gene73 Gene74 Gene75 Gene76 Gene77 Gene78 Gene79 Gene80
+0.2859286 0.2813216 0.3475231 0.3334419 0.2949550 0.3108702 0.2959688 0.3295294
+ Gene81 Gene82 Gene83 Gene84 Gene85 Gene86 Gene87 Gene88
+0.3413700 0.2946268 0.3029565 0.2920284 0.2926205 0.2818046 0.3425116 0.2882936
+ Gene89 Gene90 Gene91 Gene92 Gene93 Gene94 Gene95 Gene96
+0.2945459 0.3077919 0.2892134 0.2823787 0.3048049 0.2961408 0.4590012 0.2812784
+ Gene97 Gene98 Gene99 Gene100
+0.2846345 0.2819651 0.3137551 0.2856081
> summary(fit$s2.post)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.2335 0.2603 0.2997 0.3375 0.3655 0.7812
@@ -1272,17 +1291,17 @@ set1 5 Up 7.3344e-12
> fit <- lmFit(y,design)
> fit <- eBayes(fit,trend=TRUE)
> topTable(fit,coef=2)
- logFC AveExpr t P.Value adj.P.Val B
-3 3.488703 1.03931081 4.604490 0.0007644061 0.07644061 -0.2333915
-2 3.729512 1.73488969 4.158038 0.0016033158 0.08016579 -0.9438583
-4 2.696676 1.74060725 2.898102 0.0145292666 0.44537707 -3.0530813
-33 -1.492317 -0.07525287 -2.784004 0.0178150826 0.44537707 -3.2456324
-5 2.387967 1.63066783 2.495395 0.0297982959 0.46902627 -3.7272957
-80 -1.839760 -0.32802306 -2.491115 0.0300256116 0.46902627 -3.7343584
-39 1.366141 -0.27360750 2.440729 0.0328318388 0.46902627 -3.8172597
-1 2.638272 1.47993643 2.227507 0.0530016060 0.58890673 -3.9537576
-95 -1.907074 1.26297763 -2.288870 0.0429197808 0.53649726 -4.0642439
-50 1.034777 0.01608433 2.063663 0.0635275235 0.60439978 -4.4204731
+ logFC AveExpr t P.Value adj.P.Val B
+Gene3 3.488703 1.03931081 4.604490 0.0007644061 0.07644061 -0.2333915
+Gene2 3.729512 1.73488969 4.158038 0.0016033158 0.08016579 -0.9438583
+Gene4 2.696676 1.74060725 2.898102 0.0145292666 0.44537707 -3.0530813
+Gene33 -1.492317 -0.07525287 -2.784004 0.0178150826 0.44537707 -3.2456324
+Gene5 2.387967 1.63066783 2.495395 0.0297982959 0.46902627 -3.7272957
+Gene80 -1.839760 -0.32802306 -2.491115 0.0300256116 0.46902627 -3.7343584
+Gene39 1.366141 -0.27360750 2.440729 0.0328318388 0.46902627 -3.8172597
+Gene1 2.638272 1.47993643 2.227507 0.0530016060 0.58890673 -3.9537576
+Gene95 -1.907074 1.26297763 -2.288870 0.0429197808 0.53649726 -4.0642439
+Gene50 1.034777 0.01608433 2.063663 0.0635275235 0.60439978 -4.4204731
> fit$df.residual[1]
[1] 0
> fit$df.prior
@@ -1346,40 +1365,40 @@ set1 5 Up 7.3344e-12
> go <- goana(fit,FDR=0.8,geneid=EB)
> topGO(go,n=10,truncate.term=30)
Term Ont N Up Down P.Up
-GO:0009653 anatomical structure morpho... BP 10 1 4 0.936627742
-GO:0048856 anatomical structure develo... BP 23 4 6 0.844070086
GO:0032502 developmental process BP 23 4 6 0.844070086
GO:0055082 cellular chemical homeostas... BP 2 0 2 1.000000000
GO:0006915 apoptotic process BP 5 4 1 0.009503355
+GO:0098609 cell-cell adhesion BP 5 4 0 0.009503355
+GO:0040011 locomotion BP 5 4 0 0.009503355
GO:0012501 programmed cell death BP 5 4 1 0.009503355
-GO:0006897 endocytosis BP 3 3 0 0.010952381
-GO:0031988 membrane-bounded vesicle CC 19 4 5 0.691026648
-GO:0016485 protein processing BP 7 1 3 0.849770470
-GO:0005615 extracellular space CC 13 5 4 0.143422146
+GO:0042981 regulation of apoptotic pro... BP 5 4 1 0.009503355
+GO:0043067 regulation of programmed ce... BP 5 4 1 0.009503355
+GO:0097190 apoptotic signaling pathway BP 3 3 0 0.010952381
+GO:0031252 cell leading edge CC 3 3 0 0.010952381
P.Down
-GO:0009653 0.008224876
-GO:0048856 0.009014340
GO:0032502 0.009014340
GO:0055082 0.009090909
GO:0006915 0.416247633
+GO:0098609 1.000000000
+GO:0040011 1.000000000
GO:0012501 0.416247633
-GO:0006897 1.000000000
-GO:0031988 0.020081679
-GO:0016485 0.020760307
-GO:0005615 0.023836810
+GO:0042981 0.416247633
+GO:0043067 0.416247633
+GO:0097190 1.000000000
+GO:0031252 1.000000000
> topGO(go,n=10,truncate.term=30,sort="down")
Term Ont N Up Down P.Up P.Down
-GO:0009653 anatomical structure morpho... BP 10 1 4 0.9366277 0.008224876
-GO:0048856 anatomical structure develo... BP 23 4 6 0.8440701 0.009014340
GO:0032502 developmental process BP 23 4 6 0.8440701 0.009014340
GO:0055082 cellular chemical homeostas... BP 2 0 2 1.0000000 0.009090909
-GO:0031988 membrane-bounded vesicle CC 19 4 5 0.6910266 0.020081679
GO:0016485 protein processing BP 7 1 3 0.8497705 0.020760307
GO:0005615 extracellular space CC 13 5 4 0.1434221 0.023836810
-GO:0031982 vesicle CC 20 4 5 0.7361976 0.025464546
GO:0009887 animal organ morphogenesis BP 3 0 2 1.0000000 0.025788497
GO:0019725 cellular homeostasis BP 3 0 2 1.0000000 0.025788497
+GO:0072359 circulatory system developm... BP 3 0 2 1.0000000 0.025788497
+GO:0007507 heart development BP 3 0 2 1.0000000 0.025788497
+GO:0048232 male gamete generation BP 3 0 2 1.0000000 0.025788497
+GO:0007283 spermatogenesis BP 3 0 2 1.0000000 0.025788497
>
> proc.time()
user system elapsed
- 3.13 0.26 4.14
+ 3.46 0.12 3.62
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-limma.git
More information about the debian-med-commit
mailing list