[med-svn] [r-bioc-limma] 01/04: Imported Upstream version 3.28.17+dfsg
Gordon Ball
chronitis-guest at moszumanska.debian.org
Thu Jul 28 13:12:27 UTC 2016
This is an automated email from the git hooks/post-receive script.
chronitis-guest pushed a commit to branch master
in repository r-bioc-limma.
commit d85c65fdd31c04d7fd692571e565fcdedc50cb11
Author: Gordon Ball <gordon at chronitis.net>
Date: Thu Jul 28 14:59:02 2016 +0200
Imported Upstream version 3.28.17+dfsg
---
DESCRIPTION | 9 ++--
NAMESPACE | 2 +-
R/alias2Symbol.R | 110 ++++++++++++++++++--------------------------
R/combine.R | 4 +-
R/geneset-camera.R | 2 +-
R/geneset-fry.R | 13 ++++--
R/geneset-roast.R | 13 ++++--
R/goana.R | 53 ++++++++-------------
R/kegga.R | 6 +--
R/lmfit.R | 5 +-
R/norm.R | 4 +-
R/plots-fit.R | 16 ++++---
build/vignette.rds | Bin 229 -> 230 bytes
inst/doc/changelog.txt | 40 ++++++++++++++++
inst/doc/intro.pdf | Bin 46191 -> 46191 bytes
man/alias2Symbol.Rd | 28 +++++++++--
man/ebayes.Rd | 4 +-
man/fitfdist.Rd | 2 +-
man/getEAWP.Rd | 3 ++
man/goana.Rd | 8 +++-
man/ids2indices.Rd | 6 +--
man/removeBatchEffect.Rd | 35 +++++++++-----
man/squeezeVar.Rd | 5 +-
tests/limma-Tests.Rout.save | 16 +++----
24 files changed, 220 insertions(+), 164 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index cb99d7f..f57cdb0 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: limma
-Version: 3.28.10
-Date: 2016-06-15
+Version: 3.28.17
+Date: 2016-07-24
Title: Linear Models for Microarray Data
Description: Data analysis, linear models and differential expression for microarray data.
Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb]
@@ -9,8 +9,7 @@ License: GPL (>=2)
Depends: R (>= 2.3.0)
Imports: grDevices, graphics, stats, utils, methods
Suggests: affy, AnnotationDbi, BiasedUrn, Biobase, ellipse, GO.db,
- illuminaio, locfit, MASS, org.Hs.eg.db, org.Mm.eg.db,
- org.Dm.eg.db, org.Pt.eg.db, org.Rn.eg.db, splines, statmod (>=
+ illuminaio, locfit, MASS, org.Hs.eg.db, splines, statmod (>=
1.2.2), vsn
URL: http://bioinf.wehi.edu.au/limma
biocViews: ExonArray, GeneExpression, Transcription,
@@ -22,4 +21,4 @@ biocViews: ExonArray, GeneExpression, Transcription,
MultipleComparison, Normalization, Preprocessing,
QualityControl
NeedsCompilation: yes
-Packaged: 2016-06-16 01:27:03 UTC; biocbuild
+Packaged: 2016-07-25 01:26:42 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index ea342dc..2a3dd2f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,7 +20,7 @@ importFrom("stats", "approx", "approxfun", "as.dist", "cmdscale",
"pnorm", "ppoints", "predict", "pt", "qchisq", "qf",
"qnorm", "qt", "quantile", "residuals", "rnorm", "sd",
"uniroot", "var", "weighted.mean")
-importFrom("utils", "read.table", "write.table")
+importFrom("utils", "getFromNamespace", "read.table", "write.table")
if( tools:::.OStype() == "windows" ) importFrom("utils", "winMenuAddItem")
S3method("[",MAList)
diff --git a/R/alias2Symbol.R b/R/alias2Symbol.R
index 46ef938..6c55588 100644
--- a/R/alias2Symbol.R
+++ b/R/alias2Symbol.R
@@ -4,49 +4,38 @@ alias2Symbol <- function(alias,species="Hs",expand.symbols=FALSE)
# Convert a set of alias names to official gene symbols
# via Entrez Gene identifiers
# Di Wu, Gordon Smyth and Yifang Hu
-# 4 Sep 2008. Last revised 2 May 2016.
+# 4 Sep 2008. Last revised 23 June 2016.
{
alias <- as.character(alias)
- species <- match.arg(species,c("Dm","Hs","Mm","Rn"))
# Get access to required annotation functions
suppressPackageStartupMessages(OK <- requireNamespace("AnnotationDbi",quietly=TRUE))
if(!OK) stop("AnnotationDbi package required but not installed (or can't be loaded)")
-# Get alias to symbol mappings
- switch(species,
- Hs = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Hs.eg.db",quietly=TRUE))
- if(!OK) stop("org.Hs.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Hs.eg.db::org.Hs.egALIAS2EG
- SYMBOL <- org.Hs.eg.db::org.Hs.egSYMBOL
- }, Mm = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Mm.eg.db",quietly=TRUE))
- if(!OK) stop("org.Mm.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Mm.eg.db::org.Mm.egALIAS2EG
- SYMBOL <- org.Mm.eg.db::org.Mm.egSYMBOL
- }, Rn = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Rn.eg.db",quietly=TRUE))
- if(!OK) stop("org.Rn.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Rn.eg.db::org.Rn.egALIAS2EG
- SYMBOL <- org.Rn.eg.db::org.Rn.egSYMBOL
- }, Dm = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Dm.eg.db",quietly=TRUE))
- if(!OK) stop("org.Dm.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Dm.eg.db::org.Dm.egALIAS2EG
- SYMBOL <- org.Dm.eg.db::org.Dm.egSYMBOL
- }
- )
+# Load appropriate organism package
+ orgPkg <- paste0("org.",species,".eg.db")
+ suppressPackageStartupMessages(OK <- requireNamespace(orgPkg,quietly=TRUE))
+ if(!OK) stop(orgPkg," package required but not not installed (or can't be loaded)")
+
+# Get alias to Entrez Gene mappings
+ obj <- paste0("org.",species,".egALIAS2EG")
+ egALIAS2EG <- tryCatch(getFromNamespace(obj,orgPkg), error=function(e) FALSE)
+ if(is.logical(egALIAS2EG)) stop("Can't find alias mappings in package ",orgPkg)
+
+# Get symbol to Entrez Gene mappings
+ obj <- paste0("org.",species,".egSYMBOL")
+ egSYMBOL <- tryCatch(getFromNamespace(obj,orgPkg), error=function(e) FALSE)
+ if(is.logical(egSYMBOL)) stop("Can't find symbol mappings in package ",orgPkg)
if(expand.symbols) {
- alias <- intersect(alias,AnnotationDbi::Rkeys(ALIAS2EG))
- eg <- AnnotationDbi::mappedLkeys(ALIAS2EG[alias])
- AnnotationDbi::mappedRkeys(SYMBOL[eg])
+ alias <- intersect(alias,AnnotationDbi::Rkeys(egALIAS2EG))
+ eg <- AnnotationDbi::mappedLkeys(egALIAS2EG[alias])
+ AnnotationDbi::mappedRkeys(egSYMBOL[eg])
} else {
- isSymbol <- alias %in% AnnotationDbi::Rkeys(SYMBOL)
- alias2 <- intersect(alias[!isSymbol],AnnotationDbi::Rkeys(ALIAS2EG))
- eg <- AnnotationDbi::mappedLkeys(ALIAS2EG[alias2])
- c(alias[isSymbol],AnnotationDbi::mappedRkeys(SYMBOL[eg]))
+ isSymbol <- alias %in% AnnotationDbi::Rkeys(egSYMBOL)
+ alias2 <- intersect(alias[!isSymbol],AnnotationDbi::Rkeys(egALIAS2EG))
+ eg <- AnnotationDbi::mappedLkeys(egALIAS2EG[alias2])
+ c(alias[isSymbol],AnnotationDbi::mappedRkeys(egSYMBOL[eg]))
}
}
@@ -54,55 +43,48 @@ alias2SymbolTable <- function(alias,species="Hs")
# Convert a vector of alias names to the vector of corresponding official gene symbols
# via Entrez Gene identifiers
# Di Wu, Gordon Smyth and Yifang Hu
-# Created 3 Sep 2009. Last modified 13 April 2016.
+# Created 3 Sep 2009. Last modified 23 April 2016.
{
alias <- as.character(alias)
- species <- match.arg(species,c("Dm","Hs","Mm","Rn"))
# Get access to required annotation functions
suppressPackageStartupMessages(OK <- requireNamespace("AnnotationDbi",quietly=TRUE))
if(!OK) stop("AnnotationDbi package required but not installed (or can't be loaded)")
-# Get alias to symbol mappings
- switch(species,
- Hs = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Hs.eg.db",quietly=TRUE))
- if(!OK) stop("org.Hs.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Hs.eg.db::org.Hs.egALIAS2EG
- SYMBOL <- org.Hs.eg.db::org.Hs.egSYMBOL
- }, Mm = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Mm.eg.db",quietly=TRUE))
- if(!OK) stop("org.Mm.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Mm.eg.db::org.Mm.egALIAS2EG
- SYMBOL <- org.Mm.eg.db::org.Mm.egSYMBOL
- }, Rn = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Rn.eg.db",quietly=TRUE))
- if(!OK) stop("org.Rn.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Rn.eg.db::org.Rn.egALIAS2EG
- SYMBOL <- org.Rn.eg.db::org.Rn.egSYMBOL
- }, Dm = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Dm.eg.db",quietly=TRUE))
- if(!OK) stop("org.Dm.eg.db package required but not installed (or can't be loaded)")
- ALIAS2EG <- org.Dm.eg.db::org.Dm.egALIAS2EG
- SYMBOL <- org.Dm.eg.db::org.Dm.egSYMBOL
- }
- )
+# Load appropriate organism package
+ orgPkg <- paste0("org.",species,".eg.db")
+ suppressPackageStartupMessages(OK <- requireNamespace(orgPkg,quietly=TRUE))
+ if(!OK) stop(orgPkg," package required but not not installed (or can't be loaded)")
+
+# Get alias to Entrez Gene mappings
+ obj <- paste0("org.",species,".egALIAS2EG")
+ egALIAS2EG <- tryCatch(getFromNamespace(obj,orgPkg), error=function(e) FALSE)
+ if(is.logical(egALIAS2EG)) stop("Can't find alias mappings in package ",orgPkg)
+
+# Get symbol to Entrez Gene mappings
+ obj <- paste0("org.",species,".egSYMBOL")
+ egSYMBOL <- tryCatch(getFromNamespace(obj,orgPkg), error=function(e) FALSE)
+ if(is.logical(egSYMBOL)) stop("Can't find symbol mappings in package ",orgPkg)
- isSymbol <- alias %in% AnnotationDbi::Rkeys(SYMBOL)
+# Output vector same length as input
Symbol <- alias
+
+# Check which aliases are already symbols
+ isSymbol <- alias %in% AnnotationDbi::Rkeys(egSYMBOL)
Symbol[!isSymbol] <- NA
+# Now deal with rest
OtherAliases <- alias[!isSymbol]
- isAlias <- OtherAliases %in% AnnotationDbi::Rkeys(ALIAS2EG)
+ isAlias <- OtherAliases %in% AnnotationDbi::Rkeys(egALIAS2EG)
if(!any(isAlias)) return(Symbol)
OtherAliases <- OtherAliases[isAlias]
-
- AliasTbl <- AnnotationDbi::toTable(ALIAS2EG[OtherAliases])
+ AliasTbl <- AnnotationDbi::toTable(egALIAS2EG[OtherAliases])
if(anyDuplicated(AliasTbl$alias_symbol)) warning("Multiple symbols ignored for one or more aliases")
- SymbolTbl <- AnnotationDbi::toTable(SYMBOL[AliasTbl$gene_id])
+ SymbolTbl <- AnnotationDbi::toTable(egSYMBOL[AliasTbl$gene_id])
m <- match(OtherAliases,AliasTbl$alias_symbol)
GeneID <- AliasTbl$gene_id[m]
m <- match(GeneID,SymbolTbl$gene_id)
Symbol[!isSymbol][isAlias] <- SymbolTbl$symbol[m]
+
Symbol
}
diff --git a/R/combine.R b/R/combine.R
index 3342d70..1e62144 100644
--- a/R/combine.R
+++ b/R/combine.R
@@ -129,7 +129,7 @@ rbind.EListRaw <- rbind.EList <- function(..., deparse.level=1)
makeUnique <- function(x)
# Add characters to the elements of a character vector to make all values unique
# Gordon Smyth
-# 10 April 2003
+# 10 April 2003. Last modified 28 June 2016.
{
x <- as.character(x)
tab <- table(x)
@@ -139,7 +139,7 @@ makeUnique <- function(x)
u <- names(tab)
for (i in 1:lentab) {
n <- tab[i]
- x[x==u[i]] <- paste(x[x==u[i]],formatC(1:n,width=1+floor(log(n,10)),flag="0"),sep="")
+ x[x==u[i]] <- paste0(x[x==u[i]],formatC(1:n,width=1+floor(log10(n)),flag="0"))
}
}
x
diff --git a/R/geneset-camera.R b/R/geneset-camera.R
index 8c9a3a5..4cbb49c 100644
--- a/R/geneset-camera.R
+++ b/R/geneset-camera.R
@@ -40,7 +40,7 @@ camera.default <- function(y,index,design=NULL,contrast=ncol(design),weights=NUL
# Check index
if(!is.list(index)) index <- list(set1=index)
nsets <- length(index)
- if(nsets==0) stop("index is empty")
+ if(nsets==0L) stop("index is empty")
# Check design
if(is.null(design)) design <- y$design
diff --git a/R/geneset-fry.R b/R/geneset-fry.R
index 18f5989..3789e04 100644
--- a/R/geneset-fry.R
+++ b/R/geneset-fry.R
@@ -88,20 +88,23 @@ fry.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid=NU
.fryEffects <- function(effects,index=NULL,geneid=rownames(effects),sort=TRUE)
# fry given the effects matrix
# Gordon Smyth and Goknur Giner
-# Created 30 January 2015. Last modified 9 May 2016
+# Created 30 January 2015. Last modified 28 June 2016
{
G <- nrow(effects)
neffects <- ncol(effects)
- df.residual <- neffects-1
+ df.residual <- neffects-1L
# Check index
if(is.null(index)) index <- list(set1=1L:G)
if(is.data.frame(index) || !is.list(index)) index <- list(set1=index)
- if(is.null(names(index))) names(index) <- paste("set",1:nsets,sep="")
+ nsets <- length(index)
+ if(nsets==0L) stop("index is empty")
+ if(is.null(names(index)))
+ names(index) <- paste0("set",formatC(1L:nsets,width=1L+floor(log10(nsets)),flag="0"))
+ else
+ if(anyDuplicated(names(index))) stop("Gene sets don't have unique names",call. =FALSE)
# Global statistics
- nsets <- length(index)
- if(nsets==0) stop("index is empty")
NGenes <- rep.int(0L,nsets)
PValue.Mixed <- t.stat <- rep.int(0,nsets)
for (i in 1:nsets) {
diff --git a/R/geneset-roast.R b/R/geneset-roast.R
index 6bfb763..b6a6241 100644
--- a/R/geneset-roast.R
+++ b/R/geneset-roast.R
@@ -95,7 +95,7 @@ mroast <- function(y,...) UseMethod("mroast")
mroast.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid=NULL,set.statistic="mean",gene.weights=NULL,var.prior=NULL,df.prior=NULL,nrot=999,approx.zscore=TRUE,adjust.method="BH",midp=TRUE,sort="directional",...)
# Rotation gene set testing with multiple sets
# Gordon Smyth and Di Wu
-# Created 28 Jan 2010. Last revised 9 May 2016.
+# Created 28 Jan 2010. Last revised 28 June 2016.
{
# Partial matching of extra arguments
Dots <- list(...)
@@ -138,11 +138,14 @@ mroast.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid
}
# Check index
- if(is.null(index)) index <- rep_len(TRUE,length.out=ngenes)
- if(!is.list(index)) index <- list(set = index)
+ if(is.null(index)) index <- list(set1=1L:ngenes)
+ if(is.data.frame(index) || !is.list(index)) index <- list(set1=index)
nsets <- length(index)
- if(nsets==0) stop("index is empty")
- if(is.null(names(index))) names(index) <- paste("set",1:nsets,sep="")
+ if(nsets==0L) stop("index is empty")
+ if(is.null(names(index)))
+ names(index) <- paste0("set",formatC(1L:nsets,width=1L+floor(log10(nsets)),flag="0"))
+ else
+ if(anyDuplicated(names(index))) stop("Gene sets don't have unique names",call. =FALSE)
# Check gene.weights
lgw <- length(gene.weights)
diff --git a/R/goana.R b/R/goana.R
index f0fd1e3..3335a64 100644
--- a/R/goana.R
+++ b/R/goana.R
@@ -77,7 +77,7 @@ goana.MArrayLM <- function(de, coef = ncol(de), geneid = rownames(de), FDR = 0.0
goana.default <- function(de, universe = NULL, species = "Hs", prior.prob = NULL, covariate=NULL, plot=FALSE, ...)
# Gene ontology analysis of DE genes
# Gordon Smyth and Yifang Hu
-# Created 20 June 2014. Last modified 10 April 2016.
+# Created 20 June 2014. Last modified 23 June 2016.
{
# Ensure de is a list
if(!is.list(de)) de <- list(DE = de)
@@ -97,9 +97,6 @@ goana.default <- function(de, universe = NULL, species = "Hs", prior.prob = NULL
NAME[i] <- paste0("DE",i)
names(de) <- makeUnique(NAME)
-# Select species
- species <- match.arg(species, c("Hs", "Mm", "Rn", "Dm", "Pt"))
-
# Fit trend in DE with respect to the covariate, combining all de lists
if(!is.null(covariate)) {
covariate <- as.numeric(covariate)
@@ -120,34 +117,19 @@ goana.default <- function(de, universe = NULL, species = "Hs", prior.prob = NULL
suppressPackageStartupMessages(OK <- requireNamespace("AnnotationDbi",quietly=TRUE))
if(!OK) stop("AnnotationDbi package required but not installed (or can't be loaded)")
-# Get gene-GOterm mappings
- switch(species,
- Hs = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Hs.eg.db",quietly=TRUE))
- if(!OK) stop("org.Hs.eg.db package required but not installed (or can't be loaded)")
- GO2ALLEGS <- org.Hs.eg.db::org.Hs.egGO2ALLEGS
- }, Mm = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Mm.eg.db",quietly=TRUE))
- if(!OK) stop("org.Mm.eg.db package required but not installed (or can't be loaded)")
- GO2ALLEGS <- org.Mm.eg.db::org.Mm.egGO2ALLEGS
- }, Rn = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Rn.eg.db",quietly=TRUE))
- if(!OK) stop("org.Rn.eg.db package required but not installed (or can't be loaded)")
- GO2ALLEGS <- org.Rn.eg.db::org.Rn.egGO2ALLEGS
- }, Dm = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Dm.eg.db",quietly=TRUE))
- if(!OK) stop("org.Dm.eg.db package required but not installed (or can't be loaded)")
- GO2ALLEGS <- org.Dm.eg.db::org.Dm.egGO2ALLEGS
- }, Pt = {
- suppressPackageStartupMessages(OK <- requireNamespace("org.Pt.eg.db",quietly=TRUE))
- if(!OK) stop("org.Pt.eg.db package required but not installed (or can't be loaded)")
- GO2ALLEGS <- org.Pt.eg.db::org.Pt.egGO2ALLEGS
- }
- )
+# Load appropriate organism package
+ orgPkg <- paste0("org.",species,".eg.db")
+ suppressPackageStartupMessages(OK <- requireNamespace(orgPkg,quietly=TRUE))
+ if(!OK) stop(orgPkg," package required but not not installed (or can't be loaded)")
+
+# Get GO to Entrez Gene mappings
+ obj <- paste0("org.",species,".egGO2ALLEGS")
+ egGO2ALLEGS <- tryCatch(getFromNamespace(obj,orgPkg), error=function(e) FALSE)
+ if(is.logical(egGO2ALLEGS)) stop("Can't find gene ontology mappings in package ",orgPkg)
# Convert gene-GOterm mappings to data.frame and remove duplicate entries
if(is.null(universe)) {
- EG.GO <- AnnotationDbi::toTable(GO2ALLEGS)
+ EG.GO <- AnnotationDbi::toTable(egGO2ALLEGS)
d <- duplicated(EG.GO[,c("gene_id", "go_id", "Ontology")])
EG.GO <- EG.GO[!d, ]
universe <- unique(EG.GO$gene_id)
@@ -165,12 +147,12 @@ goana.default <- function(de, universe = NULL, species = "Hs", prior.prob = NULL
}
universe <- universe[!dup]
- m <- match(AnnotationDbi::Lkeys(GO2ALLEGS),universe,0L)
+ m <- match(AnnotationDbi::Lkeys(egGO2ALLEGS),universe,0L)
universe <- universe[m]
if(!is.null(prior.prob)) prior.prob <- prior.prob[m]
- AnnotationDbi::Lkeys(GO2ALLEGS) <- universe
- EG.GO <- AnnotationDbi::toTable(GO2ALLEGS)
+ AnnotationDbi::Lkeys(egGO2ALLEGS) <- universe
+ EG.GO <- AnnotationDbi::toTable(egGO2ALLEGS)
d <- duplicated(EG.GO[,c("gene_id", "go_id", "Ontology")])
EG.GO <- EG.GO[!d, ]
}
@@ -237,7 +219,7 @@ goana.default <- function(de, universe = NULL, species = "Hs", prior.prob = NULL
topGO <- function(results, ontology = c("BP", "CC", "MF"), sort = NULL, number = 20L, truncate.term=NULL)
# Extract top GO terms from goana output
# Gordon Smyth and Yifang Hu
-# Created 20 June 2014. Last modified 22 April 2015.
+# Created 20 June 2014. Last modified 23 June 2016.
{
# Check results
if(!is.data.frame(results)) stop("results should be a data.frame.")
@@ -275,10 +257,11 @@ topGO <- function(results, ontology = c("BP", "CC", "MF"), sort = NULL, number =
# Sort by minimum p-value for specified gene lists
P.col <- 3L+nsets+isort
if(length(P.col)==1L) {
- o <- order(results[,P.col])
+ P <- results[,P.col]
} else {
- o <- order(do.call("pmin",as.data.frame(results[,P.col,drop=FALSE])))
+ P <- do.call("pmin",as.data.frame(results[,P.col,drop=FALSE]))
}
+ o <- order(P,results$N,results$Term)
tab <- results[o[1L:number],,drop=FALSE]
# Truncate Term column for readability
diff --git a/R/kegga.R b/R/kegga.R
index 57d743f..ef4fcda 100644
--- a/R/kegga.R
+++ b/R/kegga.R
@@ -77,7 +77,7 @@ kegga.MArrayLM <- function(de, coef = ncol(de), geneid = rownames(de), FDR = 0.0
kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, convert=FALSE, gene.pathway=NULL, pathway.names = NULL, prior.prob=NULL, covariate=NULL, plot=FALSE, ...)
# KEGG (Kyoto Encyclopedia of Genes and Genomes) pathway analysis of DE genes
# Gordon Smyth and Yifang Hu
-# Created 18 May 2015. Modified 11 Feb 2016.
+# Created 18 May 2015. Modified 23 June 2016.
{
# Ensure de is a list
if(!is.list(de)) de <- list(DE = de)
@@ -99,9 +99,9 @@ kegga.default <- function(de, universe=NULL, species="Hs", species.KEGG=NULL, co
# Select species
if(is.null(species.KEGG)) {
- species <- match.arg(species, c("Hs", "Mm", "Rn", "Dm", "Pt"))
+ species <- match.arg(species, c("Ag", "At", "Bt", "Ce", "Dm", "Dr", "EcK12", "EcSakai", "Gg", "Hs", "Mm", "Mmu", "Pf", "Pt", "Rn", "Ss", "Xl"))
# Convert from Bioconductor to KEGG species codes
- species.KEGG <- switch(species, "Hs"="hsa", "Dm"="dme", "Mm"="mmu", "Rn"="rno", "Pt"="ptr")
+ species.KEGG <- switch(species, "Ag"="aga", "At"="ath", "Bt"="bta", "Ce"="cel", "Cf"="cfa", "Dm"="dme", "Dr"="dre", "EcK12"="eco", "EcSakai"="ecs", "Gg"="gga", "Hs"="hsa", "Mm"="mmu", "Mmu"="mcc", "Pf"="pfa", "Pt"="ptr", "Rn"="rno", "Ss"="ssc", "Xl"="xla")
}
# prior.prob and covariate must have same length as universe
diff --git a/R/lmfit.R b/R/lmfit.R
index 313345e..6580e89 100644
--- a/R/lmfit.R
+++ b/R/lmfit.R
@@ -403,7 +403,7 @@ getEAWP <- function(object)
# Given any microarray data object, extract basic information needed
# for linear modelling.
# Gordon Smyth
-# 9 March 2008. Last modified 18 August 2015.
+# 9 March 2008. Last modified 18 July 2016.
{
y <- list()
@@ -413,7 +413,10 @@ getEAWP <- function(object)
y$exprs <- as.matrix(object$E)
y$Amean <- rowMeans(y$exprs,na.rm=TRUE)
} else {
+ if(is(object,"EListRaw")) stop("EListRaw object: please normalize first")
+ if(is(object,"RGList")) stop("RGList object: please normalize first")
y$printer <- object$printer
+ if(is.null(object$M)) stop("data object isn't of a recognized data class")
y$exprs <- as.matrix(object$M)
if(!is.null(object$A)) y$Amean <- rowMeans(as.matrix(object$A),na.rm=TRUE)
}
diff --git a/R/norm.R b/R/norm.R
index dbdf393..8ab726b 100755
--- a/R/norm.R
+++ b/R/norm.R
@@ -424,8 +424,8 @@ normalizeBetweenArrays <- function(object, method=NULL, targets=NULL, cyclic.met
normalizeVSN <- function(x,...)
{
- if(!requireNamespace("Biobase",quietly=TRUE)) stop("Biobase package required but is not available")
- if(!requireNamespace("vsn",quietly=TRUE)) stop("vsn package required but is not available")
+ if(!requireNamespace("Biobase",quietly=TRUE)) stop("Biobase package required but is not installed (or can't be loaded)")
+ if(!requireNamespace("vsn",quietly=TRUE)) stop("vsn package required but is not installed (or can't be loaded)")
UseMethod("normalizeVSN")
}
diff --git a/R/plots-fit.R b/R/plots-fit.R
index b39be46..6b2eec4 100755
--- a/R/plots-fit.R
+++ b/R/plots-fit.R
@@ -183,21 +183,23 @@ heatdiagram <- function(stat,coef,primary=1,names=NULL,treatments=colnames(stat)
plotSA <- function(fit, xlab="Average log-expression", ylab="log2(sigma)", zero.weights=FALSE, pch=16, cex=0.2, ...)
# Plot log-residual variance vs intensity
-# Gordon Smyth 14 Jan 2009.
-# Last modified 27 October 2010.
+# Gordon Smyth
+# Created 14 Jan 2009. Last modified 26 June 2016.
{
if(!is(fit,"MArrayLM")) stop("fit must be a MArrayLM object")
x <- fit$Amean
y <- log2(fit$sigma)
if(!is.null(fit$weights) && !zero.weights) {
- w <- fit$weights
- w[is.na(w)] <- 0
- w[w<0] <- 0
- allzero <- apply(w==0,1,all)
+ allzero <- rowSums(fit$weights>0,na.rm=TRUE) == 0
y[allzero] <- NA
}
plot(x,y,xlab=xlab,ylab=ylab,pch=pch,cex=cex,...)
- lines(lowess(x,y,f=0.4),col="red")
+ if(anyNA(x) || anyNA(y)) {
+ ok <- !(is.na(x) | is.na(y))
+ lines(lowess(x[ok],y[ok],f=0.4),col="red")
+ } else {
+ lines(lowess(x,y,f=0.4),col="red")
+ }
if(!is.null(fit$s2.prior)) {
if(length(fit$s2.prior)==1) {
abline(h=log2(fit$s2.prior)/2,col="blue")
diff --git a/build/vignette.rds b/build/vignette.rds
index 543d17a..96951c2 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/doc/changelog.txt b/inst/doc/changelog.txt
index 66c0037..78a36b8 100755
--- a/inst/doc/changelog.txt
+++ b/inst/doc/changelog.txt
@@ -1,3 +1,43 @@
+24 Jul 2016: limma 3.28.17
+
+- Update Phipson et al (2016) reference for robust empirical Bayes.
+
+21 Jul 2016: limma 3.28.16
+
+- Added more explanation about the 'batch2' and 'covariates'
+ arguments to the help page for removeBatchEffect().
+
+18 Jul 2016: limma 3.28.15
+
+- More informative error message from getEAWP() and lmFit() when
+ 'object' is a non-normalized data object.
+
+28 Jun 2016: limma 3.28.14
+
+- Bug fix for fry() when 'index' has NULL names.
+
+26 Jun 2016: limma 3.28.13
+
+- The lowess line drawn by plotSA() is now more robust with respect
+ to NA variances.
+
+23 Jun 2016: limma 3.28.12
+
+- goana() now works for any species for which an Entrez Gene based
+ organism package exists.
+
+- The 'species' argument of kegga() can now accept any Bioconductor
+ species abbreviation.
+
+- topGO() now breaks ties by the number of genes in the GO Term and
+ by the name of the Term if the p-values are equal. (This is the
+ same behavior as topKEGG.)
+
+22 Jun 2016: limma 3.28.11
+
+- alias2Symbol() and alias2SymbolTable() now work for any species for
+ which an Entrez Gene based organism package exists.
+
15 Jun 2016: limma 3.28.10
- new argument 'annotation' for read.idat(), to allow users to read
diff --git a/inst/doc/intro.pdf b/inst/doc/intro.pdf
index c3ef58e..ab5ca24 100644
Binary files a/inst/doc/intro.pdf and b/inst/doc/intro.pdf differ
diff --git a/man/alias2Symbol.Rd b/man/alias2Symbol.Rd
index 776b3d0..acbbab9 100644
--- a/man/alias2Symbol.Rd
+++ b/man/alias2Symbol.Rd
@@ -12,7 +12,7 @@ alias2SymbolTable(alias, species = "Hs")
\arguments{
\item{alias}{character vector of gene aliases}
\item{species}{character string specifying the species.
- Possible values are \code{"Dm"}, \code{"Hs"}, \code{"Mm"} or \code{"Rn"}.}
+ Possible values include \code{"Hs"} (human), \code{"Mm"} (mouse), \code{"Rn"} (rat), \code{"Dm"} (fly) or \code{"Pt"} (chimpanzee), but other values are possible if the corresponding organism package is available.}
\item{expand.symbols}{logical.
This affects those elements of \code{alias} that are the official gene symbol for one gene and also an alias for another gene.
If \code{FALSE}, then these elements will just return themselves.
@@ -20,13 +20,35 @@ alias2SymbolTable(alias, species = "Hs")
}
\details{
Aliases are mapped via NCBI Entrez Gene identity numbers using Bioconductor organism packages.
-Possible species are \code{"Dm"} for fly, \code{"Hs"} for human, \code{"Mm"} for mouse and \code{"Rn"} for rat.
-The appropriate Bioconductor organism package is assumed to be installed.
\code{alias2Symbol} maps a set of aliases to a set of symbols, without necessarily preserving order.
The output vector may be longer or shorter than the original vector, because some aliases might not be found and some aliases may map to more than one symbol.
\code{alias2SymbolTable} maps each alias to a gene symbol and returns a table with one row for each alias.
If an alias maps to more than one symbol, then the first one found is returned.
+
+\code{species} can be any character string XX for which an organism package org.XX.eg.db exists and is installed.
+The only requirement of the organism package is that it contains objects \code{org.XX.egALIAS2EG} and \code{org.XX.egSYMBOL} linking the aliases and symbols to Entrez Gene Ids.
+At the time of writing (June 2016), the following organism packages are available from Bioconductor:
+\tabular{lll}{
+\tab Package \tab Species\cr
+\tab org.Ag.eg.db \tab Anopheles\cr
+\tab org.Bt.eg.db \tab Bovine\cr
+\tab org.Ce.eg.db \tab Worm\cr
+\tab org.Cf.eg.db \tab Canine\cr
+\tab org.Dm.eg.db \tab Fly\cr
+\tab org.Dr.eg.db \tab Zebrafish\cr
+\tab org.EcK12.eg.db \tab E coli strain K12\cr
+\tab org.EcSakai.eg.db \tab E coli strain Sakai\cr
+\tab org.Gg.eg.db \tab Chicken\cr
+\tab org.Hs.eg.db \tab Human\cr
+\tab org.Mm.eg.db \tab Mouse\cr
+\tab org.Mmu.eg.db \tab Rhesus\cr
+\tab org.Pt.eg.db \tab Chimp\cr
+\tab org.Rn.eg.db \tab Rat\cr
+\tab org.Ss.eg.db \tab Pig\cr
+\tab org.Xl.eg.db \tab Xenopus
+}
+
}
\value{
Character vector of gene symbols.
diff --git a/man/ebayes.Rd b/man/ebayes.Rd
index fd51bf1..a67d15f 100755
--- a/man/ebayes.Rd
+++ b/man/ebayes.Rd
@@ -95,11 +95,11 @@ Loennstedt, I., and Speed, T. P. (2002). Replicated microarray data. \emph{Stati
Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
-\emph{Annals of Applied Statistics} 10.
+\emph{Annals of Applied Statistics} 10, 946-963.
\url{http://arxiv.org/abs/1602.08678}
Smyth, G. K. (2004). Linear models and empirical Bayes methods for assessing differential expression in microarray experiments.
-\emph{Statistical Applications in Genetics and Molecular Biology}, Volume \bold{3}, Article 3.
+\emph{Statistical Applications in Genetics and Molecular Biology} 3, Article 3.
\url{http://www.statsci.org/smyth/pubs/ebayes.pdf}
}
diff --git a/man/fitfdist.Rd b/man/fitfdist.Rd
index 3db08b8..cdd2509 100755
--- a/man/fitfdist.Rd
+++ b/man/fitfdist.Rd
@@ -60,7 +60,7 @@ PhD Thesis. University of Melbourne, Australia.
Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
-\emph{Annals of Applied Statistics} 10.
+\emph{Annals of Applied Statistics} 10, 946-963.
\url{http://arxiv.org/abs/1602.08678}
}
diff --git a/man/getEAWP.Rd b/man/getEAWP.Rd
index 2ee17b6..3d6b2e4 100644
--- a/man/getEAWP.Rd
+++ b/man/getEAWP.Rd
@@ -21,6 +21,9 @@ For other data objects, \code{Amean} is the vector of row means of the matrix of
From April 2013, the rownames of the output \code{exprs} matrix are required to be unique.
If \code{object} has no row names, then the output rownames of \code{exprs} are \code{1:nrow(object)}.
If \code{object} has row names but with duplicated names, then the rownames of \code{exprs} are set to \code{1:nrow(object)} and the original row names are preserved in the \code{ID} column of \code{probes}.
+
+\code{object} should be a normalized data object.
+\code{getEAWP} will return an error if \code{object} is a non-normalized data object such as \code{RGList} or \code{EListRaw}, because these do not contain log-expression values.
}
\value{
A list with components
diff --git a/man/goana.Rd b/man/goana.Rd
index 192f567..513f754 100644
--- a/man/goana.Rd
+++ b/man/goana.Rd
@@ -30,7 +30,10 @@ getKEGGPathwayNames(species.KEGG = NULL, remove.qualifier = FALSE)
\item{coef}{column number or column name specifying for which coefficient or contrast differential expression should be assessed.}
\item{geneid}{Entrez Gene identifiers. Either a vector of length \code{nrow(de)} or the name of the column of \code{de$genes} containing the Entrez Gene IDs.}
\item{FDR}{false discovery rate cutoff for differentially expressed genes. Numeric value between 0 and 1.}
- \item{species}{two-letter Bioconductor species identifier. Possible values are \code{"Hs"}, \code{"Mm"}, \code{"Rn"}, \code{"Dm"} or \code{"Pt"}. Ignored if \code{species.KEGG} or is not \code{NULL} or if \code{gene.pathway} and \code{pathway.names} are not \code{NULL}.}
+ \item{species}{character string specifying the species.
+ Possible values include \code{"Hs"} (human), \code{"Mm"} (mouse), \code{"Rn"} (rat), \code{"Dm"} (fly) or \code{"Pt"} (chimpanzee), but other values are possible if the corresponding organism package is available.
+ See \code{\link{alias2Symbol}} for other possible values.
+ Ignored if \code{species.KEGG} or is not \code{NULL} or if \code{gene.pathway} and \code{pathway.names} are not \code{NULL}.}
\item{species.KEGG}{three-letter KEGG species identifier. See \url{http://www.kegg.jp/kegg/catalog/org_list.html} or \url{http://rest.kegg.jp/list/organism} for possible values. Ignored if \code{gene.pathway} and \code{pathway.names} are not \code{NULL}.}
\item{convert}{if \code{TRUE} then KEGG gene identifiers will be converted to NCBI Entrez Gene identifiers. Note that KEGG IDs are the same as Entrez Gene IDs for most species anyway.}
\item{gene.pathway}{data.frame linking genes to pathways. First column gives gene IDs, second column gives pathway IDs. By default this is obtained automatically by \code{getGeneKEGGLinks(species.KEGG)}.}
@@ -56,7 +59,8 @@ The default method accepts the gene list as a vector of gene IDs,
while the \code{MArrayLM} method extracts the gene lists automatically from a linear model fit object.
\code{goana} uses annotation from the appropriate Bioconductor organism package.
-\code{goana} can be used for any of the five species specified.
+The \code{species} can be any character string XX for which an organism package org.XX.eg.db exists and is installed.
+See \code{\link{alias2Symbol}} for other possible values for \code{species}.
\code{kegga} reads KEGG pathway annotation from the KEGG website.
Note that the species name can be provided in either Bioconductor or KEGG format.
diff --git a/man/ids2indices.Rd b/man/ids2indices.Rd
index 69ae0fd..5c1fd01 100644
--- a/man/ids2indices.Rd
+++ b/man/ids2indices.Rd
@@ -31,10 +31,10 @@ There is a topic page on \link{10.GeneSetTests}.
\dontrun{
-download.file("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5.rdata",
- "human_c2_v5.rdata", mode = "wb")
+download.file("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p1.rdata",
+ "human_c2_v5p1.rdata", mode = "wb")
-load("human_c2_v5.rdata")
+load("human_c2_v5p1.rdata")
c2.indices <- ids2indices(Hs.c2, y$genes$GeneID)
camera(y, c2.indices, design)
diff --git a/man/removeBatchEffect.Rd b/man/removeBatchEffect.Rd
index 6bc2bc1..46ae005 100644
--- a/man/removeBatchEffect.Rd
+++ b/man/removeBatchEffect.Rd
@@ -12,8 +12,8 @@ removeBatchEffect(x, batch=NULL, batch2=NULL, covariates=NULL,
\item{x}{numeric matrix, or any data object that can be processed by \code{\link{getEAWP}} containing log-expression values for a series of samples.
Rows correspond to probes and columns to samples.}
\item{batch}{factor or vector indicating batches.}
- \item{batch2}{factor or vector indicating batches.}
- \item{covariates}{matrix or vector of covariates to be adjusted for.}
+ \item{batch2}{optional factor or vector indicating a second series of batches.}
+ \item{covariates}{matrix or vector of numeric covariates to be adjusted for.}
\item{design}{optional design matrix relating to treatment conditions to be preserved}
\item{\dots}{other arguments are passed to \code{\link{lmFit}}.}
}
@@ -22,26 +22,39 @@ A numeric matrix of log-expression values with batch and covariate effects remov
}
\details{
This function is useful for removing batch effects, associated with hybridization time or other technical variables, prior to clustering or unsupervised analysis such as PCA, MDS or heatmaps.
-It is not intended to use with linear modelling.
-For linear modelling, it is better to include the batch factors in the linear model.
-
The design matrix is used to describe comparisons between the samples, for example treatment effects, which should not be removed.
-
The function (in effect) fits a linear model to the data, including both batches and regular treatments, then removes the component due to the batch effects.
+In most applications, only the first \code{batch} argument will be needed.
+This covers the situation where the data has been collected in a series of separate batches.
+
+The \code{batch2} argument is used when there is a second series of batch effects, independent of the first series.
+For example, \code{batch} might correspond to time of data collection while \code{batch2} might correspond to operator or some other change in operating characteristics.
+If \code{batch2} is included, then the effects of \code{batch} and \code{batch2} are assumed to be additive.
+
+The \code{covariates} argument allows correction for one or more continuous numeric effects, similar to the analysis of covariance method in statistics.
+If \code{covariates} contains more than one column, then the columns are assumed to have additive effects.
+
The data object \code{x} can be of any class for which \code{lmFit} works.
If \code{x} contains weights, then these will be used in estimating the batch effects.
}
+\note{
+This function is not intended to be used prior to linear modelling.
+For linear modelling, it is better to include the batch factors in the linear model.
+}
+
\seealso{
\link{05.Normalization}
}
\author{Gordon Smyth and Carolyn de Graaf}
\examples{
-y <- matrix(rnorm(10*6),10,6)
-colnames(y) <- c("A1","A2","A3","B1","B2","B3")
-y[,1:3] <- y[,1:3] + 10
-y
-removeBatchEffect(y,batch=c("A","A","A","B","B","B"))
+y <- matrix(rnorm(10*9),10,9)
+y[,1:3] <- y[,1:3] + 5
+batch <- c("A","A","A","B","B","B","C","C","C")
+y2 <- removeBatchEffect(y, batch)
+par(mfrow=c(1,2))
+boxplot(as.data.frame(y),main="Original")
+boxplot(as.data.frame(y2),main="Batch corrected")
}
diff --git a/man/squeezeVar.Rd b/man/squeezeVar.Rd
index 9203938..4af6f95 100755
--- a/man/squeezeVar.Rd
+++ b/man/squeezeVar.Rd
@@ -55,7 +55,7 @@ A list with components
\references{
Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
-\emph{Annals of Applied Statistics} 10.
+\emph{Annals of Applied Statistics} 10, 946-963.
\url{http://arxiv.org/abs/1602.08678}
Sartor MA, Tomlinson CR, Wesselkamper SC, Sivaganesan S, Leikauf GD, Medvedovic M (2006).
@@ -64,8 +64,7 @@ BMC bioinformatics 7, 538.
Smyth, G. K. (2004). Linear models and empirical Bayes methods for
assessing differential expression in microarray experiments.
-\emph{Statistical Applications in Genetics and Molecular Biology}, \bold{3},
-No. 1, Article 3.
+\emph{Statistical Applications in Genetics and Molecular Biology} 3, Article 3.
\url{http://www.statsci.org/smyth/pubs/ebayes.pdf}
}
diff --git a/tests/limma-Tests.Rout.save b/tests/limma-Tests.Rout.save
index f085afd..5596aa1 100755
--- a/tests/limma-Tests.Rout.save
+++ b/tests/limma-Tests.Rout.save
@@ -1351,8 +1351,8 @@ GO:0006915 apoptotic process BP 5 4 1 0.003446627
GO:0012501 programmed cell death BP 5 4 1 0.003446627
GO:0055082 cellular chemical homeostas... BP 2 0 2 1.000000000
GO:0006897 endocytosis BP 3 3 0 0.005046382
-GO:0044767 single-organism development... BP 23 3 5 0.845083285
GO:0048856 anatomical structure develo... BP 23 3 5 0.845083285
+GO:0044767 single-organism development... BP 23 3 5 0.845083285
GO:1902589 single-organism organelle o... BP 7 1 3 0.762502428
GO:0008219 cell death BP 6 4 1 0.009129968
GO:0016265 death BP 6 4 1 0.009129968
@@ -1362,8 +1362,8 @@ GO:0006915 0.3096959098
GO:0012501 0.3096959098
GO:0055082 0.0042424242
GO:0006897 1.0000000000
-GO:0044767 0.0066515474
GO:0048856 0.0066515474
+GO:0044767 0.0066515474
GO:1902589 0.0066732856
GO:0008219 0.3605604217
GO:0016265 0.3605604217
@@ -1371,15 +1371,15 @@ GO:0016265 0.3605604217
Term Ont N Up Down P.Up P.Down
GO:0032502 developmental process BP 24 3 6 0.8685205 0.0006606503
GO:0055082 cellular chemical homeostas... BP 2 0 2 1.0000000 0.0042424242
-GO:0044767 single-organism development... BP 23 3 5 0.8450833 0.0066515474
GO:0048856 anatomical structure develo... BP 23 3 5 0.8450833 0.0066515474
+GO:0044767 single-organism development... BP 23 3 5 0.8450833 0.0066515474
GO:1902589 single-organism organelle o... BP 7 1 3 0.7625024 0.0066732856
-GO:0007283 spermatogenesis BP 3 0 2 1.0000000 0.0122943723
-GO:0048232 male gamete generation BP 3 0 2 1.0000000 0.0122943723
-GO:0001889 liver development BP 3 0 2 1.0000000 0.0122943723
+GO:0031225 anchored component of membr... CC 3 0 2 1.0000000 0.0122943723
GO:0019725 cellular homeostasis BP 3 0 2 1.0000000 0.0122943723
-GO:0035295 tube development BP 3 0 2 1.0000000 0.0122943723
+GO:0061008 hepaticobiliary system deve... BP 3 0 2 1.0000000 0.0122943723
+GO:0001889 liver development BP 3 0 2 1.0000000 0.0122943723
+GO:0048232 male gamete generation BP 3 0 2 1.0000000 0.0122943723
>
> proc.time()
user system elapsed
- 2.73 0.20 2.94
+ 2.77 0.23 3.04
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-limma.git
More information about the debian-med-commit
mailing list