[med-svn] [r-bioc-deseq2] 03/06: New upstream version 1.18.0

Andreas Tille tille at debian.org
Fri Nov 10 08:28:17 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-bioc-deseq2.

commit 36eda24191eb31a01e3632008bfced0eb64c79ad
Author: Andreas Tille <tille at debian.org>
Date:   Fri Nov 10 09:03:04 2017 +0100

    New upstream version 1.18.0
---
 DESCRIPTION                               |   9 +-
 NAMESPACE                                 |   4 +
 NEWS                                      |  18 +
 R/AllClasses.R                            |  41 +-
 R/AllGenerics.R                           |   8 +
 R/RcppExports.R                           |   6 +-
 R/core.R                                  |  29 +-
 R/helper.R                                |  76 +--
 R/lfcShrink.R                             | 318 ++++++++++++
 R/methods.R                               |  32 +-
 R/plots.R                                 |  43 +-
 R/results.R                               |  12 +-
 build/vignette.rds                        | Bin 219 -> 221 bytes
 inst/doc/DESeq2.R                         | 101 ++--
 inst/doc/DESeq2.Rmd                       | 444 ++++++++++-------
 inst/doc/DESeq2.html                      | 802 ++++++++++++++++++------------
 man/DESeq.Rd                              |   6 +-
 man/DESeq2-package.Rd                     |   4 +-
 man/DESeqResults.Rd                       |   4 +-
 man/estimateDispersionsGeneEst.Rd         |   4 +-
 man/lfcShrink.Rd                          | 102 +++-
 man/normTransform.Rd                      |   2 +-
 man/priorInfo.Rd                          |  30 ++
 src/RcppExports.cpp                       |  18 +-
 tests/testthat/test_construction_errors.R |   2 +
 tests/testthat/test_interactions.R        |  10 +-
 tests/testthat/test_lfcShrink.R           |  56 ++-
 tests/testthat/test_nbinomWald.R          |  10 +
 tests/testthat/test_parallel.R            |  14 +
 tests/testthat/test_results.R             |  15 +-
 vignettes/DESeq2.Rmd                      | 444 ++++++++++-------
 vignettes/library.bib                     |  69 ++-
 32 files changed, 1825 insertions(+), 908 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 19a566f..de9724e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: DESeq2
 Type: Package
 Title: Differential gene expression analysis based on the negative
         binomial distribution
-Version: 1.16.1
+Version: 1.18.0
 Author: Michael Love, Simon Anders, Wolfgang Huber
 Maintainer: Michael Love <michaelisaiahlove at gmail.com>
 Description: Estimate variance-mean dependence in count data from
@@ -16,11 +16,12 @@ Imports: BiocGenerics (>= 0.7.5), Biobase, BiocParallel, genefilter,
 Depends: S4Vectors (>= 0.9.25), IRanges, GenomicRanges,
         SummarizedExperiment (>= 1.1.6)
 Suggests: testthat, knitr, BiocStyle, vsn, pheatmap, RColorBrewer, IHW,
-        tximport, tximportData, readr, pbapply, airway, pasilla (>=
-        0.2.10)
+        apeglm, ashr, tximport, tximportData, readr, pbapply, airway,
+        pasilla (>= 0.2.10)
 LinkingTo: Rcpp, RcppArmadillo
+URL: https://github.com/mikelove/DESeq2
 biocViews: Sequencing, ChIPSeq, RNASeq, SAGE, DifferentialExpression,
         GeneExpression, Transcription
 RoxygenNote: 6.0.1
 NeedsCompilation: yes
-Packaged: 2017-05-05 23:25:16 UTC; biocbuild
+Packaged: 2017-10-30 23:40:09 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index 59ee26a..c4e6404 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ S3method(coef,DESeqDataSet)
 export("dispersionFunction<-")
 export("dispersions<-")
 export("normalizationFactors<-")
+export("priorInfo<-")
 export(DESeq)
 export(DESeqDataSet)
 export(DESeqDataSetFromHTSeqCount)
@@ -33,6 +34,7 @@ export(normalizationFactors)
 export(normalizeGeneLength)
 export(plotCounts)
 export(plotSparsity)
+export(priorInfo)
 export(removeResults)
 export(replaceOutliers)
 export(replaceOutliersWithTrimmedMean)
@@ -52,6 +54,7 @@ exportMethods("design<-")
 exportMethods("dispersionFunction<-")
 exportMethods("dispersions<-")
 exportMethods("normalizationFactors<-")
+exportMethods("priorInfo<-")
 exportMethods("sizeFactors<-")
 exportMethods(counts)
 exportMethods(design)
@@ -63,6 +66,7 @@ exportMethods(normalizationFactors)
 exportMethods(plotDispEsts)
 exportMethods(plotMA)
 exportMethods(plotPCA)
+exportMethods(priorInfo)
 exportMethods(show)
 exportMethods(sizeFactors)
 import(Biobase)
diff --git a/NEWS b/NEWS
index 18be64e..bc358ef 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,21 @@
+CHANGES IN VERSION 1.18.0
+-------------------------
+
+    o lfcShrink() offers alternative estimators type="apeglm"
+      and type="ashr", making use of shrinkage estimators
+      in the 'apeglm' and 'ashr' packages, respectively.
+      See ?lfcShrink for more details and appropriate
+      references. The integration of these alternative
+      shrinkage estimators is still in development.
+      Additionally, the DESeqResults object gains priorInfo(res),
+      which passes along details of the fitted prior on LFC.
+
+    o Factor levels using characters other than letters,
+      numbers, '_' and '.' will print a message (not a warning
+      or error) that it is recommended to restrict to these
+      "safe characters". This follows a suggestion from the
+      Bioconductor support site to avoid user errors.
+
 CHANGES IN VERSION 1.16.0
 -------------------------
 
diff --git a/R/AllClasses.R b/R/AllClasses.R
index 968359f..5f02513 100644
--- a/R/AllClasses.R
+++ b/R/AllClasses.R
@@ -24,18 +24,29 @@ setValidity( "DESeqDataSet", function( object ) {
   }
   designVarsClass <- sapply(designVars, function(v) class(colData(object)[[v]]))
   if (any(designVarsClass == "character")) {
-    return("
-
-variables in design formula are character vectors.
+    return("variables in design formula are character vectors.
   convert these columns of colData(object) to factors before including in the design formula")
   }
   designFactors <- designVars[designVarsClass == "factor"]
-  if (any(sapply(designFactors,function(v) any(duplicated(make.names(levels(colData(object)[[v]]))))))) {
-    return("
-
-factors in the design formula have non-unique level names after make.names() is applied.
-make.names() is used during the analysis to turn factor levels into safe column names.
-please only use letters and numbers for levels of factors in the design")
+  # levels would duplicate after make.names()
+  if (any(sapply(designFactors,function(v) {
+    factor.lvls <- levels(colData(object)[[v]])
+    factor.nms <- make.names(factor.lvls)
+    any(duplicated(factor.nms))
+  }))) {
+    return("levels of factors in the design have non-unique level names after make.names() is applied.
+  best to only uobject letters and numbers for levels of factors in the design")
+  }
+  # levels contain characters other than letters, numbers, and underscore
+  if (any(sapply(designFactors,function(v) {
+    factor.lvls <- levels(colData(object)[[v]])
+    any(!grepl("^[A-Za-z0-9_.]+$",factor.lvls))
+  }))) {
+    # just a warning for now
+    message("  Note: levels of factors in the design contain characters other than
+  letters, numbers, '_' and '.'. It is recommended (but not required) to use
+  only letters, numbers, and delimiters '_' or '.', as these are safe characters
+  for column names in R. [This is a message, not an warning or error]")
   }
   # else...
   TRUE
@@ -207,7 +218,6 @@ DESeqDataSet <- function(se, design, ignoreRank=FALSE) {
     stop("design contains one or more variables with all samples having the same value,
   remove these variables from the design")
   }
-
   
   modelMatrix <- stats::model.matrix.default(design, data=as.data.frame(colData(se)))
   if (!ignoreRank) {
@@ -356,7 +366,11 @@ DESeqDataSetFromTximport <- function(txi, colData, design, ...)
 
 #' @rdname DESeqResults
 #' @export
-setClass("DESeqResults", contains="DataFrame")
+setClass("DESeqResults",
+         contains="DataFrame",
+         representation = representation( 
+           priorInfo = "list")
+         )
 
 #' DESeqResults object and constructor
 #'
@@ -368,14 +382,15 @@ setClass("DESeqResults", contains="DataFrame")
 #'
 #' @param DataFrame a DataFrame of results, standard column names are:
 #' baseMean, log2FoldChange, lfcSE, stat, pvalue, padj.
+#' @param priorInfo a list giving information on the log fold change prior
 #'
 #' @return a DESeqResults object
 #' @docType class
 #' @aliases DESeqResults-class
 #' @rdname DESeqResults
 #' @export
-DESeqResults <- function(DataFrame) {
-  new("DESeqResults", DataFrame)
+DESeqResults <- function(DataFrame, priorInfo=list()) {
+  new("DESeqResults", DataFrame, priorInfo=priorInfo)
 }
 
 #' @rdname DESeqTransform
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
index 0309ac1..23524c8 100644
--- a/R/AllGenerics.R
+++ b/R/AllGenerics.R
@@ -21,3 +21,11 @@ setGeneric("normalizationFactors", function(object,...) standardGeneric("normali
 #' @rdname normalizationFactors
 #' @export
 setGeneric("normalizationFactors<-", function(object,...,value) standardGeneric("normalizationFactors<-"))
+
+#' @rdname priorInfo
+#' @export
+setGeneric("priorInfo", function(object,...) standardGeneric("priorInfo"))
+
+#' @rdname priorInfo
+#' @export
+setGeneric("priorInfo<-", function(object,...,value) standardGeneric("priorInfo<-"))
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 851e9e8..8cb3381 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -2,14 +2,14 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 fitDisp <- function(ySEXP, xSEXP, mu_hatSEXP, log_alphaSEXP, log_alpha_prior_meanSEXP, log_alpha_prior_sigmasqSEXP, min_log_alphaSEXP, kappa_0SEXP, tolSEXP, maxitSEXP, usePriorSEXP, weightsSEXP, useWeightsSEXP) {
-    .Call('DESeq2_fitDisp', PACKAGE = 'DESeq2', ySEXP, xSEXP, mu_hatSEXP, log_alphaSEXP, log_alpha_prior_meanSEXP, log_alpha_prior_sigmasqSEXP, min_log_alphaSEXP, kappa_0SEXP, tolSEXP, maxitSEXP, usePriorSEXP, weightsSEXP, useWeightsSEXP)
+    .Call('_DESeq2_fitDisp', PACKAGE = 'DESeq2', ySEXP, xSEXP, mu_hatSEXP, log_alphaSEXP, log_alpha_prior_meanSEXP, log_alpha_prior_sigmasqSEXP, min_log_alphaSEXP, kappa_0SEXP, tolSEXP, maxitSEXP, usePriorSEXP, weightsSEXP, useWeightsSEXP)
 }
 
 fitBeta <- function(ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, weightsSEXP, useWeightsSEXP, tolSEXP, maxitSEXP, useQRSEXP) {
-    .Call('DESeq2_fitBeta', PACKAGE = 'DESeq2', ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, weightsSEXP, useWeightsSEXP, tolSEXP, maxitSEXP, useQRSEXP)
+    .Call('_DESeq2_fitBeta', PACKAGE = 'DESeq2', ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, weightsSEXP, useWeightsSEXP, tolSEXP, maxitSEXP, useQRSEXP)
 }
 
 fitDispGrid <- function(ySEXP, xSEXP, mu_hatSEXP, disp_gridSEXP, log_alpha_prior_meanSEXP, log_alpha_prior_sigmasqSEXP, usePriorSEXP, weightsSEXP, useWeightsSEXP) {
-    .Call('DESeq2_fitDispGrid', PACKAGE = 'DESeq2', ySEXP, xSEXP, mu_hatSEXP, disp_gridSEXP, log_alpha_prior_meanSEXP, log_alpha_prior_sigmasqSEXP, usePriorSEXP, weightsSEXP, useWeightsSEXP)
+    .Call('_DESeq2_fitDispGrid', PACKAGE = 'DESeq2', ySEXP, xSEXP, mu_hatSEXP, disp_gridSEXP, log_alpha_prior_meanSEXP, log_alpha_prior_sigmasqSEXP, usePriorSEXP, weightsSEXP, useWeightsSEXP)
 }
 
diff --git a/R/core.R b/R/core.R
index 96bbdfb..307468a 100644
--- a/R/core.R
+++ b/R/core.R
@@ -9,7 +9,8 @@
 # AllGenerics .... the generics defined in DESeq2
 # results ........ results() function and helpers
 # plots .......... all plotting functions
-# helper ......... lfcShrink, collapseReplicates, fpkm, fpm, DESeqParallel
+# lfcShrink ...... log2 fold change shrinkage
+# helper ......... unmix, collapseReplicates, fpkm, fpm, DESeqParallel
 # expanded ....... helpers for dealing with expanded model matrices
 # wrappers ....... the R wrappers for the C++ functions (mine)
 # RcppExports .... the R wrappers for the C++ functions (auto)
@@ -58,11 +59,11 @@
 #'
 #' DESeq2 reference:
 #' 
-#' Michael I Love, Wolfgang Huber, Simon Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology 2014, 15:550. \url{http://dx.doi.org/10.1186/s13059-014-0550-8}
+#' Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15:550. \url{http://dx.doi.org/10.1186/s13059-014-0550-8}
 #'
 #' DESeq reference:
 #' 
-#' Simon Anders, Wolfgang Huber: Differential expression analysis for sequence count data. Genome Biology 2010, 11:106. \url{http://dx.doi.org/10.1186/gb-2010-11-10-r106}
+#' Simon Anders, Wolfgang Huber (2010) Differential expression analysis for sequence count data. Genome Biology, 11:106. \url{http://dx.doi.org/10.1186/gb-2010-11-10-r106}
 #'
 #' @author Michael Love, Wolfgang Huber, Simon Anders
 #' 
@@ -113,7 +114,7 @@ NULL
 #' of counts around the expected value for each group, which is critical for
 #' differential expression analysis. If an experimental design is
 #' supplied which does not contain the necessary degrees of freedom for differential
-#' analysis, \code{DESeq} will provide a message to the user and follow
+#' analysis, \code{DESeq} will provide a warning to the user and follow
 #' the strategy outlined in Anders and Huber (2010)
 #' under the section 'Working without replicates', wherein all the samples
 #' are considered as replicates of a single group for the estimation of dispersion.
@@ -121,6 +122,8 @@ NULL
 #' may be expected, which will make that approach conservative."
 #' Furthermore, "while one may not want to draw strong conclusions from such an analysis,
 #' it may still be useful for exploration and hypothesis generation."
+#' We provide this approach for data exploration only, but for accurately
+#' identifying differential expression, biological replicates are required.
 #'
 #' The argument \code{minReplicatesForReplace} is used to decide which samples
 #' are eligible for automatic replacement in the case of extreme Cook's distance.
@@ -203,7 +206,8 @@ NULL
 #' 
 #' @references
 #'
-#' Michael I Love, Wolfgang Huber, Simon Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology 2014, 15:550. \url{http://dx.doi.org/10.1186/s13059-014-0550-8}
+#' Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15:550. \url{https://doi.org/10.1186/s13059-014-0550-8}
+#' 
 #' @import BiocGenerics BiocParallel S4Vectors IRanges GenomicRanges SummarizedExperiment Biobase Rcpp methods
 #'
 #' @importFrom locfit locfit
@@ -537,6 +541,7 @@ estimateSizeFactorsForMatrix <- function(counts, locfunc=stats::median,
 #' default is NULL, in which case a lienar model is used if the
 #' number of groups defined by the model matrix is equal to the number
 #' of columns of the model matrix
+#' @param minmu lower bound on the estimated count for fitting gene-wise dispersion
 #' 
 #' @return a DESeqDataSet with gene-wise, fitted, or final MAP
 #' dispersion estimates in the metadata columns of the object.
@@ -568,7 +573,8 @@ estimateSizeFactorsForMatrix <- function(counts, locfunc=stats::median,
 #' @export
 estimateDispersionsGeneEst <- function(object, minDisp=1e-8, kappa_0=1,
                                        dispTol=1e-6, maxit=100, quiet=FALSE,
-                                       modelMatrix=NULL, niter=1, linearMu=NULL) {
+                                       modelMatrix=NULL, niter=1, linearMu=NULL,
+                                       minmu=0.5) {
   if (!is.null(mcols(object)$dispGeneEst)) {
     if (!quiet) message("found already estimated gene-wise dispersions, removing these")
     removeCols <- c("dispGeneEst")
@@ -628,6 +634,8 @@ estimateDispersionsGeneEst <- function(object, minDisp=1e-8, kappa_0=1,
   # (we need this already to decide about linear mu fitting)
   wlist <- getAndCheckWeights(object, modelMatrix)
   weights <- wlist$weights
+  # don't let weights go below 1e-6
+  weights <- pmax(weights, 1e-6)
   useWeights <- wlist$useWeights
   
   # use a linear model to estimate the expected counts
@@ -646,10 +654,9 @@ estimateDispersionsGeneEst <- function(object, minDisp=1e-8, kappa_0=1,
   fitidx <- rep(TRUE,nrow(objectNZ))
   mu <- matrix(0, nrow=nrow(objectNZ), ncol=ncol(objectNZ))
   dispIter <- numeric(nrow(objectNZ))
-  # bound the estimated count
+  # bound the estimated count by 'minmu'
   # this helps make the fitting more robust,
   # because 1/mu occurs in the weights for the NB GLM
-  minmu <- 0.5
   for (iter in seq_len(niter)) {
     if (!linearMu) {
       fit <- fitNbinomGLMs(objectNZ[fitidx,,drop=FALSE],
@@ -1241,7 +1248,11 @@ nbinomWaldTest <- function(object,
   # if useT is set to TRUE, use a t-distribution
   if (useT) {
     dispPriorVar <- attr( dispersionFunction(object), "dispPriorVar" )
-    stopifnot(length(df)==1)
+    stopifnot(length(df)==1 | length(df)==nrow(object))
+    if (length(df) == nrow(object)) {
+      df <- df[!mcols(object)$allZero]
+      stopifnot(length(df)==nrow(WaldStatistic))
+    }
     WaldPvalue <- 2*pt(abs(WaldStatistic),df=df,lower.tail=FALSE)
   } else {
     WaldPvalue <- 2*pnorm(abs(WaldStatistic),lower.tail=FALSE)
diff --git a/R/helper.R b/R/helper.R
index 6eeb73e..de54e62 100644
--- a/R/helper.R
+++ b/R/helper.R
@@ -1,77 +1,3 @@
-#' Shrink log2 fold changes
-#'
-#' This function adds shrunken log2 fold changes (LFC) to a
-#' results table which was run without LFC moderation.
-#' Note: this function is still being prototyped.
-#'
-#' @param dds a DESeqDataSet object, which has been run through
-#' \code{\link{DESeq}}, or at the least, \code{\link{estimateDispersions}}
-#' @param coef the number of the coefficient (LFC) to shrink,
-#' consult \code{resultsNames(dds)} after running \code{DESeq(dds, betaPrior=FALSE)}.
-#' only \code{coef} or \code{contrast} can be specified, not both
-#' @param contrast see argument description in \code{\link{results}}.
-#' only \code{coef} or \code{contrast} can be specified, not both
-#' @param res a DESeqResults object (can be missing)
-#' @param type at this time, ignored argument, because only one
-#' shrinkage estimator, but more to come!
-#'
-#' @return if \code{res} is not missing, a DESeqResults object with
-#' the \code{log2FoldChange} column replaced with a shrunken LFC.
-#' If \code{res} is missing, just the shrunken LFC vector.
-#'
-#' @export
-#' 
-#' @examples
-#'
-#'  dds <- makeExampleDESeqDataSet(betaSD=1)
-#'  dds <- DESeq(dds, betaPrior=FALSE)
-#'  res <- results(dds)
-#'  res.shr <- lfcShrink(dds=dds, coef=2, res=res)
-#'  res.shr <- lfcShrink(dds=dds, contrast=c("condition","B","A"), res=res)
-#' 
-lfcShrink <- function(dds, coef, contrast, res, type="normal") {
-  if (is.null(dispersions(dds))) {
-    stop("lfcShrink requires dispersion estimates, first call estimateDispersions()")
-  }
-
-  # match the shrinkage type
-  type <- match.arg(type, choices=c("normal"))
-
-  # fit MLE coefficients... TODO skip this step
-  dds <- estimateMLEForBetaPriorVar(dds)
-
-  stopifnot(missing(coef) | missing(contrast))
-  if (missing(contrast)) {
-    modelMatrixType <- "standard"
-  } else {
-    modelMatrixType <- "expanded"
-  }
-  attr(dds,"modelMatrixType") <- modelMatrixType
-  betaPriorVar <- estimateBetaPriorVar(dds)
-
-  dds.shr <- nbinomWaldTest(dds,
-                            betaPrior=TRUE,
-                            betaPriorVar=betaPriorVar,
-                            modelMatrixType=modelMatrixType,
-                            quiet=TRUE)
-
-  if (missing(contrast)) {
-    rn <- resultsNames(dds.shr)
-    res.shr <- results(dds.shr, name=rn[coef])
-  } else {
-    res.shr <- results(dds.shr, contrast=contrast)
-  }
-  
-  if (!missing(res)) {
-    res <- res[,c("baseMean","log2FoldChange","stat","pvalue","padj")]
-    res$log2FoldChange <- res.shr$log2FoldChange
-    mcols(res)$description[2] <- mcols(res.shr)$description[2]
-    return(res)
-  } else {
-    return(res.shr$log2FoldChange)
-  }
-}
-
 #' Unmix samples using loss in a variance stabilized space
 #'
 #' Unmixes samples in \code{x} according to \code{pure} components,
@@ -410,7 +336,7 @@ normalizeGeneLength <- function(...) {
 #' Normalized counts transformation
 #'
 #' A simple function for creating a \code{\link{DESeqTransform}}
-#' object after applying: f(count + pc).
+#' object after applying: \code{f(count(dds,normalized=TRUE) + pc)}.
 #' 
 #' @param object a DESeqDataSet object
 #' @param f a function to apply to normalized counts
diff --git a/R/lfcShrink.R b/R/lfcShrink.R
new file mode 100644
index 0000000..c6a5e89
--- /dev/null
+++ b/R/lfcShrink.R
@@ -0,0 +1,318 @@
+#' Shrink log2 fold changes
+#'
+#' Adds shrunken log2 fold changes (LFC) and SE to a
+#' results table from \code{DESeq} run without LFC shrinkage.
+#' Three shrinkage esimators for LFC are available via \code{type}.
+#'
+#' As of DESeq2 version 1.18, \code{type="apeglm"} and \code{type="ashr"}
+#' are new features, and still under development.
+#' Specifying \code{type="apeglm"} passes along DESeq2 MLE log2
+#' fold changes and standard errors to the \code{apeglm} function
+#' in the apeglm package, and re-estimates posterior LFCs for
+#' the coefficient specified by \code{coef}.
+#' Specifying \code{type="ashr"} passes along DESeq2 MLE log2
+#' fold changes and standard errors to the \code{ash} function
+#' in the ashr package, 
+#' with arguments \code{mixcompdist="normal"} and \code{method="shrink"}
+#' (\code{coef} and \code{contrast} ignored).
+#' See vignette for a comparison of shrinkage estimators on an example dataset.
+#' For all shrinkage methods, details on the prior is included in
+#' \code{priorInfo(res)}, including the \code{fitted_g} mixture for ashr.
+#' The integration of shrinkage methods from
+#' external packages will likely evolve over time. We will likely incorporate an
+#' \code{lfcThreshold} argument which can be passed to apeglm
+#' to specify regions of the posterior at an arbitrary threshold.
+#'
+#' For \code{type="normal"}, shrinkage cannot be applied to coefficients
+#' in a model with interaction terms.
+#' 
+#' @param dds a DESeqDataSet object, after running \code{\link{DESeq}}
+#' @param coef the name or number of the coefficient (LFC) to shrink,
+#' consult \code{resultsNames(dds)} after running \code{DESeq(dds)}.
+#' note: only \code{coef} or \code{contrast} can be specified, not both.
+#' \code{type="apeglm"} requires use of \code{coef}.
+#' @param contrast see argument description in \code{\link{results}}.
+#' only \code{coef} or \code{contrast} can be specified, not both.
+#' @param res a DESeqResults object. Results table produced by the
+#' default pipeline, i.e. \code{DESeq} followed by \code{results}.
+#' If not provided, it will be generated internally using \code{coef} or \code{contrast}
+#' @param type \code{"normal"} is the original DESeq2 shrinkage estimator;
+#' \code{"apeglm"} is the adaptive t prior shrinkage estimator from the 'apeglm' package;
+#' \code{"ashr"} is the adaptive shrinkage estimator from the 'ashr' package,
+#' using a fitted mixture of normals prior
+#' - see the Stephens (2016) reference below for citation
+#' @param svalue logical, should p-values and adjusted p-values be replaced
+#' with s-values when using \code{apeglm} or \code{ashr}.
+#' See Stephens (2016) reference on s-values.
+#' @param returnList logical, should \code{lfcShrink} return a list, where
+#' the first element is the results table, and the second element is the
+#' output of \code{apeglm} or \code{ashr}
+#' @param parallel if FALSE, no parallelization. if TRUE, parallel
+#' execution using \code{BiocParallel}, see same argument of \code{\link{DESeq}}
+#' parallelization only used with \code{normal} or \code{apeglm}
+#' @param BPPARAM see same argument of \code{\link{DESeq}}
+#' @param ... arguments passed to \code{apeglm} and \code{ashr}
+#'
+#' @references
+#'
+#' \code{type="normal"}:
+#'
+#' Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15:550. \url{https://doi.org/10.1186/s13059-014-0550-8}
+#' 
+#' \code{type="ashr"}:
+#'
+#' Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2. \url{https://doi.org/10.1093/biostatistics/kxw041}
+#' 
+#' @return a DESeqResults object with the \code{log2FoldChange} and \code{lfcSE}
+#' columns replaced with shrunken LFC and SE.
+#' \code{priorInfo(res)} contains information about the shrinkage procedure,
+#' relevant to the various methods specified by \code{type}.
+#'
+#' @export
+#' 
+#' @examples
+#'
+#'  set.seed(1)
+#'  dds <- makeExampleDESeqDataSet(n=500,betaSD=1)
+#'  dds <- DESeq(dds)
+#'  res <- results(dds)
+#' 
+#'  res.shr <- lfcShrink(dds=dds, coef=2)
+#'  res.shr <- lfcShrink(dds=dds, contrast=c("condition","B","A"))
+#'  res.ape <- lfcShrink(dds=dds, coef=2, type="apeglm")
+#'  res.ash <- lfcShrink(dds=dds, coef=2, type="ashr")
+#' 
+lfcShrink <- function(dds, coef, contrast, res,
+                      type=c("normal","apeglm","ashr"),
+                      svalue=FALSE, returnList=FALSE,
+                      parallel=FALSE, BPPARAM=bpparam(),
+                      ...) {  
+
+  # TODO: lfcThreshold for types: normal and apeglm
+  
+  type <- match.arg(type, choices=c("normal","apeglm","ashr"))
+  if (attr(dds,"betaPrior")) {
+    stop("lfcShrink should be used downstream of DESeq() with betaPrior=FALSE (the default)")
+  }
+  if (!missing(coef)) {
+    if (is.numeric(coef)) {
+      stopifnot(coef <= length(resultsNames(dds)))
+      coefAlpha <- resultsNames(dds)[coef]
+      coefNum <- coef
+    } else if (is.character(coef)) {
+      stopifnot(coef %in% resultsNames(dds))
+      coefNum <- which(resultsNames(dds) == coef)
+      coefAlpha <- coef
+    }
+  }
+  if (missing(res)) {
+    if (!missing(coef)) {
+      res <- results(dds, name=coefAlpha)
+    } else if (!missing(contrast)) {
+      res <- results(dds, contrast=contrast)
+    } else {
+      stop("one of coef or contrast required if 'res' is missing")
+    }
+  }
+  if (type %in% c("normal","apeglm")) {
+    if (is.null(dispersions(dds))) {
+      stop("type='normal' and 'apeglm' require dispersion estimates, first call estimateDispersions()")
+    }
+    stopifnot(all(rownames(dds) == rownames(res)))
+    if (parallel) {
+      nworkers <- BPPARAM$workers
+      parallelIdx <- factor(sort(rep(seq_len(nworkers),length=nrow(dds))))
+    }
+  }
+  
+  if (type == "normal") {
+
+    ############
+    ## normal ##
+    ############
+    
+    termsOrder <- attr(terms.formula(design(dds)),"order")
+    interactionPresent <- any(termsOrder > 1)
+    if (interactionPresent) {
+      stop("LFC shrinkage type='normal' not implemented for designs with interactions")
+    }
+    stopifnot(missing(coef) | missing(contrast))
+    # find and rename the MLE columns for estimateBetaPriorVar
+    betaCols <- grep("log2 fold change \\(MLE\\)", mcols(mcols(dds))$description)
+    stopifnot(length(betaCols) > 0)
+    if (!any(grepl("MLE_",names(mcols(dds))[betaCols]))) {
+      names(mcols(dds))[betaCols] <- paste0("MLE_", names(mcols(dds))[betaCols])
+    }
+    if (missing(contrast)) {
+      modelMatrixType <- "standard"
+    } else {
+      modelMatrixType <- "expanded"
+    }
+    attr(dds,"modelMatrixType") <- modelMatrixType
+    betaPriorVar <- estimateBetaPriorVar(dds)
+    stopifnot(length(betaPriorVar) > 0)
+    # parallel fork
+    if (!parallel) {
+      dds.shr <- nbinomWaldTest(dds,
+                                betaPrior=TRUE,
+                                betaPriorVar=betaPriorVar,
+                                modelMatrixType=modelMatrixType,
+                                quiet=TRUE)
+    } else {
+      dds.shr <- do.call(rbind, bplapply(levels(parallelIdx), function(l) {
+        nbinomWaldTest(dds[parallelIdx == l,,drop=FALSE],
+                       betaPrior=TRUE,
+                       betaPriorVar=betaPriorVar,
+                       modelMatrixType=modelMatrixType,
+                       quiet=TRUE)
+      }, BPPARAM=BPPARAM))
+    }
+    if (missing(contrast)) {
+      # parallel not necessary here
+      res.shr <- results(dds.shr, name=coefAlpha)
+    } else {
+      res.shr <- results(dds.shr, contrast=contrast, parallel=parallel, BPPARAM=BPPARAM)
+    }
+    res$log2FoldChange <- res.shr$log2FoldChange
+    res$lfcSE <- res.shr$lfcSE
+    mcols(res)$description[2:3] <- mcols(res.shr)$description[2:3]
+    deseq2.version <- packageVersion("DESeq2")
+    priorInfo(res) <- list(type="normal",
+                           package="DESeq2",
+                           version=deseq2.version,
+                           betaPriorVar=betaPriorVar)
+    return(res)
+    
+  } else if (type == "apeglm") {
+
+    ############
+    ## apeglm ##
+    ############
+    
+    if (!requireNamespace("apeglm", quietly=TRUE)) {
+      stop("type='apeglm' requires installing the Bioconductor package 'apeglm'")
+    }
+    message("using 'apeglm' for LFC shrinkage")
+    if (!missing(contrast)) {
+      stop("type='apeglm' shrinkage only for use with 'coef'")
+    }
+    stopifnot(!missing(coef))
+    incomingCoef <- gsub(" ","_",sub("log2 fold change \\(MLE\\): ","",mcols(res)[2,2]))
+    if (coefAlpha != incomingCoef) {
+      stop("'coef' should specify same coefficient as in results 'res'")
+    }
+    Y <- counts(dds)
+    if (attr(dds, "modelMatrixType") == "user-supplied") {
+      message("using supplied model matrix")
+      design <- attr(dds, "modelMatrix")
+    } else {
+      design <- model.matrix(design(dds), data=colData(dds))
+    }
+    disps <- dispersions(dds)
+    if (is.null(normalizationFactors(dds))) {
+      offset <- matrix(log(sizeFactors(dds)),
+                       nrow=nrow(dds), ncol=ncol(dds), byrow=TRUE)
+    } else {
+      offset <- log(normalizationFactors(dds))
+    }
+    if ("weights" %in% assayNames(dds)) {
+      weights <- assays(dds)[["weights"]]
+    } else {
+      weights <- matrix(1, nrow=nrow(dds), ncol=ncol(dds))
+    }
+    mle <- log(2) * cbind(res$log2FoldChange, res$lfcSE)
+    if (!parallel) {
+      fit <- apeglm::apeglm(Y=Y,
+                            x=design,
+                            log.lik=apeglm::logLikNB,
+                            param=disps,
+                            coef=coefNum,
+                            mle=mle,
+                            weights=weights,
+                            offset=offset, ...)                 
+    } else {
+      fitList <- bplapply(levels(parallelIdx), function(l) {
+        idx <- parallelIdx == l
+        apeglm::apeglm(Y=Y[idx,,drop=FALSE],
+                       x=design,
+                       log.lik=apeglm::logLikNB,
+                       param=disps[idx],
+                       coef=coefNum,
+                       mle=mle,
+                       weights=weights[idx,,drop=FALSE],
+                       offset=offset[idx,,drop=FALSE], ...)
+      })
+      fit <- list()
+      for (param in c("map","se","fsr","svalue","interval","diag")) {
+        fit[[param]] <- do.call(rbind, lapply(fitList, `[[`, param))
+      }
+      fit$prior.control <- fitList[[1]]$prior.control
+      fit$svalue <- apeglm::svalue(fit$fsr[,1])
+    }
+    stopifnot(nrow(fit$map) == nrow(dds))
+    conv <- fit$diag[,"conv"]
+    if (!all(conv[!is.na(conv)] == 0)) {
+      message("Some rows did not converge in finding the MAP")
+    }
+    res$log2FoldChange <- log2(exp(1)) * fit$map[,coefNum]
+    res$lfcSE <- log2(exp(1)) * fit$se[,coefNum]
+    mcols(res)$description[2] <- sub("MLE","MAP",mcols(res)$description[2])
+    if (svalue) {
+      coefAlphaSpaces <- gsub("_"," ",coefAlpha)
+      res <- res[,1:3]
+      res$svalue <- as.numeric(fit$svalue)
+      mcols(res)[4,] <- DataFrame(type="results",
+                                  description=paste("s-value:",coefAlphaSpaces))
+    } else{
+      res <- res[,c(1:3,5:6)]
+    }
+    priorInfo(res) <- list(type="apeglm",
+                           package="apeglm",
+                           version=packageVersion("apeglm"),
+                           prior.control=fit$prior.control)
+    if (returnList) {
+      return(list(res=res, fit=fit))
+    } else{
+      return(res)
+    }
+
+  } else if (type == "ashr") {
+
+    ##########
+    ## ashr ##
+    ##########
+    
+    if (!requireNamespace("ashr", quietly=TRUE)) {
+      stop("type='ashr' requires installing the CRAN package 'ashr'")
+    }
+    message("using 'ashr' for LFC shrinkage. If used in published research, please cite:
+    Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
+    https://doi.org/10.1093/biostatistics/kxw041")
+    betahat <- res$log2FoldChange
+    sebetahat <- res$lfcSE
+    fit <- ashr::ash(betahat, sebetahat,
+                     mixcompdist="normal", method="shrink", ...)
+    res$log2FoldChange <- fit$result$PosteriorMean
+    res$lfcSE <- fit$result$PosteriorSD
+    mcols(res)$description[2] <- sub("MLE","PostMean",mcols(res)$description[2])
+    if (svalue) {
+      coefAlphaSpaces <- sub(".*p-value: ","",mcols(res)$description[5])
+      res <- res[,1:3]
+      res$svalue <- fit$result$svalue
+      mcols(res)[4,] <- DataFrame(type="results",
+                                  description=paste("s-value:",coefAlphaSpaces))
+    } else {
+      res <- res[,c(1:3,5:6)]
+    }
+    priorInfo(res) <- list(type="ashr",
+                           package="ashr",
+                           version=packageVersion("ashr"),
+                           fitted_g=fit$fitted_g)
+    if (returnList) {
+      return(list(res=res, fit=fit))
+    } else{
+      return(res)
+    }
+
+  }
+}
diff --git a/R/methods.R b/R/methods.R
index 6d3f1a5..c7d4652 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -572,7 +572,9 @@ checkForExperimentalReplicates <- function(object, modelMatrix) {
     if (!is.null(modelMatrix)) stop("same number of samples and coefficients to fit with supplied model matrix")
     warning("same number of samples and coefficients to fit,
   estimating dispersion by treating samples as replicates.
-  read the ?DESeq section on 'Experiments without replicates'")
+  please read the ?DESeq section on 'Experiments without replicates'.
+  in summary: this analysis only potentially useful for data exploration,
+  accurate differential expression analysis requires replication")
   }
   noReps
 }
@@ -689,8 +691,6 @@ setMethod("show", signature(object="DESeqResults"), function(object) {
   # Temporary hack for backward compatibility with "old" DESeqDataSet
   # objects. Remove once all serialized DESeqDataSet objects around have
   # been updated.
-  if (!.hasSlot(object, "rowRanges"))
-    object <- updateObject(object)
   cat(mcols(object)$description[ colnames(object) == "log2FoldChange"],"\n")
   cat(mcols(object)$description[ colnames(object) == "pvalue"],"\n")
   show(DataFrame(object))
@@ -808,3 +808,29 @@ summary.DESeqResults <- function(object, alpha, ...) {
   if (ihw) cat("[2] see metadata(res)$ihwResult on hypothesis weighting\n")
   cat("\n")
 }
+
+#' Accessors for the 'priorInfo' slot of a DESeqResults object.
+#' 
+#' The priorInfo slot contains details about the prior on log fold changes
+#' 
+#' @docType methods
+#' @name priorInfo
+#' @rdname priorInfo
+#' @aliases priorInfo priorInfo,DESeqResults-method priorInfo<-,DESeqResults,list-method
+#' 
+#' @param object a \code{DESeqResults} object
+#' @param value a \code{list}
+#' @param ... additional arguments
+#'
+#' @export
+setMethod("priorInfo", signature(object="DESeqResults"),
+          function(object) object at priorInfo)
+
+#' @name priorInfo
+#' @rdname priorInfo
+#' @exportMethod "priorInfo<-"
+setReplaceMethod("priorInfo", signature(object="DESeqResults", value="list"),
+                 function(object, value) {
+                   object at priorInfo <- value
+                   object
+                 })
diff --git a/R/plots.R b/R/plots.R
index cf4e66e..7d70824 100644
--- a/R/plots.R
+++ b/R/plots.R
@@ -97,24 +97,27 @@ plotMA.DESeqResults <- function(object, alpha, main="", xlab="mean of normalized
       metadata(object)$alpha
     }
   }
+
+  # TODO: MA plot should use s-value if present in 'object'
+  
   df <- if (MLE) {
-    # test if MLE is there
-    if (is.null(object$lfcMLE)) {
-      stop("lfcMLE column is not present: you should first run results() with addMLE=TRUE")
-    }
-    data.frame(mean = object$baseMean,
-               lfc = object$lfcMLE,
-               isDE = ifelse(is.na(object$padj), FALSE, object$padj < alpha))
+          # test if MLE is there
+          if (is.null(object$lfcMLE)) {
+            stop("lfcMLE column is not present: you should first run results() with addMLE=TRUE")
+          }
+          data.frame(mean = object$baseMean,
+                     lfc = object$lfcMLE,
+                     isDE = ifelse(is.na(object$padj), FALSE, object$padj < alpha))
+        } else {
+          data.frame(mean = object$baseMean,
+                     lfc = object$log2FoldChange,
+                     isDE = ifelse(is.na(object$padj), FALSE, object$padj < alpha))
+        }
+  if (missing(ylim)) {
+    plotMA(df, main=main, xlab=xlab, ...)
   } else {
-    data.frame(mean = object$baseMean,
-               lfc = object$log2FoldChange,
-               isDE = ifelse(is.na(object$padj), FALSE, object$padj < alpha))
-  }
-    if (missing(ylim)) {
-      plotMA(df, main=main, xlab=xlab, ...)
-    } else {
-       plotMA(df, main=main, xlab=xlab, ylim=ylim, ...)
-    }  
+    plotMA(df, main=main, xlab=xlab, ylim=ylim, ...)
+  }  
 }
 
 #' MA-plot from base means and log fold changes
@@ -197,7 +200,7 @@ plotPCA.DESeqTransform = function(object, intgroup="condition", ntop=500, return
   
   # add the intgroup factors together to create a new grouping factor
   group <- if (length(intgroup) > 1) {
-    factor(apply( intgroup.df, 1, paste, collapse=" : "))
+    factor(apply( intgroup.df, 1, paste, collapse=":"))
   } else {
     colData(object)[[intgroup]]
   }
@@ -313,12 +316,12 @@ plotCounts <- function(dds, gene, intgroup="condition",
   } else if (length(intgroup) == 2) {
     lvls <- as.vector(t(outer(levels(colData(dds)[[intgroup[1]]]),
                               levels(colData(dds)[[intgroup[2]]]),
-                              function(x,y) paste(x,y,sep=" : "))))
+                              function(x,y) paste(x,y,sep=":"))))
     droplevels(factor(apply( as.data.frame(colData(dds)[, intgroup, drop=FALSE]),
-                            1, paste, collapse=" : "), levels=lvls))
+                            1, paste, collapse=":"), levels=lvls))
   } else {
     factor(apply( as.data.frame(colData(dds)[, intgroup, drop=FALSE]),
-                 1, paste, collapse=" : "))
+                 1, paste, collapse=":"))
   }
   data <- data.frame(count=cnts + pc, group=as.integer(group))
   logxy <- if (transform) "y" else "" 
diff --git a/R/results.R b/R/results.R
index b427f97..ef144b6 100644
--- a/R/results.R
+++ b/R/results.R
@@ -490,8 +490,18 @@ of length 3 to 'contrast' instead of using 'name'")
     res$pvalue[nowZero] <- 1
   }
 
+  # add prior information
+  deseq2.version <- packageVersion("DESeq2")
+  if (!attr(object,"betaPrior")) {
+    priorInfo <- list(type="none", package="DESeq2", version=deseq2.version)
+  } else {
+    betaPriorVar <- attr(object, "betaPriorVar")
+    priorInfo <- list(type="normal", package="DESeq2", version=deseq2.version,
+                      betaPriorVar=betaPriorVar)
+  }
+  
   # make results object
-  deseqRes <- DESeqResults(res)
+  deseqRes <- DESeqResults(res, priorInfo=priorInfo)
   
   # p-value adjustment
   if (missing(filterFun)) {
diff --git a/build/vignette.rds b/build/vignette.rds
index ac13429..99f023e 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/doc/DESeq2.R b/inst/doc/DESeq2.R
index a859340..ab31807 100644
--- a/inst/doc/DESeq2.R
+++ b/inst/doc/DESeq2.R
@@ -8,7 +8,9 @@ knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
 #                                colData = coldata,
 #                                design= ~ batch + condition)
 #  dds <- DESeq(dds)
-#  res <- results(dds, contrast=c("condition","treated","control"))
+#  res <- results(dds, contrast=c("condition","treat","ctrl"))
+#  resultsNames(dds)
+#  res <- lfcShrink(dds, coef=2)
 
 ## ----txiSetup------------------------------------------------------------
 library("tximport")
@@ -47,12 +49,13 @@ coldata <- read.csv(pasAnno, row.names=1)
 coldata <- coldata[,c("condition","type")]
 
 ## ----showPasilla---------------------------------------------------------
-head(cts)
-head(coldata)
+head(cts,2)
+coldata
 
 ## ----reorderPasila-------------------------------------------------------
-rownames(coldata) <- sub("fb","",rownames(coldata))
+rownames(coldata) <- sub("fb", "", rownames(coldata))
 all(rownames(coldata) %in% colnames(cts))
+all(rownames(coldata) == colnames(cts))
 cts <- cts[, rownames(coldata)]
 all(rownames(coldata) == colnames(cts))
 
@@ -100,13 +103,14 @@ ddsSE <- DESeqDataSet(se, design = ~ cell + dex)
 ddsSE
 
 ## ----prefilter-----------------------------------------------------------
-dds <- dds[ rowSums(counts(dds)) > 1, ]
+keep <- rowSums(counts(dds)) >= 10
+dds <- dds[keep,]
 
 ## ----factorlvl-----------------------------------------------------------
-dds$condition <- factor(dds$condition, levels=c("untreated","treated"))
+dds$condition <- factor(dds$condition, levels = c("untreated","treated"))
 
 ## ----relevel-------------------------------------------------------------
-dds$condition <- relevel(dds$condition, ref="untreated")
+dds$condition <- relevel(dds$condition, ref = "untreated")
 
 ## ----droplevels----------------------------------------------------------
 dds$condition <- droplevels(dds$condition)
@@ -118,7 +122,7 @@ res
 
 ## ----lfcShrink-----------------------------------------------------------
 resultsNames(dds)
-resLFC <- lfcShrink(dds, coef=2, res=res)
+resLFC <- lfcShrink(dds, coef=2)
 resLFC
 
 ## ----parallel, eval=FALSE------------------------------------------------
@@ -126,7 +130,7 @@ resLFC
 #  register(MulticoreParam(4))
 
 ## ----resOrder------------------------------------------------------------
-resOrdered <- res[order(res$padj),]
+resOrdered <- res[order(res$pvalue),]
 
 ## ----sumRes--------------------------------------------------------------
 summary(res)
@@ -156,6 +160,17 @@ plotMA(resLFC, ylim=c(-2,2))
 #  idx <- identify(res$baseMean, res$log2FoldChange)
 #  rownames(res)[idx]
 
+## ----warning=FALSE-------------------------------------------------------
+resApe <- lfcShrink(dds, coef=2, type="apeglm")
+resAsh <- lfcShrink(dds, coef=2, type="ashr")
+
+## ----fig.width=8, fig.height=3-------------------------------------------
+par(mfrow=c(1,3), mar=c(4,4,2,1))
+xlim <- c(1,1e5); ylim <- c(-3,3)
+plotMA(resLFC, xlim=xlim, ylim=ylim, main="normal")
+plotMA(resApe, xlim=xlim, ylim=ylim, main="apeglm")
+plotMA(resAsh, xlim=xlim, ylim=ylim, main="ashr")
+
 ## ----plotCounts----------------------------------------------------------
 plotCounts(dds, gene=which.min(res$padj), intgroup="condition")
 
@@ -184,6 +199,11 @@ colData(dds)
 ## ----copyMultifactor-----------------------------------------------------
 ddsMF <- dds
 
+## ----fixLevels-----------------------------------------------------------
+levels(ddsMF$type)
+levels(ddsMF$type) <- sub("-.*", "", levels(ddsMF$type))
+levels(ddsMF$type)
+
 ## ----replaceDesign-------------------------------------------------------
 design(ddsMF) <- formula(~ type + condition)
 ddsMF <- DESeq(ddsMF)
@@ -194,23 +214,21 @@ head(resMF)
 
 ## ----multiTypeResults----------------------------------------------------
 resMFType <- results(ddsMF,
-                     contrast=c("type", "single-read", "paired-end"))
+                     contrast=c("type", "single", "paired"))
 head(resMFType)
 
 ## ----rlogAndVST----------------------------------------------------------
+vsd <- vst(dds, blind=FALSE)
 rld <- rlog(dds, blind=FALSE)
-vsd <- varianceStabilizingTransformation(dds, blind=FALSE)
-vsd.fast <- vst(dds, blind=FALSE)
-head(assay(rld), 3)
+head(assay(vsd), 3)
 
 ## ----meansd--------------------------------------------------------------
 # this gives log2(n + 1)
 ntd <- normTransform(dds)
 library("vsn")
-notAllZero <- (rowSums(counts(dds))>0)
-meanSdPlot(assay(ntd)[notAllZero,])
-meanSdPlot(assay(rld[notAllZero,]))
-meanSdPlot(assay(vsd[notAllZero,]))
+meanSdPlot(assay(ntd))
+meanSdPlot(assay(vsd))
+meanSdPlot(assay(rld))
 
 ## ----heatmap-------------------------------------------------------------
 library("pheatmap")
@@ -219,18 +237,18 @@ select <- order(rowMeans(counts(dds,normalized=TRUE)),
 df <- as.data.frame(colData(dds)[,c("condition","type")])
 pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
          cluster_cols=FALSE, annotation_col=df)
-pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
-         cluster_cols=FALSE, annotation_col=df)
 pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
          cluster_cols=FALSE, annotation_col=df)
+pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
+         cluster_cols=FALSE, annotation_col=df)
 
 ## ----sampleClust---------------------------------------------------------
-sampleDists <- dist(t(assay(rld)))
+sampleDists <- dist(t(assay(vsd)))
 
-## ----figHeatmapSamples---------------------------------------------------
+## ----figHeatmapSamples, fig.height=4, fig.width=6------------------------
 library("RColorBrewer")
 sampleDistMatrix <- as.matrix(sampleDists)
-rownames(sampleDistMatrix) <- paste(rld$condition, rld$type, sep="-")
+rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
 colnames(sampleDistMatrix) <- NULL
 colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
 pheatmap(sampleDistMatrix,
@@ -239,10 +257,10 @@ pheatmap(sampleDistMatrix,
          col=colors)
 
 ## ----figPCA--------------------------------------------------------------
-plotPCA(rld, intgroup=c("condition", "type"))
+plotPCA(vsd, intgroup=c("condition", "type"))
 
 ## ----figPCA2-------------------------------------------------------------
-pcaData <- plotPCA(rld, intgroup=c("condition", "type"), returnData=TRUE)
+pcaData <- plotPCA(vsd, intgroup=c("condition", "type"), returnData=TRUE)
 percentVar <- round(100 * attr(pcaData, "percentVar"))
 ggplot(pcaData, aes(PC1, PC2, color=condition, shape=type)) +
   geom_point(size=3) +
@@ -265,7 +283,7 @@ ggplot(pcaData, aes(PC1, PC2, color=condition, shape=type)) +
 #  resultsNames(dds)
 #  results(dds, contrast=c("group", "IB", "IA"))
 
-## ----interFig, echo=FALSE, results="hide"--------------------------------
+## ----interFig, echo=FALSE, results="hide", fig.height=3------------------
 npg <- 20
 mu <- 2^c(8,10,9,11,10,12)
 cond <- rep(rep(c("A","B"),each=npg),3)
@@ -273,7 +291,7 @@ geno <- rep(c("I","II","III"),each=2*npg)
 table(cond, geno)
 counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
 d <- data.frame(log2c=log2(counts+1), cond, geno)
-library(ggplot2)
+library("ggplot2")
 plotit <- function(d, title) {
   ggplot(d, aes(x=cond, y=log2c, group=geno)) + 
     geom_jitter(size=1.5, position = position_jitter(width=.15)) +
@@ -284,7 +302,7 @@ plotit <- function(d, title) {
 plotit(d, "Gene 1") + ylim(7,13)
 lm(log2c ~ cond + geno + geno:cond, data=d)
 
-## ----interFig2, echo=FALSE, results="hide"-------------------------------
+## ----interFig2, echo=FALSE, results="hide", fig.height=3-----------------
 mu[4] <- 2^12
 mu[6] <- 2^8
 counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
@@ -329,24 +347,18 @@ resNoFilt <- results(dds, independentFiltering=FALSE)
 addmargins(table(filtering=(res$padj < .1),
                  noFiltering=(resNoFilt$padj < .1)))
 
-## ----ddsNoPrior----------------------------------------------------------
-ddsNoPrior <- DESeq(dds, betaPrior=FALSE)
-
 ## ----lfcThresh-----------------------------------------------------------
 par(mfrow=c(2,2),mar=c(2,2,1,1))
-yl <- c(-2.5,2.5)
+ylim <- c(-2.5,2.5)
 resGA <- results(dds, lfcThreshold=.5, altHypothesis="greaterAbs")
-resLA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="lessAbs")
+resLA <- results(dds, lfcThreshold=.5, altHypothesis="lessAbs")
 resG <- results(dds, lfcThreshold=.5, altHypothesis="greater")
 resL <- results(dds, lfcThreshold=.5, altHypothesis="less")
-plotMA(resGA, ylim=yl)
-abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
-plotMA(resLA, ylim=yl)
-abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
-plotMA(resG, ylim=yl)
-abline(h=.5,col="dodgerblue",lwd=2)
-plotMA(resL, ylim=yl)
-abline(h=-.5,col="dodgerblue",lwd=2)
+drawLines <- function() abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
+plotMA(resGA, ylim=ylim); drawLines()
+plotMA(resLA, ylim=ylim); drawLines()
+plotMA(resG, ylim=ylim); drawLines()
+plotMA(resL, ylim=ylim); drawLines()
 
 ## ----mcols---------------------------------------------------------------
 mcols(dds,use.names=TRUE)[1:4,1:4]
@@ -370,6 +382,11 @@ head(coef(dds))
 ## ----betaPriorVar--------------------------------------------------------
 attr(dds, "betaPriorVar")
 
+## ----priorInfo-----------------------------------------------------------
+priorInfo(resLFC)
+priorInfo(resApe)
+priorInfo(resAsh)
+
 ## ----dispPriorVar--------------------------------------------------------
 dispersionFunction(dds)
 attr(dispersionFunction(dds), "dispPriorVar")
@@ -478,8 +495,8 @@ dds2 <- makeExampleDESeqDataSet(n=1000,m=12,
 dds2 <- dds2[,7:12]
 dds2$condition <- rep("C",6)
 mcols(dds2) <- NULL
-dds <- cbind(dds1, dds2)
-rld <- rlog(dds, blind=FALSE, fitType="mean")
+dds12 <- cbind(dds1, dds2)
+rld <- rlog(dds12, blind=FALSE, fitType="mean")
 plotPCA(rld)
 
 ## ----convertNA, eval=FALSE-----------------------------------------------
diff --git a/inst/doc/DESeq2.Rmd b/inst/doc/DESeq2.Rmd
index 2969fc5..7e498fc 100644
--- a/inst/doc/DESeq2.Rmd
+++ b/inst/doc/DESeq2.Rmd
@@ -34,10 +34,6 @@ vignette: >
   %\VignetteEncoding[utf8]{inputenc}
 ---
 
-
-<!-- This is the source document -->
-
-
 ```{r setup, echo=FALSE, results="hide"}
 knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
                       dev="png",
@@ -46,11 +42,11 @@ knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
 
 # Standard workflow
 
-**If you use DESeq2 in published research, please cite:**
+**Note:** if you use DESeq2 in published research, please cite:
 
-> Love, M.I., Huber, W., Anders, S.,
-> Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2, 
-> *Genome Biology* 2014, **15**:550.
+> Love, M.I., Huber, W., Anders, S. (2014)
+> Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2.
+> *Genome Biology*, **15**:550.
 > [10.1186/s13059-014-0550-8](http://dx.doi.org/10.1186/s13059-014-0550-8)
 
 Other Bioconductor packages with similar aims are
@@ -70,14 +66,16 @@ you have a count matrix called `cts` and a table of sample
 information called `coldata`.  The `design` indicates how to model the
 samples, here, that we want to measure the effect of the condition,
 controlling for batch differences. The two factor variables `batch`
-and `condition` should be columns of `coldata`.
+and `condition` should  be columns of `coldata`.
 
 ```{r quickStart, eval=FALSE}
 dds <- DESeqDataSetFromMatrix(countData = cts,
                               colData = coldata,
                               design= ~ batch + condition)
 dds <- DESeq(dds)
-res <- results(dds, contrast=c("condition","treated","control"))
+res <- results(dds, contrast=c("condition","treat","ctrl"))
+resultsNames(dds)
+res <- lfcShrink(dds, coef=2)
 ```
 
 The following starting functions will be explained below:
@@ -183,7 +181,7 @@ package. This workflow allows users to import transcript abundance estimates
 from a variety of external software, including the following methods:
 
 * [Salmon](http://combine-lab.github.io/salmon/)
-  [@Patro2016Salmon]
+  [@Patro2017Salmon]
 * [Sailfish](http://www.cs.cmu.edu/~ckingsf/software/sailfish/)
   [@Patro2014Sailfish]
 * [kallisto](https://pachterlab.github.io/kallisto/about.html)
@@ -211,6 +209,15 @@ Note that the tximport-to-DESeq2 approach uses *estimated* gene
 counts from the transcript abundance quantifiers, but not *normalized*
 counts.
 
+A tutorial on how to use the *Salmon* software for quantifying
+transcript abundance can be
+found [here](https://combine-lab.github.io/salmon/getting_started/).
+We recommend using the `--gcBias` 
+[flag](http://salmon.readthedocs.io/en/latest/salmon.html#gcbias)
+which estimates a correction factor for systematic biases
+commonly present in RNA-seq data [@Love2016Modeling; @Patro2017Salmon], 
+unless you are certain that your data do not contain such bias.
+
 Here, we demonstrate how to import transcript abundances
 and construct of a gene-level *DESeqDataSet* object
 from *Salmon* `quant.sf` files, which are
@@ -224,7 +231,6 @@ For a typical use, the `condition` information should already be
 present as a column of the sample table `samples`, while here we
 construct artificial condition labels for demonstration.
 
-
 ```{r txiSetup}
 library("tximport")
 library("readr")
@@ -303,31 +309,39 @@ coldata <- read.csv(pasAnno, row.names=1)
 coldata <- coldata[,c("condition","type")]
 ```
 
-We examine the count matrix and column data to see if they are consisent:
+We examine the count matrix and column data to see if they are
+consistent in terms of sample order.
 
 ```{r showPasilla}
-head(cts)
-head(coldata)
+head(cts,2)
+coldata
 ```
 
 Note that these are not in the same order with respect to samples! 
 
-It is critical that the columns of the count matrix and the rows of
-the column data (information about samples) are in the same order.
-We should re-arrange one or the other so that they are consistent in
-terms of sample order (if we do not, later functions would produce
-an error). We additionally need to chop off the `"fb"` of the 
-row names of `coldata`, so the naming is consistent.
+It is absolutely critical that the columns of the count matrix and the
+rows of the column data (information about samples) are in the same
+order.  DESeq2 will not make guesses as to which column of the count
+matrix belongs to which row of the column data, these must be provided
+to DESeq2 already in consistent order.
+
+As they are not in the correct order as given, we need to re-arrange
+one or the other so that they are consistent in terms of sample order
+(if we do not, later functions would produce an error). We
+additionally need to chop off the `"fb"` of the row names of
+`coldata`, so the naming is consistent.
 
 ```{r reorderPasila}
-rownames(coldata) <- sub("fb","",rownames(coldata))
+rownames(coldata) <- sub("fb", "", rownames(coldata))
 all(rownames(coldata) %in% colnames(cts))
+all(rownames(coldata) == colnames(cts))
 cts <- cts[, rownames(coldata)]
 all(rownames(coldata) == colnames(cts))
 ```
 
 If you have used the *featureCounts* function [@Liao2013feature] in the 
-[Rsubread](http://bioconductor.org/packages/Rsubread) package, the matrix of read counts can be directly 
+[Rsubread](http://bioconductor.org/packages/Rsubread) package, 
+the matrix of read counts can be directly 
 provided from the `"counts"` element in the list output.
 The count matrix and column data can typically be read into R 
 from flat files using base R functions such as *read.csv*
@@ -433,20 +447,20 @@ ddsSE
 
 ### Pre-filtering
 
-While it is not necessary to pre-filter low count genes before running the DESeq2
-functions, there are two reasons which make pre-filtering useful:
-by removing rows in which there are no reads or nearly no reads,
-we reduce the memory size of the `dds` data object and 
-we increase the speed of the transformation
-and testing functions within DESeq2. Here we perform a minimal
-pre-filtering to remove rows that have only 0 or 1 read. Note that more strict
-filtering to increase power is *automatically* 
-applied via [independent filtering](#indfilt) on the mean of
-normalized counts within the *results* function. 
+While it is not necessary to pre-filter low count genes before running
+the DESeq2 functions, there are two reasons which make pre-filtering
+useful: by removing rows in which there are very few reads, we reduce
+the memory size of the `dds` data object, and we increase the speed of
+the transformation and testing functions within DESeq2. Here we
+perform a minimal pre-filtering to keep only rows that have at least
+10 reads total. Note that more strict filtering to increase power is
+*automatically* applied via [independent filtering](#indfilt) on the
+mean of normalized counts within the *results* function.
 
 ```{r prefilter}
-dds <- dds[ rowSums(counts(dds)) > 1, ]
-``` 
+keep <- rowSums(counts(dds)) >= 10
+dds <- dds[keep,]
+```
 
 ### Note on factor levels 
 
@@ -457,17 +471,19 @@ control group), the comparisons will be based on the alphabetical
 order of the levels. There are two solutions: you can either
 explicitly tell *results* which comparison to make using the
 `contrast` argument (this will be shown later), or you can explicitly
-set the factors levels. Setting the factor levels can be done in two
-ways, either using factor:
+set the factors levels. You should only change the factor levels
+of variables in the design **before** running the DESeq2 analysis, not
+during or afterward. Setting the factor levels can be done in two
+ways, either using factor: 
 
 ```{r factorlvl}
-dds$condition <- factor(dds$condition, levels=c("untreated","treated"))
+dds$condition <- factor(dds$condition, levels = c("untreated","treated"))
 ``` 
 
 ...or using *relevel*, just specifying the reference level:
 
 ```{r relevel}
-dds$condition <- relevel(dds$condition, ref="untreated")
+dds$condition <- relevel(dds$condition, ref = "untreated")
 ``` 
 
 If you need to subset the columns of a *DESeqDataSet*,
@@ -548,29 +564,33 @@ fold changes.
 
 ```{r lfcShrink}
 resultsNames(dds)
-resLFC <- lfcShrink(dds, coef=2, res=res)
+resLFC <- lfcShrink(dds, coef=2)
 resLFC
 ```
 
-The above steps should take less than 30 seconds for most analyses. For
-experiments with many samples (e.g. 100 samples), one can take
-advantage of parallelized computation.  Both of the above functions
-have an argument `parallel` which if set to `TRUE` can
-be used to distribute computation across cores specified by the
-*register* function of [BiocParallel](http://bioconductor.org/packages/BiocParallel). For example,
-the following chunk (not evaluated here), would register 4 cores, and
-then the two functions above, with `parallel=TRUE`, would
-split computation over these cores. 
+<a name="parallel"/>
+
+The above steps should take less than 30 seconds for most
+analyses. For experiments with many samples (e.g. 100 samples), one
+can take advantage of parallelized computation. Parallelizing `DESeq`,
+`results`, and `lfcShrink` can be easily accomplished by loading the
+BiocParallel package, and then setting the following arguments:
+`parallel=TRUE` and `BPPARAM=MulticoreParam(4)`, for example,
+splitting the job over 4 cores. Note that `results` for coefficients
+or contrasts listed in `resultsNames(dds)` is fast and will not need
+parallelization. As an alternative to `BPPARAM`, one can `register`
+cores at the beginning of an analysis, and then just specify
+`parallel=TRUE` to the functions when called.
 
 ```{r parallel, eval=FALSE}
 library("BiocParallel")
 register(MulticoreParam(4))
 ```
 
-We can order our results table by the smallest adjusted *p* value:
+We can order our results table by the smallest *p* value:
 
 ```{r resOrder}
-resOrdered <- res[order(res$padj),]
+resOrdered <- res[order(res$pvalue),]
 ```
 
 We can summarize some basic tallies using the
@@ -606,12 +626,22 @@ sum(res05$padj < 0.05, na.rm=TRUE)
 
 <a name="IHW"/>
 
-A generalization of the idea of *p* value filtering is to *weight* hypotheses
-to optimize power. A Bioconductor package, [IHW](http://bioconductor.org/packages/IHW), is available
-that implements the method of *Independent Hypothesis Weighting* [@Ignatiadis2015].
-Here we show the use of *IHW* for *p* value adjustment of DESeq2 results.
-For more details, please see the vignette of the [IHW](http://bioconductor.org/packages/IHW) package.
-Note that the *IHW* result object is stored in the metadata.
+A generalization of the idea of *p* value filtering is to *weight*
+hypotheses to optimize power. A Bioconductor
+package, [IHW](http://bioconductor.org/packages/IHW), is available
+that implements the method of *Independent Hypothesis Weighting*
+[@Ignatiadis2016].  Here we show the use of *IHW* for *p* value
+adjustment of DESeq2 results.  For more details, please see the
+vignette of the [IHW](http://bioconductor.org/packages/IHW)
+package. The *IHW* result object is stored in the metadata.
+
+**Note:** If the results of independent hypothesis weighting are used
+in published research, please cite:
+
+> Ignatiadis, N., Klaus, B., Zaugg, J.B., Huber, W. (2016)
+> Data-driven hypothesis weighting increases detection power in genome-scale multiple testing.
+> *Nature Methods*, **13**:7.
+> [10.1038/nmeth.3885](http://dx.doi.org/10.1038/nmeth.3885)
 
 ```{r IHW}
 library("IHW")
@@ -646,7 +676,7 @@ either up or down.
 plotMA(res, ylim=c(-2,2))
 ```
 
-It is also useful to visualize the MA-plot for the shrunken log2 fold
+It is more useful visualize the MA-plot for the shrunken log2 fold
 changes, which remove the noise associated with log2 fold changes from
 low count genes without requiring arbitrary filtering thresholds.
 
@@ -664,6 +694,75 @@ idx <- identify(res$baseMean, res$log2FoldChange)
 rownames(res)[idx]
 ``` 
 
+### Alternative shrinkage estimators
+
+The moderated log fold changes proposed by @Love2014 use a normal
+prior distribution, centered on zero and with a scale that is fit to
+the data. The shrunken log fold changes are useful for ranking and
+visualization, without the need for arbitrary filters on low count
+genes. The normal prior can sometimes produce too strong of
+shrinkage for certain datasets. In DESeq2 version 1.18, we include two
+additional adaptive shrinkage estimators, available via the `type`
+argument of `lfcShrink`. For more details, see `?lfcShrink`
+
+The options for `type` are:
+
+* `normal` is the the original DESeq2 shrinkage estimator, an adaptive
+  normal prior
+* `apeglm` is the adaptive t prior shrinkage estimator from the 
+  [apeglm](http://bioconductor.org/packages/apeglm) package
+* `ashr` is the adaptive shrinkage estimator from the
+  [ashr](https://github.com/stephens999/ashr) package [@Stephens2016].
+  Here DESeq2 uses the ashr option to fit a mixture of normal distributions to
+  form the prior, with `method="shrinkage"`
+
+**Note:** if the shrinkage estimator `type="ashr"` is used in published
+research, please cite:
+
+> Stephens, M. (2016) 
+> False discovery rates: a new deal. *Biostatistics*, **18**:2.
+> [10.1093/biostatistics/kxw041](https://doi.org/10.1093/biostatistics/kxw041)
+
+```{r warning=FALSE}
+resApe <- lfcShrink(dds, coef=2, type="apeglm")
+resAsh <- lfcShrink(dds, coef=2, type="ashr")
+```
+
+```{r fig.width=8, fig.height=3}
+par(mfrow=c(1,3), mar=c(4,4,2,1))
+xlim <- c(1,1e5); ylim <- c(-3,3)
+plotMA(resLFC, xlim=xlim, ylim=ylim, main="normal")
+plotMA(resApe, xlim=xlim, ylim=ylim, main="apeglm")
+plotMA(resAsh, xlim=xlim, ylim=ylim, main="ashr")
+```
+
+**Note:** due to the nature of the statistical model and optimization
+approach, `apeglm` is usually a factor of ~5 slower than `normal`. For
+example, with 10,000 genes and 10 samples, `normal` may take ~3
+seconds, while `apeglm` takes ~15 seconds (on a laptop).  However,
+`apeglm` can be more than an order of magnitude slower when there are
+many coefficients, e.g. 10 or more coefficients in
+`resultsNames(dds)`. The method `ashr` is fairly fast and does not
+depend on the number of coefficients, as it uses only the estimated
+MLE coefficients and their standard errors. A solution for speeding up
+`normal` and `apeglm` is to use multiple cores. This can be easily
+accomplished by loading the BiocParallel package, and then setting the
+following arguments of `lfcShrink`: `parallel=TRUE` and
+`BPPARAM=MulticoreParam(4)`, for example, splitting the job over 4
+cores. This approach can also be used with `DESeq` and `results`, as
+mentioned [above](#parallel).
+
+**Note:** If there is unwanted variation present in the data (e.g. batch
+effects) it is always recommend to correct for this, which can be
+accommodated in DESeq2 by including in the design any known batch
+variables or by using functions/packages such as 
+`svaseq` in [sva](http://bioconductor.org/packages/sva) [@Leek2014] or 
+the `RUV` functions in [RUVSeq](http://bioconductor.org/packages/RUVSeq) [@Risso2014]
+to estimate variables that capture the unwanted variation.
+In addition, the ashr developers have a 
+[specific method](https://github.com/dcgerard/vicar) 
+for accounting for unwanted variation in combination with ashr [@Gerard2017].
+
 ### Plot counts 
 
 It can also be useful to examine the counts of reads for a single gene
@@ -808,11 +907,22 @@ the analysis using a multi-factor design.
 ddsMF <- dds
 ```
 
+We change the levels of `type` so it only contains letters (numbers, underscore and
+period are also allowed in design factor levels). Be careful when
+changing level names to use the same order as the current levels.
+
+```{r fixLevels}
+levels(ddsMF$type)
+levels(ddsMF$type) <- sub("-.*", "", levels(ddsMF$type))
+levels(ddsMF$type)
+```
+
 We can account for the different types of sequencing, and get a clearer picture
 of the differences attributable to the treatment.  As `condition` is the
 variable of interest, we put it at the end of the formula. Thus the *results*
 function will by default pull the `condition` results unless 
 `contrast` or `name` arguments are specified. 
+
 Then we can re-run *DESeq*:
 
 ```{r replaceDesign}
@@ -837,7 +947,7 @@ described in the help page for *results* and [below](#contrasts)
 
 ```{r multiTypeResults}
 resMFType <- results(ddsMF,
-                     contrast=c("type", "single-read", "paired-end"))
+                     contrast=c("type", "single", "paired"))
 head(resMFType)
 ```
 
@@ -870,18 +980,19 @@ where *n* represents the count values and $n_0$ is a positive constant.
 
 In this section, we discuss two alternative
 approaches that offer more theoretical justification and a rational way
-of choosing the parameter equivalent to $n_0$ above.
-The *regularized logarithm* or *rlog* incorporates a prior on
-the sample differences [@Love2014], 
-and the other uses the concept of variance stabilizing
-transformations (VST) [@Tibshirani1988; @sagmb2003; @Anders:2010:GB].
+of choosing parameters equivalent to $n_0$ above.
+One makes use of the concept of variance stabilizing
+transformations (VST) [@Tibshirani1988; @sagmb2003; @Anders:2010:GB],
+and the other is the *regularized logarithm* or *rlog*, which
+incorporates a prior on the sample differences [@Love2014].
 Both transformations produce transformed data on the log2 scale
-which has been normalized with respect to library size.
+which has been normalized with respect to library size or other
+normalization factors.
 
-The point of these two transformations, the *rlog* and the VST,
+The point of these two transformations, the VST and the *rlog*,
 is to remove the dependence of the variance on the mean,
 particularly the high variance of the logarithm of count data when the
-mean is low. Both *rlog* and VST use the experiment-wide trend
+mean is low. Both VST and *rlog* use the experiment-wide trend
 of variance over mean, in order to transform the data to remove the
 experiment-wide trend. Note that we do not require or
 desire that all the genes have *exactly* the same variance after
@@ -897,12 +1008,12 @@ the *rlog* function might take too long, and so the *vst* function
 will be a faster choice. 
 The rlog and VST have similar properties, but the rlog requires
 fitting a shrinkage term for each sample and each gene which takes
-time.  See the DESeq2 paper for more discussion on the differences
+time. See the DESeq2 paper for more discussion on the differences
 [@Love2014].
 
 ### Blind dispersion estimation
 
-The two functions, *rlog* and *vst* have an argument
+The two functions, *vst* and *rlog* have an argument
 `blind`, for whether the transformation should be blind to the
 sample information specified by the design formula. When
 `blind` equals `TRUE` (the default), the functions
@@ -935,22 +1046,29 @@ experimental group in applying the transformation.
 These transformation functions return an object of class *DESeqTransform*
 which is a subclass of *RangedSummarizedExperiment*. 
 For ~20 samples, running on a newly created `DESeqDataSet`,
-*rlog* may take 30 seconds, 
-*varianceStabilizingTransformation* may take 5 seconds, and
-*vst* less than 1 second (by subsetting to 1000 genes for
-calculating the global dispersion trend).
-However, the running times are shorter and more similar with `blind=FALSE` and
+*rlog* may take 30 seconds, while *vst* takes less than 1 second.
+The running times are shorter when using `blind=FALSE` and
 if the function *DESeq* has already been run, because then
 it is not necessary to re-estimate the dispersion values.
 The *assay* function is used to extract the matrix of normalized values.
 
 ```{r rlogAndVST}
+vsd <- vst(dds, blind=FALSE)
 rld <- rlog(dds, blind=FALSE)
-vsd <- varianceStabilizingTransformation(dds, blind=FALSE)
-vsd.fast <- vst(dds, blind=FALSE)
-head(assay(rld), 3)
+head(assay(vsd), 3)
 ```
 
+### Variance stabilizing transformation
+
+Above, we used a parametric fit for the dispersion. In this case, the
+closed-form expression for the variance stabilizing transformation is
+used by the *vst* function. If a local fit is used (option
+`fitType="locfit"` to *estimateDispersions*) a numerical integration
+is used instead. The transformed data should be approximated variance
+stabilized and also includes correction for size factors or
+normalization factors. The transformed data is on the log2 scale for
+large counts.
+
 ### Regularized log transformation
 
 The function *rlog*, stands for *regularized log*,
@@ -979,16 +1097,6 @@ in sequencing depth. Without priors, this design matrix would lead to
 a non-unique solution, however the addition of a prior on
 non-intercept betas allows for a unique solution to be found. 
 
-### Variance stabilizing transformation
-
-Above, we used a parametric fit for the dispersion. In this case, the
-closed-form expression for the variance stabilizing transformation is
-used by *varianceStabilizingTransformation*, which is
-derived in the file `vst.pdf`, that is distributed in the
-package alongside this vignette. If a local fit is used (option
-`fitType="locfit"` to *estimateDispersions*) a numerical integration
-is used instead. 
-
 ### Effects of transformations on the variance
 
 The figure below plots the standard deviation of the transformed data,
@@ -1010,10 +1118,9 @@ differences due to the experimental conditions.
 # this gives log2(n + 1)
 ntd <- normTransform(dds)
 library("vsn")
-notAllZero <- (rowSums(counts(dds))>0)
-meanSdPlot(assay(ntd)[notAllZero,])
-meanSdPlot(assay(rld[notAllZero,]))
-meanSdPlot(assay(vsd[notAllZero,]))
+meanSdPlot(assay(ntd))
+meanSdPlot(assay(vsd))
+meanSdPlot(assay(rld))
 ```
 
 ## Data quality assessment by sample clustering and visualization
@@ -1043,34 +1150,33 @@ select <- order(rowMeans(counts(dds,normalized=TRUE)),
 df <- as.data.frame(colData(dds)[,c("condition","type")])
 pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
          cluster_cols=FALSE, annotation_col=df)
-pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
-         cluster_cols=FALSE, annotation_col=df)
 pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
          cluster_cols=FALSE, annotation_col=df)
+pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
+         cluster_cols=FALSE, annotation_col=df)
 ```
 
 ### Heatmap of the sample-to-sample distances
 
-Another use of the transformed data is sample clustering. Here, we apply the
-*dist* function to the transpose of the transformed count matrix to get
-sample-to-sample distances. We could alternatively use the variance stabilized
-transformation here.
+Another use of the transformed data is sample clustering. Here, we
+apply the *dist* function to the transpose of the transformed count
+matrix to get sample-to-sample distances.
 
 ```{r sampleClust}
-sampleDists <- dist(t(assay(rld)))
+sampleDists <- dist(t(assay(vsd)))
 ```
 
-A heatmap of this distance matrix gives us an overview over similarities
-and dissimilarities between samples.
-We have to provide a hierarchical clustering `hc` to the heatmap
-function based on the sample distances, or else the heatmap
-function would calculate a clustering based on the distances between
-the rows/columns of the distance matrix.
+A heatmap of this distance matrix gives us an overview over
+similarities and dissimilarities between samples.  We have to provide
+a hierarchical clustering `hc` to the heatmap function based on the
+sample distances, or else the heatmap function would calculate a
+clustering based on the distances between the rows/columns of the
+distance matrix.
 
-```{r figHeatmapSamples}
+```{r figHeatmapSamples, fig.height=4, fig.width=6}
 library("RColorBrewer")
 sampleDistMatrix <- as.matrix(sampleDists)
-rownames(sampleDistMatrix) <- paste(rld$condition, rld$type, sep="-")
+rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
 colnames(sampleDistMatrix) <- NULL
 colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
 pheatmap(sampleDistMatrix,
@@ -1087,14 +1193,14 @@ components. This type of plot is useful for visualizing the overall
 effect of experimental covariates and batch effects.
 
 ```{r figPCA}
-plotPCA(rld, intgroup=c("condition", "type"))
+plotPCA(vsd, intgroup=c("condition", "type"))
 ```
 
 It is also possible to customize the PCA plot using the
 *ggplot* function.
 
 ```{r figPCA2}
-pcaData <- plotPCA(rld, intgroup=c("condition", "type"), returnData=TRUE)
+pcaData <- plotPCA(vsd, intgroup=c("condition", "type"), returnData=TRUE)
 percentVar <- round(100 * attr(pcaData, "percentVar"))
 ggplot(pcaData, aes(PC1, PC2, color=condition, shape=type)) +
   geom_point(size=3) +
@@ -1199,7 +1305,7 @@ Here, the y-axis represents log2(n+1), and each
 group has 20 samples (black dots). A red line connects the mean of the
 groups within each genotype. 
 
-```{r interFig, echo=FALSE, results="hide"}
+```{r interFig, echo=FALSE, results="hide", fig.height=3}
 npg <- 20
 mu <- 2^c(8,10,9,11,10,12)
 cond <- rep(rep(c("A","B"),each=npg),3)
@@ -1207,7 +1313,7 @@ geno <- rep(c("I","II","III"),each=2*npg)
 table(cond, geno)
 counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
 d <- data.frame(log2c=log2(counts+1), cond, geno)
-library(ggplot2)
+library("ggplot2")
 plotit <- function(d, title) {
   ggplot(d, aes(x=cond, y=log2c, group=geno)) + 
     geom_jitter(size=1.5, position = position_jitter(width=.15)) +
@@ -1231,7 +1337,7 @@ obtained by adding the main condition effect and the interaction
 term for that genotype.  Such a plot can be made using the
 *plotCounts* function as shown above.
 
-```{r interFig2, echo=FALSE, results="hide"}
+```{r interFig2, echo=FALSE, results="hide", fig.height=3}
 mu[4] <- 2^12
 mu[6] <- 2^8
 counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
@@ -1262,13 +1368,6 @@ can be found by typing `?results`. In particular, we show how to
 test for differences in the condition effect across genotype, and we
 show how to obtain the condition effect for non-reference genotypes.
 
-Note that for DESeq2 versions higher than 1.10, the *DESeq* function
-will turn off log fold change shrinkage (setting `betaPrior=FALSE`),
-for designs which contain an interaction term. Turning off the log
-fold change shrinkage allows the software to use standard model
-matrices (as would be produced by *model.matrix*), where the
-interaction coefficients are easier to interpret.
-
 ## Time-series experiments
 
 There are a number of ways to analyze time-series experiments,
@@ -1523,45 +1622,22 @@ specified by the `name` argument, and $x$ is the `lfcThreshold`.
 * `greater` - $\beta > x$
 * `less` - $\beta < -x$
 
-The test `altHypothesis="lessAbs"` requires that the user have
-run *DESeq* with the argument `betaPrior=FALSE`.  To
-understand the reason for this requirement, consider that during
-hypothesis testing, the null hypothesis is favored unless the data
-provide strong evidence to reject the null.  For this test, including
-a zero-centered prior on log fold change would favor the alternative
-hypothesis, shrinking log fold changes toward zero.  Removing the
-prior on log fold changes for tests of small log fold change allows
-for detection of only those genes where the data alone provides
-evidence against the null.
-
 The four possible values of `altHypothesis` are demonstrated
 in the following code and visually by MA-plots in the following figures.
-First we run *DESeq* and specify `betaPrior=FALSE` in order 
-to demonstrate `altHypothesis="lessAbs"`.
-
-```{r ddsNoPrior}
-ddsNoPrior <- DESeq(dds, betaPrior=FALSE)
-```
-
-In order to produce results tables for the following tests, the same arguments
-(except `ylim`) would be provided to the *results* function. 
 
 ```{r lfcThresh}
 par(mfrow=c(2,2),mar=c(2,2,1,1))
-yl <- c(-2.5,2.5)
+ylim <- c(-2.5,2.5)
 resGA <- results(dds, lfcThreshold=.5, altHypothesis="greaterAbs")
-resLA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="lessAbs")
+resLA <- results(dds, lfcThreshold=.5, altHypothesis="lessAbs")
 resG <- results(dds, lfcThreshold=.5, altHypothesis="greater")
 resL <- results(dds, lfcThreshold=.5, altHypothesis="less")
-plotMA(resGA, ylim=yl)
-abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
-plotMA(resLA, ylim=yl)
-abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
-plotMA(resG, ylim=yl)
-abline(h=.5,col="dodgerblue",lwd=2)
-plotMA(resL, ylim=yl)
-abline(h=-.5,col="dodgerblue",lwd=2)
-``` 
+drawLines <- function() abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
+plotMA(resGA, ylim=ylim); drawLines()
+plotMA(resLA, ylim=ylim); drawLines()
+plotMA(resG, ylim=ylim); drawLines()
+plotMA(resL, ylim=ylim); drawLines()
+```
 
 <a name="access"/>
 
@@ -1622,6 +1698,17 @@ The beta prior variance $\sigma_r^2$ is stored as an attribute of the
 attr(dds, "betaPriorVar")
 ``` 
 
+General information about the prior used for log fold change shrinkage
+is also stored in a slot of the *DESeqResults* object. This would
+also contain information about what other packages were used
+for log2 fold change shrinkage.
+
+```{r priorInfo}
+priorInfo(resLFC)
+priorInfo(resApe)
+priorInfo(resAsh)
+```
+
 The dispersion prior variance $\sigma_d^2$ is stored as an
 attribute of the dispersion function:
 
@@ -1955,7 +2042,12 @@ version *DESeq*, are as follows:
 
 ## Methods changes since the 2014 DESeq2 paper
 
-* In version 1.16 (Novermber 2016), the log2 fold change 
+* In version 1.18 (November 2018), we add two 
+  [alternative shrinkage estimators](#alternative-shrinkage-estimators),
+  which can be used via `lfcShrink`: an estimator using a t prior from
+  the apeglm packages, and an estimator with a fitted mixture of
+  normals prior from the ashr package.
+* In version 1.16 (November 2016), the log2 fold change 
   shrinkage is no longer default for the *DESeq* and *nbinomWaldTest*
   functions, by setting the defaults of these to `betaPrior=FALSE`,
   and by introducing a separate function *lfcShrink*, which performs
@@ -1965,7 +2057,7 @@ version *DESeq*, are as follows:
   as an inference engine by a wider community, and certain sequencing
   datasets show better performance with the testing separated from the
   use of the LFC prior. Also, the separation of LFC shrinkage to a separate
-  function *lfcShrink* allows for easier methods development of
+  function `lfcShrink` allows for easier methods development of
   alternative effect size estimators.
 * A small change to the independent filtering routine: instead
   of taking the quantile of the filter (the mean of normalized counts) which
@@ -1978,7 +2070,7 @@ version *DESeq*, are as follows:
 * For the calculation of the beta prior variance, instead of
   matching the empirical quantile to the quantile of a Normal
   distribution, DESeq2 now uses the weighted quantile function
-  of the \CRANpkg{Hmisc} package. The weighting is described in the
+  of the Hmisc package. The weighting is described in the
   manual page for *nbinomWaldTest*.  The weights are the
   inverse of the expected variance of log counts (as used in the
   diagonals of the matrix $W$ in the GLM). The effect of the change
@@ -2033,28 +2125,19 @@ $$ W = \frac{c^t \beta}{\sqrt{c^t \Sigma c}} $$
 
 ## Expanded model matrices 
 
-DESeq2 uses *expanded model matrices* in conjunction with the log2
-fold change prior, in order to produce shrunken log2 fold change estimates and test 
-results which are independent of the choice of reference level. 
-Another way of saying this is that the shrinkage is *symmetric*
-with respect to all the levels of the factors in the design.
-The expanded model matrices differ from the standard model matrices, in that
-they have an indicator column (and therefore a coefficient) for
-each level of factors in the design formula in addition to an intercept. 
-Note that in version 1.10 and onward, standard model matrices are used for
-designs with interaction terms, as the shrinkage of log2 fold changes
-is not recommended for these designs.
-
-The expanded model matrices are not full rank, but a coefficient
-vector $\beta_i$ can still be found due to the zero-centered prior on
-non-intercept coefficients. The prior variance for the log2 fold
-changes is calculated by first generating maximum likelihood estimates
-for a standard model matrix. The prior variance for each level of a
-factor is then set as the average of the mean squared maximum
-likelihood estimates for each level and every possible contrast, such
-that that this prior value will be reference-level-independent. The
-`contrast` argument of the *results* function is
-used in order to generate comparisons of interest.
+For the specific combination of `lfcShrink` with the type `normal` and
+using `contrast`, DESeq2 uses *expanded model matrices* to produce
+shrunken log2 fold change estimates where the shrinkage is independent
+of the choice of reference level. In all other cases, DESeq2 uses
+standard model matrices, as produced by `model.matrix`.  The expanded
+model matrices differ from the standard model matrices, in that they
+have an indicator column (and therefore a coefficient) for each level
+of factors in the design formula in addition to an intercept. This is
+described in the DESeq2 paper, but the DESeq2 software package has
+moved away from this approach, with more support for shrinkage of
+individual coefficients (although the expanded model matrix approach
+is still supported using the above combination of functions and
+arguments).
 
 <a name="indfilttheory"/>
 
@@ -2243,8 +2326,8 @@ dds2 <- makeExampleDESeqDataSet(n=1000,m=12,
 dds2 <- dds2[,7:12]
 dds2$condition <- rep("C",6)
 mcols(dds2) <- NULL
-dds <- cbind(dds1, dds2)
-rld <- rlog(dds, blind=FALSE, fitType="mean")
+dds12 <- cbind(dds1, dds2)
+rld <- rlog(dds12, blind=FALSE, fitType="mean")
 plotPCA(rld)
 ``` 
 
@@ -2330,7 +2413,7 @@ See the help page for *results* for more details.
 See the manual page for *DESeq*, which links to the 
 subfunctions which are called in order, where complete details are
 listed. Also you can read the three steps listed in the 
-[the DESeq2 model](#theory) in this document.
+[DESeq2 model](#theory) in this document.
 
 
 ## Is there an official Galaxy tool for DESeq2?
@@ -2383,7 +2466,9 @@ feedback of many individuals, including but not limited to:
 
 The Bionconductor Core Team,
 Alejandro Reyes, Andrzej Oles, Aleksandra Pekowska, Felix Klein,
-Nikolaos Ignatiadis,
+Nikolaos Ignatiadis (IHW),
+Anqi Zhu (apeglm),
+Joseph Ibrahim (apeglm),
 Vince Carey,
 Owen Solberg,
 Ruping Sun,
@@ -2408,7 +2493,8 @@ Bjorn Gruning,
 Ryan McMinds,
 Paul Gordon,
 Leonardo Collado Torres,
-Enrico Ferrero.
+Enrico Ferrero,
+Peter Langfelder.
 
 # Session info
 
diff --git a/inst/doc/DESeq2.html b/inst/doc/DESeq2.html
index 5bf07da..3a11161 100644
--- a/inst/doc/DESeq2.html
+++ b/inst/doc/DESeq2.html
@@ -11,6 +11,7 @@
 
 <meta name="author" content="Michael I. Love, Simon Anders, and Wolfgang Huber" />
 
+<meta name="date" content="2017-10-30" />
 
 <title>Analyzing RNA-seq data with DESeq2</title>
 
@@ -156,10 +157,10 @@ $(document).ready(function () {
 
 <h1 class="title toc-ignore">Analyzing RNA-seq data with DESeq2</h1>
 <h4 class="author"><em>Michael I. Love, Simon Anders, and Wolfgang Huber</em></h4>
-<h4 class="date"><em>5 May 2017</em></h4>
+<h4 class="date"><em>30 October 2017</em></h4>
 <div class="abstract">
 <p class="abstract">Abstract</p>
-<p>A basic task in the analysis of count data from RNA-seq is the detection of differentially expressed genes. The count data are presented as a table which reports, for each sample, the number of sequence fragments that have been assigned to each gene. Analogous data also arise for other assay types, including comparative ChIP-Seq, HiC, shRNA screening, mass spectrometry. An important analysis question is the quantification and statistical inference of systematic changes between conditi [...]
+<p>A basic task in the analysis of count data from RNA-seq is the detection of differentially expressed genes. The count data are presented as a table which reports, for each sample, the number of sequence fragments that have been assigned to each gene. Analogous data also arise for other assay types, including comparative ChIP-Seq, HiC, shRNA screening, mass spectrometry. An important analysis question is the quantification and statistical inference of systematic changes between conditi [...]
 </div>
 
 </div>
@@ -184,6 +185,7 @@ $(document).ready(function () {
 <li><a href="#differential-expression-analysis">Differential expression analysis</a></li>
 <li><a href="#exploring-and-exporting-results">Exploring and exporting results</a><ul>
 <li><a href="#ma-plot">MA-plot</a></li>
+<li><a href="#alternative-shrinkage-estimators">Alternative shrinkage estimators</a></li>
 <li><a href="#plot-counts">Plot counts</a></li>
 <li><a href="#more-information-on-results-columns">More information on results columns</a></li>
 <li><a href="#rich-visualization-and-reporting-of-results">Rich visualization and reporting of results</a></li>
@@ -195,8 +197,8 @@ $(document).ready(function () {
 <li><a href="#count-data-transformations">Count data transformations</a><ul>
 <li><a href="#blind-dispersion-estimation">Blind dispersion estimation</a></li>
 <li><a href="#extracting-transformed-values">Extracting transformed values</a></li>
-<li><a href="#regularized-log-transformation">Regularized log transformation</a></li>
 <li><a href="#variance-stabilizing-transformation">Variance stabilizing transformation</a></li>
+<li><a href="#regularized-log-transformation">Regularized log transformation</a></li>
 <li><a href="#effects-of-transformations-on-the-variance">Effects of transformations on the variance</a></li>
 </ul></li>
 <li><a href="#data-quality-assessment-by-sample-clustering-and-visualization">Data quality assessment by sample clustering and visualization</a><ul>
@@ -260,12 +262,11 @@ $(document).ready(function () {
 </ul>
 </div>
 
-<!-- This is the source document -->
 <div id="standard-workflow" class="section level1">
 <h1>Standard workflow</h1>
-<p><strong>If you use DESeq2 in published research, please cite:</strong></p>
+<p><strong>Note:</strong> if you use DESeq2 in published research, please cite:</p>
 <blockquote>
-<p>Love, M.I., Huber, W., Anders, S., Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2, <em>Genome Biology</em> 2014, <strong>15</strong>:550. <a href="http://dx.doi.org/10.1186/s13059-014-0550-8">10.1186/s13059-014-0550-8</a></p>
+<p>Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. <em>Genome Biology</em>, <strong>15</strong>:550. <a href="http://dx.doi.org/10.1186/s13059-014-0550-8">10.1186/s13059-014-0550-8</a></p>
 </blockquote>
 <p>Other Bioconductor packages with similar aims are <a href="http://bioconductor.org/packages/edgeR">edgeR</a>, <a href="http://bioconductor.org/packages/limma">limma</a>, <a href="http://bioconductor.org/packages/DSS">DSS</a>, <a href="http://bioconductor.org/packages/EBSeq">EBSeq</a>, and <a href="http://bioconductor.org/packages/baySeq">baySeq</a>.</p>
 <div id="quick-start" class="section level2">
@@ -275,7 +276,9 @@ $(document).ready(function () {
                               <span class="dt">colData =</span> coldata,
                               <span class="dt">design=</span> ~<span class="st"> </span>batch +<span class="st"> </span>condition)
 dds <-<span class="st"> </span><span class="kw">DESeq</span>(dds)
-res <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">contrast=</span><span class="kw">c</span>(<span class="st">"condition"</span>,<span class="st">"treated"</span>,<span class="st">"control"</span>))</code></pre></div>
+res <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">contrast=</span><span class="kw">c</span>(<span class="st">"condition"</span>,<span class="st">"treat"</span>,<span class="st">"ctrl"</span>))
+<span class="kw">resultsNames</span>(dds)
+res <-<span class="st"> </span><span class="kw">lfcShrink</span>(dds, <span class="dt">coef=</span><span class="dv">2</span>)</code></pre></div>
 <p>The following starting functions will be explained below:</p>
 <ul>
 <li>If you have transcript quantification files, as produced by <em>Salmon</em>, <em>Sailfish</em>, or <em>kallisto</em>, you would use <em>DESeqDataSetFromTximport</em>.</li>
@@ -316,13 +319,14 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds, <span cla
 <h3>Transcript abundance files and <em>tximport</em> input</h3>
 <p>A newer and recommended pipeline is to use fast transcript abundance quantifiers upstream of DESeq2, and then to create gene-level count matrices for use with DESeq2 by importing the quantification data using the <a href="http://bioconductor.org/packages/tximport">tximport</a> package. This workflow allows users to import transcript abundance estimates from a variety of external software, including the following methods:</p>
 <ul>
-<li><a href="http://combine-lab.github.io/salmon/">Salmon</a> <span class="citation">(Patro et al. 2016)</span></li>
+<li><a href="http://combine-lab.github.io/salmon/">Salmon</a> <span class="citation">(Patro et al. 2017)</span></li>
 <li><a href="http://www.cs.cmu.edu/~ckingsf/software/sailfish/">Sailfish</a> <span class="citation">(Patro, Mount, and Kingsford 2014)</span></li>
 <li><a href="https://pachterlab.github.io/kallisto/about.html">kallisto</a> <span class="citation">(Bray et al. 2016)</span></li>
 <li><a href="http://deweylab.github.io/RSEM/">RSEM</a> <span class="citation">(Li and Dewey 2011)</span></li>
 </ul>
 <p>Some advantages of using the above methods for transcript abundance estimation are: (i) this approach corrects for potential changes in gene length across samples (e.g. from differential isoform usage) <span class="citation">(Trapnell et al. 2013)</span>, (ii) some of these methods (<em>Salmon</em>, <em>Sailfish</em>, <em>kallisto</em>) are substantially faster and require less memory and disk usage compared to alignment-based methods that require creation and storage of BAM files, an [...]
 <p>Full details on the motivation and methods for importing transcript level abundance and count estimates, summarizing to gene-level count matrices and producing an offset which corrects for potential changes in average transcript length across samples are described in <span class="citation">(Soneson, Love, and Robinson 2015)</span>. Note that the tximport-to-DESeq2 approach uses <em>estimated</em> gene counts from the transcript abundance quantifiers, but not <em>normalized</em> counts.</p>
+<p>A tutorial on how to use the <em>Salmon</em> software for quantifying transcript abundance can be found <a href="https://combine-lab.github.io/salmon/getting_started/">here</a>. We recommend using the <code>--gcBias</code> <a href="http://salmon.readthedocs.io/en/latest/salmon.html#gcbias">flag</a> which estimates a correction factor for systematic biases commonly present in RNA-seq data <span class="citation">(Love, Hogenesch, and Irizarry 2016; Patro et al. 2017)</span>, unless you  [...]
 <p>Here, we demonstrate how to import transcript abundances and construct of a gene-level <em>DESeqDataSet</em> object from <em>Salmon</em> <code>quant.sf</code> files, which are stored in the <a href="http://bioconductor.org/packages/tximportData">tximportData</a> package. You do not need the <code>tximportData</code> package for your analysis, it is only used here for demonstration.</p>
 <p>Note that, instead of locating <code>dir</code> using <em>system.file</em>, a user would typically just provide a path, e.g. <code>/path/to/quant/files</code>. For a typical use, the <code>condition</code> information should already be present as a column of the sample table <code>samples</code>, while here we construct artificial condition labels for demonstration.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"tximport"</span>)
@@ -368,35 +372,31 @@ pasAnno <-<span class="st"> </span><span class="kw">system.file</span>(<span
 cts <-<span class="st"> </span><span class="kw">as.matrix</span>(<span class="kw">read.csv</span>(pasCts,<span class="dt">sep=</span><span class="st">"</span><span class="ch">\t</span><span class="st">"</span>,<span class="dt">row.names=</span><span class="st">"gene_id"</span>))
 coldata <-<span class="st"> </span><span class="kw">read.csv</span>(pasAnno, <span class="dt">row.names=</span><span class="dv">1</span>)
 coldata <-<span class="st"> </span>coldata[,<span class="kw">c</span>(<span class="st">"condition"</span>,<span class="st">"type"</span>)]</code></pre></div>
-<p>We examine the count matrix and column data to see if they are consisent:</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(cts)</code></pre></div>
+<p>We examine the count matrix and column data to see if they are consistent in terms of sample order.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(cts,<span class="dv">2</span>)</code></pre></div>
 <pre><code>##             untreated1 untreated2 untreated3 untreated4 treated1 treated2
 ## FBgn0000003          0          0          0          0        0        0
 ## FBgn0000008         92        161         76         70      140       88
-## FBgn0000014          5          1          0          0        4        0
-## FBgn0000015          0          2          1          2        1        0
-## FBgn0000017       4664       8714       3564       3150     6205     3072
-## FBgn0000018        583        761        245        310      722      299
 ##             treated3
 ## FBgn0000003        1
-## FBgn0000008       70
-## FBgn0000014        0
-## FBgn0000015        0
-## FBgn0000017     3334
-## FBgn0000018      308</code></pre>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(coldata)</code></pre></div>
+## FBgn0000008       70</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">coldata</code></pre></div>
 <pre><code>##              condition        type
 ## treated1fb     treated single-read
 ## treated2fb     treated  paired-end
 ## treated3fb     treated  paired-end
 ## untreated1fb untreated single-read
 ## untreated2fb untreated single-read
-## untreated3fb untreated  paired-end</code></pre>
+## untreated3fb untreated  paired-end
+## untreated4fb untreated  paired-end</code></pre>
 <p>Note that these are not in the same order with respect to samples!</p>
-<p>It is critical that the columns of the count matrix and the rows of the column data (information about samples) are in the same order. We should re-arrange one or the other so that they are consistent in terms of sample order (if we do not, later functions would produce an error). We additionally need to chop off the <code>"fb"</code> of the row names of <code>coldata</code>, so the naming is consistent.</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">rownames</span>(coldata) <-<span class="st"> </span><span class="kw">sub</span>(<span class="st">"fb"</span>,<span class="st">""</span>,<span class="kw">rownames</span>(coldata))
+<p>It is absolutely critical that the columns of the count matrix and the rows of the column data (information about samples) are in the same order. DESeq2 will not make guesses as to which column of the count matrix belongs to which row of the column data, these must be provided to DESeq2 already in consistent order.</p>
+<p>As they are not in the correct order as given, we need to re-arrange one or the other so that they are consistent in terms of sample order (if we do not, later functions would produce an error). We additionally need to chop off the <code>"fb"</code> of the row names of <code>coldata</code>, so the naming is consistent.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">rownames</span>(coldata) <-<span class="st"> </span><span class="kw">sub</span>(<span class="st">"fb"</span>, <span class="st">""</span>, <span class="kw">rownames</span>(coldata))
 <span class="kw">all</span>(<span class="kw">rownames</span>(coldata) %in%<span class="st"> </span><span class="kw">colnames</span>(cts))</code></pre></div>
 <pre><code>## [1] TRUE</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">all</span>(<span class="kw">rownames</span>(coldata) ==<span class="st"> </span><span class="kw">colnames</span>(cts))</code></pre></div>
+<pre><code>## [1] FALSE</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cts <-<span class="st"> </span>cts[, <span class="kw">rownames</span>(coldata)]
 <span class="kw">all</span>(<span class="kw">rownames</span>(coldata) ==<span class="st"> </span><span class="kw">colnames</span>(cts))</code></pre></div>
 <pre><code>## [1] TRUE</code></pre>
@@ -488,15 +488,16 @@ ddsSE</code></pre></div>
 </div>
 <div id="pre-filtering" class="section level3">
 <h3>Pre-filtering</h3>
-<p>While it is not necessary to pre-filter low count genes before running the DESeq2 functions, there are two reasons which make pre-filtering useful: by removing rows in which there are no reads or nearly no reads, we reduce the memory size of the <code>dds</code> data object and we increase the speed of the transformation and testing functions within DESeq2. Here we perform a minimal pre-filtering to remove rows that have only 0 or 1 read. Note that more strict filtering to increase po [...]
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">dds <-<span class="st"> </span>dds[ <span class="kw">rowSums</span>(<span class="kw">counts</span>(dds)) ><span class="st"> </span><span class="dv">1</span>, ]</code></pre></div>
+<p>While it is not necessary to pre-filter low count genes before running the DESeq2 functions, there are two reasons which make pre-filtering useful: by removing rows in which there are very few reads, we reduce the memory size of the <code>dds</code> data object, and we increase the speed of the transformation and testing functions within DESeq2. Here we perform a minimal pre-filtering to keep only rows that have at least 10 reads total. Note that more strict filtering to increase powe [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">keep <-<span class="st"> </span><span class="kw">rowSums</span>(<span class="kw">counts</span>(dds)) >=<span class="st"> </span><span class="dv">10</span>
+dds <-<span class="st"> </span>dds[keep,]</code></pre></div>
 </div>
 <div id="note-on-factor-levels" class="section level3">
 <h3>Note on factor levels</h3>
-<p>By default, R will choose a <em>reference level</em> for factors based on alphabetical order. Then, if you never tell the DESeq2 functions which level you want to compare against (e.g. which level represents the control group), the comparisons will be based on the alphabetical order of the levels. There are two solutions: you can either explicitly tell <em>results</em> which comparison to make using the <code>contrast</code> argument (this will be shown later), or you can explicitly s [...]
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">dds$condition <-<span class="st"> </span><span class="kw">factor</span>(dds$condition, <span class="dt">levels=</span><span class="kw">c</span>(<span class="st">"untreated"</span>,<span class="st">"treated"</span>))</code></pre></div>
+<p>By default, R will choose a <em>reference level</em> for factors based on alphabetical order. Then, if you never tell the DESeq2 functions which level you want to compare against (e.g. which level represents the control group), the comparisons will be based on the alphabetical order of the levels. There are two solutions: you can either explicitly tell <em>results</em> which comparison to make using the <code>contrast</code> argument (this will be shown later), or you can explicitly s [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">dds$condition <-<span class="st"> </span><span class="kw">factor</span>(dds$condition, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="st">"untreated"</span>,<span class="st">"treated"</span>))</code></pre></div>
 <p>…or using <em>relevel</em>, just specifying the reference level:</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">dds$condition <-<span class="st"> </span><span class="kw">relevel</span>(dds$condition, <span class="dt">ref=</span><span class="st">"untreated"</span>)</code></pre></div>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">dds$condition <-<span class="st"> </span><span class="kw">relevel</span>(dds$condition, <span class="dt">ref =</span> <span class="st">"untreated"</span>)</code></pre></div>
 <p>If you need to subset the columns of a <em>DESeqDataSet</em>, i.e., when removing certain samples from the analysis, it is possible that all the samples for one or more levels of a variable in the design formula would be removed. In this case, the <em>droplevels</em> function can be used to remove those levels which do not have samples in the current <em>DESeqDataSet</em>:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">dds$condition <-<span class="st"> </span><span class="kw">droplevels</span>(dds$condition)</code></pre></div>
 </div>
@@ -520,110 +521,128 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds)
 res</code></pre></div>
 <pre><code>## log2 fold change (MLE): condition treated vs untreated 
 ## Wald test p-value: condition treated vs untreated 
-## DataFrame with 11638 rows and 6 columns
-##                 baseMean log2FoldChange     lfcSE         stat     pvalue
-##                <numeric>      <numeric> <numeric>    <numeric>  <numeric>
-## FBgn0000008   95.1440790    0.002151683 0.2238867  0.009610592 0.99233197
-## FBgn0000014    1.0565722   -0.496689957 2.1597256 -0.229978272 0.81810865
-## FBgn0000015    0.8467233   -1.882756713 2.1063362 -0.893853836 0.37140010
-## FBgn0000017 4352.5928988   -0.240025055 0.1260345 -1.904439437 0.05685298
-## FBgn0000018  418.6149305   -0.104798934 0.1482908 -0.706712077 0.47974542
-## ...                  ...            ...       ...          ...        ...
-## FBgn0261570  3208.384460     0.29543213 0.1270246   2.32578599 0.02002997
-## FBgn0261572     6.197137    -0.95912781 0.7769982  -1.23440151 0.21705333
-## FBgn0261573  2240.983986     0.01261611 0.1127225   0.11192186 0.91088536
-## FBgn0261574  4857.742672     0.01525741 0.1931199   0.07900487 0.93702875
-## FBgn0261575    10.683554     0.16355063 0.9386206   0.17424573 0.86167235
+## DataFrame with 9921 rows and 6 columns
+##                baseMean log2FoldChange     lfcSE        stat     pvalue
+##               <numeric>      <numeric> <numeric>   <numeric>  <numeric>
+## FBgn0000008   95.144292    0.002276428 0.2237292  0.01017493 0.99188172
+## FBgn0000014    1.056523   -0.495113878 2.1431096 -0.23102593 0.81729466
+## FBgn0000017 4352.553569   -0.239918945 0.1263378 -1.89902705 0.05756092
+## FBgn0000018  418.610484   -0.104673913 0.1484903 -0.70492106 0.48085936
+## FBgn0000024    6.406200    0.210848562 0.6895923  0.30575830 0.75978868
+## ...                 ...            ...       ...         ...        ...
+## FBgn0261570 3208.388610     0.29553289 0.1273514  2.32061001  0.0203079
+## FBgn0261572    6.197188    -0.95882276 0.7753130 -1.23669125  0.2162017
+## FBgn0261573 2240.979511     0.01271946 0.1133028  0.11226079  0.9106166
+## FBgn0261574 4857.680373     0.01539243 0.1925619  0.07993497  0.9362890
+## FBgn0261575   10.682520     0.16356865 0.9308661  0.17571663  0.8605166
 ##                  padj
 ##             <numeric>
-## FBgn0000008 0.9970815
+## FBgn0000008 0.9972093
 ## FBgn0000014        NA
-## FBgn0000015        NA
-## FBgn0000017 0.2862230
-## FBgn0000018 0.8282460
+## FBgn0000017 0.2880108
+## FBgn0000018 0.8268644
+## FBgn0000024 0.9435005
 ## ...               ...
-## FBgn0261570 0.1428209
-## FBgn0261572 0.6097343
-## FBgn0261573 0.9824950
-## FBgn0261574 0.9888664
-## FBgn0261575 0.9688434</code></pre>
+## FBgn0261570 0.1442486
+## FBgn0261572 0.6078453
+## FBgn0261573 0.9826550
+## FBgn0261574 0.9881787
+## FBgn0261575 0.9679223</code></pre>
 <p><a name="lfcShrink"></a></p>
 <p>In previous versions of DESeq2, the <em>DESeq</em> function by default would produce moderated, or shrunken, log2 fold changes through the use of the <code>betaPrior</code> argument. In version 1.16 and higher, we have split the moderation of log2 fold changes into a separate function, <em>lfcShrink</em>, for reasons described in the <a href="#changes">changes section</a> below.</p>
 <p>Here we provide the <code>dds</code> object and the number of the coefficient we want to moderate. It is also possible to specify a <code>contrast</code>, instead of <code>coef</code>, which works the same as the <code>contrast</code> argument of the <em>results</em> function. If a results object is provided, the <code>log2FoldChange</code> column will be swapped out, otherwise <em>lfcShrink</em> returns a vector of shrunken log2 fold changes.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">resultsNames</span>(dds)</code></pre></div>
 <pre><code>## [1] "Intercept"                      "condition_treated_vs_untreated"</code></pre>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resLFC <-<span class="st"> </span><span class="kw">lfcShrink</span>(dds, <span class="dt">coef=</span><span class="dv">2</span>, <span class="dt">res=</span>res)
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resLFC <-<span class="st"> </span><span class="kw">lfcShrink</span>(dds, <span class="dt">coef=</span><span class="dv">2</span>)
 resLFC</code></pre></div>
 <pre><code>## log2 fold change (MAP): condition treated vs untreated 
 ## Wald test p-value: condition treated vs untreated 
-## DataFrame with 11638 rows and 5 columns
-##                 baseMean log2FoldChange         stat     pvalue      padj
-##                <numeric>      <numeric>    <numeric>  <numeric> <numeric>
-## FBgn0000008   95.1440790    0.001476959  0.009610592 0.99233197 0.9970815
-## FBgn0000014    1.0565722   -0.011952307 -0.229978272 0.81810865        NA
-## FBgn0000015    0.8467233   -0.046559241 -0.893853836 0.37140010        NA
-## FBgn0000017 4352.5928988   -0.209784559 -1.904439437 0.05685298 0.2862230
-## FBgn0000018  418.6149305   -0.087416357 -0.706712077 0.47974542 0.8282460
-## ...                  ...            ...          ...        ...       ...
-## FBgn0261570  3208.384460     0.25779079   2.32578599 0.02002997 0.1428209
-## FBgn0261572     6.197137    -0.14722257  -1.23440151 0.21705333 0.6097343
-## FBgn0261573  2240.983986     0.01131286   0.11192186 0.91088536 0.9824950
-## FBgn0261574  4857.742672     0.01140563   0.07900487 0.93702875 0.9888664
-## FBgn0261575    10.683554     0.01883364   0.17424573 0.86167235 0.9688434</code></pre>
-<p>The above steps should take less than 30 seconds for most analyses. For experiments with many samples (e.g. 100 samples), one can take advantage of parallelized computation. Both of the above functions have an argument <code>parallel</code> which if set to <code>TRUE</code> can be used to distribute computation across cores specified by the <em>register</em> function of <a href="http://bioconductor.org/packages/BiocParallel">BiocParallel</a>. For example, the following chunk (not eval [...]
+## DataFrame with 9921 rows and 6 columns
+##                baseMean log2FoldChange      lfcSE        stat     pvalue
+##               <numeric>      <numeric>  <numeric>   <numeric>  <numeric>
+## FBgn0000008   95.144292     0.00155932 0.15353974  0.01017493 0.99188172
+## FBgn0000014    1.056523    -0.01200958 0.05031619 -0.23102593 0.81729466
+## FBgn0000017 4352.553569    -0.20935007 0.11026137 -1.89902705 0.05756092
+## FBgn0000018  418.610484    -0.08715582 0.12358964 -0.70492106 0.48085936
+## FBgn0000024    6.406200     0.03956173 0.12886543  0.30575830 0.75978868
+## ...                 ...            ...        ...         ...        ...
+## FBgn0261570 3208.388610     0.25744403  0.1109237  2.32061001  0.0203079
+## FBgn0261572    6.197188    -0.14674317  0.1220335 -1.23669125  0.2162017
+## FBgn0261573 2240.979511     0.01138380  0.1014129  0.11226079  0.9106166
+## FBgn0261574 4857.680373     0.01150000  0.1438479  0.07993497  0.9362890
+## FBgn0261575   10.682520     0.01898573  0.1043720  0.17571663  0.8605166
+##                  padj
+##             <numeric>
+## FBgn0000008 0.9972093
+## FBgn0000014        NA
+## FBgn0000017 0.2880108
+## FBgn0000018 0.8268644
+## FBgn0000024 0.9435005
+## ...               ...
+## FBgn0261570 0.1442486
+## FBgn0261572 0.6078453
+## FBgn0261573 0.9826550
+## FBgn0261574 0.9881787
+## FBgn0261575 0.9679223</code></pre>
+<p><a name="parallel"></a></p>
+<p>The above steps should take less than 30 seconds for most analyses. For experiments with many samples (e.g. 100 samples), one can take advantage of parallelized computation. Parallelizing <code>DESeq</code>, <code>results</code>, and <code>lfcShrink</code> can be easily accomplished by loading the BiocParallel package, and then setting the following arguments: <code>parallel=TRUE</code> and <code>BPPARAM=MulticoreParam(4)</code>, for example, splitting the job over 4 cores. Note that  [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"BiocParallel"</span>)
 <span class="kw">register</span>(<span class="kw">MulticoreParam</span>(<span class="dv">4</span>))</code></pre></div>
-<p>We can order our results table by the smallest adjusted <em>p</em> value:</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resOrdered <-<span class="st"> </span>res[<span class="kw">order</span>(res$padj),]</code></pre></div>
+<p>We can order our results table by the smallest <em>p</em> value:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resOrdered <-<span class="st"> </span>res[<span class="kw">order</span>(res$pvalue),]</code></pre></div>
 <p>We can summarize some basic tallies using the <em>summary</em> function.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">summary</span>(res)</code></pre></div>
 <pre><code>## 
-## out of 11638 with nonzero total read count
+## out of 9921 with nonzero total read count
 ## adjusted p-value < 0.1
-## LFC > 0 (up)     : 515, 4.4% 
-## LFC < 0 (down)   : 537, 4.6% 
-## outliers [1]     : 1, 0.0086% 
-## low counts [2]   : 3159, 27% 
+## LFC > 0 (up)     : 518, 5.2% 
+## LFC < 0 (down)   : 536, 5.4% 
+## outliers [1]     : 1, 0.01% 
+## low counts [2]   : 1539, 16% 
 ## (mean count < 6)
 ## [1] see 'cooksCutoff' argument of ?results
 ## [2] see 'independentFiltering' argument of ?results</code></pre>
 <p>How many adjusted p-values were less than 0.1?</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sum</span>(res$padj <<span class="st"> </span><span class="fl">0.1</span>, <span class="dt">na.rm=</span><span class="ot">TRUE</span>)</code></pre></div>
-<pre><code>## [1] 1052</code></pre>
+<pre><code>## [1] 1054</code></pre>
 <p>The <em>results</em> function contains a number of arguments to customize the results table which is generated. You can read about these arguments by looking up <code>?results</code>. Note that the <em>results</em> function automatically performs independent filtering based on the mean of normalized counts for each gene, optimizing the number of genes which will have an adjusted <em>p</em> value below a given FDR cutoff, <code>alpha</code>. Independent filtering is further discussed < [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">res05 <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">alpha=</span><span class="fl">0.05</span>)
 <span class="kw">summary</span>(res05)</code></pre></div>
 <pre><code>## 
-## out of 11638 with nonzero total read count
+## out of 9921 with nonzero total read count
 ## adjusted p-value < 0.05
-## LFC > 0 (up)     : 408, 3.5% 
-## LFC < 0 (down)   : 433, 3.7% 
-## outliers [1]     : 1, 0.0086% 
-## low counts [2]   : 3159, 27% 
-## (mean count < 6)
+## LFC > 0 (up)     : 407, 4.1% 
+## LFC < 0 (down)   : 431, 4.3% 
+## outliers [1]     : 1, 0.01% 
+## low counts [2]   : 1347, 14% 
+## (mean count < 5)
 ## [1] see 'cooksCutoff' argument of ?results
 ## [2] see 'independentFiltering' argument of ?results</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sum</span>(res05$padj <<span class="st"> </span><span class="fl">0.05</span>, <span class="dt">na.rm=</span><span class="ot">TRUE</span>)</code></pre></div>
-<pre><code>## [1] 841</code></pre>
+<pre><code>## [1] 838</code></pre>
 <p><a name="IHW"></a></p>
-<p>A generalization of the idea of <em>p</em> value filtering is to <em>weight</em> hypotheses to optimize power. A Bioconductor package, <a href="http://bioconductor.org/packages/IHW">IHW</a>, is available that implements the method of <em>Independent Hypothesis Weighting</em> <span class="citation">(Ignatiadis et al. 2015)</span>. Here we show the use of <em>IHW</em> for <em>p</em> value adjustment of DESeq2 results. For more details, please see the vignette of the <a href="http://bioc [...]
+<p>A generalization of the idea of <em>p</em> value filtering is to <em>weight</em> hypotheses to optimize power. A Bioconductor package, <a href="http://bioconductor.org/packages/IHW">IHW</a>, is available that implements the method of <em>Independent Hypothesis Weighting</em> <span class="citation">(Ignatiadis et al. 2016)</span>. Here we show the use of <em>IHW</em> for <em>p</em> value adjustment of DESeq2 results. For more details, please see the vignette of the <a href="http://bioc [...]
+<p><strong>Note:</strong> If the results of independent hypothesis weighting are used in published research, please cite:</p>
+<blockquote>
+<p>Ignatiadis, N., Klaus, B., Zaugg, J.B., Huber, W. (2016) Data-driven hypothesis weighting increases detection power in genome-scale multiple testing. <em>Nature Methods</em>, <strong>13</strong>:7. <a href="http://dx.doi.org/10.1038/nmeth.3885">10.1038/nmeth.3885</a></p>
+</blockquote>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"IHW"</span>)
 resIHW <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">filterFun=</span>ihw)
 <span class="kw">summary</span>(resIHW)</code></pre></div>
 <pre><code>## 
-## out of 11638 with nonzero total read count
+## out of 9921 with nonzero total read count
 ## adjusted p-value < 0.1
-## LFC > 0 (up)     : 515, 4.4% 
-## LFC < 0 (down)   : 549, 4.7% 
-## outliers [1]     : 1, 0.0086% 
+## LFC > 0 (up)     : 504, 5.1% 
+## LFC < 0 (down)   : 540, 5.4% 
+## outliers [1]     : 1, 0.01% 
 ## [1] see 'cooksCutoff' argument of ?results
 ## [2] see metadata(res)$ihwResult on hypothesis weighting</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sum</span>(resIHW$padj <<span class="st"> </span><span class="fl">0.1</span>, <span class="dt">na.rm=</span><span class="ot">TRUE</span>)</code></pre></div>
-<pre><code>## [1] 1064</code></pre>
+<pre><code>## [1] 1044</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">metadata</span>(resIHW)$ihwResult</code></pre></div>
-<pre><code>## ihwResult object with 11638 hypothesis tests 
+<pre><code>## ihwResult object with 9921 hypothesis tests 
 ## Nominal FDR control level: 0.1 
-## Split into 7 bins, based on an ordinal covariate</code></pre>
+## Split into 6 bins, based on an ordinal covariate</code></pre>
 <p>If a multi-factor design is used, or if the variable in the design formula has more than two levels, the <code>contrast</code> argument of <em>results</em> can be used to extract different comparisons from the <em>DESeqDataSet</em> returned by <em>DESeq</em>. The use of the <code>contrast</code> argument is further discussed <a href="#contrasts">below</a>.</p>
 <p>For advanced users, note that all the values calculated by the DESeq2 package are stored in the <em>DESeqDataSet</em> object, and access to these values is discussed <a href="#access">below</a>.</p>
 </div>
@@ -633,19 +652,43 @@ resIHW <-<span class="st"> </span><span class="kw">results</span>(dds, <span
 <h3>MA-plot</h3>
 <p>In DESeq2, the function <em>plotMA</em> shows the log2 fold changes attributable to a given variable over the mean of normalized counts for all the samples in the <em>DESeqDataSet</em>. Points will be colored red if the adjusted <em>p</em> value is less than 0.1. Points which fall out of the window are plotted as open triangles pointing either up or down.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plotMA</span>(res, <span class="dt">ylim=</span><span class="kw">c</span>(-<span class="dv">2</span>,<span class="dv">2</span>))</code></pre></div>
-<p><img src=" [...]
-<p>It is also useful to visualize the MA-plot for the shrunken log2 fold changes, which remove the noise associated with log2 fold changes from low count genes without requiring arbitrary filtering thresholds.</p>
+<p><img src=" [...]
+<p>It is more useful visualize the MA-plot for the shrunken log2 fold changes, which remove the noise associated with log2 fold changes from low count genes without requiring arbitrary filtering thresholds.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plotMA</span>(resLFC, <span class="dt">ylim=</span><span class="kw">c</span>(-<span class="dv">2</span>,<span class="dv">2</span>))</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 <p>After calling <em>plotMA</em>, one can use the function <em>identify</em> to interactively detect the row number of individual genes by clicking on the plot. One can then recover the gene identifiers by saving the resulting indices:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">idx <-<span class="st"> </span><span class="kw">identify</span>(res$baseMean, res$log2FoldChange)
 <span class="kw">rownames</span>(res)[idx]</code></pre></div>
 </div>
+<div id="alternative-shrinkage-estimators" class="section level3">
+<h3>Alternative shrinkage estimators</h3>
+<p>The moderated log fold changes proposed by <span class="citation">Love, Huber, and Anders (2014)</span> use a normal prior distribution, centered on zero and with a scale that is fit to the data. The shrunken log fold changes are useful for ranking and visualization, without the need for arbitrary filters on low count genes. The normal prior can sometimes produce too strong of shrinkage for certain datasets. In DESeq2 version 1.18, we include two additional adaptive shrinkage estimato [...]
+<p>The options for <code>type</code> are:</p>
+<ul>
+<li><code>normal</code> is the the original DESeq2 shrinkage estimator, an adaptive normal prior</li>
+<li><code>apeglm</code> is the adaptive t prior shrinkage estimator from the <a href="http://bioconductor.org/packages/apeglm">apeglm</a> package</li>
+<li><code>ashr</code> is the adaptive shrinkage estimator from the <a href="https://github.com/stephens999/ashr">ashr</a> package <span class="citation">(Stephens 2016)</span>. Here DESeq2 uses the ashr option to fit a mixture of normal distributions to form the prior, with <code>method="shrinkage"</code></li>
+</ul>
+<p><strong>Note:</strong> if the shrinkage estimator <code>type="ashr"</code> is used in published research, please cite:</p>
+<blockquote>
+<p>Stephens, M. (2016) False discovery rates: a new deal. <em>Biostatistics</em>, <strong>18</strong>:2. <a href="https://doi.org/10.1093/biostatistics/kxw041">10.1093/biostatistics/kxw041</a></p>
+</blockquote>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resApe <-<span class="st"> </span><span class="kw">lfcShrink</span>(dds, <span class="dt">coef=</span><span class="dv">2</span>, <span class="dt">type=</span><span class="st">"apeglm"</span>)
+resAsh <-<span class="st"> </span><span class="kw">lfcShrink</span>(dds, <span class="dt">coef=</span><span class="dv">2</span>, <span class="dt">type=</span><span class="st">"ashr"</span>)</code></pre></div>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mfrow=</span><span class="kw">c</span>(<span class="dv">1</span>,<span class="dv">3</span>), <span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">4</span>,<span class="dv">4</span>,<span class="dv">2</span>,<span class="dv">1</span>))
+xlim <-<span class="st"> </span><span class="kw">c</span>(<span class="dv">1</span>,<span class="fl">1e5</span>); ylim <-<span class="st"> </span><span class="kw">c</span>(-<span class="dv">3</span>,<span class="dv">3</span>)
+<span class="kw">plotMA</span>(resLFC, <span class="dt">xlim=</span>xlim, <span class="dt">ylim=</span>ylim, <span class="dt">main=</span><span class="st">"normal"</span>)
+<span class="kw">plotMA</span>(resApe, <span class="dt">xlim=</span>xlim, <span class="dt">ylim=</span>ylim, <span class="dt">main=</span><span class="st">"apeglm"</span>)
+<span class="kw">plotMA</span>(resAsh, <span class="dt">xlim=</span>xlim, <span class="dt">ylim=</span>ylim, <span class="dt">main=</span><span class="st">"ashr"</span>)</code></pre></div>
+<p><img src=" [...]
+<p><strong>Note:</strong> due to the nature of the statistical model and optimization approach, <code>apeglm</code> is usually a factor of ~5 slower than <code>normal</code>. For example, with 10,000 genes and 10 samples, <code>normal</code> may take ~3 seconds, while <code>apeglm</code> takes ~15 seconds (on a laptop). However, <code>apeglm</code> can be more than an order of magnitude slower when there are many coefficients, e.g. 10 or more coefficients in <code>resultsNames(dds)</code [...]
+<p><strong>Note:</strong> If there is unwanted variation present in the data (e.g. batch effects) it is always recommend to correct for this, which can be accommodated in DESeq2 by including in the design any known batch variables or by using functions/packages such as <code>svaseq</code> in <a href="http://bioconductor.org/packages/sva">sva</a> <span class="citation">(Leek 2014)</span> or the <code>RUV</code> functions in <a href="http://bioconductor.org/packages/RUVSeq">RUVSeq</a> <spa [...]
+</div>
 <div id="plot-counts" class="section level3">
 <h3>Plot counts</h3>
 <p>It can also be useful to examine the counts of reads for a single gene across the groups. A simple function for making this plot is <em>plotCounts</em>, which normalizes counts by sequencing depth and adds a pseudocount of 1/2 to allow for log scale plotting. The counts are grouped by the variables in <code>intgroup</code>, where more than one variable can be specified. Here we specify the gene which had the smallest <em>p</em> value from the results table created above. You can selec [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plotCounts</span>(dds, <span class="dt">gene=</span><span class="kw">which.min</span>(res$padj), <span class="dt">intgroup=</span><span class="st">"condition"</span>)</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 <p>For customized plotting, an argument <code>returnData</code> specifies that the function should only return a <em>data.frame</em> for plotting with <em>ggplot</em>.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">d <-<span class="st"> </span><span class="kw">plotCounts</span>(dds, <span class="dt">gene=</span><span class="kw">which.min</span>(res$padj), <span class="dt">intgroup=</span><span class="st">"condition"</span>, 
                 <span class="dt">returnData=</span><span class="ot">TRUE</span>)
@@ -653,7 +696,7 @@ resIHW <-<span class="st"> </span><span class="kw">results</span>(dds, <span
 <span class="kw">ggplot</span>(d, <span class="kw">aes</span>(<span class="dt">x=</span>condition, <span class="dt">y=</span>count)) +<span class="st"> </span>
 <span class="st">  </span><span class="kw">geom_point</span>(<span class="dt">position=</span><span class="kw">position_jitter</span>(<span class="dt">w=</span><span class="fl">0.1</span>,<span class="dt">h=</span><span class="dv">0</span>)) +<span class="st"> </span>
 <span class="st">  </span><span class="kw">scale_y_log10</span>(<span class="dt">breaks=</span><span class="kw">c</span>(<span class="dv">25</span>,<span class="dv">100</span>,<span class="dv">400</span>))</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 </div>
 <div id="more-information-on-results-columns" class="section level3">
 <h3>More information on results columns</h3>
@@ -691,33 +734,33 @@ resIHW <-<span class="st"> </span><span class="kw">results</span>(dds, <span
 resSig</code></pre></div>
 <pre><code>## log2 fold change (MLE): condition treated vs untreated 
 ## Wald test p-value: condition treated vs untreated 
-## DataFrame with 1052 rows and 6 columns
-##              baseMean log2FoldChange      lfcSE      stat        pvalue
-##             <numeric>      <numeric>  <numeric> <numeric>     <numeric>
-## FBgn0039155  730.5958      -4.619006 0.16872512 -27.37593 5.307306e-165
-## FBgn0025111 1501.4105       2.899863 0.12693550  22.84517 1.632133e-115
-## FBgn0029167 3706.1165      -2.197001 0.09701773 -22.64535 1.550285e-113
-## FBgn0003360 4343.0354      -3.179672 0.14352683 -22.15385 9.577104e-109
-## FBgn0035085  638.2326      -2.560409 0.13731558 -18.64617  1.356647e-77
-## ...               ...            ...        ...       ...           ...
-## FBgn0004359  83.96562      0.6448247  0.2573869  2.505274    0.01223565
-## FBgn0030026 212.16680      0.5660727  0.2260159  2.504571    0.01226001
-## FBgn0038874 103.79261     -0.6831454  0.2727706 -2.504469    0.01226354
-## FBgn0053329 602.55858     -0.4998614  0.1997516 -2.502415    0.01233494
-## FBgn0031183 428.52319     -0.3472728  0.1388560 -2.500957    0.01238581
+## DataFrame with 1054 rows and 6 columns
+##               baseMean log2FoldChange      lfcSE      stat        pvalue
+##              <numeric>      <numeric>  <numeric> <numeric>     <numeric>
+## FBgn0039155   730.5677      -4.618741 0.16912665 -27.30936 3.283533e-164
+## FBgn0025111  1501.4479       2.899946 0.12735926  22.76981 9.134947e-115
+## FBgn0029167  3706.0240      -2.196912 0.09792037 -22.43570 1.765125e-111
+## FBgn0003360  4342.8321      -3.179541 0.14356712 -22.14672 1.121885e-108
+## FBgn0035085   638.2193      -2.560242 0.13781525 -18.57735  4.901323e-77
+## ...                ...            ...        ...       ...           ...
+## FBgn0037073  973.10163     -0.2521459 0.10099319 -2.496662    0.01253684
+## FBgn0029976 2312.58850     -0.2211265 0.08858313 -2.496259    0.01255108
+## FBgn0030938   24.80638      0.9576449 0.38364585  2.496169    0.01255428
+## FBgn0034753 7775.27113      0.3935148 0.15767268  2.495770    0.01256840
+## FBgn0039260 1088.27659     -0.2592536 0.10387902 -2.495727    0.01256994
 ##                      padj
 ##                 <numeric>
-## FBgn0039155 4.499534e-161
-## FBgn0025111 6.918613e-112
-## FBgn0029167 4.381106e-110
-## FBgn0003360 2.029867e-105
-## FBgn0035085  2.300330e-74
+## FBgn0039155 2.751929e-160
+## FBgn0025111 3.827999e-111
+## FBgn0029167 4.931170e-108
+## FBgn0003360 2.350629e-105
+## FBgn0035085  8.215598e-74
 ## ...                   ...
-## FBgn0004359    0.09898268
-## FBgn0030026    0.09901933
-## FBgn0038874    0.09901933
-## FBgn0053329    0.09950107
-## FBgn0031183    0.09981644</code></pre>
+## FBgn0037073     0.0999513
+## FBgn0029976     0.0999513
+## FBgn0030938     0.0999513
+## FBgn0034753     0.0999513
+## FBgn0039260     0.0999513</code></pre>
 </div>
 </div>
 <div id="multi-factor-designs" class="section level2">
@@ -729,16 +772,23 @@ resSig</code></pre></div>
 <pre><code>## DataFrame with 7 rows and 3 columns
 ##            condition        type sizeFactor
 ##             <factor>    <factor>  <numeric>
-## treated1     treated single-read  1.6355751
-## treated2     treated  paired-end  0.7612698
-## treated3     treated  paired-end  0.8326526
-## untreated1 untreated single-read  1.1382630
-## untreated2 untreated single-read  1.7930004
-## untreated3 untreated  paired-end  0.6495470
-## untreated4 untreated  paired-end  0.7516892</code></pre>
+## treated1     treated single-read  1.6355014
+## treated2     treated  paired-end  0.7612159
+## treated3     treated  paired-end  0.8326603
+## untreated1 untreated single-read  1.1383376
+## untreated2 untreated single-read  1.7935406
+## untreated3 untreated  paired-end  0.6494828
+## untreated4 untreated  paired-end  0.7516005</code></pre>
 <p>We create a copy of the <em>DESeqDataSet</em>, so that we can rerun the analysis using a multi-factor design.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">ddsMF <-<span class="st"> </span>dds</code></pre></div>
-<p>We can account for the different types of sequencing, and get a clearer picture of the differences attributable to the treatment. As <code>condition</code> is the variable of interest, we put it at the end of the formula. Thus the <em>results</em> function will by default pull the <code>condition</code> results unless <code>contrast</code> or <code>name</code> arguments are specified. Then we can re-run <em>DESeq</em>:</p>
+<p>We change the levels of <code>type</code> so it only contains letters (numbers, underscore and period are also allowed in design factor levels). Be careful when changing level names to use the same order as the current levels.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">levels</span>(ddsMF$type)</code></pre></div>
+<pre><code>## [1] "paired-end"  "single-read"</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">levels</span>(ddsMF$type) <-<span class="st"> </span><span class="kw">sub</span>(<span class="st">"-.*"</span>, <span class="st">""</span>, <span class="kw">levels</span>(ddsMF$type))
+<span class="kw">levels</span>(ddsMF$type)</code></pre></div>
+<pre><code>## [1] "paired" "single"</code></pre>
+<p>We can account for the different types of sequencing, and get a clearer picture of the differences attributable to the treatment. As <code>condition</code> is the variable of interest, we put it at the end of the formula. Thus the <em>results</em> function will by default pull the <code>condition</code> results unless <code>contrast</code> or <code>name</code> arguments are specified.</p>
+<p>Then we can re-run <em>DESeq</em>:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">design</span>(ddsMF) <-<span class="st"> </span><span class="kw">formula</span>(~<span class="st"> </span>type +<span class="st"> </span>condition)
 ddsMF <-<span class="st"> </span><span class="kw">DESeq</span>(ddsMF)</code></pre></div>
 <p>Again, we access the results using the <em>results</em> function.</p>
@@ -747,45 +797,45 @@ ddsMF <-<span class="st"> </span><span class="kw">DESeq</span>(ddsMF)</code><
 <pre><code>## log2 fold change (MLE): condition treated vs untreated 
 ## Wald test p-value: condition treated vs untreated 
 ## DataFrame with 6 rows and 6 columns
-##                 baseMean log2FoldChange     lfcSE        stat     pvalue
-##                <numeric>      <numeric> <numeric>   <numeric>  <numeric>
-## FBgn0000008   95.1440790    -0.04067393 0.2222916 -0.18297560 0.85481716
-## FBgn0000014    1.0565722    -0.08498351 2.1115371 -0.04024722 0.96789603
-## FBgn0000015    0.8467233    -1.86105812 2.2635706 -0.82217807 0.41097556
-## FBgn0000017 4352.5928988    -0.25612969 0.1118570 -2.28979575 0.02203316
-## FBgn0000018  418.6149305    -0.06468996 0.1317230 -0.49110616 0.62335136
-## FBgn0000024    6.4062892     0.31109845 0.7658820  0.40619635 0.68459834
+##                baseMean log2FoldChange     lfcSE        stat     pvalue
+##               <numeric>      <numeric> <numeric>   <numeric>  <numeric>
+## FBgn0000008   95.144292    -0.04055736 0.2200633 -0.18429862 0.85377920
+## FBgn0000014    1.056523    -0.08351882 2.0760816 -0.04022906 0.96791051
+## FBgn0000017 4352.553569    -0.25605701 0.1122166 -2.28181137 0.02250048
+## FBgn0000018  418.610484    -0.06461523 0.1313488 -0.49193622 0.62276444
+## FBgn0000024    6.406200     0.30898382 0.7560075  0.40870468 0.68275640
+## FBgn0000032  989.720217    -0.04837925 0.1208420 -0.40035128 0.68889781
 ##                  padj
 ##             <numeric>
-## FBgn0000008 0.9504077
+## FBgn0000008 0.9494634
 ## FBgn0000014        NA
-## FBgn0000015        NA
-## FBgn0000017 0.1303866
-## FBgn0000018 0.8640563
-## FBgn0000024 0.8919545</code></pre>
+## FBgn0000017 0.1302627
+## FBgn0000018 0.8593904
+## FBgn0000024 0.8877697
+## FBgn0000032 0.8902013</code></pre>
 <p>It is also possible to retrieve the log2 fold changes, <em>p</em> values and adjusted <em>p</em> values of the <code>type</code> variable. The <code>contrast</code> argument of the function <em>results</em> takes a character vector of length three: the name of the variable, the name of the factor level for the numerator of the log2 ratio, and the name of the factor level for the denominator. The <code>contrast</code> argument can also take other forms, as described in the help page fo [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resMFType <-<span class="st"> </span><span class="kw">results</span>(ddsMF,
-                     <span class="dt">contrast=</span><span class="kw">c</span>(<span class="st">"type"</span>, <span class="st">"single-read"</span>, <span class="st">"paired-end"</span>))
+                     <span class="dt">contrast=</span><span class="kw">c</span>(<span class="st">"type"</span>, <span class="st">"single"</span>, <span class="st">"paired"</span>))
 <span class="kw">head</span>(resMFType)</code></pre></div>
-<pre><code>## log2 fold change (MLE): type single-read vs paired-end 
-## Wald test p-value: type single.read vs paired.end 
+<pre><code>## log2 fold change (MLE): type single vs paired 
+## Wald test p-value: type single vs paired 
 ## DataFrame with 6 rows and 6 columns
-##                 baseMean log2FoldChange     lfcSE       stat    pvalue
-##                <numeric>      <numeric> <numeric>  <numeric> <numeric>
-## FBgn0000008   95.1440790    -0.26225891 0.2207626 -1.1879680 0.2348460
-## FBgn0000014    1.0565722     3.29057851 2.0869706  1.5767249 0.1148588
-## FBgn0000015    0.8467233    -0.58154078 2.1821934 -0.2664937 0.7898590
-## FBgn0000017 4352.5928988    -0.09976491 0.1117182 -0.8930049 0.3718545
-## FBgn0000018  418.6149305     0.22930201 0.1306356  1.7552790 0.0792116
-## FBgn0000024    6.4062892     0.30788127 0.7611816  0.4044781 0.6858612
+##                baseMean log2FoldChange     lfcSE       stat     pvalue
+##               <numeric>      <numeric> <numeric>  <numeric>  <numeric>
+## FBgn0000008   95.144292     -0.2623745 0.2185279 -1.2006453 0.22988882
+## FBgn0000014    1.056523      3.2898915 2.0531720  1.6023457 0.10907917
+## FBgn0000017 4352.553569     -0.1000200 0.1120782 -0.8924127 0.37217178
+## FBgn0000018  418.610484      0.2290491 0.1302603  1.7583952 0.07868028
+## FBgn0000024    6.406200      0.3060704 0.7514061  0.4073303 0.68376543
+## FBgn0000032  989.720217      0.2374130 0.1202745  1.9739259 0.04839017
 ##                  padj
 ##             <numeric>
-## FBgn0000008 0.5310094
+## FBgn0000008 0.5361332
 ## FBgn0000014        NA
-## FBgn0000015        NA
-## FBgn0000017 0.6693382
-## FBgn0000018 0.2848511
-## FBgn0000024        NA</code></pre>
+## FBgn0000017 0.6831308
+## FBgn0000018 0.2917133
+## FBgn0000024 0.8804532
+## FBgn0000032 0.2175655</code></pre>
 <p>If the variable is continuous or an interaction term (see <a href="#interactions">section on interactions</a>) then the results can be extracted using the <code>name</code> argument to <em>results</em>, where the name is one of elements returned by <code>resultsNames(dds)</code>.</p>
 <p><a name="transform"></a></p>
 </div>
@@ -798,29 +848,32 @@ ddsMF <-<span class="st"> </span><span class="kw">DESeq</span>(ddsMF)</code><
 <p>Maybe the most obvious choice of transformation is the logarithm. Since count values for a gene can be zero in some conditions (and non-zero in others), some advocate the use of <em>pseudocounts</em>, i.e. transformations of the form:</p>
 <p><span class="math display">\[ y = \log_2(n + n_0) \]</span></p>
 <p>where <em>n</em> represents the count values and <span class="math inline">\(n_0\)</span> is a positive constant.</p>
-<p>In this section, we discuss two alternative approaches that offer more theoretical justification and a rational way of choosing the parameter equivalent to <span class="math inline">\(n_0\)</span> above. The <em>regularized logarithm</em> or <em>rlog</em> incorporates a prior on the sample differences <span class="citation">(Love, Huber, and Anders 2014)</span>, and the other uses the concept of variance stabilizing transformations (VST) <span class="citation">(Tibshirani 1988; Huber  [...]
-<p>The point of these two transformations, the <em>rlog</em> and the VST, is to remove the dependence of the variance on the mean, particularly the high variance of the logarithm of count data when the mean is low. Both <em>rlog</em> and VST use the experiment-wide trend of variance over mean, in order to transform the data to remove the experiment-wide trend. Note that we do not require or desire that all the genes have <em>exactly</em> the same variance after transformation. Indeed, in [...]
+<p>In this section, we discuss two alternative approaches that offer more theoretical justification and a rational way of choosing parameters equivalent to <span class="math inline">\(n_0\)</span> above. One makes use of the concept of variance stabilizing transformations (VST) <span class="citation">(Tibshirani 1988; Huber et al. 2003; Anders and Huber 2010)</span>, and the other is the <em>regularized logarithm</em> or <em>rlog</em>, which incorporates a prior on the sample differences [...]
+<p>The point of these two transformations, the VST and the <em>rlog</em>, is to remove the dependence of the variance on the mean, particularly the high variance of the logarithm of count data when the mean is low. Both VST and <em>rlog</em> use the experiment-wide trend of variance over mean, in order to transform the data to remove the experiment-wide trend. Note that we do not require or desire that all the genes have <em>exactly</em> the same variance after transformation. Indeed, in [...]
 <p><strong>Note on running time:</strong> if you have many samples (e.g. 100s), the <em>rlog</em> function might take too long, and so the <em>vst</em> function will be a faster choice. The rlog and VST have similar properties, but the rlog requires fitting a shrinkage term for each sample and each gene which takes time. See the DESeq2 paper for more discussion on the differences <span class="citation">(Love, Huber, and Anders 2014)</span>.</p>
 <div id="blind-dispersion-estimation" class="section level3">
 <h3>Blind dispersion estimation</h3>
-<p>The two functions, <em>rlog</em> and <em>vst</em> have an argument <code>blind</code>, for whether the transformation should be blind to the sample information specified by the design formula. When <code>blind</code> equals <code>TRUE</code> (the default), the functions will re-estimate the dispersions using only an intercept. This setting should be used in order to compare samples in a manner wholly unbiased by the information about experimental groups, for example to perform sample  [...]
+<p>The two functions, <em>vst</em> and <em>rlog</em> have an argument <code>blind</code>, for whether the transformation should be blind to the sample information specified by the design formula. When <code>blind</code> equals <code>TRUE</code> (the default), the functions will re-estimate the dispersions using only an intercept. This setting should be used in order to compare samples in a manner wholly unbiased by the information about experimental groups, for example to perform sample  [...]
 <p>However, blind dispersion estimation is not the appropriate choice if one expects that many or the majority of genes (rows) will have large differences in counts which are explainable by the experimental design, and one wishes to transform the data for downstream analysis. In this case, using blind dispersion estimation will lead to large estimates of dispersion, as it attributes differences due to experimental design as unwanted <em>noise</em>, and will result in overly shrinking the [...]
 </div>
 <div id="extracting-transformed-values" class="section level3">
 <h3>Extracting transformed values</h3>
-<p>These transformation functions return an object of class <em>DESeqTransform</em> which is a subclass of <em>RangedSummarizedExperiment</em>. For ~20 samples, running on a newly created <code>DESeqDataSet</code>, <em>rlog</em> may take 30 seconds, <em>varianceStabilizingTransformation</em> may take 5 seconds, and <em>vst</em> less than 1 second (by subsetting to 1000 genes for calculating the global dispersion trend). However, the running times are shorter and more similar with <code>b [...]
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rld <-<span class="st"> </span><span class="kw">rlog</span>(dds, <span class="dt">blind=</span><span class="ot">FALSE</span>)
-vsd <-<span class="st"> </span><span class="kw">varianceStabilizingTransformation</span>(dds, <span class="dt">blind=</span><span class="ot">FALSE</span>)
-vsd.fast <-<span class="st"> </span><span class="kw">vst</span>(dds, <span class="dt">blind=</span><span class="ot">FALSE</span>)
-<span class="kw">head</span>(<span class="kw">assay</span>(rld), <span class="dv">3</span>)</code></pre></div>
-<pre><code>##               treated1   treated2   treated3 untreated1 untreated2
-## FBgn0000008  6.5052069  6.6702250  6.5002811  6.4775687  6.5312761
-## FBgn0000014  0.1781321  0.1489758  0.1486405  0.1997286  0.1537222
-## FBgn0000015 -0.2871970 -0.2937085 -0.2939834 -0.2948680 -0.2801519
-##             untreated3 untreated4
-## FBgn0000008  6.6743786  6.5524385
-## FBgn0000014  0.1495966  0.1490241
-## FBgn0000015 -0.2765435 -0.2635561</code></pre>
+<p>These transformation functions return an object of class <em>DESeqTransform</em> which is a subclass of <em>RangedSummarizedExperiment</em>. For ~20 samples, running on a newly created <code>DESeqDataSet</code>, <em>rlog</em> may take 30 seconds, while <em>vst</em> takes less than 1 second. The running times are shorter when using <code>blind=FALSE</code> and if the function <em>DESeq</em> has already been run, because then it is not necessary to re-estimate the dispersion values. The [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">vsd <-<span class="st"> </span><span class="kw">vst</span>(dds, <span class="dt">blind=</span><span class="ot">FALSE</span>)
+rld <-<span class="st"> </span><span class="kw">rlog</span>(dds, <span class="dt">blind=</span><span class="ot">FALSE</span>)
+<span class="kw">head</span>(<span class="kw">assay</span>(vsd), <span class="dv">3</span>)</code></pre></div>
+<pre><code>##              treated1  treated2  treated3 untreated1 untreated2 untreated3
+## FBgn0000008  7.607799  7.834807  7.594933   7.567177   7.642058   7.844499
+## FBgn0000014  6.318607  6.040987  6.040987   6.412578   6.173698   6.040987
+## FBgn0000017 11.938303 12.024550 12.013558  12.045714  12.284641  12.455933
+##             untreated4
+## FBgn0000008   7.669033
+## FBgn0000014   6.040987
+## FBgn0000017  12.077397</code></pre>
+</div>
+<div id="variance-stabilizing-transformation" class="section level3">
+<h3>Variance stabilizing transformation</h3>
+<p>Above, we used a parametric fit for the dispersion. In this case, the closed-form expression for the variance stabilizing transformation is used by the <em>vst</em> function. If a local fit is used (option <code>fitType="locfit"</code> to <em>estimateDispersions</em>) a numerical integration is used instead. The transformed data should be approximated variance stabilized and also includes correction for size factors or normalization factors. The transformed data is on the lo [...]
 </div>
 <div id="regularized-log-transformation" class="section level3">
 <h3>Regularized log transformation</h3>
@@ -829,10 +882,6 @@ vsd.fast <-<span class="st"> </span><span class="kw">vst</span>(dds, <span cl
 <p>where <span class="math inline">\(q_{ij}\)</span> is a parameter proportional to the expected true concentration of fragments for gene <em>i</em> and sample <em>j</em> (see formula <a href="#theory">below</a>), <span class="math inline">\(\beta_{i0}\)</span> is an intercept which does not undergo shrinkage, and <span class="math inline">\(\beta_{ij}\)</span> is the sample-specific effect which is shrunk toward zero based on the dispersion-mean trend over the entire dataset. The trend  [...]
 <p>Note that, as <span class="math inline">\(q_{ij}\)</span> represents the part of the mean value <span class="math inline">\(\mu_{ij}\)</span> after the size factor <span class="math inline">\(s_j\)</span> has been divided out, it is clear that the rlog transformation inherently accounts for differences in sequencing depth. Without priors, this design matrix would lead to a non-unique solution, however the addition of a prior on non-intercept betas allows for a unique solution to be fo [...]
 </div>
-<div id="variance-stabilizing-transformation" class="section level3">
-<h3>Variance stabilizing transformation</h3>
-<p>Above, we used a parametric fit for the dispersion. In this case, the closed-form expression for the variance stabilizing transformation is used by <em>varianceStabilizingTransformation</em>, which is derived in the file <code>vst.pdf</code>, that is distributed in the package alongside this vignette. If a local fit is used (option <code>fitType="locfit"</code> to <em>estimateDispersions</em>) a numerical integration is used instead.</p>
-</div>
 <div id="effects-of-transformations-on-the-variance" class="section level3">
 <h3>Effects of transformations on the variance</h3>
 <p>The figure below plots the standard deviation of the transformed data, across samples, against the mean, using the shifted logarithm transformation, the regularized log transformation and the variance stabilizing transformation. The shifted logarithm has elevated standard deviation in the lower count range, and the regularized log to a lesser extent, while for the variance stabilized data the standard deviation is roughly constant along the whole dynamic range.</p>
@@ -840,13 +889,12 @@ vsd.fast <-<span class="st"> </span><span class="kw">vst</span>(dds, <span cl
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># this gives log2(n + 1)</span>
 ntd <-<span class="st"> </span><span class="kw">normTransform</span>(dds)
 <span class="kw">library</span>(<span class="st">"vsn"</span>)
-notAllZero <-<span class="st"> </span>(<span class="kw">rowSums</span>(<span class="kw">counts</span>(dds))><span class="dv">0</span>)
-<span class="kw">meanSdPlot</span>(<span class="kw">assay</span>(ntd)[notAllZero,])</code></pre></div>
-<p><img src=" [...]
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">meanSdPlot</span>(<span class="kw">assay</span>(rld[notAllZero,]))</code></pre></div>
-<p><img src=" [...]
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">meanSdPlot</span>(<span class="kw">assay</span>(vsd[notAllZero,]))</code></pre></div>
-<p><img src=" [...]
+<span class="kw">meanSdPlot</span>(<span class="kw">assay</span>(ntd))</code></pre></div>
+<p><img src=" [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">meanSdPlot</span>(<span class="kw">assay</span>(vsd))</code></pre></div>
+<p><img src=" [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">meanSdPlot</span>(<span class="kw">assay</span>(rld))</code></pre></div>
+<p><img src=" [...]
 </div>
 </div>
 <div id="data-quality-assessment-by-sample-clustering-and-visualization" class="section level2">
@@ -863,43 +911,43 @@ df <-<span class="st"> </span><span class="kw">as.data.frame</span>(<span cla
 <span class="kw">pheatmap</span>(<span class="kw">assay</span>(ntd)[select,], <span class="dt">cluster_rows=</span><span class="ot">FALSE</span>, <span class="dt">show_rownames=</span><span class="ot">FALSE</span>,
          <span class="dt">cluster_cols=</span><span class="ot">FALSE</span>, <span class="dt">annotation_col=</span>df)</code></pre></div>
 <p><img src=" [...]
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">pheatmap</span>(<span class="kw">assay</span>(rld)[select,], <span class="dt">cluster_rows=</span><span class="ot">FALSE</span>, <span class="dt">show_rownames=</span><span class="ot">FALSE</span>,
-         <span class="dt">cluster_cols=</span><span class="ot">FALSE</span>, <span class="dt">annotation_col=</span>df)</code></pre></div>
-<p><img src=" [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">pheatmap</span>(<span class="kw">assay</span>(vsd)[select,], <span class="dt">cluster_rows=</span><span class="ot">FALSE</span>, <span class="dt">show_rownames=</span><span class="ot">FALSE</span>,
          <span class="dt">cluster_cols=</span><span class="ot">FALSE</span>, <span class="dt">annotation_col=</span>df)</code></pre></div>
 <p><img src=" [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">pheatmap</span>(<span class="kw">assay</span>(rld)[select,], <span class="dt">cluster_rows=</span><span class="ot">FALSE</span>, <span class="dt">show_rownames=</span><span class="ot">FALSE</span>,
+         <span class="dt">cluster_cols=</span><span class="ot">FALSE</span>, <span class="dt">annotation_col=</span>df)</code></pre></div>
+<p><img src=" [...]
 </div>
 <div id="heatmap-of-the-sample-to-sample-distances" class="section level3">
 <h3>Heatmap of the sample-to-sample distances</h3>
-<p>Another use of the transformed data is sample clustering. Here, we apply the <em>dist</em> function to the transpose of the transformed count matrix to get sample-to-sample distances. We could alternatively use the variance stabilized transformation here.</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sampleDists <-<span class="st"> </span><span class="kw">dist</span>(<span class="kw">t</span>(<span class="kw">assay</span>(rld)))</code></pre></div>
+<p>Another use of the transformed data is sample clustering. Here, we apply the <em>dist</em> function to the transpose of the transformed count matrix to get sample-to-sample distances.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sampleDists <-<span class="st"> </span><span class="kw">dist</span>(<span class="kw">t</span>(<span class="kw">assay</span>(vsd)))</code></pre></div>
 <p>A heatmap of this distance matrix gives us an overview over similarities and dissimilarities between samples. We have to provide a hierarchical clustering <code>hc</code> to the heatmap function based on the sample distances, or else the heatmap function would calculate a clustering based on the distances between the rows/columns of the distance matrix.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"RColorBrewer"</span>)
 sampleDistMatrix <-<span class="st"> </span><span class="kw">as.matrix</span>(sampleDists)
-<span class="kw">rownames</span>(sampleDistMatrix) <-<span class="st"> </span><span class="kw">paste</span>(rld$condition, rld$type, <span class="dt">sep=</span><span class="st">"-"</span>)
+<span class="kw">rownames</span>(sampleDistMatrix) <-<span class="st"> </span><span class="kw">paste</span>(vsd$condition, vsd$type, <span class="dt">sep=</span><span class="st">"-"</span>)
 <span class="kw">colnames</span>(sampleDistMatrix) <-<span class="st"> </span><span class="ot">NULL</span>
 colors <-<span class="st"> </span><span class="kw">colorRampPalette</span>( <span class="kw">rev</span>(<span class="kw">brewer.pal</span>(<span class="dv">9</span>, <span class="st">"Blues"</span>)) )(<span class="dv">255</span>)
 <span class="kw">pheatmap</span>(sampleDistMatrix,
          <span class="dt">clustering_distance_rows=</span>sampleDists,
          <span class="dt">clustering_distance_cols=</span>sampleDists,
          <span class="dt">col=</span>colors)</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 </div>
 <div id="principal-component-plot-of-the-samples" class="section level3">
 <h3>Principal component plot of the samples</h3>
 <p>Related to the distance matrix is the PCA plot, which shows the samples in the 2D plane spanned by their first two principal components. This type of plot is useful for visualizing the overall effect of experimental covariates and batch effects.</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plotPCA</span>(rld, <span class="dt">intgroup=</span><span class="kw">c</span>(<span class="st">"condition"</span>, <span class="st">"type"</span>))</code></pre></div>
-<p><img src=" [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plotPCA</span>(vsd, <span class="dt">intgroup=</span><span class="kw">c</span>(<span class="st">"condition"</span>, <span class="st">"type"</span>))</code></pre></div>
+<p><img src=" [...]
 <p>It is also possible to customize the PCA plot using the <em>ggplot</em> function.</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">pcaData <-<span class="st"> </span><span class="kw">plotPCA</span>(rld, <span class="dt">intgroup=</span><span class="kw">c</span>(<span class="st">"condition"</span>, <span class="st">"type"</span>), <span class="dt">returnData=</span><span class="ot">TRUE</span>)
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">pcaData <-<span class="st"> </span><span class="kw">plotPCA</span>(vsd, <span class="dt">intgroup=</span><span class="kw">c</span>(<span class="st">"condition"</span>, <span class="st">"type"</span>), <span class="dt">returnData=</span><span class="ot">TRUE</span>)
 percentVar <-<span class="st"> </span><span class="kw">round</span>(<span class="dv">100</span> *<span class="st"> </span><span class="kw">attr</span>(pcaData, <span class="st">"percentVar"</span>))
 <span class="kw">ggplot</span>(pcaData, <span class="kw">aes</span>(PC1, PC2, <span class="dt">color=</span>condition, <span class="dt">shape=</span>type)) +
 <span class="st">  </span><span class="kw">geom_point</span>(<span class="dt">size=</span><span class="dv">3</span>) +
 <span class="st">  </span><span class="kw">xlab</span>(<span class="kw">paste0</span>(<span class="st">"PC1: "</span>,percentVar[<span class="dv">1</span>],<span class="st">"% variance"</span>)) +
 <span class="st">  </span><span class="kw">ylab</span>(<span class="kw">paste0</span>(<span class="st">"PC2: "</span>,percentVar[<span class="dv">2</span>],<span class="st">"% variance"</span>)) +<span class="st"> </span>
 <span class="st">  </span><span class="kw">coord_fixed</span>()</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 </div>
 </div>
 </div>
@@ -938,13 +986,12 @@ dds <-<span class="st"> </span><span class="kw">DESeq</span>(dds)
 <p>The following two plots diagram hypothetical genotype-specific condition effects, which could be modeled with interaction terms by using a design of <code>~genotype + condition + genotype:condition</code>.</p>
 <p>In the first plot (Gene 1), note that the condition effect is consistent across genotypes. Although condition A has a different baseline for I,II, and III, the condition effect is a log2 fold change of about 2 for each genotype. Using a model with an interaction term <code>genotype:condition</code>, the interaction terms for genotype II and genotype III will be nearly 0.</p>
 <p>Here, the y-axis represents log2(n+1), and each group has 20 samples (black dots). A red line connects the mean of the groups within each genotype.</p>
-<p><img src=" [...]
+<p><img src=" [...]
 <p>In the second plot (Gene 2), we can see that the condition effect is not consistent across genotype. Here the main condition effect (the effect for the reference genotype I) is again 2. However, this time the interaction terms will be around 1 for genotype II and -4 for genotype III. This is because the condition effect is higher by 1 for genotype II compared to genotype I, and lower by 4 for genotype III compared to genotype I. The condition effect for genotype II (or III) is obtaine [...]
-<p><img src=" [...]
+<p><img src=" [...]
 <p>Now we will continue to explain the use of interactions in order to test for <em>differences</em> in condition effects. We continue with the example of condition effects across three genotypes (I, II, and III).</p>
 <p>The key point to remember about designs with interaction terms is that, unlike for a design <code>~genotype + condition</code>, where the condition effect represents the <em>overall</em> effect controlling for differences due to genotype, by adding <code>genotype:condition</code>, the main condition effect only represents the effect of condition for the <em>reference level</em> of genotype (I, or whichever level was defined by the user as the reference level). The interaction terms <c [...]
 <p>This genotype-condition interaction example is examined in further detail in Example 3 in the help page for <em>results</em>, which can be found by typing <code>?results</code>. In particular, we show how to test for differences in the condition effect across genotype, and we show how to obtain the condition effect for non-reference genotypes.</p>
-<p>Note that for DESeq2 versions higher than 1.10, the <em>DESeq</em> function will turn off log fold change shrinkage (setting <code>betaPrior=FALSE</code>), for designs which contain an interaction term. Turning off the log fold change shrinkage allows the software to use standard model matrices (as would be produced by <em>model.matrix</em>), where the interaction coefficients are easier to interpret.</p>
 </div>
 <div id="time-series-experiments" class="section level2">
 <h2>Time-series experiments</h2>
@@ -972,13 +1019,13 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds)</code></p
 <p><strong>Note on many outliers:</strong> if there are very many outliers (e.g. many hundreds or thousands) reported by <code>summary(res)</code>, one might consider further exploration to see if a single sample or a few samples should be removed due to low quality. The automatic outlier filtering/replacement is most useful in situations which the number of outliers is limited. When there are thousands of reported outliers, it might make more sense to turn off the outlier filtering/repl [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">8</span>,<span class="dv">5</span>,<span class="dv">2</span>,<span class="dv">2</span>))
 <span class="kw">boxplot</span>(<span class="kw">log10</span>(<span class="kw">assays</span>(dds)[[<span class="st">"cooks"</span>]]), <span class="dt">range=</span><span class="dv">0</span>, <span class="dt">las=</span><span class="dv">2</span>)</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 </div>
 <div id="dispersion-plot-and-fitting-alternatives" class="section level2">
 <h2>Dispersion plot and fitting alternatives</h2>
 <p>Plotting the dispersion estimates is a useful diagnostic. The dispersion plot below is typical, with the final estimates shrunk from the gene-wise estimates towards the fitted estimates. Some gene-wise estimates are flagged as outliers and not shrunk towards the fitted value, (this outlier detection is described in the manual page for <em>estimateDispersionsMAP</em>). The amount of shrinkage can be more or less than seen here, depending on the sample size, the number of coefficients,  [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plotDispEsts</span>(dds)</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 <div id="local-or-mean-dispersion-fit" class="section level3">
 <h3>Local or mean dispersion fit</h3>
 <p>A local smoothed dispersion fit is automatically substitited in the case that the parametric curve doesn’t fit the observed dispersion mean relationship. This can be prespecified by providing the argument <code>fitType="local"</code> to either <em>DESeq</em> or <em>estimateDispersions</em>. Additionally, using the mean of gene-wise disperion estimates as the fitted value can be specified by providing the argument <code>fitType="mean"</code>.</p>
@@ -1003,23 +1050,23 @@ ddsCustom <-<span class="st"> </span><span class="kw">estimateDispersionsMAP<
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">metadata</span>(res)$alpha</code></pre></div>
 <pre><code>## [1] 0.1</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">metadata</span>(res)$filterThreshold</code></pre></div>
-<pre><code>## 27.14286% 
-##  5.561712</code></pre>
+<pre><code>## 15.5102% 
+## 6.150425</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plot</span>(<span class="kw">metadata</span>(res)$filterNumRej, 
      <span class="dt">type=</span><span class="st">"b"</span>, <span class="dt">ylab=</span><span class="st">"number of rejections"</span>,
      <span class="dt">xlab=</span><span class="st">"quantiles of filter"</span>)
 <span class="kw">lines</span>(<span class="kw">metadata</span>(res)$lo.fit, <span class="dt">col=</span><span class="st">"red"</span>)
 <span class="kw">abline</span>(<span class="dt">v=</span><span class="kw">metadata</span>(res)$filterTheta)</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 <p>Independent filtering can be turned off by setting <code>independentFiltering</code> to <code>FALSE</code>.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resNoFilt <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">independentFiltering=</span><span class="ot">FALSE</span>)
 <span class="kw">addmargins</span>(<span class="kw">table</span>(<span class="dt">filtering=</span>(res$padj <<span class="st"> </span>.<span class="dv">1</span>),
                  <span class="dt">noFiltering=</span>(resNoFilt$padj <<span class="st"> </span>.<span class="dv">1</span>)))</code></pre></div>
 <pre><code>##          noFiltering
 ## filtering FALSE TRUE  Sum
-##     FALSE  7426    0 7426
-##     TRUE    113  939 1052
-##     Sum    7539  939 8478</code></pre>
+##     FALSE  7327    0 7327
+##     TRUE     74  980 1054
+##     Sum    7401  980 8381</code></pre>
 </div>
 <div id="tests-of-log2-fold-change-above-or-below-a-threshold" class="section level2">
 <h2>Tests of log2 fold change above or below a threshold</h2>
@@ -1030,25 +1077,19 @@ ddsCustom <-<span class="st"> </span><span class="kw">estimateDispersionsMAP<
 <li><code>greater</code> - <span class="math inline">\(\beta > x\)</span></li>
 <li><code>less</code> - <span class="math inline">\(\beta < -x\)</span></li>
 </ul>
-<p>The test <code>altHypothesis="lessAbs"</code> requires that the user have run <em>DESeq</em> with the argument <code>betaPrior=FALSE</code>. To understand the reason for this requirement, consider that during hypothesis testing, the null hypothesis is favored unless the data provide strong evidence to reject the null. For this test, including a zero-centered prior on log fold change would favor the alternative hypothesis, shrinking log fold changes toward zero. Removing the  [...]
-<p>The four possible values of <code>altHypothesis</code> are demonstrated in the following code and visually by MA-plots in the following figures. First we run <em>DESeq</em> and specify <code>betaPrior=FALSE</code> in order to demonstrate <code>altHypothesis="lessAbs"</code>.</p>
-<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">ddsNoPrior <-<span class="st"> </span><span class="kw">DESeq</span>(dds, <span class="dt">betaPrior=</span><span class="ot">FALSE</span>)</code></pre></div>
-<p>In order to produce results tables for the following tests, the same arguments (except <code>ylim</code>) would be provided to the <em>results</em> function.</p>
+<p>The four possible values of <code>altHypothesis</code> are demonstrated in the following code and visually by MA-plots in the following figures.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mfrow=</span><span class="kw">c</span>(<span class="dv">2</span>,<span class="dv">2</span>),<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">2</span>,<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">1</span>))
-yl <-<span class="st"> </span><span class="kw">c</span>(-<span class="fl">2.5</span>,<span class="fl">2.5</span>)
+ylim <-<span class="st"> </span><span class="kw">c</span>(-<span class="fl">2.5</span>,<span class="fl">2.5</span>)
 resGA <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">lfcThreshold=</span>.<span class="dv">5</span>, <span class="dt">altHypothesis=</span><span class="st">"greaterAbs"</span>)
-resLA <-<span class="st"> </span><span class="kw">results</span>(ddsNoPrior, <span class="dt">lfcThreshold=</span>.<span class="dv">5</span>, <span class="dt">altHypothesis=</span><span class="st">"lessAbs"</span>)
+resLA <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">lfcThreshold=</span>.<span class="dv">5</span>, <span class="dt">altHypothesis=</span><span class="st">"lessAbs"</span>)
 resG <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">lfcThreshold=</span>.<span class="dv">5</span>, <span class="dt">altHypothesis=</span><span class="st">"greater"</span>)
 resL <-<span class="st"> </span><span class="kw">results</span>(dds, <span class="dt">lfcThreshold=</span>.<span class="dv">5</span>, <span class="dt">altHypothesis=</span><span class="st">"less"</span>)
-<span class="kw">plotMA</span>(resGA, <span class="dt">ylim=</span>yl)
-<span class="kw">abline</span>(<span class="dt">h=</span><span class="kw">c</span>(-.<span class="dv">5</span>,.<span class="dv">5</span>),<span class="dt">col=</span><span class="st">"dodgerblue"</span>,<span class="dt">lwd=</span><span class="dv">2</span>)
-<span class="kw">plotMA</span>(resLA, <span class="dt">ylim=</span>yl)
-<span class="kw">abline</span>(<span class="dt">h=</span><span class="kw">c</span>(-.<span class="dv">5</span>,.<span class="dv">5</span>),<span class="dt">col=</span><span class="st">"dodgerblue"</span>,<span class="dt">lwd=</span><span class="dv">2</span>)
-<span class="kw">plotMA</span>(resG, <span class="dt">ylim=</span>yl)
-<span class="kw">abline</span>(<span class="dt">h=</span>.<span class="dv">5</span>,<span class="dt">col=</span><span class="st">"dodgerblue"</span>,<span class="dt">lwd=</span><span class="dv">2</span>)
-<span class="kw">plotMA</span>(resL, <span class="dt">ylim=</span>yl)
-<span class="kw">abline</span>(<span class="dt">h=</span>-.<span class="dv">5</span>,<span class="dt">col=</span><span class="st">"dodgerblue"</span>,<span class="dt">lwd=</span><span class="dv">2</span>)</code></pre></div>
-<p><img src=" [...]
+drawLines <-<span class="st"> </span>function() <span class="kw">abline</span>(<span class="dt">h=</span><span class="kw">c</span>(-.<span class="dv">5</span>,.<span class="dv">5</span>),<span class="dt">col=</span><span class="st">"dodgerblue"</span>,<span class="dt">lwd=</span><span class="dv">2</span>)
+<span class="kw">plotMA</span>(resGA, <span class="dt">ylim=</span>ylim); <span class="kw">drawLines</span>()
+<span class="kw">plotMA</span>(resLA, <span class="dt">ylim=</span>ylim); <span class="kw">drawLines</span>()
+<span class="kw">plotMA</span>(resG, <span class="dt">ylim=</span>ylim); <span class="kw">drawLines</span>()
+<span class="kw">plotMA</span>(resL, <span class="dt">ylim=</span>ylim); <span class="kw">drawLines</span>()</code></pre></div>
+<p><img src=" [...]
 <p><a name="access"></a></p>
 </div>
 <div id="access-to-all-calculated-values" class="section level2">
@@ -1056,12 +1097,12 @@ resL <-<span class="st"> </span><span class="kw">results</span>(dds, <span cl
 <p>All row-wise calculated values (intermediate dispersion calculations, coefficients, standard errors, etc.) are stored in the <em>DESeqDataSet</em> object, e.g. <code>dds</code> in this vignette. These values are accessible by calling <em>mcols</em> on <code>dds</code>. Descriptions of the columns are accessible by two calls to <em>mcols</em>. Note that the call to <code>substr</code> below is only for display purposes.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">mcols</span>(dds,<span class="dt">use.names=</span><span class="ot">TRUE</span>)[<span class="dv">1</span>:<span class="dv">4</span>,<span class="dv">1</span>:<span class="dv">4</span>]</code></pre></div>
 <pre><code>## DataFrame with 4 rows and 4 columns
-##                    gene     baseMean      baseVar   allZero
-##                <factor>    <numeric>    <numeric> <logical>
-## FBgn0000008 FBgn0000008   95.1440790 2.246236e+02     FALSE
-## FBgn0000014 FBgn0000014    1.0565722 2.962193e+00     FALSE
-## FBgn0000015 FBgn0000015    0.8467233 1.008136e+00     FALSE
-## FBgn0000017 FBgn0000017 4352.5928988 3.616417e+05     FALSE</code></pre>
+##                    gene    baseMean      baseVar   allZero
+##                <factor>   <numeric>    <numeric> <logical>
+## FBgn0000008 FBgn0000008   95.144292 2.248206e+02     FALSE
+## FBgn0000014 FBgn0000014    1.056523 2.961952e+00     FALSE
+## FBgn0000017 FBgn0000017 4352.553569 3.615380e+05     FALSE
+## FBgn0000018 FBgn0000018  418.610484 2.349027e+03     FALSE</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">substr</span>(<span class="kw">names</span>(<span class="kw">mcols</span>(dds)),<span class="dv">1</span>,<span class="dv">10</span>) </code></pre></div>
 <pre><code>##  [1] "gene"       "baseMean"   "baseVar"    "allZero"    "dispGeneEs"
 ##  [6] "dispFit"    "dispersion" "dispIter"   "dispOutlie" "dispMAP"   
@@ -1078,77 +1119,154 @@ resL <-<span class="st"> </span><span class="kw">results</span>(dds, <span cl
 ## allZero  intermediate                all counts for a gene are zero</code></pre>
 <p>The mean values <span class="math inline">\(\mu_{ij} = s_j q_{ij}\)</span> and the Cook’s distances for each gene and sample are stored as matrices in the assays slot:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">assays</span>(dds)[[<span class="st">"mu"</span>]])</code></pre></div>
-<pre><code>##                 treated1     treated2     treated3  untreated1  untreated2
-## FBgn0000008  154.3987025   71.8640585   78.6026192  107.292174  169.007435
-## FBgn0000014    1.4994497    0.6979109    0.7633527    1.472389    2.319318
-## FBgn0000015    0.5665956    0.2637189    0.2884474    1.454158    2.290600
-## FBgn0000017 6450.3164679 3002.2656444 3283.7825771 5301.611319 8351.137808
-## FBgn0000018  658.3598354  306.4300993  335.1634866  492.700578  776.105635
-## FBgn0000024   11.4502130    5.3294410    5.8291729    6.885336   10.845833
+<pre><code>##               treated1     treated2     treated3  untreated1  untreated2
+## FBgn0000008  154.39604   71.8609681   78.6055335  107.292913  169.048447
+## FBgn0000014    1.50181    0.6989914    0.7645958    1.473259    2.321236
+## FBgn0000017 6450.25959 3002.1618954 3283.9320689 5301.761109 8353.342790
+## FBgn0000018  658.34912  306.4172250  335.1762452  492.704366  776.294589
+## FBgn0000024   11.44974    5.3290832    5.8292484    6.885635   10.848861
+## FBgn0000032 1561.83053  726.9270379  795.1533243 1158.470643 1825.261870
 ##               untreated3   untreated4
-## FBgn0000008   61.2260212   70.8539000
-## FBgn0000014    0.8402153    0.9723404
-## FBgn0000015    0.8298117    0.9603008
-## FBgn0000017 3025.3517513 3501.0926097
-## FBgn0000018  281.1583999  325.3709575
-## FBgn0000024    3.9291004    4.5469570</code></pre>
+## FBgn0000008   61.2163768   70.8413790
+## FBgn0000014    0.8405737    0.9727364
+## FBgn0000017 3024.9398185 3500.5486968
+## FBgn0000018  281.1143362  325.3137193
+## FBgn0000024    3.9286252    4.5463198
+## FBgn0000032  660.9697995  764.8935546</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">assays</span>(dds)[[<span class="st">"cooks"</span>]])</code></pre></div>
-<pre><code>##               treated1    treated2    treated3 untreated1   untreated2
-## FBgn0000008 0.08830715 0.303802564 0.077771958 0.09822247 0.0136367583
-## FBgn0000014 1.88689792 0.218390054 0.251831283 1.88370036 0.1838377649
-## FBgn0000015 0.08092977 0.072800933 0.077912313 0.12891986 0.0028137530
-## FBgn0000017 0.01373552 0.004963502 0.002162421 0.08041054 0.0106072921
-## FBgn0000018 0.09518037 0.004725965 0.054717320 0.18464143 0.0022810730
-## FBgn0000024 0.06630034 0.131135115 0.031232734 0.27067340 0.0004894252
+<pre><code>##               treated1    treated2   treated3 untreated1   untreated2
+## FBgn0000008 0.08830678 0.303871771 0.07781046 0.09824099 0.0137763910
+## FBgn0000014 1.88673190 0.218246742 0.25186426 1.88310396 0.1847650641
+## FBgn0000017 0.01372829 0.004978868 0.00214944 0.08044192 0.0104725895
+## FBgn0000018 0.09518416 0.004710886 0.05477260 0.18460854 0.0023367713
+## FBgn0000024 0.06631472 0.131131695 0.03122767 0.27064873 0.0004706695
+## FBgn0000032 0.07377787 0.015891436 0.02053258 0.34090628 0.0217426932
 ##             untreated3   untreated4
-## FBgn0000008 0.18898455 0.0005301336
-## FBgn0000014 0.15380104 0.1890109982
-## FBgn0000015 0.00324020 0.1048705154
-## FBgn0000017 0.17246780 0.0550852696
-## FBgn0000018 0.07648755 0.0108795118
-## FBgn0000024 0.03105357 0.0814894716</code></pre>
+## FBgn0000008 0.18921869 0.0005147318
+## FBgn0000014 0.15347774 0.1887766572
+## FBgn0000017 0.17278836 0.0549333693
+## FBgn0000018 0.07634172 0.0108036598
+## FBgn0000024 0.03100295 0.0813891462
+## FBgn0000032 0.02482481 0.0774936225</code></pre>
 <p>The dispersions <span class="math inline">\(\alpha_i\)</span> can be accessed with the <em>dispersions</em> function.</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">dispersions</span>(dds))</code></pre></div>
-<pre><code>## [1] 0.03040956 2.86301787 2.20957889 0.01283362 0.01560434 0.23856732</code></pre>
+<pre><code>## [1] 0.03035168 2.80571497 0.01289667 0.01565291 0.23770103 0.01691016</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">mcols</span>(dds)$dispersion)</code></pre></div>
-<pre><code>## [1] 0.03040956 2.86301787 2.20957889 0.01283362 0.01560434 0.23856732</code></pre>
+<pre><code>## [1] 0.03035168 2.80571497 0.01289667 0.01565291 0.23770103 0.01691016</code></pre>
 <p>The size factors <span class="math inline">\(s_j\)</span> are accessible via <em>sizeFactors</em>:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sizeFactors</span>(dds)</code></pre></div>
 <pre><code>##   treated1   treated2   treated3 untreated1 untreated2 untreated3 
-##  1.6355751  0.7612698  0.8326526  1.1382630  1.7930004  0.6495470 
+##  1.6355014  0.7612159  0.8326603  1.1383376  1.7935406  0.6494828 
 ## untreated4 
-##  0.7516892</code></pre>
+##  0.7516005</code></pre>
 <p>For advanced users, we also include a convenience function <em>coef</em> for extracting the matrix <span class="math inline">\([\beta_{ir}]\)</span> for all genes <em>i</em> and model coefficients <span class="math inline">\(r\)</span>. This function can also return a matrix of standard errors, see <code>?coef</code>. The columns of this matrix correspond to the effects returned by <em>resultsNames</em>. Note that the <em>results</em> function is best for building results tables with  [...]
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">coef</span>(dds))</code></pre></div>
 <pre><code>##              Intercept condition_treated_vs_untreated
-## FBgn0000008  6.5585671                    0.002151683
-## FBgn0000014  0.3713251                   -0.496689957
-## FBgn0000015  0.3533500                   -1.882756713
-## FBgn0000017 12.1853813                   -0.240025055
-## FBgn0000018  8.7577334                   -0.104798934
-## FBgn0000024  2.5966931                    0.210811388</code></pre>
+## FBgn0000008  6.5584825                    0.002276428
+## FBgn0000014  0.3720829                   -0.495113878
+## FBgn0000017 12.1853275                   -0.239918945
+## FBgn0000018  8.7576500                   -0.104673913
+## FBgn0000024  2.5966613                    0.210848562
+## FBgn0000032  9.9910773                   -0.091788071</code></pre>
 <p>The beta prior variance <span class="math inline">\(\sigma_r^2\)</span> is stored as an attribute of the <em>DESeqDataSet</em>:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">attr</span>(dds, <span class="st">"betaPriorVar"</span>)</code></pre></div>
 <pre><code>## [1] 1e+06 1e+06</code></pre>
+<p>General information about the prior used for log fold change shrinkage is also stored in a slot of the <em>DESeqResults</em> object. This would also contain information about what other packages were used for log2 fold change shrinkage.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">priorInfo</span>(resLFC)</code></pre></div>
+<pre><code>## $type
+## [1] "normal"
+## 
+## $package
+## [1] "DESeq2"
+## 
+## $version
+## [1] '1.18.0'
+## 
+## $betaPriorVar
+##        Intercept conditiontreated 
+##     1.000000e+06     1.094939e-01</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">priorInfo</span>(resApe)</code></pre></div>
+<pre><code>## $type
+## [1] "apeglm"
+## 
+## $package
+## [1] "apeglm"
+## 
+## $version
+## [1] '1.0.0'
+## 
+## $prior.control
+## $prior.control$no.shrink
+## [1] 1
+## 
+## $prior.control$prior.mean
+## [1] 0
+## 
+## $prior.control$prior.scale
+## [1] 0.4045597
+## 
+## $prior.control$prior.df
+## [1] 1
+## 
+## $prior.control$prior.no.shrink.mean
+## [1] 0
+## 
+## $prior.control$prior.no.shrink.scale
+## [1] 15
+## 
+## $prior.control$prior.var
+## [1] 0.04091713</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">priorInfo</span>(resAsh)</code></pre></div>
+<pre><code>## $type
+## [1] "ashr"
+## 
+## $package
+## [1] "ashr"
+## 
+## $version
+## [1] '2.0.5'
+## 
+## $fitted_g
+## $pi
+##  [1] 1.502747e-01 1.758840e-05 2.349157e-05 4.109161e-05 1.166040e-04
+##  [6] 7.118828e-04 1.045939e-02 1.664217e-01 2.877691e-01 3.076675e-02
+## [11] 9.989217e-02 8.123288e-02 3.932428e-02 1.038946e-01 3.968250e-10
+## [16] 4.355989e-03 2.469746e-02 0.000000e+00 0.000000e+00 0.000000e+00
+## [21] 0.000000e+00 3.092738e-07 0.000000e+00
+## 
+## $mean
+##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+## 
+## $sd
+##  [1]  0.005779228  0.008173063  0.011558456  0.016346125  0.023116912
+##  [6]  0.032692251  0.046233824  0.065384502  0.092467649  0.130769003
+## [11]  0.184935298  0.261538006  0.369870596  0.523076013  0.739741191
+## [16]  1.046152026  1.479482383  2.092304051  2.958964766  4.184608102
+## [21]  5.917929531  8.369216205 11.835859063
+## 
+## attr(,"row.names")
+##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
+## attr(,"class")
+## [1] "normalmix"</code></pre>
 <p>The dispersion prior variance <span class="math inline">\(\sigma_d^2\)</span> is stored as an attribute of the dispersion function:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">dispersionFunction</span>(dds)</code></pre></div>
 <pre><code>## function (q) 
 ## coefs[1] + coefs[2]/q
-## <environment: 0x9c11c08>
+## <environment: 0x11e70e18>
 ## attr(,"coefficients")
 ## asymptDisp  extraPois 
-## 0.01396112 2.72102337 
+##  0.0140678  2.6551441 
 ## attr(,"fitType")
 ## [1] "parametric"
 ## attr(,"varLogDispEsts")
-## [1] 0.9891644
+## [1] 0.974076
 ## attr(,"dispPriorVar")
-## [1] 0.4988066</code></pre>
+## [1] 0.4837182</code></pre>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">attr</span>(<span class="kw">dispersionFunction</span>(dds), <span class="st">"dispPriorVar"</span>)</code></pre></div>
-<pre><code>## [1] 0.4988066</code></pre>
+<pre><code>## [1] 0.4837182</code></pre>
 <p>The version of DESeq2 which was used to construct the <em>DESeqDataSet</em> object, or the version used when <em>DESeq</em> was run, is stored here:</p>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">metadata</span>(dds)[[<span class="st">"version"</span>]]</code></pre></div>
-<pre><code>## [1] '1.16.1'</code></pre>
+<pre><code>## [1] '1.18.0'</code></pre>
 </div>
 <div id="sample-gene-dependent-normalization-factors" class="section level2">
 <h2>Sample-/gene-dependent normalization factors</h2>
@@ -1426,9 +1544,10 @@ m1 <-<span class="st"> </span>m1[,-idx]
 <div id="methods-changes-since-the-2014-deseq2-paper" class="section level2">
 <h2>Methods changes since the 2014 DESeq2 paper</h2>
 <ul>
-<li>In version 1.16 (Novermber 2016), the log2 fold change shrinkage is no longer default for the <em>DESeq</em> and <em>nbinomWaldTest</em> functions, by setting the defaults of these to <code>betaPrior=FALSE</code>, and by introducing a separate function <em>lfcShrink</em>, which performs log2 fold change shrinkage for visualization and ranking of genes. While for the majority of bulk RNA-seq experiments, the LFC shrinkage did not affect statistical testing, DESeq2 has become used as a [...]
+<li>In version 1.18 (November 2018), we add two <a href="#alternative-shrinkage-estimators">alternative shrinkage estimators</a>, which can be used via <code>lfcShrink</code>: an estimator using a t prior from the apeglm packages, and an estimator with a fitted mixture of normals prior from the ashr package.</li>
+<li>In version 1.16 (November 2016), the log2 fold change shrinkage is no longer default for the <em>DESeq</em> and <em>nbinomWaldTest</em> functions, by setting the defaults of these to <code>betaPrior=FALSE</code>, and by introducing a separate function <em>lfcShrink</em>, which performs log2 fold change shrinkage for visualization and ranking of genes. While for the majority of bulk RNA-seq experiments, the LFC shrinkage did not affect statistical testing, DESeq2 has become used as an [...]
 <li>A small change to the independent filtering routine: instead of taking the quantile of the filter (the mean of normalized counts) which directly <em>maximizes</em> the number of rejections, the threshold chosen is the lowest quantile of the filter for which the number of rejections is close to the peak of a curve fit to the number of rejections over the filter quantiles. ``Close to’’ is defined as within 1 residual standard deviation. This change was introduced in version 1.10 (Octob [...]
-<li>For the calculation of the beta prior variance, instead of matching the empirical quantile to the quantile of a Normal distribution, DESeq2 now uses the weighted quantile function of the  package. The weighting is described in the manual page for <em>nbinomWaldTest</em>. The weights are the inverse of the expected variance of log counts (as used in the diagonals of the matrix <span class="math inline">\(W\)</span> in the GLM). The effect of the change is that the estimated prior vari [...]
+<li>For the calculation of the beta prior variance, instead of matching the empirical quantile to the quantile of a Normal distribution, DESeq2 now uses the weighted quantile function of the Hmisc package. The weighting is described in the manual page for <em>nbinomWaldTest</em>. The weights are the inverse of the expected variance of log counts (as used in the diagonals of the matrix <span class="math inline">\(W\)</span> in the GLM). The effect of the change is that the estimated prior [...]
 </ul>
 <p>For a list of all changes since version 1.0.0, see the <code>NEWS</code> file included in the package.</p>
 </div>
@@ -1444,7 +1563,7 @@ idx <-<span class="st"> </span>!<span class="kw">is.na</span>(W)
 m <-<span class="st"> </span><span class="kw">ncol</span>(dds)
 p <-<span class="st"> </span><span class="dv">3</span>
 <span class="kw">abline</span>(<span class="dt">h=</span><span class="kw">qf</span>(.<span class="dv">99</span>, p, m -<span class="st"> </span>p))</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 </div>
 <div id="contrasts-1" class="section level2">
 <h2>Contrasts</h2>
@@ -1453,8 +1572,7 @@ p <-<span class="st"> </span><span class="dv">3</span>
 </div>
 <div id="expanded-model-matrices" class="section level2">
 <h2>Expanded model matrices</h2>
-<p>DESeq2 uses <em>expanded model matrices</em> in conjunction with the log2 fold change prior, in order to produce shrunken log2 fold change estimates and test results which are independent of the choice of reference level. Another way of saying this is that the shrinkage is <em>symmetric</em> with respect to all the levels of the factors in the design. The expanded model matrices differ from the standard model matrices, in that they have an indicator column (and therefore a coefficient [...]
-<p>The expanded model matrices are not full rank, but a coefficient vector <span class="math inline">\(\beta_i\)</span> can still be found due to the zero-centered prior on non-intercept coefficients. The prior variance for the log2 fold changes is calculated by first generating maximum likelihood estimates for a standard model matrix. The prior variance for each level of a factor is then set as the average of the mean squared maximum likelihood estimates for each level and every possibl [...]
+<p>For the specific combination of <code>lfcShrink</code> with the type <code>normal</code> and using <code>contrast</code>, DESeq2 uses <em>expanded model matrices</em> to produce shrunken log2 fold change estimates where the shrinkage is independent of the choice of reference level. In all other cases, DESeq2 uses standard model matrices, as produced by <code>model.matrix</code>. The expanded model matrices differ from the standard model matrices, in that they have an indicator column  [...]
 <p><a name="indfilttheory"></a></p>
 </div>
 <div id="independent-filtering-and-multiple-testing" class="section level2">
@@ -1475,7 +1593,7 @@ p <-<span class="st"> </span><span class="dv">3</span>
      <span class="dt">ylab=</span><span class="kw">expression</span>(-log[<span class="dv">10</span>](pvalue)),
      <span class="dt">ylim=</span><span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">30</span>),
      <span class="dt">cex=</span>.<span class="dv">4</span>, <span class="dt">col=</span><span class="kw">rgb</span>(<span class="dv">0</span>,<span class="dv">0</span>,<span class="dv">0</span>,.<span class="dv">3</span>))</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 </div>
 <div id="why-does-it-work" class="section level3">
 <h3>Why does it work?</h3>
@@ -1490,7 +1608,7 @@ colori <-<span class="st"> </span><span class="kw">c</span>(<span class="st">
 <span class="kw">text</span>(<span class="dt">x =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="kw">length</span>(h1$counts)), <span class="dt">y =</span> <span class="dv">0</span>, <span class="dt">label =</span> <span class="kw">paste</span>(<span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>)),
      <span class="dt">adj =</span> <span class="kw">c</span>(<span class="fl">0.5</span>,<span class="fl">1.7</span>), <span class="dt">xpd=</span><span class="ot">NA</span>)
 <span class="kw">legend</span>(<span class="st">"topright"</span>, <span class="dt">fill=</span><span class="kw">rev</span>(colori), <span class="dt">legend=</span><span class="kw">rev</span>(<span class="kw">names</span>(colori)))</code></pre></div>
-<p><img src=" [...]
+<p><img src=" [...]
 <p><a name="FAQ"></a></p>
 </div>
 </div>
@@ -1557,7 +1675,7 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds, <span cla
 </div>
 <div id="what-are-the-exact-steps-performed-by-deseq" class="section level2">
 <h2>What are the exact steps performed by DESeq()?</h2>
-<p>See the manual page for <em>DESeq</em>, which links to the subfunctions which are called in order, where complete details are listed. Also you can read the three steps listed in the <a href="#theory">the DESeq2 model</a> in this document.</p>
+<p>See the manual page for <em>DESeq</em>, which links to the subfunctions which are called in order, where complete details are listed. Also you can read the three steps listed in the <a href="#theory">DESeq2 model</a> in this document.</p>
 </div>
 <div id="is-there-an-official-galaxy-tool-for-deseq2" class="section level2">
 <h2>Is there an official Galaxy tool for DESeq2?</h2>
@@ -1586,18 +1704,18 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds, <span cla
 <div id="acknowledgments" class="section level1">
 <h1>Acknowledgments</h1>
 <p>We have benefited in the development of DESeq2 from the help and feedback of many individuals, including but not limited to:</p>
-<p>The Bionconductor Core Team, Alejandro Reyes, Andrzej Oles, Aleksandra Pekowska, Felix Klein, Nikolaos Ignatiadis, Vince Carey, Owen Solberg, Ruping Sun, Devon Ryan, Steve Lianoglou, Jessica Larson, Christina Chaivorapol, Pan Du, Richard Bourgon, Willem Talloen, Elin Videvall, Hanneke van Deutekom, Todd Burwell, Jesse Rowley, Igor Dolgalev, Stephen Turner, Ryan C Thompson, Tyr Wiesner-Hanks, Konrad Rudolph, David Robinson, Mingxiang Teng, Mathias Lesche, Sonali Arora, Jordan Ramilowsk [...]
+<p>The Bionconductor Core Team, Alejandro Reyes, Andrzej Oles, Aleksandra Pekowska, Felix Klein, Nikolaos Ignatiadis (IHW), Anqi Zhu (apeglm), Joseph Ibrahim (apeglm), Vince Carey, Owen Solberg, Ruping Sun, Devon Ryan, Steve Lianoglou, Jessica Larson, Christina Chaivorapol, Pan Du, Richard Bourgon, Willem Talloen, Elin Videvall, Hanneke van Deutekom, Todd Burwell, Jesse Rowley, Igor Dolgalev, Stephen Turner, Ryan C Thompson, Tyr Wiesner-Hanks, Konrad Rudolph, David Robinson, Mingxiang Te [...]
 </div>
 <div id="session-info" class="section level1">
 <h1>Session info</h1>
 <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sessionInfo</span>()</code></pre></div>
-<pre><code>## R version 3.4.0 (2017-04-21)
+<pre><code>## R version 3.4.2 (2017-09-28)
 ## Platform: x86_64-pc-linux-gnu (64-bit)
-## Running under: Ubuntu 16.04.2 LTS
+## Running under: Ubuntu 16.04.3 LTS
 ## 
 ## Matrix products: default
-## BLAS: /home/biocbuild/bbs-3.5-bioc/R/lib/libRblas.so
-## LAPACK: /home/biocbuild/bbs-3.5-bioc/R/lib/libRlapack.so
+## BLAS: /home/biocbuild/bbs-3.6-bioc/R/lib/libRblas.so
+## LAPACK: /home/biocbuild/bbs-3.6-bioc/R/lib/libRlapack.so
 ## 
 ## locale:
 ##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
@@ -1613,53 +1731,62 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds, <span cla
 ## 
 ## other attached packages:
 ##  [1] RColorBrewer_1.1-2         pheatmap_1.0.8            
-##  [3] hexbin_1.27.1              vsn_3.44.0                
-##  [5] ggplot2_2.2.1              IHW_1.4.0                 
-##  [7] airway_0.110.0             pasilla_1.4.0             
-##  [9] DESeq2_1.16.1              SummarizedExperiment_1.6.1
-## [11] DelayedArray_0.2.1         matrixStats_0.52.2        
-## [13] Biobase_2.36.2             GenomicRanges_1.28.1      
-## [15] GenomeInfoDb_1.12.0        IRanges_2.10.0            
-## [17] S4Vectors_0.14.0           BiocGenerics_0.22.0       
-## [19] tximportData_1.4.0         readr_1.1.0               
-## [21] tximport_1.4.0            
+##  [3] hexbin_1.27.1              vsn_3.46.0                
+##  [5] ggplot2_2.2.1              IHW_1.6.0                 
+##  [7] airway_0.111.1             pasilla_1.5.0             
+##  [9] DESeq2_1.18.0              SummarizedExperiment_1.8.0
+## [11] DelayedArray_0.4.0         matrixStats_0.52.2        
+## [13] Biobase_2.38.0             GenomicRanges_1.30.0      
+## [15] GenomeInfoDb_1.14.0        IRanges_2.12.0            
+## [17] S4Vectors_0.16.0           BiocGenerics_0.24.0       
+## [19] tximportData_1.5.0         readr_1.1.1               
+## [21] tximport_1.6.0            
 ## 
 ## loaded via a namespace (and not attached):
-##  [1] splines_3.4.0           Formula_1.2-1          
-##  [3] affy_1.54.0             latticeExtra_0.6-28    
-##  [5] GenomeInfoDbData_0.99.0 slam_0.1-40            
-##  [7] yaml_2.1.14             RSQLite_1.1-2          
-##  [9] backports_1.0.5         lattice_0.20-35        
-## [11] limma_3.32.2            digest_0.6.12          
-## [13] XVector_0.16.0          checkmate_1.8.2        
-## [15] colorspace_1.3-2        preprocessCore_1.38.1  
-## [17] htmltools_0.3.6         Matrix_1.2-10          
-## [19] plyr_1.8.4              XML_3.98-1.7           
-## [21] genefilter_1.58.1       zlibbioc_1.22.0        
-## [23] xtable_1.8-2            scales_0.4.1           
-## [25] affyio_1.46.0           fdrtool_1.2.15         
-## [27] BiocParallel_1.10.1     htmlTable_1.9          
-## [29] tibble_1.3.0            annotate_1.54.0        
-## [31] nnet_7.3-12             lazyeval_0.2.0         
-## [33] survival_2.41-3         magrittr_1.5           
-## [35] memoise_1.1.0           evaluate_0.10          
-## [37] foreign_0.8-68          BiocInstaller_1.26.0   
-## [39] tools_3.4.0             data.table_1.10.4      
-## [41] hms_0.3                 BiocStyle_2.4.0        
-## [43] stringr_1.2.0           munsell_0.4.3          
-## [45] locfit_1.5-9.1          cluster_2.0.6          
-## [47] AnnotationDbi_1.38.0    compiler_3.4.0         
-## [49] grid_3.4.0              RCurl_1.95-4.8         
-## [51] rjson_0.2.15            htmlwidgets_0.8        
-## [53] labeling_0.3            bitops_1.0-6           
-## [55] base64enc_0.1-3         rmarkdown_1.5          
-## [57] gtable_0.2.0            codetools_0.2-15       
-## [59] DBI_0.6-1               R6_2.2.0               
-## [61] gridExtra_2.2.1         knitr_1.15.1           
-## [63] Hmisc_4.0-3             rprojroot_1.2          
-## [65] lpsymphony_1.4.1        stringi_1.1.5          
-## [67] Rcpp_0.12.10            geneplotter_1.54.0     
-## [69] rpart_4.1-11            acepack_1.4.1</code></pre>
+##  [1] bitops_1.0-6            bit64_0.9-7            
+##  [3] doParallel_1.0.11       rprojroot_1.2          
+##  [5] numDeriv_2016.8-1       tools_3.4.2            
+##  [7] backports_1.1.1         affyio_1.48.0          
+##  [9] R6_2.2.2                rpart_4.1-11           
+## [11] Hmisc_4.0-3             DBI_0.7                
+## [13] lazyeval_0.2.1          colorspace_1.3-2       
+## [15] nnet_7.3-12             apeglm_1.0.0           
+## [17] gridExtra_2.3           preprocessCore_1.40.0  
+## [19] bit_1.1-12              compiler_3.4.2         
+## [21] fdrtool_1.2.15          htmlTable_1.9          
+## [23] labeling_0.3            slam_0.1-40            
+## [25] scales_0.5.0            checkmate_1.8.5        
+## [27] SQUAREM_2017.10-1       affy_1.56.0            
+## [29] genefilter_1.60.0       stringr_1.2.0          
+## [31] digest_0.6.12           foreign_0.8-69         
+## [33] rmarkdown_1.6           XVector_0.18.0         
+## [35] pscl_1.5.2              base64enc_0.1-3        
+## [37] htmltools_0.3.6         lpsymphony_1.6.0       
+## [39] limma_3.34.0            bbmle_1.0.20           
+## [41] htmlwidgets_0.9         rlang_0.1.2            
+## [43] RSQLite_2.0             BiocInstaller_1.28.0   
+## [45] BiocParallel_1.12.0     acepack_1.4.1          
+## [47] RCurl_1.95-4.8          magrittr_1.5           
+## [49] GenomeInfoDbData_0.99.1 Formula_1.2-2          
+## [51] Matrix_1.2-11           Rcpp_0.12.13           
+## [53] munsell_0.4.3           stringi_1.1.5          
+## [55] yaml_2.1.14             MASS_7.3-47            
+## [57] zlibbioc_1.24.0         plyr_1.8.4             
+## [59] grid_3.4.2              blob_1.1.0             
+## [61] lattice_0.20-35         splines_3.4.2          
+## [63] annotate_1.56.0         hms_0.3                
+## [65] locfit_1.5-9.1          knitr_1.17             
+## [67] rjson_0.2.15            geneplotter_1.56.0     
+## [69] codetools_0.2-15        XML_3.98-1.9           
+## [71] evaluate_0.10.1         latticeExtra_0.6-28    
+## [73] data.table_1.10.4-3     foreach_1.4.3          
+## [75] gtable_0.2.0            assertthat_0.2.0       
+## [77] ashr_2.0.5              emdbook_1.3.9          
+## [79] xtable_1.8-2            coda_0.19-1            
+## [81] survival_2.41-3         truncnorm_1.0-7        
+## [83] tibble_1.3.4            iterators_1.0.8        
+## [85] AnnotationDbi_1.40.0    memoise_1.1.0          
+## [87] cluster_2.0.6           BiocStyle_2.6.0</code></pre>
 </div>
 <div id="references" class="section level1 unnumbered">
 <h1>References</h1>
@@ -1685,11 +1812,17 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds, <span cla
 <div id="ref-CR">
 <p>Cox, D. R., and N. Reid. 1987. “Parameter orthogonality and approximate conditional inference.” <em>Journal of the Royal Statistical Society, Series B</em> 49 (1): 1–39. <a href="http://www.jstor.org/stable/2345476" class="uri">http://www.jstor.org/stable/2345476</a>.</p>
 </div>
+<div id="ref-Gerard2017">
+<p>Gerard, David, and Matthew Stephens. 2017. “Empirical Bayes Shrinkage and False Discovery Rate Estimation, Allowing For Unwanted Variation.” <em>ArXiv</em>. <a href="https://arxiv.org/abs/1709.10066" class="uri">https://arxiv.org/abs/1709.10066</a>.</p>
+</div>
 <div id="ref-sagmb2003">
 <p>Huber, Wolfgang, Anja von Heydebreck, Holger Sültmann, Annemarie Poustka, and Martin Vingron. 2003. “Parameter Estimation for the Calibration and Variance Stabilization of Microarray Data.” <em>Statistical Applications in Genetics and Molecular Biology</em> 2 (1): Article 3.</p>
 </div>
-<div id="ref-Ignatiadis2015">
-<p>Ignatiadis, Nikolaos, Bernd Klaus, Judith Zaugg, and Wolfgang Huber. 2015. “Data-Driven Hypothesis Weighting Increases Detection Power in Big Data Analytics.” <em>BioRxiv</em>. <a href="http://dx.doi.org/10.1101/034330" class="uri">http://dx.doi.org/10.1101/034330</a>.</p>
+<div id="ref-Ignatiadis2016">
+<p>Ignatiadis, Nikolaos, Bernd Klaus, Judith Zaugg, and Wolfgang Huber. 2016. “Data-Driven Hypothesis Weighting Increases Detection Power in Genome-Scale Multiple Testing.” <em>Nature Methods</em>. <a href="http://dx.doi.org/10.1038/nmeth.3885" class="uri">http://dx.doi.org/10.1038/nmeth.3885</a>.</p>
+</div>
+<div id="ref-Leek2014">
+<p>Leek, Jeffrey T. 2014. “svaseq: removing batch effects and other unwanted noise from sequencing data.” <em>Nucleic Acids Research</em> 42 (21). <a href="http://dx.doi.org/10.1093/nar/gku864" class="uri">http://dx.doi.org/10.1093/nar/gku864</a>.</p>
 </div>
 <div id="ref-Li2011RSEM">
 <p>Li, Bo, and Colin N. Dewey. 2011. “RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome.” <em>BMC Bioinformatics</em> 12: 323+. doi:<a href="https://doi.org/10.1186/1471-2105-12-3231">10.1186/1471-2105-12-3231</a>.</p>
@@ -1697,21 +1830,30 @@ res <-<span class="st"> </span><span class="kw">results</span>(dds, <span cla
 <div id="ref-Liao2013feature">
 <p>Liao, Y., G. K. Smyth, and W. Shi. 2013. “featureCounts: an efficient general purpose program for assigning sequence reads to genomic features.” <em>Bioinformatics</em>, November.</p>
 </div>
+<div id="ref-Love2016Modeling">
+<p>Love, Michael I., John B. Hogenesch, and Rafael A. Irizarry. 2016. “Modeling of Rna-Seq Fragment Sequence Bias Reduces Systematic Errors in Transcript Abundance Estimation.” <em>Nature Biotechnology</em> 34 (12): 1287–91. <a href="http://dx.doi.org/10.1038/nbt.3682" class="uri">http://dx.doi.org/10.1038/nbt.3682</a>.</p>
+</div>
 <div id="ref-Love2014">
 <p>Love, Michael I., Wolfgang Huber, and Simon Anders. 2014. “Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2.” <em>Genome Biology</em> 15 (12): 550. <a href="http://dx.doi.org/10.1186/s13059-014-0550-8" class="uri">http://dx.doi.org/10.1186/s13059-014-0550-8</a>.</p>
 </div>
-<div id="ref-Patro2016Salmon">
-<p>Patro, Rob, Geet Duggal, Michael I. Love, Rafael A. Irizarry, and Carl Kingsford. 2016. “Salmon Provides Accurate, Fast, and Bias-Aware Transcript Expression Estimates Using Dual-Phase Inference.” <em>BioRxiv</em>. <a href="http://biorxiv.org/content/early/2016/08/30/021592" class="uri">http://biorxiv.org/content/early/2016/08/30/021592</a>.</p>
+<div id="ref-Patro2017Salmon">
+<p>Patro, Rob, Geet Duggal, Michael I. Love, Rafael A. Irizarry, and Carl Kingsford. 2017. “Salmon Provides Fast and Bias-Aware Quantification of Transcript Expression.” <em>Nature Methods</em>. <a href="http://dx.doi.org/10.1038/nmeth.4197" class="uri">http://dx.doi.org/10.1038/nmeth.4197</a>.</p>
 </div>
 <div id="ref-Patro2014Sailfish">
 <p>Patro, Rob, Stephen M. Mount, and Carl Kingsford. 2014. “Sailfish enables alignment-free isoform quantification from RNA-seq reads using lightweight algorithms.” <em>Nature Biotechnology</em> 32: 462–64. <a href="http://dx.doi.org/10.1038/nbt.2862" class="uri">http://dx.doi.org/10.1038/nbt.2862</a>.</p>
 </div>
+<div id="ref-Risso2014">
+<p>Risso, Davide, John Ngai, Terence P Speed, and Sandrine Dudoit. 2014. “Normalization of RNA-seq data using factor analysis of control genes or samples.” <em>Nature Biotechnology</em> 32 (9). <a href="http://dx.doi.org/10.1038/nbt.2931" class="uri">http://dx.doi.org/10.1038/nbt.2931</a>.</p>
+</div>
 <div id="ref-Robert2015Errors">
 <p>Robert, Christelle, and Mick Watson. 2015. “Errors in RNA-Seq quantification affect genes of relevance to human disease.” <em>Genome Biology</em>. doi:<a href="https://doi.org/10.1186/s13059-015-0734-x">10.1186/s13059-015-0734-x</a>.</p>
 </div>
 <div id="ref-Soneson2015">
 <p>Soneson, Charlotte, Michael I. Love, and Mark Robinson. 2015. “Differential analyses for RNA-seq: transcript-level estimates improve gene-level inferences.” <em>F1000Research</em> 4 (1521). <a href="http://dx.doi.org/10.12688/f1000research.7563.1" class="uri">http://dx.doi.org/10.12688/f1000research.7563.1</a>.</p>
 </div>
+<div id="ref-Stephens2016">
+<p>Stephens, Matthew. 2016. “False Discovery Rates: A New Deal.” <em>Biostatistics</em> 18 (2). <a href="https://doi.org/10.1093/biostatistics/kxw041" class="uri">https://doi.org/10.1093/biostatistics/kxw041</a>.</p>
+</div>
 <div id="ref-Tibshirani1988">
 <p>Tibshirani, Robert. 1988. “Estimating Transformations for Regression via Additivity and Variance Stabilization.” <em>Journal of the American Statistical Association</em> 83: 394–405.</p>
 </div>
diff --git a/man/DESeq.Rd b/man/DESeq.Rd
index bc51dbf..5c8d8c6 100644
--- a/man/DESeq.Rd
+++ b/man/DESeq.Rd
@@ -119,7 +119,7 @@ Experiments without replicates do not allow for estimation of the dispersion
 of counts around the expected value for each group, which is critical for
 differential expression analysis. If an experimental design is
 supplied which does not contain the necessary degrees of freedom for differential
-analysis, \code{DESeq} will provide a message to the user and follow
+analysis, \code{DESeq} will provide a warning to the user and follow
 the strategy outlined in Anders and Huber (2010)
 under the section 'Working without replicates', wherein all the samples
 are considered as replicates of a single group for the estimation of dispersion.
@@ -127,6 +127,8 @@ As noted in the reference above: "Some overestimation of the variance
 may be expected, which will make that approach conservative."
 Furthermore, "while one may not want to draw strong conclusions from such an analysis,
 it may still be useful for exploration and hypothesis generation."
+We provide this approach for data exploration only, but for accurately
+identifying differential expression, biological replicates are required.
 
 The argument \code{minReplicatesForReplace} is used to decide which samples
 are eligible for automatic replacement in the case of extreme Cook's distance.
@@ -172,7 +174,7 @@ resLRT <- results(ddsLRT)
 
 }
 \references{
-Michael I Love, Wolfgang Huber, Simon Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology 2014, 15:550. \url{http://dx.doi.org/10.1186/s13059-014-0550-8}
+Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15:550. \url{https://doi.org/10.1186/s13059-014-0550-8}
 }
 \seealso{
 \code{\link{nbinomWaldTest}}, \code{\link{nbinomLRT}}
diff --git a/man/DESeq2-package.Rd b/man/DESeq2-package.Rd
index f7e63fc..26e6654 100644
--- a/man/DESeq2-package.Rd
+++ b/man/DESeq2-package.Rd
@@ -18,11 +18,11 @@ support site: \url{http://support.bioconductor.org}.
 \references{
 DESeq2 reference:
 
-Michael I Love, Wolfgang Huber, Simon Anders: Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology 2014, 15:550. \url{http://dx.doi.org/10.1186/s13059-014-0550-8}
+Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15:550. \url{http://dx.doi.org/10.1186/s13059-014-0550-8}
 
 DESeq reference:
 
-Simon Anders, Wolfgang Huber: Differential expression analysis for sequence count data. Genome Biology 2010, 11:106. \url{http://dx.doi.org/10.1186/gb-2010-11-10-r106}
+Simon Anders, Wolfgang Huber (2010) Differential expression analysis for sequence count data. Genome Biology, 11:106. \url{http://dx.doi.org/10.1186/gb-2010-11-10-r106}
 }
 \author{
 Michael Love, Wolfgang Huber, Simon Anders
diff --git a/man/DESeqResults.Rd b/man/DESeqResults.Rd
index 0fec033..e17b782 100644
--- a/man/DESeqResults.Rd
+++ b/man/DESeqResults.Rd
@@ -7,11 +7,13 @@
 \alias{DESeqResults-class}
 \title{DESeqResults object and constructor}
 \usage{
-DESeqResults(DataFrame)
+DESeqResults(DataFrame, priorInfo = list())
 }
 \arguments{
 \item{DataFrame}{a DataFrame of results, standard column names are:
 baseMean, log2FoldChange, lfcSE, stat, pvalue, padj.}
+
+\item{priorInfo}{a list giving information on the log fold change prior}
 }
 \value{
 a DESeqResults object
diff --git a/man/estimateDispersionsGeneEst.Rd b/man/estimateDispersionsGeneEst.Rd
index c61f7f4..4dcd796 100644
--- a/man/estimateDispersionsGeneEst.Rd
+++ b/man/estimateDispersionsGeneEst.Rd
@@ -12,7 +12,7 @@
 \usage{
 estimateDispersionsGeneEst(object, minDisp = 1e-08, kappa_0 = 1,
   dispTol = 1e-06, maxit = 100, quiet = FALSE, modelMatrix = NULL,
-  niter = 1, linearMu = NULL)
+  niter = 1, linearMu = NULL, minmu = 0.5)
 
 estimateDispersionsFit(object, fitType = c("parametric", "local", "mean"),
   minDisp = 1e-08, quiet = FALSE)
@@ -51,6 +51,8 @@ default is NULL, in which case a lienar model is used if the
 number of groups defined by the model matrix is equal to the number
 of columns of the model matrix}
 
+\item{minmu}{lower bound on the estimated count for fitting gene-wise dispersion}
+
 \item{fitType}{either "parametric", "local", or "mean"
 for the type of fitting of dispersions to the mean intensity.
 See \code{\link{estimateDispersions}} for description.}
diff --git a/man/lfcShrink.Rd b/man/lfcShrink.Rd
index d61982f..a6cd51a 100644
--- a/man/lfcShrink.Rd
+++ b/man/lfcShrink.Rd
@@ -1,43 +1,103 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/helper.R
+% Please edit documentation in R/lfcShrink.R
 \name{lfcShrink}
 \alias{lfcShrink}
 \title{Shrink log2 fold changes}
 \usage{
-lfcShrink(dds, coef, contrast, res, type = "normal")
+lfcShrink(dds, coef, contrast, res, type = c("normal", "apeglm", "ashr"),
+  svalue = FALSE, returnList = FALSE, parallel = FALSE,
+  BPPARAM = bpparam(), ...)
 }
 \arguments{
-\item{dds}{a DESeqDataSet object, which has been run through
-\code{\link{DESeq}}, or at the least, \code{\link{estimateDispersions}}}
+\item{dds}{a DESeqDataSet object, after running \code{\link{DESeq}}}
 
-\item{coef}{the number of the coefficient (LFC) to shrink,
-consult \code{resultsNames(dds)} after running \code{DESeq(dds, betaPrior=FALSE)}.
-only \code{coef} or \code{contrast} can be specified, not both}
+\item{coef}{the name or number of the coefficient (LFC) to shrink,
+consult \code{resultsNames(dds)} after running \code{DESeq(dds)}.
+note: only \code{coef} or \code{contrast} can be specified, not both.
+\code{type="apeglm"} requires use of \code{coef}.}
 
 \item{contrast}{see argument description in \code{\link{results}}.
-only \code{coef} or \code{contrast} can be specified, not both}
+only \code{coef} or \code{contrast} can be specified, not both.}
 
-\item{res}{a DESeqResults object (can be missing)}
+\item{res}{a DESeqResults object. Results table produced by the
+default pipeline, i.e. \code{DESeq} followed by \code{results}.
+If not provided, it will be generated internally using \code{coef} or \code{contrast}}
 
-\item{type}{at this time, ignored argument, because only one
-shrinkage estimator, but more to come!}
+\item{type}{\code{"normal"} is the original DESeq2 shrinkage estimator;
+\code{"apeglm"} is the adaptive t prior shrinkage estimator from the 'apeglm' package;
+\code{"ashr"} is the adaptive shrinkage estimator from the 'ashr' package,
+using a fitted mixture of normals prior
+- see the Stephens (2016) reference below for citation}
+
+\item{svalue}{logical, should p-values and adjusted p-values be replaced
+with s-values when using \code{apeglm} or \code{ashr}.
+See Stephens (2016) reference on s-values.}
+
+\item{returnList}{logical, should \code{lfcShrink} return a list, where
+the first element is the results table, and the second element is the
+output of \code{apeglm} or \code{ashr}}
+
+\item{parallel}{if FALSE, no parallelization. if TRUE, parallel
+execution using \code{BiocParallel}, see same argument of \code{\link{DESeq}}
+parallelization only used with \code{normal} or \code{apeglm}}
+
+\item{BPPARAM}{see same argument of \code{\link{DESeq}}}
+
+\item{...}{arguments passed to \code{apeglm} and \code{ashr}}
 }
 \value{
-if \code{res} is not missing, a DESeqResults object with
-the \code{log2FoldChange} column replaced with a shrunken LFC.
-If \code{res} is missing, just the shrunken LFC vector.
+a DESeqResults object with the \code{log2FoldChange} and \code{lfcSE}
+columns replaced with shrunken LFC and SE.
+\code{priorInfo(res)} contains information about the shrinkage procedure,
+relevant to the various methods specified by \code{type}.
 }
 \description{
-This function adds shrunken log2 fold changes (LFC) to a
-results table which was run without LFC moderation.
-Note: this function is still being prototyped.
+Adds shrunken log2 fold changes (LFC) and SE to a
+results table from \code{DESeq} run without LFC shrinkage.
+Three shrinkage esimators for LFC are available via \code{type}.
+}
+\details{
+As of DESeq2 version 1.18, \code{type="apeglm"} and \code{type="ashr"}
+are new features, and still under development.
+Specifying \code{type="apeglm"} passes along DESeq2 MLE log2
+fold changes and standard errors to the \code{apeglm} function
+in the apeglm package, and re-estimates posterior LFCs for
+the coefficient specified by \code{coef}.
+Specifying \code{type="ashr"} passes along DESeq2 MLE log2
+fold changes and standard errors to the \code{ash} function
+in the ashr package, 
+with arguments \code{mixcompdist="normal"} and \code{method="shrink"}
+(\code{coef} and \code{contrast} ignored).
+See vignette for a comparison of shrinkage estimators on an example dataset.
+For all shrinkage methods, details on the prior is included in
+\code{priorInfo(res)}, including the \code{fitted_g} mixture for ashr.
+The integration of shrinkage methods from
+external packages will likely evolve over time. We will likely incorporate an
+\code{lfcThreshold} argument which can be passed to apeglm
+to specify regions of the posterior at an arbitrary threshold.
+
+For \code{type="normal"}, shrinkage cannot be applied to coefficients
+in a model with interaction terms.
 }
 \examples{
 
- dds <- makeExampleDESeqDataSet(betaSD=1)
- dds <- DESeq(dds, betaPrior=FALSE)
+ set.seed(1)
+ dds <- makeExampleDESeqDataSet(n=500,betaSD=1)
+ dds <- DESeq(dds)
  res <- results(dds)
- res.shr <- lfcShrink(dds=dds, coef=2, res=res)
- res.shr <- lfcShrink(dds=dds, contrast=c("condition","B","A"), res=res)
 
+ res.shr <- lfcShrink(dds=dds, coef=2)
+ res.shr <- lfcShrink(dds=dds, contrast=c("condition","B","A"))
+ res.ape <- lfcShrink(dds=dds, coef=2, type="apeglm")
+ res.ash <- lfcShrink(dds=dds, coef=2, type="ashr")
+
+}
+\references{
+\code{type="normal"}:
+
+Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15:550. \url{https://doi.org/10.1186/s13059-014-0550-8}
+
+\code{type="ashr"}:
+
+Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2. \url{https://doi.org/10.1093/biostatistics/kxw041}
 }
diff --git a/man/normTransform.Rd b/man/normTransform.Rd
index 6fc7094..4b846b1 100644
--- a/man/normTransform.Rd
+++ b/man/normTransform.Rd
@@ -15,7 +15,7 @@ normTransform(object, f = log2, pc = 1)
 }
 \description{
 A simple function for creating a \code{\link{DESeqTransform}}
-object after applying: f(count + pc).
+object after applying: \code{f(count(dds,normalized=TRUE) + pc)}.
 }
 \seealso{
 \code{\link{varianceStabilizingTransformation}}, \code{\link{rlog}}
diff --git a/man/priorInfo.Rd b/man/priorInfo.Rd
new file mode 100644
index 0000000..64e995d
--- /dev/null
+++ b/man/priorInfo.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AllGenerics.R, R/methods.R
+\docType{methods}
+\name{priorInfo}
+\alias{priorInfo}
+\alias{priorInfo<-}
+\alias{priorInfo}
+\alias{priorInfo,DESeqResults-method}
+\alias{priorInfo<-,DESeqResults,list-method}
+\alias{priorInfo}
+\title{Accessors for the 'priorInfo' slot of a DESeqResults object.}
+\usage{
+priorInfo(object, ...)
+
+priorInfo(object, ...) <- value
+
+\S4method{priorInfo}{DESeqResults}(object)
+
+\S4method{priorInfo}{DESeqResults,list}(object) <- value
+}
+\arguments{
+\item{object}{a \code{DESeqResults} object}
+
+\item{...}{additional arguments}
+
+\item{value}{a \code{list}}
+}
+\description{
+The priorInfo slot contains details about the prior on log fold changes
+}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 90704b9..1107242 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -8,7 +8,7 @@ using namespace Rcpp;
 
 // fitDisp
 Rcpp::List fitDisp(SEXP ySEXP, SEXP xSEXP, SEXP mu_hatSEXP, SEXP log_alphaSEXP, SEXP log_alpha_prior_meanSEXP, SEXP log_alpha_prior_sigmasqSEXP, SEXP min_log_alphaSEXP, SEXP kappa_0SEXP, SEXP tolSEXP, SEXP maxitSEXP, SEXP usePriorSEXP, SEXP weightsSEXP, SEXP useWeightsSEXP);
-RcppExport SEXP DESeq2_fitDisp(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP mu_hatSEXPSEXP, SEXP log_alphaSEXPSEXP, SEXP log_alpha_prior_meanSEXPSEXP, SEXP log_alpha_prior_sigmasqSEXPSEXP, SEXP min_log_alphaSEXPSEXP, SEXP kappa_0SEXPSEXP, SEXP tolSEXPSEXP, SEXP maxitSEXPSEXP, SEXP usePriorSEXPSEXP, SEXP weightsSEXPSEXP, SEXP useWeightsSEXPSEXP) {
+RcppExport SEXP _DESeq2_fitDisp(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP mu_hatSEXPSEXP, SEXP log_alphaSEXPSEXP, SEXP log_alpha_prior_meanSEXPSEXP, SEXP log_alpha_prior_sigmasqSEXPSEXP, SEXP min_log_alphaSEXPSEXP, SEXP kappa_0SEXPSEXP, SEXP tolSEXPSEXP, SEXP maxitSEXPSEXP, SEXP usePriorSEXPSEXP, SEXP weightsSEXPSEXP, SEXP useWeightsSEXPSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -31,7 +31,7 @@ END_RCPP
 }
 // fitBeta
 Rcpp::List fitBeta(SEXP ySEXP, SEXP xSEXP, SEXP nfSEXP, SEXP alpha_hatSEXP, SEXP contrastSEXP, SEXP beta_matSEXP, SEXP lambdaSEXP, SEXP weightsSEXP, SEXP useWeightsSEXP, SEXP tolSEXP, SEXP maxitSEXP, SEXP useQRSEXP);
-RcppExport SEXP DESeq2_fitBeta(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP nfSEXPSEXP, SEXP alpha_hatSEXPSEXP, SEXP contrastSEXPSEXP, SEXP beta_matSEXPSEXP, SEXP lambdaSEXPSEXP, SEXP weightsSEXPSEXP, SEXP useWeightsSEXPSEXP, SEXP tolSEXPSEXP, SEXP maxitSEXPSEXP, SEXP useQRSEXPSEXP) {
+RcppExport SEXP _DESeq2_fitBeta(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP nfSEXPSEXP, SEXP alpha_hatSEXPSEXP, SEXP contrastSEXPSEXP, SEXP beta_matSEXPSEXP, SEXP lambdaSEXPSEXP, SEXP weightsSEXPSEXP, SEXP useWeightsSEXPSEXP, SEXP tolSEXPSEXP, SEXP maxitSEXPSEXP, SEXP useQRSEXPSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -53,7 +53,7 @@ END_RCPP
 }
 // fitDispGrid
 Rcpp::List fitDispGrid(SEXP ySEXP, SEXP xSEXP, SEXP mu_hatSEXP, SEXP disp_gridSEXP, SEXP log_alpha_prior_meanSEXP, SEXP log_alpha_prior_sigmasqSEXP, SEXP usePriorSEXP, SEXP weightsSEXP, SEXP useWeightsSEXP);
-RcppExport SEXP DESeq2_fitDispGrid(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP mu_hatSEXPSEXP, SEXP disp_gridSEXPSEXP, SEXP log_alpha_prior_meanSEXPSEXP, SEXP log_alpha_prior_sigmasqSEXPSEXP, SEXP usePriorSEXPSEXP, SEXP weightsSEXPSEXP, SEXP useWeightsSEXPSEXP) {
+RcppExport SEXP _DESeq2_fitDispGrid(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP mu_hatSEXPSEXP, SEXP disp_gridSEXPSEXP, SEXP log_alpha_prior_meanSEXPSEXP, SEXP log_alpha_prior_sigmasqSEXPSEXP, SEXP usePriorSEXPSEXP, SEXP weightsSEXPSEXP, SEXP useWeightsSEXPSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -70,3 +70,15 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+
+static const R_CallMethodDef CallEntries[] = {
+    {"_DESeq2_fitDisp", (DL_FUNC) &_DESeq2_fitDisp, 13},
+    {"_DESeq2_fitBeta", (DL_FUNC) &_DESeq2_fitBeta, 12},
+    {"_DESeq2_fitDispGrid", (DL_FUNC) &_DESeq2_fitDispGrid, 9},
+    {NULL, NULL, 0}
+};
+
+RcppExport void R_init_DESeq2(DllInfo *dll) {
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
+    R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/tests/testthat/test_construction_errors.R b/tests/testthat/test_construction_errors.R
index 1c4347c..0ad101e 100644
--- a/tests/testthat/test_construction_errors.R
+++ b/tests/testthat/test_construction_errors.R
@@ -1,6 +1,7 @@
 context("construction_errors")
 test_that("proper errors thrown in object construction", {
   coldata <- DataFrame(x=factor(c("A","A","B","B")),
+                       xx=factor(c("A","A","B","B ")),
                        name=letters[1:4],
                        ident=factor(rep("A",4)),
                        num=1:4,
@@ -22,6 +23,7 @@ test_that("proper errors thrown in object construction", {
   expect_message(DESeqDataSetFromMatrix(counts, coldata, ~ missinglevels))
   expect_message(DESeqDataSetFromMatrix(counts, coldata, ~ notref))
   expect_error(DESeqDataSetFromMatrix(counts, coldata, ~ident + x), "design contains")
+  expect_message(DESeqDataSetFromMatrix(counts, coldata, ~xx), "characters other than")
 
   # same colnames but in different order:
   expect_error(DESeqDataSetFromMatrix(matrix(1:16, ncol=4, dimnames=list(1:4, 4:1)), coldata, ~ x))
diff --git a/tests/testthat/test_interactions.R b/tests/testthat/test_interactions.R
index 19d1f97..40acb1b 100644
--- a/tests/testthat/test_interactions.R
+++ b/tests/testthat/test_interactions.R
@@ -6,5 +6,13 @@ test_that("interactions throw error", {
   dds <- DESeq(dds)
   expect_equal(resultsNames(dds)[4], "conditionB.groupY")
   # interactions error
-  expect_error(DESeq(dds, betaPrior=TRUE))
+  expect_error(DESeq(dds, betaPrior=TRUE), "designs with interactions")
+
+  # also lfcShrink
+  res <- results(dds, name="conditionB.groupY")
+  expect_error(res <- lfcShrink(dds, coef=4, res=res), "not implemented")
+
+  res <- results(dds, contrast=c("condition","B","A"))
+  expect_error(res <- lfcShrink(dds, contrast=c("condition","B","A"), res=res),
+               "not implemented")  
 })
diff --git a/tests/testthat/test_lfcShrink.R b/tests/testthat/test_lfcShrink.R
index 61dbb12..e0a9f47 100644
--- a/tests/testthat/test_lfcShrink.R
+++ b/tests/testthat/test_lfcShrink.R
@@ -4,8 +4,7 @@ test_that("LFC shrinkage works", {
   dds <- estimateSizeFactors(dds)
   expect_error(lfcShrink(dds, 2, 1))
   dds <- estimateDispersions(dds)
-  lfc <- lfcShrink(dds=dds, coef=2)
-  dds <- DESeq(dds, betaPrior=FALSE)
+  dds <- DESeq(dds)
   res <- results(dds)
   res.shr <- lfcShrink(dds=dds, coef=2, res=res)
   plotMA(res.shr)
@@ -13,4 +12,57 @@ test_that("LFC shrinkage works", {
                        contrast=c("condition","B","A"),
                        res=res)
   plotMA(res.shr)
+
+  # testing out various methods for LFC shrinkage
+  set.seed(1)
+  dds <- makeExampleDESeqDataSet(betaSD=1,n=1000,m=10)
+  dds <- DESeq(dds)
+  res <- results(dds, name="condition_B_vs_A")
+
+  # dds and res must match
+  expect_error(lfcShrink(dds=dds, coef=2, res=res[1:500,], type="normal"), "rownames")
+  expect_error(lfcShrink(dds=dds, coef=2, res=res[1:500,], type="apeglm"), "rownames")  
+
+  # try out various types and ways of specifying coefs
+  res.n <- lfcShrink(dds=dds, coef="condition_B_vs_A", res=res, type="normal")
+  res.n <- lfcShrink(dds=dds, coef=2, res=res, type="normal")
+  res.n <- lfcShrink(dds=dds, coef=2, type="normal")
+  res.ape <- lfcShrink(dds=dds, coef=2, type="apeglm")
+  res.ash <- lfcShrink(dds=dds, res=res, type="ashr")
+
+  # prior info
+  str(priorInfo(res.n))
+  str(priorInfo(res.ape))
+  str(priorInfo(res.ash))
+
+  # plot against true
+  par(mfrow=c(1,3))
+  plot(mcols(dds)$trueBeta, res.n$log2FoldChange); abline(0,1,col="red")
+  plot(mcols(dds)$trueBeta, res.ape$log2FoldChange); abline(0,1,col="red")
+  plot(mcols(dds)$trueBeta, res.ash$log2FoldChange); abline(0,1,col="red")
+  
+  # s-value returned
+  res.ape <- lfcShrink(dds=dds, coef=2, type="apeglm", svalue=TRUE)
+  expect_true("svalue" %in% names(res.ape))
+  res.ash <- lfcShrink(dds=dds, res=res, type="ashr", svalue=TRUE)
+  expect_true("svalue" %in% names(res.ash))
+
+  # TODO add tests of new plotMA() with svalue
+  
+  # list returned
+  res.ape <- lfcShrink(dds=dds, coef=2, type="apeglm", returnList=TRUE)
+  names(res.ape)
+  res.ash <- lfcShrink(dds=dds, res=res, type="ashr", returnList=TRUE)
+  names(res.ash)
+
+  # test wrong coef specified
+  resInt <- results(dds, name="Intercept")
+  expect_error(lfcShrink(dds=dds, coef=2, res=resInt, type="apeglm"))
+
+  # test supplied model.matrix
+  full <- model.matrix(~condition, colData(dds))
+  dds <- DESeq(dds, full=full)
+  res <- results(dds)
+  res.ape <- lfcShrink(dds=dds, coef=2, res=res, type="apeglm")
+  
 })  
diff --git a/tests/testthat/test_nbinomWald.R b/tests/testthat/test_nbinomWald.R
index 33ca300..035e67d 100644
--- a/tests/testthat/test_nbinomWald.R
+++ b/tests/testthat/test_nbinomWald.R
@@ -16,6 +16,16 @@ test_that("nbinomWald throws various errors and works with edge cases",{
   dds <- nbinomWaldTest(dds, modelMatrixType="standard")
   covarianceMatrix(dds, 1)
 
+  # changing 'df'
+  dds <- makeExampleDESeqDataSet(n=100, m=4)
+  counts(dds)[1:4,] <- rep(0L, 16)
+  dds <- estimateSizeFactors(dds)
+  dds <- estimateDispersions(dds)
+  dds <- nbinomWaldTest(dds)
+  round(head(results(dds)$pvalue,8),3)
+  dds <- nbinomWaldTest(dds, useT=TRUE, df=rep(1,100))
+  round(head(results(dds)$pvalue,8),3)
+  
   # try nbinom after no fitted dispersions
   dds <- makeExampleDESeqDataSet(n=100, m=4)
   dds <- estimateSizeFactors(dds)
diff --git a/tests/testthat/test_parallel.R b/tests/testthat/test_parallel.R
index 63a7f4b..1f186f9 100644
--- a/tests/testthat/test_parallel.R
+++ b/tests/testthat/test_parallel.R
@@ -64,4 +64,18 @@ test_that("parallel execution works as expected", {
   
   dds <- makeExampleDESeqDataSet(n=100,m=8)
   dds <- DESeq(dds, parallel=TRUE, test="LRT", reduced=~1)
+
+  # lfcShrink parallel test
+  dds <- makeExampleDESeqDataSet(n=100)
+  dds <- DESeq(dds)
+  res <- results(dds)
+  res <- lfcShrink(dds, coef=2)
+  res2 <- lfcShrink(dds, coef=2, parallel=TRUE)
+  expect_equal(res$log2FoldChange, res2$log2FoldChange)
+  res <- lfcShrink(dds, coef=2, type="apeglm", svalue=TRUE)
+  res2 <- lfcShrink(dds, coef=2, type="apeglm", parallel=TRUE, svalue=TRUE)
+  expect_equal(res$log2FoldChange, res2$log2FoldChange)
+  # this should be checked with number of workers > 1
+  expect_equal(res$svalue, res2$svalue)
+  
 })
diff --git a/tests/testthat/test_results.R b/tests/testthat/test_results.R
index 4b61a90..ec5f1ba 100644
--- a/tests/testthat/test_results.R
+++ b/tests/testthat/test_results.R
@@ -41,9 +41,9 @@ test_that("results works as expected and throws errors", {
   # check to see if the contrasts with expanded model matrix
   # are close to expected (although shrunk due to the beta prior).
   # lfcShrink() here calls results()
-  lfc31 <- lfcShrink(dds,contrast=c("condition","3","1"))[1]
-  lfc21 <- lfcShrink(dds,contrast=c("condition","2","1"))[1]
-  lfc32 <- lfcShrink(dds,contrast=c("condition","3","2"))[1]
+  lfc31 <- lfcShrink(dds,contrast=c("condition","3","1"))[1,"log2FoldChange"]
+  lfc21 <- lfcShrink(dds,contrast=c("condition","2","1"))[1,"log2FoldChange"]
+  lfc32 <- lfcShrink(dds,contrast=c("condition","3","2"))[1,"log2FoldChange"]
   expect_equal(lfc31, 3, tolerance=.1)
   expect_equal(lfc21, 1, tolerance=.1)
   expect_equal(lfc32, 2, tolerance=.1)
@@ -55,9 +55,12 @@ test_that("results works as expected and throws errors", {
   dds2 <- dds
   colData(dds2)$condition <- relevel(colData(dds2)$condition, "2")
   dds2 <- DESeq(dds2)
-  expect_equal(lfcShrink(dds2,contrast=c("condition","3","1"))[1], lfc31, tolerance=1e-6)
-  expect_equal(lfcShrink(dds2,contrast=c("condition","2","1"))[1], lfc21, tolerance=1e-6)
-  expect_equal(lfcShrink(dds2,contrast=c("condition","3","2"))[1], lfc32, tolerance=1e-6)
+  expect_equal(lfcShrink(dds2,contrast=c("condition","3","1"))[1,"log2FoldChange"],
+               lfc31, tolerance=1e-6)
+  expect_equal(lfcShrink(dds2,contrast=c("condition","2","1"))[1,"log2FoldChange"],
+               lfc21, tolerance=1e-6)
+  expect_equal(lfcShrink(dds2,contrast=c("condition","3","2"))[1,"log2FoldChange"],
+               lfc32, tolerance=1e-6)
 
   # test a number of contrast as list options
   expect_equal(results(dds, contrast=list("condition_3_vs_1","condition_2_vs_1"))[1,2],
diff --git a/vignettes/DESeq2.Rmd b/vignettes/DESeq2.Rmd
index 2969fc5..7e498fc 100644
--- a/vignettes/DESeq2.Rmd
+++ b/vignettes/DESeq2.Rmd
@@ -34,10 +34,6 @@ vignette: >
   %\VignetteEncoding[utf8]{inputenc}
 ---
 
-
-<!-- This is the source document -->
-
-
 ```{r setup, echo=FALSE, results="hide"}
 knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
                       dev="png",
@@ -46,11 +42,11 @@ knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
 
 # Standard workflow
 
-**If you use DESeq2 in published research, please cite:**
+**Note:** if you use DESeq2 in published research, please cite:
 
-> Love, M.I., Huber, W., Anders, S.,
-> Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2, 
-> *Genome Biology* 2014, **15**:550.
+> Love, M.I., Huber, W., Anders, S. (2014)
+> Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2.
+> *Genome Biology*, **15**:550.
 > [10.1186/s13059-014-0550-8](http://dx.doi.org/10.1186/s13059-014-0550-8)
 
 Other Bioconductor packages with similar aims are
@@ -70,14 +66,16 @@ you have a count matrix called `cts` and a table of sample
 information called `coldata`.  The `design` indicates how to model the
 samples, here, that we want to measure the effect of the condition,
 controlling for batch differences. The two factor variables `batch`
-and `condition` should be columns of `coldata`.
+and `condition` should  be columns of `coldata`.
 
 ```{r quickStart, eval=FALSE}
 dds <- DESeqDataSetFromMatrix(countData = cts,
                               colData = coldata,
                               design= ~ batch + condition)
 dds <- DESeq(dds)
-res <- results(dds, contrast=c("condition","treated","control"))
+res <- results(dds, contrast=c("condition","treat","ctrl"))
+resultsNames(dds)
+res <- lfcShrink(dds, coef=2)
 ```
 
 The following starting functions will be explained below:
@@ -183,7 +181,7 @@ package. This workflow allows users to import transcript abundance estimates
 from a variety of external software, including the following methods:
 
 * [Salmon](http://combine-lab.github.io/salmon/)
-  [@Patro2016Salmon]
+  [@Patro2017Salmon]
 * [Sailfish](http://www.cs.cmu.edu/~ckingsf/software/sailfish/)
   [@Patro2014Sailfish]
 * [kallisto](https://pachterlab.github.io/kallisto/about.html)
@@ -211,6 +209,15 @@ Note that the tximport-to-DESeq2 approach uses *estimated* gene
 counts from the transcript abundance quantifiers, but not *normalized*
 counts.
 
+A tutorial on how to use the *Salmon* software for quantifying
+transcript abundance can be
+found [here](https://combine-lab.github.io/salmon/getting_started/).
+We recommend using the `--gcBias` 
+[flag](http://salmon.readthedocs.io/en/latest/salmon.html#gcbias)
+which estimates a correction factor for systematic biases
+commonly present in RNA-seq data [@Love2016Modeling; @Patro2017Salmon], 
+unless you are certain that your data do not contain such bias.
+
 Here, we demonstrate how to import transcript abundances
 and construct of a gene-level *DESeqDataSet* object
 from *Salmon* `quant.sf` files, which are
@@ -224,7 +231,6 @@ For a typical use, the `condition` information should already be
 present as a column of the sample table `samples`, while here we
 construct artificial condition labels for demonstration.
 
-
 ```{r txiSetup}
 library("tximport")
 library("readr")
@@ -303,31 +309,39 @@ coldata <- read.csv(pasAnno, row.names=1)
 coldata <- coldata[,c("condition","type")]
 ```
 
-We examine the count matrix and column data to see if they are consisent:
+We examine the count matrix and column data to see if they are
+consistent in terms of sample order.
 
 ```{r showPasilla}
-head(cts)
-head(coldata)
+head(cts,2)
+coldata
 ```
 
 Note that these are not in the same order with respect to samples! 
 
-It is critical that the columns of the count matrix and the rows of
-the column data (information about samples) are in the same order.
-We should re-arrange one or the other so that they are consistent in
-terms of sample order (if we do not, later functions would produce
-an error). We additionally need to chop off the `"fb"` of the 
-row names of `coldata`, so the naming is consistent.
+It is absolutely critical that the columns of the count matrix and the
+rows of the column data (information about samples) are in the same
+order.  DESeq2 will not make guesses as to which column of the count
+matrix belongs to which row of the column data, these must be provided
+to DESeq2 already in consistent order.
+
+As they are not in the correct order as given, we need to re-arrange
+one or the other so that they are consistent in terms of sample order
+(if we do not, later functions would produce an error). We
+additionally need to chop off the `"fb"` of the row names of
+`coldata`, so the naming is consistent.
 
 ```{r reorderPasila}
-rownames(coldata) <- sub("fb","",rownames(coldata))
+rownames(coldata) <- sub("fb", "", rownames(coldata))
 all(rownames(coldata) %in% colnames(cts))
+all(rownames(coldata) == colnames(cts))
 cts <- cts[, rownames(coldata)]
 all(rownames(coldata) == colnames(cts))
 ```
 
 If you have used the *featureCounts* function [@Liao2013feature] in the 
-[Rsubread](http://bioconductor.org/packages/Rsubread) package, the matrix of read counts can be directly 
+[Rsubread](http://bioconductor.org/packages/Rsubread) package, 
+the matrix of read counts can be directly 
 provided from the `"counts"` element in the list output.
 The count matrix and column data can typically be read into R 
 from flat files using base R functions such as *read.csv*
@@ -433,20 +447,20 @@ ddsSE
 
 ### Pre-filtering
 
-While it is not necessary to pre-filter low count genes before running the DESeq2
-functions, there are two reasons which make pre-filtering useful:
-by removing rows in which there are no reads or nearly no reads,
-we reduce the memory size of the `dds` data object and 
-we increase the speed of the transformation
-and testing functions within DESeq2. Here we perform a minimal
-pre-filtering to remove rows that have only 0 or 1 read. Note that more strict
-filtering to increase power is *automatically* 
-applied via [independent filtering](#indfilt) on the mean of
-normalized counts within the *results* function. 
+While it is not necessary to pre-filter low count genes before running
+the DESeq2 functions, there are two reasons which make pre-filtering
+useful: by removing rows in which there are very few reads, we reduce
+the memory size of the `dds` data object, and we increase the speed of
+the transformation and testing functions within DESeq2. Here we
+perform a minimal pre-filtering to keep only rows that have at least
+10 reads total. Note that more strict filtering to increase power is
+*automatically* applied via [independent filtering](#indfilt) on the
+mean of normalized counts within the *results* function.
 
 ```{r prefilter}
-dds <- dds[ rowSums(counts(dds)) > 1, ]
-``` 
+keep <- rowSums(counts(dds)) >= 10
+dds <- dds[keep,]
+```
 
 ### Note on factor levels 
 
@@ -457,17 +471,19 @@ control group), the comparisons will be based on the alphabetical
 order of the levels. There are two solutions: you can either
 explicitly tell *results* which comparison to make using the
 `contrast` argument (this will be shown later), or you can explicitly
-set the factors levels. Setting the factor levels can be done in two
-ways, either using factor:
+set the factors levels. You should only change the factor levels
+of variables in the design **before** running the DESeq2 analysis, not
+during or afterward. Setting the factor levels can be done in two
+ways, either using factor: 
 
 ```{r factorlvl}
-dds$condition <- factor(dds$condition, levels=c("untreated","treated"))
+dds$condition <- factor(dds$condition, levels = c("untreated","treated"))
 ``` 
 
 ...or using *relevel*, just specifying the reference level:
 
 ```{r relevel}
-dds$condition <- relevel(dds$condition, ref="untreated")
+dds$condition <- relevel(dds$condition, ref = "untreated")
 ``` 
 
 If you need to subset the columns of a *DESeqDataSet*,
@@ -548,29 +564,33 @@ fold changes.
 
 ```{r lfcShrink}
 resultsNames(dds)
-resLFC <- lfcShrink(dds, coef=2, res=res)
+resLFC <- lfcShrink(dds, coef=2)
 resLFC
 ```
 
-The above steps should take less than 30 seconds for most analyses. For
-experiments with many samples (e.g. 100 samples), one can take
-advantage of parallelized computation.  Both of the above functions
-have an argument `parallel` which if set to `TRUE` can
-be used to distribute computation across cores specified by the
-*register* function of [BiocParallel](http://bioconductor.org/packages/BiocParallel). For example,
-the following chunk (not evaluated here), would register 4 cores, and
-then the two functions above, with `parallel=TRUE`, would
-split computation over these cores. 
+<a name="parallel"/>
+
+The above steps should take less than 30 seconds for most
+analyses. For experiments with many samples (e.g. 100 samples), one
+can take advantage of parallelized computation. Parallelizing `DESeq`,
+`results`, and `lfcShrink` can be easily accomplished by loading the
+BiocParallel package, and then setting the following arguments:
+`parallel=TRUE` and `BPPARAM=MulticoreParam(4)`, for example,
+splitting the job over 4 cores. Note that `results` for coefficients
+or contrasts listed in `resultsNames(dds)` is fast and will not need
+parallelization. As an alternative to `BPPARAM`, one can `register`
+cores at the beginning of an analysis, and then just specify
+`parallel=TRUE` to the functions when called.
 
 ```{r parallel, eval=FALSE}
 library("BiocParallel")
 register(MulticoreParam(4))
 ```
 
-We can order our results table by the smallest adjusted *p* value:
+We can order our results table by the smallest *p* value:
 
 ```{r resOrder}
-resOrdered <- res[order(res$padj),]
+resOrdered <- res[order(res$pvalue),]
 ```
 
 We can summarize some basic tallies using the
@@ -606,12 +626,22 @@ sum(res05$padj < 0.05, na.rm=TRUE)
 
 <a name="IHW"/>
 
-A generalization of the idea of *p* value filtering is to *weight* hypotheses
-to optimize power. A Bioconductor package, [IHW](http://bioconductor.org/packages/IHW), is available
-that implements the method of *Independent Hypothesis Weighting* [@Ignatiadis2015].
-Here we show the use of *IHW* for *p* value adjustment of DESeq2 results.
-For more details, please see the vignette of the [IHW](http://bioconductor.org/packages/IHW) package.
-Note that the *IHW* result object is stored in the metadata.
+A generalization of the idea of *p* value filtering is to *weight*
+hypotheses to optimize power. A Bioconductor
+package, [IHW](http://bioconductor.org/packages/IHW), is available
+that implements the method of *Independent Hypothesis Weighting*
+[@Ignatiadis2016].  Here we show the use of *IHW* for *p* value
+adjustment of DESeq2 results.  For more details, please see the
+vignette of the [IHW](http://bioconductor.org/packages/IHW)
+package. The *IHW* result object is stored in the metadata.
+
+**Note:** If the results of independent hypothesis weighting are used
+in published research, please cite:
+
+> Ignatiadis, N., Klaus, B., Zaugg, J.B., Huber, W. (2016)
+> Data-driven hypothesis weighting increases detection power in genome-scale multiple testing.
+> *Nature Methods*, **13**:7.
+> [10.1038/nmeth.3885](http://dx.doi.org/10.1038/nmeth.3885)
 
 ```{r IHW}
 library("IHW")
@@ -646,7 +676,7 @@ either up or down.
 plotMA(res, ylim=c(-2,2))
 ```
 
-It is also useful to visualize the MA-plot for the shrunken log2 fold
+It is more useful visualize the MA-plot for the shrunken log2 fold
 changes, which remove the noise associated with log2 fold changes from
 low count genes without requiring arbitrary filtering thresholds.
 
@@ -664,6 +694,75 @@ idx <- identify(res$baseMean, res$log2FoldChange)
 rownames(res)[idx]
 ``` 
 
+### Alternative shrinkage estimators
+
+The moderated log fold changes proposed by @Love2014 use a normal
+prior distribution, centered on zero and with a scale that is fit to
+the data. The shrunken log fold changes are useful for ranking and
+visualization, without the need for arbitrary filters on low count
+genes. The normal prior can sometimes produce too strong of
+shrinkage for certain datasets. In DESeq2 version 1.18, we include two
+additional adaptive shrinkage estimators, available via the `type`
+argument of `lfcShrink`. For more details, see `?lfcShrink`
+
+The options for `type` are:
+
+* `normal` is the the original DESeq2 shrinkage estimator, an adaptive
+  normal prior
+* `apeglm` is the adaptive t prior shrinkage estimator from the 
+  [apeglm](http://bioconductor.org/packages/apeglm) package
+* `ashr` is the adaptive shrinkage estimator from the
+  [ashr](https://github.com/stephens999/ashr) package [@Stephens2016].
+  Here DESeq2 uses the ashr option to fit a mixture of normal distributions to
+  form the prior, with `method="shrinkage"`
+
+**Note:** if the shrinkage estimator `type="ashr"` is used in published
+research, please cite:
+
+> Stephens, M. (2016) 
+> False discovery rates: a new deal. *Biostatistics*, **18**:2.
+> [10.1093/biostatistics/kxw041](https://doi.org/10.1093/biostatistics/kxw041)
+
+```{r warning=FALSE}
+resApe <- lfcShrink(dds, coef=2, type="apeglm")
+resAsh <- lfcShrink(dds, coef=2, type="ashr")
+```
+
+```{r fig.width=8, fig.height=3}
+par(mfrow=c(1,3), mar=c(4,4,2,1))
+xlim <- c(1,1e5); ylim <- c(-3,3)
+plotMA(resLFC, xlim=xlim, ylim=ylim, main="normal")
+plotMA(resApe, xlim=xlim, ylim=ylim, main="apeglm")
+plotMA(resAsh, xlim=xlim, ylim=ylim, main="ashr")
+```
+
+**Note:** due to the nature of the statistical model and optimization
+approach, `apeglm` is usually a factor of ~5 slower than `normal`. For
+example, with 10,000 genes and 10 samples, `normal` may take ~3
+seconds, while `apeglm` takes ~15 seconds (on a laptop).  However,
+`apeglm` can be more than an order of magnitude slower when there are
+many coefficients, e.g. 10 or more coefficients in
+`resultsNames(dds)`. The method `ashr` is fairly fast and does not
+depend on the number of coefficients, as it uses only the estimated
+MLE coefficients and their standard errors. A solution for speeding up
+`normal` and `apeglm` is to use multiple cores. This can be easily
+accomplished by loading the BiocParallel package, and then setting the
+following arguments of `lfcShrink`: `parallel=TRUE` and
+`BPPARAM=MulticoreParam(4)`, for example, splitting the job over 4
+cores. This approach can also be used with `DESeq` and `results`, as
+mentioned [above](#parallel).
+
+**Note:** If there is unwanted variation present in the data (e.g. batch
+effects) it is always recommend to correct for this, which can be
+accommodated in DESeq2 by including in the design any known batch
+variables or by using functions/packages such as 
+`svaseq` in [sva](http://bioconductor.org/packages/sva) [@Leek2014] or 
+the `RUV` functions in [RUVSeq](http://bioconductor.org/packages/RUVSeq) [@Risso2014]
+to estimate variables that capture the unwanted variation.
+In addition, the ashr developers have a 
+[specific method](https://github.com/dcgerard/vicar) 
+for accounting for unwanted variation in combination with ashr [@Gerard2017].
+
 ### Plot counts 
 
 It can also be useful to examine the counts of reads for a single gene
@@ -808,11 +907,22 @@ the analysis using a multi-factor design.
 ddsMF <- dds
 ```
 
+We change the levels of `type` so it only contains letters (numbers, underscore and
+period are also allowed in design factor levels). Be careful when
+changing level names to use the same order as the current levels.
+
+```{r fixLevels}
+levels(ddsMF$type)
+levels(ddsMF$type) <- sub("-.*", "", levels(ddsMF$type))
+levels(ddsMF$type)
+```
+
 We can account for the different types of sequencing, and get a clearer picture
 of the differences attributable to the treatment.  As `condition` is the
 variable of interest, we put it at the end of the formula. Thus the *results*
 function will by default pull the `condition` results unless 
 `contrast` or `name` arguments are specified. 
+
 Then we can re-run *DESeq*:
 
 ```{r replaceDesign}
@@ -837,7 +947,7 @@ described in the help page for *results* and [below](#contrasts)
 
 ```{r multiTypeResults}
 resMFType <- results(ddsMF,
-                     contrast=c("type", "single-read", "paired-end"))
+                     contrast=c("type", "single", "paired"))
 head(resMFType)
 ```
 
@@ -870,18 +980,19 @@ where *n* represents the count values and $n_0$ is a positive constant.
 
 In this section, we discuss two alternative
 approaches that offer more theoretical justification and a rational way
-of choosing the parameter equivalent to $n_0$ above.
-The *regularized logarithm* or *rlog* incorporates a prior on
-the sample differences [@Love2014], 
-and the other uses the concept of variance stabilizing
-transformations (VST) [@Tibshirani1988; @sagmb2003; @Anders:2010:GB].
+of choosing parameters equivalent to $n_0$ above.
+One makes use of the concept of variance stabilizing
+transformations (VST) [@Tibshirani1988; @sagmb2003; @Anders:2010:GB],
+and the other is the *regularized logarithm* or *rlog*, which
+incorporates a prior on the sample differences [@Love2014].
 Both transformations produce transformed data on the log2 scale
-which has been normalized with respect to library size.
+which has been normalized with respect to library size or other
+normalization factors.
 
-The point of these two transformations, the *rlog* and the VST,
+The point of these two transformations, the VST and the *rlog*,
 is to remove the dependence of the variance on the mean,
 particularly the high variance of the logarithm of count data when the
-mean is low. Both *rlog* and VST use the experiment-wide trend
+mean is low. Both VST and *rlog* use the experiment-wide trend
 of variance over mean, in order to transform the data to remove the
 experiment-wide trend. Note that we do not require or
 desire that all the genes have *exactly* the same variance after
@@ -897,12 +1008,12 @@ the *rlog* function might take too long, and so the *vst* function
 will be a faster choice. 
 The rlog and VST have similar properties, but the rlog requires
 fitting a shrinkage term for each sample and each gene which takes
-time.  See the DESeq2 paper for more discussion on the differences
+time. See the DESeq2 paper for more discussion on the differences
 [@Love2014].
 
 ### Blind dispersion estimation
 
-The two functions, *rlog* and *vst* have an argument
+The two functions, *vst* and *rlog* have an argument
 `blind`, for whether the transformation should be blind to the
 sample information specified by the design formula. When
 `blind` equals `TRUE` (the default), the functions
@@ -935,22 +1046,29 @@ experimental group in applying the transformation.
 These transformation functions return an object of class *DESeqTransform*
 which is a subclass of *RangedSummarizedExperiment*. 
 For ~20 samples, running on a newly created `DESeqDataSet`,
-*rlog* may take 30 seconds, 
-*varianceStabilizingTransformation* may take 5 seconds, and
-*vst* less than 1 second (by subsetting to 1000 genes for
-calculating the global dispersion trend).
-However, the running times are shorter and more similar with `blind=FALSE` and
+*rlog* may take 30 seconds, while *vst* takes less than 1 second.
+The running times are shorter when using `blind=FALSE` and
 if the function *DESeq* has already been run, because then
 it is not necessary to re-estimate the dispersion values.
 The *assay* function is used to extract the matrix of normalized values.
 
 ```{r rlogAndVST}
+vsd <- vst(dds, blind=FALSE)
 rld <- rlog(dds, blind=FALSE)
-vsd <- varianceStabilizingTransformation(dds, blind=FALSE)
-vsd.fast <- vst(dds, blind=FALSE)
-head(assay(rld), 3)
+head(assay(vsd), 3)
 ```
 
+### Variance stabilizing transformation
+
+Above, we used a parametric fit for the dispersion. In this case, the
+closed-form expression for the variance stabilizing transformation is
+used by the *vst* function. If a local fit is used (option
+`fitType="locfit"` to *estimateDispersions*) a numerical integration
+is used instead. The transformed data should be approximated variance
+stabilized and also includes correction for size factors or
+normalization factors. The transformed data is on the log2 scale for
+large counts.
+
 ### Regularized log transformation
 
 The function *rlog*, stands for *regularized log*,
@@ -979,16 +1097,6 @@ in sequencing depth. Without priors, this design matrix would lead to
 a non-unique solution, however the addition of a prior on
 non-intercept betas allows for a unique solution to be found. 
 
-### Variance stabilizing transformation
-
-Above, we used a parametric fit for the dispersion. In this case, the
-closed-form expression for the variance stabilizing transformation is
-used by *varianceStabilizingTransformation*, which is
-derived in the file `vst.pdf`, that is distributed in the
-package alongside this vignette. If a local fit is used (option
-`fitType="locfit"` to *estimateDispersions*) a numerical integration
-is used instead. 
-
 ### Effects of transformations on the variance
 
 The figure below plots the standard deviation of the transformed data,
@@ -1010,10 +1118,9 @@ differences due to the experimental conditions.
 # this gives log2(n + 1)
 ntd <- normTransform(dds)
 library("vsn")
-notAllZero <- (rowSums(counts(dds))>0)
-meanSdPlot(assay(ntd)[notAllZero,])
-meanSdPlot(assay(rld[notAllZero,]))
-meanSdPlot(assay(vsd[notAllZero,]))
+meanSdPlot(assay(ntd))
+meanSdPlot(assay(vsd))
+meanSdPlot(assay(rld))
 ```
 
 ## Data quality assessment by sample clustering and visualization
@@ -1043,34 +1150,33 @@ select <- order(rowMeans(counts(dds,normalized=TRUE)),
 df <- as.data.frame(colData(dds)[,c("condition","type")])
 pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
          cluster_cols=FALSE, annotation_col=df)
-pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
-         cluster_cols=FALSE, annotation_col=df)
 pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
          cluster_cols=FALSE, annotation_col=df)
+pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
+         cluster_cols=FALSE, annotation_col=df)
 ```
 
 ### Heatmap of the sample-to-sample distances
 
-Another use of the transformed data is sample clustering. Here, we apply the
-*dist* function to the transpose of the transformed count matrix to get
-sample-to-sample distances. We could alternatively use the variance stabilized
-transformation here.
+Another use of the transformed data is sample clustering. Here, we
+apply the *dist* function to the transpose of the transformed count
+matrix to get sample-to-sample distances.
 
 ```{r sampleClust}
-sampleDists <- dist(t(assay(rld)))
+sampleDists <- dist(t(assay(vsd)))
 ```
 
-A heatmap of this distance matrix gives us an overview over similarities
-and dissimilarities between samples.
-We have to provide a hierarchical clustering `hc` to the heatmap
-function based on the sample distances, or else the heatmap
-function would calculate a clustering based on the distances between
-the rows/columns of the distance matrix.
+A heatmap of this distance matrix gives us an overview over
+similarities and dissimilarities between samples.  We have to provide
+a hierarchical clustering `hc` to the heatmap function based on the
+sample distances, or else the heatmap function would calculate a
+clustering based on the distances between the rows/columns of the
+distance matrix.
 
-```{r figHeatmapSamples}
+```{r figHeatmapSamples, fig.height=4, fig.width=6}
 library("RColorBrewer")
 sampleDistMatrix <- as.matrix(sampleDists)
-rownames(sampleDistMatrix) <- paste(rld$condition, rld$type, sep="-")
+rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
 colnames(sampleDistMatrix) <- NULL
 colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
 pheatmap(sampleDistMatrix,
@@ -1087,14 +1193,14 @@ components. This type of plot is useful for visualizing the overall
 effect of experimental covariates and batch effects.
 
 ```{r figPCA}
-plotPCA(rld, intgroup=c("condition", "type"))
+plotPCA(vsd, intgroup=c("condition", "type"))
 ```
 
 It is also possible to customize the PCA plot using the
 *ggplot* function.
 
 ```{r figPCA2}
-pcaData <- plotPCA(rld, intgroup=c("condition", "type"), returnData=TRUE)
+pcaData <- plotPCA(vsd, intgroup=c("condition", "type"), returnData=TRUE)
 percentVar <- round(100 * attr(pcaData, "percentVar"))
 ggplot(pcaData, aes(PC1, PC2, color=condition, shape=type)) +
   geom_point(size=3) +
@@ -1199,7 +1305,7 @@ Here, the y-axis represents log2(n+1), and each
 group has 20 samples (black dots). A red line connects the mean of the
 groups within each genotype. 
 
-```{r interFig, echo=FALSE, results="hide"}
+```{r interFig, echo=FALSE, results="hide", fig.height=3}
 npg <- 20
 mu <- 2^c(8,10,9,11,10,12)
 cond <- rep(rep(c("A","B"),each=npg),3)
@@ -1207,7 +1313,7 @@ geno <- rep(c("I","II","III"),each=2*npg)
 table(cond, geno)
 counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
 d <- data.frame(log2c=log2(counts+1), cond, geno)
-library(ggplot2)
+library("ggplot2")
 plotit <- function(d, title) {
   ggplot(d, aes(x=cond, y=log2c, group=geno)) + 
     geom_jitter(size=1.5, position = position_jitter(width=.15)) +
@@ -1231,7 +1337,7 @@ obtained by adding the main condition effect and the interaction
 term for that genotype.  Such a plot can be made using the
 *plotCounts* function as shown above.
 
-```{r interFig2, echo=FALSE, results="hide"}
+```{r interFig2, echo=FALSE, results="hide", fig.height=3}
 mu[4] <- 2^12
 mu[6] <- 2^8
 counts <- rnbinom(6*npg, mu=rep(mu,each=npg), size=1/.01)
@@ -1262,13 +1368,6 @@ can be found by typing `?results`. In particular, we show how to
 test for differences in the condition effect across genotype, and we
 show how to obtain the condition effect for non-reference genotypes.
 
-Note that for DESeq2 versions higher than 1.10, the *DESeq* function
-will turn off log fold change shrinkage (setting `betaPrior=FALSE`),
-for designs which contain an interaction term. Turning off the log
-fold change shrinkage allows the software to use standard model
-matrices (as would be produced by *model.matrix*), where the
-interaction coefficients are easier to interpret.
-
 ## Time-series experiments
 
 There are a number of ways to analyze time-series experiments,
@@ -1523,45 +1622,22 @@ specified by the `name` argument, and $x$ is the `lfcThreshold`.
 * `greater` - $\beta > x$
 * `less` - $\beta < -x$
 
-The test `altHypothesis="lessAbs"` requires that the user have
-run *DESeq* with the argument `betaPrior=FALSE`.  To
-understand the reason for this requirement, consider that during
-hypothesis testing, the null hypothesis is favored unless the data
-provide strong evidence to reject the null.  For this test, including
-a zero-centered prior on log fold change would favor the alternative
-hypothesis, shrinking log fold changes toward zero.  Removing the
-prior on log fold changes for tests of small log fold change allows
-for detection of only those genes where the data alone provides
-evidence against the null.
-
 The four possible values of `altHypothesis` are demonstrated
 in the following code and visually by MA-plots in the following figures.
-First we run *DESeq* and specify `betaPrior=FALSE` in order 
-to demonstrate `altHypothesis="lessAbs"`.
-
-```{r ddsNoPrior}
-ddsNoPrior <- DESeq(dds, betaPrior=FALSE)
-```
-
-In order to produce results tables for the following tests, the same arguments
-(except `ylim`) would be provided to the *results* function. 
 
 ```{r lfcThresh}
 par(mfrow=c(2,2),mar=c(2,2,1,1))
-yl <- c(-2.5,2.5)
+ylim <- c(-2.5,2.5)
 resGA <- results(dds, lfcThreshold=.5, altHypothesis="greaterAbs")
-resLA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="lessAbs")
+resLA <- results(dds, lfcThreshold=.5, altHypothesis="lessAbs")
 resG <- results(dds, lfcThreshold=.5, altHypothesis="greater")
 resL <- results(dds, lfcThreshold=.5, altHypothesis="less")
-plotMA(resGA, ylim=yl)
-abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
-plotMA(resLA, ylim=yl)
-abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
-plotMA(resG, ylim=yl)
-abline(h=.5,col="dodgerblue",lwd=2)
-plotMA(resL, ylim=yl)
-abline(h=-.5,col="dodgerblue",lwd=2)
-``` 
+drawLines <- function() abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
+plotMA(resGA, ylim=ylim); drawLines()
+plotMA(resLA, ylim=ylim); drawLines()
+plotMA(resG, ylim=ylim); drawLines()
+plotMA(resL, ylim=ylim); drawLines()
+```
 
 <a name="access"/>
 
@@ -1622,6 +1698,17 @@ The beta prior variance $\sigma_r^2$ is stored as an attribute of the
 attr(dds, "betaPriorVar")
 ``` 
 
+General information about the prior used for log fold change shrinkage
+is also stored in a slot of the *DESeqResults* object. This would
+also contain information about what other packages were used
+for log2 fold change shrinkage.
+
+```{r priorInfo}
+priorInfo(resLFC)
+priorInfo(resApe)
+priorInfo(resAsh)
+```
+
 The dispersion prior variance $\sigma_d^2$ is stored as an
 attribute of the dispersion function:
 
@@ -1955,7 +2042,12 @@ version *DESeq*, are as follows:
 
 ## Methods changes since the 2014 DESeq2 paper
 
-* In version 1.16 (Novermber 2016), the log2 fold change 
+* In version 1.18 (November 2018), we add two 
+  [alternative shrinkage estimators](#alternative-shrinkage-estimators),
+  which can be used via `lfcShrink`: an estimator using a t prior from
+  the apeglm packages, and an estimator with a fitted mixture of
+  normals prior from the ashr package.
+* In version 1.16 (November 2016), the log2 fold change 
   shrinkage is no longer default for the *DESeq* and *nbinomWaldTest*
   functions, by setting the defaults of these to `betaPrior=FALSE`,
   and by introducing a separate function *lfcShrink*, which performs
@@ -1965,7 +2057,7 @@ version *DESeq*, are as follows:
   as an inference engine by a wider community, and certain sequencing
   datasets show better performance with the testing separated from the
   use of the LFC prior. Also, the separation of LFC shrinkage to a separate
-  function *lfcShrink* allows for easier methods development of
+  function `lfcShrink` allows for easier methods development of
   alternative effect size estimators.
 * A small change to the independent filtering routine: instead
   of taking the quantile of the filter (the mean of normalized counts) which
@@ -1978,7 +2070,7 @@ version *DESeq*, are as follows:
 * For the calculation of the beta prior variance, instead of
   matching the empirical quantile to the quantile of a Normal
   distribution, DESeq2 now uses the weighted quantile function
-  of the \CRANpkg{Hmisc} package. The weighting is described in the
+  of the Hmisc package. The weighting is described in the
   manual page for *nbinomWaldTest*.  The weights are the
   inverse of the expected variance of log counts (as used in the
   diagonals of the matrix $W$ in the GLM). The effect of the change
@@ -2033,28 +2125,19 @@ $$ W = \frac{c^t \beta}{\sqrt{c^t \Sigma c}} $$
 
 ## Expanded model matrices 
 
-DESeq2 uses *expanded model matrices* in conjunction with the log2
-fold change prior, in order to produce shrunken log2 fold change estimates and test 
-results which are independent of the choice of reference level. 
-Another way of saying this is that the shrinkage is *symmetric*
-with respect to all the levels of the factors in the design.
-The expanded model matrices differ from the standard model matrices, in that
-they have an indicator column (and therefore a coefficient) for
-each level of factors in the design formula in addition to an intercept. 
-Note that in version 1.10 and onward, standard model matrices are used for
-designs with interaction terms, as the shrinkage of log2 fold changes
-is not recommended for these designs.
-
-The expanded model matrices are not full rank, but a coefficient
-vector $\beta_i$ can still be found due to the zero-centered prior on
-non-intercept coefficients. The prior variance for the log2 fold
-changes is calculated by first generating maximum likelihood estimates
-for a standard model matrix. The prior variance for each level of a
-factor is then set as the average of the mean squared maximum
-likelihood estimates for each level and every possible contrast, such
-that that this prior value will be reference-level-independent. The
-`contrast` argument of the *results* function is
-used in order to generate comparisons of interest.
+For the specific combination of `lfcShrink` with the type `normal` and
+using `contrast`, DESeq2 uses *expanded model matrices* to produce
+shrunken log2 fold change estimates where the shrinkage is independent
+of the choice of reference level. In all other cases, DESeq2 uses
+standard model matrices, as produced by `model.matrix`.  The expanded
+model matrices differ from the standard model matrices, in that they
+have an indicator column (and therefore a coefficient) for each level
+of factors in the design formula in addition to an intercept. This is
+described in the DESeq2 paper, but the DESeq2 software package has
+moved away from this approach, with more support for shrinkage of
+individual coefficients (although the expanded model matrix approach
+is still supported using the above combination of functions and
+arguments).
 
 <a name="indfilttheory"/>
 
@@ -2243,8 +2326,8 @@ dds2 <- makeExampleDESeqDataSet(n=1000,m=12,
 dds2 <- dds2[,7:12]
 dds2$condition <- rep("C",6)
 mcols(dds2) <- NULL
-dds <- cbind(dds1, dds2)
-rld <- rlog(dds, blind=FALSE, fitType="mean")
+dds12 <- cbind(dds1, dds2)
+rld <- rlog(dds12, blind=FALSE, fitType="mean")
 plotPCA(rld)
 ``` 
 
@@ -2330,7 +2413,7 @@ See the help page for *results* for more details.
 See the manual page for *DESeq*, which links to the 
 subfunctions which are called in order, where complete details are
 listed. Also you can read the three steps listed in the 
-[the DESeq2 model](#theory) in this document.
+[DESeq2 model](#theory) in this document.
 
 
 ## Is there an official Galaxy tool for DESeq2?
@@ -2383,7 +2466,9 @@ feedback of many individuals, including but not limited to:
 
 The Bionconductor Core Team,
 Alejandro Reyes, Andrzej Oles, Aleksandra Pekowska, Felix Klein,
-Nikolaos Ignatiadis,
+Nikolaos Ignatiadis (IHW),
+Anqi Zhu (apeglm),
+Joseph Ibrahim (apeglm),
 Vince Carey,
 Owen Solberg,
 Ruping Sun,
@@ -2408,7 +2493,8 @@ Bjorn Gruning,
 Ryan McMinds,
 Paul Gordon,
 Leonardo Collado Torres,
-Enrico Ferrero.
+Enrico Ferrero,
+Peter Langfelder.
 
 # Session info
 
diff --git a/vignettes/library.bib b/vignettes/library.bib
index f0968d4..0cc9adb 100644
--- a/vignettes/library.bib
+++ b/vignettes/library.bib
@@ -1,9 +1,58 @@
- at article{Ignatiadis2015,
+ at article{Leek2014,
+  author = {Leek, Jeffrey T.},
+  journal = {Nucleic Acids Research},
+  title = {{svaseq: removing batch effects and other unwanted noise from sequencing data}},
+  url = {http://dx.doi.org/10.1093/nar/gku864},
+  year = 2014,
+  volume = 42,
+  issue = 21,
+}
+
+ at article{Risso2014,
+  author = {Risso, Davide and Ngai, John and Speed, Terence P and Dudoit, Sandrine},
+  journal = {Nature Biotechnology},
+  title = {{Normalization of RNA-seq data using factor analysis of control genes or samples}},
+  url = {http://dx.doi.org/10.1038/nbt.2931},
+  year = 2014,
+  volume = 32,
+  issue = 9,
+}
+
+ at article{Gerard2017,
+  author = {Gerard, David and Stephens, Matthew},
+  journal = {arXiv},
+  title = {{Empirical Bayes Shrinkage and False Discovery Rate Estimation, Allowing For Unwanted Variation}},
+  url = {https://arxiv.org/abs/1709.10066},
+  year = 2017
+}
+
+ at article{Stephens2016,
+  author = {Stephens, Matthew},
+  journal = {Biostatistics},
+  title = {False discovery rates: a new deal},
+  url = {https://doi.org/10.1093/biostatistics/kxw041},
+  year = 2016,
+  volume = 18,
+  issue = 2,
+}
+
+ at article{Love2016Modeling,
+  author =	 {Love, Michael I. and Hogenesch, John B. and Irizarry, Rafael A.},
+  journal =	 {Nature Biotechnology},
+  title =	 {Modeling of RNA-seq fragment sequence bias reduces systematic errors in transcript abundance estimation},
+  url =		 {http://dx.doi.org/10.1038/nbt.3682},
+  volume =       34,
+  issue =        12,
+  pages =        {1287--1291},
+  year =	 2016
+}
+
+ at article{Ignatiadis2016,
   author = {Ignatiadis, Nikolaos and Klaus, Bernd and Zaugg, Judith and Huber, Wolfgang},
-  journal = {bioRxiv},
-  title = {Data-driven hypothesis weighting increases detection power in big data analytics},
-  url = {http://dx.doi.org/10.1101/034330},
-  year = 2015
+  journal = {Nature Methods},
+  title = {Data-driven hypothesis weighting increases detection power in genome-scale multiple testing},
+  url = {http://dx.doi.org/10.1038/nmeth.3885},
+  year = 2016
 }
 
 @article{Love2014,
@@ -281,12 +330,12 @@ journal = {Bioinformatics}
   year = {2014}
 }
 
- at article{Patro2016Salmon,
+ at article{Patro2017Salmon,
   author = {Patro, Rob and Duggal, Geet and Love, Michael I. and Irizarry, Rafael A. and Kingsford, Carl},
-  journal = {bioRxiv},
-  title = {Salmon provides accurate, fast, and bias-aware transcript expression estimates using dual-phase inference},
-  url = {http://biorxiv.org/content/early/2016/08/30/021592},
-  year = 2016
+  journal = {Nature Methods},
+  title = {Salmon provides fast and bias-aware quantification of transcript expression},
+  url = {http://dx.doi.org/10.1038/nmeth.4197},
+  year = 2017
 }
 
 @article{Bray2016Near,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-deseq2.git



More information about the debian-med-commit mailing list