[med-svn] [r-bioc-edger] 01/03: New upstream version 3.18.1+dfsg
Andreas Tille
tille at debian.org
Fri Sep 8 19:15:18 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-bioc-edger.
commit 3caac50fbf5615e437222f987b1b0d65c2c1052d
Author: Andreas Tille <tille at debian.org>
Date: Fri Sep 8 21:00:38 2017 +0200
New upstream version 3.18.1+dfsg
---
DESCRIPTION | 6 ++--
NAMESPACE | 9 ++----
R/S3methods.R | 2 +-
R/decidetestsDGE.R | 57 +++++++++++++++++++++++++++++-----
R/gini.R | 11 +++++--
R/glmfit.R | 5 ++-
R/normalizeChIPtoInput.R | 6 ++--
R/plotMD.R | 42 +++++++++++++++++++++----
R/topTags.R | 2 +-
build/vignette.rds | Bin 228 -> 228 bytes
inst/NEWS.Rd | 27 ++++++++++++++++
inst/doc/edgeR.pdf | Bin 45765 -> 45765 bytes
man/addPriorCount.Rd | 9 ++++++
man/decidetestsDGE.Rd | 68 ++++++++++++++++++++++++++++-------------
man/dim.Rd | 7 -----
man/estimateDisp.Rd | 4 +--
man/gini.Rd | 2 +-
man/nbinomDeviance.Rd | 6 ++--
man/plotMD.Rd | 34 ++++++++++++---------
man/topTags.Rd | 2 +-
src/R_initialize_levenberg.cpp | 17 ++++++-----
src/R_levenberg.cpp | 26 +++++++++++-----
src/adj_coxreid.cpp | 1 +
src/fmm_spline.c | 17 ++++++-----
src/glm_levenberg.cpp | 7 +++--
tests/edgeR-Tests.Rout.save | 28 ++++++++---------
26 files changed, 275 insertions(+), 120 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index a8d4580..4e08836 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: edgeR
-Version: 3.16.5
-Date: 2016-12-12
+Version: 3.18.1
+Date: 2017-04-17
Title: Empirical Analysis of Digital Gene Expression Data in R
Description: Differential expression analysis of RNA-seq expression profiles with biological replication. Implements a range of statistical methodology based on the negative binomial distributions, including empirical Bayes estimation, exact tests, generalized linear models and quasi-likelihood tests. As well as RNA-seq, it be applied to differential signal analysis of other types of genomic data that produce counts, including ChIP-seq, SAGE and CAGE.
Author: Yunshun Chen <yuchen at wehi.edu.au>, Aaron Lun <alun at wehi.edu.au>, Davis McCarthy <dmccarthy at wehi.edu.au>, Xiaobei Zhou <xiaobei.zhou at uzh.ch>, Mark Robinson <mark.robinson at imls.uzh.ch>, Gordon Smyth <smyth at wehi.edu.au>
@@ -16,4 +16,4 @@ biocViews: GeneExpression, Transcription, AlternativeSplicing,
TimeCourse, SAGE, Sequencing, ChIPSeq, RNASeq, BatchEffect,
MultipleComparison, Normalization, QualityControl
NeedsCompilation: yes
-Packaged: 2016-12-14 23:27:46 UTC; biocbuild
+Packaged: 2017-05-05 22:32:41 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index b092605..91086b0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,7 +24,7 @@ if( tools:::.OStype() == "windows" ) importFrom("utils", "winMenuAddItem")
importFrom("limma",camera,contrastAsCoef,decideTests,fry,goana,kegga,is.fullrank,
lmFit,loessFit,mroast,nonEstimable,plotMD,plotMDS,plotWithHighlights,removeExt,
roast,romer,squeezeVar,subsetListOfArrays,zscoreGamma,zscoreT)
-importClassesFrom("limma","LargeDataObject","Roast","MDS")
+importClassesFrom("limma","LargeDataObject","Roast","MDS","TestResults")
S3method(as.data.frame,TopTags)
S3method(as.matrix,DGEList)
@@ -41,6 +41,8 @@ S3method(dim,DGEExact)
S3method(dim,DGEGLM)
S3method(dim,DGELRT)
S3method(dim,TopTags)
+S3method(decideTests,DGEExact)
+S3method(decideTests,DGELRT)
S3method(dimnames,DGEList)
S3method(dimnames,DGEExact)
S3method(dimnames,DGEGLM)
@@ -74,11 +76,6 @@ S3method(goana,DGEExact)
S3method(goana,DGELRT)
S3method(kegga,DGEExact)
S3method(kegga,DGELRT)
-S3method(length,DGEList)
-S3method(length,DGEExact)
-S3method(length,TopTags)
-S3method(length,DGEGLM)
-S3method(length,DGELRT)
S3method(mroast,DGEList)
S3method(plotMD,DGEGLM)
S3method(plotMD,DGELRT)
diff --git a/R/S3methods.R b/R/S3methods.R
index 116cd0c..8e2e55b 100644
--- a/R/S3methods.R
+++ b/R/S3methods.R
@@ -19,7 +19,7 @@ dim.DGEExact <- dim.TopTags <- dim.DGELRT <- function(x) if(is.null(x$table)) c(
# S3 length methods
-length.DGEList <- length.DGEExact <- length.TopTags <- length.DGEGLM <- length.DGELRT <- function(x) prod(dim(x))
+#length.DGEList <- length.DGEExact <- length.TopTags <- length.DGEGLM <- length.DGELRT <- function(x) prod(dim(x))
# S3 dimnames methods
# These enable rownames() and colnames() as well
diff --git a/R/decidetestsDGE.R b/R/decidetestsDGE.R
index 88bd1ab..a3a968c 100644
--- a/R/decidetestsDGE.R
+++ b/R/decidetestsDGE.R
@@ -1,14 +1,55 @@
-# DECIDETESTSDGE.R
+# decideTestsDGE.R
-decideTestsDGE <- function(object,adjust.method="BH",p.value=0.05,lfc=0)
- ## Accept or reject hypothesis tests across genes and contrasts
- ## Davis McCarthy
- ## 15 August 2010. Last modified 17 Oct 2014 (Mark Robinson).
+decideTests.DGEExact <- decideTests.DGELRT <- function(object,adjust.method="BH",p.value=0.05,lfc=0,...)
{
- if(!is(object,"DGEExact") & !is(object,"DGELRT")) stop("Need DGEExact or DGELRT object") # Expects a DGEExact or DGELRT object
- decideTests(new("MArrayLM", list(p.value=object$table$PValue, coefficients=object$table$logFC)),
- method="separate", adjust.method=adjust.method, p.value=p.value, lfc=lfc)
+ decideTestsDGE(object=object,adjust.method=adjust.method,p.value=p.value,lfc=lfc)
}
+decideTestsDGE <- function(object,adjust.method="BH",p.value=0.05,lfc=0)
+# Accept or reject hypothesis tests across genes and contrasts
+# edgeR team. Original author was Davis McCarthy.
+# Created 15 August 2010. Last modified 6 Jan 2017.
+{
+# Check object class
+ if( !(is(object,"DGEExact") || is(object,"DGELRT")) ) stop("Need DGEExact or DGELRT object")
+
+# Apply multiple testing
+ p <- object$table$PValue
+ p <- p.adjust(p, method=adjust.method)
+ isDE <- as.integer(p < p.value)
+
+# Extract logFC
+ logFC <- object$table$logFC
+# Check for F-test with multiple logFC columns
+ FTest <- is.null(logFC)
+# With multiple contrasts, apply lfc threshold to maximum logFC
+ if(FTest) {
+ if(lfc>0) {
+ coef.col <- grep("^logFC",colnames(object$table))
+ logFC <- object$table[,coef.col]
+ SmallFC <- rowSums(abs(logFC) >= lfc) == 0
+ isDE[SmallFC] <- 0L
+ }
+
+# With single contrast, apply directionality and lfc threshold
+ } else {
+ isDE[isDE & logFC<0] <- -1L
+ SmallFC <- (abs(logFC) < lfc)
+ isDE[SmallFC] <- 0L
+ }
+
+# Assemble TestResults object
+ isDE <- matrix(isDE, ncol=1)
+ row.names(isDE) <- row.names(object)
+ colnames(isDE) <- paste(object$comparison,collapse="+")
+
+# Record possible values
+ if(FTest)
+ attr(isDE,"levels") <- c(0L,1L)
+ else
+ attr(isDE,"levels") <- c(-1L,0L,1L)
+
+ new("TestResults", isDE)
+}
diff --git a/R/gini.R b/R/gini.R
index e376a66..2d7b29a 100644
--- a/R/gini.R
+++ b/R/gini.R
@@ -1,10 +1,15 @@
gini <- function(x)
# Gini diversity index for columns of a numeric matrix
# Gordon Smyth
-# 5 Feb 2016
+# Created 5 Feb 2016. Last revised 17 April 2017.
{
+ x <- as.matrix(x)
d <- dim(x)
- if(is.null(d)) d <- dim(x) <- c(length(x),1)
for (j in 1:d[2]) x[,j] <- sort.int(x[,j],na.last=TRUE)
- (2*colSums((1:d[1])*x)/colSums(x)-d[1]-1)/d[1]
+ i <- 1:d[1]
+ m <- 0.75*d[1]
+ S1 <- colSums((i-m)*x , na.rm=TRUE)
+ S2 <- colSums(x, na.rm=TRUE)
+ (2*(S1/S2+m)-d[1]-1)/d[1]
}
+
diff --git a/R/glmfit.R b/R/glmfit.R
index 811acfb..1bad913 100644
--- a/R/glmfit.R
+++ b/R/glmfit.R
@@ -88,7 +88,7 @@ glmFit.default <- function(y, design=NULL, dispersion=NULL, offset=NULL, lib.siz
glmLRT <- function(glmfit,coef=ncol(glmfit$design),contrast=NULL)
# Tagwise likelihood ratio tests for DGEGLM
# Gordon Smyth, Davis McCarthy and Yunshun Chen.
-# Created 1 July 2010. Last modified 11 June 2015.
+# Created 1 July 2010. Last modified 21 March 2017.
{
# Check glmfit
if(!is(glmfit,"DGEGLM")) {
@@ -126,10 +126,9 @@ glmLRT <- function(glmfit,coef=ncol(glmfit$design),contrast=NULL)
ncontrasts <- qrc$rank
if(ncontrasts==0) stop("contrasts are all zero")
coef <- 1:ncontrasts
- if(ncontrasts < ncol(contrast)) contrast <- contrast[,qrc$pivot[coef]]
logFC <- drop((glmfit$coefficients %*% contrast)/log(2))
if(ncontrasts>1) {
- coef.name <- paste("LR test of",ncontrasts,"contrasts")
+ coef.name <- paste("LR test on",ncontrasts,"degrees of freedom")
} else {
contrast <- drop(contrast)
i <- contrast!=0
diff --git a/R/normalizeChIPtoInput.R b/R/normalizeChIPtoInput.R
index 5eb5d07..ee8b6a6 100644
--- a/R/normalizeChIPtoInput.R
+++ b/R/normalizeChIPtoInput.R
@@ -21,9 +21,9 @@ normalizeChIPtoInput <- function(input,response,dispersion=0.01,niter=6,loss="p"
n <- length(response)
# Handle special cases
- if(n==0) return(p=numeric(0),scaling.factor=NA,prop.enriched=NA)
- if(all(input==0)) return(p=rep(0,1),scaling.factor=0,prop.enriched=1)
- if(n==1) return(p=1,scaling.factor=input/response,prop.enriched=0)
+ if(n==0) return(list(p=numeric(0),scaling.factor=NA,prop.enriched=NA))
+ if(all(input==0)) return(list(p=rep(0,1),scaling.factor=0,prop.enriched=1))
+ if(n==1) return(list(p=1,scaling.factor=input/response,prop.enriched=0))
# Reset zero inputs to minimum positive value
input[input==0] <- min(input[input>0])
diff --git a/R/plotMD.R b/R/plotMD.R
index e4cc98b..f4fb2a2 100644
--- a/R/plotMD.R
+++ b/R/plotMD.R
@@ -39,19 +39,49 @@ plotMD.DGEGLM <- function(object, column=ncol(object), coef=NULL, xlab="Average
plotWithHighlights(x=object$AveLogCPM,y=logFC,xlab=xlab,ylab=ylab,main=main,status=status,...)
}
-plotMD.DGELRT <- function(object, xlab="Average log CPM", ylab="log-fold-change", main=object$comparison, status=object$genes$Status, ...)
+plotMD.DGELRT <- function(object, xlab="Average log CPM", ylab="log-fold-change", main=object$comparison, status=object$genes$Status, contrast=1, values=names(table(status)), col=NULL, adjust.method="BH", p.value=0.05, ...)
# Mean-difference plot with color coding for controls
# Gordon Smyth
-# Created 24 June 2015.
+# Created 24 June 2015. Last modified 21 March 2017.
{
- plotWithHighlights(x=object$table$logCPM,y=object$table$logFC,xlab=xlab,ylab=ylab,main=main,status=status,...)
+ logFC <- object$table$logFC
+ FTest <- is.null(logFC)
+
+ if(is.null(status)) {
+ status <- decideTestsDGE(object, adjust.method=adjust.method, p.value=p.value)
+ if(FTest) {
+ status <- c("non-DE", "DE")[status+1L]
+ values <- "DE"
+ col <- "red"
+ } else {
+ status <- c("Down", "non-DE", "Up")[status+2L]
+ values <- c("Up","Down")
+ col <- c("red","blue")
+ }
+ }
+
+# Multiple contrasts
+ if(FTest) {
+ sel <- grep("^logFC", names(object$table))[contrast]
+ if(is.na(sel)) stop("Selected contrast does not exist.")
+ logFC <- object$table[, sel]
+ contrast.name <- gsub("logFC[.]", "", names(object$table)[sel])
+ main <- paste0("Contrast ", contrast.name)
+ }
+ plotWithHighlights(x=object$table$logCPM,y=logFC,xlab=xlab,ylab=ylab,main=main,status=status,values=values,col=col, ...)
}
-plotMD.DGEExact <- function(object, xlab="Average log CPM", ylab="log-fold-change", main=NULL, status=object$genes$Status, ...)
+plotMD.DGEExact <- function(object, xlab="Average log CPM", ylab="log-fold-change", main=NULL, status=object$genes$Status, values=names(table(status)), col=NULL, adjust.method="BH", p.value=0.05, ...)
# Mean-difference plot with color coding for controls
# Gordon Smyth
-# Created 24 June 2015. Last modified 14 Aug 2016.
+# Created 24 June 2015. Last modified 7 Feb 2017.
{
+ if(is.null(status)) {
+ status <- decideTestsDGE(object, adjust.method=adjust.method, p.value=p.value)
+ status <- c("Down", "non-DE", "Up")[status+2L]
+ values <- c("Up","Down")
+ col <- c("red","blue")
+ }
if(is.null(main)) main <- paste(object$comparison[2],"vs",object$comparison[1])
- plotWithHighlights(x=object$table$logCPM,y=object$table$logFC,xlab=xlab,ylab=ylab,main=main,status=status,...)
+ plotWithHighlights(x=object$table$logCPM,y=object$table$logFC,xlab=xlab,ylab=ylab,main=main,status=status,values=values,col=col,...)
}
diff --git a/R/topTags.R b/R/topTags.R
index e2c1228..c1a51ca 100644
--- a/R/topTags.R
+++ b/R/topTags.R
@@ -38,8 +38,8 @@ topTags <- function(object,n=10,adjust.method="BH",sort.by="PValue",p.value=1)
tab <- object$table[o,]
# Add adjusted p-values if appropriate
+ adj.p.val <- p.adjust(object$table$PValue,method=adjust.method)
if(adjust.method != "none") {
- adj.p.val <- p.adjust(object$table$PValue,method=adjust.method)
if(adjust.method %in% FWER.methods) adjustment <- "FWER"
if(adjust.method %in% FDR.methods) adjustment <- "FDR"
tab[[adjustment]] <- adj.p.val[o]
diff --git a/build/vignette.rds b/build/vignette.rds
index 553db84..f35f801 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index 667ff21..ddb9c58 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -2,6 +2,33 @@
\title{edgeR News}
\encoding{UTF-8}
+\section{Version 3.18.0}{\itemize{
+\item
+roast.DGEList(), mroast.DGEList(), fry.DGEList() and camera.DGEList() now have explicit arguments instead of passing arguments with ... to the default method.
+
+\item
+New function scaleOffset() to ensure scale of offsets are consistent with library sizes.
+
+\item
+Added decideTests() S3 methods for DGEExact and DGELRT objects. It now works for F-tests with multiple contrasts.
+
+\item
+Report log-fold changes for redundant contrasts in F-tests with multiple contrasts.
+
+\item
+Modified plotMD() S3 method for DGELRT and DGEExact objects. It now automatically uses decideTests() and highlights the DE genes on the MD plot.
+
+\item
+New argument 'plot' in plotMDS.DGEList().
+
+\item
+Removed S3 length methods for data objects.
+
+\item
+gini() now support NA values and avoids integer overflow.
+}}
+
+
\section{Version 3.16.0}{\itemize{
\item
estimateDisp() now respects weights in calculating the APLs.
diff --git a/inst/doc/edgeR.pdf b/inst/doc/edgeR.pdf
index 1678357..14a9302 100644
Binary files a/inst/doc/edgeR.pdf and b/inst/doc/edgeR.pdf differ
diff --git a/man/addPriorCount.Rd b/man/addPriorCount.Rd
index a74f6a3..1248ba3 100644
--- a/man/addPriorCount.Rd
+++ b/man/addPriorCount.Rd
@@ -39,6 +39,15 @@ However, it is also possible to use gene-specific values by supplying a vector o
A list is returned containing \code{y}, a matrix of counts with the added priors; and \code{offset}, a compressedMatrix containing the (log-transformed) modified library sizes.
}
+\examples{
+original <- matrix(rnbinom(1000, mu=20, size=10), nrow=200)
+head(original)
+
+out <- addPriorCount(original)
+head(out$y)
+head(out$offset)
+}
+
\author{
Aaron Lun
}
diff --git a/man/decidetestsDGE.Rd b/man/decidetestsDGE.Rd
index 06ebc3e..3415812 100644
--- a/man/decidetestsDGE.Rd
+++ b/man/decidetestsDGE.Rd
@@ -1,37 +1,63 @@
-\name{decideTestsDGE}
+\name{decideTests}
\alias{decideTestsDGE}
+\alias{decideTests.DGEExact}
+\alias{decideTests.DGELRT}
\title{Multiple Testing Across Genes and Contrasts}
\description{
-Classify a series of related differential expression statistics as up, down or not significant.
-A number of different multiple testing schemes are offered which adjust for multiple testing down the genes as well as across contrasts for each gene.
+Identify which genes are significantly differentially expressed from an edgeR fit object containing p-values and test statistics.
}
\usage{
decideTestsDGE(object, adjust.method="BH", p.value=0.05, lfc=0)
+\method{decideTests}{DGELRT}(object, adjust.method="BH", p.value=0.05, lfc=0, \dots)
}
\arguments{
- \item{object}{\code{DGEExact} object, output from \code{exactTest},
- or \code{DGELRT} object, output from \code{glmLRT} or \code{glmQLFTest}, from which
- p-values for differential expression and log-fold change values may be extracted.}
-
- \item{adjust.method}{character string specifying p-value adjustment method. Possible values are \code{"none"}, \code{"BH"}, \code{"fdr"} (equivalent to \code{"BH"}), \code{"BY"} and \code{"holm"}. See \code{\link[stats]{p.adjust}} for details.}
-
- \item{p.value}{numeric value between 0 and 1 giving the desired size of the test}
- \item{lfc}{numeric value giving the desired absolute minimum log-fold-change}
+ \item{object}{\code{DGEExact}, \code{DGELRT} or \code{glmQLFTest} object from which p-values and log-fold-changes can be extracted.}
+ \item{adjust.method}{character string specifying p-value adjustment method.
+ Possible values are \code{"none"}, \code{"BH"}, \code{"fdr"} (equivalent to \code{"BH"}), \code{"BY"} and \code{"holm"}.
+ See \code{\link[stats]{p.adjust}} for details.}
+ \item{p.value}{numeric value between 0 and 1 giving the required family-wise error rate or false discovery rate.}
+ \item{lfc}{numeric, minimum absolute log2-fold-change required.}
+ \item{\dots}{other arguments are not used.}
}
\value{
-An object of class \code{TestResults} (see \code{\link[limma:TestResults]{TestResults}}).
-This is essentially a numeric matrix with elements \code{-1}, \code{0}
-or \code{1} depending on whether each DE p-value is classified as
-significant with negative log-fold change, not significant or
-significant with positive log-fold change, respectively.
+An object of class \code{\link[limma:TestResults]{TestResults}}.
+This is essentially a single-column integer matrix with elements \code{-1}, \code{0}
+or \code{1} indicating whether each gene is classified as
+significantly down-regulated, not significant or
+significant up-regulated for the comparison contained in \code{object}.
+To be considered significant, genes have to have adjusted p-value below \code{p.value} and log2-fold-change greater than \code{lfc}.
+
+If \code{object} contains F-tests or LRTs for multiple contrasts, then the genes are simply classified as significant (1) or not significant.
+In this case, the log2-fold-change theshold \code{lfc} has to be achieved by at least one of the contrastsf or a gene to be significant.
}
\details{
-These functions implement multiple testing procedures for determining
-whether each log-fold change in a matrix of log-fold changes should be considered significantly different from zero.
-
+This function applies a multiple testing procedure and significance level cutoff to the genewise tests contained in \code{object}.
+}
+\note{
+Although this function enables users to set p-value and lfc cutoffs simultaneously, this combination criterion not usually recommended.
+Unless the fold changes and p-values are very highly correlated, the addition of a fold change cutoff can increase the family-wise error rate or false discovery rate above the nominal level.
+Users wanting to use fold change thresholding should considering using \code{glmTreat} instead and leaving \code{lfc} at the default value when using \code{decideTestsDGE}.
}
\seealso{
-Adapted from \code{\link[limma:decideTests]{decideTests}} in the limma package.
+\code{\link[limma:decideTests]{decideTests}} and \code{\link[limma:TestResults]{TestResults}} in the limma package.
+}
+\author{Davis McCarthy, Gordon Smyth and the edgeR team}
+\examples{
+ngenes <- 100
+x1 <- rnorm(6)
+x2 <- rnorm(6)
+design <- cbind(Intercept=1,x1,x2)
+beta <- matrix(0,ngenes,3)
+beta[,1] <- 4
+beta[1:20,2] <- rnorm(20)
+mu <- 2^(beta \%*\% t(design))
+y <- matrix(rnbinom(ngenes*6,mu=mu,size=10),ngenes,6)
+fit <- glmFit(y,design,dispersion=0.1)
+lrt <- glmLRT(fit,coef=2:3)
+res <- decideTests(lrt,p.value=0.1)
+summary(res)
+lrt <- glmLRT(fit,coef=2)
+res <- decideTests(lrt,p.value=0.1)
+summary(res)
}
-\author{Davis McCarthy, Gordon Smyth}
\keyword{htest}
diff --git a/man/dim.Rd b/man/dim.Rd
index f03f31a..06c4271 100644
--- a/man/dim.Rd
+++ b/man/dim.Rd
@@ -4,18 +4,12 @@
\alias{dim.TopTags}
\alias{dim.DGEGLM}
\alias{dim.DGELRT}
-\alias{length.DGEList}
-\alias{length.DGEExact}
-\alias{length.TopTags}
-\alias{length.DGEGLM}
-\alias{length.DGELRT}
\title{Retrieve the Dimensions of a DGEList, DGEExact, DGEGLM, DGELRT or TopTags Object}
\description{
Retrieve the number of rows (genes) and columns (libraries) for an DGEList, DGEExact or TopTags Object.
}
\usage{
\method{dim}{DGEList}(x)
-\method{length}{DGEList}(x)
}
\arguments{
\item{x}{an object of class \code{DGEList}, \code{DGEExact}, \code{TopTags}, \code{DGEGLM} or \code{DGELRT}}
@@ -44,6 +38,5 @@ MA <- new("MAList",list(M=M,A=A))
dim(M)
ncol(M)
nrow(M)
-length(M)
}
\keyword{array}
diff --git a/man/estimateDisp.Rd b/man/estimateDisp.Rd
index e9af62d..5c1b4d4 100644
--- a/man/estimateDisp.Rd
+++ b/man/estimateDisp.Rd
@@ -74,14 +74,14 @@ Somnath Datta and Daniel S. Nettleton (eds), Springer, New York, pages 51-74.
Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
\emph{Annals of Applied Statistics} 10, 946-963.
-\url{http://arxiv.org/abs/1602.08678}
+\url{http://projecteuclid.org/euclid.aoas/1469199900}
}
\author{Yunshun Chen, Gordon Smyth}
\examples{
# True dispersion is 1/5=0.2
y <- matrix(rnbinom(1000, mu=10, size=5), ncol=4)
-group <- c(1,1,2,2)
+group <- factor(c(1,1,2,2))
design <- model.matrix(~group)
d <- DGEList(counts=y, group=group)
d1 <- estimateDisp(d)
diff --git a/man/gini.Rd b/man/gini.Rd
index f2d7506..d5d158a 100644
--- a/man/gini.Rd
+++ b/man/gini.Rd
@@ -8,7 +8,7 @@ Gini index for each column of a matrix.
gini(x)
}
\arguments{
- \item{x}{non-negative numeric matrix}
+ \item{x}{a non-negative numeric matrix, or an object that can be coerced to such a matrix by \code{as.matrix}.}
}
\details{
The Gini coefficient or index is a measure of inequality or diversity.
diff --git a/man/nbinomDeviance.Rd b/man/nbinomDeviance.Rd
index 58367f1..66d6a5b 100644
--- a/man/nbinomDeviance.Rd
+++ b/man/nbinomDeviance.Rd
@@ -38,9 +38,9 @@ Care is taken to ensure accurate computation for small dispersion values.
}
\references{
-Jorgensen, B. (2006).
-Generalized linear models. Encyclopedia of Environmetrics, Wiley.
-\url{http://onlinelibrary.wiley.com/doi/10.1002/9780470057339.vag010/full}.
+Jorgensen, B. (2013).
+Generalized linear models. Encyclopedia of Environmetrics 3, Wiley.
+\url{http://onlinelibrary.wiley.com/doi/10.1002/9780470057339.vag010.pub2/abstract}.
McCarthy, DJ, Chen, Y, Smyth, GK (2012).
Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation.
diff --git a/man/plotMD.Rd b/man/plotMD.Rd
index 139dcb4..c491762 100644
--- a/man/plotMD.Rd
+++ b/man/plotMD.Rd
@@ -10,19 +10,19 @@ Creates a mean-difference plot (aka MA plot) with color coding for highlighted p
}
\usage{
-\method{plotMD}{DGEList}(object, column = 1, xlab = "Average log CPM (this sample and others)",
- ylab = "log-ratio (this sample vs others)",
- main = colnames(object)[column], status=object$genes$Status,
- zero.weights = FALSE, prior.count = 3, \dots)
-\method{plotMD}{DGEGLM}(object, column = ncol(object), coef = NULL, xlab = "Average log CPM",
- ylab = "log-fold-change", main = colnames(object)[column],
- status=object$genes$Status, zero.weights = FALSE, \dots)
-\method{plotMD}{DGELRT}(object, xlab = "Average log CPM",
- ylab = "log-fold-change", main = object$comparison,
- status=object$genes$Status, \dots)
-\method{plotMD}{DGEExact}(object, xlab = "Average log CPM",
- ylab = "log-fold-change", main = NULL,
- status=object$genes$Status, \dots)
+\method{plotMD}{DGEList}(object, column=1, xlab="Average log CPM (this sample and others)",
+ ylab="log-ratio (this sample vs others)",
+ main=colnames(object)[column], status=object$genes$Status,
+ zero.weights=FALSE, prior.count=3, \dots)
+\method{plotMD}{DGEGLM}(object, column=ncol(object), coef=NULL, xlab="Average log CPM",
+ ylab="log-fold-change", main=colnames(object)[column],
+ status=object$genes$Status, zero.weights=FALSE, \dots)
+\method{plotMD}{DGELRT}(object, xlab="Average log CPM", ylab="log-fold-change",
+ main=object$comparison, status=object$genes$Status, contrast=1,
+ values=names(table(status)), col=NULL, adjust.method="BH", p.value=0.05, \dots)
+\method{plotMD}{DGEExact}(object, xlab="Average log CPM", ylab="log-fold-change",
+ main=NULL, status=object$genes$Status, values=names(table(status)),
+ col=NULL, adjust.method="BH", p.value=0.05, \dots)
}
\arguments{
@@ -33,9 +33,15 @@ Creates a mean-difference plot (aka MA plot) with color coding for highlighted p
\item{ylab}{character string, label for y-axis}
\item{main}{character string, title for plot}
\item{status}{vector giving the control status of each spot on the array, of same length as the number of rows of \code{object}.
- If \code{NULL}, then all points are plotted in the default color, symbol and size.}
+ If \code{NULL} under the \code{DGEList} or \code{DGEGLM} method, then all points are plotted in the default color, symbol and size.
+ If \code{NULL} under the \code{DGELRT} or \code{DGEExact} method, then \code{\link{decideTestsDGE}} is run to determine the status of all the genes. The up-regulated DE genes are highlighted in red and down-regulated in blue.}
\item{zero.weights}{logical, should spots with zero or negative weights be plotted?}
\item{prior.count}{the average prior count to be added to each observation. Larger values produce more shrinkage.}
+ \item{contrast}{integer specifying which log-fold-change to be plotted in the case of testing multiple contrasts. Only used for the \code{DGELRT} method with multiple contrasts.}
+ \item{values}{character vector giving values of \code{status} to be highlighted on the plot. Defaults to unique values of \code{status}.}
+ \item{col}{vector of colors for highlighted points, either of unit length or of same length as \code{values}.}
+ \item{adjust.method}{character string passed to \code{\link{decideTestsDGE}} specifying p-value adjustment method. Only used when \code{status} is \code{NULL}. See \code{\link{decideTestsDGE}} for details.}
+ \item{p.value}{numeric value between 0 and 1 giving the desired size of the test. Only used and passed to \code{\link{decideTestsDGE}} when \code{status} is \code{NULL}.}
\item{\dots}{other arguments are passed to \code{\link{plotWithHighlights}}.}
}
diff --git a/man/topTags.Rd b/man/topTags.Rd
index f1bbd0e..66c9b74 100755
--- a/man/topTags.Rd
+++ b/man/topTags.Rd
@@ -27,7 +27,7 @@ topTags(object, n=10, adjust.method="BH", sort.by="PValue", p.value=1)
an object of class \code{TopTags} containing the following elements for the top \code{n} most differentially expressed tags as determined by \code{sort.by}:
-\item{table}{a data frame containing the elements \code{logFC}, the log-abundance ratio, i.e. fold change, for each tag in the two groups being compared, \code{logCPM}, the log-average concentration/abundance for each tag in the two groups being compared, \code{PValue}, exact p-value for differential expression using the NB model, \code{FDR}, the p-value adjusted for multiple testing as found using \code{p.adjust} using the method specified.}
+\item{table}{a data frame containing the elements \code{logFC}, the log-abundance ratio, i.e. fold change, for each tag in the two groups being compared, \code{logCPM}, the log-average concentration/abundance for each tag in the two groups being compared, \code{PValue}, exact p-value for differential expression using the NB model. When \code{adjust.method} is not \code{"none"}, there is an extra column of \code{FDR} showing the adjusted p-value if \code{adjust.method} is one of the \code [...]
\item{adjust.method}{character string stating the method used to adjust p-values for multiple testing.}
\item{comparison}{a vector giving the names of the two groups being compared.}
\item{test}{character string stating the name of the test.}
diff --git a/src/R_initialize_levenberg.cpp b/src/R_initialize_levenberg.cpp
index aee5b31..2bd4a01 100644
--- a/src/R_initialize_levenberg.cpp
+++ b/src/R_initialize_levenberg.cpp
@@ -21,17 +21,19 @@ struct QRdecomposition {
// Setting up the workspace for dgeqrf.
double tmpwork;
- lwork_geqrf=lwork_oqrmqr=-1;
+ lwork_geqrf=lwork_ormqr=-1;
F77_CALL(dgeqrf)(&NR, &NC, Xcopy, &NR, tau, &tmpwork, &lwork_geqrf, &info);
// Loading up the optimal WORK.
lwork_geqrf=tmpwork+0.5;
+ if (lwork_geqrf < 1) { lwork_geqrf = 1; }
work_geqrf=(double*)R_alloc(lwork_geqrf, sizeof(double));
// Repeating for dormqr
- F77_CALL(dormqr)(&side, &trans_ormqr, &NR, &unity, &NC, Xcopy, &NR, tau, effects, &NR, &tmpwork, &lwork_oqrmqr, &info);
- lwork_oqrmqr=tmpwork+0.5;
- work_oqrmqr=(double*)R_alloc(lwork_oqrmqr, sizeof(double));
+ F77_CALL(dormqr)(&side, &trans_ormqr, &NR, &unity, &NC, Xcopy, &NR, tau, effects, &NR, &tmpwork, &lwork_ormqr, &info);
+ lwork_ormqr=tmpwork+0.5;
+ if (lwork_ormqr < 1) { lwork_ormqr = 1; }
+ work_ormqr=(double*)R_alloc(lwork_ormqr, sizeof(double));
return;
}
@@ -69,7 +71,7 @@ struct QRdecomposition {
effects[row]=y[row]*weights[row];
}
- F77_CALL(dormqr)(&side, &trans_ormqr, &NR, &unity, &NC, Xcopy, &NR, tau, effects, &NR, work_oqrmqr, &lwork_oqrmqr, &info);
+ F77_CALL(dormqr)(&side, &trans_ormqr, &NR, &unity, &NC, Xcopy, &NR, tau, effects, &NR, work_ormqr, &lwork_ormqr, &info);
if (info) {
throw std::runtime_error("Q**T multiplication failed");
}
@@ -83,9 +85,9 @@ struct QRdecomposition {
}
const double* X;
- double * Xcopy, * tau, * effects, *weights, *work_geqrf, *work_oqrmqr;
+ double * Xcopy, * tau, * effects, *weights, *work_geqrf, *work_ormqr;
const int NR, NC;
- int lwork_geqrf, lwork_oqrmqr, info;
+ int lwork_geqrf, lwork_ormqr, info;
int row, coef, index;
};
@@ -128,6 +130,7 @@ SEXP R_get_levenberg_start(SEXP y, SEXP offset, SEXP disp, SEXP weights, SEXP de
QR.store_weights(NULL);
QR.decompose();
double sum_exprs=0, sum_weight=0, curN, curweight;
+
for (int tag=0; tag<num_tags; ++tag) {
counts.fill_and_next(count_ptr);
diff --git a/src/R_levenberg.cpp b/src/R_levenberg.cpp
index 9bb0742..e98173e 100644
--- a/src/R_levenberg.cpp
+++ b/src/R_levenberg.cpp
@@ -9,14 +9,26 @@ SEXP R_levenberg (SEXP design, SEXP y, SEXP disp, SEXP offset, SEXP weights,
double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
// Getting and checking the dimensions of the arguments.
- if (!isReal(design)) { throw std::runtime_error("design matrix should be double precision"); }
- const int dlen=LENGTH(design);
- if (dlen%num_libs!=0) { throw std::runtime_error("size of design matrix is incompatible with number of libraries"); }
- const int num_coefs=dlen/num_libs;
+ if (!isReal(design)) { throw std::runtime_error("design matrix should be double precision"); }
+ SEXP des_dims=getAttrib(design, R_DimSymbol);
+ if (!isInteger(des_dims) || LENGTH(des_dims)!=2) {
+ throw std::runtime_error("design matrix dimensions should be an integer vector of length 2");
+ }
+ if (INTEGER(des_dims)[0]!=num_libs) {
+ throw std::runtime_error("number of rows of design matrix should be equal to number of libraries");
+ }
+ const int num_coefs=INTEGER(des_dims)[1];
+
if (!isReal(beta)) { throw std::runtime_error("starting coefficient values must be positive"); }
- if (LENGTH(beta)!=num_tags*num_coefs) {
- throw std::runtime_error("dimensions of the beta matrix do not match to the number of tags and coefficients");
- }
+ SEXP bdims=getAttrib(beta, R_DimSymbol);
+ if (!isInteger(bdims) || LENGTH(bdims)!=2) {
+ throw std::runtime_error("beta matrix dimensions should be an integer vector of length 2");
+ }
+ if (INTEGER(bdims)[0]!=num_tags) {
+ throw std::runtime_error("number of rows of beta matrix should be equal to number of genes");
+ } else if (INTEGER(bdims)[1]!=num_coefs) {
+ throw std::runtime_error("number of columns of beta matrix should be equal to number of coefficients");
+ }
// Initializing pointers to the assorted features.
const double *design_ptr=REAL(design), *bptr=REAL(beta);
diff --git a/src/adj_coxreid.cpp b/src/adj_coxreid.cpp
index e5ca7b9..b5da08c 100644
--- a/src/adj_coxreid.cpp
+++ b/src/adj_coxreid.cpp
@@ -20,6 +20,7 @@ adj_coxreid::adj_coxreid (const int& nl, const int& nc, const double* d) : ncoef
throw std::runtime_error("failed to identify optimal size of workspace through ILAENV");
}
lwork=int(temp_work+0.5);
+ if (lwork < 1) { lwork = 1; }
work=new double [lwork];
// Saving a local copy of the design pointer.
diff --git a/src/fmm_spline.c b/src/fmm_spline.c
index 294e047..318ce89 100644
--- a/src/fmm_spline.c
+++ b/src/fmm_spline.c
@@ -20,13 +20,16 @@
/* Comment from A. Lun:
* This is copied straight from 'splines.c' in the 'stats' package of
- * R's core installation. I have removed all functions except for
- * fmm_spline. I have commented out the error call for the instance where
- * less than 2 points are supplied, as that is unnecessary for my code
- * (and fails to compile without a definition of EDOM). I've also const'ifed
- * the 'x' and 'y' pointers to protect them from modification. Otherwise
- * the function and comments have not been modified.
- */
+ * R's core installation (current as of r72293), with a few modifications:
+ *
+ * - commented out all functions except for fmm_spline.
+ *
+ * - commented out the error call in fmm_spline when fewer than 2 points
+ * are supplied, as that is unnecessary for my code (and fails to compile
+ * without a definition of EDOM).
+ *
+ * - const'ifed the 'x' and 'y' pointers to protect them from modification.
+ */
/* Spline Interpolation
* --------------------
diff --git a/src/glm_levenberg.cpp b/src/glm_levenberg.cpp
index 4010b32..e7079c2 100644
--- a/src/glm_levenberg.cpp
+++ b/src/glm_levenberg.cpp
@@ -80,11 +80,14 @@ int glm_levenberg::fit(const double* offset, const double* y, const double* w,
return 0;
}
- // Otherwise, we compute 'mu' based on 'beta', and proceed to iterating using reweighted least squares.
+ // Otherwise, we compute 'mu' based on 'beta'. Returning if there are no coefficients!
autofill(offset, mu, beta);
dev=nb_deviance(y, mu, w, disp);
+ if (ncoefs==0) {
+ return 0;
+ }
- // Assorted temporary objects.
+ // Iterating using reweighted least squares; setting up assorted temporary objects.
double max_info=-1, lambda=0;
double denom, weight, deriv;
int row, col, index;
diff --git a/tests/edgeR-Tests.Rout.save b/tests/edgeR-Tests.Rout.save
index 5518a77..b9c36ec 100644
--- a/tests/edgeR-Tests.Rout.save
+++ b/tests/edgeR-Tests.Rout.save
@@ -1,7 +1,7 @@
-R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
-Copyright (C) 2016 The R Foundation for Statistical Computing
-Platform: x86_64-apple-darwin13.4.0 (64-bit)
+R version 3.3.3 (2017-03-06) -- "Another Canoe"
+Copyright (C) 2017 The R Foundation for Statistical Computing
+Platform: x86_64-w64-mingw32/x64 (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
@@ -298,7 +298,7 @@ Tag.8 -0.7918166 12.86353 0.7356185 0.391068049 0.8603497
> dglm2 <- estimateDisp(dglm, design)
> summary(dglm2$tagwise.dispersion)
Min. 1st Qu. Median Mean 3rd Qu. Max.
- 0.1766 0.1789 0.1814 0.1846 0.1870 0.2119
+ 0.1423 0.1618 0.1788 0.1863 0.2015 0.2692
> dglm2 <- estimateDisp(dglm, design, prior.df=20)
> summary(dglm2$tagwise.dispersion)
Min. 1st Qu. Median Mean 3rd Qu. Max.
@@ -306,7 +306,7 @@ Tag.8 -0.7918166 12.86353 0.7356185 0.391068049 0.8603497
> dglm2 <- estimateDisp(dglm, design, robust=TRUE)
> summary(dglm2$tagwise.dispersion)
Min. 1st Qu. Median Mean 3rd Qu. Max.
- 0.1766 0.1789 0.1814 0.1846 0.1870 0.2119
+ 0.1423 0.1605 0.1783 0.1867 0.2031 0.2740
>
> # Continuous trend
> nlibs <- 3
@@ -475,7 +475,7 @@ $AveLogCPM
>
> lrt <- glmLRT(fit,contrast=cbind(c(0,1,0),c(0,0,1)))
> topTags(lrt)
-Coefficient: LR test of 2 contrasts
+Coefficient: LR test on 2 degrees of freedom
logFC.1 logFC.2 logCPM LR PValue FDR
1 -7.2381060 -0.0621100 17.19071 10.7712171 0.004582051 0.02291026
5 -1.6684268 -0.9326507 17.33529 1.7309951 0.420842115 0.90967967
@@ -486,13 +486,13 @@ Coefficient: LR test of 2 contrasts
> fit <- glmFit(y,design,dispersion=2/3,prior.count=0.5/7)
> lrt <- glmLRT(fit,contrast=cbind(c(-1,1,0),c(0,-1,1),c(-1,0,1)))
> topTags(lrt)
-Coefficient: LR test of 2 contrasts
- logFC.1 logFC.2 logCPM LR PValue FDR
-1 -7.2381060 7.1759960 17.19071 10.7712171 0.004582051 0.02291026
-5 -1.6684268 0.7357761 17.33529 1.7309951 0.420842115 0.90967967
-2 1.2080938 -0.1660740 18.24544 1.0496688 0.591653347 0.90967967
-4 0.5416704 -0.6923084 17.57744 0.3958596 0.820427427 0.90967967
-3 0.2876249 -0.4884392 18.06216 0.1893255 0.909679672 0.90967967
+Coefficient: LR test on 2 degrees of freedom
+ logFC.1 logFC.2 logFC.3 logCPM LR PValue FDR
+1 -7.2381060 7.1759960 -0.0621100 17.19071 10.7712171 0.004582051 0.02291026
+5 -1.6684268 0.7357761 -0.9326507 17.33529 1.7309951 0.420842115 0.90967967
+2 1.2080938 -0.1660740 1.0420198 18.24544 1.0496688 0.591653347 0.90967967
+4 0.5416704 -0.6923084 -0.1506381 17.57744 0.3958596 0.820427427 0.90967967
+3 0.2876249 -0.4884392 -0.2008143 18.06216 0.1893255 0.909679672 0.90967967
>
> # simple Good-Turing algorithm runs.
> test1<-1:9
@@ -537,4 +537,4 @@ $n0
>
> proc.time()
user system elapsed
- 3.259 0.065 3.693
+ 3.29 0.14 4.24
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-edger.git
More information about the debian-med-commit
mailing list