[med-svn] [r-bioc-edger] 01/03: Imported Upstream version 3.4.0~dfsg

Charles Plessy plessy at alioth.debian.org
Wed Oct 16 06:49:39 UTC 2013


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch master
in repository r-bioc-edger.

commit decd0fdb67300bcdfb3b080a1914939f1f7a4ac9
Author: Charles Plessy <plessy at debian.org>
Date:   Wed Oct 16 14:21:43 2013 +0900

    Imported Upstream version 3.4.0~dfsg
---
 DESCRIPTION                |    6 +++---
 R/condLogLikDerSize.R      |   19 ++++++++-----------
 R/glmQLFTest.R             |   10 ++++------
 R/glmfit.R                 |    1 -
 R/predFC.R                 |   17 +++++++++--------
 build/vignette.rds         |  Bin 0 -> 227 bytes
 inst/NEWS.Rd               |   42 +++++++++++++++++++++++-------------------
 inst/doc/edgeR.R           |    2 --
 inst/doc/edgeR.pdf         |  Bin 49354 -> 49354 bytes
 src/core/glm_levenberg.cpp |   24 ++++++++++++++++--------
 10 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index dc548c6..5a6d032 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: edgeR
-Version: 3.2.4
-Date: 2013/07/14
+Version: 3.4.0
+Date: 2013/08/31
 Title: Empirical analysis of digital gene expression data in R
 Author: Mark Robinson <mrobinson at wehi.edu.au>, Davis McCarthy <dmccarthy at wehi.edu.au>, Yunshun Chen <yuchen at wehi.edu.au>, Aaron Lun <alun at wehi.edu.au>, Gordon Smyth <smyth at wehi.edu.au>
 Maintainer: Mark Robinson <mrobinson at wehi.edu.au>, Davis McCarthy
@@ -12,4 +12,4 @@ biocViews: Bioinformatics, DifferentialExpression, SAGE,
         HighThroughputSequencing, RNAseq, ChIPseq
 Description: Differential expression analysis of RNA-seq and digital gene expression profiles with biological replication.  Uses empirical Bayes estimation and exact tests based on the negative binomial distribution.  Also useful for differential signal analysis with other types of genome-scale count data.
 License: GPL (>=2)
-Packaged: 2013-07-15 04:54:44 UTC; biocbuild
+Packaged: 2013-10-15 03:48:20 UTC; biocbuild
diff --git a/R/condLogLikDerSize.R b/R/condLogLikDerSize.R
index ebd6610..79be071 100644
--- a/R/condLogLikDerSize.R
+++ b/R/condLogLikDerSize.R
@@ -1,9 +1,7 @@
 condLogLikDerSize <- function(y, r, der=1L)
-# Calculate derivatives of the conditional log-likelihood function l_g{r}
-# with respect to r=1/phi (phi is the dispersion parameter)
-# der is derivative (0th deriv is the function)
-# For a single group of replicate libraries, all of the same total size
-# Written by Mark Robinson
+#	Derivatives of the conditional log-likelihood function (given the row sum)
+#	with respect to r=1/dispersion
+#	for a single group of replicate libraries, all of the same total size
 {
 #	Vector interpreted as matrix of one row, i.e., one gene
 	if (is.vector(y)) {
@@ -12,13 +10,12 @@ condLogLikDerSize <- function(y, r, der=1L)
 		y <- as.matrix(y)
 	}
 
-	t <- rowSums(y,na.rm=TRUE)
-	n <- rowSums(!is.na(y))
-	g <- dim(y)[1]
+	n <- ncol(y)
+	m <- rowMeans(y)
 
 	switch(der+1L,
-		rowSums(lgamma(y+r)) + lgamma(n*r) - lgamma(t+n*r) - n*lgamma(r),
-		rowSums(digamma(y+r)) + n*digamma(n*r) - n*digamma(t+n*r) - n*digamma(r),
-		rowSums(trigamma(y+r)) + n^2*trigamma(n*r) - n^2*trigamma(t+n*r) - n*trigamma(r)
+		rowSums(lgamma(y+r)) + lgamma(n*r) - lgamma(n*(m+r)) - n*lgamma(r),
+		rowSums(digamma(y+r)) + n*digamma(n*r) - n*digamma(n*(m+r)) - n*digamma(r),
+		rowSums(trigamma(y+r)) + n^2*trigamma(n*r) - n^2*trigamma(n*(m+r)) - n*trigamma(r)
 	)
 }
diff --git a/R/glmQLFTest.R b/R/glmQLFTest.R
index a3a0b23..372e2f0 100644
--- a/R/glmQLFTest.R
+++ b/R/glmQLFTest.R
@@ -1,10 +1,8 @@
 glmQLFTest <- function(y, design=NULL, dispersion=NULL, coef=ncol(glmfit$design), contrast=NULL, abundance.trend=TRUE, robust=FALSE, winsor.tail.p=c(0.05,0.1), plot=FALSE)
 #	Quasi-likelihood F-tests for DGE glms.
 #	Davis McCarthy and Gordon Smyth.
-#	Created 18 Feb 2011. Last modified 11 March 2013.
+#	Created 18 Feb 2011. Last modified 21 July 2013.
 {
-	if(abundance.trend) A <- y$AveLogCPM
-
 #	Initial fit with trended dispersion
 	if(is(y,"DGEList")) {
 		if(is.null(dispersion)) {
@@ -21,6 +19,7 @@ glmQLFTest <- function(y, design=NULL, dispersion=NULL, coef=ncol(glmfit$design)
 
 #	Call glmLRT to get most of the results that we need for the QL F-test calculations
 	out <- glmLRT(glmfit, coef=coef, contrast=contrast)
+	if(is.null(out$AveLogCPM)) out$AveLogCPM <- aveLogCPM(glmfit$counts)
 
 #	Residual deviances
 	df.residual <- glmfit$df.residual
@@ -37,9 +36,7 @@ glmQLFTest <- function(y, design=NULL, dispersion=NULL, coef=ncol(glmfit$design)
 	s2[df.residual==0] <- 0
 	s2 <- pmax(s2,0)
 	if(abundance.trend) {
-		if(is.null(A)) A <- out$AveLogCPM
-		if(is.null(A)) A <- aveLogCPM(glmfit$counts)
-		if(is.null(out$AveLogCPM)) out$AveLogCPM <- A
+		A <- out$AveLogCPM
 	} else {
 		A <- NULL
 	}
@@ -47,6 +44,7 @@ glmQLFTest <- function(y, design=NULL, dispersion=NULL, coef=ncol(glmfit$design)
 
 #	Plot
 	if(plot) {
+		if(!abundance.trend) A <- out$AveLogCPM
 		plot(A,sqrt(sqrt(s2)),xlab="Average Log2 CPM",ylab="Quarter-Root Mean Deviance",pch=16,cex=0.2)
 		o <- order(A)
 		points(A[o],sqrt(sqrt(s2.fit$var.post[o])),pch=16,cex=0.2,col="red")
diff --git a/R/glmfit.R b/R/glmfit.R
index 40fbd7a..a09492f 100644
--- a/R/glmfit.R
+++ b/R/glmfit.R
@@ -120,7 +120,6 @@ glmLRT <- function(glmfit,coef=ncol(glmfit$design),contrast=NULL,test="chisq")
 		}
 		stop("glmfit must be an DGEGLM object (usually produced by glmFit).")
 	}
-	ngenes <- nrow(glmfit)
 	nlibs <- ncol(glmfit)
 
 #	Check test
diff --git a/R/predFC.R b/R/predFC.R
index 05b27df..df06e76 100644
--- a/R/predFC.R
+++ b/R/predFC.R
@@ -22,12 +22,6 @@ predFC.default <- function(y,design=NULL,prior.count=0.125,offset=NULL,dispersio
 	ngenes <- nrow(y)
 	nsamples <- ncol(y)
 
-#	Check design
-	if(is.null(design))
-		return(cpm(y,prior.count=prior.count,log=TRUE))
-	else
-		design <- as.matrix(design)
-
 #	Check prior.count
 	if(prior.count<0) stop("prior.count should be non-negative")
 
@@ -38,6 +32,13 @@ predFC.default <- function(y,design=NULL,prior.count=0.125,offset=NULL,dispersio
 	} else
 		lib.size <- exp(offset)
 
+#	Check design
+	if(is.null(design)) {
+		warning("Behaviour of predFC with design=NULL is scheduled to be deprecated April 2014. Use cpm() instead.",call.=FALSE)
+		return(cpm(y,lib.size=lib.size,log=TRUE,prior.count=prior.count))
+	} else
+		design <- as.matrix(design)
+
 #	Add prior counts in proportion to library sizes
 	if(is.null(dim(lib.size)))
 		ave.lib.size <- mean(lib.size)
@@ -49,7 +50,7 @@ predFC.default <- function(y,design=NULL,prior.count=0.125,offset=NULL,dispersio
 	y <- y+prior.count
 
 #	Return matrix of coefficients on log2 scale
-   g <- glmFit(y,design,offset=log(lib.size),dispersion=dispersion,prior.count=0)
-   g$coefficients/log(2)
+	g <- glmFit(y,design,offset=log(lib.size),dispersion=dispersion,prior.count=0)
+	g$coefficients/log(2)
 }
 
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..139a0ca
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index 244825c..4228c74 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -2,45 +2,52 @@
 \title{edgeR News}
 \encoding{UTF-8}
 
-\section{Version 3.2.4}{\itemize{
+\section{Version 3.3.8}{\itemize{
 \item
-Refinement to cutWithMinN() to make the bin numbers more equal in the worst case.
+predFC() with design=NULL now uses normalization factors correctly.
+However this use of predFC() to compute counts per million is being phased out in favour of cpm().
+}}
 
+\section{Version 3.3.5}{\itemize{
 \item
-estimateDisp() now creates the design matrix correctly when the design matrix is not given as an argument and there is only one group.  Previously this case gave an error.
+Refinement to cutWithMinN() to make the bin numbers more equal in the worst case.
 
 \item
-plotMDS.DGEList now gives a friendly error message when there are fewer than 3 data columns.
+estimateDisp() now creates the design matrix correctly when the design matrix is not given as an argument and there is only one group.  Previously this case gave an error.
 
 \item
-Refinement to computation for nbins in dispBinTrend.  Now changes more smoothly with the number of genes.  trace argument is retired.
+Minor edit to glm.h code.
+}}
 
+\section{Version 3.3.4}{\itemize{
 \item
-Fixes to calcNormFactors with method="TMM" so that it takes account of lib.size and refCol if these are preset.
+plotMDS.DGEList now gives a friendly error message when there are fewer than 3 data columns.
+}}
 
+\section{Version 3.3.3}{\itemize{
 \item
-Some fixes and cleaning up of subsetting code.
+DGEList() accepts NULL as a possible value again for the group, lib.size and norm.factors arguments.
+It is treated the same way as a missing argument.
+}}
 
+\section{Version 3.3.2}{\itemize{
 \item
-Minor edit to glm.h code.
+Update to cutWithMinN() so that it does not fail even when there are many repeated x values.
 
 \item
-Edits to camera.DGEList, cutWithMinN and plotMDS.DGEList help pages.  Edits to help pages for the data classes.
-}}
+Refinement to computation for nbins in dispBinTrend.  Now changes more smoothly with the number of genes.  trace argument is retired.
 
-\section{Version 3.2.3}{\itemize{
 \item
-Update to cutWithMinN() so that it does not fail even when there are many repeated x values.
-}}
+Fixes to calcNormFactors with method="TMM" so that it takes account of lib.size and refCol if these are preset.
 
-\section{Version 3.2.2}{\itemize{
 \item
-Restore acceptance in DGEList() of NULL value of lib.size.
+Updates to help pages for the data classes.
 }}
 
-\section{Version 3.2.1}{\itemize{
+\section{Version 3.3.1}{\itemize{
 \item
 Updates to DGEList() and DGEList-class documentation.
+Arguments lib.size, group and norm.factors are now set to their defaults in the function definition rather than set to NULL.
 }}
 
 \section{Version 3.2.0}{\itemize{
@@ -56,9 +63,6 @@ The function estimateDisp() provides a simpler alternative pipeline and in princ
 It can also incorporate automatic estimation of the prior degrees of freedom, and can do this in a robust fashion.
 
 \item
-Default prior.df for estimateTagwiseDisp() and estimateGLMTagwiseDisp() reduced from 20 to 10.
-
-\item
 glmLRT() now permits the contrast argument to be a matrix with multiple columns, making the treatment of this argument analogous to that of the coef argument.
 
 \item
diff --git a/inst/doc/edgeR.R b/inst/doc/edgeR.R
deleted file mode 100644
index 447a637..0000000
--- a/inst/doc/edgeR.R
+++ /dev/null
@@ -1,2 +0,0 @@
-### R code from vignette source 'edgeR.Rnw'
-
diff --git a/inst/doc/edgeR.pdf b/inst/doc/edgeR.pdf
index f4c429d..1e5ab00 100644
Binary files a/inst/doc/edgeR.pdf and b/inst/doc/edgeR.pdf differ
diff --git a/src/core/glm_levenberg.cpp b/src/core/glm_levenberg.cpp
index aa6884e..b4c2f3a 100644
--- a/src/core/glm_levenberg.cpp
+++ b/src/core/glm_levenberg.cpp
@@ -29,7 +29,16 @@ double glm_levenberg::nb_deviance (const double* y, const double* mu, const doub
     }
     return dev*2;
 }
-    
+
+void glm_levenberg::autofill(const double* offset, double* mu, const double* beta) {
+	for (int lib=0; lib<nlibs; ++lib) {
+		double& cur_mean=(mu[lib]=offset[lib]);
+		for (int coef=0; coef<ncoefs; ++coef) { cur_mean+=design[coef*nlibs+lib]*beta[coef]; }
+		cur_mean=std::exp(cur_mean);
+	}
+	return;
+}
+
 /* Now, the actual constructors for the GLM object. */
 
 glm_levenberg::glm_levenberg(const int& nl, const int& nc, const double*d, const int& mi, const double& tol) : nlibs(nl), ncoefs(nc),
@@ -84,8 +93,11 @@ int glm_levenberg::fit(const double* offset, const double* y, const double& disp
         for (int lib=0; lib<nlibs; ++lib) { mu[lib]=0; }
         return 0;
     }
-
-    // Iterating using reweighted least squares.
+    
+	/* Otherwise, we have to make sure 'beta' and 'mu' make sense relative to one another.
+ 	 * We then proceed to iterating using reweighted least squares.
+ 	 */
+	autofill(offset, mu, beta);
 	dev=nb_deviance(y, mu, disp);
     double max_info=-1, lambda=0;
 
@@ -172,11 +184,7 @@ int glm_levenberg::fit(const double* offset, const double* y, const double& disp
 
             // Updating beta and the means. 'dbeta' stores 'Y' from the solution of (X*VX)Y=dl, corresponding to a NR step.
             for (int i=0; i<ncoefs; ++i) { beta_new[i]=beta[i]+dbeta[i]; }
-            for (int lib=0; lib<nlibs; ++lib) {
-                double& cur_mean=(mu_new[lib]=offset[lib]);
-                for (int coef=0; coef<ncoefs; ++coef) { cur_mean+=design[coef*nlibs+lib]*beta_new[coef]; }
-                cur_mean=std::exp(cur_mean);
-            }
+            autofill(offset, mu_new, beta_new);
 
             /* Checking if the deviance has decreased or if it's too small to care about. Either case is good
              * and means that we'll be using the updated fitted values and coefficients. Otherwise, if we have

-- 
Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-edger.git



More information about the debian-med-commit mailing list