[med-svn] [r-bioc-limma] 01/06: Imported Upstream version 3.30.0+dfsg

Andreas Tille tille at debian.org
Fri Oct 28 07:47:40 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-bioc-limma.

commit 75e8fabf9f86884b3500fe24c1119e1d90526f82
Author: Andreas Tille <tille at debian.org>
Date:   Fri Oct 28 09:25:42 2016 +0200

    Imported Upstream version 3.30.0+dfsg
---
 DESCRIPTION                   |   6 +--
 R/cumOverlap.R                |  45 +++++++++++++++++
 R/dups.R                      |   9 ++--
 R/fitFDistRobustly.R          |  46 ++++++++---------
 R/geneset-fry.R               |   2 +-
 R/geneset-roast.R             |   2 +-
 R/norm.R                      |   6 +--
 R/plotMDS.R                   |  11 +++--
 R/plotrldf.R                  |  22 ++++++---
 R/voom.R                      |   5 +-
 inst/NEWS.Rd                  | 111 +++++++++++++++++++++++++++++++++++++++++-
 inst/doc/changelog.txt        |  77 ++++++++++++++++++++++-------
 inst/doc/intro.pdf            | Bin 46191 -> 43301 bytes
 man/01Introduction.Rd         |   5 ++
 man/cumOverlap.Rd             |  49 +++++++++++++++++++
 man/detectionPValue.Rd        |   2 +
 man/ebayes.Rd                 |   2 +-
 man/fitfdist.Rd               |   9 +---
 man/goana.Rd                  |  25 ++++++----
 man/ids2indices.Rd            |   6 +--
 man/plotMDS.Rd                |  10 ++--
 man/plotRLDF.Rd               |  19 +++++---
 man/squeezeVar.Rd             |   2 +-
 man/voom.Rd                   |  14 ++++--
 man/voomWithQualityWeights.Rd |  10 ++--
 tests/limma-Tests.Rout.save   |  56 ++++++++++-----------
 26 files changed, 418 insertions(+), 133 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f57cdb0..42108e5 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: limma
-Version: 3.28.17
-Date: 2016-07-24
+Version: 3.30.0
+Date: 2016-10-17
 Title: Linear Models for Microarray Data
 Description: Data analysis, linear models and differential expression for microarray data.
 Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb]
@@ -21,4 +21,4 @@ biocViews: ExonArray, GeneExpression, Transcription,
         MultipleComparison, Normalization, Preprocessing,
         QualityControl
 NeedsCompilation: yes
-Packaged: 2016-07-25 01:26:42 UTC; biocbuild
+Packaged: 2016-10-17 22:07:54 UTC; biocbuild
diff --git a/R/cumOverlap.R b/R/cumOverlap.R
new file mode 100644
index 0000000..6ca4053
--- /dev/null
+++ b/R/cumOverlap.R
@@ -0,0 +1,45 @@
+cumOverlap <- function(ol1, ol2)
+#	Cumulative overlap analysis
+#	Di Wu and Gordon Smyth
+#	Createdin 2007. Last modified 16 April 2010
+#	Based on hypergeometric distribution. Test whether two ordered lists of genes are significantly overlapped. 
+{
+#	Check for duplicates
+	if(anyDuplicated(ol1)) stop("Duplicate IDs found in ol1")
+	if(anyDuplicated(ol2)) stop("Duplicate IDs found in ol2")
+
+#	Reduce to IDs found in both lists
+	m1 <- match(ol1,ol2)
+	redo <- FALSE
+	if(anyNA(m1)) {
+		ol1 <- ol1[!is.na(m1)]
+		redo <- TRUE
+	}
+	m2 <- match(ol2,ol1)
+	if(anyNA(m2)) {
+		ol2 <- ol2[!is.na(m2)]
+		redo <- TRUE
+	}
+
+#	Match ol1 to ol2
+	if(redo) m1 <- match(ol1,ol2)
+
+#	Count overlaps
+	ngenes <- length(ol1)
+	i <- 1L:ngenes
+	inoverlap <- m1 <= i
+	noverlap <- cumsum(inoverlap)
+
+#	Hypergeometric p-valules
+	p <- phyper(noverlap-0.5,m=i,n=ngenes-i,k=i,lower.tail=FALSE)
+
+#	Bonferroni
+	p.b <- p*i
+	nmin <- which.min(p.b)
+	p.b <- pmin(p.b,1)
+
+#	Which are ids contribute to overlap
+	idoverlap <- ol1[(1:nmin)[inoverlap[1:nmin]]]
+
+	list(n.min=nmin,p.min=p.b[nmin],n.overlap=noverlap,id.overlap=idoverlap,p.value=p,adj.p.value=p.b)
+}
diff --git a/R/dups.R b/R/dups.R
index d6e8802..1dc7df1 100755
--- a/R/dups.R
+++ b/R/dups.R
@@ -33,7 +33,7 @@ uniquegenelist <- function(genelist,ndups=2,spacing=1) {
 duplicateCorrelation <- function(object,design=NULL,ndups=2,spacing=1,block=NULL,trim=0.15,weights=NULL)
 #	Estimate the correlation between duplicates given a series of arrays
 #	Gordon Smyth
-#	25 Apr 2002. Last revised 14 January 2015.
+#	25 Apr 2002. Last revised 17 Aug 2016.
 {
 #	Extract components from y
 	y <- getEAWP(object)
@@ -65,10 +65,9 @@ duplicateCorrelation <- function(object,design=NULL,ndups=2,spacing=1,block=NULL
 
 #	Check weights
 	if(!is.null(weights)) {
-		weights <- as.matrix(weights)
-		if(any(dim(weights) != dim(M))) weights <- array(weights,dim(M))
-		M[weights < 1e-15 ] <- NA
-		weights[weights < 1e-15] <- NA
+		weights <- asMatrixWeights(weights,dim(M))
+		weights[weights <= 0] <- NA
+		M[!is.finite(weights)] <- NA
 	}
 
 #	Setup spacing or blocking arguments
diff --git a/R/fitFDistRobustly.R b/R/fitFDistRobustly.R
index 0d82ad0..8ac2c2f 100644
--- a/R/fitFDistRobustly.R
+++ b/R/fitFDistRobustly.R
@@ -3,7 +3,7 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 #	given the first degrees of freedom, using first and second
 #	moments of Winsorized z-values
 #	Gordon Smyth and Belinda Phipson
-#	Created 7 Oct 2012.  Last revised 20 November 2015.
+#	Created 7 Oct 2012.  Last revised 22 August 2016.
 {
 #	Check x
 	n <- length(x)
@@ -191,24 +191,24 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 		o <- order(TailP)
 
 #		Old calculation for df2.outlier
-		VarOutlier <- max(zresid)^2
-		VarOutlier <- VarOutlier-trigamma(df1/2)
-		if(trace) cat("VarOutlier",VarOutlier,"\n")
-		if(VarOutlier > 0) {
-			df2.outlier.old <- 2*trigammaInverse(VarOutlier)
-			if(trace) cat("df2.outlier.old",df2.outlier.old,"\n")
-			if(df2.outlier.old < df2) {
-				df2.shrunk.old <- ProbNotOutlier*df2+ProbOutlier*df2.outlier.old
+#		VarOutlier <- max(zresid)^2
+#		VarOutlier <- VarOutlier-trigamma(df1/2)
+#		if(trace) cat("VarOutlier",VarOutlier,"\n")
+#		if(VarOutlier > 0) {
+#			df2.outlier.old <- 2*trigammaInverse(VarOutlier)
+#			if(trace) cat("df2.outlier.old",df2.outlier.old,"\n")
+#			if(df2.outlier.old < df2) {
+#				df2.shrunk.old <- ProbNotOutlier*df2+ProbOutlier*df2.outlier.old
 #				Make df2.shrunk.old monotonic in TailP
-				df2.ordered <- df2.shrunk.old[o]
-				df2.ordered[1] <- min(df2.ordered[1],NonRobust$df2)
-				m <- cumsum(df2.ordered)
-				m <- m/(1:n)
-				imin <- which.min(m)
-				df2.ordered[1:imin] <- m[imin]
-				df2.shrunk.old[o] <- cummax(df2.ordered)
-			}
-		}
+#				df2.ordered <- df2.shrunk.old[o]
+#				df2.ordered[1] <- min(df2.ordered[1],NonRobust$df2)
+#				m <- cumsum(df2.ordered)
+#				m <- m/(1:n)
+#				imin <- which.min(m)
+#				df2.ordered[1:imin] <- m[imin]
+#				df2.shrunk.old[o] <- cummax(df2.ordered)
+#			}
+#		}
 
 #		New calculation for df2.outlier
 #		Find df2.outlier to make maxFstat the median of the distribution
@@ -230,15 +230,15 @@ fitFDistRobustly <- function(x,df1,covariate=NULL,winsor.tail.p=c(0.05,0.1),trac
 		df2.shrunk[o] <- cummax(df2.ordered)
 
 #		Use isoreg() instead. This gives similar results.
-		df2.shrunk.iso <- rep.int(df2,n)
-		o <- o[1:(n/2)]
-		df2.shrunk.iso[o] <- ProbNotOutlier[o]*df2+ProbOutlier[o]*df2.outlier
-		df2.shrunk.iso[o] <- isoreg(TailP[o],df2.shrunk.iso[o])$yf
+#		df2.shrunk.iso <- rep.int(df2,n)
+#		o <- o[1:(n/2)]
+#		df2.shrunk.iso[o] <- ProbNotOutlier[o]*df2+ProbOutlier[o]*df2.outlier
+#		df2.shrunk.iso[o] <- isoreg(TailP[o],df2.shrunk.iso[o])$yf
 
 	} else {
 		df2.outlier <- df2.outlier2 <- df2
 		df2.shrunk2 <- df2.shrunk <- rep.int(df2,n)
 	}
 
-	list(scale=s20,df2=df2,tail.p.value=TailP,prob.outlier=ProbOutlier,df2.outlier=df2.outlier,df2.outlier.old=df2.outlier.old,df2.shrunk=df2.shrunk,df2.shrunk.old=df2.shrunk.old,df2.shrunk.iso=df2.shrunk.iso)
+	list(scale=s20,df2=df2,tail.p.value=TailP,prob.outlier=ProbOutlier,df2.outlier=df2.outlier,df2.shrunk=df2.shrunk)
 }
diff --git a/R/geneset-fry.R b/R/geneset-fry.R
index 3789e04..246d99b 100644
--- a/R/geneset-fry.R
+++ b/R/geneset-fry.R
@@ -19,7 +19,7 @@ fry.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid=NU
 	if(is.null(Dots$trend)) Dots$trend <- FALSE
 	if(is.null(Dots$robust)) Dots$robust <- FALSE
 	if(Dots$robust & is.null(Dots$winsor.tail.p)) Dots$winsor.tail.p <- c(0.05,0.1)
-	if(!is.null(Dots$block) & is.null(Dots$correlation)) stop("correlation must be set")
+	if(!is.null(Dots$block) & is.null(Dots$correlation)) stop("Intra-block correlation must be specified")
 
 #	Covariate for trended eBayes
 	covariate <- NULL
diff --git a/R/geneset-roast.R b/R/geneset-roast.R
index b6a6241..45912b6 100644
--- a/R/geneset-roast.R
+++ b/R/geneset-roast.R
@@ -33,7 +33,7 @@ roast.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid=
 	if(is.null(Dots$trend)) Dots$trend <- FALSE
 	if(is.null(Dots$robust)) Dots$robust <- FALSE
 	if(Dots$robust & is.null(Dots$winsor.tail.p)) Dots$winsor.tail.p <- c(0.05,0.1)
-	if(!is.null(Dots$block) & is.null(Dots$correlation)) stop("correlation must be set")
+	if(!is.null(Dots$block) & is.null(Dots$correlation)) stop("Intra-block correlation must be specified")
 
 #	Covariate for trended eBayes
 	covariate <- NULL
diff --git a/R/norm.R b/R/norm.R
index 8ab726b..6ad5605 100755
--- a/R/norm.R
+++ b/R/norm.R
@@ -148,8 +148,8 @@ normalizeRobustSpline <- function(M,A,layout=NULL,df=5,method="M") {
 #	Gordon Smyth
 #	27 April 2003.  Last revised 14 January 2015.
 
-	if(!requireNamespace("MASS",quietly=TRUE)) stop("MASS package required but is not available")
-	if(!requireNamespace("splines",quietly=TRUE)) stop("splines package required but is not available")
+	if(!requireNamespace("MASS",quietly=TRUE)) stop("MASS package required but is not installed (or can't be loaded)")
+	if(!requireNamespace("splines",quietly=TRUE)) stop("splines package required but is not installed (or can't be loaded)")
 	if(is.null(layout)) {
 		ngrids <- 1
 		nspots <- length(M)
@@ -257,7 +257,7 @@ normalizeForPrintorder.rg <- function(R,G,printorder,method="loess",separate.cha
 	if(method=="plate") {
 		# Correct for plate pack (usually four 384-well plates)
 		plate <- 1 + (printorder-0.5) %/% plate.size
-		if(!requireNamespace("MASS",quietly=TRUE)) stop("MASS package required but is not available")
+		if(!requireNamespace("MASS",quietly=TRUE)) stop("MASS package required but is not installed (or can't be loaded)")
 		hubermu <- function(...) MASS::huber(...)$mu
 		if(separate.channels) {
 			plate.mR <- tapply(Rf,plate,hubermu)
diff --git a/R/plotMDS.R b/R/plotMDS.R
index be94e7d..e99c2a7 100644
--- a/R/plotMDS.R
+++ b/R/plotMDS.R
@@ -55,13 +55,13 @@ plotMDS.MDS <- function(x,labels=NULL,pch=NULL,cex=1,dim.plot=NULL,xlab=NULL,yla
 		text(x$x, x$y, labels = labels, cex = cex, ...)
 	}
 
-	return(invisible(x))
+	invisible(x)
 }
 
-plotMDS.default <- function(x,top=500,labels=NULL,pch=NULL,cex=1,dim.plot=c(1,2),ndim=max(dim.plot),gene.selection="pairwise",xlab=NULL,ylab=NULL,...)
+plotMDS.default <- function(x,top=500,labels=NULL,pch=NULL,cex=1,dim.plot=c(1,2),ndim=max(dim.plot),gene.selection="pairwise",xlab=NULL,ylab=NULL,plot=TRUE,...)
 #	Multi-dimensional scaling with top-distance
 #	Di Wu and Gordon Smyth
-#	19 March 2009.  Last modified 14 Jan 2015
+#	19 March 2009.  Last modified 6 Oct 2016
 {
 #	Check x
 	x <- as.matrix(x)
@@ -133,5 +133,8 @@ plotMDS.default <- function(x,top=500,labels=NULL,pch=NULL,cex=1,dim.plot=c(1,2)
 		mds$y <- a1[,dim.plot[2]]
 	mds$top <- top
 	mds$axislabel <- axislabel
-	plotMDS(mds,labels=labels,pch=pch,cex=cex,xlab=xlab,ylab=ylab,...)
+	if(plot)
+		plotMDS(mds,labels=labels,pch=pch,cex=cex,xlab=xlab,ylab=ylab,...)
+	else
+		mds
 }
diff --git a/R/plotrldf.R b/R/plotrldf.R
index c8fee14..9e1d63f 100644
--- a/R/plotrldf.R
+++ b/R/plotrldf.R
@@ -1,9 +1,9 @@
 #  Plot regularized linear discriminant functions
 
-plotRLDF <- function(y,design=NULL,z=NULL,labels.y=NULL,labels.z=NULL,col.y="black",col.z="black",show.dimensions=c(1,2),ndim=max(show.dimensions),nprobes=100,plot=TRUE,var.prior=NULL,df.prior=NULL,trend=FALSE,robust=FALSE,...)
+plotRLDF <- function(y,design=NULL,z=NULL,nprobes=100,plot=TRUE,labels.y=NULL,labels.z=NULL,pch.y=NULL,pch.z=NULL,col.y="black",col.z="black",show.dimensions=c(1,2),ndim=max(show.dimensions),var.prior=NULL,df.prior=NULL,trend=FALSE,robust=FALSE,...)
 #	Regularized linear discriminant function
 #	Di Wu and Gordon Smyth
-#	29 June 2009.  Last revised 26 Jan 2016.
+#	29 June 2009.  Last revised 4 Sep 2016.
 {
 #	Check y
 	y <- as.matrix(y)
@@ -11,9 +11,9 @@ plotRLDF <- function(y,design=NULL,z=NULL,labels.y=NULL,labels.z=NULL,col.y="bla
 	n <- ncol(y)
 
 #	Check labels.y
-	if(is.null(labels.y))
+	if(is.null(labels.y)) {
 		labels.y <- colnames(y)
-	else {
+	} else {
 		if(length(labels.y) != n) stop("length(labels.y) doesn't agree with ncol(y)")
 		labels.y <- as.character(labels.y)
 	}
@@ -138,8 +138,18 @@ plotRLDF <- function(y,design=NULL,z=NULL,labels.y=NULL,labels.z=NULL,col.y="bla
 #	Make plot
 	if(plot) {
 		plot(c(d1.y,d1.z),c(d2.y,d2.z),type="n", xlab=paste("Discriminant Function",d1), ylab=paste("Discriminant Function",d2))
-		text(d1.y,d2.y,labels=labels.y,col=col.y,...)
-		if(!is.null(z)) text(d1.z,d2.z,labels=labels.z,col=col.z,...)
+		if(is.null(pch.y)) {
+			text(d1.y,d2.y,labels=labels.y,col=col.y,...)
+		} else {
+			points(d1.y,d2.y,pch=pch.y,col=col.y,...)
+		}
+		if(!is.null(z)) {
+			if(is.null(pch.z)) {
+				text(d1.z,d2.z,labels=labels.z,col=col.z,...)
+			} else {
+				points(d1.z,d2.z,pch=pch.z,col=col.z,...)
+			}
+		}
 	}
 
 #	Output
diff --git a/R/voom.R b/R/voom.R
index eaa7cc0..55da865 100644
--- a/R/voom.R
+++ b/R/voom.R
@@ -2,7 +2,7 @@ voom <- function(counts,design=NULL,lib.size=NULL,normalize.method="none",span=0
 #	Linear modelling of count data with mean-variance modelling at the observational level.
 #	Creates an EList object for entry to lmFit() etc in the limma pipeline.
 #	Gordon Smyth and Charity Law
-#	Created 22 June 2011.  Last modified 27 Jan 2016.
+#	Created 22 June 2011.  Last modified 9 Sep 2016.
 {
 	out <- list()
 
@@ -24,6 +24,9 @@ voom <- function(counts,design=NULL,lib.size=NULL,normalize.method="none",span=0
 		}
 	}
 
+	n <- nrow(counts)
+	if(n < 2L) stop("Need at least two genes to fit a mean-variance trend")
+
 #	Check design
 	if(is.null(design)) {
 		design <- matrix(1,ncol(counts),1)
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index 2f162b9..144507b 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -3,6 +3,115 @@
 \encoding{UTF-8}
 
 
+\section{Version 3.30.0}{\itemize{
+
+\item
+  New function cumOverlap() to analyse the overlap between two
+  ordered lists.
+
+\item
+  New function detectionPValues() to compute detection p-values from
+  negative control probes.
+
+\item
+  New function fitmixture() to estimate genewise fold changes and
+  expression values from mixture experiments.  Previously this
+  funtion was only available as part of the illumina package available
+  from http://bioinf.wehi.edu.au/illumina
+
+\item
+  New function logcosh() to compute log(cosh(x)) accurately without
+  floating underflow or overflow.
+
+\item
+  The default settings for the 'inter.gene.cor' and 'use.neg.cor'
+  arguments of camera() have been changed.  camera() now uses by
+  default a preset value for the inter-gene correlation. This has the
+  effect that it tends to rank highly co-regulated,
+  biologically-interpretable more highly than before.
+
+\item
+  New flexibility for the roast() and mroast() functions, similar to
+  that previously implemented for fry().  The index vector for each
+  gene set can now be a data.frame, allowing each gene set to have
+  its own set of gene weights.  The indices can now optionally be a
+  vector of gene names instead of a vector of indices.  roast() and
+  mroast() now support the robust empirical Bayes option of
+  squeezeVar().  roast() and mroast() can now accept, via '...', any
+  argument that would be normally passed to lmFit() or eBayes().
+
+\item
+  Slight change to the standardize="posterior.sd" method for fry().
+
+\item
+  goana(), alias2Symbol() and alias2SymbolTable() now work for any species for which an Entrez Gene based
+  organism package exists.
+
+\item
+  The 'species' argument of kegga() can now accept any Bioconductor
+  species abbreviation.
+
+\item
+  topGO() now breaks ties by the number of genes in the GO Term and
+  by the name of the Term if the p-values are equal. (This is the
+  same behavior as topKEGG.)
+
+\item
+  New argument 'plot' for plotMDS(), to optionally allow an MDS object
+  to be returned without making a plot.
+
+\item
+  New arguments 'annotation' and 'verbose' for read.idat().
+  The first of these to allows users to read
+  any required columns from the manifest file.
+
+\item
+  New arguments 'pch.y' and 'pch.z' for plotRLDF.
+
+\item
+  Unnecessary argument 'design' removed from fitted.MArrayLM.
+
+\item
+  normalizeBetweenArrays() now checks whether the input 'object' is a
+  data.frame, and converts to a matrix if possible.
+
+\item
+  duplicateCorrelation() now expands weights using expandAsWeights(),
+  making it consistent with lmFit().
+
+\item
+  The lowess line drawn by plotSA() is now more robust with respect
+  to NA variances.
+
+\item
+  More informative error message from voom() when there is only one
+  row of data.
+
+\item
+  More informative error message from getEAWP() and lmFit() when
+  'object' is a non-normalized data object.
+
+\item
+  Update Phipson et al (2016) reference for robust empirical Bayes.
+
+\item
+  Bug fix to fitFDistRobustly(), which in some circumstances was
+  trying to save extra (undocumented) results that had not been
+  computed.
+
+\item
+  Bug fixes for fry() with robust=TRUE or when 'index' has NULL names.
+
+\item
+  Bug fix to propTrueNull() when method="hist" and all the p-values
+  are less than 1/nbins.
+
+\item
+  Bug fix to alias2Symbol() with expand.symbol=TRUE.
+
+}}
+
+
 \section{Version 3.28.0}{\itemize{
 
 \item
@@ -60,7 +169,7 @@
   a vector of genewise values.
 
 \item
-  new argument 'save.plot' for voom().
+  New argument 'save.plot' for voom().
 
 \item
   diffSplice() now returns genewise residual variances.
diff --git a/inst/doc/changelog.txt b/inst/doc/changelog.txt
index 78a36b8..526107d 100755
--- a/inst/doc/changelog.txt
+++ b/inst/doc/changelog.txt
@@ -1,27 +1,69 @@
-24 Jul 2016: limma 3.28.17
+17 Oct 2016: limma 3.29.25
+
+- Update help page of id2indices() to use MSigDB version 5.2.
+
+- Revise help page for voom().
+
+16 Oct 2016: limma 3.29.24
+
+- Update section on RNA-seq in User's Guide.
+
+16 Oct 2016: limma 3.29.23
+
+- New 'plot' argument for plotMDS(), to optionally return an MDS
+  object without making a plot.
+
+14 Oct 2016: limma 3.29.22
+
+- NEWS.Rd updated for Bioconductor 3.4 release.
+
+- More informative error message from voom() when there is only one
+  row of data.
+
+ 6 Sep 2016: limma 3.29.21
+
+- New arguments 'pch.y' and 'pch.z' for plotRLDF.
+
+22 Aug 2016: limma 3.29.20
+
+- Bug fix to fitFDistRobustly(), which in some circumstances was
+  trying to save extra (undocumented) results that had not been
+  computed.
+
+17 Aug 2016: limma 3.29.19
+
+- duplicateCorrelation() now expands weights using expandAsWeights(),
+  making it consistent with lmFit().
+
+16 Aug 2016: limma 3.29.18
+
+- New function cumOverlap() to analyse the overlap between two
+  ordered lists.
+
+24 Jul 2016: limma 3.29.17
 
 - Update Phipson et al (2016) reference for robust empirical Bayes.
 
-21 Jul 2016: limma 3.28.16
+21 Jul 2016: limma 3.29.16
 
 - Added more explanation about the 'batch2' and 'covariates'
   arguments to the help page for removeBatchEffect().
 
-18 Jul 2016: limma 3.28.15
+18 Jul 2016: limma 3.29.15
 
 - More informative error message from getEAWP() and lmFit() when
   'object' is a non-normalized data object.
 
-28 Jun 2016: limma 3.28.14
+28 Jun 2016: limma 3.29.14
 
 - Bug fix for fry() when 'index' has NULL names.
 
-26 Jun 2016: limma 3.28.13
+26 Jun 2016: limma 3.29.13
 
 - The lowess line drawn by plotSA() is now more robust with respect
   to NA variances.
 
-23 Jun 2016: limma 3.28.12
+23 Jun 2016: limma 3.29.12
 
 - goana() now works for any species for which an Entrez Gene based
   organism package exists.
@@ -33,33 +75,33 @@
   by the name of the Term if the p-values are equal. (This is the
   same behavior as topKEGG.)
 
-22 Jun 2016: limma 3.28.11
+22 Jun 2016: limma 3.29.11
 
 - alias2Symbol() and alias2SymbolTable() now work for any species for
   which an Entrez Gene based organism package exists.
 
-15 Jun 2016: limma 3.28.10
+15 Jun 2016: limma 3.29.10
 
 - new argument 'annotation' for read.idat(), to allow users to read
   any required columns from the manifest file.
 
-15 Jun 2016: limma 3.28.9
+14 Jun 2016: limma 3.29.9
 
 - normalizeBetweenArrays() now checks whether the input 'object' is a
   data.frame, and converts to a matrix if possible.
 
-13 Jun 2016: limma 3.28.8
+13 Jun 2016: limma 3.29.8
 
 - new function detectionPValues() to compute detection p-values from
   negative control probes.
 
-12 Jun 2016: limma 3.28.7
+12 Jun 2016: limma 3.29.7
 
 - new argument 'verbose' for read.idat().
 
 - Example code added to duplicateCorrelation.Rd.
 
- 8 Jun 2016: limma 3.28.6
+ 4 Jun 2016: limma 3.29.6
 
 - The default settings for the 'inter.gene.cor' and 'use.neg.cor'
   arguments of camera() have been changed.  camera() now uses by
@@ -67,7 +109,7 @@
   effect that it tends to rank highly co-regulated,
   biologically-interpretable more highly than before.
 
-22 May 2016: limma 3.28.5
+22 May 2016: limma 3.29.5
 
 - New function fitmixture() to estimate genewise fold changes and
   expression values from mixture experiments.  Previously this
@@ -77,11 +119,11 @@
 - New function logcosh() to compute log(cosh(x)) accurately without
   floating underflow or overflow.
 
-11 May 2016: limma 3.28.4
+11 May 2016: limma 3.29.4
 
 - Slight change to fry() standardize="posterior.sd" method.
 
-11 May 2016: limma 3.28.3
+10 May 2016: limma 3.29.3
 
 - New flexibility for the roast() and mroast() functions, similar to
   that previously implemented for fry().  The index vector for each
@@ -95,11 +137,11 @@
 - bug fix to propTrueNull() when method="hist" and all the p-values
   are less than 1/nbins.
 
- 5 May 2016: limma 3.28.2
+ 5 May 2016: limma 3.29.2
 
 - minor speed improvements for squeezeVar() and fry().
 
- 4 May 2016: limma 3.28.1
+ 4 May 2016: limma 3.29.1
 
 - unnecessary argument 'design' removed from fitted.MArrayLM.
 
@@ -107,6 +149,7 @@
 
 - bug fix to alias2Symbol() with expand.symbol=TRUE.
 
+ 4 May 2016: limma 3.29.0 (Bioconductor 3.4 Developmental Branch)
  4 May 2016: limma 3.28.0 (Bioconductor 3.3 Release Branch)
 
 29 April 2016: limma 3.27.20
diff --git a/inst/doc/intro.pdf b/inst/doc/intro.pdf
index ab5ca24..5d726c6 100644
Binary files a/inst/doc/intro.pdf and b/inst/doc/intro.pdf differ
diff --git a/man/01Introduction.Rd b/man/01Introduction.Rd
index 0a6823e..7b4a213 100644
--- a/man/01Introduction.Rd
+++ b/man/01Introduction.Rd
@@ -34,6 +34,11 @@ The function \code{\link{changeLog}} displays the record of changes to the packa
 \author{Gordon Smyth, with contributions from many colleagues}
 
 \references{
+Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
+Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
+\emph{Annals of Applied Statistics} 10, 946-963.
+\url{http://projecteuclid.org/euclid.aoas/1469199900}
+
 Ritchie, ME, Phipson, B, Wu, D, Hu, Y, Law, CW, Shi, W, and Smyth, GK (2015).
 limma powers differential expression analyses for RNA-sequencing and microarray studies.
 \emph{Nucleic Acids Research} 43, e47.
diff --git a/man/cumOverlap.Rd b/man/cumOverlap.Rd
new file mode 100644
index 0000000..980027f
--- /dev/null
+++ b/man/cumOverlap.Rd
@@ -0,0 +1,49 @@
+\name{cumOverlap}
+\alias{cumOverlap}
+\title{Cumulative Overlap Analysis of Ordered Lists}
+\description{
+Test whether the leading members of ordered lists significantly overlap.
+}
+\usage{
+cumOverlap(ol1, ol2)
+}
+
+\arguments{
+  \item{ol1}{vector containing first ordered list.}
+  \item{ol2}{vector containing second ordered list.}
+}
+
+\value{
+List containing the following components:
+\item{n.min}{integer, top table length leading to smallest adjusted p-value.}
+\item{p.min}{smallest adjusted p-value.}
+\item{n.overlap}{integer, number of overlapping IDs in first \code{n.min}.}
+\item{id.overlap}{vector giving the overlapping IDs in first \code{n.min}.}
+\item{p.value}{numeric, vector of p-values for each possible top table length.}
+\item{adj.p.value}{numeric, vector of Bonferroni adjusted p-values for each possible top table length.}
+}
+
+\details{
+The function compares the top \code{n} members of each list, for every possible \code{n}, and conducts an hypergeometric test for overlap.
+The function returns the value of \code{n} giving the smallest Bonferroni adjusted p-value.
+
+This method was described in Chapter 4 of Wu (2011).
+}
+
+\author{Gordon Smyth and Di Wu}
+
+\references{
+Wu, D (2011).
+Finding hidden relationships between gene expression profiles with application to breast cancer biology.
+PhD thesis, University of Melbourne.
+\url{http://hdl.handle.net/11343/36278}
+}
+
+\examples{
+ol1 <- letters[1:26]
+ol2 <- letters[sample(26)]
+coa <- cumOverlap(ol1, ol2)
+coa$p.min
+}
+
+\concept{gene set enrichment analysis}
diff --git a/man/detectionPValue.Rd b/man/detectionPValue.Rd
index aca5353..4280439 100644
--- a/man/detectionPValue.Rd
+++ b/man/detectionPValue.Rd
@@ -24,6 +24,8 @@ Particularly useful for Illumina BeadChips.}
 The rows of \code{x} for which \code{status == negctrl} are assumed to correspond to negative control probes.
 
 For each column of \code{x}, the detection p-values are defined as \code{(N.eq/2 + N.gt) / N.neg}, where \code{N.gt} is the number of negative controls with expression greater than the observed value, \code{N.eq} is the number of negative controls with expression equal to the observed value, and \code{N.neg} is the total number of negative controls.
+
+When used on Illumina BeadChip data, this function produces essentially the same detection p-values as returned by Illumina's GenomeStudio software.
 }
 
 \value{numeric matrix of same dimensions as \code{x} containing detection p-values.}
diff --git a/man/ebayes.Rd b/man/ebayes.Rd
index a67d15f..1998871 100755
--- a/man/ebayes.Rd
+++ b/man/ebayes.Rd
@@ -96,7 +96,7 @@ Loennstedt, I., and Speed, T. P. (2002). Replicated microarray data. \emph{Stati
 Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
 Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
 \emph{Annals of Applied Statistics} 10, 946-963.
-\url{http://arxiv.org/abs/1602.08678}
+\url{http://projecteuclid.org/euclid.aoas/1469199900}
 
 Smyth, G. K. (2004). Linear models and empirical Bayes methods for assessing differential expression in microarray experiments.
 \emph{Statistical Applications in Genetics and Molecular Biology} 3, Article 3.
diff --git a/man/fitfdist.Rd b/man/fitfdist.Rd
index cdd2509..6b2c54f 100755
--- a/man/fitfdist.Rd
+++ b/man/fitfdist.Rd
@@ -24,7 +24,7 @@ The parameters are estimated using the method of moments, specifically from the
 
 \code{fitFDistRobustly} is similar to \code{fitFDist} except that it computes the moments of the Winsorized values of \code{x}, making it robust against left and right outliers.
 Larger values for \code{winsor.tail.p} produce more robustness but less efficiency.
-The robust method is described by Phipson (2013) and Phipson et al (2016).
+The robust method is described by Phipson et al (2016).
 
 As well as estimating the F-distribution for the bulk of the cases, i.e., with outliers discounted, \code{fitFDistRobustly} also returns an estimated F-distribution with reduced df2 that might be appropriate for each outlier case.
 }
@@ -53,15 +53,10 @@ Smyth, G. K. (2004). Linear models and empirical Bayes methods for
      No. 1, Article 3.
      \url{http://www.statsci.org/smyth/pubs/ebayes.pdf}
 
-Phipson, B. (2013).
-\emph{Empirical Bayes modelling of expression profiles and their associations}.
-PhD Thesis. University of Melbourne, Australia.
-\url{http://repository.unimelb.edu.au/10187/17614}
-
 Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
 Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
 \emph{Annals of Applied Statistics} 10, 946-963.
-\url{http://arxiv.org/abs/1602.08678}
+\url{http://projecteuclid.org/euclid.aoas/1469199900}
 }
 
 \seealso{
diff --git a/man/goana.Rd b/man/goana.Rd
index 513f754..b4f500e 100644
--- a/man/goana.Rd
+++ b/man/goana.Rd
@@ -26,7 +26,7 @@ getKEGGPathwayNames(species.KEGG = NULL, remove.qualifier = FALSE)
 }
 
 \arguments{
-  \item{de}{a vector of Entrez Gene IDs, or a list of such vectors, or an \code{MArrayLM} fit object.}
+  \item{de}{a character vector of Entrez Gene IDs, or a list of such vectors, or an \code{MArrayLM} fit object.}
   \item{coef}{column number or column name specifying for which coefficient or contrast differential expression should be assessed.}
   \item{geneid}{Entrez Gene identifiers. Either a vector of length \code{nrow(de)} or the name of the column of \code{de$genes} containing the Entrez Gene IDs.}
   \item{FDR}{false discovery rate cutoff for differentially expressed genes. Numeric value between 0 and 1.}
@@ -54,25 +54,28 @@ getKEGGPathwayNames(species.KEGG = NULL, remove.qualifier = FALSE)
 }
 
 \details{
-These functions performs a over-representation analysis for Gene Ontology terms or KEGG pathways in a list of Entrez Gene IDs.
-The default method accepts the gene list as a vector of gene IDs,
-while the \code{MArrayLM} method extracts the gene lists automatically from a linear model fit object.
+These functions perform over-representation analyses for Gene Ontology terms or KEGG pathways in one or more vectors of Entrez Gene IDs.
+The default method accepts a gene set as a vector of gene IDs or multiple gene sets as a list of vectors.
+An over-represention analysis is then done for each set.
+The \code{MArrayLM} method extracts the gene sets automatically from a linear model fit object.
 
 \code{goana} uses annotation from the appropriate Bioconductor organism package.
-The \code{species} can be any character string XX for which an organism package org.XX.eg.db exists and is installed.
+The \code{species} can be any character string XX for which an organism package org.XX.eg.db is installed.
+Examples are \code{"Hs"} for human for {"Mm"} for mouse.
 See \code{\link{alias2Symbol}} for other possible values for \code{species}.
 
 \code{kegga} reads KEGG pathway annotation from the KEGG website.
-Note that the species name can be provided in either Bioconductor or KEGG format.
+For \code{kegga}, the species name can be provided in either Bioconductor or KEGG format.
+Examples of KEGG format are \code{"hsa"} for human or \code{"mmu"} for mouse.
 \code{kegga} can be used for any species supported by KEGG, of which there are more than 14,000 possibilities.
 By default, \code{kegga} obtains the KEGG annotation for the specified species from the \url{http://rest.kegg.jp} website.
 Alternatively one can supply the required pathway annotation to \code{kegga} in the form of two data.frames.
 If this is done, then an internet connection is not required.
 
-The ability to supply data.frame annotation to \code{kegga} means that \code{kegga} can in principle be used to analyze any user-supplied gene sets.
+The ability to supply data.frame annotation to \code{kegga} means that \code{kegga} can in principle be used in conjunction with any user-supplied set of annotation terms.
 
 The default \code{goana} and \code{kegga} methods accept a vector \code{prior.prob} giving the prior probability that each gene in the universe appears in a gene set.
-This vector can be used to correct for unwanted trends in the differential expression analysis associated with gene length, gene abundance or any other covariate.
+This vector can be used to correct for unwanted trends in the differential expression analysis associated with gene length, gene abundance or any other covariate (Young et al, 2010).
 The \code{MArrayLM} object computes the \code{prior.prob} vector automatically when \code{trend} is non-\code{NULL}.
 
 If \code{prior.prob=NULL}, the function computes one-sided hypergeometric tests equivalent to Fisher's exact test.
@@ -150,22 +153,25 @@ fit <- lmFit(y, design)
 fit <- eBayes(fit)
 
 # Standard GO analysis
+
 go.fisher <- goana(fit, species="Hs")
 topGO(go.fisher, sort = "up")
 topGO(go.fisher, sort = "down")
 
 # GO analysis adjusting for gene abundance
+
 go.abund <- goana(fit, geneid = "GeneID", trend = TRUE)
 topGO(go.abund, sort = "up")
 topGO(go.abund, sort = "down")
 
 # GO analysis adjusting for gene length bias
 # (assuming that y$genes$Length contains gene lengths)
+
 go.len <- goana(fit, geneid = "GeneID", trend = "Length")
 topGO(go.len, sort = "up")
 topGO(go.len, sort = "down")
 
-## Default usage with a gene list:
+## Default usage with a list of gene sets:
 
 go.de <- goana(list(DE1 = EG.DE1, DE2 = EG.DE2, DE3 = EG.DE3))
 topGO(go.de, sort = "DE1")
@@ -175,6 +181,7 @@ topGO(go.de, ontology = "CC", sort = "DE3")
 topGO(go.de, ontology = "MF", sort = "DE3")
 
 ## Standard KEGG analysis
+
 k <- kegga(fit, species="Hs")
 k <- kegga(fit, species.KEGG="hsa") # equivalent to previous
 }
diff --git a/man/ids2indices.Rd b/man/ids2indices.Rd
index 5c1fd01..36bcd4f 100644
--- a/man/ids2indices.Rd
+++ b/man/ids2indices.Rd
@@ -31,10 +31,10 @@ There is a topic page on \link{10.GeneSetTests}.
 
 \dontrun{
 
-download.file("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p1.rdata", 
-	      "human_c2_v5p1.rdata", mode = "wb")
+download.file("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p2.rdata", 
+	      "human_c2_v5p2.rdata", mode = "wb")
 
-load("human_c2_v5p1.rdata")
+load("human_c2_v5p2.rdata")
 c2.indices <- ids2indices(Hs.c2, y$genes$GeneID)
 camera(y, c2.indices, design)
 
diff --git a/man/plotMDS.Rd b/man/plotMDS.Rd
index 5038e11..37d9ed8 100644
--- a/man/plotMDS.Rd
+++ b/man/plotMDS.Rd
@@ -13,7 +13,7 @@ Plot samples on a two-dimensional scatterplot so that distances on the plot appr
 \usage{
 \method{plotMDS}{default}(x, top = 500, labels = NULL, pch = NULL, cex = 1,
      dim.plot = c(1,2), ndim = max(dim.plot), gene.selection = "pairwise",
-     xlab = NULL, ylab = NULL, \dots)
+     xlab = NULL, ylab = NULL, plot = TRUE, \dots)
 \method{plotMDS}{MDS}(x, labels = NULL, pch = NULL, cex = 1, dim.plot = NULL,
      xlab = NULL, ylab = NULL, \dots)
 }
@@ -29,6 +29,7 @@ Plot samples on a two-dimensional scatterplot so that distances on the plot appr
   \item{gene.selection}{character, \code{"pairwise"} to choose the top genes separately for each pairwise comparison between the samples or \code{"common"} to select the same genes for all comparisons.}
   \item{xlab}{title for the x-axis.}
   \item{ylab}{title for the y-axis.}
+  \item{plot}{logical. If \code{TRUE} then a plot is produced. If not, an \code{"MDS"} object is returned.}
   \item{\dots}{any other arguments are passed to \code{plot}, and also to \code{text} (if \code{pch} is \code{NULL}).}
 }
 
@@ -42,13 +43,16 @@ If \code{gene.selection} is \code{"common"}, then the top genes are those with t
 If \code{gene.selection} is \code{"pairwise"}, then a different set of top genes is selected for each pair of samples.
 The pairwise feature selection may be appropriate for microarray data when different molecular pathways are relevant for distinguishing different pairs of samples.
 
+If \code{pch=NULL}, then each sample is represented by a text label, defaulting to the column names of \code{x}.
+If \code{pch} is not \code{NULL}, then plotting symbols are used.
+
 See \code{\link[graphics]{text}} for possible values for \code{col} and \code{cex}.
 }
 
 \value{
-A plot is created on the current graphics device.
+If \code{plot=TRUE}, a plot is created on the current graphics device.
 
-An object of class \code{"MDS"} is invisibly returned.
+An object of class \code{"MDS"} is also returned (invisibly if \code{plot=TRUE}).
 This is a list containing the following components:
 \item{distance.matrix}{numeric matrix of pairwise distances between columns of \code{x}}
 \item{cmdscale.out}{output from the function \code{cmdscale} given the distance matrix}
diff --git a/man/plotRLDF.Rd b/man/plotRLDF.Rd
index e5b095f..d6a9919 100644
--- a/man/plotRLDF.Rd
+++ b/man/plotRLDF.Rd
@@ -5,24 +5,27 @@
 Plot regularized linear discriminant functions for classifying samples based on expression data.
 }
 \usage{
-plotRLDF(y, design=NULL, z=NULL, labels.y=NULL, labels.z=NULL,
-         col.y="black", col.z="black", show.dimensions=c(1,2),
-         ndim=max(show.dimensions), nprobes=100, plot=TRUE,
-         var.prior=NULL, df.prior=NULL, trend=FALSE, robust=FALSE, \dots)
+plotRLDF(y, design = NULL, z = NULL, nprobes = 100, plot = TRUE,
+         labels.y = NULL, labels.z = NULL, pch.y = NULL, pch.z = NULL,
+         col.y = "black", col.z = "black",
+         show.dimensions = c(1,2), ndim = max(show.dimensions),
+         var.prior = NULL, df.prior = NULL, trend = FALSE, robust = FALSE, \dots)
 }
 \arguments{
  \item{y}{the training dataset. Can be any data object which can be coerced to a matrix, such as \code{ExpressionSet} or \code{EList}.}
  \item{design}{design matrix defining the training groups to be distinguished. The first column is assumed to represent the intercept.
  Defaults to \code{model.matrix(~factor(labels.y))}.}
  \item{z}{the dataset to be classified.  Can be any data object which can be coerced to a matrix, such as \code{ExpressionSet} or \code{EList}.  Rows must correspond to rows of \code{y}.}
+ \item{nprobes}{number of probes to be used for the calculations. The probes will be selected by moderated F statistic.}
+ \item{plot}{logical, should a plot be created?}
  \item{labels.y}{character vector of sample names or labels in \code{y}. Defaults to \code{colnames(y)} or failing that to \code{1:n}.}
  \item{labels.z}{character vector of sample names or labels in \code{z}. Defaults to \code{colnames(z)} or failing that to \code{letters[1:n]}.}
+ \item{pch.y}{plotting symbol or symbols for \code{y}. See \code{\link{points}} for possible values. Takes precedence over \code{labels.y} if both are specified.}
+ \item{pch.z}{plotting symbol or symbols for \code{y}. See \code{\link{points}} for possible values. Takes precedence over \code{labels.z} if both are specified.}
  \item{col.y}{colors for the plotting \code{labels.y}.}
  \item{col.z}{colors for the plotting \code{labels.z}.}
  \item{show.dimensions}{integer vector of length two indicating which two discriminant functions to plot. Functions are in decreasing order of discriminatory power.}
  \item{ndim}{number of discriminant functions to compute}
- \item{nprobes}{number of probes to be used for the calculations. The probes will be selected by moderated F statistic.}
- \item{plot}{logical, should a plot be created?}
  \item{var.prior}{prior variances, for regularizing the within-group covariance matrix. By default is estimated by \code{squeezeVar}.}
  \item{df.prior}{prior degrees of freedom for regularizing the within-group covariance matrix. By default is estimated by \code{squeezeVar}.}
  \item{trend}{logical, should a trend be estimated for \code{var.prior}?  See \code{eBayes} for details.  Only used if \code{var.prior} or \code{df.prior} are \code{NULL}.}
@@ -42,7 +45,9 @@ The \code{ndim} argument allows all required LDFs to be computed even though onl
 }
 
 \note{
-Default values for \code{df.prior} and \code{var.prior} were changed in limma 3.27.10.
+The default values for \code{df.prior} and \code{var.prior} were changed in limma 3.27.10.
+Previously these were preset values.
+Now the default is to estimate them using \code{squeezeVar}.
 }
 
 \value{
diff --git a/man/squeezeVar.Rd b/man/squeezeVar.Rd
index 4af6f95..dfb6d0f 100755
--- a/man/squeezeVar.Rd
+++ b/man/squeezeVar.Rd
@@ -56,7 +56,7 @@ A list with components
 Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
 Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
 \emph{Annals of Applied Statistics} 10, 946-963.
-\url{http://arxiv.org/abs/1602.08678}
+\url{http://projecteuclid.org/euclid.aoas/1469199900}
 
 Sartor MA, Tomlinson CR, Wesselkamper SC, Sivaganesan S, Leikauf GD, Medvedovic M (2006).
 Intensity-based hierarchical Bayes method improves testing for differentially expressed genes in microarray experiments.
diff --git a/man/voom.Rd b/man/voom.Rd
index 17e3883..0326ac9 100644
--- a/man/voom.Rd
+++ b/man/voom.Rd
@@ -2,7 +2,7 @@
 \alias{voom}
 \title{Transform RNA-Seq Data Ready for Linear Modelling}
 \description{
-Transform count data to log2-counts per million (logCPM), estimate the mean-variance relationship and use this to compute appropriate observational-level weights.
+Transform count data to log2-counts per million (logCPM), estimate the mean-variance relationship and use this to compute appropriate observation-level weights.
 The data are then ready for linear modelling.
 }
 
@@ -11,7 +11,7 @@ voom(counts, design = NULL, lib.size = NULL, normalize.method = "none",
      span = 0.5, plot = FALSE, save.plot = FALSE, \dots)
 }
 \arguments{
- \item{counts}{a numeric \code{matrix} containing raw counts, or an \code{ExpressionSet} containing raw counts, or a \code{DGEList} object.}
+ \item{counts}{a numeric \code{matrix} containing raw counts, or an \code{ExpressionSet} containing raw counts, or a \code{DGEList} object. Counts must be non-negative and NAs are not permitted.}
  \item{design}{design matrix with rows corresponding to samples and columns to coefficients to be estimated.  Defaults to the unit vector meaning that samples are treated as replicates.}
  \item{lib.size}{numeric vector containing total library sizes for each sample.
  If \code{NULL} and \code{counts} is a \code{DGEList} then, the normalized library sizes are taken from \code{counts}.
@@ -34,10 +34,14 @@ Raw counts show increasing variance with increasing count size, while log-counts
 This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
 The weights are then used in the linear modelling process to adjust for heteroscedasticity. 
 
-In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using \code{\link{lowess}}. The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag. The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag. Tags with zero counts across all samples are not included in the lowess fit.
-Optional normalization is performed using \code{\link{normalizeBetweenArrays}}. 
-Using fitted values of log2 counts from a linear model fit by \code{\link{lmFit}}, variances from the mean-variance trend were interpolated for each observation. This was carried out by \code{\link{approxfun}}. Inverse variance weights can be used to correct for mean-variance trend in the count data.
+\code{voom} performs the following specific calculations.
+First, the counts are converted to logCPM values, adding 0.5 to all the counts to avoid taking the logarithm of zero.
+The matrix of logCPM values is then optionally normalized.
+The \code{lmFit} function is used to fit row-wise linear models.
+The \code{lowess} function is then used to fit a trend to the square-root-standard-deviations as a function of average logCPM.
+The trend line is then used to predict the variance of each logCPM value as a function of its fitted value, and the inverse variances become the estimated precision weights.
 }
+
 \value{
 An \code{\link[limma:EList]{EList}} object with the following components:
 \item{E}{numeric matrix of normalized expression values on the log2 scale}
diff --git a/man/voomWithQualityWeights.Rd b/man/voomWithQualityWeights.Rd
index 6174f52..4c07cc9 100644
--- a/man/voomWithQualityWeights.Rd
+++ b/man/voomWithQualityWeights.Rd
@@ -40,8 +40,10 @@ This function is intended to process RNA-Seq data prior to linear modelling in l
 It combines observational-level weights from \code{voom} with sample-specific weights estimated using the \code{arrayWeights} function.
 }
 \value{
-A numeric matrix of same dimension as \code{counts} containing consolidated voom and sample-specific weights.
-If \code{replace.weights=TRUE}, then an \code{\link[limma:EList]{EList}} object is returned with the \code{weights} component containing the consolidated weights.
+If \code{replace.weights=TRUE}, then an \code{\link[limma:EList]{EList}} object is returned similar to that from \code{\link{voom}}.
+The \code{sample.weights} component contains the vector of sample quality factors and the \code{weights} component contains the matrix of weights consolidating both the voom precision weights and the sample quality factors.
+
+If \code{replace.weights=FALSE} then just the matrix of weights is returned.
 }
 
 \author{Matthew Ritchie and Cynthia Liu}
@@ -54,8 +56,8 @@ Voom: precision weights unlock linear model analysis tools for RNA-seq read coun
 
 Liu, R., Holik, A. Z., Su, S., Jansz, N., Chen, K., Leong, H. S., Blewitt, M. E., Asselin-Labat, M.-L., Smyth, G. K., Ritchie, M. E. (2015).
 Why weight? Combining voom with estimates of sample quality improves power in RNA-seq analyses.
-\emph{Nucleic Acids Research} 43.
-(Accepted 17 April 2015)
+\emph{Nucleic Acids Research} 43, e97.
+\url{http://nar.oxfordjournals.org/content/43/15/e97}
 
 Ritchie, M. E., Diyagama, D., Neilson, van Laar, R., J., Dobrovic, A., Holloway, A., and Smyth, G. K. (2006).
 Empirical array quality weights in the analysis of microarray data.
diff --git a/tests/limma-Tests.Rout.save b/tests/limma-Tests.Rout.save
index 5596aa1..5c3b214 100755
--- a/tests/limma-Tests.Rout.save
+++ b/tests/limma-Tests.Rout.save
@@ -1,5 +1,5 @@
 
-R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
+R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
 Copyright (C) 2016 The R Foundation for Statistical Computing
 Platform: x86_64-w64-mingw32/x64 (64-bit)
 
@@ -1346,40 +1346,40 @@ set1      5        Up 7.3344e-12
 > go <- goana(fit,FDR=0.8,geneid=EB)
 > topGO(go,n=10,truncate.term=30)
                                      Term Ont  N Up Down        P.Up
-GO:0032502          developmental process  BP 24  3    6 0.868520473
 GO:0006915              apoptotic process  BP  5  4    1 0.003446627
 GO:0012501          programmed cell death  BP  5  4    1 0.003446627
 GO:0055082 cellular chemical homeostas...  BP  2  0    2 1.000000000
 GO:0006897                    endocytosis  BP  3  3    0 0.005046382
 GO:0048856 anatomical structure develo...  BP 23  3    5 0.845083285
-GO:0044767 single-organism development...  BP 23  3    5 0.845083285
-GO:1902589 single-organism organelle o...  BP  7  1    3 0.762502428
+GO:0032502          developmental process  BP 23  3    5 0.845083285
 GO:0008219                     cell death  BP  6  4    1 0.009129968
-GO:0016265                          death  BP  6  4    1 0.009129968
-                 P.Down
-GO:0032502 0.0006606503
-GO:0006915 0.3096959098
-GO:0012501 0.3096959098
-GO:0055082 0.0042424242
-GO:0006897 1.0000000000
-GO:0048856 0.0066515474
-GO:0044767 0.0066515474
-GO:1902589 0.0066732856
-GO:0008219 0.3605604217
-GO:0016265 0.3605604217
+GO:0019725           cellular homeostasis  BP  3  0    2 1.000000000
+GO:0048232         male gamete generation  BP  3  0    2 1.000000000
+GO:0007283                spermatogenesis  BP  3  0    2 1.000000000
+                P.Down
+GO:0006915 0.309695910
+GO:0012501 0.309695910
+GO:0055082 0.004242424
+GO:0006897 1.000000000
+GO:0048856 0.006651547
+GO:0032502 0.006651547
+GO:0008219 0.360560422
+GO:0019725 0.012294372
+GO:0048232 0.012294372
+GO:0007283 0.012294372
 > topGO(go,n=10,truncate.term=30,sort="down")
-                                     Term Ont  N Up Down      P.Up       P.Down
-GO:0032502          developmental process  BP 24  3    6 0.8685205 0.0006606503
-GO:0055082 cellular chemical homeostas...  BP  2  0    2 1.0000000 0.0042424242
-GO:0048856 anatomical structure develo...  BP 23  3    5 0.8450833 0.0066515474
-GO:0044767 single-organism development...  BP 23  3    5 0.8450833 0.0066515474
-GO:1902589 single-organism organelle o...  BP  7  1    3 0.7625024 0.0066732856
-GO:0031225 anchored component of membr...  CC  3  0    2 1.0000000 0.0122943723
-GO:0019725           cellular homeostasis  BP  3  0    2 1.0000000 0.0122943723
-GO:0061008 hepaticobiliary system deve...  BP  3  0    2 1.0000000 0.0122943723
-GO:0001889              liver development  BP  3  0    2 1.0000000 0.0122943723
-GO:0048232         male gamete generation  BP  3  0    2 1.0000000 0.0122943723
+                                     Term Ont  N Up Down      P.Up      P.Down
+GO:0055082 cellular chemical homeostas...  BP  2  0    2 1.0000000 0.004242424
+GO:0048856 anatomical structure develo...  BP 23  3    5 0.8450833 0.006651547
+GO:0032502          developmental process  BP 23  3    5 0.8450833 0.006651547
+GO:0019725           cellular homeostasis  BP  3  0    2 1.0000000 0.012294372
+GO:0048232         male gamete generation  BP  3  0    2 1.0000000 0.012294372
+GO:0007283                spermatogenesis  BP  3  0    2 1.0000000 0.012294372
+GO:0009653 anatomical structure morpho...  BP 10  0    3 1.0000000 0.020760307
+GO:0031988       membrane-bounded vesicle  CC 19  1    4 0.9851064 0.023153004
+GO:0007276              gamete generation  BP  4  0    2 1.0000000 0.023749721
+GO:0032504 multicellular organism repr...  BP  4  0    2 1.0000000 0.023749721
 > 
 > proc.time()
    user  system elapsed 
-   2.77    0.23    3.04 
+   2.88    0.18    3.08 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-limma.git



More information about the debian-med-commit mailing list