[med-svn] [r-bioc-edger] 01/07: Imported Upstream version 3.16.1+dfsg

Andreas Tille tille at debian.org
Fri Oct 28 08:36:57 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-bioc-edger.

commit 3b301b7dd24af67cf4002fd72b497d0300375e5b
Author: Andreas Tille <tille at debian.org>
Date:   Fri Oct 28 09:49:26 2016 +0200

    Imported Upstream version 3.16.1+dfsg
---
 DESCRIPTION                    |   10 +-
 NAMESPACE                      |    3 +-
 R/DGEList.R                    |   15 +-
 R/addPriorCount.R              |   27 +
 R/adjustedProfileLik.R         |   68 +--
 R/aveLogCPM.R                  |   60 +--
 R/cpm.R                        |   25 +-
 R/diffSpliceDGE.R              |    5 +-
 R/estimateDisp.R               |   82 ++-
 R/expandAsMatrix.R             |   18 +-
 R/glmQLFTest.R                 |   61 +--
 R/glmTreat.R                   |  131 +++--
 R/glmfit.R                     |   15 +-
 R/locfitByCol.R                |    5 +-
 R/makeCompressedMatrix.R       |  164 ++++++
 R/mglmLevenberg.R              |   60 +--
 R/mglmOneGroup.R               |   36 +-
 R/mglmOneWay.R                 |   29 +-
 R/nbinomDeviance.R             |   50 +-
 R/plotMD.R                     |   13 +-
 R/predFC.R                     |   34 +-
 R/readDGE.R                    |   18 +-
 R/spliceVariants.R             |   10 +-
 build/vignette.rds             |  Bin 228 -> 229 bytes
 inst/CITATION                  |   37 +-
 inst/NEWS.Rd                   |   39 ++
 inst/doc/edgeR.pdf             |  Bin 48664 -> 45765 bytes
 man/addPriorCount.Rd           |   52 ++
 man/aveLogCPM.Rd               |    4 +-
 man/edgeR-package.Rd           |   25 +-
 man/estimateDisp.Rd            |    5 +-
 man/expandAsMatrix.Rd          |   21 +-
 man/glmQLFTest.Rd              |   21 +-
 man/glmTreat.Rd                |   20 +-
 man/goodTuring.Rd              |    6 +-
 man/makeCompressedMatrix.Rd    |  108 ++++
 man/plotMD.Rd                  |    3 +
 man/plotQLDisp.Rd              |   11 +-
 man/plotSmear.Rd               |   17 +-
 man/predFC.Rd                  |    7 +-
 man/processAmplicons.Rd        |   34 +-
 src/R_add_prior_count.cpp      |   64 +++
 src/R_add_repeat_matrices.cpp  |   41 ++
 src/R_ave_log_cpm.cpp          |   62 +++
 src/R_calculate_cpm.cpp        |   86 ++++
 src/R_check_counts.cpp         |  137 +++++
 src/R_check_poisson_bound.cpp  |   50 ++
 src/R_compute_apl.cpp          |  111 +++++
 src/R_compute_nbdev.cpp        |  113 +++--
 src/R_cr_adjust.cpp            |   43 --
 src/R_get_one_way_fitted.cpp   |   60 +++
 src/R_initialize_levenberg.cpp |  206 ++++++++
 src/R_levenberg.cpp            |  105 ++--
 src/R_one_group.cpp            |  136 +++--
 src/add_prior.cpp              |   56 +++
 src/add_prior.h                |   25 +
 src/adj_coxreid.cpp            |   62 ++-
 src/glm.h                      |   17 +-
 src/glm_levenberg.cpp          |  120 ++---
 src/glm_one_group.cpp          |   20 +-
 src/init.cpp                   |   24 +-
 src/matvec_check.cpp           |  178 +++++--
 src/matvec_check.h             |   32 +-
 src/nbdev.cpp                  |    2 +-
 src/utils.h                    |   42 +-
 tests/edgeR-Tests.Rout.save    | 1079 ++++++++++++++++++++--------------------
 66 files changed, 2836 insertions(+), 1384 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 245d4e8..30ac9d4 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: edgeR
-Version: 3.14.0
-Date: 2016-04-19
+Version: 3.16.1
+Date: 2016-10-25
 Title: Empirical Analysis of Digital Gene Expression Data in R
 Description: Differential expression analysis of RNA-seq expression profiles with biological replication. Implements a range of statistical methodology based on the negative binomial distributions, including empirical Bayes estimation, exact tests, generalized linear models and quasi-likelihood tests. As well as RNA-seq, it can be applied to differential signal analysis of other types of genomic data that produce counts, including ChIP-seq, SAGE and CAGE.
 Author: Yunshun Chen <yuchen at wehi.edu.au>, Aaron Lun <alun at wehi.edu.au>, Davis McCarthy <dmccarthy at wehi.edu.au>, Xiaobei Zhou <xiaobei.zhou at uzh.ch>, Mark Robinson <mark.robinson at imls.uzh.ch>, Gordon Smyth <smyth at wehi.edu.au>
 Maintainer: Yunshun Chen <yuchen at wehi.edu.au>, Aaron Lun <alun at wehi.edu.au>, Mark Robinson <mark.robinson at imls.uzh.ch>, Davis McCarthy <dmccarthy at wehi.edu.au>, Gordon Smyth <smyth at wehi.edu.au>
 License: GPL (>=2)
 Depends: R (>= 2.15.0), limma
-Imports: graphics, stats, utils, methods
-Suggests: MASS, statmod, splines, locfit, KernSmooth
+Imports: graphics, stats, utils, methods, locfit
+Suggests: MASS, statmod, splines, KernSmooth
 URL: http://bioinf.wehi.edu.au/edgeR
 biocViews: GeneExpression, Transcription, AlternativeSplicing,
         Coverage, DifferentialExpression, DifferentialSplicing,
@@ -16,4 +16,4 @@ biocViews: GeneExpression, Transcription, AlternativeSplicing,
         TimeCourse, SAGE, Sequencing, ChIPSeq, RNASeq, BatchEffect,
         MultipleComparison, Normalization, QualityControl
 NeedsCompilation: yes
-Packaged: 2016-05-04 03:09:43 UTC; biocbuild
+Packaged: 2016-10-25 22:31:14 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index 32babb5..b092605 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,7 @@ exportClasses("DGEList","DGEExact","DGEGLM","DGELRT","TopTags")
 exportMethods("show")
 
 import(methods)
+import(locfit)
 importFrom("graphics", "abline", "axis", "curve", "grid", "legend",
            "lines", "mtext", "plot", "points", "smoothScatter")
 importFrom("stats", "approxfun", "as.dist", "chisq.test", "cmdscale",
@@ -22,7 +23,7 @@ if( tools:::.OStype() == "windows" ) importFrom("utils", "winMenuAddItem")
 
 importFrom("limma",camera,contrastAsCoef,decideTests,fry,goana,kegga,is.fullrank,
      lmFit,loessFit,mroast,nonEstimable,plotMD,plotMDS,plotWithHighlights,removeExt,
-     roast,romer,squeezeVar,subsetListOfArrays,zscoreGamma)
+     roast,romer,squeezeVar,subsetListOfArrays,zscoreGamma,zscoreT)
 importClassesFrom("limma","LargeDataObject","Roast","MDS")
 
 S3method(as.data.frame,TopTags)
diff --git a/R/DGEList.R b/R/DGEList.R
index 4f1717a..9bdcddb 100644
--- a/R/DGEList.R
+++ b/R/DGEList.R
@@ -1,6 +1,6 @@
 DGEList <- function(counts=matrix(0,0,0), lib.size=colSums(counts), norm.factors=rep(1,ncol(counts)), samples=NULL, group=NULL, genes=NULL, remove.zeros=FALSE) 
 #	Construct DGEList object from components, with some checking
-#	Last modified 3 Dec 2015
+#	Last modified 3 Oct 2016
 {
 #	Check counts
 	counts <- as.matrix(counts)
@@ -8,11 +8,12 @@ DGEList <- function(counts=matrix(0,0,0), lib.size=colSums(counts), norm.factors
 	ntags <- nrow(counts)
 	if(nlib>0L && is.null(colnames(counts))) colnames(counts) <- paste0("Sample",1L:nlib)
 	if(ntags>0L && is.null(rownames(counts))) rownames(counts) <- 1L:ntags
+	.isAllZero(counts) # don't really care about all-zeroes, but do want to protect against NA's, negative values.
 
 #	Check lib.size
 	if(is.null(lib.size)) lib.size <- colSums(counts)
-	if(nlib != length(lib.size)) stop("Length of 'lib.size' must equal number of columns in 'counts'")
-	if(any(lib.size==0L)) warning("Zero library size detected.")
+	if(nlib != length(lib.size)) stop("length of 'lib.size' must equal number of columns in 'counts'")
+	if(any(lib.size==0L)) warning("library size of zero detected")
 
 #	Check norm.factors
 	if(is.null(norm.factors)) norm.factors <- rep(1,ncol(counts))
@@ -69,3 +70,11 @@ DGEList <- function(counts=matrix(0,0,0), lib.size=colSums(counts), norm.factors
 	x
 }
 
+.isAllZero <- function(y) 
+# Function to check if all counts are zero in a memory-efficient manner.
+# Also checks and throws an error if NA or negative counts are present.
+{
+	is.zero <- .Call(.cR_check_counts, y)
+	if(is.character(is.zero)) stop(is.zero)
+	return(is.zero)
+}
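
With the new .isAllZero() check, DGEList() now rejects NA and negative counts at construction time rather than failing later in the pipeline. A minimal sketch of the new behaviour (the count values are made up):

    library(edgeR)
    counts <- matrix(c(0, 3, 5, -1), nrow=2)  # contains a negative count
    d <- DGEList(counts=counts)               # now stops with an error from .isAllZero()
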
diff --git a/R/addPriorCount.R b/R/addPriorCount.R
new file mode 100644
index 0000000..51293c0
--- /dev/null
+++ b/R/addPriorCount.R
@@ -0,0 +1,27 @@
+addPriorCount <- function(y, lib.size=NULL, offset=NULL, prior.count=1) 
+# Add library size-adjusted prior counts to values of 'y'.
+# Also add twice the adjusted prior to the library sizes, 
+# which are provided as log-transformed values in 'offset'.
+#
+# written by Aaron Lun
+# created 26 September 2016
+{
+#	Check y
+	if (!is.numeric(y)) stop('count matrix must be numeric')
+	y <- as.matrix(y)
+
+#	Check prior.count
+	prior.count <- .compressPrior(prior.count)
+
+#	Check lib.size and offset.
+#	If offsets are provided, they must have a similar average to log(lib.size)
+#	for the results to be meaningful as logCPM values
+	offset <- .compressOffsets(y, lib.size=lib.size, offset=offset)
+
+#	Adding the prior count.
+	out <- .Call(.cR_add_prior_count, y, offset, prior.count)
+	if (is.character(out)) stop(out)
+	names(out) <- c("y", "offset")
+	return(out)
+}
+
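
addPriorCount() is newly exported (see man/addPriorCount.Rd in the diffstat above). Per the comments, it returns the augmented counts together with offsets inflated by twice the scaled prior. A usage sketch with simulated counts:

    library(edgeR)
    y <- matrix(rpois(40, lambda=10), nrow=10, ncol=4)
    out <- addPriorCount(y, prior.count=1)
    out$y       # counts plus a library-size-scaled prior
    out$offset  # log library sizes, plus twice the scaled prior
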
diff --git a/R/adjustedProfileLik.R b/R/adjustedProfileLik.R
index bc7266b..9344aca 100644
--- a/R/adjustedProfileLik.R
+++ b/R/adjustedProfileLik.R
@@ -6,55 +6,41 @@ adjustedProfileLik <- function(dispersion, y, design, offset, weights=NULL, adju
 # Yunshun Chen, Gordon Smyth, Aaron Lun
 # Created June 2010. Last modified 27 July 2015.
 {
-	if(any(dim(y)!=dim(offset))) offset <- expandAsMatrix(offset,dim(y))
-	ntags <- nrow(y)
-	nlibs <- ncol(y)
-	dispersion <- expandAsMatrix(dispersion,dim(y),byrow=FALSE)
-       
+#   Checking counts
+	if (!is.numeric(y)) stop("counts must be numeric")
+	y <- as.matrix(y)
+
+#   Checking offsets
+	if (!is.double(offset)) storage.mode(offset) <- "double"
+	offset <- makeCompressedMatrix(offset, byrow=TRUE)
+
+#   Checking dispersion
+	if (!is.double(dispersion)) storage.mode(dispersion) <- "double"
+	dispersion <- makeCompressedMatrix(dispersion, byrow=FALSE)
+
+#   Checking weights
+	if(is.null(weights)) weights <- 1
+	if (!is.double(weights)) storage.mode(weights) <- "double"
+	weights <- makeCompressedMatrix(weights, byrow=TRUE)
+	  
 #	Fit tagwise linear models. This is actually the most time-consuming
 #	operation that I can see for this function.
 	fit <- glmFit(y,design=design,dispersion=dispersion,offset=offset,prior.count=0,weights=weights,start=start)
-
-#	Compute log-likelihood.
 	mu <- fit$fitted
-	if(is.null(weights)) weights <- 1
-	if(all(dispersion == 0)){
-#		loglik <- rowSums(weights*dpois(y,lambda=mu,log = TRUE))
-		ll <- y*log(mu)-mu-lgamma(y+1)
-		ll[mu==0] <- 0
-		loglik <- rowSums(weights*ll)
-		
-	} else {
-#		loglik <- rowSums(weights*dnbinom(y,size=1/dispersion,mu=mu,log = TRUE))
-		r <- 1/dispersion
-		ll <- y*log(mu)-y*log(mu+r)+r*log(r)-r*log(mu+r)+lgamma(y+r)-lgamma(y+1)-lgamma(r)
-		ll[mu==0] <- 0
-		loglik <- rowSums(weights*ll)		
-	}
-	if (!adjust) {
-		return(loglik)
-	}
-		
-#	Calculating the Cox-Reid adjustment.
-	if(ncol(design)==1) {
-		D <- rowSums(weights*mu/(1+mu*dispersion))
-		cr <- 0.5*log(abs(D))
-	} else {
-		W <- weights*mu/(1+dispersion*mu)
 
-#	Checking type, otherwise the C++ code will complain.
-#	Note the use of a transposed matrix for easy row access in column-major format.
-		if (!is.double(W)) storage.mode(W)<-"double"
-		if (!is.double(design)) storage.mode(design)<-"double"
-		cr <- .Call(.cR_cr_adjust, t(W), design, nrow(design))
-		if (is.character(cr)) { stop(cr) }
-	}
+#   Check other inputs to C++ code
+	adjust <- as.logical(adjust)
+	if (!is.double(design)) storage.mode(design)<-"double"
+
+#   Compute adjusted log-likelihood
+	apl <- .Call(.cR_compute_apl, y, mu, dispersion, weights, adjust, design)
+	if (is.character(apl)) stop(apl)
 
 #	Deciding what to return.
-    if (get.coef) { 
-		return(list(apl=loglik-cr, beta=fit$coefficients))
+	if (get.coef) { 
+		return(list(apl=apl, beta=fit$coefficients))
 	} else {
-		return(loglik - cr)
+		return(apl)
 	}
 }
 
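The rewritten adjustedProfileLik() keeps its interface but now computes the log-likelihood and the Cox-Reid adjustment in a single C++ call. A call such as the following (with hypothetical data) still returns one adjusted log-likelihood per gene:

    y <- matrix(rnbinom(40, mu=10, size=2), nrow=10)
    design <- cbind(Intercept=1, Group=rep(0:1, each=2))
    apl <- adjustedProfileLik(dispersion=0.1, y=y, design=design, offset=log(colSums(y)))
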
diff --git a/R/aveLogCPM.R b/R/aveLogCPM.R
index 0b3c601..daf00b5 100644
--- a/R/aveLogCPM.R
+++ b/R/aveLogCPM.R
@@ -39,54 +39,40 @@ aveLogCPM.default <- function(y,lib.size=NULL,offset=NULL,prior.count=2,dispersi
 #	Gordon Smyth
 #	Created 25 Aug 2012. Last modified 25 Sep 2014.
 {
-#	Check y
+#	Special case when all counts and library sizes are zero
 	y <- as.matrix(y)
-	if(any(is.na(y))) stop("NA counts not allowed")
-	if(any(y<0)) stop("counts must be non-negative")
-
-#	Check prior.count
-	neg.prior <- prior.count < 0
-	if(any(neg.prior)) prior.count[neg.prior] <- 0
+	if(.isAllZero(y)) {
+		if ((is.null(lib.size) || max(lib.size)==0) && (is.null(offset) || max(offset)==-Inf)) {
+			abundance <- rep(-log(nrow(y)),nrow(y))
+			return( (abundance+log(1e6)) / log(2) )
+		}
+	}
 
 #	Check dispersion
 	if(is.null(dispersion)) dispersion <- 0.05
 	isna <- is.na(dispersion)
 	if(all(isna)) dispersion <- 0.05
 	if(any(isna)) dispersion[isna] <- mean(dispersion,na.rm=TRUE)
 
-#	Check lib.size and offset.
-#	If offset is provided, it takes precedence over lib.size.
-#	However it must have a similar average to log(lib.size)
-#	for the results to be meaningful as logCPM values
-	if(is.null(offset)) {
-		if(is.null(lib.size)) lib.size <- colSums(y)
-	} else {
-		lib.size <- exp(offset)
-	}
-	mean.lib.size <- mean(lib.size)
+	dispersion <- .compressDispersions(dispersion)
 
-#	Special case when all counts are zero
-	if(mean.lib.size==0) {
-		abundance <- rep(-log(nrow(y)),nrow(y))
-		return( (abundance+log(1e6)) / log(2) )
-	}
+#   Check weights
+	weights <- .compressWeights(weights)
 
-#	Ensuring lib.size has appropriate dimensions for prior.count
-	if(length(prior.count)>1L) {
-		if(nrow(y)!=length(prior.count)) stop("length of prior count vector should be equal to the number of rows")
-		lib.size <- expandAsMatrix(lib.size, dim(y)) 
-	}
+#   Check offsets
+	offset <- .compressOffsets(y, lib.size=lib.size, offset=offset)
 
-#	Scale prior counts to preserve fold changes
-	prior.count.scaled <- lib.size/mean.lib.size*prior.count
+#   Check prior counts
+	prior.count <- .compressPrior(prior.count)
 
-#	Add double prior counts to library sizes
-	offset <- log(lib.size+2*prior.count.scaled)
+#   Retrieve GLM fitting parameters
+	maxit <- formals(mglmOneGroup)$maxit
+	tol <- formals(mglmOneGroup)$tol
 
-#	Add prior counts to y
-	if (is.null(dim(prior.count.scaled))) prior.count.scaled <- expandAsMatrix(prior.count.scaled, dim(y))
-	y <- y+prior.count.scaled
+#   Calling the C++ code
+	ab <- .Call(.cR_ave_log_cpm, y, offset, prior.count, dispersion, weights, maxit, tol)
+	if (is.character(ab)) stop(ab)
 
-	abundance <- mglmOneGroup(y,dispersion=dispersion,offset=offset,weights=weights)
-	(abundance+log(1e6)) / log(2)
+	return(ab)
 }
+
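
aveLogCPM() now handles the all-zero case up front and otherwise delegates to the C++ routine, reusing mglmOneGroup()'s convergence parameters. Typical calls are unchanged, e.g. with simulated counts:

    y <- matrix(rpois(40, lambda=20), nrow=10)
    aveLogCPM(y)                  # default prior.count=2, dispersion=0.05
    aveLogCPM(matrix(0, 2, 3))    # all-zero special case handled above
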
diff --git a/R/cpm.R b/R/cpm.R
index 6a8f7d3..7f6d428 100644
--- a/R/cpm.R
+++ b/R/cpm.R
@@ -14,20 +14,29 @@ cpm.DGEList <- function(x, normalized.lib.sizes=TRUE, log=FALSE, prior.count=0.2
 cpm.default <- function(x, lib.size=NULL, log=FALSE, prior.count=0.25, ...)
 #	Counts per million for a matrix
 #	Davis McCarthy and Gordon Smyth.
-#	Created 20 June 2011. Last modified 11 March 2016
+#	Created 20 June 2011. Last modified 03 October 2016
 {
 	x <- as.matrix(x)
 	if (any(dim(x)==0L)) {
 		return(x)
 	}
+
 	if(is.null(lib.size)) lib.size <- colSums(x)
+	if(!is.double(lib.size)) storage.mode(lib.size) <- "double"
+	lib.size <- makeCompressedMatrix(lib.size, byrow=TRUE)
+	err <- .Call(.cR_check_positive, lib.size, "library sizes")
+	if (is.character(err)) stop(err)
+
+	# Calculating in C++ for max efficiency
 	if(log) {
-		prior.count.scaled <- lib.size/mean(lib.size)*prior.count
-		lib.size <- lib.size+2*prior.count.scaled
+		prior.count <- .compressPrior(prior.count)
+		out <- .Call(.cR_calculate_cpm_log, x, lib.size, prior.count)
+	} else {
+		out <- .Call(.cR_calculate_cpm_raw, x, lib.size)
 	}
-	lib.size <- 1e-6*lib.size
-	if(log)
-		log2(t( (t(x)+prior.count.scaled) / lib.size ))
-	else
-		t(t(x)/lib.size)
+	if (is.character(out)) stop(out)
+
+	# Cleaning up
+	dimnames(out) <- dimnames(x)
+	out
 }
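
cpm() now rejects non-positive library sizes and computes both branches in C++; the log branch still scales prior.count by library size. Usage is unchanged:

    y <- matrix(rpois(40, lambda=10), nrow=10)
    cpm(y)                               # counts per million
    cpm(y, log=TRUE, prior.count=0.25)   # log2-CPM with a small prior
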
diff --git a/R/diffSpliceDGE.R b/R/diffSpliceDGE.R
index 630da6d..8455bf5 100644
--- a/R/diffSpliceDGE.R
+++ b/R/diffSpliceDGE.R
@@ -2,7 +2,7 @@ diffSpliceDGE <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, genei
 {
 # Identify exons and genes with splice variants using negative binomial GLMs
 # Yunshun Chen and Gordon Smyth
-# Created 29 March 2014.  Last modified 25 September 2015. 
+# Created 29 March 2014.  Last modified 03 October 2016. 
 
 #	Check if glmfit is from glmFit() or glmQLFit()
 	isLRT <- is.null(glmfit$df.prior)
@@ -106,7 +106,8 @@ diffSpliceDGE <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, genei
 	gene.betabar <- fit.gene$coefficients[g, coef, drop=FALSE]
 
 #	New offset
-	offset.new <- glmfit$offset + gene.betabar %*% t(design[, coef, drop=FALSE])
+	offset.new <- .addCompressedMatrices(makeCompressedMatrix(glmfit$offset),  
+	        makeCompressedMatrix(gene.betabar %*% t(design[,coef,drop=FALSE])))
 	coefficients <- beta - gene.betabar
 
 #	Testing
diff --git a/R/estimateDisp.R b/R/estimateDisp.R
index 149018d..7b94170 100644
--- a/R/estimateDisp.R
+++ b/R/estimateDisp.R
@@ -12,6 +12,7 @@ estimateDisp.DGEList <- function(y, design=NULL, prior.df=NULL, trend.method="lo
 
 	d <- estimateDisp(y=y$counts, design=design, group=group, lib.size=lib.size, offset=getOffset(y), prior.df=prior.df, trend.method=trend.method, mixed.df=mixed.df, tagwise=tagwise, span=span, min.row.sum=min.row.sum, grid.length=grid.length, grid.range=grid.range, robust=robust, winsor.tail.p=winsor.tail.p, tol=tol, weights=y$weights, ...)
 
+	if(!is.null(design)) y$design <- design
 	y$common.dispersion <- d$common.dispersion
 	y$trended.dispersion <- d$trended.dispersion
 	if(tagwise) y$tagwise.dispersion <- d$tagwise.dispersion
@@ -26,7 +27,7 @@ estimateDisp.DGEList <- function(y, design=NULL, prior.df=NULL, trend.method="lo
 estimateDisp.default <- function(y, design=NULL, group=NULL, lib.size=NULL, offset=NULL, prior.df=NULL, trend.method="locfit", mixed.df=FALSE, tagwise=TRUE, span=NULL, min.row.sum=5, grid.length=21, grid.range=c(-10,10), robust=FALSE, winsor.tail.p=c(0.05,0.1), tol=1e-06, weights=NULL, ...)
 #  Use GLM approach if a design matrix is given, and classic approach otherwise.
 #  It calculates a matrix of likelihoods for each gene at a set of dispersion grid points, and then calls WLEB() to do the shrinkage.
-#  Yunshun Chen, Gordon Smyth. Created July 2012. Last modified 18 March 2016.
+#  Yunshun Chen, Aaron Lun, Gordon Smyth. Created July 2012. Last modified 03 Oct 2016.
 {
 #	Check y
 	y <- as.matrix(y)
@@ -47,11 +48,16 @@ estimateDisp.default <- function(y, design=NULL, group=NULL, lib.size=NULL, offs
 	if(length(lib.size)!=nlibs) stop("Incorrect length of lib.size.")
 	
 #	Check offset
-	if(is.null(offset)) offset <- log(lib.size)
-	offset <- expandAsMatrix(offset, dim(y))
+	offset <- .compressOffsets(y, lib.size=lib.size, offset=offset)
+
+#	Check weights
+	weights <- .compressWeights(weights)
 
 #	Check for genes with small counts
 	sel <- rowSums(y) >= min.row.sum
+	sely <- .subsetMatrixWithoutCopying(y, i=sel)
+	seloffset <- .subsetMatrixWithoutCopying(offset, i=sel)
+	selweights <- .subsetMatrixWithoutCopying(weights, i=sel)
 	
 #	Spline points
 	spline.pts <- seq(from=grid.range[1], to=grid.range[2], length=grid.length)
@@ -94,14 +100,16 @@ estimateDisp.default <- function(y, design=NULL, group=NULL, lib.size=NULL, offs
 			return(list(common.dispersion=NA, trended.dispersion=NA, tagwise.dispersion=NA))
 		}
 
-		# Protect against zeros.
-		glmfit <- glmFit(y[sel,], design, offset=offset[sel,], dispersion=0.05, prior.count=0)
-		zerofit <- (glmfit$fitted.values < 1e-4) & (glmfit$counts < 1e-4)
+		# Identify which observations have means of zero (weights aren't needed here).
+		glmfit <- glmFit(sely, design, offset=seloffset, dispersion=0.05, prior.count=0)
+		zerofit <- .areFittedValuesZero(y=glmfit$counts, mu=glmfit$fitted.values)
 		by.group <- .comboGroups(zerofit)
 
 		for (subg in by.group) { 
 			cur.nzero <- !zerofit[subg[1],]
 			if (!any(cur.nzero)) { next } 
+
+			# Removing samples with zero means from design, to avoid attempts to converge to -Inf.
 			if (all(cur.nzero)) { 
 				redesign <- design
 			} else {
@@ -111,11 +119,15 @@ estimateDisp.default <- function(y, design=NULL, group=NULL, lib.size=NULL, offs
 				if (nrow(redesign) == ncol(redesign)) { next }
 			}
 
+			cury <- .subsetMatrixWithoutCopying(sely, i=subg, j=cur.nzero)
+			curo <- .subsetMatrixWithoutCopying(seloffset, i=subg, j=cur.nzero)
+			curw <- .subsetMatrixWithoutCopying(selweights, i=subg, j=cur.nzero)
+		   
 			# Using the last fit to hot-start the next fit
 			last.beta <- NULL
-			for(i in 1:grid.length) {
-				out <- adjustedProfileLik(spline.disp[i], y=y[sel, ][subg,cur.nzero,drop=FALSE], design=redesign, 
-					offset=offset[sel,][subg,cur.nzero,drop=FALSE], start=last.beta, get.coef=TRUE)
+			for(i in seq_len(grid.length)) {
+				out <- adjustedProfileLik(spline.disp[i], y=cury, design=redesign, 
+					offset=curo, weights=curw, start=last.beta, get.coef=TRUE)
 				l0[subg,i] <- out$apl
 				last.beta <- out$beta
 			}
@@ -136,13 +148,13 @@ estimateDisp.default <- function(y, design=NULL, group=NULL, lib.size=NULL, offs
 
 	# Calculate prior.df
 	if(is.null(prior.df)){
-		glmfit <- glmFit(y[sel,], design, offset=offset[sel,], dispersion=disp.trend, prior.count=0)
+		glmfit <- glmFit(sely, offset=seloffset, weight=selweights, design=design, dispersion=disp.trend, prior.count=0)
 
 		# Residual deviances
 		df.residual <- glmfit$df.residual
 
 		# Adjust df.residual for fitted values at zero
-		zerofit <- (glmfit$fitted.values < 1e-4) & (glmfit$counts < 1e-4)
+		zerofit <- .areFittedValuesZero(y=glmfit$counts, mu=glmfit$fitted.values)
 		df.residual <- .residDF(zerofit, design)
 
 		# Empirical Bayes squeezing of the quasi-likelihood variance factors
@@ -267,3 +279,51 @@ WLEB <- function(theta, loglik, prior.n=5, covariate=NULL, trend.method="locfit"
 
 	out
 }
+
+.subsetMatrixWithoutCopying <- function(x, i, j) 
+# This will attempt to subset the matrix without any copying if
+# it detects that 'i' and 'j' don't modify the ordering of the matrix.
+# This reduces the memory footprint for large matrices.
+#
+# written by Aaron Lun
+# created 29 September 2016
+{
+	if (is(x, "compressedMatrix")) {
+		# Subsetting is ignored if rows/columns are repeated.
+		if (attributes(x)$repeat.row) i <- TRUE
+		if (attributes(x)$repeat.col) j <- TRUE
+	} else if (!is.matrix(x)) {
+		stop("'x' must be a matrix for subsetting")
+	}
+
+	isokay <- TRUE
+	if (!missing(i)) {
+		# Most flexible way of handling different types of subset vectors;
+		# try it out and see if it gives the same results.
+		example <- cbind(seq_len(nrow(x)))
+		rownames(example) <- rownames(x)
+		if (!identical(example, example[i,,drop=FALSE])) isokay <- FALSE
+	}
+	if (!missing(j)) {
+		example <- rbind(seq_len(ncol(x)))
+		colnames(example) <- colnames(x)
+		if (!identical(example, example[,j,drop=FALSE])) isokay <- FALSE
+	}
+
+	if (isokay) {
+		# Avoids copying if no modification incurred.
+		return(x)
+	} else {
+		return(x[i,j,drop=FALSE])
+	}
+}
+
+.areFittedValuesZero <- function(y, mu, tol=1e-4) 
+# A slightly more efficient way to check if the fitted values are zero,
+# based on both the count and fitted value being zero for an observation.
+{
+    if (!is.double(mu)) storage.mode(mu) <- "double"
+    out <- .Call(.cR_check_zero_fitted, y, mu, as.double(tol))
+    if (is.character(out)) stop(out)
+    return(out)
+}
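
estimateDisp() now records the design matrix in the DGEList and passes observation weights through to the APL computations. A typical two-group call, with hypothetical counts:

    group <- factor(rep(1:2, each=3))
    design <- model.matrix(~group)
    y <- DGEList(counts=matrix(rnbinom(600, mu=10, size=2), nrow=100), group=group)
    y <- estimateDisp(y, design)
    y$common.dispersion
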
diff --git a/R/expandAsMatrix.R b/R/expandAsMatrix.R
index c71064b..f96c3cd 100644
--- a/R/expandAsMatrix.R
+++ b/R/expandAsMatrix.R
@@ -1,8 +1,7 @@
-
 expandAsMatrix <- function(x,dim=NULL,byrow=TRUE)
 #	Convert scalar, row or column vector, or matrix, to be a matrix
-#	Gordon Smyth
-#	26 Jan 2011.  Last modified 27 July 2015 (Yunshun Chen).
+#	Gordon Smyth, Yunshun Chen, Aaron Lun
+#	26 Jan 2011.  Last modified 03 Oct 2016.
 {
 #	Check dim argument
 	if(is.null(dim)) return(as.matrix(x))
@@ -25,6 +24,19 @@ expandAsMatrix <- function(x,dim=NULL,byrow=TRUE)
 	if(length(dx)<2) stop("x has less than 2 dimensions")
 	if(length(dx)>2) stop("x has more than 2 dimensions")
 	if(all(dx==dim)) return(as.matrix(x))
+
+#	x is a compressedMatrix
+	if(is(x, "compressedMatrix")) {
+		if (attributes(x)$repeat.row && dim[2]==ncol(x)) {
+			if (!byrow) warning("'byrow=FALSE' is not compatible with compressedMatrix settings")
+			return(Recall(as.vector(x),dim=dim,byrow=TRUE))
+		} else if (attributes(x)$repeat.col && dim[1]==nrow(x)) {
+			if (byrow) warning("'byrow=TRUE' is not compatible with compressedMatrix settings")
+			return(Recall(as.vector(x),dim=dim,byrow=FALSE))
+		} else if (all(dx==1)) {
+			return(Recall(as.vector(x),dim=dim))
+		}
+	}
 	stop("x is matrix of wrong size")
 }
 
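expandAsMatrix() now also accepts a compressedMatrix, using its repeat.row/repeat.col attributes to pick the recycling direction (and warning when byrow contradicts them). For ordinary inputs the behaviour is unchanged:

    expandAsMatrix(1, dim=c(2,3))                  # scalar filled out to 2x3
    expandAsMatrix(1:3, dim=c(2,3))                # each row is 1:3
    expandAsMatrix(1:2, dim=c(2,3), byrow=FALSE)   # each column is 1:2
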
diff --git a/R/glmQLFTest.R b/R/glmQLFTest.R
index cb41005..245282b 100644
--- a/R/glmQLFTest.R
+++ b/R/glmQLFTest.R
@@ -5,18 +5,20 @@ UseMethod("glmQLFit")
 
 glmQLFit.DGEList <- function(y, design=NULL, dispersion=NULL, offset=NULL, abundance.trend=TRUE, robust=FALSE, winsor.tail.p=c(0.05, 0.1), ...)
 # 	Yunshun Chen and Aaron Lun
-#	Created 05 November 2014.
+#	Created 05 November 2014.  Last modified 03 Oct 2016.
 {
+	if(is.null(design)) design <- model.matrix(~y$samples$group)
 	if(is.null(dispersion)) {
 		dispersion <- y$trended.dispersion
 		if(is.null(dispersion)) dispersion <- y$common.dispersion
-		if(is.null(dispersion)) dispersion <- 0.05
+		if(is.null(dispersion)) stop("dispersion value not supplied")
 	}
 	if(is.null(dispersion)) stop("No dispersion values found in DGEList object.")
 	if(is.null(offset)) offset <- getOffset(y)
 	if(is.null(y$AveLogCPM)) y$AveLogCPM <- aveLogCPM(y)
 
-	fit <- glmQLFit(y=y$counts, design=design, dispersion=dispersion, offset=offset, lib.size=NULL, abundance.trend=abundance.trend, AveLogCPM=y$AveLogCPM, robust=robust, winsor.tail.p=winsor.tail.p, weights=y$weights, ...)
+	fit <- glmQLFit(y=y$counts, design=design, dispersion=dispersion, offset=offset, lib.size=NULL, abundance.trend=abundance.trend, 
+		AveLogCPM=y$AveLogCPM, robust=robust, winsor.tail.p=winsor.tail.p, weights=y$weights, ...)
 	fit$samples <- y$samples
 	fit$genes <- y$genes
 #	fit$prior.df <- y$prior.df
@@ -24,42 +26,17 @@ glmQLFit.DGEList <- function(y, design=NULL, dispersion=NULL, offset=NULL, abund
 	new("DGEGLM",fit)
 }
 
-glmQLFit.default <- function(y, design=NULL, dispersion=NULL, offset=NULL, lib.size=NULL, abundance.trend=TRUE, AveLogCPM=NULL, robust=FALSE, winsor.tail.p=c(0.05, 0.1), ...)
+glmQLFit.default <- function(y, design=NULL, dispersion=NULL, offset=NULL, lib.size=NULL, weights=NULL, 
+        abundance.trend=TRUE, AveLogCPM=NULL, robust=FALSE, winsor.tail.p=c(0.05, 0.1), ...)
 # 	Fits a GLM and computes quasi-likelihood dispersions for each gene.
 # 	Davis McCarthy, Gordon Smyth, Yunshun Chen, Aaron Lun.
-# 	Originally part of glmQLFTest, as separate function 15 September 2014. Last modified 05 November 2014.
+# 	Originally part of glmQLFTest, as separate function 15 September 2014. Last modified 03 October 2016.
 {
-#	Check y
-	y <- as.matrix(y)
-	ntag <- nrow(y)
-	nlib <- ncol(y)
-	
-#	Check design
-	if(is.null(design)) {
-		design <- matrix(1,ncol(y),1)
-		rownames(design) <- colnames(y)
-		colnames(design) <- "Intercept"
-	} else {
-		design <- as.matrix(design)
-		ne <- nonEstimable(design)
-		if(!is.null(ne)) stop(paste("Design matrix not of full rank.  The following coefficients not estimable:\n", paste(ne, collapse = " ")))
-	}
-	
-#	Check dispersion
-	if(is.null(dispersion)) stop("No dispersion values provided.")
-
-#	Check offset and lib.size
-	if(is.null(offset)) {
-		if(is.null(lib.size)) lib.size <- colSums(y)
-		offset <- log(lib.size)
-	}
-	offset <- expandAsMatrix(offset,dim(y))
-
-	glmfit <- glmFit(y,design=design,dispersion=dispersion,offset=offset,lib.size=lib.size,...)
+	glmfit <- glmFit(y, design=design, dispersion=dispersion, offset=offset, lib.size=lib.size, weights=weights,...)
 
 #	Setting up the abundances.
 	if(abundance.trend) {
-		if(is.null(AveLogCPM)) AveLogCPM <- aveLogCPM(y) 
+		if(is.null(AveLogCPM)) AveLogCPM <- aveLogCPM(y, lib.size=lib.size, weights=weights, dispersion=dispersion) 
 		glmfit$AveLogCPM <- AveLogCPM
 	} else {
 		AveLogCPM <- NULL
@@ -67,7 +44,7 @@ glmQLFit.default <- function(y, design=NULL, dispersion=NULL, offset=NULL, lib.s
 
 #	Adjust df.residual for fitted values at zero
 	zerofit <- (glmfit$fitted.values < 1e-4) & (glmfit$counts < 1e-4)
-	df.residual <- .residDF(zerofit, design)
+	df.residual <- .residDF(zerofit, glmfit$design)
 
 #	Empirical Bayes squeezing of the quasi-likelihood variance factors
 	s2 <- glmfit$deviance / df.residual
@@ -87,9 +64,9 @@ glmQLFit.default <- function(y, design=NULL, dispersion=NULL, offset=NULL, lib.s
 glmQLFTest <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, poisson.bound=TRUE)
 #	Quasi-likelihood F-tests for DGE glms.
 #	Davis McCarthy, Gordon Smyth, Aaron Lun.
-#	Created 18 Feb 2011. Last modified 28 Jan 2016.
+#	Created 18 Feb 2011. Last modified 04 Oct 2016.
 {
-    if(!is(glmfit,"DGEGLM")) stop("glmfit must be an DGEGLM object produced by glmQLFit") 
+	if(!is(glmfit,"DGEGLM")) stop("glmfit must be an DGEGLM object produced by glmQLFit") 
 	if(is.null(glmfit$var.post)) stop("need to run glmQLFit before glmQLFTest") 
 	out <- glmLRT(glmfit, coef=coef, contrast=contrast)
 
@@ -104,7 +81,7 @@ glmQLFTest <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, poisson.
 
 #	Ensure is not more significant than chisquare test with Poisson variance
 	if(poisson.bound) {
-		i <- rowSums(glmfit$var.post * (1 + glmfit$fitted.values * glmfit$dispersion) < 1) > 0L
+		i <- .isBelowPoissonBound(glmfit)
 		if(any(i)) {
 			pois.fit <- glmfit[i,]
 			pois.fit <- glmFit(pois.fit$counts, design=pois.fit$design, offset=pois.fit$offset, weights=pois.fit$weights, start=pois.fit$unshrunk.coefficients, dispersion=0)
@@ -121,6 +98,16 @@ glmQLFTest <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, poisson.
 	out
 }
 
+.isBelowPoissonBound <- function(glmfit) 
+# A convenience function to avoid generating temporary matrices.
+{
+    disp <- makeCompressedMatrix(glmfit$dispersion, byrow=FALSE)
+    s2 <- makeCompressedMatrix(glmfit$var.post, byrow=FALSE)
+    out <- .Call(.cR_check_poisson_bound, glmfit$fitted.values, disp, s2)
+    if (is.character(out)) stop(out)
+    return(out)
+}
+
 plotQLDisp <- function(glmfit, xlab="Average Log2 CPM", ylab="Quarter-Root Mean Deviance", pch=16, cex=0.2, col.shrunk="red", col.trend="blue", col.raw="black", ...)
 # 	Plots the result of QL-based shrinkage.
 #	Davis McCarthy, Gordon Smyth, Aaron Lun, Yunshun Chen.
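
glmQLFit.default() now delegates its argument checking to glmFit() and gains an explicit weights argument, while glmQLFTest() applies the Poisson bound via the new .isBelowPoissonBound() helper. The QL pipeline itself is unchanged; continuing the estimateDisp() example above:

    fit <- glmQLFit(y, design)
    qlf <- glmQLFTest(fit, coef=2)
    topTags(qlf)
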
diff --git a/R/glmTreat.R b/R/glmTreat.R
index fa70ec3..a67f49c 100644
--- a/R/glmTreat.R
+++ b/R/glmTreat.R
@@ -1,18 +1,13 @@
-treatDGE <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
-{
-	message("treatDGE() has been renamed to glmTreat().  Please use the latter instead.")
-	glmTreat(glmfit=glmfit,coef=coef,contrast=contrast,lfc=lfc)
-}
-
-glmTreat <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
+glmTreat <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0, null="interval")
 #	Likelihood ratio test or quasi-likelihood F-test with a threshold
 #	Yunshun Chen and Gordon Smyth
-#	Created on 05 May 2014. Last modified on 29 April 2015
+#	Created on 05 May 2014. Last modified on 03 Oct 2016
 {
 	if(lfc < 0) stop("lfc has to be non-negative")
-	
+
 #	Check if glmfit is from glmFit() or glmQLFit()
 	isLRT <- is.null(glmfit$df.prior)
+	Fit <- if(isLRT) glmFit else glmQLFit
 
 #	Switch to glmLRT() or glmQLFTest() if lfc is zero
 	if(lfc==0) {
@@ -32,7 +27,6 @@ glmTreat <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
 		stop("glmfit must be an DGEGLM object (usually produced by glmFit or glmQLFit).")
 	}
 	if(is.null(glmfit$AveLogCPM)) glmfit$AveLogCPM <- aveLogCPM(glmfit)
-	nlibs <- ncol(glmfit)
 	ngenes <- nrow(glmfit)
 
 #	Check design matrix
@@ -74,78 +68,68 @@ glmTreat <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
 		design <- reform$design
 	}
 	unshrunk.logFC <- as.vector(unshrunk.logFC)
-	up <- unshrunk.logFC >= 0
 
 #	Null design matrix
 	design0 <- design[, -coef, drop=FALSE]
 
-	if(isLRT) {
-#		Adjusted offset
-		offset.adj <- matrix(-lfc*log(2), ngenes, 1)
-		offset.adj[up, ] <- lfc*log(2)
-		
-#		Test statistics at beta_0 = tau
-		offset.new <- glmfit$offset + offset.adj %*% t(design[, coef, drop=FALSE])
-		fit0.tau <- glmFit(glmfit$counts, design=design0, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-		fit1.tau <- glmFit(glmfit$counts, design=design, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-		X2.tau <- pmax(0, fit0.tau$deviance - fit1.tau$deviance)
-		z.tau <- sqrt(X2.tau)
-		
-#		Test statistics at beta_0 = -tau
-		offset.new <- glmfit$offset - offset.adj %*% t(design[, coef, drop=FALSE])
-		fit0.tau <- glmFit(glmfit$counts, design=design0, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-		fit1.tau <- glmFit(glmfit$counts, design=design, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-		X2.tau2 <- pmax(0, fit0.tau$deviance - fit1.tau$deviance)
-		z.tau2 <- sqrt(X2.tau2)
-
-		within <- abs(unshrunk.logFC) <= lfc
-		sgn <- 2*within - 1
-
-#		Integral of Normal CDF from a to b
-		fun <- function(a,b) ifelse(a==b, pnorm(a), ( b*pnorm(b)+dnorm(b) - (a*pnorm(a)+dnorm(a)) )/(b-a) )
-		p.value <- 2*fun(-z.tau2, z.tau*sgn)
-	} else {
-#		Guass quadrature
-		if(!requireNamespace("statmod",quietly=TRUE)) stop("statmod required but is not available")
-		nnodes <- 5
-		gquad <- statmod::gauss.quad.prob(nnodes, dist="uniform", l=0, u=lfc*log(2))
-	
-		offset.adj <- X2.pos <- X2.neg <- matrix(-gquad$nodes, ngenes, nnodes, byrow=TRUE)
-		offset.adj[up, ] <- -offset.adj[up, ] 
-
-		for(k in 1:nnodes){
-#			Test statistics at beta_0 = pos_nodes (tau)
-			offset.new <- glmfit$offset + offset.adj[,k] %*% t(design[, coef, drop=FALSE])
-			fit0.pos <- glmQLFit(glmfit$counts, design=design0, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-			fit1.pos <- glmQLFit(glmfit$counts, design=design, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-			X2.pos[,k] <- pmax(0, fit0.pos$deviance - fit1.pos$deviance)
-			
-#			Test statistics at beta_0 = neg_nodes (-tau)
-			offset.new <- glmfit$offset - offset.adj[,k] %*% t(design[, coef, drop=FALSE])
-			fit0.neg <- glmQLFit(glmfit$counts, design=design0, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-			fit1.neg <- glmQLFit(glmfit$counts, design=design, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
-			X2.neg[,k] <- pmax(0, fit0.neg$deviance - fit1.neg$deviance)
-		}
-		z.pos <- sqrt(X2.pos)
-		z.neg <- sqrt(X2.neg)
-	
-		within <- matrix(abs(unshrunk.logFC)*log(2), ngenes, nnodes) <= abs(offset.adj)
-		sgn <- 2*within - 1
+#	Offset adjustment
+	offset.old <- makeCompressedMatrix(glmfit$offset, byrow=TRUE)
+	offset.adj <- makeCompressedMatrix(lfc*log(2) * design[, coef], byrow=TRUE)
+
+#	Test statistics at beta_0 = tau
+	offset.new <- .addCompressedMatrices(offset.old, offset.adj)
+	fit0 <- Fit(glmfit$counts, design=design0, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
+	fit1 <- Fit(glmfit$counts, design=design, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
+	z.left <- sqrt( pmax(0, fit0$deviance - fit1$deviance) )
+
+#	Test statistics at beta_0 = -tau
+	offset.new <- .addCompressedMatrices(offset.old, -offset.adj)
+	fit0 <- Fit(glmfit$counts, design=design0, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
+	fit1 <- Fit(glmfit$counts, design=design, offset=offset.new, weights=glmfit$weights, dispersion=glmfit$dispersion, prior.count=0)
+	z.right <- sqrt( pmax(0, fit0$deviance - fit1$deviance) )
 
-#		Calculate expected p-values by Gauss quadrature
-		t.pos <- z.pos/sqrt(glmfit$var.post)
-		t.neg <- z.neg/sqrt(glmfit$var.post)
+#	Make sure z.left < z.right
+	i <- z.left > z.right
+	if(any(i)) {
+		tmp <- z.left[i]
+		z.left[i] <- z.right[i]
+		z.right[i] <- tmp
+	}
+
+#	Convert t to z under the QL pipeline
+	if(!isLRT){
 		df.total <- glmfit$df.prior + glmfit$df.residual.zeros
 		max.df.residual <- ncol(glmfit$counts)-ncol(glmfit$design)
 		df.total <- pmin(df.total, nrow(glmfit)*max.df.residual)
-		p.value <- ( pt(t.pos*sgn, df=df.total) + pt(t.neg, df=df.total, lower.tail=FALSE) ) %*% gquad$weights
+		z.left <- limma::zscoreT(z.left/sqrt(glmfit$var.post), df=df.total)
+		z.right <- limma::zscoreT(z.right/sqrt(glmfit$var.post), df=df.total)
+	}
+
+	within <- abs(unshrunk.logFC) <= lfc
+	sgn <- 2*within - 1
+	z.left <- z.left*sgn
+
+	null <- match.arg(null, c("interval", "worst.case"))
+	if(null=="interval") {
+#		Interval threshold
+		c <- 1.470402
+		j <- z.right + z.left > c
+		p.value <- rep_len(1L, ngenes)
+		p.value[j] <- .integratepnorm(-z.right[j], -z.right[j] + c) + .integratepnorm(z.left[j] - c, z.left[j])
+		p.value[!j] <- 2*.integratepnorm(-z.right[!j], z.left[!j])
+	} else {
+		p.value <- pnorm(-z.right) + pnorm(z.left)
+	}
 	
-#		Ensure is not more significant than z-test
-		i <- glmfit$var.post < 1
+#	Ensure it is not more significant than chisquare test with Poisson variance		
+	if(!isLRT){
+		i <- rowSums(glmfit$var.post * (1 + glmfit$fitted.values * glmfit$dispersion) < 1) > 0L
 		if(any(i)) {
-			z.pvalue <- ( pnorm(z.pos[i,]*sgn[i,]) + pnorm(z.neg[i,], lower.tail=FALSE) ) %*% gquad$weights
-			p.value[i] <- pmax(p.value[i], z.pvalue)
-		}	
+			pois.fit <- glmfit[i,]
+			pois.fit <- glmFit(pois.fit$counts, design=pois.fit$design, offset=pois.fit$offset, weights=pois.fit$weights, start=pois.fit$unshrunk.coefficients, dispersion=0)
+			pois.res <- Recall(pois.fit, coef=coef, contrast=contrast, lfc=lfc)
+			p.value[i] <- pmax(p.value[i], pois.res$table$PValue)
+		}
 	}
 
 #	Table output
@@ -166,3 +150,6 @@ glmTreat <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
 	glmfit$comparison <- coef.name
 	new("DGELRT",unclass(glmfit))
 }
+
+
+.integratepnorm <- function(a, b) ifelse(a==b, pnorm(a), ( b*pnorm(b)+dnorm(b) - (a*pnorm(a)+dnorm(a)) )/(b-a) )
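
glmTreat() gains a null argument: the default "interval" method uses the interval threshold above, while "worst.case" reproduces the more conservative bound. For example, testing against a 1.5-fold threshold on the QL fit from the previous example:

    tr <- glmTreat(fit, coef=2, lfc=log2(1.5))
    tr.wc <- glmTreat(fit, coef=2, lfc=log2(1.5), null="worst.case")
    topTags(tr)
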
diff --git a/R/glmfit.R b/R/glmfit.R
index 48e0928..811acfb 100644
--- a/R/glmfit.R
+++ b/R/glmfit.R
@@ -4,8 +4,9 @@ glmFit <- function(y, ...)
 UseMethod("glmFit")
 
 glmFit.DGEList <- function(y, design=NULL, dispersion=NULL, prior.count=0.125, start=NULL, ...)
-#	Created 11 May 2011.  Last modified 11 March 2013.
+#	Created 11 May 2011.  Last modified 14 Aug 2016.
 {
+	if(is.null(design)) design <- model.matrix(~y$samples$group)
 	if(is.null(dispersion)) dispersion <- getDispersion(y)
 	if(is.null(dispersion)) stop("No dispersion values found in DGEList object.")
 
@@ -23,7 +24,7 @@ glmFit.default <- function(y, design=NULL, dispersion=NULL, offset=NULL, lib.siz
 #	Fit negative binomial generalized linear model for each transcript
 #	to a series of digital expression libraries
 #	Davis McCarthy and Gordon Smyth
-#	Created 17 August 2010. Last modified 18 May 2015.
+#	Created 17 August 2010. Last modified 03 Oct 2016.
 {
 #	Check y
 	y <- as.matrix(y)
@@ -43,16 +44,10 @@ glmFit.default <- function(y, design=NULL, dispersion=NULL, offset=NULL, lib.siz
 
 #	Check dispersion
 	if(is.null(dispersion)) stop("No dispersion values provided.")
-	dispersion.mat <- expandAsMatrix(dispersion, dim(y), byrow=FALSE)
+	dispersion.mat <- makeCompressedMatrix(dispersion, byrow=FALSE)
 
 #	Check offset and lib.size
-	if(is.null(offset)) {
-		if(is.null(lib.size)) lib.size <- colSums(y)
-		if(any(lib.size==0L)) stop("Zero library size detected. Remove the empty libraries before proceeding to the next step.")
-		offset <- log(lib.size)
-	}
-	if(any(is.infinite(offset))) stop("Infinite offset value detected. Check the input offset.")
-	offset <- expandAsMatrix(offset,dim(y))
+	offset <- .compressOffsets(y=y, lib.size=lib.size, offset=offset)
 
 #	weights are checked in lower-level functions
 
diff --git a/R/locfitByCol.R b/R/locfitByCol.R
index b38dcbc..dcc6ebf 100644
--- a/R/locfitByCol.R
+++ b/R/locfitByCol.R
@@ -1,13 +1,12 @@
 locfitByCol <- function(y, x=NULL, weights=1, span=0.5, degree=0)
 #	Gordon Smyth
-#	20 Aug 2012.  Last modified 2 Feb 2015.
+#	20 Aug 2012.  Last modified 15 June 2016.
 {
 	y <- as.matrix(y)
 	ntags <- nrow(y)
 	weights <- rep_len(weights,ntags)
 	if(is.null(x)) x <- 1:ntags
 	if(span*ntags<2 || ntags<=1) return(y)
-	if(!requireNamespace("locfit",quietly=TRUE)) stop("locfit required but is not available")
-	for (j in 1:ncol(y)) y[,j] <- fitted(locfit::locfit(y[,j]~x,weights=weights, alpha=span,deg=degree))
+	for (j in 1:ncol(y)) y[,j] <- fitted(locfit(y[,j]~x,weights=weights, alpha=span,deg=degree))
 	y
 }
diff --git a/R/makeCompressedMatrix.R b/R/makeCompressedMatrix.R
new file mode 100644
index 0000000..b39df76
--- /dev/null
+++ b/R/makeCompressedMatrix.R
@@ -0,0 +1,164 @@
+makeCompressedMatrix <- function(x, byrow=TRUE) 
+# Coerces a NULL, scalar, vector or matrix to a compressed matrix,
+# determines whether the rows or columns are intended to be
+# repeated, and stores this in the attributes.
+#
+# written by Aaron Lun
+# created 24 September 2016
+# last modified 27 September 2016
+{
+	if (is.matrix(x)) {
+		if (is(x, "compressedMatrix")) {
+			return(x)
+		}
+		repeat.row <- repeat.col <- FALSE
+	} else if (length(x)==1L) {
+		repeat.row <- repeat.col <- TRUE
+		x <- matrix(x)
+	} else {
+		if (!byrow) {
+			x <- cbind(x)
+			repeat.row <- FALSE
+			repeat.col <- TRUE
+		} else {
+			x <- rbind(x)
+			repeat.row <- TRUE
+			repeat.col <- FALSE
+		}
+	}
+	class(x) <- "compressedMatrix"
+	attributes(x)$repeat.row <- repeat.row
+	attributes(x)$repeat.col <- repeat.col
+	return(x)
+}
+
+`[.compressedMatrix` <- function(x, i, j, ...)
+# A wrapper function to easily subset a makeCompressedMatrix object.
+#
+# written by Aaron Lun
+# created 24 September 2016
+{
+	row.status <- attributes(x)$repeat.row
+	col.status <- attributes(x)$repeat.col
+	oldclass <- class(x)
+	x <- unclass(x)
+	if (!row.status && !missing(i)) {
+		x <- x[i,,drop=FALSE]
+	}
+	if (!col.status && !missing(j)) {
+		x <- x[,j,drop=FALSE]
+	}
+	class(x) <- oldclass
+	attributes(x)$repeat.row <- row.status
+	attributes(x)$repeat.col <- col.status
+	return(x)
+}
+
+as.matrix.compressedMatrix <- function(x, ...) 
+# Getting rid of odds and ends.
+#
+# written by Aaron Lun
+# created 26 September 2016
+{
+	attributes(x)$repeat.row <- NULL
+	attributes(x)$repeat.col <- NULL
+	unclass(x)
+}
+
+.addCompressedMatrices <- function(x, y) 
+# A function that performs addition of two compressedMatrix objects,
+# in a manner that best preserves memory usage.
+#
+# written by Aaron Lun
+# created 26 September 2016
+# last modified 27 September 2016
+{
+	if (!is(x, "compressedMatrix") || !is(y, "compressedMatrix")) {
+		stop("only two compressedMatrix objects can be added")
+	}
+	dims <- pmax(dim(x), dim(y))
+	out <- .Call(.cR_add_repeat_matrices, x, y, dims[1], dims[2])
+	if (is.character(out)) stop(out)
+
+	summed <- out[[1]]
+	class(summed) <- class(x)
+	attributes(summed)$repeat.row <- out[[2]]
+	attributes(summed)$repeat.col <- out[[3]]
+	return(summed)
+}
+
+.compressOffsets <- function(y, offset, lib.size=NULL) 
+# A convenience function to avoid repeatedly having to write the code below.
+# If provided, offsets take precedence over the library size.
+# If neither are provided, library sizes are automatically computed
+# as the sum of counts in the count matrix 'y'.
+# A perfunctory check for finite values is performed in the C++ code.
+# If 'offset' is already of the compressedMatrix class, then 
+# we assume it's already gone through this once so we don't do it again.
+{
+	if (is(offset, "compressedMatrix")) {
+		return(offset)
+	}
+
+	if (is.null(offset)) {
+		if (is.null(lib.size)) lib.size <- colSums(y)
+		offset <- log(lib.size)
+	}
+	if (!is.double(offset)) storage.mode(offset) <- "double"
+	offset <- makeCompressedMatrix(offset, byrow=TRUE)
+
+	err <- .Call(.cR_check_finite, offset, "offsets")
+	if (is.character(err)) stop(err) 
+	return(offset)
+}
+
+.compressWeights <- function(weights=NULL) 
+# A convenience function to avoid repeatedly having to write the code below.
+# All weights default to 1 if not specified.
+# A perfunctory check for finite, positive values is performed in the C++ code.
+# If 'weights' is already a compressedMatrix, then we assume it's 
+# already gone through this and don't do it again.
+{
+	if (is(weights, "compressedMatrix")) {
+		return(weights)
+	}
+
+	if (is.null(weights)) weights <- 1
+	if (!is.double(weights)) storage.mode(weights) <- "double"
+	weights <- makeCompressedMatrix(weights, byrow=TRUE)
+
+	err <- .Call(.cR_check_positive, weights, "weights")
+	if (is.character(err)) stop(err)
+	return(weights)
+}
+
+.compressPrior <- function(prior.count) 
+# Again for the prior counts, checking for non-negative finite values.
+# Skipping the check if it's already a compressedMatrix object.
+{
+	if (is(prior.count, "compressedMatrix")) {
+		return(prior.count)
+	}
+			
+	if(!is.double(prior.count)) storage.mode(prior.count) <- "double"
+	prior.count <- makeCompressedMatrix(prior.count, byrow=FALSE)
+	err <- .Call(.cR_check_nonnegative, prior.count, "prior counts")
+	if (is.character(err)) stop(err)
+	return(prior.count)
+}
+
+.compressDispersions <- function(dispersion) 
+# Again for the dispersions, checking for non-negative finite values.
+# Skipping the check if it's already a compressedMatrix object.
+{
+	if (is(dispersion, "compressedMatrix")) {
+		return(dispersion)
+	}
+			
+	if(!is.double(dispersion)) storage.mode(dispersion) <- "double"
+	dispersion <- makeCompressedMatrix(dispersion, byrow=FALSE)
+	err <- .Call(.cR_check_nonnegative, dispersion, "dispersions")
+	if (is.character(err)) stop(err)
+	return(dispersion)
+}
+
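
The compressedMatrix class stores a scalar, row vector or column vector without expanding it to full dimensions, recording in repeat.row/repeat.col which dimension is meant to be recycled; subsetting along a repeated dimension is then a no-op. A small sketch of the semantics:

    m <- makeCompressedMatrix(1:4, byrow=TRUE)   # stored as 1x4 with repeat.row=TRUE
    attributes(m)$repeat.row                     # TRUE: the row is recycled as needed
    m[1:100, ]                                   # row subsetting is ignored
    as.matrix(makeCompressedMatrix(5))           # scalar: 1x1, repeated both ways
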
diff --git a/R/mglmLevenberg.R b/R/mglmLevenberg.R
index 7044fee..b819a5d 100644
--- a/R/mglmLevenberg.R
+++ b/R/mglmLevenberg.R
@@ -4,70 +4,46 @@ mglmLevenberg <- function(y, design, dispersion=0, offset=0, weights=NULL, coef.
 
 #	R version by Gordon Smyth and Yunshun Chen
 #	C++ version by Aaron Lun
-#	Created 3 March 2011.  Last modified 11 July 2012
+#	Created 3 March 2011.  Last modified 03 Oct 2016
 {
 #	Check arguments
 	y <- as.matrix(y)
 	if(!is.numeric(y)) stop("y is non-numeric")
-	if(any(y<0)) stop("y must be non-negative")
 	nlibs <- ncol(y)
 	ngenes <- nrow(y)
 	if(nlibs==0 || ngenes==0) stop("no data")
 
-	if(!( all(is.finite(y)) || all(is.finite(design)) )) stop("All values must be finite and non-missing")
-	design <- as.matrix(design)
-	dispersion <- expandAsMatrix(dispersion, dim(y), byrow=FALSE)
+#	Checks for negative, NA or non-finite values in the count matrix.
+	.isAllZero(y)
 
-	if(is.null(coef.start)) {
-		start.method <- match.arg(start.method, c("null","y"))
-		if(start.method=="null") N <- exp(offset)
-	} else {
-		coef.start <- as.matrix(coef.start)
-	}
-	
-	offset <- t(expandAsMatrix(offset,dim(y)))
+#	Checking the design matrix
+	design <- as.matrix(design)
+	if (!is.double(design)) storage.mode(design) <- "double"
+	if (!all(is.finite(design))) stop("all entries of design matrix must be finite and non-missing")
 
-#	Check weights
-	if(is.null(weights)) weights <- 1
-	weights <- t(expandAsMatrix(weights,dim(y)))
+#	Checking dispersions, offsets and weights
+	offset <- .compressOffsets(y, offset=offset)
+	dispersion <- .compressDispersions(dispersion)
+	weights <- .compressWeights(weights)
 
-# 	Initializing if desired. Note that lm.fit can fit in a vectorised manner, 
-# 	where each column of the input matrix is a separate set of observations.
+#	Initializing values for the coefficients at reasonable best guess with linear models.
 	if(is.null(coef.start)) {
-		if(start.method=="y") {
-			delta <- min(max(y), 1/6)
-			y1 <- pmax(y, delta)
-			fit <- lm.wfit(design, t(log(y1)) - offset, weights)
-			beta <- fit$coefficients
-			mu <- exp(beta + offset)
-		} else {
-			N <- expandAsMatrix(N,dim(y))
-			w <- t(weights) * N/(1+dispersion*N)
-			beta.mean <- log(rowSums(y*w/N)/rowSums(w))
-			beta <- qr.coef(qr(design), matrix(beta.mean,nrow=nlibs,ncol=ngenes,byrow=TRUE))
-			mu <- exp(design %*% beta + offset)
-		}
+		start.method <- match.arg(start.method, c("null","y"))
+		beta <- .Call(.cR_get_levenberg_start, y, offset, dispersion, weights, design, start.method=="null")
+		if (is.character(beta)) stop(beta) 
 	} else {
-		beta <- t(coef.start)
-		mu <- exp(design %*% beta + offset)
+		beta <- as.matrix(coef.start)
 	}
 
-# 	Checking arguments and calling the C++ method. We use transposed matrices so that each can be accessed from column-major storage in C++.
-	if (!is.double(design)) storage.mode(design) <- "double"
-	if (!is.double(dispersion)) storage.mode(dispersion) <- "double"
-	if (!is.double(offset)) storage.mode(offset) <- "double"
-	if (!is.double(weights)) storage.mode(weights) <- "double"
+# 	Checking arguments and calling the C++ method.
 	if (!is.double(beta)) storage.mode(beta) <- "double"
-	if (!is.double(mu)) storage.mode(mu) <- "double"
-	output <- .Call(.cR_levenberg, nlibs, ngenes, design, t(y), t(dispersion), offset, weights, beta, mu, tol, maxit)
+	output <- .Call(.cR_levenberg, design, y, dispersion, offset, weights, beta, tol, maxit)
 
 #	Check for error condition
 	if (is.character(output)) { stop(output) }
 
 #	Naming the output and returning it.  
 	names(output) <- c("coefficients", "fitted.values", "deviance", "iter", "failed")
-	output$coefficients <- t(output$coefficients)
-	output$fitted.values <- t(output$fitted.values)
 	colnames(output$coefficients) <- colnames(design)
 	rownames(output$coefficients) <- rownames(y)
 	dimnames(output$fitted.values) <- dimnames(y)
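
mglmLevenberg() now obtains its starting coefficients from C++ (.cR_get_levenberg_start) and no longer transposes its inputs in R. The interface is unchanged; a sketch with simulated counts:

    y <- matrix(rnbinom(40, mu=10, size=2), nrow=10)
    design <- cbind(Intercept=1, Group=rep(0:1, each=2))
    fit <- mglmLevenberg(y, design, dispersion=0.1, offset=log(colSums(y)))
    str(fit[c("coefficients", "deviance", "iter")])
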
diff --git a/R/mglmOneGroup.R b/R/mglmOneGroup.R
index 036d651..78091eb 100644
--- a/R/mglmOneGroup.R
+++ b/R/mglmOneGroup.R
@@ -1,47 +1,29 @@
 mglmOneGroup <- function(y,dispersion=0,offset=0,weights=NULL,maxit=50,tol=1e-10,verbose=FALSE,coef.start=NULL)
 #	Fit single-group negative-binomial glm
 #	Aaron Lun and Gordon Smyth
-#	18 Aug 2010. Last modified 11 Sep 2014.
+#	18 Aug 2010. Last modified 03 Oct 2016.
 {
 #	Check y
 	y <- as.matrix(y)
 	if(!is.numeric(y)) stop("y is non-numeric")
-	if(any(y<0)) stop("y must be non-negative")
-	ntags <- nrow(y)
-	nlibs <- ncol(y)
+	.isAllZero(y)
 
 #	Check dispersion
-	dispersion <- expandAsMatrix(dispersion, dim(y), byrow=FALSE)
-	if(typeof(dispersion) != "double") stop("dispersion not floating point number")
-	if(any(dispersion<0)) stop("dispersion must be non-negative")
+	dispersion <- .compressDispersions(dispersion)
 
 #	Check offset
-	if(typeof(offset) != "double") stop("offset not floating point number")
+	offset <- .compressOffsets(y, offset=offset)
 
 #	Check starting values
-	if (typeof(coef.start) != "double") storage.mode(coef.start) <- "double"
-
-#	All-Poisson special case
-	if(all(dispersion==0) && is.null(weights)) {
-		N <- exp(offset)
-		if(is.null(dim(N)))
-			m <- mean(N)
-		else
-			m <- .rowMeans(N,ntags,nlibs)
-	    return(log(.rowMeans(y/m, ntags, nlibs)))
-	}
+	if (is.null(coef.start)) coef.start <- NA_real_
+	if (!is.double(coef.start)) storage.mode(coef.start) <- "double"
+	coef.start <- makeCompressedMatrix(coef.start, byrow=FALSE)
 
 #	Check weights
-	if(is.null(weights)) weights=1
-	if(typeof(weights) == "integer") storage.mode(weights) <- "double"
-	if(typeof(weights) != "double") stop("weights is non-numeric")
-
-#	Expansions to full dimensions
-	offset <- expandAsMatrix(offset,dim(y))
-	weights <- expandAsMatrix(weights,dim(y))
+	weights <- .compressWeights(weights)
 
 #	Fisher scoring iteration.
-	output <- .Call(.cR_one_group, nlibs, ntags, t(y), t(dispersion), offset, weights, maxit, tol, coef.start)
+	output <- .Call(.cR_one_group, y, dispersion, offset, weights, maxit, tol, coef.start)
 
 #	Check error condition
 	if(is.character(output)) stop(output)
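
mglmOneGroup() drops the R-level Poisson special case; all dispersions, including zero, now go through the same C++ entry point. A minimal call with simulated counts:

    y <- matrix(rnbinom(40, mu=10, size=2), nrow=10)
    beta <- mglmOneGroup(y, dispersion=0.1, offset=log(colSums(y)))
    head(beta)    # one log-scale abundance per tag
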
diff --git a/R/mglmOneWay.R b/R/mglmOneWay.R
index 8641a58..2e5e03b 100644
--- a/R/mglmOneWay.R
+++ b/R/mglmOneWay.R
@@ -15,7 +15,7 @@ mglmOneWay <- function(y,design=NULL,dispersion=0,offset=0,weights=NULL,maxit=50
 #	by Fisher scoring with
 #	only a single explanatory factor in the model
 #	Gordon Smyth
-#	11 March 2011.  Last modified 11 Sep 2014.
+#	11 March 2011.  Last modified 03 Oct 2016.
 {
 	y <- as.matrix(y)
 	ntags <- nrow(y)
@@ -29,25 +29,30 @@ mglmOneWay <- function(y,design=NULL,dispersion=0,offset=0,weights=NULL,maxit=50
 	}
 	ngroups <- length(levels(group))
 	stopifnot(ncol(design)==ngroups)
-	mu <- matrix(0,ntags,ngroups)
-	offset <- expandAsMatrix(offset,dim(y))
-	dispersion <- expandAsMatrix(dispersion, dim(y), byrow=FALSE)
-	if(!is.null(weights)) weights <- expandAsMatrix(weights,dim(y))
+	groupbeta <- matrix(0,ntags,ngroups)
+
+	offset <- .compressOffsets(y, offset=offset)
+	dispersion <- .compressDispersions(dispersion)
+	weights <- .compressWeights(weights)
 	
 	firstjofgroup <- rep(0,ngroups)
 	new.start <- NULL
-	for (g in 1:ngroups) {
+	for (g in seq_len(ngroups)) {
 		j <- which(group==(levels(group)[g]))
 		firstjofgroup[g] <- j[1]
 		if (!is.null(coef.start)) { new.start <- coef.start %*% design[firstjofgroup[g],] }
-		mu[,g] <- mglmOneGroup(y[,j,drop=FALSE],dispersion=dispersion[,j,drop=FALSE],offset=offset[,j,drop=FALSE],weights=weights[,j,drop=FALSE],maxit=maxit,tol=tol,
-			coef.start=new.start)
+		groupbeta[,g] <- mglmOneGroup(y[,j,drop=FALSE], dispersion=dispersion[,j,drop=FALSE], offset=offset[,j,drop=FALSE],
+					weights=weights[,j,drop=FALSE], maxit=maxit,tol=tol, coef.start=new.start)
 	}
 
+	# Computing the fitted values from the group-wise beta's.
+	mu <- .Call(.cR_get_one_way_fitted, groupbeta, offset, as.integer(group)-1L)
+	if (is.character(mu)) stop(mu)
+
+	# Reformatting the beta's to reflect the original design.
 	designunique <- design[firstjofgroup,,drop=FALSE]
-	mu1 <- pmax(mu,-1e8)
-	beta <- t(solve(designunique,t(mu1)))
-	mu <- mu[,group,drop=FALSE]
-	mu <- exp(mu+offset)
+	groupbeta <- pmax(groupbeta,-1e8)
+	beta <- t(solve(designunique,t(groupbeta)))
+
 	list(coefficients=beta,fitted.values=mu)
 }
diff --git a/R/nbinomDeviance.R b/R/nbinomDeviance.R
index 5d2357b..7e7a04a 100644
--- a/R/nbinomDeviance.R
+++ b/R/nbinomDeviance.R
@@ -3,44 +3,42 @@ nbinomDeviance <- function(y,mean,dispersion=0,weights=NULL)
 #	y is a matrix and a deviance is computed for each row
 #	A vector y is taken to be a matrix with one row.
 #	Original version 23 November 2010.
-#	Last modified 9 Dec 2013.
+#	Last modified 03 Oct 2016.
 {
 #	Convert vector to row matrix
 	if(!is.matrix(y)) y <- array(y, c(1L,length(y)), if(!is.null(names(y))) list(NULL,names(y)))
-
-	d <- nbinomUnitDeviance(y=y,mean=mean,dispersion=dispersion)
-	if(!is.null(weights)) {
-		weights <- expandAsMatrix(weights, dim(d)) 
-		d <- weights*d
-	}
-	rowSums(d)
+	out <- .compute_nbdeviance(y=y, mean=mean, dispersion=dispersion, weights=weights, dosum=TRUE)
+	names(out) <- rownames(y)
+	out
 }
 
-
-nbinomUnitDeviance <- function(y,mean,dispersion=0) 
-#	Unit deviance for the nbinom distribution.
-{
+.compute_nbdeviance <- function(y, mean, dispersion, weights, dosum) {
 #	Check y. May be matrix or vector.
-	if (!is.double(y)) storage.mode(y) <- "double"
-	ntags <- NROW(y)
+	if(!is.matrix(y)) y <- matrix(y)
 	nobs <- length(y)
 
 #	Check mean
-	if (!is.double(mean)) storage.mode(mean) <- "double"
+	if(!is.matrix(mean)) mean <- matrix(mean)
+	if(!is.double(mean)) storage.mode(mean) <- "double"
 	if(length(mean)<nobs) stop("mean should have same dimensions as y")
 
-#	Check dispersion.
-#	Can be tagwise (rowwise) or observation-wise.
-	if (!is.double(dispersion)) dispersion <- "double"
-	lend <- length(dispersion)
-	if(lend < ntags) dispersion <- rep_len(dispersion, length.out=ntags)
-	if(lend > ntags && lend < nobs) dispersion <- rep_len(dispersion, length.out=nobs)
+#	Check dispersion (can be tagwise (rowwise) or observation-wise).
+	dispersion <- .compressDispersions(dispersion)
 
-	out <- .Call(.cR_compute_nbdev, y=y, mu=mean, phi=dispersion)
+#	Check weights.
+	weights <- .compressWeights(weights)
 
-#	Check error status
-	if (is.character(out)) stop(out)
+#	Computing unit deviance or residual deviance per gene, depending on 'dosum'.
+	d <- .Call(.cR_compute_nbdev, y, mean, dispersion, weights, as.logical(dosum))
+	if(is.character(d)) stop(d) 
 
-	y[] <- out
-	return(y)
+	return(d)
+}
+
+nbinomUnitDeviance <- function(y,mean,dispersion=0) 
+#	Unit deviance for the nbinom distribution.
+{
+	out <- .compute_nbdeviance(y=y, mean=mean, dispersion=dispersion, weights=NULL, dosum=FALSE)
+	dimnames(out) <- dimnames(y)
+	return(out)
 }
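
For illustration, a short usage sketch of the two exported functions on simulated data:

library(edgeR)
y <- matrix(rnbinom(40, mu=10, size=5), 10, 4)
mu <- matrix(10, 10, 4)
nbinomDeviance(y, mean=mu, dispersion=0.2)      # one residual deviance per gene
nbinomUnitDeviance(y, mean=mu, dispersion=0.2)  # one unit deviance per observation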
diff --git a/R/plotMD.R b/R/plotMD.R
index dcbc5ac..e4cc98b 100644
--- a/R/plotMD.R
+++ b/R/plotMD.R
@@ -39,10 +39,19 @@ plotMD.DGEGLM <- function(object, column=ncol(object), coef=NULL, xlab="Average
 	plotWithHighlights(x=object$AveLogCPM,y=logFC,xlab=xlab,ylab=ylab,main=main,status=status,...)
 }
 
-plotMD.DGEExact <- plotMD.DGELRT <- function(object, xlab="Average log CPM", ylab="log-fold-change", main=object$comparison, status=object$genes$Status, ...)
+plotMD.DGELRT <- function(object, xlab="Average log CPM", ylab="log-fold-change", main=object$comparison, status=object$genes$Status, ...)
 #	Mean-difference plot with color coding for controls
 #	Gordon Smyth
-#	Last modified 24 June 2015.
+#	Created 24 June 2015.
 {
 	plotWithHighlights(x=object$table$logCPM,y=object$table$logFC,xlab=xlab,ylab=ylab,main=main,status=status,...)
 }
+
+plotMD.DGEExact <- function(object, xlab="Average log CPM", ylab="log-fold-change", main=NULL, status=object$genes$Status, ...)
+#	Mean-difference plot with color coding for controls
+#	Gordon Smyth
+#	Created 24 June 2015.  Last modified 14 Aug 2016.
+{
+	if(is.null(main)) main <- paste(object$comparison[2],"vs",object$comparison[1])
+	plotWithHighlights(x=object$table$logCPM,y=object$table$logFC,xlab=xlab,ylab=ylab,main=main,status=status,...)
+}
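
A quick sketch of the new default title on simulated counts (the labels come from the levels of the group factor):

library(edgeR)
y <- matrix(rnbinom(80, mu=10, size=5), 20, 4)
d <- DGEList(counts=y, group=c(1,1,2,2))
d <- estimateCommonDisp(d)
et <- exactTest(d)
plotMD(et)  # main defaults to "2 vs 1"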
diff --git a/R/predFC.R b/R/predFC.R
index 4d7f2ec..8744ace 100644
--- a/R/predFC.R
+++ b/R/predFC.R
@@ -15,42 +15,20 @@ predFC.DGEList <- function(y,design=NULL,prior.count=0.125,offset=NULL,dispersio
 predFC.default <- function(y,design=NULL,prior.count=0.125,offset=NULL,dispersion=0,weights=NULL,...)
 #	Shrink log-fold-changes towards zero by augmenting data counts
 #	Gordon Smyth and Belinda Phipson
-#	17 Aug 2011.  Last modified 4 Nov 2012.
+#	17 Aug 2011.  Last modified 3 Oct 2016.
 {
-#	Check y
-	y <- as.matrix(y)
-	ngenes <- nrow(y)
-	nsamples <- ncol(y)
-
-
-#	Check prior.count
-	if(prior.count<0) stop("prior.count should be non-negative")
+#	Add prior counts in proportion to library sizes
+	out <- addPriorCount(y, offset=offset, prior.count=prior.count)
 
-#	Check offset
-	if(is.null(offset)) {
-		lib.size <- colSums(y)
-		offset <- log(lib.size)
-	} else
-		lib.size <- exp(offset)
 #	Check design
 	if(is.null(design)) {
 		warning("Behaviour of predFC with design=NULL is scheduled to be deprecated April 2014. Use cpm() instead.",call.=FALSE)
-		return(cpm(y,lib.size=lib.size,log=TRUE,prior.count=prior.count))
+		return(cpm(y,lib.size=exp(offset),log=TRUE,prior.count=prior.count))
 	} else
 		design <- as.matrix(design)
 
-#	Add prior counts in proportion to library sizes
-	if(is.null(dim(lib.size)))
-		ave.lib.size <- mean(lib.size)
-	else
-		ave.lib.size <- rowMeans(lib.size)
-	prior.count <- prior.count * lib.size/ave.lib.size
-	lib.size <- lib.size+2*prior.count
-	if(is.null(dim(prior.count))) prior.count <- matrix(prior.count,ngenes,nsamples,byrow=TRUE)
-	y <- y+prior.count
-
 #	Return matrix of coefficients on log2 scale
-   g <- glmFit(y,design,offset=log(lib.size),dispersion=dispersion,prior.count=0,weights=weights,...)
-   g$coefficients/log(2)
+	g <- glmFit(out$y,design,offset=out$offset,dispersion=dispersion,prior.count=0,weights=weights,...)
+	g$coefficients/log(2)
 }
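
A brief usage sketch of the refactored function, with simulated counts and a fixed dispersion for simplicity:

library(edgeR)
y <- matrix(rnbinom(80, mu=10, size=5), 20, 4)
design <- model.matrix(~factor(c(1,1,2,2)))
predFC(y, design, prior.count=0.125, dispersion=0.1)  # shrunken log2-fold-changes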
 
diff --git a/R/readDGE.R b/R/readDGE.R
index dccf58c..1e5b2b9 100644
--- a/R/readDGE.R
+++ b/R/readDGE.R
@@ -1,6 +1,6 @@
 readDGE <- function(files,path=NULL,columns=c(1,2),group=NULL,labels=NULL,...) 
 #	Read and collate a set of count data files, each file containing counts for one library
-#	Created 2006.  Last modified 15 June 2015.
+#	Created 2006.  Last modified 29 July 2016.
 {
 #	Create data.frame to hold sample information
 	x <- list()
@@ -18,18 +18,16 @@ readDGE <- function(files,path=NULL,columns=c(1,2),group=NULL,labels=NULL,...)
 
 #	Set group factor
 	if(!is.null(group)) x$samples$group <- group
-	if(is.null(x$samples$group)) x$samples$group <- rep(1,nfiles)
+	if(is.null(x$samples$group)) x$samples$group <- rep_len(1L,nfiles)
 	x$samples$group <- as.factor(x$samples$group)
 
-#	Read files
+#	Read files into a list
 	d <- taglist <- list()
 	for (fn in x$samples$files) {
 		if(!is.null(path)) fn <- file.path(path,fn)
 		d[[fn]] <- read.delim(fn,...,stringsAsFactors=FALSE)
 		taglist[[fn]] <- as.character(d[[fn]][,columns[1]])
-		if(anyDuplicated(taglist[[fn]])) {
-			stop(paste("Repeated tag sequences in",fn)) 
-		}
+		if(anyDuplicated(taglist[[fn]])) stop("Repeated tag sequences in ",fn)
 	}
 
 #	Collate counts for unique tags
@@ -42,13 +40,13 @@ readDGE <- function(files,path=NULL,columns=c(1,2),group=NULL,labels=NULL,...)
 		x$counts[aa,i] <- d[[i]][,columns[2]]
 	}
 
-#	Enter library sizes and norm factors
-	x$samples$lib.size <- colSums(x$counts)
-	x$samples$norm.factors <- 1
-
 #	Alert user if htseq style meta genes found
 	MetaTags <- grep("^__",tags,value=TRUE)
 	if(length(MetaTags)) message("Meta tags detected: ",paste(MetaTags,collapse=", "))
 
+#	Enter library sizes and norm factors
+	x$samples$lib.size <- colSums(x$counts)
+	x$samples$norm.factors <- 1
+
 	new("DGEList",x)
 }
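
A typical call, using hypothetical file names; each file is assumed to be tab-delimited with tag IDs in column 1 and counts in column 2, per the default columns=c(1,2):

library(edgeR)
files <- c("sample1.txt", "sample2.txt", "sample3.txt")  # hypothetical count files
d <- readDGE(files, group=c("A", "A", "B"))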
diff --git a/R/spliceVariants.R b/R/spliceVariants.R
index 503da15..01e3711 100644
--- a/R/spliceVariants.R
+++ b/R/spliceVariants.R
@@ -2,18 +2,21 @@ spliceVariants <- function(y, geneID, dispersion=NULL, group=NULL, estimate.gene
 # Identify genes with splice variants using a negative binomial model
 # We assume that the data come in a matrix or a DGEList, with counts summarized at the exon level and gene information available
 # Davis McCarthy and Gordon Smyth
-# Created 4 February 2011.  Last modified 2 Aug 2011. 
+# Created 4 February 2011.  Last modified 3 Oct 2016. 
 {
 	if( is(y, "DGEList") ) {
 		y.mat <- y$counts
 		if( is.null(group) )
 			group <- y$samples$group
+		lib.size <- y$samples$lib.size * y$samples$norm.factors
 	}
 	else {
 		y.mat <- as.matrix(y)
 		if( is.null(group) )
 			stop("y is a matrix and no group argument has been supplied. Please supply group argument.")
+		lib.size <- colSums(y)
 	}
+
 	geneID <- as.vector(unlist(geneID))
 	## Order genes by geneID: we need some way to reorganise the data---output cannot possibly be same dimension as input so this is a sensible way to organise things
 	o <- order(geneID)
@@ -81,8 +84,9 @@ spliceVariants <- function(y, geneID, dispersion=NULL, group=NULL, estimate.gene
 		full.index <- rownames(exons) %in% uniqIDs[this.genes]
 		if( any(this.genes) ) {
 			gene.counts.mat <- matrix(t(exons[full.index,]), nrow=sum(this.genes), ncol=ncol(exons)*i.exons, byrow=TRUE)
+			expanded.lib.size <- rep(lib.size, i.exons)
 			if(i.exons==1) {
-				abundance[this.genes] <- aveLogCPM(gene.counts.mat)
+				abundance[this.genes] <- aveLogCPM(gene.counts.mat, lib.size=expanded.lib.size)
 				splicevars.out$LR[this.genes] <- 0
 				splicevars.out$PValue[this.genes] <- 1
 			}
@@ -95,7 +99,7 @@ spliceVariants <- function(y, geneID, dispersion=NULL, group=NULL, estimate.gene
 				coef <- (ncol(X.null)+1):ncol(X.full)
 				## Fit NB GLMs to these genes
 				fit.this <- glmFit(gene.counts.mat, X.full, dispersion[this.genes], offset=0, prior.count=0)
-				abundance[this.genes] <- aveLogCPM(gene.counts.mat)
+				abundance[this.genes] <- aveLogCPM(gene.counts.mat, lib.size=expanded.lib.size)
 				results.this <- glmLRT(fit.this, coef=coef)
 				if(sum(this.genes)==1) {
 					splicevars.out$LR[this.genes] <- results.this$table$LR[1]
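
A brief usage sketch with simulated exon-level counts (four exons per gene; dispersions are estimated internally when dispersion=NULL):

library(edgeR)
y <- matrix(rnbinom(80, mu=10, size=5), 20, 4)
geneID <- rep(paste0("gene", 1:5), each=4)
sv <- spliceVariants(y, geneID, group=factor(c(1,1,2,2)))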
diff --git a/build/vignette.rds b/build/vignette.rds
index 12bd1b9..9304f3b 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/CITATION b/inst/CITATION
index 48c71f3..e1bc1bf 100644
--- a/inst/CITATION
+++ b/inst/CITATION
@@ -1,4 +1,4 @@
-citHeader("Please cite the first paper for the software itself and the other papers for the various original statistical methods implemented in edgeR.  See Section 1.2 in the User's Guide for more detail.")
+citHeader("See Section 1.2 in the User's Guide for more detail about how to cite the different edgeR pipelines.")
 
 citEntry(
    entry="article",
@@ -22,38 +22,3 @@ citEntry(
 	pages = 4288-4297,
 	textVersion = "McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation. Nucleic Acids Research 40, 4288-4297"
 )
-
-citEntry(
-   entry="article",
-   title = "Moderated statistical tests for assessing differences in tag abundance",
-   author = "Mark D Robinson and Gordon K Smyth",
-   journal = "Bioinformatics",
-   volume = 23,
-   pages = 2881-2887,
-   year = 2007,
-   textVersion = "Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing differences in tag abundance. Bioinformatics 23, 2881-2887"
-)
-
-citEntry(
-   entry="article",
-   title = "Small-sample estimation of negative binomial dispersion, with applications to SAGE data",
-   author = "Mark D Robinson and Gordon K Smyth",
-   journal = "Biostatistics",
-   volume = 9,
-   pages = 321-332,
-   year = 2008,
-   textVersion = "Robinson MD and Smyth GK (2008). Small-sample estimation of negative binomial dispersion, with applications to SAGE data. Biostatistics, 9, 321-332"
-)
-
-
-citEntry(
-   entry="article",
-   title = "Robustly detecting differential expression in RNA sequencing data using observation weights",
-   author = "Xiaobei Zhou and Helen Lindsay and Mark D Robinson",
-   journal = "Nucleic Acids Research",
-   volume = 42,
-   pages = "e91",
-   year = 2014,
-   textVersion = "Zhou X, Lindsay H, Robinson MD (2014). Robustly detecting differential expression in RNA sequencing data using observation weights. Nucleic Acids Research, 42(11), e91."
-)
-
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index c8d1272..667ff21 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -2,6 +2,42 @@
 \title{edgeR News}
 \encoding{UTF-8}
 
+\section{Version 3.16.0}{\itemize{
+\item 
+estimateDisp() now respects weights in calculating the APLs.
+
+\item 
+Added design matrix to the output of estimateDisp().
+
+\item 
+glmFit() constructs design matrix, if design=NULL, from y$samples$group.
+
+\item 
+New argument 'null' in glmTreat(), and a change in how p-values are calculated by default.
+
+\item 
+Modified the default 'main' in plotMD().
+
+\item 
+Created a new S3 class, compressedMatrix, to store offsets and weights efficiently.
+
+\item 
+Added the makeCompressedMatrix() function to make a compressedMatrix object.
+
+\item 
+Switched storage of offsets in DGEGLM objects to use the compressedMatrix class.
+
+\item 
+Added the addPriorCount() function for adding prior counts.
+
+\item 
+Modified spliceVariants() calculation of the average log-CPM.
+
+\item 
+Migrated some internal calculations and checks to C++ for greater efficiency.
+}}
+
+
 \section{Version 3.14.0}{\itemize{
 \item
 estimateDisp(), estimateCommonDisp(), estimateTrendedDisp(), estimateTagwiseDisp(), splitIntoGroups() and equalizeLibSizes() are now S3 generic functions.
@@ -10,6 +46,9 @@ estimateDisp(), estimateCommonDisp(), estimateTrendedDisp(), estimateTagwiseDisp
 The default method of estimateGLMTrendedDisp() and estimateGLMTagwiseDisp() now only return dispersion estimates instead of a list.
 
 \item
+The DGEList method of estimateDisp(), estimateCommonDisp() and estimateGLMCommonDisp() now use the common dispersion estimate to compute AveLogCPM and store it in the output.
+
+\item
 Add fry method for DGEList objects.
 
 \item
diff --git a/inst/doc/edgeR.pdf b/inst/doc/edgeR.pdf
index 8ae117d..94330ea 100644
Binary files a/inst/doc/edgeR.pdf and b/inst/doc/edgeR.pdf differ
diff --git a/man/addPriorCount.Rd b/man/addPriorCount.Rd
new file mode 100644
index 0000000..a74f6a3
--- /dev/null
+++ b/man/addPriorCount.Rd
@@ -0,0 +1,52 @@
+\name{addPriorCount}
+\alias{addPriorCount}
+
+\title{Add a prior count}
+\description{Add a library size-adjusted prior count to each observation.}
+
+\usage{
+addPriorCount(y, lib.size=NULL, offset=NULL, prior.count=1)
+}
+
+\arguments{
+\item{y}{a numeric count matrix, with rows corresponding to genes and columns to libraries.}
+\item{lib.size}{a numeric vector of library sizes.}
+\item{offset}{a numeric vector or matrix of offsets.}
+\item{prior.count}{a numeric scalar or vector of prior counts to be added to each gene.}
+}
+
+\details{
+This function adds a positive prior count to each observation, often useful for avoiding zeroes during calculation of log-values.
+For example, \code{\link{predFC}} will call this function to calculate shrunken log-fold changes.
+\code{\link{aveLogCPM}} and \code{\link{cpm}} also use the same underlying code to calculate (average) log-counts per million.
+
+The actual value added to the counts for each library is scaled according to the library size.
+This ensures that the relative contribution of the prior is the same for each library.
+Otherwise, a fixed prior would have little effect on a large library, but a big effect for a small library. 
+
+The library sizes are also modified, with twice the scaled prior being added to the library size for each library.
+To understand the motivation for this, consider that each observation is, effectively, a proportion of the total count in the library.
+The addition scheme implemented here represents an empirical logistic transform and ensures that the proportion can never be zero or one.
+
+If \code{offset} is supplied, this is used in favour of \code{lib.size} where \code{exp(offset)} is defined as the vector/matrix of library sizes.
+If an offset matrix is supplied, this will lead to gene-specific scaling of the prior as described above.
+
+Most use cases of this function will involve supplying a constant value to \code{prior.count} for all genes.
+However, it is also possible to use gene-specific values by supplying a vector of length equal to the number of rows in \code{y}.
+}
+
+\value{
+A list is returned containing \code{y}, a matrix of counts with the added priors; and \code{offset}, a compressedMatrix containing the (log-transformed) modified library sizes.
+}
+
+\author{
+Aaron Lun
+}
+
+\seealso{
+\code{\link{aveLogCPM}},
+\code{\link{cpm}},
+\code{\link{predFC}}
+}
+
+
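For illustration, a brief usage sketch on simulated data:

library(edgeR)
y <- matrix(rnbinom(40, mu=10, size=5), 10, 4)
out <- addPriorCount(y, prior.count=1)
out$y       # counts plus library size-scaled priors
out$offset  # log of the prior-adjusted library sizes, as a compressedMatrix
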
diff --git a/man/aveLogCPM.Rd b/man/aveLogCPM.Rd
index e9c6da4..4750aa8 100644
--- a/man/aveLogCPM.Rd
+++ b/man/aveLogCPM.Rd
@@ -30,7 +30,7 @@ Compute average log2 counts-per-million for each row of counts.
 \details{
 This function uses \code{mglmOneGroup} to compute average counts-per-million (AveCPM) for each row of counts, and returns log2(AveCPM).
 An average value of \code{prior.count} is added to the counts before running \code{mglmOneGroup}.
-If \code{prior.count} is a vector, each entry will be added to all counts in the corresponding row of \code{y}.
+If \code{prior.count} is a vector, each entry will be added to all counts in the corresponding row of \code{y}, as described in \code{\link{addPriorCount}}.
 
 This function is similar to
 
@@ -67,5 +67,7 @@ aveLogCPM(y, dispersion=runif(nrow(y), 0, 0.2))
 \seealso{
 See \code{\link{cpm}} for individual logCPM values, rather than genewise averages.
 
+Addition of the prior count is performed using the strategy described in \code{\link{addPriorCount}}.
+
 The computations for \code{aveLogCPM} are done by \code{\link{mglmOneGroup}}.
 }
diff --git a/man/edgeR-package.Rd b/man/edgeR-package.Rd
index fa6d6ce..2b9d4ea 100644
--- a/man/edgeR-package.Rd
+++ b/man/edgeR-package.Rd
@@ -5,6 +5,7 @@
 \title{Empirical analysis of digital gene expression data in R}
 \description{
 edgeR is a package for the analysis of digital gene expression data arising from RNA sequencing technologies such as SAGE, CAGE, Tag-seq or RNA-seq, with emphasis on testing for differential expression.
+It can also be used for other sequencing technologies from which read counts are produced, such as ChIP-seq, Hi-C or CRISPR.
 
 Particular strengths of the package include the ability to estimate biological variation between replicate libraries, and to conduct exact tests of significance which are suitable for small counts.
 The package is able to make use of even minimal numbers of replicates.
@@ -38,8 +39,26 @@ Mark Robinson <mrobinson at wehi.edu.au>, Davis McCarthy <dmccarthy at wehi.edu.au>, Y
   Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation.
   \emph{Nucleic Acids Research} 40, 4288-4297.
 
-  Lund, SP, Nettleton, D, McCarthy, DJ, Smyth, GK (2012).
-  Detecting differential expression in RNA-sequence data using quasi-likelihood with shrunken dispersion estimates.
-  \emph{Statistical Applications in Genetics and Molecular Biology}. (Accepted 31 July 2012)
+Chen, Y, Lun, ATL, and Smyth, GK (2014).
+Differential expression analysis of complex RNA-seq experiments using edgeR.
+In: \emph{Statistical Analysis of Next Generation Sequence Data},
+Somnath Datta and Daniel S Nettleton (eds), Springer, New York, pages 51-74.
+\url{http://www.statsci.org/smyth/pubs/edgeRChapterPreprint.pdf}
+
+Dai, Z, Sheridan, JM, Gearing, LJ, Moore, DL, Su, S, Wormald, S, Wilcox, S, O'Connor, L, Dickins, RA, Blewitt, ME, and Ritchie, ME (2014).
+edgeR: a versatile tool for the analysis of shRNA-seq and CRISPR-Cas9 genetic screens.
+\emph{F1000Research} 3, 95.
+\url{http://f1000research.com/articles/3-95}
+
+Lun, ATL, Chen, Y, and Smyth, GK (2016).
+It's DE-licious: a recipe for differential expression analyses of RNA-seq experiments using quasi-likelihood methods in edgeR.
+\emph{Methods in Molecular Biology} 1418, 391-416.
+\url{http://www.statsci.org/smyth/pubs/QLedgeRPreprint.pdf}
+
+Chen Y, Lun ATL, and Smyth, GK (2016).
+From reads to genes to pathways: differential expression analysis of RNA-Seq experiments using Rsubread and the edgeR quasi-likelihood pipeline.
+\emph{F1000Research} 5, 1438.
+\url{http://f1000research.com/articles/5-1438}
 }
+
 \keyword{package}
diff --git a/man/estimateDisp.Rd b/man/estimateDisp.Rd
index eb35f09..e9af62d 100644
--- a/man/estimateDisp.Rd
+++ b/man/estimateDisp.Rd
@@ -41,6 +41,7 @@ Maximizes the negative binomial likelihood to give the estimate of the common, t
 
 \value{
 \code{estimateDisp.DGEList} adds the following components to the input \code{DGEList} object:
+	\item{design}{the design matrix.}
 	\item{common.dispersion}{estimate of the common dispersion.}
 	\item{trended.dispersion}{estimates of the trended dispersions.}
 	\item{tagwise.dispersion}{tagwise estimates of the dispersion parameter if \code{tagwise=TRUE}.}
@@ -67,12 +68,12 @@ The \code{estimateDisp} function doesn't give exactly the same estimates as the
 Chen, Y, Lun, ATL, and Smyth, GK (2014).
 Differential expression analysis of complex RNA-seq experiments using edgeR.
 In: \emph{Statistical Analysis of Next Generation Sequence Data},
-Somnath Datta and Daniel S Nettleton (eds), Springer, New York.
+Somnath Datta and Daniel S. Nettleton (eds), Springer, New York, pages 51-74.
 \url{http://www.statsci.org/smyth/pubs/edgeRChapterPreprint.pdf}
 
 Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
 Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
-\emph{Annals of Applied Statistics} 10. 
+\emph{Annals of Applied Statistics} 10, 946-963.
 \url{http://arxiv.org/abs/1602.08678}
 }
 
diff --git a/man/expandAsMatrix.Rd b/man/expandAsMatrix.Rd
index c91560d..3494470 100644
--- a/man/expandAsMatrix.Rd
+++ b/man/expandAsMatrix.Rd
@@ -8,14 +8,25 @@ Expand scalar or vector to a matrix.
 expandAsMatrix(x, dim=NULL, byrow=TRUE)
 }
 \arguments{
-  \item{x}{scalar, vector or matrix. If a vector, length must match one of the output dimensions.}
-  \item{dim}{required dimension for the output matrix.}
-  \item{byrow}{logical. Should the matrix be filled by columns or by rows (the default) if the length of the input vector is equal to both dimensions?}
+  \item{x}{scalar, vector, matrix or compressedMatrix.}
+  \item{dim}{integer vector of length 2 specifying the required dimensions of the output matrix.}
+  \item{byrow}{logical scalar specifying whether the matrix should be filled by columns or rows when \code{x} is a vector.}
+  
 }
 
 \details{
-This function expands a row or column vector to be a matrix.
-It is used internally in edgeR to convert offsets to a matrix.
+This function expands a scalar, row/column vector or compressedMatrix to be a matrix of dimensions \code{dim}.
+It is used internally in edgeR to convert offsets, weights and other values to a matrix for consistent handling.
+If \code{dim} is \code{NULL}, the function is equivalent to calling \code{as.matrix(x)}.
+
+If \code{x} is a vector, its length must match one of the output dimensions.
+The matrix will then be filled by repeating the vector across the matching dimension.
+For example, if \code{length(x)==dim[1]}, the matrix will be filled such that each column contains \code{x}.
+If both dimensions match, filling is determined by \code{byrow}, with filling by rows as the default.
+
+If \code{x} is a compressedMatrix object, the size of any non-repeated dimension must be consistent with the corresponding output dimension.
+The \code{byrow} argument is ignored, as the repeat specifications dictate how expansion should be performed.
+See \code{?\link{compressedMatrix}} for more details.
 }
 
 \value{
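
A small sketch of the filling rules described in the details above:

library(edgeR)
expandAsMatrix(1:4, dim=c(4, 2))  # length matches dim[1]: each column is 1:4
expandAsMatrix(1:2, dim=c(4, 2))  # length matches dim[2]: each row is 1:2
expandAsMatrix(5, dim=c(4, 2))    # scalar is recycled everywhere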
diff --git a/man/glmQLFTest.Rd b/man/glmQLFTest.Rd
index ccc45ff..3b9ac6f 100644
--- a/man/glmQLFTest.Rd
+++ b/man/glmQLFTest.Rd
@@ -13,7 +13,7 @@ Conduct genewise statistical tests for a given coefficient or contrast.}
 \method{glmQLFit}{DGEList}(y, design=NULL, dispersion=NULL, offset=NULL, abundance.trend=TRUE,
         robust=FALSE, winsor.tail.p=c(0.05, 0.1), \dots)
 \method{glmQLFit}{default}(y, design=NULL, dispersion=NULL, offset=NULL, lib.size=NULL,
-        abundance.trend=TRUE, AveLogCPM=NULL, robust=FALSE,
+        weights=NULL, abundance.trend=TRUE, AveLogCPM=NULL, robust=FALSE,
         winsor.tail.p=c(0.05, 0.1), \dots)
 glmQLFTest(glmfit, coef=ncol(glmfit$design), contrast=NULL, poisson.bound=TRUE)
 }
@@ -29,6 +29,8 @@ glmQLFTest(glmfit, coef=ncol(glmfit$design), contrast=NULL, poisson.bound=TRUE)
 
 \item{lib.size}{numeric vector of length \code{ncol(y)} giving library sizes. Only used if \code{offset=NULL}, in which case \code{offset} is set to \code{log(lib.size)}. Defaults to \code{colSums(y)}.}
 
+\item{weights}{numeric matrix of same size as \code{y} giving weights for the log-linear models. If \code{NULL}, will be set to unity for all observations.}
+
 \item{abundance.trend}{logical, whether to allow an abundance-dependent trend when estimating the prior values for the quasi-likelihood multiplicative dispersion parameter.}
 
 \item{AveLogCPM}{average log2-counts per million, the average taken over all libraries in \code{y}. If \code{NULL} will be computed by \code{aveLogCPM(y)}.}
@@ -84,9 +86,14 @@ It also stores \code{df.total}, a numeric vector containing the denominator degr
 }
 
 \references{
-Lun, ATL, Chen, Y, and Smyth, GK (2015).
+Chen Y, Lun ATL, and Smyth, GK (2016).
+From reads to genes to pathways: differential expression analysis of RNA-Seq experiments using Rsubread and the edgeR quasi-likelihood pipeline.
+\emph{F1000Research} 5, 1438.
+\url{http://f1000research.com/articles/5-1438}
+
+Lun, ATL, Chen, Y, and Smyth, GK (2016).
 It's DE-licious: a recipe for differential expression analyses of RNA-seq experiments using quasi-likelihood methods in edgeR.
-Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
+\emph{Methods in Molecular Biology} 1418, 391-416.
 \url{http://www.statsci.org/smyth/pubs/QLedgeRPreprint.pdf}
 
 Lund, SP, Nettleton, D, McCarthy, DJ, and Smyth, GK (2012).
@@ -94,10 +101,10 @@ Detecting differential expression in RNA-sequence data using quasi-likelihood wi
 \emph{Statistical Applications in Genetics and Molecular Biology} Volume 11, Issue 5, Article 8.
 \url{http://www.statsci.org/smyth/pubs/QuasiSeqPreprint.pdf}
 
-Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2013).
-Empirical Bayes in the presence of exceptional cases, with application to microarray data.
-Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
-\url{http://www.statsci.org/smyth/pubs/RobustEBayesPreprint.pdf}
+Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
+Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
+\emph{Annals of Applied Statistics} 10, 946-963.
+\url{http://arxiv.org/abs/1602.08678}
 }
 
 \author{Yunshun Chen, Aaron Lun, Davis McCarthy and Gordon Smyth}
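
A minimal usage sketch of the quasi-likelihood pipeline documented here, with simulated counts and a fixed dispersion:

library(edgeR)
y <- matrix(rnbinom(80, mu=10, size=5), 20, 4)
design <- model.matrix(~factor(c(1,1,2,2)))
fit <- glmQLFit(y, design, dispersion=0.1)
res <- glmQLFTest(fit, coef=2)
topTags(res)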
diff --git a/man/glmTreat.Rd b/man/glmTreat.Rd
index e709163..42d8b8d 100644
--- a/man/glmTreat.Rd
+++ b/man/glmTreat.Rd
@@ -1,14 +1,12 @@
 \name{glmTreat}
 \alias{glmTreat}
-\alias{treatDGE}
 
 \title{Test for Differential Expression Relative to a Threshold}
 
 \description{Conduct genewise statistical tests for a given coefficient or contrast relative to a specified fold-change threshold.}
 
 \usage{
-glmTreat(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
-treatDGE(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
+glmTreat(glmfit, coef = ncol(glmfit$design), contrast = NULL, lfc = 0, null = "interval")
 }
 
 \arguments{
@@ -19,6 +17,11 @@ treatDGE(glmfit, coef=ncol(glmfit$design), contrast=NULL, lfc=0)
 \item{contrast}{numeric vector specifying the contrast of the linear model coefficients to be tested against the log2-fold-change threshold. Length must equal to the number of columns of \code{design}. If specified, then takes precedence over \code{coef}.}
 
 \item{lfc}{numeric scalar specifying the absolute value of the log2-fold change threshold above which differential expression is to be considered.}
+
+\item{null}{character string, choices are \code{"worst.case"} or \code{"interval"}.
+If \code{"worst.case"}, then the null hypothesis assumes that the true logFC is on the boundary of the possible values, either at \code{lfc} or \code{-lfc}, whichever gives the larger p-value.
+This gives the most conservative results.
+If \code{"interval"}, then the null hypothesis assumes that the true logFC belongs to a bounded interval of possible values.}
 }
 
 \value{
@@ -48,14 +51,23 @@ If \code{lfc=0}, then \code{glmTreat} is equivalent to \code{glmLRT} or \code{gl
 If there is no shrinkage on log-fold-changes, i.e., fitting glms with \code{prior.count=0}, then \code{unshrunk.logFC} and \code{logFC} are essentially the same. Hence they are merged into one column of \code{logFC} in \code{table}.
 Note that \code{glmTreat} constructs test statistics using \code{unshrunk.logFC} rather than \code{logFC}.
 
+\code{glmTreat} with positive \code{lfc} gives larger p-values than would be obtained with \code{lfc=0}.
+If \code{null="worst.case"}, then \code{glmTreat} conducts a test closely analogous to the \code{treat} function in the limma package.
+This conducts a test in which the null hypothesis puts the true logFC on the boundary of the \code{[-lfc,lfc]} interval closest to the observed logFC.
+If \code{null="interval"}, then the null hypothesis assumes an interval of possible values for the true logFC.
+This approach is somewhat less conservative.
+}
+
+\note{
 \code{glmTreat} was previously called \code{treatDGE}.
-The old function name is now deprecated and will be removed in a future release of edgeR.
 }
 
 \author{Yunshun Chen and Gordon Smyth}
 
 \seealso{
 \code{\link{topTags}} displays results from \code{glmTreat}.
+
+\code{\link[limma]{treat}} is the corresponding function in the limma package, designed for normally distributed expression data rather than negative binomial counts.
 }
 
 \references{
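
A minimal sketch contrasting the two null options on simulated data:

library(edgeR)
y <- matrix(rnbinom(80, mu=10, size=5), 20, 4)
design <- model.matrix(~factor(c(1,1,2,2)))
fit <- glmFit(y, design, dispersion=0.1)
tr <- glmTreat(fit, coef=2, lfc=1)                      # default null="interval"
tr2 <- glmTreat(fit, coef=2, lfc=1, null="worst.case")  # more conservative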
diff --git a/man/goodTuring.Rd b/man/goodTuring.Rd
index 20dd2ef..bb7c21b 100644
--- a/man/goodTuring.Rd
+++ b/man/goodTuring.Rd
@@ -37,7 +37,7 @@ The posterior means are then converted to proportions.
 In the empirical Bayes step, the counts are smoothed by assuming a log-linear relationship between frequencies and frequencies of frequencies.
 The fundamentals of the algorithm are from Good (1953).
 Gale and Sampson (1995) proposed a simplified algorithm with a rule for switching between the observed and smoothed frequencies, and it is Gale and Sampson's simplified algorithm that is implemented here.
-The number of zero values in \code{x} are not used in the algorithm, but is returned by this function.
+The number of zero values in \code{x} is not used as part of the algorithm, but is returned by this function.
 
 Sampson gives a C code version on his webpage at
 \url{http://www.grsampson.net/RGoodTur.html}
@@ -53,6 +53,10 @@ uses the results to predict the proportion of each gene in each library.
 Gale, WA, and Sampson, G (1995).
 Good-Turing frequency estimation without tears.
 \emph{Journal of Quantitative Linguistics} 2, 217-237.
+
+Good, IJ (1953).
+The population frequencies of species and the estimation of population parameters.
+\emph{Biometrika} 40, 237-264.
 }
 
 \author{Aaron Lun and Gordon Smyth, adapted from Sampson's C code from \url{http://www.grsampson.net/RGoodTur.html}}
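
A brief usage sketch on simulated counts:

library(edgeR)
x <- rpois(100, lambda=3)
goodTuring(x)  # smoothed frequency estimates; zeros are reported but unused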
diff --git a/man/makeCompressedMatrix.Rd b/man/makeCompressedMatrix.Rd
new file mode 100644
index 0000000..604820a
--- /dev/null
+++ b/man/makeCompressedMatrix.Rd
@@ -0,0 +1,108 @@
+\title{makeCompressedMatrix}
+\name{makeCompressedMatrix}
+\alias{makeCompressedMatrix}
+\alias{compressedMatrix}
+\alias{[.compressedMatrix}
+\alias{as.matrix.compressedMatrix}
+
+\description{
+Construct a compressedMatrix object from a scalar, vector or matrix.
+}
+
+\usage{
+makeCompressedMatrix(x, byrow=TRUE)
+
+\method{[}{compressedMatrix}(x, i, j, ...)
+\method{as.matrix}{compressedMatrix}(x, ...)
+}
+
+\arguments{
+  \item{x}{For \code{makeCompressedMatrix}, a scalar, vector, matrix or compressedMatrix object.
+        For the S3 methods, a compressedMatrix object.}
+  \item{byrow}{logical. If \code{x} is a vector, should it be repeated across rows (default) or across columns?}
+  \item{i, j}{subset indices to apply to \code{x}.}
+  \item{...}{additional arguments, ignored.}
+}
+
+\details{
+This function creates a compressedMatrix object from \code{x}. 
+The compressedMatrix class inherits from a matrix and holds two logical scalar attributes \code{repeat.row} and \code{repeat.col}.
+Each attribute specifies whether the values are to be repeated across rows and/or across columns.
+This avoids the need to store redundant values in a full-sized matrix, as would be done with \code{\link{expandAsMatrix}}.
+
+To illustrate, consider that rows usually correspond to genes while columns usually correspond to libraries.
+If we have a vector of library sizes, this will hold one unique value per library that is the same for all genes.
+Thus, we should use \code{byrow=TRUE}, which will construct a compressedMatrix object storing one row containing this vector.
+Here, \code{repeat.row=TRUE} and \code{repeat.col=FALSE}, indicating that the row is to be repeated for all genes.
+
+On the other hand, we may have a vector of gene-specific values that is the same for all libraries (e.g., dispersions).
+In this case, we should use \code{byrow=FALSE} to construct the compressedMatrix object.
+This will store one column with \code{repeat.row=FALSE} and \code{repeat.col=TRUE}, indicating that the column should be repeated across libraries.
+
+In cases where \code{x} is a scalar, \code{byrow} is ignored and both \code{repeat.row} and \code{repeat.col} will be \code{TRUE} by default.
+If \code{x} is a matrix, both attributes will be \code{FALSE}.
+If \code{x} is a compressedMatrix, it will be returned without modification.
+
+Subsetting of a compressedMatrix object depends on the values of \code{repeat.row} and \code{repeat.col}.
+If the rows are repeated, any subsetting by row will be ignored.
+Similarly, if the columns are repeated, any subsetting by column will be ignored.
+This reflects the fact that the repeated dimension has no fixed size, so subsetting on it is meaningless.
+If neither are repeated, subsetting behaves as it would for a normal matrix.
+
+Calling \code{as.matrix} will return the raw matrix without attributes or classes.
+If either the columns or rows are repeated, the corresponding dimension in the returned matrix will be of length 1.
+Otherwise, it will be of arbitrary length depending on the size/length of the input originally used to construct the compressedMatrix object.
+A compressedMatrix object can also be used as input to \code{\link{expandAsMatrix}}, which will expand it to the specified dimensions.
+
+The compressedMatrix is used throughout edgeR to save space in storing offsets and (to a lesser extent) weights.
+This is because, for routine analyses, offsets are the same for all genes, so it makes little sense to expand them to the full dimensions of the count matrix.
+Most functions will accept a compressedMatrix as input to \code{offset} or \code{weights} arguments.
+}
+
+\value{
+An object of class compressedMatrix, containing \code{x} and the additional attributes \code{repeat.row} and \code{repeat.col}.
+}
+
+\author{Aaron Lun}
+
+\examples{
+# Repeated rows:
+library.sizes <- runif(4, 1e6, 2e6)
+lib.mat <- makeCompressedMatrix(library.sizes, byrow=TRUE)
+lib.mat
+
+lib.mat[,1:2] # subset by column works as expected
+lib.mat[1:10,] # subset by row has no effect (see Details)
+as.matrix(lib.mat)
+expandAsMatrix(lib.mat, dim=c(10, 4))
+
+# Repeated columns:
+gene.disp <- runif(10, 0.01, 0.1)
+disp.mat <- makeCompressedMatrix(gene.disp, byrow=FALSE)
+disp.mat
+
+disp.mat[,1:2] # subset by column has no effect
+disp.mat[1:5,] # subset by row works as expected
+as.matrix(disp.mat)
+expandAsMatrix(disp.mat, dim=c(10, 4), byrow=FALSE)
+
+# Scalar:
+weights <- makeCompressedMatrix(1)
+weights[1:10,] # subsetting has no effect
+weights[,1:10]
+as.matrix(weights)
+expandAsMatrix(weights, dim=c(10, 4))
+
+# Matrix:
+offsets <- makeCompressedMatrix(matrix(runif(40), 10, 4))
+offsets[1:5,]
+offsets[,1:2]
+as.matrix(offsets)
+expandAsMatrix(offsets, dim=c(10, 4))
+}
+
+\seealso{
+\code{\link{expandAsMatrix}}
+}
+
+\keyword{hplot}
diff --git a/man/plotMD.Rd b/man/plotMD.Rd
index c6c189d..139dcb4 100644
--- a/man/plotMD.Rd
+++ b/man/plotMD.Rd
@@ -20,6 +20,9 @@ Creates a mean-difference plot (aka MA plot) with color coding for highlighted p
 \method{plotMD}{DGELRT}(object, xlab = "Average log CPM",
        ylab = "log-fold-change", main = object$comparison,
        status=object$genes$Status, \dots)
+\method{plotMD}{DGEExact}(object, xlab = "Average log CPM",
+       ylab = "log-fold-change", main = NULL,
+       status=object$genes$Status, \dots)
 }
 
 \arguments{
diff --git a/man/plotQLDisp.Rd b/man/plotQLDisp.Rd
index 0d78dff..df4ac9e 100644
--- a/man/plotQLDisp.Rd
+++ b/man/plotQLDisp.Rd
@@ -14,7 +14,7 @@ plotQLDisp(glmfit, xlab="Average Log2 CPM", ylab="Quarter-Root Mean Deviance", p
   \item{ylab}{label for the y-axis.}
   \item{pch}{the plotting symbol. See \code{\link{points}} for more details.}
   \item{cex}{plot symbol expansion factor. See \code{\link{points}} for more details.}
-  \item{col.shrunk}{color of the points representing the shrunk quasi-liklihood dispersions.}
+  \item{col.shrunk}{color of the points representing the squeezed quasi-likelihood dispersions.}
   \item{col.trend}{color of line showing dispersion trend.}
   \item{col.raw}{color of points showing the unshrunk dispersions.}
   \item{\dots}{any other arguments are passed to \code{plot}.}
@@ -30,7 +30,7 @@ The quarter-root transformation is applied to improve visibility for dispersions
 A plot is created on the current graphics device.
 }
 
-\author{Aaron Lun, based on code by Davis McCarthy and Gordon Smyth}
+\author{Aaron Lun, Davis McCarthy, Gordon Smyth, Yunshun Chen.}
 
 \examples{
 nbdisp <- 1/rchisq(1000, df=10)
@@ -45,4 +45,11 @@ fit <- glmQLFit(y, design, abundance.trend=FALSE)
 plotQLDisp(fit)
 }
 
+\references{
+Chen Y, Lun ATL, and Smyth, GK (2016).
+From reads to genes to pathways: differential expression analysis of RNA-Seq experiments using Rsubread and the edgeR quasi-likelihood pipeline.
+\emph{F1000Research} 5, 1438.
+\url{http://f1000research.com/articles/5-1438}
+}
+
 \keyword{plot}
diff --git a/man/plotSmear.Rd b/man/plotSmear.Rd
index dd8faa5..381c695 100644
--- a/man/plotSmear.Rd
+++ b/man/plotSmear.Rd
@@ -1,11 +1,12 @@
 \name{plotSmear}
 \alias{plotSmear}
 \title{
-Plots log-Fold Change versus log-Concentration (or, M versus A) for Count Data
+Smear plot
 }
 \description{
-Both of these functions plot the log-fold change (i.e. the log of the ratio of expression levels for each gene between two experimential groups) against the log-concentration (i.e. the overall average expression level for each gene across the two groups). To represent counts that were low (e.g. zero in 1 library and non-zero in the other) in one of the two conditions, a 'smear' of points at low A value is presented in \code{plotSmear}.
+Make a mean-difference plot of two libraries of count data with smearing of points with very low counts, especially those that are zero for one of the columns.
 }
+
 \usage{
 plotSmear(object, pair=NULL, de.tags=NULL, xlab="Average logCPM", ylab="logFC", pch=19,
      cex=0.2, smearWidth=0.5, panel.first=grid(), smooth.scatter=FALSE, lowess=FALSE, \dots)
@@ -25,16 +26,22 @@ plotSmear(object, pair=NULL, de.tags=NULL, xlab="Average logCPM", ylab="logFC",
   \item{\dots}{further arguments passed on to \code{plot}}
 }
 
-\value{A plot to the current device}
+\value{
+Invisibly returns the x and y coordinates of the plotted points, and a plot is created on the current device.
+}
 
 \details{
-\code{plotSmear} is a more sophisticated and superior way to produce an 'MA plot'. \code{plotSmear} resolves the problem of plotting genes that have a total count of zero for one of the groups by adding the 'smear' of points at low A value. The points to be smeared are identified as being equal to the minimum estimated concentration in one of the two groups.  The smear is created by using random uniform numbers of width \code{smearWidth} to the left of the minimum A. \code{plotSmear} als [...]
+\code{plotSmear} produces a type of mean-difference plot (or MA plot) with a special representation (smearing) of log-ratios that are infinite.
+\code{plotSmear} resolves the problem of plotting genes that have a total count of zero for one of the groups by adding the 'smear' of points at low A value.
+The points to be smeared are identified as being equal to the minimum estimated concentration in one of the two groups.
+The smear is created by using random uniform numbers of width \code{smearWidth} to the left of the minimum A.
+\code{plotSmear} also allows easy highlighting of differentially expressed (DE) genes.
 }
 
 \author{Mark Robinson, Davis McCarthy}
 
 \seealso{
-\code{\link{maPlot}}
+\code{\link{maPlot}}, \code{\link{plotMD.DGEList}}
 }
 
 \examples{
diff --git a/man/predFC.Rd b/man/predFC.Rd
index c447854..43858e2 100644
--- a/man/predFC.Rd
+++ b/man/predFC.Rd
@@ -29,11 +29,11 @@
 }
 
 \details{
-This function computes predictive log-fold changes (pfc) for a NB glm.
+This function computes predictive log-fold changes (pfc) for a NB GLM.
 The pfc are posterior Bayesian estimators of the true log-fold-changes.
 They are predictive of values that might be replicated in a future experiment.
 
-Specifically the function adds a small prior count to each observation before estimating the glm.
+Specifically, the function adds a small prior count to each observation before fitting the GLM (see \code{\link{addPriorCount}} for details).
 The actual prior count that is added is proportional to the library size.
 This has the effect that any log-fold-change that was zero prior to augmentation remains zero and non-zero log-fold-changes are shrunk towards zero.
 
@@ -78,6 +78,7 @@ abline(a=0,b=1)
 }
 
 \seealso{
-\code{\link{glmFit}}, \code{\link{exactTest}}
+\code{\link{glmFit}}, \code{\link{exactTest}},
+\code{\link{addPriorCount}}
 }
 
diff --git a/man/processAmplicons.Rd b/man/processAmplicons.Rd
index 7bf7d17..c8643d8 100644
--- a/man/processAmplicons.Rd
+++ b/man/processAmplicons.Rd
@@ -48,21 +48,41 @@ processAmplicons(readfile, readfile2=NULL, barcodefile, hairpinfile,
 	\item{lib.size}{auto-calculated column sum of the counts matrix}
 }
 
-\details{The \code{processAmplicons} function assumes the sequences in your fastq files have a fixed structure (as per Figure 1A of http://f1000research.com/articles/3-95/v2). It cannot be used if your hairpins/sgRNAs/sample index sequences are in random locations within each read. You will need to customise your own sequence processing pipeline if this is the case, but can still complete your downstream analysis using edgeR.
+\details{
+The \code{processAmplicons} function assumes the sequences in your fastq files have a fixed structure (as per Figure 1A of Dai et al, 2014).
 
-The input barcode file and hairpin/sgRNA files are tab-separated text files with at least two columns (named 'ID' and 'Sequences') containing the sample or hairpin/sgRNA ids and a second column indicating the sample index or hairpin/sgRNA sequences to be matched. If \code{barcode2Start} and \code{barcode2End} are specified, a third column 'Sequences2' is expected in the barcode file. If \code{readfile2}, \code{barcodeStartRev} and \code{barcodeEndRev} are specified, another column 'Seque [...]
+The input barcode file and hairpin/sgRNA files are tab-separated text files with at least two columns (named 'ID' and 'Sequences') containing the sample or hairpin/sgRNA ids and a second column indicating the sample index or hairpin/sgRNA sequences to be matched.
+If \code{barcode2Start} and \code{barcode2End} are specified, a third column 'Sequences2' is expected in the barcode file.
+If \code{readfile2}, \code{barcodeStartRev} and \code{barcodeEndRev} are specified, another column 'SequencesReverse' is expected in the barcode file.
+The barcode file may also contain a 'group' column that indicates which experimental group a sample belongs to.
+Additional columns in each file will be included in the respective \code{$samples} or \code{$genes} data.frames of the final \code{\link[edgeR:DGEList-class]{DGEList}} object.
+These files, along with the fastq file(s), are assumed to be in the current working directory.
 
-To compute the count matrix, matching to the given barcodes and hairpins/sgRNAs is conducted in two rounds. The first round looks for an exact sequence match for the given barcode sequences and hairpin/sgRNA sequences at the locations specified. If \code{allowShifting} is set to \code{TRUE}, the program also checks if a given hairpin/sgRNA sequence can be found at a neighbouring position in the read. If a match isn't found, the program performs a second round of matching which allows for [...]
+To compute the count matrix, matching to the given barcodes and hairpins/sgRNAs is conducted in two rounds.
+The first round looks for an exact sequence match for the given barcode sequences and hairpin/sgRNA sequences at the locations specified.
+If \code{allowShifting} is set to \code{TRUE}, the program also checks if a given hairpin/sgRNA sequence can be found at a neighbouring position in the read.
+If a match isn't found, the program performs a second round of matching which allows for sequence mismatches if \code{allowMismatch} is set to \code{TRUE}.
+The program also checks parameter \code{allowShiftedMismatch} which accommodates mismatches at the shifted positions.
+The maximum number of mismatch bases in barcode and hairpin/sgRNA are specified by the parameters \code{barcodeMismatchBase} and \code{hairpinMismatchBase}. 
 
-The program outputs a \code{\link[edgeR:DGEList-class]{DGEList}} object, with a count matrix indicating the number of times each barcode and hairpin/sgRNA combination could be matched in reads from input fastq file/(s).
+The program outputs a \code{\link[edgeR:DGEList-class]{DGEList}} object, with a count matrix indicating the number of times each barcode and hairpin/sgRNA combination could be matched in reads from input fastq file(s).
 
-For further examples and data, refer to the Case studies available from http://bioinf.wehi.edu.au/shRNAseq/.
+For further examples and data, refer to the case studies available from \url{http://bioinf.wehi.edu.au/shRNAseq}.
+}
+
+\note{
+This function replaced the earlier function \code{processHairpinReads} in edgeR 3.7.17.
+
+This function cannot be used if the hairpins/sgRNAs/sample index sequences are in random locations within each read.
+If that is the case, then analysts will need to customise their own sequence processing pipeline, although edgeR can still be used for downstream analysis.
 
-Note: The \code{processAmplicons} function supercedes the earlier \code{processHairpinReads} function.
 }
 
 \author{Zhiyin Dai and Matthew Ritchie}
 
 \references{
-Dai Z, Sheridan JM, et al. (2014). edgeR: a versatile tool for the analysis of shRNA-seq and CRISPR-Cas9 genetic screens. F1000Research, 3:95. http://f1000research.com/articles/3-95/v2. PMID: 24860646.
+Dai, Z, Sheridan, JM, Gearing, LJ, Moore, DL, Su, S, Wormald, S, Wilcox, S, O'Connor, L, Dickins, RA, Blewitt, ME, and Ritchie, ME (2014).
+edgeR: a versatile tool for the analysis of shRNA-seq and CRISPR-Cas9 genetic screens.
+\emph{F1000Research} 3, 95.
+\url{http://f1000research.com/articles/3-95}
 }
diff --git a/src/R_add_prior_count.cpp b/src/R_add_prior_count.cpp
new file mode 100644
index 0000000..cb0a53f
--- /dev/null
+++ b/src/R_add_prior_count.cpp
@@ -0,0 +1,64 @@
+#include "matvec_check.h"
+#include "utils.h"
+#include "add_prior.h"
+
+/**** Adding a prior count to each observation. *******/
+
+SEXP R_add_prior_count(SEXP y, SEXP offset, SEXP prior) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+    double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    add_prior AP(num_tags, num_libs, prior, offset, true, true);
+    const double* const out_prior=AP.get_priors();
+    const double* const out_off=AP.get_offsets();
+    const bool priors_are_the_same=AP.same_across_rows();
+
+    SEXP output=PROTECT(allocVector(VECSXP, 2));
+    try {
+        SET_VECTOR_ELT(output, 0, allocMatrix(REALSXP, num_tags, num_libs));
+        double* outptr=REAL(VECTOR_ELT(output, 0));
+
+        double* libptr;
+        if (priors_are_the_same) {
+            // Just doing this once to save time, if they are all the same.
+            AP.fill_and_next();
+            SET_VECTOR_ELT(output, 1, allocVector(REALSXP, num_libs));
+            libptr=REAL(VECTOR_ELT(output, 1));
+            std::copy(out_off, out_off+num_libs, libptr);
+        } else {
+            SET_VECTOR_ELT(output, 1, allocMatrix(REALSXP, num_tags, num_libs));
+            libptr=REAL(VECTOR_ELT(output, 1));
+        }
+
+        // Adding a library size-adjusted prior to each count.
+        int lib;
+        for (int tag=0; tag<num_tags; ++tag) { 
+            counts.fill_and_next(count_ptr);
+
+            if (!priors_are_the_same) {
+                // Repeating with the next set of priors/offsets, and storing the new offsets.
+                AP.fill_and_next();
+                for (lib=0; lib<num_libs; ++lib) {
+                    libptr[lib*num_tags]=out_off[lib];
+                }
+                ++libptr;
+            }
+
+            for (lib=0; lib<num_libs; ++lib) {
+                outptr[lib*num_tags]=count_ptr[lib] + out_prior[lib];
+            }
+            ++outptr;
+        }
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
+
diff --git a/src/R_add_repeat_matrices.cpp b/src/R_add_repeat_matrices.cpp
new file mode 100644
index 0000000..8e6c589
--- /dev/null
+++ b/src/R_add_repeat_matrices.cpp
@@ -0,0 +1,41 @@
+#include "matvec_check.h"
+#include "utils.h"
+
+SEXP R_add_repeat_matrices(SEXP x, SEXP y, SEXP nr, SEXP nc) try {
+    if (!isInteger(nr) || LENGTH(nr)!=1) { throw std::runtime_error("number of rows should be an integer scalar"); }
+    const int nrows=asInteger(nr);
+    if (!isInteger(nc) || LENGTH(nc)!=1) { throw std::runtime_error("number of columns should be an integer scalar"); }
+    const int ncols=asInteger(nc);
+
+    matvec_check allx(x, nrows, ncols);
+    const double* const xptr2=allx.access();
+    matvec_check ally(y, nrows, ncols);
+    const double* const yptr2=ally.access();
+
+    SEXP output=PROTECT(allocVector(VECSXP, 3));
+    try {
+        SET_VECTOR_ELT(output, 0, allocMatrix(REALSXP, nrows, ncols));
+        double* optr=REAL(VECTOR_ELT(output, 0));
+        int tag, lib;
+
+        for (tag=0; tag<nrows; ++tag) {
+            for (lib=0; lib<ncols; ++lib) {
+                optr[lib*nrows] = xptr2[lib] + yptr2[lib];
+            }
+            ++optr;
+            allx.advance();
+            ally.advance();
+        }
+
+        SET_VECTOR_ELT(output, 1, ScalarLogical(allx.is_row_repeated() & ally.is_row_repeated()));
+        SET_VECTOR_ELT(output, 2, ScalarLogical(allx.is_col_repeated() & ally.is_col_repeated()));
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
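
In R terms, a sketch of what this routine returns, assuming x and y are compressedMatrix objects carrying the repeat.row/repeat.col attributes described in makeCompressedMatrix.Rd:

library(edgeR)
add_repeat_matrices_R <- function(x, y, nr, nc) {
	# Expand both inputs to the requested dimensions, add them,
	# and AND the repeat flags of the two inputs.
	list(sum=expandAsMatrix(x, dim=c(nr, nc)) + expandAsMatrix(y, dim=c(nr, nc)),
		repeat.row=isTRUE(attr(x, "repeat.row")) && isTRUE(attr(y, "repeat.row")),
		repeat.col=isTRUE(attr(x, "repeat.col")) && isTRUE(attr(y, "repeat.col")))
}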
diff --git a/src/R_ave_log_cpm.cpp b/src/R_ave_log_cpm.cpp
new file mode 100644
index 0000000..08df056
--- /dev/null
+++ b/src/R_ave_log_cpm.cpp
@@ -0,0 +1,62 @@
+#include "glm.h"
+#include "add_prior.h"
+#include "matvec_check.h"
+
+SEXP R_ave_log_cpm(SEXP y, SEXP offset, SEXP prior, SEXP disp, SEXP weights, SEXP max_iterations, SEXP tolerance) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+    double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    add_prior AP(num_tags, num_libs, prior, offset, true, true);
+    const double* const out_prior=AP.get_priors();
+    const double* const out_off=AP.get_offsets();
+    const bool priors_are_the_same=AP.same_across_rows();
+
+    // Set up compressedMatrix-style access to the dispersions and weights.
+    matvec_check alld(disp, num_tags, num_libs);
+    const double* const dptr2=alld.access();
+    matvec_check allw(weights, num_tags, num_libs);
+    const double* const wptr2=allw.access();
+    
+    // GLM fitting specifications
+    const int maxit=asInteger(max_iterations);
+    const double tol=asReal(tolerance);
+
+    SEXP output=PROTECT(allocVector(REALSXP, num_tags));
+    try {
+        double* optr=REAL(output);
+        if (priors_are_the_same) {
+            AP.fill_and_next();
+        }
+        
+        int lib;
+        for (int tag=0; tag<num_tags; ++tag) {
+            counts.fill_and_next(count_ptr);
+            if (!priors_are_the_same) {
+                AP.fill_and_next();
+            }
+
+            // Adding the current set of priors.
+            for (lib=0; lib<num_libs; ++lib) {
+                count_ptr[lib]+=out_prior[lib];
+            }
+
+            // Fitting a one-way layout.
+            std::pair<double,bool> fit=glm_one_group(num_libs, maxit, tol, out_off, wptr2, count_ptr, dptr2, NA_REAL);
+            optr[tag]=(fit.first + LNmillion)/LNtwo;
+
+            allw.advance();
+            alld.advance();
+        }
+
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
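
In R terms, the per-tag value stored above is just the one-group fitted log-mean shifted onto the log2-CPM scale (LNmillion=log(1e6), LNtwo=log(2)):

# Sketch of the final transformation applied to each tag's fitted value:
ave_log_cpm_from_fit <- function(beta) (beta + log(1e6))/log(2)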
diff --git a/src/R_calculate_cpm.cpp b/src/R_calculate_cpm.cpp
new file mode 100644
index 0000000..8b5c22e
--- /dev/null
+++ b/src/R_calculate_cpm.cpp
@@ -0,0 +1,86 @@
+#include "matvec_check.h"
+#include "utils.h"
+#include "add_prior.h"
+
+/**** Calculating the CPMs in cpm.default with log=TRUE, but more memory-efficiently. *******/
+
+SEXP R_calculate_cpm_log (SEXP y, SEXP libsize, SEXP prior) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+    double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    add_prior AP(num_tags, num_libs, prior, libsize, false, true);
+    const double* const out_prior=AP.get_priors();
+    const double* const out_off=AP.get_offsets();
+    const bool priors_are_the_same=AP.same_across_rows();
+
+    SEXP output=PROTECT(allocMatrix(REALSXP, num_tags, num_libs));
+    try {
+        double* outptr=REAL(output);
+        if (priors_are_the_same) { // Just doing this once to save time, if they're all the same.
+            AP.fill_and_next();
+        }
+
+        // Adding a library size-adjusted prior to each count.
+        int lib;
+        for (int tag=0; tag<num_tags; ++tag) { 
+            counts.fill_and_next(count_ptr);
+
+            if (!priors_are_the_same) { // Repeating with the next set of priors/offsets.
+                AP.fill_and_next();
+            }
+
+            for (lib=0; lib<num_libs; ++lib) {
+                double& curval=outptr[lib*num_tags];
+                curval=count_ptr[lib] + out_prior[lib];
+                curval=std::log(curval)-out_off[lib]+LNmillion;
+                curval/=LNtwo;
+            }
+            ++outptr;
+        }
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
+
+/**** Calculating the CPMs in cpm.default with log=FALSE, but more memory-efficiently. *******/
+
+SEXP R_calculate_cpm_raw (SEXP y, SEXP libsize) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+    double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
+    matvec_check allL(libsize, num_tags, num_libs);
+    const double* const lptr2=allL.access();
+
+    SEXP output=PROTECT(allocMatrix(REALSXP, num_tags, num_libs));
+    try {
+        double* outptr=REAL(output);
+        int lib;
+        for (int tag=0; tag<num_tags; ++tag) { 
+            counts.fill_and_next(count_ptr);
+
+            for (lib=0; lib<num_libs; ++lib) {
+                outptr[lib*num_tags]=count_ptr[lib]/(lptr2[lib]/one_million);
+            }
+            ++outptr;
+            allL.advance();
+        }
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
+
+
+
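In R terms, minimal sketches of the two kernels above; the prior scaling follows the same scheme as addPriorCount:

log_cpm_R <- function(y, lib.size, prior.count=2) {
	p <- prior.count * lib.size/mean(lib.size)  # library size-scaled prior
	log2(t((t(y) + p)/(lib.size + 2*p)) * 1e6)  # counts vs adjusted library sizes
}
raw_cpm_R <- function(y, lib.size) t(t(y)/(lib.size/1e6))
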
diff --git a/src/R_check_counts.cpp b/src/R_check_counts.cpp
new file mode 100644
index 0000000..7dac4bc
--- /dev/null
+++ b/src/R_check_counts.cpp
@@ -0,0 +1,137 @@
+#include "matvec_check.h"
+#include "utils.h"
+
+bool isNA(int x) {
+    return x==NA_INTEGER;
+}
+
+bool isNA(double x) {
+    return !R_FINITE(x);
+}
+
+template <typename T>
+SEXP check_counts (const T* ptr, const int len) {
+    int allzero=1;
+    for (int i=0; i<len; ++i) {
+        const T& curval=ptr[i];
+        if (isNA(curval)) {
+            throw std::runtime_error("missing values not supported");
+        }
+        if (curval<0) {
+            throw std::runtime_error("negative counts not supported");
+        }
+        if (curval!=0) {
+            allzero=0;
+        }
+    }
+    return ScalarLogical(allzero);
+}
+
+SEXP R_check_counts(SEXP y) try {
+    count_holder counts(y);
+    const int total_size=counts.get_ntags()*counts.get_nlibs();
+    if (counts.is_data_integer()) {
+        return check_counts<int>(counts.get_raw_int(), total_size);
+    } else {
+        return check_counts<double>(counts.get_raw_double(), total_size);
+    }
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
+
+SEXP R_check_finite (SEXP values, SEXP name) try {
+    if (!isReal(values)) { throw std::runtime_error("should be double-precision"); }
+    const int nobs=LENGTH(values);
+    const double* vptr=REAL(values);
+    for (int o=0; o<nobs; ++o) {
+        const double& curval=vptr[o];
+        if (!R_FINITE(curval)) {
+            throw std::runtime_error("should be finite values");
+        }
+    }
+    return ScalarLogical(1);
+} catch (std::exception& e) {
+    if (!isString(name) || LENGTH(name)!=1) { throw std::runtime_error("value-type name should be a string"); }
+    std::stringstream final;
+    final << CHAR(STRING_ELT(name, 0)) << " " << e.what();
+    return mkString(final.str().c_str());
+}
+
+SEXP R_check_positive (SEXP values, SEXP name) try {
+    if (!isReal(values)) { throw std::runtime_error("should be double-precision"); }
+    const int nobs=LENGTH(values);
+    const double* vptr=REAL(values);
+    for (int o=0; o<nobs; ++o) {
+        const double& curval=vptr[o];
+        if (!R_FINITE(curval) || curval <= 0) {
+            throw std::runtime_error("should be finite positive values");
+        }
+    }
+    return ScalarLogical(1);
+} catch (std::exception& e) {
+    if (!isString(name) || LENGTH(name)!=1) { throw std::runtime_error("value-type name should be a string"); }
+    std::stringstream final;
+    final << CHAR(STRING_ELT(name, 0)) << " " << e.what();
+    return mkString(final.str().c_str());
+}
+
+SEXP R_check_nonnegative (SEXP values, SEXP name) try {
+    if (!isReal(values)) { throw std::runtime_error("should be double-precision"); }
+    const int nobs=LENGTH(values);
+    const double* vptr=REAL(values);
+    for (int o=0; o<nobs; ++o) {
+        const double& curval=vptr[o];
+        if (!R_FINITE(curval) || curval < 0) {
+            throw std::runtime_error("should be finite non-negative values");
+        }
+    }
+    return ScalarLogical(1);
+} catch (std::exception& e) {
+    if (!isString(name) || LENGTH(name)!=1) { throw std::runtime_error("value-type name should be a string"); }
+    std::stringstream final;
+    final << CHAR(STRING_ELT(name, 0)) << " " << e.what();
+    return mkString(final.str().c_str());
+}
+
+template <typename T>
+SEXP check_zero_fitted(const T* yptr, const int num_tags, const int num_libs, SEXP fitted, SEXP tolerance) {
+    const int total_len=num_tags*num_libs;
+
+    if (!isReal(fitted)) { throw std::runtime_error("fitted values must be double-precision"); }
+    if (LENGTH(fitted)!=num_tags*num_libs) { throw std::runtime_error("dimensions of fitted and count matrices are not equal"); }
+    const double* fptr=REAL(fitted);
+
+    if (!isReal(tolerance) || LENGTH(tolerance)!=1) { throw std::runtime_error("tolerance must be a double-precision scalar"); }
+    const double tol=asReal(tolerance);
+
+    SEXP output=PROTECT(allocMatrix(LGLSXP, num_tags, num_libs));
+    try {
+        int* optr=LOGICAL(output);
+        for (int i=0; i<total_len; ++i) {
+            (*optr)=((*fptr < tol) & (*yptr < tol));
+            ++optr;
+            ++fptr;
+            ++yptr;
+        }
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output;
+}
+
+SEXP R_check_zero_fitted(SEXP y, SEXP fitted, SEXP tolerance) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+
+    if (counts.is_data_integer()){ 
+        return check_zero_fitted<int>(counts.get_raw_int(), num_tags, num_libs, fitted, tolerance);
+    } else {
+        return check_zero_fitted<double>(counts.get_raw_double(), num_tags, num_libs, fitted, tolerance);
+    }
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
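All of these entry points share the same error-trapping idiom: a function-try-block catches any C++ exception at the .Call boundary and returns the message as a character vector, which the R wrapper is expected to test for. A minimal sketch of the pattern (the entry-point name is hypothetical):

    #include <Rinternals.h>
    #include <stdexcept>

    SEXP R_example_entry(SEXP x) try {
        if (!isReal(x)) { throw std::runtime_error("input should be double-precision"); }
        return ScalarLogical(1);
    } catch (std::exception& e) {
        // Returned string signals failure; the R side checks for a character result.
        return mkString(e.what());
    }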
diff --git a/src/R_check_poisson_bound.cpp b/src/R_check_poisson_bound.cpp
new file mode 100644
index 0000000..11268ed
--- /dev/null
+++ b/src/R_check_poisson_bound.cpp
@@ -0,0 +1,50 @@
+#include "matvec_check.h"
+
+/* Checks whether the variance is below the Poisson bound. */
+
+SEXP R_check_poisson_bound (SEXP fitted, SEXP disp, SEXP s2) try {
+    if (!isReal(fitted)) { throw std::runtime_error("matrix of fitted values should be double-precision"); }
+    const double* fptr=REAL(fitted);
+
+    SEXP dims=getAttrib(fitted, R_DimSymbol);
+    if (!isInteger(dims) || LENGTH(dims)!=2) { 
+        throw std::runtime_error("matrix dimensions should be an integer vector of length 2");
+    }
+    const int num_tags=INTEGER(dims)[0], num_libs=INTEGER(dims)[1];
+    if (LENGTH(fitted)!=num_libs*num_tags) {
+        throw std::runtime_error("recorded matrix dimensions are not consistent with its length"); 
+    }
+
+    matvec_check alld(disp, num_tags, num_libs);
+    const double* const dptr2=alld.access();
+    matvec_check alls(s2, num_tags, num_libs);
+    const double* const sptr2=alls.access();
+
+    SEXP output=PROTECT(allocVector(LGLSXP, num_tags));
+    try {
+        int* optr=LOGICAL(output);
+        std::fill(optr, optr+num_tags, 0);
+        
+        int lib;
+        for (int tag=0; tag<num_tags; ++tag) {
+            for (lib=0; lib<num_libs; ++lib) {
+                if ((fptr[lib*num_tags] * dptr2[lib] + 1) * sptr2[lib] < 1) {
+                    optr[tag]=1;
+                    break;
+                }
+            }
+
+            ++fptr;
+            alld.advance();
+            alls.advance();
+        }
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e){ 
+    return mkString(e.what());
+}
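The test applied above follows from the quasi-negative-binomial variance function var = s2 * mu * (1 + disp * mu): it drops below the Poisson variance mu exactly when (mu * disp + 1) * s2 < 1. A worked sketch of that condition:

    #include <iostream>

    // True if the quasi-NB variance is below the Poisson bound for one observation.
    bool below_poisson_bound(double mu, double disp, double s2) {
        return (mu * disp + 1) * s2 < 1;
    }

    int main() {
        std::cout << below_poisson_bound(2.0, 0.1, 0.5) << "\n"; // 1: (1.2)*0.5 = 0.6 < 1
        std::cout << below_poisson_bound(2.0, 0.1, 1.0) << "\n"; // 0: (1.2)*1.0 = 1.2 >= 1
        return 0;
    }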
diff --git a/src/R_compute_apl.cpp b/src/R_compute_apl.cpp
new file mode 100644
index 0000000..a9936e9
--- /dev/null
+++ b/src/R_compute_apl.cpp
@@ -0,0 +1,111 @@
+#include "glm.h"
+#include "matvec_check.h"
+
+SEXP R_compute_apl(SEXP y, SEXP means, SEXP disps, SEXP weights, SEXP adjust, SEXP design) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+    double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    // Setting up the means.
+    if (!isReal(means)) {
+        throw std::runtime_error("mean matrix must be double-precision");
+    }
+    if (LENGTH(means)!=LENGTH(y)) {
+        throw std::runtime_error("mean and count matrices must be of same size");
+    }
+    const double* mptr=REAL(means);
+
+    // Setting up the dispersions and weights.
+    matvec_check alld(disps, num_tags, num_libs);
+    const double* const dptr2=alld.access();
+    matvec_check allw(weights, num_tags, num_libs);
+    const double* const wptr2=allw.access();
+
+    // Determining whether we want to do the adjustment.
+    if (!isLogical(adjust) || LENGTH(adjust)!=1) { 
+        throw std::runtime_error("'adjust' must be a logical scalar");
+    }
+    const bool do_adjust=asLogical(adjust);
+    double* W_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    // Setting up the design matrix and the CR adjustment object.
+    if (!isReal(design)) { throw std::runtime_error("design matrix must be double precision"); }
+    const int num_coefs=LENGTH(design)/num_libs;
+    if (num_coefs*num_libs!=LENGTH(design)) { throw std::runtime_error("dimensions of design matrix not consistent with number of libraries"); }
+    adj_coxreid acr(num_libs, num_coefs, REAL(design)); 
+
+    SEXP output=PROTECT(allocVector(REALSXP, num_tags));
+    try {
+        double* optr=REAL(output);
+        double loglik, r, logmur, adj;
+        int lib, index;
+        for (int tag=0; tag<num_tags; ++tag) {
+
+            /* First computing the log-likelihood. */
+            double& sum_loglik=(optr[tag]=0);
+            counts.fill_and_next(count_ptr);
+            index=0;
+            
+            for (lib=0; lib<num_libs; ++lib, index+=num_tags) {
+                const double& cury=count_ptr[lib];
+                const double& curmu=mptr[index];
+                if (curmu==0) { 
+                    continue; // Should only be possible if count is zero, where the log-likelihood would then be 0.
+                }
+
+                if (dptr2[lib] > 0) {
+                    // same as loglik <- rowSums(weights*dnbinom(y,size=1/dispersion,mu=mu,log = TRUE))
+                    r=1/dptr2[lib];
+                    logmur=std::log(curmu+r);
+                    loglik = cury*std::log(curmu) - cury*logmur + r*std::log(r) - r*logmur + lgamma(cury+r) - lgamma(cury+1) - lgamma(r); 
+                } else {
+                    // same as loglik <- rowSums(weights*dpois(y,lambda=mu,log = TRUE))
+                    loglik = cury*std::log(curmu) - curmu - lgamma(cury+1);
+                }
+                sum_loglik += loglik * wptr2[lib]; // with weights.
+            }
+            
+            if (do_adjust) {
+                /* Computing 'W', the matrix of negative binomial probabilities. 
+                 * The class computes 'XtWX' and performs an LDL decomposition 
+                 * to compute the Cox-Reid adjustment factor.
+                 */
+                for (lib=0; lib<num_libs; ++lib) {
+                    const double& curmu=mptr[lib*num_tags];
+                    W_ptr[lib] = wptr2[lib] * curmu/(1 + dptr2[lib] * curmu);
+                }
+
+                if (num_coefs==1) {
+                    adj=0;
+                    for (lib=0; lib<num_libs; ++lib) {
+                        adj+=W_ptr[lib];
+                    }
+                    adj=std::log(std::abs(adj))/2;
+                } else {
+                    std::pair<double, bool> x=acr.compute(W_ptr);
+                    if (!x.second) { 
+                        std::stringstream errout;
+                        errout << "LDL factorization failed for tag " << tag+1;
+                        throw std::runtime_error(errout.str());
+                    }
+                    adj=x.first;
+                }
+                sum_loglik-=adj;
+            }
+
+            ++mptr;
+            alld.advance();
+            allw.advance();
+        }
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
+
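The per-observation log-likelihood assembled in the loop is the standard negative binomial density with size r = 1/dispersion, with a Poisson fallback at zero dispersion. As an isolated sketch:

    #include <cmath>

    // NB log-density with size r = 1/phi; Poisson limit when phi == 0.
    double nb_loglik(double y, double mu, double phi) {
        if (phi > 0) {
            const double r = 1 / phi;
            const double logmur = std::log(mu + r);
            return y * std::log(mu) - y * logmur + r * std::log(r) - r * logmur
                + std::lgamma(y + r) - std::lgamma(y + 1) - std::lgamma(r);
        }
        return y * std::log(mu) - mu - std::lgamma(y + 1);
    }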
diff --git a/src/R_compute_nbdev.cpp b/src/R_compute_nbdev.cpp
index dd6551f..0f54c37 100644
--- a/src/R_compute_nbdev.cpp
+++ b/src/R_compute_nbdev.cpp
@@ -1,40 +1,85 @@
 #include "utils.h"
 #include "glm.h"
+#include "matvec_check.h"
 
-SEXP R_compute_nbdev (SEXP y, SEXP mu, SEXP phi) try {
-	if (!isNumeric(phi)) { throw std::runtime_error("dispersion vector should be double-precision"); }
-	const int ntags=LENGTH(phi);
-	if (!isNumeric(y)) { throw std::runtime_error("count matrix should be double-precision"); }
-	if (!isNumeric(mu)) { throw std::runtime_error("matrix of means should be double-precision"); }
-	const int nlib=LENGTH(mu)/ntags;
-	if (nlib*ntags !=LENGTH(mu)) { throw std::runtime_error("mean matrix has inconsistent dimensions"); }
-	if (LENGTH(mu)!=LENGTH(y)) { throw std::runtime_error("count and mean matrices should have same dimensions"); }
-
-	const double* yptr=REAL(y);
-	const double* mptr=REAL(mu);
-	const double* dptr=REAL(phi);
-
-	// Running through each row and computing the unit deviance, and then that sum.
-	SEXP output=PROTECT(allocMatrix(REALSXP, ntags, nlib));
-	try {
-		double* optr=REAL(output);
-		int counter;
-		for (int i=0; i<ntags; ++i) {
-			counter=0;
-			for (int j=0; j<nlib; ++j, counter+=ntags) {
-				optr[counter]=compute_unit_nb_deviance(yptr[counter], mptr[counter], dptr[i]);
-			}
-			++optr;
-			++yptr;
-			++mptr;
-		}
-	} catch (std::exception& e) {
-		UNPROTECT(1);
-		throw;
-	}
-
-	UNPROTECT(1);
-	return output;
+SEXP R_compute_nbdev (SEXP y, SEXP mu, SEXP phi, SEXP weights, SEXP dosum) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+    double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    // Setting up means.
+	if (!isReal(mu)) { throw std::runtime_error("matrix of means should be double-precision"); }
+    if (LENGTH(mu)!=num_tags*num_libs) { throw std::runtime_error("length of means is not consistent with length of counts"); }
+    const double* mptr=REAL(mu);
+
+    // Setting up dispersions.
+    matvec_check alld(phi, num_tags, num_libs);
+    const double* const dptr2=alld.access();
+
+    // Seeing if we have to sum things together.
+    if (!isLogical(dosum) || LENGTH(dosum)!=1) {
+        throw std::runtime_error("summation specification should be a logical scalar");
+    }
+    const bool sumtogether=asLogical(dosum);
+    int tag, lib, index;
+
+    if (sumtogether) {
+        // Setting up weights.
+        matvec_check allw(weights, num_tags, num_libs);
+        const double* const wptr2=allw.access();
+
+        SEXP output=PROTECT(allocVector(REALSXP, num_tags));
+        try {
+            // Running through each row and computing the unit deviance, and then computing the weighted sum.
+            double* optr=REAL(output);
+            for (tag=0; tag<num_tags; ++tag) {
+                counts.fill_and_next(count_ptr);
+
+                double& current_sumdev=(optr[tag]=0);
+                index=0;
+                for (lib=0; lib<num_libs; ++lib) {
+                    current_sumdev += compute_unit_nb_deviance(count_ptr[lib], mptr[index], dptr2[lib]) * wptr2[lib];
+                    index+=num_tags;
+                }
+
+                ++mptr;
+                alld.advance();
+                allw.advance();
+            }
+        } catch (std::exception& e) {
+            UNPROTECT(1);
+            throw;
+        }
+
+        UNPROTECT(1);
+        return output;
+    } else {
+        SEXP output=PROTECT(allocMatrix(REALSXP, num_tags, num_libs));
+        try {
+            // Computing unit deviance for each observation.
+            double* optr=REAL(output);
+            for (tag=0; tag<num_tags; ++tag) {
+                counts.fill_and_next(count_ptr);
+
+                index=0;
+                for (lib=0; lib<num_libs; ++lib) {
+                    optr[index] = compute_unit_nb_deviance(count_ptr[lib], mptr[index], dptr2[lib]);
+                    index += num_tags;
+                }
+
+                ++optr;
+                ++mptr;
+                alld.advance();
+            }
+        } catch (std::exception& e) {
+            UNPROTECT(1);
+            throw;
+        }
+
+        UNPROTECT(1);
+        return output;
+    }
 } catch(std::exception& e) {
 	return mkString(e.what());
 }
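For reference, the unit deviance being summed is the textbook negative binomial deviance; the sketch below omits the guards for very large or very small mu*phi that the actual nbdev.cpp applies:

    #include <cmath>

    // Textbook unit NB deviance; the y*log(y/mu) term is taken as 0 when y == 0.
    double unit_nb_deviance(double y, double mu, double phi) {
        const double ylogterm = (y > 0) ? y * std::log(y / mu) : 0;
        if (phi > 0) {
            const double r = 1 / phi;
            return 2 * (ylogterm - (y + r) * std::log((y + r) / (mu + r)));
        }
        return 2 * (ylogterm - (y - mu)); // Poisson limit.
    }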
diff --git a/src/R_cr_adjust.cpp b/src/R_cr_adjust.cpp
deleted file mode 100644
index 4ed4a81..0000000
--- a/src/R_cr_adjust.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "glm.h"
-
-/* 'w' represents a matrix of negative binomial probabilities (i.e.
- * the prob of success/failure, a function of mean and dispersion)
- * whereas 'x' represents the design matrix. This function calculates
- * the multiplication of the matrices, then performs a Cholesky decomposition
- * to get the lower triangular matrix 'L'. The diagonal of 'L' can then
- * be used to compute the Cox-Reid adjustment factor.
- */
-SEXP R_cr_adjust (SEXP w, SEXP x, SEXP nlibs) try {
-	if (!isNumeric(w)) { throw std::runtime_error("matrix of likelihoods must be double precision"); }
-	if (!isNumeric(x)) { throw std::runtime_error("design matrix must be double precision"); }
-
-    const int num_libs=asInteger(nlibs);
-    const int num_tags=LENGTH(w)/num_libs;
-    const int num_coefs=LENGTH(x)/num_libs;
-    
-    // Setting up a couple of indices for quick access.
-    adj_coxreid acr(num_libs, num_coefs, REAL(x));
-	const double* wptr=REAL(w);
-
-   	SEXP output=PROTECT(allocVector(REALSXP, num_tags));
-    double* out_ptr=REAL(output);
-    try {
-		// Running through each tag.
-       	for (int tag=0; tag<num_tags; ++tag) {
-			std::pair<double, bool> x=acr.compute(wptr);
-			if (!x.second) { 
-				std::stringstream errout;
-				errout << "LDL factorization failed for tag " << tag+1;
-				throw std::runtime_error(errout.str());
-			}
-			out_ptr[tag]=x.first;
-			wptr+=num_libs;
-    	} 
-	} catch (std::exception& e) {
-		UNPROTECT(1);
-		throw; 
-	}
-
-    UNPROTECT(1);
-    return output;
-} catch (std::exception& e) { return mkString(e.what()); }
diff --git a/src/R_get_one_way_fitted.cpp b/src/R_get_one_way_fitted.cpp
new file mode 100644
index 0000000..e528de7
--- /dev/null
+++ b/src/R_get_one_way_fitted.cpp
@@ -0,0 +1,60 @@
+#include "glm.h"
+#include "matvec_check.h"
+
+/*** Function to compute the fitted values without a lot of temporary matrices. ***/
+
+SEXP R_get_one_way_fitted (SEXP beta, SEXP offset, SEXP groups) try { 
+    SEXP dims=getAttrib(beta, R_DimSymbol);
+    if (!isInteger(dims) || LENGTH(dims)!=2) { 
+        throw std::runtime_error("matrix dimensions should be an integer vector of length 2");
+    }
+    int num_tags=INTEGER(dims)[0];
+    int num_coefs=INTEGER(dims)[1];
+
+    if (!isReal(beta)) {
+        throw std::runtime_error("beta matrix should be double-precision");
+    }
+    if (LENGTH(beta)!=num_tags*num_coefs) {
+        throw std::runtime_error("recorded matrix dimensions are not consistent with its length"); 
+    }
+    const double* bptr=REAL(beta);
+    double* bptr2=(double*)R_alloc(num_coefs, sizeof(double));
+
+    if (!isInteger(groups)) {
+        throw std::runtime_error("grouping vector should be integer");
+    }
+    int num_libs=LENGTH(groups);
+    int* gptr=INTEGER(groups);
+
+    matvec_check allo(offset, num_tags, num_libs);
+    const double* optr2=allo.access();
+
+    SEXP output=PROTECT(allocMatrix(REALSXP, num_tags, num_libs));
+    try {
+        double* outptr=REAL(output);
+        int lib, coef;
+        for (int tag=0; tag<num_tags; ++tag) {
+            // Storing to a single vector for faster caching.
+            for (coef=0; coef<num_coefs; ++coef) {
+                bptr2[coef]=bptr[coef*num_tags];
+            }
+            ++bptr;
+
+            // Caching is going to be suboptimal, but oh well.
+            for (lib=0; lib<num_libs; ++lib) {
+                outptr[lib*num_tags]=std::exp(optr2[lib] + bptr2[gptr[lib]]);
+            }
+            ++outptr;
+            allo.advance();
+        }
+    } catch (std::exception& e) {
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
+
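In other words, the fitted value for each observation is exp(offset + beta of its group). A sketch for a single tag (inputs are illustrative):

    #include <cmath>
    #include <vector>

    // Fitted values for one tag under a one-way layout: exp(offset + group coefficient).
    std::vector<double> one_way_fitted(const std::vector<double>& offset,
                                       const std::vector<double>& beta,
                                       const std::vector<int>& group) {
        std::vector<double> fitted(offset.size());
        for (std::size_t lib = 0; lib < offset.size(); ++lib) {
            fitted[lib] = std::exp(offset[lib] + beta[group[lib]]);
        }
        return fitted;
    }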
diff --git a/src/R_initialize_levenberg.cpp b/src/R_initialize_levenberg.cpp
new file mode 100644
index 0000000..aee5b31
--- /dev/null
+++ b/src/R_initialize_levenberg.cpp
@@ -0,0 +1,206 @@
+#include "glm.h"
+#include "matvec_check.h"
+
+/* Different initialization methods for the Levenberg coefficients */
+
+const char side='L';
+const char trans_ormqr='T';
+const char uplo='U';
+const char trans_trtrs='N';
+const char diag='N';
+const int unity=1;
+
+struct QRdecomposition {
+    QRdecomposition(const double* curX, const int nrows, const int ncoefs) : X(curX), NR(nrows), NC(ncoefs) {
+        Xcopy=(double*)R_alloc(NR*NC, sizeof(double));
+        tau=(double*)R_alloc(NC, sizeof(double));
+
+        // Also setting up a vector for weights and effects.
+        weights=(double*)R_alloc(NR, sizeof(double));
+        effects=(double*)R_alloc(NR, sizeof(double));
+
+        // Setting up the workspace for dgeqrf.
+        double tmpwork;
+        lwork_geqrf=lwork_oqrmqr=-1;
+        F77_CALL(dgeqrf)(&NR, &NC, Xcopy, &NR, tau, &tmpwork, &lwork_geqrf, &info);
+
+        // Loading up the optimal WORK.
+        lwork_geqrf=tmpwork+0.5;
+        work_geqrf=(double*)R_alloc(lwork_geqrf, sizeof(double));
+
+        // Repeating for dormqr
+        F77_CALL(dormqr)(&side, &trans_ormqr, &NR, &unity, &NC, Xcopy, &NR, tau, effects, &NR, &tmpwork, &lwork_oqrmqr, &info);
+        lwork_oqrmqr=tmpwork+0.5;
+        work_oqrmqr=(double*)R_alloc(lwork_oqrmqr, sizeof(double));
+
+        return;
+    }
+
+    void store_weights(const double* w) {
+        if (w==NULL) {
+            std::fill(weights, weights+NR, 1);
+        } else {
+            for (row=0; row<NR; ++row) {
+                weights[row]=std::sqrt(w[row]);
+            }
+        }
+        return;
+    }
+
+    void decompose() {
+        std::copy(X, X+NR*NC, Xcopy);
+        index=0;
+        for (coef=0; coef<NC; ++coef) {
+            for (row=0; row<NR; ++row) {
+                Xcopy[index]*=weights[row];
+                ++index;
+            }
+        }
+ 
+        F77_CALL(dgeqrf)(&NR, &NC, Xcopy, &NR, tau, work_geqrf, &lwork_geqrf, &info);
+        if (info) {
+            throw std::runtime_error("QR decomposition failed");
+        }
+        return;
+    } 
+
+    void solve(const double * y) {
+        for (row=0; row<NR; ++row) {
+            effects[row]=y[row]*weights[row];
+        }
+
+        F77_CALL(dormqr)(&side, &trans_ormqr, &NR, &unity, &NC, Xcopy, &NR, tau, effects, &NR, work_oqrmqr, &lwork_oqrmqr, &info);
+        if (info) {
+            throw std::runtime_error("Q**T multiplication failed");
+        }
+
+        F77_CALL(dtrtrs)(&uplo, &trans_trtrs, &diag, &NC, &unity, Xcopy, &NR, effects, &NR, &info);
+        if (info) {
+            throw std::runtime_error("failed to solve the triangular system");
+        }
+
+        return;
+    }
+
+    const double* X;
+    double * Xcopy, * tau, * effects, *weights, *work_geqrf, *work_oqrmqr;
+    const int NR, NC;
+    int lwork_geqrf, lwork_oqrmqr, info;
+    int row, coef, index;
+};
+
+
+SEXP R_get_levenberg_start(SEXP y, SEXP offset, SEXP disp, SEXP weights, SEXP design, SEXP use_null) try {
+    if (!isReal(design)) { throw std::runtime_error("design matrix should be double precision"); }
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+    double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    // Getting and checking the dimensions of the arguments.    
+    const int dlen=LENGTH(design);
+    if (dlen%num_libs!=0) { throw std::runtime_error("size of design matrix is incompatible with number of libraries"); }
+    const int num_coefs=dlen/num_libs;
+
+    // Initializing pointers to the assorted features.
+    const double *design_ptr=REAL(design);
+    matvec_check allo(offset, num_tags, num_libs);
+    const double* const optr2=allo.access();
+    matvec_check allw(weights, num_tags, num_libs);
+    const double* const wptr2=allw.access();
+    matvec_check alld(disp, num_tags, num_libs);
+    const double* const dptr2=alld.access();
+
+    // Determining what type of algorithm is to be used.
+    if (!isLogical(use_null) || LENGTH(use_null)!=1) {
+        throw std::runtime_error("'use_null' specification should be a logical scalar");
+    }
+    const bool null_method=asLogical(use_null);
+
+    SEXP output_beta=PROTECT(allocMatrix(REALSXP, num_tags, num_coefs));
+    try {
+        double* obptr=REAL(output_beta);
+        QRdecomposition QR(design_ptr, num_libs, num_coefs);
+        double* tmp_exprs=(double*)R_alloc(num_libs, sizeof(double));
+        int lib, coef;
+
+        if (null_method) {
+            QR.store_weights(NULL);
+            QR.decompose();
+            double sum_exprs=0, sum_weight=0, curN, curweight;
+            for (int tag=0; tag<num_tags; ++tag) {
+                counts.fill_and_next(count_ptr);
+               
+                // Computing weighted average of the count:library size ratios.
+                sum_weight=0; 
+                sum_exprs=0;
+                for (lib=0; lib<num_libs; ++lib) {
+                    curN=std::exp(optr2[lib]);
+                    curweight=wptr2[lib]*curN/(1 + dptr2[lib] * curN);
+                    sum_exprs += count_ptr[lib] * curweight / curN;
+                    sum_weight += curweight;
+                }
+                std::fill(tmp_exprs, tmp_exprs + num_libs, std::log(sum_exprs/sum_weight));
+
+                // Performing the QR decomposition and taking the solution.
+                QR.solve(tmp_exprs);
+                for (coef=0; coef<num_coefs; ++coef) {
+                    obptr[coef*num_tags]=QR.effects[coef];
+                }
+
+                ++obptr;
+                allo.advance();
+                allw.advance();
+                alld.advance();
+            }
+        } else {
+            bool weights_are_the_same=allw.is_row_repeated();
+            if (weights_are_the_same) {
+                QR.store_weights(wptr2);
+                QR.decompose();
+            }
+
+            // Finding the delta.
+            double delta;
+            if (counts.is_data_integer()) {
+                const int* ciptr=counts.get_raw_int();
+                delta=*std::max_element(ciptr, ciptr+num_libs*num_tags);
+            } else {
+                const double* cdptr=counts.get_raw_double();
+                delta=*std::max_element(cdptr, cdptr+num_libs*num_tags);
+            }
+            delta=std::min(delta, 1.0/6);
+
+            for (int tag=0; tag<num_tags; ++tag) {
+                if (!weights_are_the_same) {
+                    QR.store_weights(wptr2);
+                    QR.decompose();
+                    allw.advance();
+                }
+                counts.fill_and_next(count_ptr);
+              
+                // Computing normalized log-expression values.
+                for (lib=0; lib<num_libs; ++lib) {
+                    tmp_exprs[lib]=std::log(std::max(delta, count_ptr[lib])) - optr2[lib];
+                }
+
+                // Performing the QR decomposition and taking the solution.
+                QR.solve(tmp_exprs);
+                for (coef=0; coef<num_coefs; ++coef) {
+                    obptr[coef*num_tags]=QR.effects[coef];
+                }
+
+                ++obptr;
+                allo.advance();
+            }
+        }
+    } catch (std::exception& e){
+        UNPROTECT(1);
+        throw;
+    }
+
+    UNPROTECT(1);
+    return output_beta;
+} catch (std::exception& e) {
+    return mkString(e.what());
+}
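The 'null' starting values above amount to the log of a weighted mean of count:library-size ratios, with working weights w*N/(1 + phi*N), broadcast to all libraries before the least-squares solve. In isolation, for one tag (a sketch, not the exact code path):

    #include <cmath>
    #include <vector>

    // Null-method start for one tag: log of the weighted mean count:library-size ratio.
    double null_start(const std::vector<double>& y, const std::vector<double>& log_offset,
                      const std::vector<double>& w, const std::vector<double>& phi) {
        double sum_exprs = 0, sum_weight = 0;
        for (std::size_t lib = 0; lib < y.size(); ++lib) {
            const double N = std::exp(log_offset[lib]);
            const double weight = w[lib] * N / (1 + phi[lib] * N);
            sum_exprs += y[lib] * weight / N;
            sum_weight += weight;
        }
        return std::log(sum_exprs / sum_weight);
    }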
diff --git a/src/R_levenberg.cpp b/src/R_levenberg.cpp
index 5aa701b..9bb0742 100644
--- a/src/R_levenberg.cpp
+++ b/src/R_levenberg.cpp
@@ -1,56 +1,36 @@
 #include "glm.h"
 #include "matvec_check.h"
 
-SEXP R_levenberg (SEXP nlib, SEXP ntag, SEXP design, SEXP counts, SEXP disp, SEXP offset, SEXP weights,
-		SEXP beta, SEXP fitted, SEXP tol, SEXP maxit) try {
-	if (!isNumeric(design)) { throw  std::runtime_error("design matrix should be double precision"); }
-	if (!isNumeric(disp)) { throw std::runtime_error("dispersion matrix should be double precision"); }
-	if (!isNumeric(beta)) { throw std::runtime_error("matrix of start values for coefficients should be double precision"); }
-	if (!isNumeric(fitted)) { throw std::runtime_error("matrix of starting fitted values should be double precision"); }
-    const int num_tags=asInteger(ntag);
-    const int num_libs=asInteger(nlib);
-
-	// Checking the count matrix.
-    const double *cdptr=NULL;
-    const int* ciptr=NULL;
+SEXP R_levenberg (SEXP design, SEXP y, SEXP disp, SEXP offset, SEXP weights,
+		SEXP beta, SEXP tol, SEXP maxit) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
     double* count_ptr=(double*)R_alloc(num_libs, sizeof(double));
-    bool is_integer=isInteger(counts);
-    if (is_integer) {
-        ciptr=INTEGER(counts);
-    } else {
-        if (!isNumeric(counts)) { throw std::runtime_error("count matrix must be integer or double-precision"); }
-        cdptr=REAL(counts); 
-    }
 	
     // Getting and checking the dimensions of the arguments.    
+	if (!isReal(design)) { throw  std::runtime_error("design matrix should be double precision"); }
     const int dlen=LENGTH(design);
-	const int clen=LENGTH(counts);
     if (dlen%num_libs!=0) { throw std::runtime_error("size of design matrix is incompatible with number of libraries"); }
     const int num_coefs=dlen/num_libs;
-    if (clen!=num_tags*num_libs) { 
-        throw std::runtime_error("dimensions of the count matrix are not as specified");
-    } else if (LENGTH(beta)!=num_tags*num_coefs) {
+    if (!isReal(beta)) { throw std::runtime_error("starting coefficient values must be double-precision"); }
+    if (LENGTH(beta)!=num_tags*num_coefs) {
         throw std::runtime_error("dimensions of the beta matrix do not match to the number of tags and coefficients");
-    } else if (LENGTH(fitted)!=clen) {
-        throw std::runtime_error("dimensions of the fitted matrix do not match those of the count matrix");
-    } else if (LENGTH(disp)!=clen) { 
-		throw std::runtime_error("dimensions of dispersion matrix is not as specified"); 
-	} 
+    } 
 
     // Initializing pointers to the assorted features.
-    const double* beta_ptr=REAL(beta), 
-		  *design_ptr=REAL(design), 
-	  	  *fitted_ptr=REAL(fitted), 
-		  *disp_ptr=REAL(disp);
-    matvec_check allo(num_libs, num_tags, offset, true, "offset");
-    const double* const* optr2=allo.access();
-    matvec_check allw(num_libs, num_tags, weights, true, "weight", 1);
-    const double* const* wptr2=allw.access();
+    const double *design_ptr=REAL(design), *bptr=REAL(beta);
+    matvec_check allo(offset, num_tags, num_libs);
+    const double* const optr2=allo.access();
+    matvec_check allw(weights, num_tags, num_libs);
+    const double* const wptr2=allw.access();
+    matvec_check alld(disp, num_tags, num_libs);
+    const double* const dptr2=alld.access();
 
     // Initializing output cages.
     SEXP output=PROTECT(allocVector(VECSXP, 5));
-   	SET_VECTOR_ELT(output, 0, allocMatrix(REALSXP, num_coefs, num_tags)); // beta (transposed)
-   	SET_VECTOR_ELT(output, 1, allocMatrix(REALSXP, num_libs, num_tags)); // new fitted (transposed)
+   	SET_VECTOR_ELT(output, 0, allocMatrix(REALSXP, num_tags, num_coefs)); // beta 
+   	SET_VECTOR_ELT(output, 1, allocMatrix(REALSXP, num_tags, num_libs)); // new fitted 
 	SET_VECTOR_ELT(output, 2, allocVector(REALSXP, num_tags));
 	SET_VECTOR_ELT(output, 3, allocVector(INTSXP, num_tags));
 	SET_VECTOR_ELT(output, 4, allocVector(LGLSXP, num_tags));
@@ -59,41 +39,41 @@ SEXP R_levenberg (SEXP nlib, SEXP ntag, SEXP design, SEXP counts, SEXP disp, SEX
     double* dev_ptr=REAL(VECTOR_ELT(output, 2));
     int* iter_ptr=INTEGER(VECTOR_ELT(output, 3));
     int* fail_ptr=LOGICAL(VECTOR_ELT(output, 4));
+
+    double* tmp_beta_ptr=(double*)R_alloc(num_coefs, sizeof(double));
+    double* tmp_fitted_ptr=(double*)R_alloc(num_libs, sizeof(double));
+
 	try {
        	// Running through each tag and fitting the NB GLM.
 		glm_levenberg glbg(num_libs, num_coefs, design_ptr, asInteger(maxit), asReal(tol));
+        int lib, coef;
     	for (int tag=0; tag<num_tags; ++tag) {
+            counts.fill_and_next(count_ptr);
+	
+			// Copying elements to the tmp_beta as these are modified in-place.
+            for (coef=0; coef<num_coefs; ++coef) {
+                tmp_beta_ptr[coef]=bptr[coef*num_tags];
+            }
+            ++bptr;
 
-			// Copying integer/double counts to a new vector.
-            if (is_integer) {
-				for (int i=0; i<num_libs; ++i) { count_ptr[i]=double(ciptr[i]); }
-				ciptr+=num_libs;
-			} else {
-				for (int i=0; i<num_libs; ++i) { count_ptr[i]=cdptr[i]; }
-				cdptr+=num_libs;
-			}
-		
-			// Copying elements to the new_beta and new_fitted, so output is automatically stored.
-			for (int i=0; i<num_libs; ++i) { new_fitted_ptr[i]=fitted_ptr[i]; }
-			for (int i=0; i<num_coefs; ++i) { new_beta_ptr[i]=beta_ptr[i]; }
-			if (glbg.fit(*optr2, count_ptr, 
-#ifdef WEIGHTED
-						*wptr2,
-#endif
-						disp_ptr, new_fitted_ptr, new_beta_ptr)) {
-				std::stringstream errout;
+			if (glbg.fit(optr2, count_ptr, wptr2, dptr2, tmp_fitted_ptr, tmp_beta_ptr)) {
+                std::stringstream errout;
 				errout<< "solution using Cholesky decomposition failed for tag " << tag+1;
 				throw std::runtime_error(errout.str());
 			} 
 			allo.advance();
 			allw.advance();
+            alld.advance();
 			
-			disp_ptr+=num_libs;
-			fitted_ptr+=num_libs;
-			new_fitted_ptr+=num_libs;
-			beta_ptr+=num_coefs;
-			new_beta_ptr+=num_coefs;
-			
+            for (lib=0; lib<num_libs; ++lib) {
+			    new_fitted_ptr[lib*num_tags]=tmp_fitted_ptr[lib];
+            }
+            ++new_fitted_ptr;
+            for (coef=0; coef<num_coefs; ++coef) {
+                new_beta_ptr[coef*num_tags]=tmp_beta_ptr[coef];
+            }
+            ++new_beta_ptr;
+
 			*(dev_ptr++)=glbg.get_deviance();
 			*(iter_ptr++)=glbg.get_iterations();
 			*(fail_ptr++)=glbg.is_failure();
@@ -106,3 +86,4 @@ SEXP R_levenberg (SEXP nlib, SEXP ntag, SEXP design, SEXP counts, SEXP disp, SEX
     UNPROTECT(1);
     return output;   
 } catch (std::exception& e) { return mkString(e.what()); }
+
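Note the new output layout: both matrices are now tag-major (num_tags rows), so each per-tag result is scattered back with a stride of num_tags, matching R's column-major storage. The idiom in isolation:

    // Writing one tag's results into a column-major R matrix of dimension
    // num_tags x num_coefs: element (tag, coef) lives at out[coef*num_tags + tag].
    void scatter_row(double* out, const double* tmp, int tag, int num_tags, int num_coefs) {
        for (int coef = 0; coef < num_coefs; ++coef) {
            out[coef * num_tags + tag] = tmp[coef];
        }
    }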
diff --git a/src/R_one_group.cpp b/src/R_one_group.cpp
index 83d36cd..81b5b44 100644
--- a/src/R_one_group.cpp
+++ b/src/R_one_group.cpp
@@ -1,78 +1,108 @@
 #include "glm.h"
 #include "matvec_check.h"
 
-SEXP R_one_group (SEXP nlib, SEXP ntag, SEXP y, SEXP disp, SEXP offsets, SEXP weights, SEXP max_iterations, SEXP tolerance, SEXP beta) try {
-    const int num_tags=asInteger(ntag);
-    const int num_libs=asInteger(nlib);
-    if (!isNumeric(disp)) { throw std::runtime_error("dispersion matrix must be double precision"); }
-    if (num_tags*num_libs !=LENGTH(disp)) { throw std::runtime_error("dimensions of dispersion vector is not equal to number of tags"); }
-	if (num_tags*num_libs != LENGTH(y) ) { throw std::runtime_error("dimensions of the count table are not as specified"); }  // Checking that it is an exact division.
-  
-	if (!isNumeric(beta)) { throw std::runtime_error("beta start vector must be double precision"); }
-	const int blen=LENGTH(beta);
-	const bool use_old_start=(blen!=0);
-	if (use_old_start && blen!=num_tags) { 
-		throw std::runtime_error("non-empty start vector must have length equal to the number of tags"); 		
-	}
-	const double* bsptr=REAL(beta);
-   
+template <typename T>
+bool is_array_equal_to(const T* x, const int n, const bool rep, const T& v) {
+    if (rep) {
+        return (n>0 && x[0]==v);
+    } else {
+        for (int i=0; i<n; ++i) {
+            if (x[i]!=v) { return false; }
+        }
+        return true;
+    }
+}
+
+SEXP R_one_group (SEXP y, SEXP disp, SEXP offsets, SEXP weights, SEXP max_iterations, SEXP tolerance, SEXP beta) try {
+    count_holder counts(y);
+    const int num_tags=counts.get_ntags();
+    const int num_libs=counts.get_nlibs();
+  	double* yptr=(double*)R_alloc(num_libs, sizeof(double));
+
+    // Setting up assorted input matrices.
+    matvec_check allo(offsets, num_tags, num_libs);
+	const double* const optr2=allo.access();
+	matvec_check allw(weights, num_tags, num_libs);
+	const double* const wptr2=allw.access();
+    matvec_check alld(disp, num_tags, num_libs);
+	const double* const dptr2=alld.access();
+    matvec_check allb(beta, num_tags, 1); // only one coefficient.
+    const double* const bptr2=allb.access();
+
+    // GLM iterations.
 	const int maxit=asInteger(max_iterations);
 	const double tol=asReal(tolerance);
- 
-    // Setting up some iterators. We provide some flexibility to detecting numeric-ness.
-	double *ydptr=NULL;
-	int* yiptr=NULL;
-	double* yptr=(double*)R_alloc(num_libs, sizeof(double));
-	bool is_integer=isInteger(y);
-	if (is_integer) { 
-		yiptr=INTEGER(y); 
-	} else { 
-		if (!isNumeric(y)) { throw std::runtime_error("count matrix must be integer or double-precision"); }
-		ydptr=REAL(y); 
-	}
-    matvec_check allo(num_libs, num_tags, offsets, false, "offset");
-	const double* const* optr2=allo.access();
-	matvec_check allw(num_libs, num_tags, weights, false, "weight", 1);
-	const double* const* wptr2=allw.access();
-	const double* dptr=REAL(disp);
 
-    // Setting up beta for output. 
+    // Setting up beta for output.
 	SEXP output=PROTECT(allocVector(VECSXP, 2));
 	SET_VECTOR_ELT(output, 0, allocVector(REALSXP, num_tags));
 	SET_VECTOR_ELT(output, 1, allocVector(LGLSXP, num_tags));
     double* bptr=REAL(VECTOR_ELT(output, 0));
 	int* cptr=LOGICAL(VECTOR_ELT(output, 1));
 	try {
-        
+
+        // Preparing for possible Poisson sums.
+        bool disp_is_zero=true, weight_is_one=true;
+        double sum_counts, sum_lib=0;
+        int lib;
+        if (allo.is_row_repeated()) {
+            for (lib=0; lib<num_libs; ++lib) {
+                sum_lib+=std::exp(optr2[lib]);
+            }
+        }
+        if (alld.is_row_repeated()) {
+            disp_is_zero=is_array_equal_to<double>(dptr2, num_libs, alld.is_col_repeated(), 0);
+        }
+        if (allw.is_row_repeated()) {
+            weight_is_one=is_array_equal_to<double>(wptr2, num_libs, allw.is_col_repeated(), 1);
+        }
+
     	// Iterating through tags and fitting.
-    	int lib=0;
     	for (int tag=0; tag<num_tags; ++tag) {
-			if (is_integer) { 
-				for (lib=0; lib<num_libs; ++lib) { yptr[lib]=double(yiptr[lib]); }	
-				yiptr+=num_libs;
-			} else {
-				yptr=ydptr;
-				ydptr+=num_libs;
-			}
-			std::pair<double, bool> out=glm_one_group(num_libs, maxit, tol, *optr2,
-#ifdef WEIGHTED					
-					*wptr2,
-#endif					
-					yptr, dptr, (use_old_start ? bsptr[tag] : R_NaReal));
+            counts.fill_and_next(yptr);
+
+            // Checking for the Poisson special case with all-unity weights and all-zero dispersions.
+            if (!alld.is_row_repeated()) {
+                disp_is_zero=is_array_equal_to<double>(dptr2, num_libs, alld.is_col_repeated(), 0);
+            }
+            if (!allw.is_row_repeated()) {
+                weight_is_one=is_array_equal_to<double>(wptr2, num_libs, allw.is_col_repeated(), 1);
+            }
+
+            if (disp_is_zero && weight_is_one) {
+                if (!allo.is_row_repeated()) {
+                    // Only recalculate sum of library sizes if it has changed.
+                    sum_lib=0;
+                    for (lib=0; lib<num_libs; ++lib) { sum_lib+=std::exp(optr2[lib]); }
+                }
+
+                sum_counts=0;
+                for (lib=0; lib<num_libs; ++lib) { sum_counts+=yptr[lib]; }
+                if (sum_counts==0) {
+                    bptr[tag]=R_NegInf;
+                } else {
+                    bptr[tag]=std::log(sum_counts/sum_lib);
+                }
+                cptr[tag]=true;
+            } else {
+                // Otherwise going through NR iterations.
+                std::pair<double, bool> out=glm_one_group(num_libs, maxit, tol, optr2, wptr2, yptr, dptr2, *bptr2);
+                bptr[tag]=out.first;
+                cptr[tag]=out.second;
+            }
 
-			bptr[tag]=out.first;
-			cptr[tag]=out.second;
-			dptr+=num_libs;
 			allo.advance();
 			allw.advance();
+            alld.advance();
+            allb.advance();
     	}
-	} catch (std::exception& e) { 
+	} catch (std::exception& e) {
 		UNPROTECT(1);
-		throw; 
+		throw;
 	}
 
 	// Returning everything as a list.
-    UNPROTECT(1); 
+    UNPROTECT(1);
     return output;
 } catch (std::exception& e) {
 	return mkString(e.what());
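The Poisson shortcut taken above has a closed form: with zero dispersion and unit weights, the maximum likelihood estimate of the single coefficient is log(total counts / total library size). A sketch:

    #include <cmath>
    #include <limits>
    #include <vector>

    // Closed-form one-group Poisson MLE: log(sum(y) / sum(exp(offset))).
    double poisson_one_group(const std::vector<double>& y, const std::vector<double>& offset) {
        double sum_counts = 0, sum_lib = 0;
        for (std::size_t lib = 0; lib < y.size(); ++lib) {
            sum_counts += y[lib];
            sum_lib += std::exp(offset[lib]);
        }
        if (sum_counts == 0) return -std::numeric_limits<double>::infinity();
        return std::log(sum_counts / sum_lib);
    }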
diff --git a/src/add_prior.cpp b/src/add_prior.cpp
new file mode 100644
index 0000000..3faa1b0
--- /dev/null
+++ b/src/add_prior.cpp
@@ -0,0 +1,56 @@
+#include "add_prior.h"
+
+add_prior::add_prior(int nt, int nl, SEXP priors, SEXP offsets, bool login, bool logout) : num_tags(nt), num_libs(nl),
+        allp(priors, num_tags, num_libs), allo(offsets, num_tags, num_libs), pptr2(allp.access()), optr2(allo.access()),
+        logged_in(login), logged_out(logout), tagdex(0) {
+    adj_prior=(double*)R_alloc(num_libs, sizeof(double));
+    adj_libs=(double*)R_alloc(num_libs, sizeof(double));
+    return;
+}
+
+void add_prior::fill_and_next() {
+    if (same_across_rows() && tagdex!=0) {
+        // Skipping if all rows are the same, and we've already filled it in once.
+        return;
+    } else if (tagdex>=num_tags) {
+        throw std::runtime_error("number of increments exceeds available number of tags");
+    }
+
+    ave_lib=0;
+    for (lib=0; lib<num_libs; ++lib) {
+        if (logged_in) { // unlogging to get library sizes, if they were originally logged offsets.
+            adj_libs[lib]=std::exp(optr2[lib]);
+        } else {
+            adj_libs[lib]=optr2[lib];
+        }
+        ave_lib+=adj_libs[lib];
+    }
+    ave_lib/=num_libs;
+
+    // Computing the adjusted prior count for each library.
+    for (lib=0; lib<num_libs; ++lib) {
+        adj_prior[lib]=pptr2[lib]*adj_libs[lib]/ave_lib;
+    }
+
+    // Adding twice the prior count back to the library size, and log-transforming if requested.
+    for (lib=0; lib<num_libs; ++lib) {
+        double& curval=adj_libs[lib];
+        curval+=2*adj_prior[lib];
+        if (logged_out) {
+            curval=std::log(curval);
+        }
+    }
+
+    ++tagdex;
+    allp.advance();
+    allo.advance();
+    return;
+}
+
+const double* const add_prior::get_priors() const { return adj_prior; }
+
+const double* const add_prior::get_offsets()  const { return adj_libs; }
+
+const bool add_prior::same_across_rows() const {
+    return (allp.is_row_repeated() && allo.is_row_repeated());
+}
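The adjustment above scales the prior count to each library's size relative to the average library size, then inflates each library size by twice its scaled prior before (optionally) re-logging. A sketch for one tag:

    #include <cmath>
    #include <vector>

    // Library-size-scaled prior counts; libsize[i] becomes libsize[i] + 2*prior[i].
    void add_prior_one_tag(double prior, std::vector<double>& libsize,
                           std::vector<double>& adj_prior) {
        double ave = 0;
        for (double l : libsize) { ave += l; }
        ave /= libsize.size();
        for (std::size_t lib = 0; lib < libsize.size(); ++lib) {
            adj_prior[lib] = prior * libsize[lib] / ave;
            libsize[lib] += 2 * adj_prior[lib];
        }
    }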
diff --git a/src/add_prior.h b/src/add_prior.h
new file mode 100644
index 0000000..def47d9
--- /dev/null
+++ b/src/add_prior.h
@@ -0,0 +1,25 @@
+#ifndef ADD_PRIOR_H
+#define ADD_PRIOR_H
+#include "matvec_check.h"
+#include "utils.h"
+
+class add_prior{
+public:
+    add_prior(int, int, SEXP, SEXP, bool, bool);
+    void fill_and_next();
+    const double* const get_priors() const;
+    const double* const get_offsets() const;
+    const bool same_across_rows() const;
+private:
+    double* adj_prior, *adj_libs;
+    const int num_tags, num_libs;
+
+    matvec_check allp, allo;
+    const double* const pptr2, *const optr2;
+    const bool logged_in, logged_out;
+
+    int lib, tagdex;
+    double ave_lib;
+};
+
+#endif
diff --git a/src/adj_coxreid.cpp b/src/adj_coxreid.cpp
index 13b2fdd..e5ca7b9 100644
--- a/src/adj_coxreid.cpp
+++ b/src/adj_coxreid.cpp
@@ -13,7 +13,7 @@ adj_coxreid::adj_coxreid (const int& nl, const int& nc, const double* d) : ncoef
      * reallocate the work pointer to this value.
      */
 	double temp_work;
-    F77_NAME(dsytrf)(&uplo, &ncoefs, working_matrix, &ncoefs, pivots, &temp_work, &lwork, &info);
+    F77_CALL(dsytrf)(&uplo, &ncoefs, working_matrix, &ncoefs, pivots, &temp_work, &lwork, &info);
 	if (info) { 
 		delete [] pivots;
 		delete [] working_matrix;
@@ -37,12 +37,7 @@ adj_coxreid::~adj_coxreid () {
 }
 
 std::pair<double, bool> adj_coxreid::compute(const double* wptr) {
-    /* Setting working weight_matrix to 'A=Xt %*% diag(W) %*% X' with column-major storage.
- 	 * This represents the expected Fisher information. The overall strategy is
- 	 * to compute the determinant of the information matrix in order to compute
- 	 * the adjustment factor for the likelihood (in order to account for uncertainty
- 	 * in the nuisance parameters i.e. the fitted values).
- 	 */
+    // Setting working weight_matrix to 'A=Xt %*% diag(W) %*% X' with column-major storage.
     for (int row=0; row<ncoefs; ++row) {
         for (int col=0; col<=row; ++col) {
             double& cur_entry=(working_matrix[col*ncoefs+row]=0);
@@ -52,25 +47,11 @@ std::pair<double, bool> adj_coxreid::compute(const double* wptr) {
         }
     }   
 
-    /* We now apply the Cholesky decomposition using the appropriate routine from the 
-     * LAPACK library. Specifically, we call the routine to do a symmetric indefinite
-     * factorisation i.e. A = LDLt. This guarantees factorization for singular matrices 
-     * when the actual Cholesky decomposition would fail.
-     */
-    F77_NAME(dsytrf)(&uplo, &ncoefs, working_matrix, &ncoefs, pivots, work, &lwork, &info);
+    // LDL* decomposition.
+    F77_CALL(dsytrf)(&uplo, &ncoefs, working_matrix, &ncoefs, pivots, work, &lwork, &info);
     if (info<0) { return std::make_pair(0, false); }
 
-    /* For triangular matrices, we need the diagonal to compute the determinant. Fortunately, 
- 	 * this remains the diagonal of the output matrix despite the permutations performed by 
- 	 * the pivoting. We sum over all log'd diagonal elements to get the log determinant of the 
- 	 * information matrix (valid because det(LDL*)=det(L)*det(D)*det(L*) and we're using the 
- 	 * result of the decomposition). We then have to halve the resulting value. 
- 	 *
- 	 * Note the protection against zero. This just replaces it with an appropriately small
- 	 * non-zero value, if the diagnoal element is zero or NA. This is valid because the set
- 	 * of fitted values which are zero will be constant at all dispersions. Thus, any replacement
- 	 * value will eventually cancel out during interpolation to obtain the CRAPLE.
-     */
+    // Log-determinant as sum of the log-diagonals, then halving (see below).
     double sum_log_diagonals=0;
     for (int i=0; i<ncoefs; ++i) { 
         const double& cur_val=working_matrix[i*ncoefs+i];
@@ -83,4 +64,37 @@ std::pair<double, bool> adj_coxreid::compute(const double* wptr) {
 	return std::make_pair(sum_log_diagonals*0.5, true);
 }
 
+/* EXPLANATION:
+   
+   XtWX represents the expected Fisher information. The overall strategy is to compute the 
+   log-determinant of this matrix, to compute the adjustment factor for the likelihood (in 
+   order to account for uncertainty in the nuisance parameters i.e. the fitted values).
+
+   We want to apply the Cholesky decomposition to the XtWX matrix. However, to be safe,
+   we call the routine to do a symmetric indefinite factorisation i.e. A = LDLt. This
+   guarantees factorization for singular matrices, where the actual Cholesky decomposition
+   would fail because of non-positive eigenvalues.
+  
+   We then compute the determinant of the working_matrix. Here we use three facts:
+
+   - For triangular matrices, the determinant is the product of the diagonals.
+   - det(LDL*)=det(L)*det(D)*det(L*)
+   - All diagonal elements of 'L' are unity.
+
+   Thus, all we need to do is sum over the logged diagonal elements of 'D', which -
+   happily enough - are stored as the diagonal elements of 'working_matrix'. (We then
+   halve the result, as the Cox-Reid adjustment uses half the log-determinant.)
+
+   'info > 0' indicates that one of the diagonals is zero. We handle this by replacing any
+   zero or NA diagonal element with an appropriately small non-zero value. This
+   is valid because the set of fitted values which are zero will be constant at all dispersions. 
+   Thus, any replacement value will eventually cancel out during interpolation to obtain the CRAPLE.
+
+   Note that the LAPACK routine will also do some pivoting, essentially solving PAP* = LDL* for 
+   some permutation matrix P. This shouldn't affect anything; the determinant of the permutation 
+   is either 1 or -1, but this cancels out, so det(A) = det(PAP*).
+
+   Further note that the routine can theoretically return 2-by-2 diagonal blocks in 'D', but
+   this should not occur for positive (semi)definite matrices, which is what XtWX should always be.
+*/
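Given the factorization described above, the adjustment itself is half the sum of the logged diagonals of 'D', with a floor against zero or NA entries. In isolation (a sketch of the computation in compute()):

    #include <cmath>

    // Half log-determinant from the diagonal of the LDLt factor, flooring small values.
    double cr_adjustment(const double* working_matrix, int ncoefs, double low_value) {
        double sum_log_diagonals = 0;
        for (int i = 0; i < ncoefs; ++i) {
            const double cur_val = working_matrix[i * ncoefs + i];
            sum_log_diagonals += (cur_val > low_value) ? std::log(cur_val) : std::log(low_value);
        }
        return sum_log_diagonals * 0.5;
    }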
 
diff --git a/src/glm.h b/src/glm.h
index 2bc6cd8..075a99a 100644
--- a/src/glm.h
+++ b/src/glm.h
@@ -5,20 +5,13 @@
 #include "utils.h"
 
 std::pair<double,bool> glm_one_group(const int&, const int&, const double&, const double*, 
-#ifdef WEIGHTED
-		const double*, 
-#endif
-		const double*, const double*, double);
+		const double*, const double*, const double*, double);
 
 class glm_levenberg {
 public:
 	glm_levenberg(const int&, const int&, const double*, const int&, const double&);
 	~glm_levenberg();
-	int fit(const double*, const double*, 
-#ifdef WEIGHTED
-			const double*, 
-#endif
-			const double*, double*, double*);
+	int fit(const double*, const double*, const double*, const double*, double*, double*);
 
 	const bool& is_failure() const;
 	const int& get_iterations()  const;
@@ -42,11 +35,7 @@ private:
 	int iter;
 	bool failed;
 
-	double nb_deviance(const double*, const double*, 
-#ifdef WEIGHTED
-			const double*, 
-#endif
-			const double*) const;
+	double nb_deviance(const double*, const double*, const double*, const double*) const;
 	void autofill(const double*, double*, const double*);
 };
 
diff --git a/src/glm_levenberg.cpp b/src/glm_levenberg.cpp
index af11c37..4010b32 100644
--- a/src/glm_levenberg.cpp
+++ b/src/glm_levenberg.cpp
@@ -1,36 +1,24 @@
 #include "glm.h"
-
-/* Function to calculate the deviance. Note the protection for very large mu*phi (where we
- * use a gamma instead) or very small mu*phi (where we use the Poisson instead). This 
- * approximation protects against numerical instability introduced by subtrackting
- * a very large log value in (log cur_mu) with another very large logarithm (log cur_mu+1/phi).
- * We need to consider the 'phi' as the approximation is only good when the product is
- * very big or very small.
- */
     
 const double one_millionth=std::pow(10, -6.0);
 const double supremely_low_value=std::pow(10, -13.0), ridiculously_low_value=std::pow(10, -100.0);
 
-double glm_levenberg::nb_deviance (const double* y, const double* mu, 
-#ifdef WEIGHTED
-		const double* w, 
-#endif		
-		const double* phi) const {
+double glm_levenberg::nb_deviance (const double* y, const double* mu, const double* w, const double* phi) const {
     double tempdev=0;
     for (int i=0; i<nlibs; ++i) {
-#ifdef WEIGHTED
         tempdev+=w[i]*compute_unit_nb_deviance(y[i], mu[i], phi[i]);
-#else
-		tempdev+=compute_unit_nb_deviance(y[i], mu[i], phi[i]);
-#endif
     }
     return tempdev;
 }
 
+const char trans='n';
+const int incx=1, incy=1;
+const double first_scaling=1, second_scaling=1;
 void glm_levenberg::autofill(const double* offset, double* mu, const double* beta) {
+    std::copy(offset, offset+nlibs, mu);
+    F77_CALL(dgemv)(&trans, &nlibs, &ncoefs, &first_scaling, design, &nlibs, beta, &incx, &second_scaling, mu, &incy);
 	for (int lib=0; lib<nlibs; ++lib) {
-		double& cur_mean=(mu[lib]=offset[lib]);
-		for (int coef=0; coef<ncoefs; ++coef) { cur_mean+=design[coef*nlibs+lib]*beta[coef]; }
+		double& cur_mean=mu[lib];
 		cur_mean=std::exp(cur_mean);
 	}
 	return;
@@ -73,12 +61,9 @@ const char normal='n', transposed='t', uplo='U';
 const double a=1, b=0;
 const int nrhs=1;
 
-int glm_levenberg::fit(const double* offset, const double* y, 
-#ifdef WEIGHTED
-		const double* w, 
-#endif
+int glm_levenberg::fit(const double* offset, const double* y, const double* w, 
 		const double* disp, double* mu, double* beta) {
-	// We expect 'mu' and 'beta' to be supplied. We then check the maximum value of the counts.
+	// We expect 'beta' to be supplied. We then check the maximum value of the counts.
     double ymax=0;
     for (int lib=0; lib<nlibs; ++lib) { 
 		const double& count=y[lib];
@@ -90,24 +75,25 @@ int glm_levenberg::fit(const double* offset, const double* y,
 
     // If we start off with all entries at zero, there's really no point continuing. 
     if (ymax<low_value) {
-        for (int coef=0; coef<ncoefs; ++coef) { beta[coef]=NA_REAL; }
-        for (int lib=0; lib<nlibs; ++lib) { mu[lib]=0; }
+        std::fill(beta, beta+ncoefs, NA_REAL);
+        std::fill(mu, mu+nlibs, 0);
         return 0;
     }
     
-	/* Otherwise, we have to make sure 'beta' and 'mu' make sense relative to one another.
- 	 * We then proceed to iterating using reweighted least squares.
- 	 */
+	// Otherwise, we compute 'mu' based on 'beta', and proceed to iterating using reweighted least squares.
 	autofill(offset, mu, beta);
-	dev=nb_deviance(y, mu, 
-#ifdef WEIGHTED
-			w, 
-#endif
-			disp);
+	dev=nb_deviance(y, mu, w, disp);
+
+    // Assorted temporary objects.
     double max_info=-1, lambda=0;
+    double denom, weight, deriv;
+    int row, col, index;
+    double divergence;
+    int lev=0;
+    bool low_dev=false;
 
     while ((++iter) <= maxit) {
-		for (int i=0; i<ncoefs; ++i) { dl[i]=0; }
+        std::fill(dl, dl+ncoefs, 0);
 
 		/* Here we set up the matrix XtWX i.e. the Fisher information matrix. X is the design matrix and W is a diagonal matrix
  		 * with the working weights for each observation (i.e. library). The working weights are part of the first derivative of
@@ -122,26 +108,23 @@ int glm_levenberg::fit(const double* offset, const double* y,
  		 * is the second derivative, and dl is the first, you can see that we are effectively performing a multivariate 
  		 * Newton-Raphson procedure with 'dbeta' as the step.
  		 */
-        for (int row=0; row<nlibs; ++row) {
+        for (row=0; row<nlibs; ++row) {
             const double& cur_mu=mu[row];
-			const double denom=(1+cur_mu*disp[row]);
-#ifdef WEIGHTED
-            const double weight=cur_mu/denom*w[row];
-			const double deriv=(y[row]-cur_mu)/denom*w[row];
-#else
-            const double weight=cur_mu/denom;
-			const double deriv=(y[row]-cur_mu)/denom;
-#endif
-            for (int col=0; col<ncoefs; ++col){ 
-				const int index=col*nlibs+row;
+			denom=(1+cur_mu*disp[row]);
+            weight=cur_mu/denom*w[row];
+			deriv=(y[row]-cur_mu)/denom*w[row];
+
+            index=row;
+            for (col=0; col<ncoefs; ++col) {
                 wx[index]=design[index]*weight;
 				dl[col]+=design[index]*deriv;
+                index+=nlibs;
             }
         }
-        F77_NAME(dgemm)(&transposed, &normal, &ncoefs, &ncoefs, &nlibs,
+        F77_CALL(dgemm)(&transposed, &normal, &ncoefs, &ncoefs, &nlibs,
                 &a, design, &nlibs, wx, &nlibs, &b, xwx, &ncoefs);
-        for (int i=0; i<ncoefs; ++i) {
-            const double& cur_val=xwx[i*ncoefs+i];
+        for (row=0; row<ncoefs; ++row) {
+            const double& cur_val=xwx[row*ncoefs+row];
             if (cur_val>max_info) { max_info=cur_val; }
         }
         if (iter==1) {
@@ -153,29 +136,28 @@ int glm_levenberg::fit(const double* offset, const double* y,
          * step can be found that increases the deviance. In short, increases in the deviance
          * are enforced to avoid problems with convergence.
          */ 
-        int lev=0;
-        bool low_dev=false;
+        lev=0;
+        low_dev=false;
         while (++lev) {
-			for (int col=0; col<ncoefs; ++col) { dbeta[col]=dl[col]; } // Copying dl to dbeta.
+            std::copy(dl, dl+ncoefs, dbeta);
+
 			do {
              	/* We need to set up copies as the decomposition routine overwrites the originals, and 
  				 * we want the originals in case we don't like the latest step. For efficiency, we only 
 	 			 * refer to the upper triangular for the XtWX copy (as it should be symmetrical). We also add 
 	 			 * 'lambda' to the diagonals. This reduces the step size as the second derivative is increased.
         	     */
-         		for (int col=0; col<ncoefs; ++col) {
-                	for (int row=0; row<=col; ++row) {
-                    	const int index=col*ncoefs+row;
-						xwx_copy[index]=xwx[index];
-                    	if (row==col) { xwx_copy[index]+=lambda; }
-                	}
+         		for (col=0; col<ncoefs; ++col) {
+                    index=col*ncoefs;
+                    std::copy(xwx+index, xwx+index+col+1, xwx_copy+index);
+                    xwx_copy[index+col]+=lambda; 
             	}
 
             	// Cholesky decomposition, and then use of the decomposition to solve for dbeta in (XtWX)dbeta = dl.
-                F77_NAME(dpotrf)(&uplo, &ncoefs, xwx_copy, &ncoefs, &info);
+                F77_CALL(dpotrf)(&uplo, &ncoefs, xwx_copy, &ncoefs, &info);
                 if (info!=0) {
                     /* If it fails, it MUST mean that the matrix is singular due to numerical imprecision
-                     * as all the diagonal entries of the XVX matrix must be positive. This occurs because of 
+                     * as all the diagonal entries of the XtWX matrix must be positive. This occurs because of 
                      * fitted values being exactly zero; thus, the coefficients attempt to converge to negative 
                      * infinity. This generally forces the step size to be larger (i.e. lambda lower) in order to 
                      * get to infinity faster (which is impossible). Low lambda leads to numerical instability 
@@ -189,11 +171,11 @@ int glm_levenberg::fit(const double* offset, const double* y,
                 } else { break; }
             } while (1);
 
-            F77_NAME(dpotrs)(&uplo, &ncoefs, &nrhs, xwx_copy, &ncoefs, dbeta, &ncoefs, &info);
+            F77_CALL(dpotrs)(&uplo, &ncoefs, &nrhs, xwx_copy, &ncoefs, dbeta, &ncoefs, &info);
             if (info!=0) { return 1; }
 
             // Updating beta and the means. 'dbeta' stores 'Y' from the solution of (X*VX)Y=dl, corresponding to a NR step.
-            for (int i=0; i<ncoefs; ++i) { beta_new[i]=beta[i]+dbeta[i]; }
+            for (index=0; index<ncoefs; ++index) { beta_new[index]=beta[index]+dbeta[index]; }
             autofill(offset, mu_new, beta_new);
 
             /* Checking if the deviance has decreased or if it's too small to care about. Either case is good
@@ -202,15 +184,11 @@ int glm_levenberg::fit(const double* offset, const double* y,
              * lambda up so we want to retake the step from where we were before). This is why we don't modify the values
              * in-place until we're sure we want to take the step.
              */
-            const double dev_new=nb_deviance(y, mu_new, 
-#ifdef WEIGHTED
-					w, 
-#endif
-					disp);
+            const double dev_new=nb_deviance(y, mu_new, w, disp);
             if (dev_new/ymax < supremely_low_value) { low_dev=true; }
             if (dev_new <= dev || low_dev) {
-				for (int i=0; i<ncoefs; ++i) { beta[i]=beta_new[i]; }
-                for (int i=0; i<nlibs; ++i) { mu[i]=mu_new[i]; }
+                std::copy(beta_new, beta_new+ncoefs, beta);
+                std::copy(mu_new, mu_new+nlibs, mu);
                 dev=dev_new; 
                 break; 
             }
@@ -232,8 +210,8 @@ int glm_levenberg::fit(const double* offset, const double* y,
          */
         if (failed) { break; }
 		if (low_dev) { break; }
-        double divergence=0;
-        for (int i=0; i<ncoefs; ++i) { divergence+=dl[i]*dbeta[i]; }
+        divergence=0;
+        for (index=0; index<ncoefs; ++index) { divergence+=dl[index]*dbeta[index]; }
         if (divergence < tolerance) { break; }
 
         /* If we quit the inner levenberg loop immediately and survived all the break conditions above, that means that deviance is decreasing
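The loop above prepares the damped normal equations for the Cholesky solve: only the upper triangle of XtWX is copied (the matrix is symmetric) and 'lambda' is added to each diagonal entry. A minimal standalone sketch of that step, assuming column-major storage as in the diff:

    #include <algorithm>

    // Sketch only (not edgeR code): copy the upper triangle of a
    // symmetric column-major matrix and damp its diagonal by 'lambda',
    // as done above before F77_CALL(dpotrf).
    void damp_upper_triangle(const double* xwx, double* xwx_copy,
                             int ncoefs, double lambda) {
        for (int col = 0; col < ncoefs; ++col) {
            const int index = col * ncoefs;               // top of column 'col'
            std::copy(xwx + index, xwx + index + col + 1, // rows 0..col
                      xwx_copy + index);
            xwx_copy[index + col] += lambda;              // damp the diagonal
        }
    }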
diff --git a/src/glm_one_group.cpp b/src/glm_one_group.cpp
index 8bb6643..91e6477 100644
--- a/src/glm_one_group.cpp
+++ b/src/glm_one_group.cpp
@@ -1,10 +1,7 @@
 #include "glm.h"
 
 std::pair<double,bool> glm_one_group(const int& nlibs, const int& maxit, const double& tolerance, const double* offset, 
-#ifdef WEIGHTED		
-		const double* weights,	
-#endif		
-		const double* y, const double* disp, double cur_beta) {
+		const double* weights, const double* y, const double* disp, double cur_beta) {
     /* Setting up initial values for beta as the log of the mean of the ratio of counts to offsets.
  	 * This is the exact solution for the gamma distribution (which is the limit of the NB as
  	 * the dispersion goes to infinity). However, if cur_beta is not NA, then we assume it's good.
@@ -12,24 +9,14 @@ std::pair<double,bool> glm_one_group(const int& nlibs, const int& maxit, const d
 	bool nonzero=false;
 	if (ISNA(cur_beta)) {
 		cur_beta=0;
-#ifdef WEIGHTED 	   	
  	   	double totweight=0;
-#else 
-		double totweight=nlibs;
-#endif
 		for (int j=0; j<nlibs; ++j) {
 			const double& cur_val=y[j];
 			if (cur_val>low_value) {
-#ifdef WEIGHTED			
 				cur_beta+=cur_val/std::exp(offset[j]) * weights[j];
-#else			
-				cur_beta+=cur_val/std::exp(offset[j]);
-#endif			
 				nonzero=true;
 			}
-#ifdef WEIGHTED		
 			totweight+=weights[j];
-#endif			
 		}
 		cur_beta=std::log(cur_beta/totweight);
 	} else {
@@ -49,13 +36,8 @@ std::pair<double,bool> glm_one_group(const int& nlibs, const int& maxit, const d
  	    info=0;
 		for (int j=0; j<nlibs; ++j) {
 			const double mu=std::exp(cur_beta+offset[j]), denominator=1+mu*disp[j];
-#ifdef WEIGHTED			
 			dl+=(y[j]-mu)/denominator * weights[j];
 			info+=mu/denominator * weights[j];
-#else
-			dl+=(y[j]-mu)/denominator;
-			info+=mu/denominator;
-#endif			
 		}
 		const double step=dl/info;
 		cur_beta+=step;
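The loop above is a single Newton-Raphson step for the one-group NB mean: the score is sum_j w_j (y_j - mu_j)/(1 + mu_j phi_j) and the information is sum_j w_j mu_j/(1 + mu_j phi_j). A self-contained sketch of the same update (illustrative, not the package's exact code):

    #include <cmath>

    // One Newton step: score over information, mirroring the weighted
    // accumulation in the loop above. Inputs assumed already validated.
    double newton_step(int nlibs, double cur_beta, const double* offset,
                       const double* weights, const double* y,
                       const double* disp) {
        double dl = 0, info = 0;
        for (int j = 0; j < nlibs; ++j) {
            const double mu = std::exp(cur_beta + offset[j]);
            const double denom = 1 + mu * disp[j];
            dl   += (y[j] - mu) / denom * weights[j];  // score contribution
            info += mu / denom * weights[j];           // information contribution
        }
        return dl / info;  // caller adds this to cur_beta and tests convergence
    }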
diff --git a/src/init.cpp b/src/init.cpp
index 94f6c9a..7f7ae78 100644
--- a/src/init.cpp
+++ b/src/init.cpp
@@ -5,14 +5,30 @@
 extern "C" {
 
 static const R_CallMethodDef all_call_entries[] = {
-	CALLDEF(R_compute_nbdev, 3),
-	CALLDEF(R_cr_adjust, 3),
+	CALLDEF(R_compute_nbdev, 5),
+	CALLDEF(R_compute_apl, 6),
 	CALLDEF(R_exact_test_by_deviance, 5), 
-	CALLDEF(R_levenberg, 11),
 	CALLDEF(R_loess_by_col, 4),
 	CALLDEF(R_maximize_interpolant, 2),
-	CALLDEF(R_one_group, 9),
+
+    CALLDEF(R_levenberg, 8),
+	CALLDEF(R_get_levenberg_start, 6),
+	CALLDEF(R_one_group, 7),
+	CALLDEF(R_get_one_way_fitted, 3),
 	CALLDEF(R_simple_good_turing, 3),
+
+    CALLDEF(R_add_prior_count, 3),
+    CALLDEF(R_calculate_cpm_log, 3),
+    CALLDEF(R_calculate_cpm_raw, 2),
+    CALLDEF(R_ave_log_cpm, 7),
+
+    CALLDEF(R_check_counts, 1),
+    CALLDEF(R_check_finite, 2),
+    CALLDEF(R_check_positive, 2),
+    CALLDEF(R_check_nonnegative, 2),
+    CALLDEF(R_check_zero_fitted, 3),
+    CALLDEF(R_check_poisson_bound, 3),
+    CALLDEF(R_add_repeat_matrices, 4),
 	{NULL, NULL, 0}
 };
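The expanded table above follows the routine-registration pattern from Writing R Extensions. The CALLDEF macro is defined in the package's own headers; the sketch below shows only the conventional expansion and the matching init hook, and is not copied from the package:

    #include <R_ext/Rdynload.h>

    // Conventional CALLDEF expansion: symbol name, function pointer,
    // number of arguments (sketch; the package defines its own macro).
    #define CALLDEF(name, n) {#name, (DL_FUNC) &name, n}

    // Entry point R calls when the shared object is loaded; registers
    // the all_call_entries table declared above.
    extern "C" void R_init_edgeR(DllInfo* dll) {
        R_registerRoutines(dll, NULL, all_call_entries, NULL, NULL);
    }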
 
diff --git a/src/matvec_check.cpp b/src/matvec_check.cpp
index 59a6593..0ab9435 100644
--- a/src/matvec_check.cpp
+++ b/src/matvec_check.cpp
@@ -1,64 +1,142 @@
 #include "matvec_check.h"
 
-matvec_check::matvec_check(const int nlib, const int nlen, SEXP incoming, const bool transposed, 
-		const char* err, const double nullfill) : mycheck(NULL), temp(NULL), isvec(true), istran(transposed), 
-		nl(nlib), nt(nlen), tagdex(0), libdex(0) {
-	
-	std::stringstream failed;
-	if (!isNumeric(incoming)) {
-		failed << err << " vector or matrix should be double precision";
-		throw std::runtime_error(failed.str());
-	}
-	
-	// Checking if it is a vector, matrix or transposed matrix.
+matvec_check::matvec_check(SEXP incoming, int nr, int nc) : nrow(nr), ncol(nc), mycheck(NULL), index(0), libdex(0), temp(NULL), toget(NULL) { 
+
+    SEXP dims=getAttrib(incoming, R_DimSymbol);
+    if (!isInteger(dims) || LENGTH(dims)!=2) { 
+        throw std::runtime_error("matrix dimensions should be an integer vector of length 2");
+    }
+    const int NR=INTEGER(dims)[0], NC=INTEGER(dims)[1];
+    if (LENGTH(incoming)!=NC*NR) {
+        throw std::runtime_error("recorded matrix dimensions are not consistent with its length"); 
+    }
+
+    SEXP rep_r=getAttrib(incoming, install("repeat.row"));
+    if (!isLogical(rep_r) || LENGTH(rep_r)!=1) {
+        throw std::runtime_error("repeat_row specification must be a logical scalar");
+    }
+    repeat_row=asLogical(rep_r);
+    if (!repeat_row) {
+        if (NR!=nrow){ 
+            throw std::runtime_error("matrix dimensions are not consistent for non-repeating number of rows");
+        }
+    } else if (NR!=1) {
+        throw std::runtime_error("only one row should be present if it is repeating");
+    }
+    maxdex=NR;
+
+    SEXP rep_c=getAttrib(incoming, install("repeat.col"));
+    if (!isLogical(rep_c) || LENGTH(rep_c)!=1) {
+        throw std::runtime_error("repeat_col specification must be a logical scalar");
+    }
+    repeat_col=asLogical(rep_c);
+    if (!repeat_col) {
+        if (NC!=ncol){ 
+            throw std::runtime_error("matrix dimensions are not consistent for non-repeating number of columns");
+        }
+    } else if (NC!=1) {
+        throw std::runtime_error("only one column should be present if it is repeating");
+    }
+
+	if (!isReal(incoming)) {
+		throw std::runtime_error("matrix should be double-precision"); 
+	}	
 	mycheck=REAL(incoming);
-	const int curlen=LENGTH(incoming);
-	if (curlen==0) {
-		temp=new double[nl];
-		for (int i=0; i<nl; ++i) { temp[i]=nullfill; }
-		mycheck=temp;
-	} else if (curlen!=nl) { 
-		isvec=false;
- 	   	if (LENGTH(incoming)!=nl*nlen) {
-			failed << "dimensions of " << err << " matrix are not consistent with number of libraries and tags";
-			throw std::runtime_error(failed.str());
-		} 
-		if (!istran) {
-			temp=new double[nl];
-			libdex=0;
-			for (int i=0; i<nl; ++i, libdex+=nt) { temp[i]=mycheck[libdex]; }
-		}
-	} else {
-		// Otherwise, it's all good; we can use the pointer directly if it's a vector.
-		;
-	}
+    temp=new double[ncol];
+    
+    try {
+        // Setting the initial value of 'temp'.
+        if (repeat_row) {
+            if (repeat_col) {
+                std::fill(temp, temp+ncol, *mycheck);
+            } else {
+                // 'temp' was allocated above; just copy the single row in.
+                std::copy(mycheck, mycheck+ncol, temp);
+            }
+        } else {
+            advance();
+        }
+    } catch (std::exception& e) {
+        delete [] temp;
+        throw;
+    }
+    
 	return;
 }
 
 void matvec_check::advance() {
-	if (!isvec) { 
-		if (!istran) { 
-			// Copying elements to an array if it is not transposed, so each row can be accessed at a pointer.
-			++mycheck;
-			if ((++tagdex) >= nt) { return; }
-			libdex=0;
-			for (int i=0; i<nl; ++i, libdex+=nt) { temp[i]=mycheck[libdex]; }
-		} else {
-			// Each (original) row is a (transposed) column, so rows can be accessed directly in column-major format.
-			mycheck+=nl;
-		}
-	}
-	return;
+    if (repeat_row || index>=maxdex) {
+        // No need for updating, if repeated across rows; or if we're at the end.
+        return;
+    }
+    if (repeat_col) {  
+        // Updating for new row.
+        std::fill(temp, temp+ncol, mycheck[index]);
+    } else {
+        // Updating for new row/col.
+        toget=mycheck + index;
+        for (libdex=0; libdex<ncol; ++libdex) {
+            temp[libdex]=*toget;
+            toget+=nrow;
+        }
+    }
+    ++index;
+    return;
+}
+
+const double* const matvec_check::access() const { 
+    return temp;
+}
+
+const bool matvec_check::is_row_repeated () const {
+    return repeat_row;
 }
 
-const double* const* matvec_check::access() const { 
-	if (isvec || istran) { 
-		return &mycheck; 
-	} else {
-		return &temp;
-	}
+const bool matvec_check::is_col_repeated () const {
+    return repeat_col;
 }
 
 matvec_check::~matvec_check() {
 	if (temp!=NULL) { delete [] temp; }
 }
+
+count_holder::count_holder (SEXP y) : yiptr(NULL), ydptr(NULL) {
+    SEXP dims=getAttrib(y, R_DimSymbol);
+    if (!isInteger(dims) || LENGTH(dims)!=2) {
+        throw std::runtime_error("matrix dimensions should be an integer vector of length 2");
+    }
+    num_tags=INTEGER(dims)[0];
+    num_libs=INTEGER(dims)[1];
+
+    is_integer=isInteger(y);
+    if (is_integer) {
+        yiptr=INTEGER(y);
+    } else if (isReal(y)) {
+        ydptr=REAL(y);
+    } else {
+        throw std::runtime_error("count matrix must be integer or double-precision");
+    }
+
+    if (LENGTH(y)!=num_tags*num_libs) {
+        throw std::runtime_error("dimensions of the count matrix are not as specified");
+    } 
+    return;
+}
+
+void count_holder::fill_and_next(double* yptr) {
+    if (is_integer) {
+        for (libdex=0; libdex<num_libs; ++libdex) { yptr[libdex]=double(yiptr[libdex*num_tags]); }
+        ++yiptr;
+    } else {
+        for (libdex=0; libdex<num_libs; ++libdex) { yptr[libdex]=ydptr[libdex*num_tags]; }
+        ++ydptr;
+    }
+    return;
+}
+
+bool count_holder::is_data_integer() const { return is_integer; }
+const int* count_holder::get_raw_int () const { return yiptr; }
+const double* count_holder::get_raw_double () const { return ydptr; }
+
+int count_holder::get_ntags() const { return num_tags; }
+int count_holder::get_nlibs() const { return num_libs; }
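The rewritten matvec_check is driven row by row: access() returns a stable pointer to a buffer of 'ncol' doubles, and advance() refreshes that buffer for the next tag (a no-op when the single row is repeated). A hypothetical caller, with invented names, would look like:

    #include "matvec_check.h"

    // Sketch of the intended caller pattern. The constructor already
    // loads row 0, so advance() is called at the end of each iteration.
    void scan_rows(SEXP offsets, int num_tags, int num_libs) {
        matvec_check allo(offsets, num_tags, num_libs);
        const double* const optr = allo.access();  // stable across advance()
        for (int tag = 0; tag < num_tags; ++tag) {
            // ... use optr[0 .. num_libs-1] for this tag ...
            allo.advance();
        }
    }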
diff --git a/src/matvec_check.h b/src/matvec_check.h
index 657c586..623f561 100644
--- a/src/matvec_check.h
+++ b/src/matvec_check.h
@@ -4,16 +4,38 @@
 
 class matvec_check {
 public:
-	matvec_check(const int, const int, SEXP, const bool, const char*, const double=0);
+	matvec_check(SEXP, int, int);
 	~matvec_check();
 	void advance();
-	const double* const* access() const;
+	const double* const access() const;
+    const bool is_row_repeated() const;
+    const bool is_col_repeated() const;
 private:
+    int nrow, ncol;
+    bool repeat_row, repeat_col;
+    
 	const double* mycheck;
+    int index, libdex, maxdex;
 	double* temp;
-	bool isvec, istran;
-	const int nl, nt;
-	int tagdex, libdex;
+    const double* toget;
+};
+
+class count_holder {
+public:
+    count_holder(SEXP);
+    void fill_and_next(double*);
+    int get_ntags() const;
+    int get_nlibs() const;
+
+    bool is_data_integer() const;
+    const int* get_raw_int() const;
+    const double* get_raw_double() const;
+private:
+    const int* yiptr;
+    const double* ydptr;
+    int num_tags, num_libs;
+    bool is_integer;
+    int libdex;
 };
 
 #endif
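count_holder plays the analogous role for the count matrix itself: fill_and_next() expands one tag's counts into a caller-supplied double buffer, whether the underlying SEXP holds integer or double storage. A sketch of its use, again with hypothetical names:

    #include "matvec_check.h"
    #include <vector>

    // Stream the count matrix one tag (row) at a time as doubles,
    // independent of the storage type R handed over.
    void process_counts(SEXP y) {
        count_holder counts(y);
        const int num_tags = counts.get_ntags();
        std::vector<double> row(counts.get_nlibs());
        for (int tag = 0; tag < num_tags; ++tag) {
            counts.fill_and_next(row.data());  // 'row' now holds this tag's counts
            // ... compute on 'row' ...
        }
    }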
diff --git a/src/nbdev.cpp b/src/nbdev.cpp
index b3f7837..e5fb367 100644
--- a/src/nbdev.cpp
+++ b/src/nbdev.cpp
@@ -8,7 +8,7 @@
  * very big or very small.
  */
     
-const double one_million=std::pow(10, 6.0), one_tenthousandth=std::pow(10, -4.0);
+const double one_tenthousandth=std::pow(10, -4.0);
 const double mildly_low_value=std::pow(10, -8.0);
 
 double compute_unit_nb_deviance (double y, double mu, const double& phi) {
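For reference, this is the unguarded textbook form of the unit NB deviance that the file approximates; it misbehaves for y == 0 or extreme phi, which is what the thresholds above protect against (sketch, assuming y and mu strictly positive):

    #include <cmath>

    // Textbook unit NB deviance: 2*[ y*log(y/mu)
    //   - (y + 1/phi)*log((y + 1/phi)/(mu + 1/phi)) ].
    // Valid only for y, mu > 0 and moderate phi.
    double nb_deviance_plain(double y, double mu, double phi) {
        const double r = 1 / phi;  // NB 'size' parameter
        return 2 * (y * std::log(y / mu)
                  - (y + r) * std::log((y + r) / (mu + r)));
    }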
diff --git a/src/utils.h b/src/utils.h
index 69aaa5c..a483a80 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -10,6 +10,7 @@
 #include <deque>
 #include <stdexcept>
 #include <sstream>
+#include <algorithm>
 
 #include "R.h"
 #include "Rinternals.h"
@@ -18,28 +19,51 @@
 #include "R_ext/BLAS.h"
 #include "R_ext/Lapack.h"
 
-const double low_value=std::pow(10.0, -10.0), log_low_value=std::log(low_value);
-
 /* Defining all R-accessible functions. */
 
 extern "C" {
 
-SEXP R_compute_nbdev(SEXP, SEXP, SEXP);
+SEXP R_compute_nbdev(SEXP, SEXP, SEXP, SEXP, SEXP);
 
-SEXP R_cr_adjust (SEXP, SEXP, SEXP);
+SEXP R_compute_apl (SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 
 SEXP R_exact_test_by_deviance(SEXP, SEXP, SEXP, SEXP, SEXP);
 
-SEXP R_levenberg (SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+SEXP R_levenberg (SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+
+SEXP R_get_levenberg_start (SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 
 SEXP R_loess_by_col(SEXP, SEXP, SEXP, SEXP);
 
 SEXP R_maximize_interpolant(SEXP, SEXP);
 
-SEXP R_one_group (SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+SEXP R_one_group (SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+
+SEXP R_get_one_way_fitted (SEXP, SEXP, SEXP);
 
 SEXP R_simple_good_turing (SEXP, SEXP, SEXP);
 
+SEXP R_add_prior_count (SEXP, SEXP, SEXP);
+
+SEXP R_calculate_cpm_log (SEXP, SEXP, SEXP);
+
+SEXP R_calculate_cpm_raw (SEXP, SEXP);
+
+SEXP R_ave_log_cpm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+
+SEXP R_check_counts (SEXP);
+
+SEXP R_check_finite (SEXP, SEXP);
+
+SEXP R_check_positive (SEXP, SEXP);
+
+SEXP R_check_nonnegative (SEXP, SEXP);
+
+SEXP R_check_zero_fitted (SEXP, SEXP, SEXP);
+
+SEXP R_check_poisson_bound (SEXP, SEXP, SEXP);
+
+SEXP R_add_repeat_matrices(SEXP, SEXP, SEXP, SEXP);
 
 void processHairpinReads(int *, int *, char**, char**, int*,
 		char**, char**, int*, int*, int*, int*, int*, int*,
@@ -48,4 +72,10 @@ void processHairpinReads(int *, int *, char**, char**, int*,
 
 }
 
+/* Other utility functions and values */
+
+const double low_value=std::pow(10.0, -10.0), log_low_value=std::log(low_value);
+
+const double LNtwo=std::log(2), one_million=1000000, LNmillion=std::log(one_million);
+
 #endif
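The constants gathered at the bottom of utils.h feed the new CPM code paths. A sketch of how they might combine in a log2-CPM style calculation (illustrative only; the constants are redefined locally so the snippet stands alone, and this is not the package's exact formula):

    #include <cmath>

    // Local copies of the utils.h constants so the sketch compiles alone.
    const double LNtwo = std::log(2.0), LNmillion = std::log(1000000.0);

    // Hypothetical log2 counts-per-million with a prior count.
    double log2_cpm(double count, double prior, double libsize) {
        return (std::log(count + prior) - std::log(libsize) + LNmillion) / LNtwo;
    }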
diff --git a/tests/edgeR-Tests.Rout.save b/tests/edgeR-Tests.Rout.save
index 9c951fb..5518a77 100644
--- a/tests/edgeR-Tests.Rout.save
+++ b/tests/edgeR-Tests.Rout.save
@@ -1,539 +1,540 @@
-
-R version 3.2.4 Revised (2016-03-16 r70336) -- "Very Secure Dishes"
-Copyright (C) 2016 The R Foundation for Statistical Computing
-Platform: x86_64-w64-mingw32/x64 (64-bit)
-
-R is free software and comes with ABSOLUTELY NO WARRANTY.
-You are welcome to redistribute it under certain conditions.
-Type 'license()' or 'licence()' for distribution details.
-
-  Natural language support but running in an English locale
-
-R is a collaborative project with many contributors.
-Type 'contributors()' for more information and
-'citation()' on how to cite R or R packages in publications.
-
-Type 'demo()' for some demos, 'help()' for on-line help, or
-'help.start()' for an HTML browser interface to help.
-Type 'q()' to quit R.
-
-> library(edgeR)
-Loading required package: limma
-> 
-> set.seed(0); u <- runif(100)
-> 
-> # generate raw counts from NB, create list object
-> y <- matrix(rnbinom(80,size=5,mu=10),nrow=20)
-> y <- rbind(0,c(0,0,2,2),y)
-> rownames(y) <- paste("Tag",1:nrow(y),sep=".")
-> d <- DGEList(counts=y,group=rep(1:2,each=2),lib.size=1001:1004)
-> 
-> # estimate common dispersion and find differences in expression
-> d <- estimateCommonDisp(d)
-> d$common.dispersion
-[1] 0.210292
-> de <- exactTest(d)
-> summary(de$table)
-     logFC             logCPM          PValue       
- Min.   :-1.7266   Min.   :10.96   Min.   :0.01976  
- 1st Qu.:-0.4855   1st Qu.:13.21   1st Qu.:0.33120  
- Median : 0.2253   Median :13.37   Median :0.56514  
- Mean   : 0.1877   Mean   :13.26   Mean   :0.54504  
- 3rd Qu.: 0.5258   3rd Qu.:13.70   3rd Qu.:0.81052  
- Max.   : 4.0861   Max.   :14.31   Max.   :1.00000  
-> topTags(de)
-Comparison of groups:  2-1 
-            logFC   logCPM     PValue       FDR
-Tag.17  2.0450964 13.73726 0.01975954 0.4347099
-Tag.21 -1.7265870 13.38327 0.06131012 0.6744114
-Tag.6  -1.6329986 12.81479 0.12446044 0.8982100
-Tag.2   4.0861092 11.54121 0.16331090 0.8982100
-Tag.16  0.9324996 13.57074 0.29050785 0.9655885
-Tag.20  0.8543138 13.76364 0.31736609 0.9655885
-Tag.12  0.7081170 14.31389 0.37271028 0.9655885
-Tag.19 -0.7976602 13.31405 0.40166354 0.9655885
-Tag.3  -0.7300410 13.54155 0.42139935 0.9655885
-Tag.8  -0.7917906 12.86353 0.47117217 0.9655885
-> 
-> d2 <- estimateTagwiseDisp(d,trend="none",prior.df=20)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1757  0.1896  0.1989  0.2063  0.2185  0.2677 
-> de <- exactTest(d2,dispersion="common")
-> topTags(de)
-Comparison of groups:  2-1 
-            logFC   logCPM     PValue       FDR
-Tag.17  2.0450964 13.73726 0.01975954 0.4347099
-Tag.21 -1.7265870 13.38327 0.06131012 0.6744114
-Tag.6  -1.6329986 12.81479 0.12446044 0.8982100
-Tag.2   4.0861092 11.54121 0.16331090 0.8982100
-Tag.16  0.9324996 13.57074 0.29050785 0.9655885
-Tag.20  0.8543138 13.76364 0.31736609 0.9655885
-Tag.12  0.7081170 14.31389 0.37271028 0.9655885
-Tag.19 -0.7976602 13.31405 0.40166354 0.9655885
-Tag.3  -0.7300410 13.54155 0.42139935 0.9655885
-Tag.8  -0.7917906 12.86353 0.47117217 0.9655885
-> 
-> de <- exactTest(d2)
-> topTags(de)
-Comparison of groups:  2-1 
-            logFC   logCPM     PValue       FDR
-Tag.17  2.0450987 13.73726 0.01327001 0.2919403
-Tag.21 -1.7265897 13.38327 0.05683886 0.6252275
-Tag.6  -1.6329910 12.81479 0.11460208 0.8404152
-Tag.2   4.0861092 11.54121 0.16126207 0.8869414
-Tag.16  0.9324975 13.57074 0.28103256 0.9669238
-Tag.20  0.8543178 13.76364 0.30234789 0.9669238
-Tag.12  0.7081149 14.31389 0.37917895 0.9669238
-Tag.19 -0.7976633 13.31405 0.40762735 0.9669238
-Tag.3  -0.7300478 13.54155 0.40856822 0.9669238
-Tag.8  -0.7918243 12.86353 0.49005179 0.9669238
-> 
-> d2 <- estimateTagwiseDisp(d,trend="movingave",span=0.4,prior.df=20)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1005  0.1629  0.2064  0.2077  0.2585  0.3164 
-> de <- exactTest(d2)
-> topTags(de)
-Comparison of groups:  2-1 
-            logFC   logCPM     PValue       FDR
-Tag.17  2.0450951 13.73726 0.02427872 0.5341319
-Tag.21 -1.7265927 13.38327 0.05234833 0.5758316
-Tag.6  -1.6330014 12.81479 0.12846308 0.8954397
-Tag.2   4.0861092 11.54121 0.16280722 0.8954397
-Tag.16  0.9324887 13.57074 0.24308201 0.9711975
-Tag.20  0.8543044 13.76364 0.35534649 0.9711975
-Tag.19 -0.7976535 13.31405 0.38873717 0.9711975
-Tag.3  -0.7300525 13.54155 0.40001438 0.9711975
-Tag.12  0.7080985 14.31389 0.43530227 0.9711975
-Tag.8  -0.7918376 12.86353 0.49782701 0.9711975
-> 
-> summary(exactTest(d2,rejection="smallp")$table$PValue)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.02428 0.36370 0.55660 0.54320 0.78890 1.00000 
-> summary(exactTest(d2,rejection="deviance")$table$PValue)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.02428 0.36370 0.55660 0.54320 0.78890 1.00000 
-> 
-> d2 <- estimateTagwiseDisp(d,trend="loess",span=0.8,prior.df=20)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1165  0.1449  0.1832  0.1848  0.2116  0.2825 
-> de <- exactTest(d2)
-> topTags(de)
-Comparison of groups:  2-1 
-            logFC   logCPM     PValue       FDR
-Tag.17  2.0450979 13.73726 0.01546795 0.3402949
-Tag.21 -1.7266049 13.38327 0.03545446 0.3899990
-Tag.6  -1.6329841 12.81479 0.10632987 0.7797524
-Tag.2   4.0861092 11.54121 0.16057893 0.8831841
-Tag.16  0.9324935 13.57074 0.26348818 0.9658389
-Tag.20  0.8543140 13.76364 0.31674090 0.9658389
-Tag.19 -0.7976354 13.31405 0.35564858 0.9658389
-Tag.3  -0.7300593 13.54155 0.38833737 0.9658389
-Tag.12  0.7081041 14.31389 0.41513004 0.9658389
-Tag.8  -0.7918152 12.86353 0.48483449 0.9658389
-> 
-> d2 <- estimateTagwiseDisp(d,trend="tricube",span=0.8,prior.df=20)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1165  0.1449  0.1832  0.1848  0.2116  0.2825 
-> de <- exactTest(d2)
-> topTags(de)
-Comparison of groups:  2-1 
-            logFC   logCPM     PValue       FDR
-Tag.17  2.0450979 13.73726 0.01546795 0.3402949
-Tag.21 -1.7266049 13.38327 0.03545446 0.3899990
-Tag.6  -1.6329841 12.81479 0.10632987 0.7797524
-Tag.2   4.0861092 11.54121 0.16057893 0.8831841
-Tag.16  0.9324935 13.57074 0.26348818 0.9658389
-Tag.20  0.8543140 13.76364 0.31674090 0.9658389
-Tag.19 -0.7976354 13.31405 0.35564858 0.9658389
-Tag.3  -0.7300593 13.54155 0.38833737 0.9658389
-Tag.12  0.7081041 14.31389 0.41513004 0.9658389
-Tag.8  -0.7918152 12.86353 0.48483449 0.9658389
-> 
-> # mglmOneWay
-> design <- model.matrix(~group,data=d$samples)
-> mglmOneWay(d[1:10,],design,dispersion=0.2)
-$coefficients
-        (Intercept)        group2
- [1,] -1.000000e+08  0.000000e+00
- [2,] -1.000000e+08  1.000000e+08
- [3,]  2.525729e+00 -5.108256e-01
- [4,]  2.525729e+00  1.484200e-01
- [5,]  2.140066e+00 -1.941560e-01
- [6,]  2.079442e+00 -1.163151e+00
- [7,]  2.014903e+00  2.363888e-01
- [8,]  1.945910e+00 -5.596158e-01
- [9,]  1.504077e+00  2.006707e-01
-[10,]  2.302585e+00  2.623643e-01
-
-$fitted.values
-      [,1] [,2] [,3] [,4]
- [1,]  0.0  0.0  0.0  0.0
- [2,]  0.0  0.0  2.0  2.0
- [3,] 12.5 12.5  7.5  7.5
- [4,] 12.5 12.5 14.5 14.5
- [5,]  8.5  8.5  7.0  7.0
- [6,]  8.0  8.0  2.5  2.5
- [7,]  7.5  7.5  9.5  9.5
- [8,]  7.0  7.0  4.0  4.0
- [9,]  4.5  4.5  5.5  5.5
-[10,] 10.0 10.0 13.0 13.0
-
-> mglmOneWay(d[1:10,],design,dispersion=0)
-$coefficients
-        (Intercept)        group2
- [1,] -1.000000e+08  0.000000e+00
- [2,] -1.000000e+08  1.000000e+08
- [3,]  2.525729e+00 -5.108256e-01
- [4,]  2.525729e+00  1.484200e-01
- [5,]  2.140066e+00 -1.941560e-01
- [6,]  2.079442e+00 -1.163151e+00
- [7,]  2.014903e+00  2.363888e-01
- [8,]  1.945910e+00 -5.596158e-01
- [9,]  1.504077e+00  2.006707e-01
-[10,]  2.302585e+00  2.623643e-01
-
-$fitted.values
-      [,1] [,2] [,3] [,4]
- [1,]  0.0  0.0  0.0  0.0
- [2,]  0.0  0.0  2.0  2.0
- [3,] 12.5 12.5  7.5  7.5
- [4,] 12.5 12.5 14.5 14.5
- [5,]  8.5  8.5  7.0  7.0
- [6,]  8.0  8.0  2.5  2.5
- [7,]  7.5  7.5  9.5  9.5
- [8,]  7.0  7.0  4.0  4.0
- [9,]  4.5  4.5  5.5  5.5
-[10,] 10.0 10.0 13.0 13.0
-
-> 
-> fit <- glmFit(d,design,dispersion=d$common.dispersion,prior.count=0.5/4)
-> lrt <- glmLRT(fit,coef=2)
-> topTags(lrt)
-Coefficient:  group2 
-            logFC   logCPM        LR     PValue       FDR
-Tag.17  2.0450964 13.73726 6.0485417 0.01391779 0.3058698
-Tag.2   4.0861092 11.54121 4.8400340 0.02780635 0.3058698
-Tag.21 -1.7265870 13.38327 4.0777825 0.04345065 0.3186381
-Tag.6  -1.6329986 12.81479 3.0078205 0.08286364 0.4557500
-Tag.16  0.9324996 13.57074 1.3477682 0.24566867 0.8276702
-Tag.20  0.8543138 13.76364 1.1890032 0.27553071 0.8276702
-Tag.19 -0.7976602 13.31405 0.9279151 0.33540526 0.8276702
-Tag.12  0.7081170 14.31389 0.9095513 0.34023349 0.8276702
-Tag.3  -0.7300410 13.54155 0.8300307 0.36226364 0.8276702
-Tag.8  -0.7917906 12.86353 0.7830377 0.37621371 0.8276702
-> 
-> fit <- glmFit(d,design,dispersion=d$common.dispersion,prior.count=0.5)
-> summary(fit$coef)
-  (Intercept)         group2        
- Min.   :-7.604   Min.   :-1.13681  
- 1st Qu.:-4.895   1st Qu.:-0.32341  
- Median :-4.713   Median : 0.15083  
- Mean   :-4.940   Mean   : 0.07817  
- 3rd Qu.:-4.524   3rd Qu.: 0.35163  
- Max.   :-4.107   Max.   : 1.60864  
-> 
-> fit <- glmFit(d,design,prior.count=0.5/4)
-> lrt <- glmLRT(fit,coef=2)
-> topTags(lrt)
-Coefficient:  group2 
-            logFC   logCPM        LR     PValue       FDR
-Tag.17  2.0450964 13.73726 6.0485417 0.01391779 0.3058698
-Tag.2   4.0861092 11.54121 4.8400340 0.02780635 0.3058698
-Tag.21 -1.7265870 13.38327 4.0777825 0.04345065 0.3186381
-Tag.6  -1.6329986 12.81479 3.0078205 0.08286364 0.4557500
-Tag.16  0.9324996 13.57074 1.3477682 0.24566867 0.8276702
-Tag.20  0.8543138 13.76364 1.1890032 0.27553071 0.8276702
-Tag.19 -0.7976602 13.31405 0.9279151 0.33540526 0.8276702
-Tag.12  0.7081170 14.31389 0.9095513 0.34023349 0.8276702
-Tag.3  -0.7300410 13.54155 0.8300307 0.36226364 0.8276702
-Tag.8  -0.7917906 12.86353 0.7830377 0.37621371 0.8276702
-> 
-> dglm <- estimateGLMCommonDisp(d,design)
-> dglm$common.dispersion
-[1] 0.2033282
-> dglm <- estimateGLMTagwiseDisp(dglm,design,prior.df=20)
-> summary(dglm$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1756  0.1879  0.1998  0.2031  0.2135  0.2578 
-> fit <- glmFit(dglm,design,prior.count=0.5/4)
-> lrt <- glmLRT(fit,coef=2)
-> topTags(lrt)
-Coefficient:  group2 
-            logFC   logCPM        LR      PValue       FDR
-Tag.17  2.0450988 13.73727 6.8001118 0.009115216 0.2005348
-Tag.2   4.0861092 11.54122 4.8594088 0.027495756 0.2872068
-Tag.21 -1.7265904 13.38327 4.2537154 0.039164558 0.2872068
-Tag.6  -1.6329904 12.81479 3.1763761 0.074710253 0.4109064
-Tag.16  0.9324970 13.57074 1.4126709 0.234613512 0.8499599
-Tag.20  0.8543183 13.76364 1.2721097 0.259371274 0.8499599
-Tag.19 -0.7976614 13.31405 0.9190392 0.337727381 0.8499599
-Tag.12  0.7081163 14.31389 0.9014515 0.342392806 0.8499599
-Tag.3  -0.7300488 13.54155 0.8817937 0.347710872 0.8499599
-Tag.8  -0.7918166 12.86353 0.7356185 0.391068049 0.8603497
-> dglm <- estimateGLMTrendedDisp(dglm,design)
-> summary(dglm$trended.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1522  0.1676  0.1740  0.1887  0.2000  0.3469 
-> dglm <- estimateGLMTrendedDisp(dglm,design,method="power")
-> summary(dglm$trended.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1522  0.1676  0.1740  0.1887  0.2000  0.3469 
-> dglm <- estimateGLMTrendedDisp(dglm,design,method="spline")
-> summary(dglm$trended.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.09353 0.11080 0.15460 0.19010 0.23050 0.52010 
-> dglm <- estimateGLMTrendedDisp(dglm,design,method="bin.spline")
-> summary(dglm$trended.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1997  0.1997  0.1997  0.1997  0.1997  0.1997 
-> dglm <- estimateGLMTagwiseDisp(dglm,design,prior.df=20)
-> summary(dglm$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1385  0.1792  0.1964  0.1935  0.2026  0.2709 
-> 
-> dglm2 <- estimateDisp(dglm, design)
-> summary(dglm2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1766  0.1789  0.1814  0.1846  0.1870  0.2119 
-> dglm2 <- estimateDisp(dglm, design, prior.df=20)
-> summary(dglm2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1527  0.1669  0.1814  0.1858  0.1951  0.2497 
-> dglm2 <- estimateDisp(dglm, design, robust=TRUE)
-> summary(dglm2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1766  0.1789  0.1814  0.1846  0.1870  0.2119 
-> 
-> # Continuous trend
-> nlibs <- 3
-> ntags <- 1000
-> dispersion.true <- 0.1
-> # Make first transcript respond to covariate x
-> x <- 0:2
-> design <- model.matrix(~x)
-> beta.true <- cbind(Beta1=2,Beta2=c(2,rep(0,ntags-1)))
-> mu.true <- 2^(beta.true %*% t(design))
-> # Generate count data
-> y <- rnbinom(ntags*nlibs,mu=mu.true,size=1/dispersion.true)
-> y <- matrix(y,ntags,nlibs)
-> colnames(y) <- c("x0","x1","x2")
-> rownames(y) <- paste("Gene",1:ntags,sep="")
-> d <- DGEList(y)
-> d <- calcNormFactors(d)
-> fit <- glmFit(d, design, dispersion=dispersion.true, prior.count=0.5/3)
-> results <- glmLRT(fit, coef=2)
-> topTags(results)
-Coefficient:  x 
-            logFC   logCPM        LR       PValue          FDR
-Gene1    2.907024 13.56183 38.738512 4.845536e-10 4.845536e-07
-Gene61   2.855317 10.27136 10.738307 1.049403e-03 5.247015e-01
-Gene62  -2.123902 10.53174  8.818703 2.981585e-03 8.334760e-01
-Gene134 -1.949073 10.53355  8.125889 4.363759e-03 8.334760e-01
-Gene740 -1.610046 10.94907  8.013408 4.643227e-03 8.334760e-01
-Gene354  2.022698 10.45066  7.826308 5.149118e-03 8.334760e-01
-Gene5    1.856816 10.45249  7.214238 7.232750e-03 8.334760e-01
-Gene746 -1.798331 10.53094  6.846262 8.882693e-03 8.334760e-01
-Gene110  1.623148 10.68607  6.737984 9.438120e-03 8.334760e-01
-Gene383  1.637140 10.75412  6.687530 9.708965e-03 8.334760e-01
-> d1 <- estimateGLMCommonDisp(d, design, verbose=TRUE)
-Disp = 0.10253 , BCV = 0.3202 
-> glmFit(d,design,dispersion=dispersion.true, prior.count=0.5/3)
-An object of class "DGEGLM"
-$coefficients
-      (Intercept)          x
-Gene1   -7.391745  2.0149958
-Gene2   -7.318483 -0.7611895
-Gene3   -6.831702 -0.1399478
-Gene4   -7.480255  0.5172002
-Gene5   -8.747793  1.2870467
-995 more rows ...
-
-$fitted.values
-             x0        x1          x2
-Gene1 2.3570471 18.954454 138.2791328
-Gene2 2.5138172  1.089292   0.4282107
-Gene3 4.1580452  3.750528   3.0690081
-Gene4 2.1012460  3.769592   6.1349937
-Gene5 0.5080377  2.136398   8.1502486
-995 more rows ...
-
-$deviance
-[1] 6.38037545 1.46644913 1.38532340 0.01593969 1.03894513
-995 more elements ...
-
-$iter
-[1] 8 4 4 4 6
-995 more elements ...
-
-$failed
-[1] FALSE FALSE FALSE FALSE FALSE
-995 more elements ...
-
-$method
-[1] "levenberg"
-
-$counts
-      x0 x1  x2
-Gene1  0 30 110
-Gene2  2  2   0
-Gene3  3  6   2
-Gene4  2  4   6
-Gene5  1  1   9
-995 more rows ...
-
-$unshrunk.coefficients
-      (Intercept)          x
-Gene1   -7.437763  2.0412762
-Gene2   -7.373370 -0.8796273
-Gene3   -6.870127 -0.1465014
-Gene4   -7.552642  0.5410832
-Gene5   -8.972372  1.3929679
-995 more rows ...
-
-$df.residual
-[1] 1 1 1 1 1
-995 more elements ...
-
-$design
-  (Intercept) x
-1           1 0
-2           1 1
-3           1 2
-attr(,"assign")
-[1] 0 1
-
-$offset
-         [,1]     [,2]     [,3]
-[1,] 8.295172 8.338525 8.284484
-[2,] 8.295172 8.338525 8.284484
-[3,] 8.295172 8.338525 8.284484
-[4,] 8.295172 8.338525 8.284484
-[5,] 8.295172 8.338525 8.284484
-995 more rows ...
-
-$dispersion
-[1] 0.1
-
-$prior.count
-[1] 0.1666667
-
-$samples
-   group lib.size norm.factors
-x0     1     4001    1.0008730
-x1     1     4176    1.0014172
-x2     1     3971    0.9977138
-
-$AveLogCPM
-[1] 13.561832  9.682757 10.447014 10.532113 10.452489
-995 more elements ...
-
-> 
-> d2 <- estimateDisp(d, design)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.05545 0.09511 0.11620 0.11010 0.13330 0.16860 
-> d2 <- estimateDisp(d, design, prior.df=20)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.04203 0.08586 0.11280 0.11010 0.12370 0.37410 
-> d2 <- estimateDisp(d, design, robust=TRUE)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.05545 0.09511 0.11620 0.11010 0.13330 0.16860 
-> 
-> # Exact tests
-> y <- matrix(rnbinom(20,mu=10,size=3/2),5,4)
-> group <- factor(c(1,1,2,2))
-> ys <- splitIntoGroupsPseudo(y,group,pair=c(1,2))
-> exactTestDoubleTail(ys$y1,ys$y2,dispersion=2/3)
-[1] 0.1334396 0.6343568 0.7280015 0.7124912 0.3919258
-> 
-> y <- matrix(rnbinom(5*7,mu=10,size=3/2),5,7)
-> group <- factor(c(1,1,2,2,3,3,3))
-> ys <- splitIntoGroupsPseudo(y,group,pair=c(1,3))
-> exactTestDoubleTail(ys$y1,ys$y2,dispersion=2/3)
-[1] 1.0000000 0.4486382 1.0000000 0.9390317 0.4591241
-> exactTestBetaApprox(ys$y1,ys$y2,dispersion=2/3)
-[1] 1.0000000 0.4492969 1.0000000 0.9421695 0.4589194
-> 
-> y[1,3:4] <- 0
-> design <- model.matrix(~group)
-> fit <- glmFit(y,design,dispersion=2/3,prior.count=0.5/7)
-> summary(fit$coef)
-  (Intercept)         group2            group3        
- Min.   :-1.817   Min.   :-5.0171   Min.   :-0.64646  
- 1st Qu.:-1.812   1st Qu.:-1.1565   1st Qu.:-0.13919  
- Median :-1.712   Median : 0.1994   Median :-0.10441  
- Mean   :-1.625   Mean   :-0.9523   Mean   :-0.04217  
- 3rd Qu.:-1.429   3rd Qu.: 0.3755   3rd Qu.:-0.04305  
- Max.   :-1.356   Max.   : 0.8374   Max.   : 0.72227  
-> 
-> lrt <- glmLRT(fit,contrast=cbind(c(0,1,0),c(0,0,1)))
-> topTags(lrt)
-Coefficient:  LR test of 2 contrasts 
-     logFC.1    logFC.2   logCPM         LR      PValue        FDR
-1 -7.2381060 -0.0621100 17.19071 10.7712171 0.004582051 0.02291026
-5 -1.6684268 -0.9326507 17.33529  1.7309951 0.420842115 0.90967967
-2  1.2080938  1.0420198 18.24544  1.0496688 0.591653347 0.90967967
-4  0.5416704 -0.1506381 17.57744  0.3958596 0.820427427 0.90967967
-3  0.2876249 -0.2008143 18.06216  0.1893255 0.909679672 0.90967967
-> design <- model.matrix(~0+group)
-> fit <- glmFit(y,design,dispersion=2/3,prior.count=0.5/7)
-> lrt <- glmLRT(fit,contrast=cbind(c(-1,1,0),c(0,-1,1),c(-1,0,1)))
-> topTags(lrt)
-Coefficient:  LR test of 2 contrasts 
-     logFC.1    logFC.2   logCPM         LR      PValue        FDR
-1 -7.2381060  7.1759960 17.19071 10.7712171 0.004582051 0.02291026
-5 -1.6684268  0.7357761 17.33529  1.7309951 0.420842115 0.90967967
-2  1.2080938 -0.1660740 18.24544  1.0496688 0.591653347 0.90967967
-4  0.5416704 -0.6923084 17.57744  0.3958596 0.820427427 0.90967967
-3  0.2876249 -0.4884392 18.06216  0.1893255 0.909679672 0.90967967
-> 
-> # simple Good-Turing algorithm runs.
-> test1<-1:9
-> freq1<-c(2018046, 449721, 188933, 105668, 68379, 48190, 35709, 37710, 22280)
-> goodTuring(rep(test1, freq1))
-$P0
-[1] 0.3814719
-
-$proportion
-[1] 8.035111e-08 2.272143e-07 4.060582e-07 5.773690e-07 7.516705e-07
-[6] 9.276808e-07 1.104759e-06 1.282549e-06 1.460837e-06
-
-$count
-[1] 1 2 3 4 5 6 7 8 9
-
-$n
-[1] 2018046  449721  188933  105668   68379   48190   35709   37710   22280
-
-$n0
-[1] 0
-
-> test2<-c(312, 14491, 16401, 65124, 129797, 323321, 366051, 368599, 405261, 604962)
-> goodTuring(test2)
-$P0
-[1] 0
-
-$proportion
- [1] 0.0001362656 0.0063162959 0.0071487846 0.0283850925 0.0565733349
- [6] 0.1409223124 0.1595465235 0.1606570896 0.1766365144 0.2636777866
-
-$count
- [1]    312  14491  16401  65124 129797 323321 366051 368599 405261 604962
-
-$n
- [1] 1 1 1 1 1 1 1 1 1 1
-
-$n0
-[1] 0
-
-> 
-> 
-> 
-> proc.time()
-   user  system elapsed 
-   3.58    0.03    3.61 
+
+R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
+Copyright (C) 2016 The R Foundation for Statistical Computing
+Platform: x86_64-apple-darwin13.4.0 (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+  Natural language support but running in an English locale
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(edgeR)
+Loading required package: limma
+> 
+> set.seed(0); u <- runif(100)
+> 
+> # generate raw counts from NB, create list object
+> y <- matrix(rnbinom(80,size=5,mu=10),nrow=20)
+> y <- rbind(0,c(0,0,2,2),y)
+> rownames(y) <- paste("Tag",1:nrow(y),sep=".")
+> d <- DGEList(counts=y,group=rep(1:2,each=2),lib.size=1001:1004)
+> 
+> # estimate common dispersion and find differences in expression
+> d <- estimateCommonDisp(d)
+> d$common.dispersion
+[1] 0.210292
+> de <- exactTest(d)
+> summary(de$table)
+     logFC             logCPM          PValue       
+ Min.   :-1.7266   Min.   :10.96   Min.   :0.01976  
+ 1st Qu.:-0.4855   1st Qu.:13.21   1st Qu.:0.33120  
+ Median : 0.2253   Median :13.37   Median :0.56514  
+ Mean   : 0.1877   Mean   :13.26   Mean   :0.54504  
+ 3rd Qu.: 0.5258   3rd Qu.:13.70   3rd Qu.:0.81052  
+ Max.   : 4.0861   Max.   :14.31   Max.   :1.00000  
+> topTags(de)
+Comparison of groups:  2-1 
+            logFC   logCPM     PValue       FDR
+Tag.17  2.0450964 13.73726 0.01975954 0.4347099
+Tag.21 -1.7265870 13.38327 0.06131012 0.6744114
+Tag.6  -1.6329986 12.81479 0.12446044 0.8982100
+Tag.2   4.0861092 11.54121 0.16331090 0.8982100
+Tag.16  0.9324996 13.57074 0.29050785 0.9655885
+Tag.20  0.8543138 13.76364 0.31736609 0.9655885
+Tag.12  0.7081170 14.31389 0.37271028 0.9655885
+Tag.19 -0.7976602 13.31405 0.40166354 0.9655885
+Tag.3  -0.7300410 13.54155 0.42139935 0.9655885
+Tag.8  -0.7917906 12.86353 0.47117217 0.9655885
+> 
+> d2 <- estimateTagwiseDisp(d,trend="none",prior.df=20)
+> summary(d2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1757  0.1896  0.1989  0.2063  0.2185  0.2677 
+> de <- exactTest(d2,dispersion="common")
+> topTags(de)
+Comparison of groups:  2-1 
+            logFC   logCPM     PValue       FDR
+Tag.17  2.0450964 13.73726 0.01975954 0.4347099
+Tag.21 -1.7265870 13.38327 0.06131012 0.6744114
+Tag.6  -1.6329986 12.81479 0.12446044 0.8982100
+Tag.2   4.0861092 11.54121 0.16331090 0.8982100
+Tag.16  0.9324996 13.57074 0.29050785 0.9655885
+Tag.20  0.8543138 13.76364 0.31736609 0.9655885
+Tag.12  0.7081170 14.31389 0.37271028 0.9655885
+Tag.19 -0.7976602 13.31405 0.40166354 0.9655885
+Tag.3  -0.7300410 13.54155 0.42139935 0.9655885
+Tag.8  -0.7917906 12.86353 0.47117217 0.9655885
+> 
+> de <- exactTest(d2)
+> topTags(de)
+Comparison of groups:  2-1 
+            logFC   logCPM     PValue       FDR
+Tag.17  2.0450987 13.73726 0.01327001 0.2919403
+Tag.21 -1.7265897 13.38327 0.05683886 0.6252275
+Tag.6  -1.6329910 12.81479 0.11460208 0.8404152
+Tag.2   4.0861092 11.54121 0.16126207 0.8869414
+Tag.16  0.9324975 13.57074 0.28103256 0.9669238
+Tag.20  0.8543178 13.76364 0.30234789 0.9669238
+Tag.12  0.7081149 14.31389 0.37917895 0.9669238
+Tag.19 -0.7976633 13.31405 0.40762735 0.9669238
+Tag.3  -0.7300478 13.54155 0.40856822 0.9669238
+Tag.8  -0.7918243 12.86353 0.49005179 0.9669238
+> 
+> d2 <- estimateTagwiseDisp(d,trend="movingave",span=0.4,prior.df=20)
+> summary(d2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1005  0.1629  0.2064  0.2077  0.2585  0.3164 
+> de <- exactTest(d2)
+> topTags(de)
+Comparison of groups:  2-1 
+            logFC   logCPM     PValue       FDR
+Tag.17  2.0450951 13.73726 0.02427872 0.5341319
+Tag.21 -1.7265927 13.38327 0.05234833 0.5758316
+Tag.6  -1.6330014 12.81479 0.12846308 0.8954397
+Tag.2   4.0861092 11.54121 0.16280722 0.8954397
+Tag.16  0.9324887 13.57074 0.24308201 0.9711975
+Tag.20  0.8543044 13.76364 0.35534649 0.9711975
+Tag.19 -0.7976535 13.31405 0.38873717 0.9711975
+Tag.3  -0.7300525 13.54155 0.40001438 0.9711975
+Tag.12  0.7080985 14.31389 0.43530227 0.9711975
+Tag.8  -0.7918376 12.86353 0.49782701 0.9711975
+> 
+> summary(exactTest(d2,rejection="smallp")$table$PValue)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+0.02428 0.36370 0.55660 0.54320 0.78890 1.00000 
+> summary(exactTest(d2,rejection="deviance")$table$PValue)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+0.02428 0.36370 0.55660 0.54320 0.78890 1.00000 
+> 
+> d2 <- estimateTagwiseDisp(d,trend="loess",span=0.8,prior.df=20)
+> summary(d2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1165  0.1449  0.1832  0.1848  0.2116  0.2825 
+> de <- exactTest(d2)
+> topTags(de)
+Comparison of groups:  2-1 
+            logFC   logCPM     PValue       FDR
+Tag.17  2.0450979 13.73726 0.01546795 0.3402949
+Tag.21 -1.7266049 13.38327 0.03545446 0.3899990
+Tag.6  -1.6329841 12.81479 0.10632987 0.7797524
+Tag.2   4.0861092 11.54121 0.16057893 0.8831841
+Tag.16  0.9324935 13.57074 0.26348818 0.9658389
+Tag.20  0.8543140 13.76364 0.31674090 0.9658389
+Tag.19 -0.7976354 13.31405 0.35564858 0.9658389
+Tag.3  -0.7300593 13.54155 0.38833737 0.9658389
+Tag.12  0.7081041 14.31389 0.41513004 0.9658389
+Tag.8  -0.7918152 12.86353 0.48483449 0.9658389
+> 
+> d2 <- estimateTagwiseDisp(d,trend="tricube",span=0.8,prior.df=20)
+> summary(d2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1165  0.1449  0.1832  0.1848  0.2116  0.2825 
+> de <- exactTest(d2)
+> topTags(de)
+Comparison of groups:  2-1 
+            logFC   logCPM     PValue       FDR
+Tag.17  2.0450979 13.73726 0.01546795 0.3402949
+Tag.21 -1.7266049 13.38327 0.03545446 0.3899990
+Tag.6  -1.6329841 12.81479 0.10632987 0.7797524
+Tag.2   4.0861092 11.54121 0.16057893 0.8831841
+Tag.16  0.9324935 13.57074 0.26348818 0.9658389
+Tag.20  0.8543140 13.76364 0.31674090 0.9658389
+Tag.19 -0.7976354 13.31405 0.35564858 0.9658389
+Tag.3  -0.7300593 13.54155 0.38833737 0.9658389
+Tag.12  0.7081041 14.31389 0.41513004 0.9658389
+Tag.8  -0.7918152 12.86353 0.48483449 0.9658389
+> 
+> # mglmOneWay
+> design <- model.matrix(~group,data=d$samples)
+> mglmOneWay(d[1:10,],design,dispersion=0.2)
+$coefficients
+        (Intercept)        group2
+ [1,] -1.000000e+08  0.000000e+00
+ [2,] -1.000000e+08  1.000000e+08
+ [3,]  2.525729e+00 -5.108256e-01
+ [4,]  2.525729e+00  1.484200e-01
+ [5,]  2.140066e+00 -1.941560e-01
+ [6,]  2.079442e+00 -1.163151e+00
+ [7,]  2.014903e+00  2.363888e-01
+ [8,]  1.945910e+00 -5.596158e-01
+ [9,]  1.504077e+00  2.006707e-01
+[10,]  2.302585e+00  2.623643e-01
+
+$fitted.values
+      [,1] [,2] [,3] [,4]
+ [1,]  0.0  0.0  0.0  0.0
+ [2,]  0.0  0.0  2.0  2.0
+ [3,] 12.5 12.5  7.5  7.5
+ [4,] 12.5 12.5 14.5 14.5
+ [5,]  8.5  8.5  7.0  7.0
+ [6,]  8.0  8.0  2.5  2.5
+ [7,]  7.5  7.5  9.5  9.5
+ [8,]  7.0  7.0  4.0  4.0
+ [9,]  4.5  4.5  5.5  5.5
+[10,] 10.0 10.0 13.0 13.0
+
+> mglmOneWay(d[1:10,],design,dispersion=0)
+$coefficients
+        (Intercept)        group2
+ [1,] -1.000000e+08  0.000000e+00
+ [2,] -1.000000e+08  1.000000e+08
+ [3,]  2.525729e+00 -5.108256e-01
+ [4,]  2.525729e+00  1.484200e-01
+ [5,]  2.140066e+00 -1.941560e-01
+ [6,]  2.079442e+00 -1.163151e+00
+ [7,]  2.014903e+00  2.363888e-01
+ [8,]  1.945910e+00 -5.596158e-01
+ [9,]  1.504077e+00  2.006707e-01
+[10,]  2.302585e+00  2.623643e-01
+
+$fitted.values
+      [,1] [,2] [,3] [,4]
+ [1,]  0.0  0.0  0.0  0.0
+ [2,]  0.0  0.0  2.0  2.0
+ [3,] 12.5 12.5  7.5  7.5
+ [4,] 12.5 12.5 14.5 14.5
+ [5,]  8.5  8.5  7.0  7.0
+ [6,]  8.0  8.0  2.5  2.5
+ [7,]  7.5  7.5  9.5  9.5
+ [8,]  7.0  7.0  4.0  4.0
+ [9,]  4.5  4.5  5.5  5.5
+[10,] 10.0 10.0 13.0 13.0
+
+> 
+> fit <- glmFit(d,design,dispersion=d$common.dispersion,prior.count=0.5/4)
+> lrt <- glmLRT(fit,coef=2)
+> topTags(lrt)
+Coefficient:  group2 
+            logFC   logCPM        LR     PValue       FDR
+Tag.17  2.0450964 13.73726 6.0485417 0.01391779 0.3058698
+Tag.2   4.0861092 11.54121 4.8400340 0.02780635 0.3058698
+Tag.21 -1.7265870 13.38327 4.0777825 0.04345065 0.3186381
+Tag.6  -1.6329986 12.81479 3.0078205 0.08286364 0.4557500
+Tag.16  0.9324996 13.57074 1.3477682 0.24566867 0.8276702
+Tag.20  0.8543138 13.76364 1.1890032 0.27553071 0.8276702
+Tag.19 -0.7976602 13.31405 0.9279151 0.33540526 0.8276702
+Tag.12  0.7081170 14.31389 0.9095513 0.34023349 0.8276702
+Tag.3  -0.7300410 13.54155 0.8300307 0.36226364 0.8276702
+Tag.8  -0.7917906 12.86353 0.7830377 0.37621371 0.8276702
+> 
+> fit <- glmFit(d,design,dispersion=d$common.dispersion,prior.count=0.5)
+> summary(fit$coef)
+  (Intercept)         group2        
+ Min.   :-7.604   Min.   :-1.13681  
+ 1st Qu.:-4.895   1st Qu.:-0.32341  
+ Median :-4.713   Median : 0.15083  
+ Mean   :-4.940   Mean   : 0.07817  
+ 3rd Qu.:-4.524   3rd Qu.: 0.35163  
+ Max.   :-4.107   Max.   : 1.60864  
+> 
+> fit <- glmFit(d,design,prior.count=0.5/4)
+> lrt <- glmLRT(fit,coef=2)
+> topTags(lrt)
+Coefficient:  group2 
+            logFC   logCPM        LR     PValue       FDR
+Tag.17  2.0450964 13.73726 6.0485417 0.01391779 0.3058698
+Tag.2   4.0861092 11.54121 4.8400340 0.02780635 0.3058698
+Tag.21 -1.7265870 13.38327 4.0777825 0.04345065 0.3186381
+Tag.6  -1.6329986 12.81479 3.0078205 0.08286364 0.4557500
+Tag.16  0.9324996 13.57074 1.3477682 0.24566867 0.8276702
+Tag.20  0.8543138 13.76364 1.1890032 0.27553071 0.8276702
+Tag.19 -0.7976602 13.31405 0.9279151 0.33540526 0.8276702
+Tag.12  0.7081170 14.31389 0.9095513 0.34023349 0.8276702
+Tag.3  -0.7300410 13.54155 0.8300307 0.36226364 0.8276702
+Tag.8  -0.7917906 12.86353 0.7830377 0.37621371 0.8276702
+> 
+> dglm <- estimateGLMCommonDisp(d,design)
+> dglm$common.dispersion
+[1] 0.2033282
+> dglm <- estimateGLMTagwiseDisp(dglm,design,prior.df=20)
+> summary(dglm$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1756  0.1879  0.1998  0.2031  0.2135  0.2578 
+> fit <- glmFit(dglm,design,prior.count=0.5/4)
+> lrt <- glmLRT(fit,coef=2)
+> topTags(lrt)
+Coefficient:  group2 
+            logFC   logCPM        LR      PValue       FDR
+Tag.17  2.0450988 13.73727 6.8001118 0.009115216 0.2005348
+Tag.2   4.0861092 11.54122 4.8594088 0.027495756 0.2872068
+Tag.21 -1.7265904 13.38327 4.2537154 0.039164558 0.2872068
+Tag.6  -1.6329904 12.81479 3.1763761 0.074710253 0.4109064
+Tag.16  0.9324970 13.57074 1.4126709 0.234613512 0.8499599
+Tag.20  0.8543183 13.76364 1.2721097 0.259371274 0.8499599
+Tag.19 -0.7976614 13.31405 0.9190392 0.337727381 0.8499599
+Tag.12  0.7081163 14.31389 0.9014515 0.342392806 0.8499599
+Tag.3  -0.7300488 13.54155 0.8817937 0.347710872 0.8499599
+Tag.8  -0.7918166 12.86353 0.7356185 0.391068049 0.8603497
+> dglm <- estimateGLMTrendedDisp(dglm,design)
+> summary(dglm$trended.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1522  0.1676  0.1740  0.1887  0.2000  0.3469 
+> dglm <- estimateGLMTrendedDisp(dglm,design,method="power")
+> summary(dglm$trended.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1522  0.1676  0.1740  0.1887  0.2000  0.3469 
+> dglm <- estimateGLMTrendedDisp(dglm,design,method="spline")
+> summary(dglm$trended.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+0.09353 0.11080 0.15460 0.19010 0.23050 0.52010 
+> dglm <- estimateGLMTrendedDisp(dglm,design,method="bin.spline")
+> summary(dglm$trended.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1997  0.1997  0.1997  0.1997  0.1997  0.1997 
+> dglm <- estimateGLMTagwiseDisp(dglm,design,prior.df=20)
+> summary(dglm$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1385  0.1792  0.1964  0.1935  0.2026  0.2709 
+> 
+> dglm2 <- estimateDisp(dglm, design)
+> summary(dglm2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1766  0.1789  0.1814  0.1846  0.1870  0.2119 
+> dglm2 <- estimateDisp(dglm, design, prior.df=20)
+> summary(dglm2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1527  0.1669  0.1814  0.1858  0.1951  0.2497 
+> dglm2 <- estimateDisp(dglm, design, robust=TRUE)
+> summary(dglm2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+ 0.1766  0.1789  0.1814  0.1846  0.1870  0.2119 
+> 
+> # Continuous trend
+> nlibs <- 3
+> ntags <- 1000
+> dispersion.true <- 0.1
+> # Make first transcript respond to covariate x
+> x <- 0:2
+> design <- model.matrix(~x)
+> beta.true <- cbind(Beta1=2,Beta2=c(2,rep(0,ntags-1)))
+> mu.true <- 2^(beta.true %*% t(design))
+> # Generate count data
+> y <- rnbinom(ntags*nlibs,mu=mu.true,size=1/dispersion.true)
+> y <- matrix(y,ntags,nlibs)
+> colnames(y) <- c("x0","x1","x2")
+> rownames(y) <- paste("Gene",1:ntags,sep="")
+> d <- DGEList(y)
+> d <- calcNormFactors(d)
+> fit <- glmFit(d, design, dispersion=dispersion.true, prior.count=0.5/3)
+> results <- glmLRT(fit, coef=2)
+> topTags(results)
+Coefficient:  x 
+            logFC   logCPM        LR       PValue          FDR
+Gene1    2.907024 13.56183 38.738512 4.845536e-10 4.845536e-07
+Gene61   2.855317 10.27136 10.738307 1.049403e-03 5.247015e-01
+Gene62  -2.123902 10.53174  8.818703 2.981585e-03 8.334760e-01
+Gene134 -1.949073 10.53355  8.125889 4.363759e-03 8.334760e-01
+Gene740 -1.610046 10.94907  8.013408 4.643227e-03 8.334760e-01
+Gene354  2.022698 10.45066  7.826308 5.149118e-03 8.334760e-01
+Gene5    1.856816 10.45249  7.214238 7.232750e-03 8.334760e-01
+Gene746 -1.798331 10.53094  6.846262 8.882693e-03 8.334760e-01
+Gene110  1.623148 10.68607  6.737984 9.438120e-03 8.334760e-01
+Gene383  1.637140 10.75412  6.687530 9.708965e-03 8.334760e-01
+> d1 <- estimateGLMCommonDisp(d, design, verbose=TRUE)
+Disp = 0.10253 , BCV = 0.3202 
+> glmFit(d,design,dispersion=dispersion.true, prior.count=0.5/3)
+An object of class "DGEGLM"
+$coefficients
+      (Intercept)          x
+Gene1   -7.391745  2.0149958
+Gene2   -7.318483 -0.7611895
+Gene3   -6.831702 -0.1399478
+Gene4   -7.480255  0.5172002
+Gene5   -8.747793  1.2870467
+995 more rows ...
+
+$fitted.values
+             x0        x1          x2
+Gene1 2.3570471 18.954454 138.2791328
+Gene2 2.5138172  1.089292   0.4282107
+Gene3 4.1580452  3.750528   3.0690081
+Gene4 2.1012460  3.769592   6.1349937
+Gene5 0.5080377  2.136398   8.1502486
+995 more rows ...
+
+$deviance
+[1] 6.38037545 1.46644913 1.38532340 0.01593969 1.03894513
+995 more elements ...
+
+$iter
+[1] 8 4 4 4 6
+995 more elements ...
+
+$failed
+[1] FALSE FALSE FALSE FALSE FALSE
+995 more elements ...
+
+$method
+[1] "levenberg"
+
+$counts
+      x0 x1  x2
+Gene1  0 30 110
+Gene2  2  2   0
+Gene3  3  6   2
+Gene4  2  4   6
+Gene5  1  1   9
+995 more rows ...
+
+$unshrunk.coefficients
+      (Intercept)          x
+Gene1   -7.437763  2.0412762
+Gene2   -7.373370 -0.8796273
+Gene3   -6.870127 -0.1465014
+Gene4   -7.552642  0.5410832
+Gene5   -8.972372  1.3929679
+995 more rows ...
+
+$df.residual
+[1] 1 1 1 1 1
+995 more elements ...
+
+$design
+  (Intercept) x
+1           1 0
+2           1 1
+3           1 2
+attr(,"assign")
+[1] 0 1
+
+$offset
+      [,1]     [,2]     [,3]
+x 8.295172 8.338525 8.284484
+attr(,"class")
+[1] "compressedMatrix"
+attr(,"repeat.row")
+[1] TRUE
+attr(,"repeat.col")
+[1] FALSE
+
+$dispersion
+[1] 0.1
+
+$prior.count
+[1] 0.1666667
+
+$samples
+   group lib.size norm.factors
+x0     1     4001    1.0008730
+x1     1     4176    1.0014172
+x2     1     3971    0.9977138
+
+$AveLogCPM
+[1] 13.561832  9.682757 10.447014 10.532113 10.452489
+995 more elements ...
+
+> 
+> d2 <- estimateDisp(d, design)
+> summary(d2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+0.05545 0.09511 0.11620 0.11010 0.13330 0.16860 
+> d2 <- estimateDisp(d, design, prior.df=20)
+> summary(d2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+0.04203 0.08586 0.11280 0.11010 0.12370 0.37410 
+> d2 <- estimateDisp(d, design, robust=TRUE)
+> summary(d2$tagwise.dispersion)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+0.05545 0.09511 0.11620 0.11010 0.13330 0.16860 
+> 
+> # Exact tests
+> y <- matrix(rnbinom(20,mu=10,size=3/2),5,4)
+> group <- factor(c(1,1,2,2))
+> ys <- splitIntoGroupsPseudo(y,group,pair=c(1,2))
+> exactTestDoubleTail(ys$y1,ys$y2,dispersion=2/3)
+[1] 0.1334396 0.6343568 0.7280015 0.7124912 0.3919258
+> 
+> y <- matrix(rnbinom(5*7,mu=10,size=3/2),5,7)
+> group <- factor(c(1,1,2,2,3,3,3))
+> ys <- splitIntoGroupsPseudo(y,group,pair=c(1,3))
+> exactTestDoubleTail(ys$y1,ys$y2,dispersion=2/3)
+[1] 1.0000000 0.4486382 1.0000000 0.9390317 0.4591241
+> exactTestBetaApprox(ys$y1,ys$y2,dispersion=2/3)
+[1] 1.0000000 0.4492969 1.0000000 0.9421695 0.4589194
+> 
+> y[1,3:4] <- 0
+> design <- model.matrix(~group)
+> fit <- glmFit(y,design,dispersion=2/3,prior.count=0.5/7)
+> summary(fit$coef)
+  (Intercept)         group2            group3        
+ Min.   :-1.817   Min.   :-5.0171   Min.   :-0.64646  
+ 1st Qu.:-1.812   1st Qu.:-1.1565   1st Qu.:-0.13919  
+ Median :-1.712   Median : 0.1994   Median :-0.10441  
+ Mean   :-1.625   Mean   :-0.9523   Mean   :-0.04217  
+ 3rd Qu.:-1.429   3rd Qu.: 0.3755   3rd Qu.:-0.04305  
+ Max.   :-1.356   Max.   : 0.8374   Max.   : 0.72227  
+> 
+> lrt <- glmLRT(fit,contrast=cbind(c(0,1,0),c(0,0,1)))
+> topTags(lrt)
+Coefficient:  LR test of 2 contrasts 
+     logFC.1    logFC.2   logCPM         LR      PValue        FDR
+1 -7.2381060 -0.0621100 17.19071 10.7712171 0.004582051 0.02291026
+5 -1.6684268 -0.9326507 17.33529  1.7309951 0.420842115 0.90967967
+2  1.2080938  1.0420198 18.24544  1.0496688 0.591653347 0.90967967
+4  0.5416704 -0.1506381 17.57744  0.3958596 0.820427427 0.90967967
+3  0.2876249 -0.2008143 18.06216  0.1893255 0.909679672 0.90967967
+> design <- model.matrix(~0+group)
+> fit <- glmFit(y,design,dispersion=2/3,prior.count=0.5/7)
+> lrt <- glmLRT(fit,contrast=cbind(c(-1,1,0),c(0,-1,1),c(-1,0,1)))
+> topTags(lrt)
+Coefficient:  LR test of 2 contrasts 
+     logFC.1    logFC.2   logCPM         LR      PValue        FDR
+1 -7.2381060  7.1759960 17.19071 10.7712171 0.004582051 0.02291026
+5 -1.6684268  0.7357761 17.33529  1.7309951 0.420842115 0.90967967
+2  1.2080938 -0.1660740 18.24544  1.0496688 0.591653347 0.90967967
+4  0.5416704 -0.6923084 17.57744  0.3958596 0.820427427 0.90967967
+3  0.2876249 -0.4884392 18.06216  0.1893255 0.909679672 0.90967967
+> 
+> # simple Good-Turing algorithm runs.
+> test1<-1:9
+> freq1<-c(2018046, 449721, 188933, 105668, 68379, 48190, 35709, 37710, 22280)
+> goodTuring(rep(test1, freq1))
+$P0
+[1] 0.3814719
+
+$proportion
+[1] 8.035111e-08 2.272143e-07 4.060582e-07 5.773690e-07 7.516705e-07
+[6] 9.276808e-07 1.104759e-06 1.282549e-06 1.460837e-06
+
+$count
+[1] 1 2 3 4 5 6 7 8 9
+
+$n
+[1] 2018046  449721  188933  105668   68379   48190   35709   37710   22280
+
+$n0
+[1] 0
+
+> test2<-c(312, 14491, 16401, 65124, 129797, 323321, 366051, 368599, 405261, 604962)
+> goodTuring(test2)
+$P0
+[1] 0
+
+$proportion
+ [1] 0.0001362656 0.0063162959 0.0071487846 0.0283850925 0.0565733349
+ [6] 0.1409223124 0.1595465235 0.1606570896 0.1766365144 0.2636777866
+
+$count
+ [1]    312  14491  16401  65124 129797 323321 366051 368599 405261 604962
+
+$n
+ [1] 1 1 1 1 1 1 1 1 1 1
+
+$n0
+[1] 0
+
+> 
+> 
+> 
+> proc.time()
+   user  system elapsed 
+  3.259   0.065   3.693 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-edger.git


