[med-svn] [r-bioc-edger] 01/04: Imported Upstream version 3.12.0+dfsg

Andreas Tille tille at debian.org
Tue Nov 3 13:40:43 UTC 2015


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-bioc-edger.

commit 84aaf809baf52b050070c5b1531b4dd61dac6162
Author: Andreas Tille <tille at debian.org>
Date:   Tue Nov 3 14:18:15 2015 +0100

    Imported Upstream version 3.12.0+dfsg
---
 DESCRIPTION                 |   6 +-
 R/diffSpliceDGE.R           | 210 ++++++++++++++++++++------------------
 R/estimateDisp.R            |   4 +-
 inst/NEWS.Rd                |  41 +++++++-
 inst/doc/edgeR.pdf          | Bin 48664 -> 45781 bytes
 man/diffSpliceDGE.Rd        |  58 ++++++-----
 man/estimateDisp.Rd         |  10 +-
 man/goana.Rd                |   2 +-
 man/plotSpliceDGE.Rd        |  17 ++--
 man/topSpliceDGE.Rd         |  34 +++++--
 tests/edgeR-Tests.Rout.save | 239 +++++++++-----------------------------------
 vignettes/edgeR.Rnw         |  48 ---------
 12 files changed, 283 insertions(+), 386 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e88211e..9e4cc4c 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: edgeR
-Version: 3.10.3
-Date: 2015/09/24
+Version: 3.12.0
+Date: 2015/10/05
 Title: Empirical analysis of digital gene expression data in R
 Description: Differential expression analysis of RNA-seq expression profiles with biological replication. Implements a range of statistical methodology based on the negative binomial distributions, including empirical Bayes estimation, exact tests, generalized linear models and quasi-likelihood tests. As well as RNA-seq, it can be applied to differential signal analysis of other types of genomic data that produce counts, including ChIP-seq, SAGE and CAGE.
 Author: Yunshun Chen <yuchen at wehi.edu.au>, Aaron Lun <alun at wehi.edu.au>, Davis McCarthy <dmccarthy at wehi.edu.au>, Xiaobei Zhou <xiaobei.zhou at uzh.ch>, Mark Robinson <mark.robinson at imls.uzh.ch>, Gordon Smyth <smyth at wehi.edu.au>
@@ -16,4 +16,4 @@ biocViews: GeneExpression, Transcription, AlternativeSplicing,
         TimeCourse, SAGE, Sequencing, ChIPSeq, RNASeq, BatchEffect,
         MultipleComparison, Normalization, QualityControl
 NeedsCompilation: yes
-Packaged: 2015-09-25 01:53:01 UTC; biocbuild
+Packaged: 2015-10-14 01:06:43 UTC; biocbuild
diff --git a/R/diffSpliceDGE.R b/R/diffSpliceDGE.R
index a372ffb..630da6d 100644
--- a/R/diffSpliceDGE.R
+++ b/R/diffSpliceDGE.R
@@ -1,15 +1,18 @@
-diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, geneid, exonid=NULL, verbose=TRUE)
+diffSpliceDGE <- function(glmfit, coef=ncol(glmfit$design), contrast=NULL, geneid, exonid=NULL, prior.count=0.125, verbose=TRUE)
 {
 # Identify exons and genes with splice variants using negative binomial GLMs
 # Yunshun Chen and Gordon Smyth
-# Created 29 March 2014.  Last modified 13 May 2015. 
+# Created 29 March 2014.  Last modified 25 September 2015. 
 
+#	Check if glmfit is from glmFit() or glmQLFit()
+	isLRT <- is.null(glmfit$df.prior)
+	
 #	Check input (from diffSplice in limma)
-	exon.genes <- fit.exon$genes
-	nexons <- nrow(fit.exon)
-	design <- fit.exon$design
+	exon.genes <- glmfit$genes
+	nexons <- nrow(glmfit)
+	design <- glmfit$design
 
-	if(is.null(exon.genes)) exon.genes <- data.frame(ExonID=1:nrow(fit.exon))
+	if(is.null(exon.genes)) exon.genes <- data.frame(ExonID=1:nrow(glmfit))
 	if(length(geneid)==1) {
 		genecolname <- as.character(geneid)
 		geneid <- exon.genes[[genecolname]]
@@ -28,26 +31,22 @@ diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, g
 	else
 		exoncolname <- NULL
 
-#	Sort by geneid (from diffSplice in limma)
+#	Sort by geneid
 	if(is.null(exonid))
 		o <- order(geneid)
 	else
 		o <- order(geneid,exonid)
 	geneid <- geneid[o]
 	exon.genes <- exon.genes[o,,drop=FALSE]
-	fit.exon <- fit.exon[o, ]
+	glmfit <- glmfit[o, ]
 
 #	Check design matrix
-	design <- as.matrix(fit.exon$design)
+	design <- as.matrix(glmfit$design)
 	nbeta <- ncol(design)
 	if(nbeta < 2) stop("Need at least two columns for design, usually the first is the intercept column")
 	coef.names <- colnames(design)
 
-	if(fit.exon$prior.count!=0){
-		coefficients.mle <- fit.exon$unshrunk.coefficients
-	} else {
-		coefficients.mle <- fit.exon$coefficients
-	}
+	coefficients <- glmfit$coefficients
 
 #	Evaluate beta to be tested
 #	Note that contrast takes precedence over coef: if contrast is given
@@ -62,12 +61,12 @@ diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, g
 		}
 		else
 			coef.name <- coef.names[coef]
-		beta <- coefficients.mle[, coef, drop=FALSE]
+		beta <- coefficients[, coef, drop=FALSE]
 	} else {
 		contrast <- as.matrix(contrast)
 		reform <- contrastAsCoef(design, contrast=contrast, first=TRUE)
 		coef <- 1
-		beta <- drop(coefficients.mle %*% contrast)
+		beta <- drop(coefficients %*% contrast)
 		contrast <- drop(contrast)
 		i <- contrast!=0
 		coef.name <- paste(paste(contrast[i],coef.names[i],sep="*"),collapse=" ")
@@ -78,12 +77,6 @@ diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, g
 #	Null design matrix
 	design0 <- design[, -coef, drop=FALSE]
 
-#	Gene level information
-	gene.counts <- rowsum(fit.exon$counts, geneid, reorder=FALSE)
-	gene.dge <- DGEList(counts=gene.counts, genes=unique(geneid))
-	gene.dge <- estimateDisp(gene.dge, design, robust=FALSE)
-	fit.gene <- glmFit(gene.dge, design)
-
 # 	Count exons and get genewise variances
 	gene.nexons <- rowsum(rep(1,nexons), geneid, reorder=FALSE)
 	if(verbose) {
@@ -94,15 +87,6 @@ diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, g
 		cat("Max number of exons in a gene: ", max(gene.nexons), "\n")
 	}
 
-#	Squeeze
-	fit.gene.trend <- glmFit(gene.dge, design=design, dispersion=gene.dge$trended.dispersion)
-	zerofit <- (fit.gene.trend$fitted.values < 1e-4) & (fit.gene.trend$counts < 1e-4)
-	gene.df.residual <- .residDF(zerofit, design)
-	s2 <- fit.gene.trend$deviance / gene.df.residual
-	s2[gene.df.residual==0] <- 0
-	s2 <- pmax(s2,0)
-	s2.fit <- squeezeVar(s2, df=gene.df.residual, covariate=fit.gene.trend$AveLogCPM, robust=FALSE)
-
 #	Remove genes with only 1 exon
 	gene.keep <- gene.nexons > 1
 	ngenes <- sum(gene.keep)
@@ -111,43 +95,58 @@ diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, g
 	exon.keep <- rep(gene.keep, gene.nexons)
 	geneid <- geneid[exon.keep]
 	exon.genes <- exon.genes[exon.keep, , drop=FALSE]
-	fit.exon <- fit.exon[exon.keep, ]
 	beta <- beta[exon.keep]
-
-	fit.gene <- fit.gene[gene.keep, ]
 	gene.nexons <- gene.nexons[gene.keep]
-	gene.df.test <- gene.nexons-1
-	gene.df.residual <- gene.df.residual[gene.keep]
 	
-# 	Genewise betas
+#	Gene level information
 	g <- rep(1:ngenes, times=gene.nexons)
-	gene.counts.exon <- fit.gene$counts[g, , drop=FALSE]
-	gene.dispersion.exon <- fit.gene$dispersion[g]
-	gene.fit.exon <- glmFit(gene.counts.exon, design=design, dispersion=gene.dispersion.exon, lib.size=gene.dge$samples$lib.size)
-	gene.betabar <- gene.fit.exon$coefficients[, coef, drop=FALSE]
-	offset.new <- fit.exon$offset + gene.betabar %*% t(design[, coef, drop=FALSE])
+	glmfit <- glmfit[exon.keep, ]
+	gene.counts <- rowsum(glmfit$counts, geneid, reorder=FALSE)
+	fit.gene <- glmFit(gene.counts, design, dispersion=0.05, offset=as.vector(glmfit$offset[1,]), prior.count=prior.count)
+	gene.betabar <- fit.gene$coefficients[g, coef, drop=FALSE]
+
+#	New offset
+	offset.new <- glmfit$offset + gene.betabar %*% t(design[, coef, drop=FALSE])
 	coefficients <- beta - gene.betabar
 
 #	Testing
 	design0 <- design[, -coef, drop=FALSE]
-	fit.null <- glmFit(fit.exon$counts, design=design0, offset=offset.new, dispersion=fit.exon$dispersion)
-	fit.alt <- glmFit(fit.exon$counts, design=design, offset=offset.new, dispersion=fit.exon$dispersion)
+	if(isLRT){
+		fit0 <- glmFit(glmfit$counts, design=design0, offset=offset.new, dispersion=glmfit$dispersion)
+		fit1 <- glmFit(glmfit$counts, design=design, offset=offset.new, dispersion=glmfit$dispersion)
+		exon.LR <- fit0$deviance - fit1$deviance
+		gene.LR <- rowsum(exon.LR, geneid, reorder=FALSE)
+		exon.df.test <- fit0$df.residual - fit1$df.residual
+		gene.df.test <- rowsum(exon.df.test, geneid, reorder=FALSE) - 1
+		exon.p.value <- pchisq(exon.LR, df=exon.df.test, lower.tail=FALSE, log.p=FALSE)
+		gene.p.value <- pchisq(gene.LR, df=gene.df.test, lower.tail=FALSE, log.p=FALSE)
+	} else {
+		fit0 <- glmQLFit(glmfit$counts, design=design0, offset=offset.new, dispersion=glmfit$dispersion)
+		fit1 <- glmQLFit(glmfit$counts, design=design, offset=offset.new, dispersion=glmfit$dispersion)
+		exon.s2 <- fit1$deviance / fit1$df.residual.zeros
+		gene.s2 <- rowsum(exon.s2, geneid, reorder=FALSE) / gene.nexons
+		gene.df.residual <- rowsum(fit1$df.residual.zeros, geneid, reorder=FALSE)
+		squeeze <- squeezeVar(var=gene.s2, df=gene.df.residual, robust=TRUE)	
 
-# 	Exon p-values
-	exon.LR <- fit.null$deviance - fit.alt$deviance
-	exon.df.test <- fit.null$df.residual - fit.alt$df.residual	
-	exon.F <- exon.LR / exon.df.test / s2.fit$var.post[gene.keep][g]
-	gene.df.total <- s2.fit$df.prior + gene.df.residual
-	max.df.residual <- ncol(fit.exon$counts)-ncol(design)
-	gene.df.total <- pmin(gene.df.total, ngenes*max.df.residual)
-	exon.p.value <- pf(exon.F, df1=exon.df.test, df2=gene.df.total[g], lower.tail=FALSE, log.p=FALSE)
+		exon.df.test <- fit0$df.residual - fit1$df.residual
+		gene.df.test <- rowsum(exon.df.test, geneid, reorder=FALSE) - 1
+		gene.df.total <- gene.df.residual + squeeze$df.prior
+		gene.df.total <- pmin(gene.df.total, sum(gene.df.residual))
+		gene.s2.post <- squeeze$var.post
+		
+		exon.LR <- fit0$deviance - fit1$deviance
+		exon.F <- exon.LR / exon.df.test / gene.s2.post[g]
+		gene.F <- rowsum(exon.LR, geneid, reorder=FALSE) / gene.df.test / gene.s2.post
+		exon.p.value <- pf(exon.F, df1=exon.df.test, df2=gene.df.total[g], lower.tail=FALSE, log.p=FALSE)
 
-#	Ensure is not more significant than chisquare test
-	i <- s2.fit$var.post[gene.keep][g] < 1
-	if(any(i)) {
-		chisq.pvalue <- pchisq(exon.LR[i], df=exon.df.test[i], lower.tail=FALSE, log.p=FALSE)
-		exon.p.value[i] <- pmax(exon.p.value[i], chisq.pvalue)
-	}
+#		Ensure is not more significant than chisquare test
+		i <- gene.s2.post[g] < 1
+		if(any(i)) {
+			chisq.pvalue <- pchisq(exon.LR[i], df=exon.df.test[i], lower.tail=FALSE, log.p=FALSE)
+			exon.p.value[i] <- pmax(exon.p.value[i], chisq.pvalue)
+		}
+		gene.p.value <- pf(gene.F, df1=gene.df.test, df2=gene.df.total, lower.tail=FALSE, log.p=FALSE)		
+	}	
 
 #	Gene Simes' p-values
 	o <- order(g, exon.p.value, decreasing=FALSE)
@@ -159,33 +158,35 @@ diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, g
 	oo <- order(-g, pmin(pp,1), decreasing=TRUE)
 	gene.Simes.p.value <- pp[oo][cumsum(gene.nexons)]
 
-#	Gene F p-values
-	gene.F <- rowsum(exon.F, geneid, reorder=FALSE) / (gene.df.test)
-	gene.F.p.value <- pf(gene.F, df1=(gene.df.test), df2=gene.df.total, lower.tail=FALSE)
-
 #	Output
 	out <- new("DGELRT",list())
 	out$comparison <- colnames(design)[coef]
 	out$design <- design
 	out$coefficients <- as.vector(coefficients)
-	
-#	Exon level output
-	out$exon.df.test <- exon.df.test
-	out$exon.df.prior <- s2.fit$df.prior[g]
-	out$exon.df.residual <- gene.df.residual[g]
-	out$exon.F <- exon.F
-	out$exon.p.value <- exon.p.value
 	out$genes <- exon.genes
 	out$genecolname <- genecolname
 	out$exoncolname <- exoncolname
 	
+#	Exon level output
+	out$exon.df.test <- exon.df.test
+	if(isLRT){
+		out$exon.LR <- exon.LR
+	} else {
+		out$exon.F <- exon.F
+	}
+	out$exon.p.value <- exon.p.value
+
 #	Gene level output
 	out$gene.df.test <- gene.df.test
-	out$gene.df.prior <- s2.fit$df.prior
-	out$gene.df.residual <- gene.df.residual
+	if(isLRT){
+		out$gene.LR <- gene.LR
+	} else {
+		out$gene.df.prior <- squeeze$df.prior
+		out$gene.df.residual <- gene.df.residual
+		out$gene.F <- gene.F
+	}
+	out$gene.p.value <- gene.p.value
 	out$gene.Simes.p.value <- gene.Simes.p.value
-	out$gene.F <- gene.F
-	out$gene.F.p.value <- gene.F.p.value
 
 #	Which columns of exon.genes contain gene level annotation? (from diffSplice in limma)
 	exon.lastexon <- cumsum(gene.nexons)
@@ -195,75 +196,87 @@ diffSpliceDGE <- function(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, g
 	isgenelevel <- apply(isdup,2,all)
 	out$gene.genes <- exon.genes[exon.lastexon,isgenelevel, drop=FALSE]
 	out$gene.genes$NExons <- gene.nexons
-	
+
 	out
 }
 
 
-topSpliceDGE <- function(lrt, level="gene", gene.test="Simes", number=10, FDR=1)
+topSpliceDGE <- function(lrt, test="Simes", number=10, FDR=1)
 # Yunshun Chen and Gordon Smyth
-# Created 29 March 2014.  Last modified 24 September 2014. 
+# Created 29 March 2014.  Last modified 25 September 2015. 
 {
-	level <- match.arg(level,c("exon","gene"))
-	gene.test <- match.arg(gene.test,c("Simes","F","f"))
-	if(level=="exon") {
+	test <- match.arg(test,c("Simes","simes","gene","exon"))
+	if(test=="simes") test <- "Simes"
+	if(test=="exon") {
 		number <- min(number, nrow(lrt$genes))
 		P <- lrt$exon.p.value
 		BH <- p.adjust(P, method="BH")
 		if(FDR<1) number <- min(number, sum(BH<FDR))
 		o <- order(P)[1:number]
-		data.frame(lrt$genes[o,,drop=FALSE],logFC=lrt$coefficients[o],F=lrt$exon.F[o],P.Value=P[o],FDR=BH[o])
+		if(is.null(lrt$exon.F)){
+			data.frame(lrt$genes[o,,drop=FALSE],logFC=lrt$coefficients[o],exon.LR=lrt$exon.LR[o],P.Value=P[o],FDR=BH[o])
+		} else {
+			data.frame(lrt$genes[o,,drop=FALSE],logFC=lrt$coefficients[o],exon.F=lrt$exon.F[o],P.Value=P[o],FDR=BH[o])
+		}
 	} else {
 		number <- min(number, nrow(lrt$gene.genes))
-		if(gene.test == "Simes") P <- lrt$gene.Simes.p.value else P <- lrt$gene.F.p.value 
+		if(test=="Simes") P <- lrt$gene.Simes.p.value else P <- lrt$gene.p.value 
 		BH <- p.adjust(P, method="BH")
 		if(FDR<1) number <- min(number,sum(BH<FDR))
 		o <- order(P)[1:number]
-		if(gene.test=="Simes")
+		if(test=="Simes"){
 			data.frame(lrt$gene.genes[o,,drop=FALSE],P.Value=P[o],FDR=BH[o])
-		else
-			data.frame(lrt$gene.genes[o,,drop=FALSE],F=lrt$gene.F[o],P.Value=P[o],FDR=BH[o])
+		} else {
+			if(is.null(lrt$gene.F)){
+				data.frame(lrt$gene.genes[o,,drop=FALSE],gene.LR=lrt$gene.LR[o],P.Value=P[o],FDR=BH[o])
+			} else {
+				data.frame(lrt$gene.genes[o,,drop=FALSE],gene.F=lrt$gene.F[o],P.Value=P[o],FDR=BH[o])
+			}
+		}
 	}
 }
 
 
-plotSpliceDGE <- function(lrt, geneid=NULL, rank=1L, FDR = 0.05)
+plotSpliceDGE <- function(lrt, geneid=NULL, genecolname=NULL, rank=1L, FDR = 0.05)
 # Plot exons of most differentially spliced gene
 # Yunshun Chen and Gordon Smyth
-# Created 29 March 2014.  Last modified 24 September 2014.
+# Created 29 March 2014.  Last modified 5 October 2015.
 {
-	# Gene labelling including gene symbol
-	genecolname <- lrt$genecolname
-	genelab <- grep(paste0(genecolname,"|Symbol|symbol"), colnames(lrt$gene.genes), value = T)
+	if(is.null(genecolname)) 
+		genecolname <- lrt$genecolname
+	else
+		genecolname <- as.character(genecolname)
 	
 	if(is.null(geneid)) {
 		if(rank==1L)
 			i <- which.min(lrt$gene.Simes.p.value)
 		else
 			i <- order(lrt$gene.Simes.p.value)[rank]
-		geneid <- paste(lrt$gene.genes[i,genelab], collapse = ".")
+		geneid <- paste(lrt$gene.genes[i, genecolname], collapse = ".")
 	} else {
-		i <- which(lrt$gene.genes[,lrt$genecolname]==geneid)
-		geneid <- paste(lrt$gene.genes[i,genelab], collapse = ".")
+		geneid <- as.character(geneid)
+		i <- which(lrt$gene.genes[, genecolname]==geneid)[1]
 		if(!length(i)) stop(paste("geneid",geneid,"not found"))
 	}
+
 	exon.lastexon <- cumsum(lrt$gene.genes$NExons[1:i])
 	j <- (exon.lastexon[i]-lrt$gene.genes$NExons[i]+1):exon.lastexon[i]
+
 	exoncolname <- lrt$exoncolname
 	if(is.null(exoncolname)){
-		plot(lrt$coefficients[j], xlab = "Exon", ylab = "logFC (this exon vs the average)", main = geneid, type = "b")
+		plot(lrt$coefficients[j], xlab="Exon", ylab="logFC (this exon vs the average)", main=geneid, type="b")
 	}
 	# Plot exons and mark exon ids on the axis
 	if(!is.null(exoncolname)) {
 		exon.id <- lrt$genes[j, exoncolname]
-		xlab <- paste("Exon", exoncolname, sep = " ")
+		xlab <- paste("Exon", exoncolname, sep=" ")
 		
-		plot(lrt$coefficients[j], xlab = "", ylab = "logFC (this exon vs the average)", main = geneid,type = "b", xaxt = "n")
-		axis(1, at = 1:length(j), labels = exon.id, las = 2, cex.axis = 0.6)
-		mtext(xlab, side = 1, padj = 5.2)
+		plot(lrt$coefficients[j], xlab="", ylab="logFC (this exon vs the average)", main=geneid, type="b", xaxt="n")
+		axis(1, at=1:length(j), labels=exon.id, las=2, cex.axis=0.6)
+		mtext(xlab, side=1, padj=5.2)
 
 		# Mark the topSpliced exons
-		top <- topSpliceDGE(lrt, number = Inf, level = "exon", FDR = FDR)
+		top <- topSpliceDGE(lrt, number=Inf, test="exon", FDR=FDR)
 		m <- which(top[,genecolname] %in% lrt$gene.genes[i,genecolname])
 
 		if(length(m) > 0){
@@ -278,4 +291,5 @@ plotSpliceDGE <- function(lrt, geneid=NULL, rank=1L, FDR = 0.05)
 		}
 	}
 	abline(h=0,lty=2)
+	invisible()
 }
diff --git a/R/estimateDisp.R b/R/estimateDisp.R
index e418e8b..97c650c 100644
--- a/R/estimateDisp.R
+++ b/R/estimateDisp.R
@@ -2,7 +2,7 @@
 ########### Weighted Likelihood Empirical Bayes ##############
 ##############################################################
 
-estimateDisp <- function(y, design=NULL, prior.df=NULL, trend.method="locfit", span=NULL, min.row.sum=5, grid.length=21, grid.range=c(-10,10), robust=FALSE, winsor.tail.p=c(0.05,0.1), tol=1e-06)
+estimateDisp <- function(y, design=NULL, prior.df=NULL, trend.method="locfit", tagwise=TRUE, span=NULL, min.row.sum=5, grid.length=21, grid.range=c(-10,10), robust=FALSE, winsor.tail.p=c(0.05,0.1), tol=1e-06)
 #  Estimating dispersion using weighted conditional likelihood empirical Bayes.
 #  Use GLM approach if a design matrix is given, and classic approach otherwise.
 #  It calculates a matrix of likelihoods for each gene at a set of dispersion grid points, and then calls WLEB() to do the shrinkage.
@@ -103,6 +103,8 @@ estimateDisp <- function(y, design=NULL, prior.df=NULL, trend.method="locfit", s
 	y$AveLogCPM <- AveLogCPM
 	y$span <- out.1$span
 
+	if(!tagwise) return(y)
+
 	# Calculate prior.df
 	if(is.null(prior.df)){
 		glmfit <- glmFit(y$counts[sel,], design, offset=offset[sel,], dispersion=disp.trend, prior.count=0)
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index e6de424..c7a76ae 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -2,6 +2,45 @@
 \title{edgeR News}
 \encoding{UTF-8}
 
+\section{Version 3.12.0}{\itemize{
+\item
+New argument tagwise for estimateDisp(), allowing users not to estimate tagwise dispersions. 
+
+\item
+estimateTrendedDisp() has more stable performance and does not return negative trended dispersion estimates.
+
+\item
+New plotMD methods for DGEList, DGEGLM, DGEExact and DGELRT objects to make a mean-difference plot (aka MA plot).
+
+\item
+readDGE() now recognizes HTSeq style meta genes.
+
+\item
+Remove the F-test in glmLRT().
+
+\item
+New argument contrast for diffSpliceDGE(), allowing users to specify the testing contrast. 
+
+\item
+glmTreat() returns both logFC and unshrunk.logFC in the output table.
+
+\item
+New method implemented in glmTreat() to increase the power of the test.
+
+\item
+New kegga methods for DGEExact and DGELRT objects to perform KEGG pathway analysis of differentially expressed genes using Entrez Gene IDs.
+
+\item
+New dimnames<- methods for DGEExact and DGELRT objects.
+
+\item
+Bug fix to dimnames<- method for DGEGLM objects.
+
+\item
+User's Guide updated. Three old case studies are replaced by two new comprehensive case studies.
+}}
+
+
 \section{Version 3.10.0}{\itemize{
 \item
 A DGEList method for romer() has been added, allowing access to rotation gene set enrichment analysis.
@@ -31,7 +70,7 @@ glmQLFit() is now an S3 generic function.
 glmQLFit() now breaks the output component s2.fit into three separate components: df.prior, var.post and var.prior.
 
 \item
-estimateDisp() now protects against fitted values of zeros, giving a more accurate estimate of prior.df.
+estimateDisp() now protects against fitted values of zeros, giving more accurate dispersion estimates.
 
 \item
 DGEList() now gives a message rather than an error when the count matrix has non-unique column names.
diff --git a/inst/doc/edgeR.pdf b/inst/doc/edgeR.pdf
index 86fd262..68b0de2 100644
Binary files a/inst/doc/edgeR.pdf and b/inst/doc/edgeR.pdf differ
diff --git a/man/diffSpliceDGE.Rd b/man/diffSpliceDGE.Rd
index e87548a..f65d9ff 100644
--- a/man/diffSpliceDGE.Rd
+++ b/man/diffSpliceDGE.Rd
@@ -4,63 +4,73 @@
 \title{Test for Differential Exon Usage}
 \description{Given a negative binomial generalized log-linear model fit at the exon level, test for differential exon usage between experimental conditions.}
 \usage{
-diffSpliceDGE(fit.exon, coef=ncol(fit.exon$design), contrast=NULL, geneid, exonid=NULL, verbose=TRUE)
+diffSpliceDGE(glmfit, coef=ncol(glmfit$design), contrast=NULL, geneid, exonid=NULL, prior.count=0.125, verbose=TRUE)
 }
 
 \arguments{
-  \item{fit.exon}{an \code{DGEGLM} fitted model object produced by \code{glmFit}. Rows should correspond to exons.}
+  \item{glmfit}{a \code{DGEGLM} fitted model object produced by \code{glmFit} or \code{glmQLFit}. Rows should correspond to exons.}
   \item{coef}{integer indicating which coefficient of the generalized linear model is to be tested for differential exon usage. Defaults to the last coefficient.}
   \item{contrast}{numeric vector specifying the contrast of the linear model coefficients to be tested for differential exon usage. Length must equal the number of columns of \code{design}. If specified, it takes precedence over \code{coef}.}
-  \item{geneid}{gene identifiers. Either a vector of length \code{nrow(fit.exon)} or the name of the column of \code{fit.exon$genes} containing the gene identifiers. Rows with the same ID are assumed to belong to the same gene.}
-  \item{exonid}{exon identifiers. Either a vector of length \code{nrow(fit.exon)} or the name of the column of \code{fit.exon$genes} containing the exon identifiers.}
+  \item{geneid}{gene identifiers. Either a vector of length \code{nrow(glmfit)} or the name of the column of \code{glmfit$genes} containing the gene identifiers. Rows with the same ID are assumed to belong to the same gene.}
+  \item{exonid}{exon identifiers. Either a vector of length \code{nrow(glmfit)} or the name of the column of \code{glmfit$genes} containing the exon identifiers.}
+  \item{prior.count}{average prior count to be added to observation to shrink the estimated log-fold-changes towards zero.}
   \item{verbose}{logical, if \code{TRUE} some diagnostic information about the number of genes and exons is output.}
 }
 
 \value{
-\code{diffSpliceDGE} produces an object of class \code{DGELRT} containing the component \code{design} from \code{fit.exon} plus the following new components:
+\code{diffSpliceDGE} produces an object of class \code{DGELRT} containing the component \code{design} from \code{glmfit} plus the following new components:
   \item{comparison}{character string describing the coefficient being tested.}
-  \item{coefficients}{numeric vector of coefficients on the natural log scale. Each coefficient is the difference between the log-fold-change for that exon versus the average log-fold-change for the rest exons within the same gene.}
-  \item{exon.F}{numeric vector of F-statistics for exons.}
-  \item{exon.df.test}{numeric vector of testing degrees of freedom for exons.}
-  \item{exon.df.prior}{numeric vector of prior degrees of freedom for exons.}
-  \item{exon.df.residual}{numeric vector of residual degrees of freedom for exons.}
-  \item{exon.p.value}{numeric vector of p-values for exons.}
-  \item{genes}{data.frame of exon annotation}
+  \item{coefficients}{numeric vector of coefficients on the natural log scale. Each coefficient is the difference between the log-fold-change for that exon versus the log-fold-change for the entire gene which contains that exon.}
+  \item{genes}{data.frame of exon annotation.}
   \item{genecolname}{character string giving the name of the column of \code{genes} containing gene IDs.}
   \item{exoncolname}{character string giving the name of the column of \code{genes} containing exon IDs.}
+  \item{exon.df.test}{numeric vector of testing degrees of freedom for exons.}
+  \item{exon.p.value}{numeric vector of p-values for exons.}
   \item{gene.df.test}{numeric vector of testing degrees of freedom for genes.}
+  \item{gene.p.value}{numeric vector of gene-level testing p-values.}
+  \item{gene.Simes.p.value}{numeric vector of Simes' p-values for genes.}
+  \item{gene.genes}{data.frame of gene annotation.}
+
+Some components of the output depend on whether \code{glmfit} is produced by \code{glmFit} or \code{glmQLFit}. 
+If \code{glmfit} is produced by \code{glmFit}, then the following components are returned in the output object:
+  \item{exon.LR}{numeric vector of LR-statistics for exons.}
+  \item{gene.LR}{numeric vector of LR-statistics for gene-level test.}
+  
+If \code{glmfit} is produced by \code{glmQLFit}, then the following components are returned in the output object:
+  \item{exon.F}{numeric vector of F-statistics for exons.}
   \item{gene.df.prior}{numeric vector of prior degrees of freedom for genes.}
   \item{gene.df.residual}{numeric vector of residual degrees of freedom for genes.}
-  \item{gene.Simes.p.value}{numeric vector of Simes' p-values for genes.}
   \item{gene.F}{numeric vector of F-statistics for gene-level test.}
-  \item{gene.F.p.value}{numeric vector of F-test p-values for genes.}
-  \item{gene.genes}{data.frame of gene annotation.}
+
 The information and testing results for both exons and genes are sorted by geneid and by exonid within gene.
 }
 
 \details{
 This function tests for differential exon usage for each gene for a given coefficient of the generalized linear model.
 
-Testing for differential exon usage is equivalent to testing whether the exons in each gene have the same log-fold changes as the other exons in the same gene. 
-At exon-level, each exon is compared to the average of all other exons for the same gene using quasi-likelihood F-tests. 
-At gene-level, two different tests are provided. The first is converting exon-level p-values to gene-level p-values by Simes method.
-The other is an F-test for differences between the exon log-fold-changes within each gene.
+Testing for differential exon usage is equivalent to testing whether the exons in each gene have the same log-fold-changes as the other exons in the same gene. 
+At exon-level, the log-fold-change of each exon is compared to the log-fold-change of the entire gene which contains that exon.
+At gene-level, two different tests are provided. One is converting exon-level p-values to gene-level p-values by the Simes method.
+The other is using exon-level test statistics to conduct gene-level tests.
 }
 
 \author{Yunshun Chen and Gordon Smyth}
 
 \examples{
 # Gene exon annotation
-Gene <- paste("G", 1:10, sep="")
+Gene <- paste("Gene", 1:100, sep="")
 Gene <- rep(Gene, each=10)
 Exon <- paste("Ex", 1:10, sep="")
 Gene.Exon <- paste(Gene, Exon, sep=".")
 genes <- data.frame(GeneID=Gene, Gene.Exon=Gene.Exon)
 
-design <- model.matrix(~c(0,0,0,1,1,1))
-mu <- matrix(20, 100, 6)
-mu[1,4:6] <- 200
-counts <- matrix(rnbinom(600,mu=mu,size=20),100,6)
+group <- factor(rep(1:2, each=3))
+design <- model.matrix(~group)
+mu <- matrix(100, nrow=1000, ncol=6)
+# knock-out the first exon of Gene1 by 90%
+mu[1,4:6] <- 10
+# generate exon counts
+counts <- matrix(rnbinom(6000,mu=mu,size=20),1000,6)
 
 y <- DGEList(counts=counts, lib.size=rep(1e6,6), genes=genes)
 gfit <- glmFit(y, design, dispersion=0.05)
diff --git a/man/estimateDisp.Rd b/man/estimateDisp.Rd
index 6cfc8d9..b6f0ec7 100644
--- a/man/estimateDisp.Rd
+++ b/man/estimateDisp.Rd
@@ -8,9 +8,9 @@ Maximizes the negative binomial likelihood to give the estimate of the common, t
 }
 
 \usage{
-estimateDisp(y, design=NULL, prior.df=NULL, trend.method="locfit", span=NULL, 
-             min.row.sum=5, grid.length=21, grid.range=c(-10,10), robust=FALSE,
-             winsor.tail.p=c(0.05,0.1), tol=1e-06)
+estimateDisp(y, design=NULL, prior.df=NULL, trend.method="locfit", tagwise=TRUE,
+             span=NULL, min.row.sum=5, grid.length=21, grid.range=c(-10,10), 
+             robust=FALSE, winsor.tail.p=c(0.05,0.1), tol=1e-06)
 }
 
 \arguments{
@@ -22,6 +22,8 @@ estimateDisp(y, design=NULL, prior.df=NULL, trend.method="locfit", span=NULL,
 
 \item{trend.method}{method for estimating dispersion trend. Possible values are \code{"none"}, \code{"movingave"}, \code{"loess"} and \code{"locfit"}.}
 
+\item{tagwise}{logical, should the tagwise dispersions be estimated?}
+
 \item{span}{width of the smoothing window, as a proportion of the data set.}
 
 \item{min.row.sum}{numeric scalar giving a value for the filtering out of low abundance tags. Only tags with total sum of counts above this value are used. Low abundance tags can adversely affect the dispersion estimation, so this argument allows the user to select an appropriate filter threshold for the tag abundance.}
@@ -40,7 +42,7 @@ estimateDisp(y, design=NULL, prior.df=NULL, trend.method="locfit", span=NULL,
 \value{Returns \code{object} with the following added components:
 	\item{common.dispersion}{estimate of the common dispersion.}
 	\item{trended.dispersion}{estimates of the trended dispersions.}
-	\item{tagwise.dispersion}{tagwise estimates of the dispersion parameter.}
+	\item{tagwise.dispersion}{tagwise estimates of the dispersion parameter if \code{tagwise=TRUE}.}
 	\item{logCPM}{the average abundance of each tag, in log average counts per million.}
 	\item{prior.df}{prior degrees of freedom. It is a vector when robust method is used.}
 	\item{prior.n}{estimate of the prior weight, i.e. the smoothing parameter that indicates the weight to put on the common likelihood compared to the individual tag's likelihood.}
diff --git a/man/goana.Rd b/man/goana.Rd
index 7d12c32..450f5ae 100644
--- a/man/goana.Rd
+++ b/man/goana.Rd
@@ -18,7 +18,7 @@ Test for over-representation of gene ontology (GO) terms or KEGG pathways in the
   \item{trend}{adjust analysis for gene length or abundance?
   Can be logical, or a numeric vector of covariate values, or the name of the column of \code{de$genes} containing the covariate values.
   If \code{TRUE}, then \code{de$AveLogCPM} is used as the covariate.}
-  \item{\dots}{any other arguments are passed to \code{\link{goana.default}}.}
+  \item{\dots}{any other arguments are passed to \code{\link{goana.default}} or \code{\link{kegga.default}}.}
 }
 \details{
 \code{goana} performs Gene Ontology enrichment analyses for the up and down differentially expressed genes from a linear model analysis.
diff --git a/man/plotSpliceDGE.Rd b/man/plotSpliceDGE.Rd
index 433ce03..4ad83cd 100644
--- a/man/plotSpliceDGE.Rd
+++ b/man/plotSpliceDGE.Rd
@@ -1,26 +1,29 @@
-\title{Plot exons of a differentially spliced gene}
+\title{Differential splicing plot}
 \name{plotSpliceDGE}
 \alias{plotSpliceDGE}
 \description{
-Plot the exon-level log-fold changes for a differentially spliced gene.
+Plot relative log-fold changes by exons for the specified gene and highlight the significantly spliced exons.
 }
 \usage{
-plotSpliceDGE(lrt, geneid=NULL, rank=1L, FDR = 0.05)
+plotSpliceDGE(lrt, geneid=NULL, genecolname=NULL, rank=1L, FDR=0.05)
 }
 \arguments{
-  \item{lrt}{\code{GLMLRT} object produced by \code{diffSpliceDGE}.}
+  \item{lrt}{\code{DGELRT} object produced by \code{diffSpliceDGE}.}
   \item{geneid}{character string, ID of the gene to plot.}
+  \item{genecolname}{column name of \code{lrt$genes} containing gene IDs. Defaults to \code{lrt$genecolname}.}
   \item{rank}{integer, if \code{geneid=NULL} then this ranked gene will be plotted.}
   \item{FDR}{numeric, mark exons with false discovery rate less than this cutoff.}
 }
 
 \details{
-Plots interaction log-fold-change by exon for the specified gene.
+Plot relative log2-fold-changes by exon for the specified gene.
+The relative logFC is the difference between the exon's logFC and the overall logFC for the gene, as computed by \code{diffSpliceDGE}.
+The significantly spliced individual exons are highlighted as red dots. 
+The size of each red dot is weighted by its significance.
 }
 
 \value{A plot is created on the current graphics device.}
 \author{Yunshun Chen, Yifang Hu and Gordon Smyth}
 \seealso{
-\code{\link{diffSpliceDGE}}
+\code{\link{diffSpliceDGE}}, \code{\link{topSpliceDGE}}.
 }
-\examples{# See \code{\link{diffSpliceDGE}}}
diff --git a/man/topSpliceDGE.Rd b/man/topSpliceDGE.Rd
index ef264a8..3b41336 100644
--- a/man/topSpliceDGE.Rd
+++ b/man/topSpliceDGE.Rd
@@ -5,29 +5,43 @@
 Top table ranking the most differentially spliced genes or exons.
 }
 \usage{
-topSpliceDGE(lrt, level="gene", gene.test="Simes", number=10, FDR=1)
+topSpliceDGE(lrt, test="Simes", number=10, FDR=1)
 }
 \arguments{
   \item{lrt}{\code{DGELRT} object produced by \code{diffSpliceDGE}.}
-  \item{level}{character string, should the table be by \code{"exon"} or by \code{"gene"}.}
-  \item{gene.test}{character string, choice for the gene-level p-values. Possible values are "Simes" and "F".}
+  \item{test}{character string, possible values are \code{"Simes"}, \code{"gene"} or \code{"exon"}.
+    \code{"exon"} gives exon-level tests for each exon.
+    \code{"gene"} gives gene-level tests for each gene.
+    \code{"Simes"} gives genewise p-values derived from the exon-level tests after Simes adjustment for each gene.}
   \item{number}{integer, maximum number of rows to output.}
   \item{FDR}{numeric, only show exons or genes with false discovery rate less than this cutoff.}
 }
 
 \details{
-Ranks exons or genes by p-values.
+Ranks genes or exons by evidence for differential splicing.
+The exon-level tests test for differences between each exon and all the exons for the same gene.
+The gene-level tests test for any differences in exon usage between experimental conditions.
+
+The Simes method processes the exon-level p-values to give an overall call of differential splicing for each gene.
+It returns the minimum Simes-adjusted p-values for each gene.
+
+The gene-level tests are likely to be powerful for genes in which several exons are differentially spliced.
+The Simes p-value is likely to be more powerful when only a minority of the exons for a gene are differentially spliced.
+The exon-level tests are not recommended for formal error rate control.
 }
 
-\value{A data.frame with any annotation columns found in \code{fit} plus the following columns
-  \item{NExons}{number of exons if \code{level="gene"}}
-  \item{Gene.Exon}{exon annotation if \code{level="exon"}}
-  \item{logFC}{log-fold change of one exon vs all the exons for the same gene (if \code{level="exon"})}
-  \item{F}{F-statistics for exons if \code{level="exon"}}
+\value{A data.frame with any annotation columns found in \code{lrt} plus the following columns
+  \item{NExons}{number of exons if \code{test="Simes"} or \code{"gene"}}
+  \item{Gene.Exon}{exon annotation if \code{test="exon"}}
+  \item{logFC}{log-fold change of one exon vs all the exons for the same gene (if \code{test="exon"})}
+  \item{exon.LR}{LR-statistics for exons (if \code{test="exon"} and the object for \code{diffSpliceDGE} was produced by \code{glmFit})}
+  \item{exon.F}{F-statistics for exons (if \code{test="exon"} and the object for \code{diffSpliceDGE} was produced by \code{glmQLFit})}
+  \item{gene.LR}{LR-statistics for genes (if \code{test="gene"} and the object for \code{diffSpliceDGE} was produced by \code{glmFit})}
+  \item{gene.F}{F-statistics for genes (if \code{test="gene"} and the object for \code{diffSpliceDGE} was produced by \code{glmQLFit})}
   \item{P.Value}{p-value}
   \item{FDR}{false discovery rate}
 }
 
 \author{Yunshun Chen and Gordon Smyth}
 
-\examples{# See \code{\link{diffSpliceDGE}}}
+\seealso{ \code{\link{diffSpliceDGE}}.}
diff --git a/tests/edgeR-Tests.Rout.save b/tests/edgeR-Tests.Rout.save
index b0c9742..f91f8dd 100644
--- a/tests/edgeR-Tests.Rout.save
+++ b/tests/edgeR-Tests.Rout.save
@@ -302,195 +302,7 @@ Loading required package: splines
  0.1766  0.1789  0.1814  0.1846  0.1870  0.2119 
 > dglm2 <- estimateDisp(dglm, design, prior.df=20)
 > summary(dglm2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1527  0.1669  0.1814  0.1858  0.1951  0.2497 
-> dglm2 <- estimateDisp(dglm, design, robust=TRUE)
-Loading required package: statmod
-> summary(dglm2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
- 0.1766  0.1789  0.1814  0.1846  0.1870  0.2119 
-> 
-> # Continuous trend
-> nlibs <- 3
-> ntags <- 1000
-> dispersion.true <- 0.1
-> # Make first transcript respond to covariate x
-> x <- 0:2
-> design <- model.matrix(~x)
-> beta.true <- cbind(Beta1=2,Beta2=c(2,rep(0,ntags-1)))
-> mu.true <- 2^(beta.true %*% t(design))
-> # Generate count data
-> y <- rnbinom(ntags*nlibs,mu=mu.true,size=1/dispersion.true)
-> y <- matrix(y,ntags,nlibs)
-> colnames(y) <- c("x0","x1","x2")
-> rownames(y) <- paste("Gene",1:ntags,sep="")
-> d <- DGEList(y)
-> d <- calcNormFactors(d)
-> fit <- glmFit(d, design, dispersion=dispersion.true, prior.count=0.5/3)
-> results <- glmLRT(fit, coef=2)
-> topTags(results)
-Coefficient:  x 
-            logFC   logCPM        LR       PValue          FDR
-Gene1    2.907024 13.56183 38.738512 4.845536e-10 4.845536e-07
-Gene61   2.855317 10.27136 10.738307 1.049403e-03 5.247015e-01
-Gene62  -2.123902 10.53174  8.818703 2.981585e-03 8.334760e-01
-Gene134 -1.949073 10.53355  8.125889 4.363759e-03 8.334760e-01
-Gene740 -1.610046 10.94907  8.013408 4.643227e-03 8.334760e-01
-Gene354  2.022698 10.45066  7.826308 5.149118e-03 8.334760e-01
-Gene5    1.856816 10.45249  7.214238 7.232750e-03 8.334760e-01
-Gene746 -1.798331 10.53094  6.846262 8.882693e-03 8.334760e-01
-Gene110  1.623148 10.68607  6.737984 9.438120e-03 8.334760e-01
-Gene383  1.637140 10.75412  6.687530 9.708965e-03 8.334760e-01
-> d <- estimateGLMCommonDisp(d, design, verbose=TRUE)
-Disp = 0.10253 , BCV = 0.3202 
-> glmFit(d,design,dispersion=dispersion.true, prior.count=0.5/3)
-An object of class "DGEGLM"
-$coefficients
-      (Intercept)          x
-Gene1   -7.391745  2.0149958
-Gene2   -7.318483 -0.7611895
-Gene3   -6.831702 -0.1399478
-Gene4   -7.480255  0.5172002
-Gene5   -8.747793  1.2870467
-995 more rows ...
-
-$fitted.values
-             x0        x1          x2
-Gene1 2.3570471 18.954454 138.2791328
-Gene2 2.5138172  1.089292   0.4282107
-Gene3 4.1580452  3.750528   3.0690081
-Gene4 2.1012460  3.769592   6.1349937
-Gene5 0.5080377  2.136398   8.1502486
-995 more rows ...
-
-$deviance
-[1] 6.38037545 1.46644913 1.38532340 0.01593969 1.03894513
-995 more elements ...
-
-$iter
-[1] 8 4 4 4 6
-995 more elements ...
-
-$failed
-[1] FALSE FALSE FALSE FALSE FALSE
-995 more elements ...
-
-$method
-[1] "levenberg"
-
-$counts
-      x0 x1  x2
-Gene1  0 30 110
-Gene2  2  2   0
-Gene3  3  6   2
-Gene4  2  4   6
-Gene5  1  1   9
-995 more rows ...
-
-$unshrunk.coefficients
-      (Intercept)          x
-Gene1   -7.437763  2.0412762
-Gene2   -7.373370 -0.8796273
-Gene3   -6.870127 -0.1465014
-Gene4   -7.552642  0.5410832
-Gene5   -8.972372  1.3929679
-995 more rows ...
-
-$df.residual
-[1] 1 1 1 1 1
-995 more elements ...
-
-$design
-  (Intercept) x
-1           1 0
-2           1 1
-3           1 2
-attr(,"assign")
-[1] 0 1
-
-$offset
-         [,1]     [,2]     [,3]
-[1,] 8.295172 8.338525 8.284484
-[2,] 8.295172 8.338525 8.284484
-[3,] 8.295172 8.338525 8.284484
-[4,] 8.295172 8.338525 8.284484
-[5,] 8.295172 8.338525 8.284484
-995 more rows ...
-
-$dispersion
-[1] 0.1
-
-$prior.count
-[1] 0.1666667
-
-$samples
-   group lib.size norm.factors
-x0     1     4001    1.0008730
-x1     1     4176    1.0014172
-x2     1     3971    0.9977138
-
-$AveLogCPM
-[1] 13.561832  9.682757 10.447014 10.532113 10.452489
-995 more elements ...
-
-> 
-> d2 <- estimateDisp(d, design)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.05545 0.09511 0.11620 0.11010 0.13330 0.16860 
-> d2 <- estimateDisp(d, design, prior.df=20)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.04203 0.08587 0.11280 0.11010 0.12370 0.37410 
-> d2 <- estimateDisp(d, design, robust=TRUE)
-> summary(d2$tagwise.dispersion)
-   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.05545 0.09511 0.11620 0.11010 0.13330 0.16860 
-> 
-> # Exact tests
-> y <- matrix(rnbinom(20,mu=10,size=3/2),5,4)
-> group <- factor(c(1,1,2,2))
-> ys <- splitIntoGroupsPseudo(y,group,pair=c(1,2))
-> exactTestDoubleTail(ys$y1,ys$y2,dispersion=2/3)
-[1] 0.1334396 0.6343568 0.7280015 0.7124912 0.3919258
-> 
-> y <- matrix(rnbinom(5*7,mu=10,size=3/2),5,7)
-> group <- factor(c(1,1,2,2,3,3,3))
-> ys <- splitIntoGroupsPseudo(y,group,pair=c(1,3))
-> exactTestDoubleTail(ys$y1,ys$y2,dispersion=2/3)
-[1] 1.0000000 0.4486382 1.0000000 0.9390317 0.4591241
-> exactTestBetaApprox(ys$y1,ys$y2,dispersion=2/3)
-[1] 1.0000000 0.4492969 1.0000000 0.9421695 0.4589194
-> 
-> y[1,3:4] <- 0
-> design <- model.matrix(~group)
-> fit <- glmFit(y,design,dispersion=2/3,prior.count=0.5/7)
-> summary(fit$coef)
-  (Intercept)         group2            group3        
- Min.   :-1.817   Min.   :-5.0171   Min.   :-0.64646  
- 1st Qu.:-1.812   1st Qu.:-1.1565   1st Qu.:-0.13919  
- Median :-1.712   Median : 0.1994   Median :-0.10441  
- Mean   :-1.625   Mean   :-0.9523   Mean   :-0.04217  
- 3rd Qu.:-1.429   3rd Qu.: 0.3755   3rd Qu.:-0.04305  
- Max.   :-1.356   Max.   : 0.8374   Max.   : 0.72227  
-> 
-> lrt <- glmLRT(fit,contrast=cbind(c(0,1,0),c(0,0,1)))
-> topTags(lrt)
-Coefficient:  LR test of 2 contrasts 
-     logFC.1    logFC.2   logCPM         LR      PValue        FDR
-1 -7.2381060 -0.0621100 17.19071 10.7712171 0.004582051 0.02291026
-5 -1.6684268 -0.9326507 17.33529  1.7309951 0.420842115 0.90967967
-2  1.2080938  1.0420198 18.24544  1.0496688 0.591653347 0.90967967
-4  0.5416704 -0.1506381 17.57744  0.3958596 0.820427427 0.90967967
-3  0.2876249 -0.2008143 18.06216  0.1893255 0.909679672 0.90967967
-> design <- model.matrix(~0+group)
-> fit <- glmFit(y,design,dispersion=2/3,prior.count=0.5/7)
-> lrt <- glmLRT(fit,contrast=cbind(c(-1,1,0),c(0,-1,1),c(-1,0,1)))
-> topTags(lrt)
-Coefficient:  LR test of 2 contrasts 
-     logFC.1    logFC.2   logCPM         LR      PValue        FDR
-1 -7.2381060  7.1759960 17.19071 10.7712171 0.004582051 0.02291026
-5 -1.6684268  0.7357761 17.33529  1.7309951 0.420842115 0.90967967
+   Min. 1st Qu.  Median    Mean 3rd Qu.    .6684268  0.7357761 17.33529  1.7309951 0.420842115 0.90967967
 2  1.2080938 -0.1660740 18.24544  1.0496688 0.591653347 0.90967967
 4  0.5416704 -0.6923084 17.57744  0.3958596 0.820427427 0.90967967
 3  0.2876249 -0.4884392 18.06216  0.1893255 0.909679672 0.90967967
@@ -562,3 +374,52 @@ $n0
 > proc.time()
    user  system elapsed 
    3.58    0.04    5.32 
+                                                                                                                                                                                                                                                                                                                                                                   edgeR/vignettes/                                                                                    0000755 0001751 0001751 00000000000 1 [...]
+%\VignetteKeyword{RNA-Seq}
+%\VignetteKeyword{differential expression}
+%\VignettePackage{edgeR}
+\documentclass[12pt]{article}
+
+\textwidth=6.2in
+\textheight=8.5in
+\oddsidemargin=0.2in
+\evensidemargin=0.2in
+\headheight=0in
+\headsep=0in
+
+\begin{document}
+
+\title{edgeR Package Introduction}
+\author{Yunshun Chen, Davis McCarthy, Aaron Lun,\\
+Xiaobei Zhou, Mark Robinson, Gordon K.\ Smyth}
+\date{10 October 2012\\
+Revised 8 October 2014}
+\maketitle
+
+
+edgeR is a package for the differential expression analysis of digital gene expression data,
+that is, of count data arising from DNA sequencing technologies.
+It is especially designed for differential expression analyses of RNA-Seq or SAGE data,
+or differential marking analyses of ChIP-Seq data.
+
+edgeR implements novel statistical methods based on the negative binomial distribution
+as a model for count variability, including empirical Bayes methods, exact tests, and generalized linear models.
+The package is especially suitable for analysing designed experiments with multiple
+experimental factors but possibly small numbers of replicates.
+It has unique abilities to model transcript specific variation even in small samples,
+a capability essential for prioritizing genes or transcripts that have consistent effects across replicates.
+
+The full edgeR User's Guide is available as part of the online documentation.
+To reach the User's Guide, install the edgeR package and load it into an R session by \texttt{library(edgeR)}.
+In R for Windows, the User's Guide will then be available from the drop-down menu called ``Vignettes''.
+In other operating systems, type
+\begin{Schunk}
+\begin{Sinput}
+> library(edgeR)
+> edgeRUsersGuide()
+\end{Sinput}
+\end{Schunk}
+at the R prompt to open the User's Guide in a pdf viewer.
+
+\end{document}
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             [...]
\ No newline at end of file
diff --git a/vignettes/edgeR.Rnw b/vignettes/edgeR.Rnw
deleted file mode 100755
index af5781e..0000000
--- a/vignettes/edgeR.Rnw
+++ /dev/null
@@ -1,48 +0,0 @@
-%\VignetteIndexEntry{edgeR Vignette}
-%\VignetteKeyword{RNA-Seq}
-%\VignetteKeyword{differential expression}
-%\VignettePackage{edgeR}
-\documentclass[12pt]{article}
-
-\textwidth=6.2in
-\textheight=8.5in
-\oddsidemargin=0.2in
-\evensidemargin=0.2in
-\headheight=0in
-\headsep=0in
-
-\begin{document}
-
-\title{edgeR Package Introduction}
-\author{Yunshun Chen, Davis McCarthy, Aaron Lun,\\
-Xiaobei Zhou, Mark Robinson, Gordon K.\ Smyth}
-\date{10 October 2012\\
-Revised 8 October 2014}
-\maketitle
-
-
-edgeR is a package for the differential expression analysis of digital gene expression data,
-that is, of count data arising from DNA sequencing technologies.
-It is especially designed for differential expression analyses of RNA-Seq or SAGE data,
-or differential marking analyses of ChIP-Seq data.
-
-edgeR implements novel statistical methods based on the negative binomial distribution
-as a model for count variability, including empirical Bayes methods, exact tests, and generalized linear models.
-The package is especially suitable for analysing designed experiments with multiple
-experimental factors but possibly small numbers of replicates.
-It has unique abilities to model transcript specific variation even in small samples,
-a capability essential for prioritizing genes or transcripts that have consistent effects across replicates.
-
-The full edgeR User's Guide is available as part of the online documentation.
-To reach the User's Guide, install the edgeR package and load it into an R session by \texttt{library(edgeR)}.
-In R for Windows, the User's Guide will then be available from the drop-down menu called ``Vignettes''.
-In other operating systems, type
-\begin{Schunk}
-\begin{Sinput}
-> library(edgeR)
-> edgeRUsersGuide()
-\end{Sinput}
-\end{Schunk}
-at the R prompt to open the User's Guide in a pdf viewer.
-
-\end{document}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-edger.git



More information about the debian-med-commit mailing list