[med-svn] [r-bioc-cummerbund] 02/04: Imported Upstream version 2.10.0

Andreas Tille tille at debian.org
Sun Jun 28 20:09:31 UTC 2015


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-bioc-cummerbund.

commit efe7e1bcb9c303dcf3e12fed576ad957a6740773
Author: Andreas Tille <tille at debian.org>
Date:   Sun Jun 28 21:56:24 2015 +0200

    Imported Upstream version 2.10.0
---
 DESCRIPTION                              |  10 +-
 R/database-setup.R                       | 491 ++++++++++++++++---------------
 build/vignette.rds                       | Bin 303 -> 302 bytes
 inst/NEWS                                |  95 +++---
 inst/doc/cummeRbund-example-workflow.pdf | Bin 198287 -> 198279 bytes
 inst/doc/cummeRbund-manual.R             |  16 +-
 inst/doc/cummeRbund-manual.Rnw           | 220 +++++++-------
 inst/doc/cummeRbund-manual.pdf           | Bin 1563970 -> 1563785 bytes
 inst/extdata/.RData                      | Bin 3317 -> 0 bytes
 inst/reports/runReport.Rnw               |  86 ------
 vignettes/cummeRbund-manual.Rnw          | 220 +++++++-------
 11 files changed, 544 insertions(+), 594 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7647173..a23cacf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: cummeRbund
 Title: Analysis, exploration, manipulation, and visualization of
         Cufflinks high-throughput sequencing data.
-Version: 2.8.0
+Version: 2.10.0
 Date: 2013-04-22
 Author: L. Goff, C. Trapnell, D. Kelley
 Description: Allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data.  In addition, provides numerous plotting functions for commonly used visualizations.
@@ -17,7 +17,9 @@ Collate: AllGenerics.R AllClasses.R database-setup.R methods-CuffSet.R
         methods-CuffFeatureSet.R methods-CuffGene.R
         methods-CuffFeature.R tools.R
 LazyLoad: yes
-biocViews: Sequencing, RNASeq, GeneExpression, DifferentialExpression,
+biocViews: HighThroughputSequencing, HighThroughputSequencingData,
+        RNAseq, RNAseqData, GeneExpression, DifferentialExpression,
         Infrastructure, DataImport, DataRepresentation, Visualization,
-        Clustering, MultipleComparison, QualityControl
-Packaged: 2014-10-14 02:20:08 UTC; biocbuild
+        Bioinformatics, Clustering, MultipleComparisons, QualityControl
+NeedsCompilation: no
+Packaged: 2015-04-17 03:36:01 UTC; biocbuild
diff --git a/R/database-setup.R b/R/database-setup.R
index fb93544..8f9cd36 100644
--- a/R/database-setup.R
+++ b/R/database-setup.R
@@ -1,5 +1,5 @@
 # TODO: Add comment
-# 
+#
 # Author: lgoff
 ###############################################################################
 
@@ -20,22 +20,22 @@ loadRunInfo<-function(runInfoFile,
 		stringsAsFactors=FALSE,
 		row.names=NULL,
 		...) {
-	
+
 	#Setup and reporting
 	write(paste("Reading Run Info File ",runInfoFile,sep=""),stderr())
 	fileArgs$file = runInfoFile
-	
+
 	#Read Run Info file
 	runInfo = as.data.frame(do.call(read.table,fileArgs))
-	
+
 	#Parsing
 	#not needed...
-	
+
 	#Load into database (runInfo table)
 	write("Writing runInfo Table",stderr())
 	insert_SQL<-'INSERT INTO runInfo VALUES(:param, :value)'
 	bulk_insert(dbConn,insert_SQL,runInfo)
-	
+
 }
 
 #ReplicateTable
@@ -50,22 +50,22 @@ loadRepTable<-function(repTableFile,
 		stringsAsFactors=FALSE,
 		row.names=NULL,
 		...) {
-		
+
 	#Setup and reporting
 	write(paste("Reading Read Group Info  ",repTableFile,sep=""),stderr())
 	fileArgs$file = repTableFile
-	
+
 	#Read Run Info file
 	full = as.data.frame(read.delim(repTableFile))
 	#print(head(full))
-	
+
 	#Fix sample_names
 	full$condition<-make.db.names(dbConn,as.character(full$condition),unique=FALSE)
-	
+
 	#Parsing
 	#For now, I need to concatenate condition and replicate number
 	full$rep_name<-paste(full$condition,full$replicate_num,sep="_")
-	
+
 	#Load into database (replicates table)
 	write("Writing replicates Table",stderr())
 	insert_SQL<-'INSERT INTO replicates VALUES(:file, :condition, :replicate_num, :rep_name, :total_mass, :norm_mass, :internal_scale, :external_scale)'
@@ -84,19 +84,19 @@ loadVarModelTable<-function(VarModelFile,
 		stringsAsFactors=FALSE,
 		row.names=NULL,
 		...) {
-	
+
 	#Setup and reporting
 	write(paste("Reading Var Model Info  ",VarModelFile,sep=""),stderr())
 	fileArgs$file = VarModelFile
-	
+
 	#Read Run Info file
 	full = as.data.frame(read.delim(VarModelFile))
 	#print(head(full))
-	
+
 	#Fix sample_names
 	full$condition<-make.db.names(dbConn,as.character(full$condition),unique=FALSE)
-	
-	
+
+
 	#Load into database (replicates table)
 	write("Writing varModel Table",stderr())
 	insert_SQL<-'INSERT INTO varModel VALUES(:condition, :locus, :compatible_count_mean, :compatible_count_var, :total_count_mean, :total_count_var, :fitted_var)'
@@ -128,24 +128,24 @@ loadGenes<-function(fpkmFile,
 	#Error Trapping
 	if (missing(fpkmFile))
 		stop("fpkmFile cannot be missing!")
-	
+
 	if (missing(dbConn))
 		stop("Must provide a dbConn connection")
-	
+
 	#TODO test dbConn connection and database structure
-	
+
 	idCols = c(1:9)
 
 	#Read primary file
 	write(paste("Reading ",fpkmFile,sep=""),stderr())
 	fpkmArgs$file = fpkmFile
 	full = as.data.frame(do.call(read.table,fpkmArgs))
-	
+
 	########
 	#Handle Sample Names
 	########
 
-	
+
 	#Check that samples table is populated
 	write("Checking samples table...",stderr())
 	samples<-getSamplesFromColnames(full)
@@ -161,7 +161,7 @@ loadGenes<-function(fpkmFile,
 		write("Populating samples table...",stderr())
 		populateSampleTable(samples,dbConn)
 	}
-	
+
 	######
 	#Populate genes table
 	######
@@ -170,7 +170,7 @@ loadGenes<-function(fpkmFile,
 	#dbWriteTable(dbConn,'genes',genesTable,row.names=F,append=T)
 	insert_SQL<-'INSERT INTO genes VALUES(:tracking_id, :class_code, :nearest_ref_id, :gene_short_name, :locus, :length, :coverage)'
 	bulk_insert(dbConn,insert_SQL,genesTable)
-	
+
 	######
 	#Populate geneData table
 	######
@@ -179,7 +179,7 @@ loadGenes<-function(fpkmFile,
 	colnames(genemelt)[colnames(genemelt)=='variable']<-'sample_name'
 	#Clean up and normalize data
 	genemelt$measurement = ""
-	
+
 	genemelt$measurement[grepl("_FPKM$",genemelt$sample_name)] = "fpkm"
 	genemelt$measurement[grepl("_conf_lo$",genemelt$sample_name)] = "conf_lo"
 	genemelt$measurement[grepl("_conf_hi$",genemelt$sample_name)] = "conf_hi"
@@ -189,27 +189,27 @@ loadGenes<-function(fpkmFile,
 	genemelt$sample_name<-gsub("_conf_lo$","",genemelt$sample_name)
 	genemelt$sample_name<-gsub("_conf_hi$","",genemelt$sample_name)
 	genemelt$sample_name<-gsub("_status$","",genemelt$sample_name)
-	
+
 	#Adjust sample names with make.db.names
 	genemelt$sample_name <- make.db.names(dbConn,as.vector(genemelt$sample_name),unique=FALSE)
-	
+
 	#Recast
 	write("Recasting",stderr())
 	genemelt<-as.data.frame(dcast(genemelt,...~measurement))
-	
+
 	#debugging
 	#write(colnames(genemelt),stderr())
-	
+
 	#Write geneData table
 	write("Writing geneData table",stderr())
 	#dbWriteTable(dbConn,'geneData',as.data.frame(genemelt[,c(1:2,5,3,4,6)]),row.names=F,append=T)
 	insert_SQL<-'INSERT INTO geneData VALUES(:tracking_id,:sample_name,:fpkm,:conf_hi,:conf_lo,:status)'
 	bulk_insert(dbConn,insert_SQL,genemelt[,c(1:2,5,3,4,6)])
-	
+
 	#######
 	#Handle gene_exp.diff
 	#######
-	
+
 	if(file.exists(diffFile)){
 		#Read diff file
 		write(paste("Reading ",diffFile,sep=""),stderr())
@@ -221,22 +221,22 @@ loadGenes<-function(fpkmFile,
 			#Adjust sample names with make.db.names
 			diff$sample_1<-make.db.names(dbConn,as.vector(diff$sample_1),unique=FALSE)
 			diff$sample_2<-make.db.names(dbConn,as.vector(diff$sample_2),unique=FALSE)
-			
+
 			write("Writing geneExpDiffData table",stderr())
 			diffCols<-c(1,5:14)
-			
+
 			#debugging
 			#write(colnames(diff[,diffCols]),stderr())
-			
+
 			#dbWriteTable(dbConn,'geneExpDiffData',diff[,diffCols],row.names=F,append=T)
 			insert_SQL<-"INSERT INTO geneExpDiffData VALUES(:test_id,:sample_1,:sample_2,:status,:value_1,:value_2,?,:test_stat,:p_value,:q_value,:significant)"
 			bulk_insert(dbConn,insert_SQL,diff[,diffCols])
 		}else{
 			write(paste("No records found in", diffFile),stderr())
 		}
-	
+
 	}
-	
+
 	########
 	#TODO: Handle promoters.diff
 	########
@@ -245,7 +245,7 @@ loadGenes<-function(fpkmFile,
 		write(paste("Reading ",promoterFile,sep=""),stderr())
 		promoterArgs$file = promoterFile
 		promoter<-as.data.frame(do.call(read.table,promoterArgs))
-		
+
 		write("Writing promoterDiffData table",stderr())
 		promoterCols<-c(2,5:14)
 		if(dim(promoter)[1]>0){
@@ -256,54 +256,54 @@ loadGenes<-function(fpkmFile,
 			write(paste("No records found in", promoterFile),stderr())
 		}
 	}
-	
+
 	#########
 	#Handle Feature Data (this will actually be done on CuffData objects instead...but I may include something here as well)
 	#########
-	
+
 	###########
 	#Handle Counts .count_tracking
 	###########
 	if(file.exists(countFile)){
-		
+
 		idCols = c(1)
-		
+
 		#Read countFile
 		write(paste("Reading ", countFile,sep=""),stderr())
 		countArgs$file = countFile
 		counts<-as.data.frame(do.call(read.table,countArgs))
-		
+
 		if(dim(counts)[1]>0){
 			#Reshape geneCount table
 			write("Reshaping geneCount table",stderr())
 			countmelt<-melt(counts,id.vars=c("tracking_id"),measure.vars=-idCols)
 			colnames(countmelt)[colnames(countmelt)=='variable']<-'sample_name'
-			
+
 			countmelt$measurement = ""
-			
+
 			countmelt$measurement[grepl("_count$",countmelt$sample_name)] = "count"
 			countmelt$measurement[grepl("_count_variance$",countmelt$sample_name)] = "variance"
 			countmelt$measurement[grepl("_count_uncertainty_var$",countmelt$sample_name)] = "uncertainty"
 			countmelt$measurement[grepl("_count_dispersion_var$",countmelt$sample_name)] = "dispersion"
 			countmelt$measurement[grepl("_status$",countmelt$sample_name)] = "status"
-			
+
 			countmelt$sample_name<-gsub("_count$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_variance$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_uncertainty_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_dispersion_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_status$","",countmelt$sample_name)
-			
+
 			#Adjust sample names with make.db.names
 			countmelt$sample_name <- make.db.names(dbConn,as.vector(countmelt$sample_name),unique=FALSE)
-			
+
 			#Recast
 			write("Recasting",stderr())
 			countmelt<-as.data.frame(dcast(countmelt,...~measurement))
-			
+
 			#debugging
 			#write(colnames(countmelt),stderr())
-			
-	
+
+
 			#Write geneCount table
 			write("Writing geneCount table",stderr())
 			insert_SQL<-'INSERT INTO geneCount VALUES(:tracking_id,:sample_name,:count,:variance,:uncertainty,:dispersion,:status)'
@@ -311,10 +311,10 @@ loadGenes<-function(fpkmFile,
 		}else{
 			write(paste("No records found in", countFile),stderr())
 		}
-		
+
 	}
-		
-		
+
+
 	###########
 	#Handle Replicates .rep_tracking
 	###########
@@ -326,16 +326,16 @@ loadGenes<-function(fpkmFile,
 		replicateArgs$file = replicateFile
 		reps<-as.data.frame(do.call(read.table,replicateArgs))
 		#print(head(reps))
-		
+
 		if(dim(reps)[1]>0){
-		
+
 			#Adjust sample names with make.db.names
 			reps$condition <- make.db.names(dbConn,as.character(reps$condition),unique=FALSE)
-		
+
 			#Create unique rep name
 			reps$rep_name<-paste(reps$condition,reps$replicate,sep="_")
 			colnames(reps)[colnames(reps)=="condition"]<-"sample_name"
-			
+
 			#Write geneReplicateData table
 			write("Writing geneReplicateData table",stderr())
 			insert_SQL<-'INSERT INTO geneReplicateData VALUES(:tracking_id,:sample_name,:replicate,:rep_name,:raw_frags,:internal_scaled_frags,:external_scaled_frags,:FPKM,:effective_length,:status)'
@@ -343,11 +343,11 @@ loadGenes<-function(fpkmFile,
 		}else{
 			write(paste("No records found in", replicateFile),stderr())
 		}
-		
+
 	}
-	
+
 }
-	
+
 #Isoforms
 loadIsoforms<-function(fpkmFile,
 		diffFile,
@@ -367,28 +367,28 @@ loadIsoforms<-function(fpkmFile,
 		stringsAsFactors = FALSE,
 		row.names=NULL,
 		...) {
-	
+
 	#Error Trapping
 	if (missing(fpkmFile))
 		stop("fpkmFile cannot be missing!")
-	
+
 	if (missing(dbConn))
 		stop("Must provide a dbConn connection")
-	
+
 	#TODO test dbConn connection and database structure
-	
+
 	idCols = c(1:9)
-	
+
 	#Read primary file
 	write(paste("Reading ",fpkmFile,sep=""),stderr())
 	fpkmArgs$file = fpkmFile
 	full = as.data.frame(do.call(read.table,fpkmArgs))
-	
+
 	########
 	#Handle Sample Names
 	########
-	
-	
+
+
 	#Check that samples table is populated
 	write("Checking samples table...",stderr())
 	samples<-getSamplesFromColnames(full)
@@ -405,22 +405,22 @@ loadIsoforms<-function(fpkmFile,
 		write("Populating samples table...",stderr())
 		populateSampleTable(samples,dbConn)
 	}
-	
+
 	######
 	#Populate isoforms table
 	######
 	isoformCols<-c(1,4,5,6,2,3,7:9)
 	isoformsTable<-full[,isoformCols]
-	
+
 	#This is a temporary fix until p_id is added to the 'isoforms.fpkm_tracking' file
 	isoformsTable<-cbind(isoformsTable[,1:2],data.frame(CDS_id=rep("NA",dim(isoformsTable)[1])),isoformsTable[,-c(1:2)])
 	#print (head(isoformsTable))
-	
+
 	write("Writing isoforms table",stderr())
 	#dbWriteTable(dbConn,'isoforms',as.data.frame(isoformsTable),row.names=F,append=T)
 	insert_SQL<-'INSERT INTO isoforms VALUES(?,?,?,?,?,?,?,?,?,?)'
 	bulk_insert(dbConn,insert_SQL,isoformsTable)
-	
+
 	######
 	#Populate isoformData table
 	######
@@ -429,34 +429,34 @@ loadIsoforms<-function(fpkmFile,
 	colnames(isoformmelt)[colnames(isoformmelt)=='variable']<-'sample_name'
 	#Clean up and normalize data
 	isoformmelt$measurement = ""
-	
+
 	isoformmelt$measurement[grepl("_FPKM$",isoformmelt$sample_name)] = "fpkm"
 	isoformmelt$measurement[grepl("_conf_lo$",isoformmelt$sample_name)] = "conf_lo"
 	isoformmelt$measurement[grepl("_conf_hi$",isoformmelt$sample_name)] = "conf_hi"
 	isoformmelt$measurement[grepl("_status$",isoformmelt$sample_name)] = "status"
-	
+
 	isoformmelt$sample_name<-gsub("_FPKM$","",isoformmelt$sample_name)
 	isoformmelt$sample_name<-gsub("_conf_lo$","",isoformmelt$sample_name)
 	isoformmelt$sample_name<-gsub("_conf_hi$","",isoformmelt$sample_name)
 	isoformmelt$sample_name<-gsub("_status$","",isoformmelt$sample_name)
-	
+
 	#Adjust sample names with make.db.names
 	isoformmelt$sample_name <- make.db.names(dbConn,as.vector(isoformmelt$sample_name),unique=FALSE)
-	
+
 	#Recast
 	write("Recasting",stderr())
 	isoformmelt<-as.data.frame(dcast(isoformmelt,...~measurement))
-	
+
 	#Write geneData table
 	write("Writing isoformData table",stderr())
 	#dbWriteTable(dbConn,'isoformData',as.data.frame(isoformmelt[,c(1:2,5,3,4,6)]),row.names=F,append=T)
 	insert_SQL<-"INSERT INTO isoformData VALUES(?,?,?,?,?,?)"
 	bulk_insert(dbConn,insert_SQL,isoformmelt[,c(1:2,5,3,4,6)])
-	
+
 	#######
 	#Handle isoform_exp.diff
 	#######
-	
+
 	if(file.exists(diffFile)){
 		#Read diff file
 		write(paste("Reading ",diffFile,sep=""),stderr())
@@ -466,7 +466,7 @@ loadIsoforms<-function(fpkmFile,
 			#Adjust sample names with make.db.names
 			diff$sample_1<-make.db.names(dbConn,as.vector(diff$sample_1),unique=FALSE)
 			diff$sample_2<-make.db.names(dbConn,as.vector(diff$sample_2),unique=FALSE)
-		
+
 			write("Writing isoformExpDiffData table",stderr())
 			diffCols<-c(1,5:14)
 			#dbWriteTable(dbConn,'isoformExpDiffData',diff[,diffCols],row.names=F,append=T)
@@ -476,52 +476,52 @@ loadIsoforms<-function(fpkmFile,
 			write(paste("No records found in",diffFile),stderr())
 		}
 	}
-	
+
 	###########
 	#Handle Counts .count_tracking
 	###########
 	if(file.exists(countFile)){
-		
+
 		idCols = c(1)
-		
+
 		#Read countFile
 		write(paste("Reading ", countFile,sep=""),stderr())
 		countArgs$file = countFile
 		counts<-as.data.frame(do.call(read.table,countArgs))
-		
+
 		if(dim(counts)[1]>0){
-		
+
 			#Reshape isoformCount table
 			write("Reshaping isoformCount table",stderr())
 			countmelt<-melt(counts,id.vars=c("tracking_id"),measure.vars=-idCols)
 			colnames(countmelt)[colnames(countmelt)=='variable']<-'sample_name'
-			
+
 			countmelt$measurement = ""
-			
+
 			countmelt$measurement[grepl("_count$",countmelt$sample_name)] = "count"
 			countmelt$measurement[grepl("_count_variance$",countmelt$sample_name)] = "variance"
 			countmelt$measurement[grepl("_count_uncertainty_var$",countmelt$sample_name)] = "uncertainty"
 			countmelt$measurement[grepl("_count_dispersion_var$",countmelt$sample_name)] = "dispersion"
 			countmelt$measurement[grepl("_status$",countmelt$sample_name)] = "status"
-			
+
 			countmelt$sample_name<-gsub("_count$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_variance$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_uncertainty_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_dispersion_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_status$","",countmelt$sample_name)
-			
+
 			#Adjust sample names with make.db.names
 			countmelt$sample_name <- make.db.names(dbConn,as.vector(countmelt$sample_name),unique=FALSE)
-			
-			
+
+
 			#Recast
 			write("Recasting",stderr())
 			countmelt<-as.data.frame(dcast(countmelt,...~measurement))
-			
+
 			#debugging
 			#write(colnames(countmelt),stderr())
-			
-			
+
+
 			#Write isoformCount table
 			write("Writing isoformCount table",stderr())
 			insert_SQL<-'INSERT INTO isoformCount VALUES(:tracking_id,:sample_name,:count,:variance,:uncertainty,:dispersion,:status)'
@@ -530,38 +530,38 @@ loadIsoforms<-function(fpkmFile,
 			write(paste("No records found in",countFile),stderr())
 		}
 	}
-	
-	
+
+
 	###########
 	#Handle Replicates .rep_tracking
 	###########
 	if(file.exists(replicateFile)){
-		
+
 		idCols = 1
 		#Read countFile
 		write(paste("Reading read group info in ", replicateFile,sep=""),stderr())
 		replicateArgs$file = replicateFile
 		reps<-as.data.frame(do.call(read.table,replicateArgs))
 		#print(head(reps))
-		
+
 		if(dim(reps)[1]>0){
-			
+
 			#Adjust sample names with make.db.names
 			reps$condition <- make.db.names(dbConn,as.character(reps$condition),unique=FALSE)
-		
+
 			#Create unique rep name
 			reps$rep_name<-paste(reps$condition,reps$replicate,sep="_")
 			colnames(reps)[colnames(reps)=="condition"]<-"sample_name"
-			
+
 			#Write isoformReplicateData table
 			write("Writing isoformReplicateData table",stderr())
 			insert_SQL<-'INSERT INTO isoformReplicateData VALUES(:tracking_id,:sample_name,:replicate,:rep_name,:raw_frags,:internal_scaled_frags,:external_scaled_frags,:FPKM,:effective_length,:status)'
 			bulk_insert(dbConn,insert_SQL,reps)
 		}else{
 			write(paste("No records found in",replicateFile),stderr())
-		}	
+		}
 	}
-	
+
 }
 
 #TSS groups
@@ -585,28 +585,28 @@ loadTSS<-function(fpkmFile,
 		stringsAsFactors = FALSE,
 		row.names=NULL,
 		...) {
-	
+
 	#Error Trapping
 	if (missing(fpkmFile))
 		stop("fpkmFile cannot be missing!")
-	
+
 	if (missing(dbConn))
 		stop("Must provide a dbConn connection")
-	
+
 	#TODO test dbConn connection and database structure
-	
+
 	idCols = c(1:9)
-	
+
 	#Read primary file
 	write(paste("Reading ",fpkmFile,sep=""),stderr())
 	fpkmArgs$file = fpkmFile
 	full = as.data.frame(do.call(read.table,fpkmArgs))
-	
+
 	########
 	#Handle Sample Names
 	########
-	
-	
+
+
 	#Check that samples table is populated
 	write("Checking samples table...",stderr())
 	samples<-getSamplesFromColnames(full)
@@ -622,7 +622,7 @@ loadTSS<-function(fpkmFile,
 		write("Populating samples table...",stderr())
 		populateSampleTable(samples,dbConn)
 	}
-	
+
 	######
 	#Populate TSS table
 	######
@@ -632,7 +632,7 @@ loadTSS<-function(fpkmFile,
 	if (nrow(tssTable)>0){
 		insert_SQL<-"INSERT INTO TSS VALUES(?,?,?,?,?,?,?,?)"
 		bulk_insert(dbConn,insert_SQL,tssTable)
-		
+
 		######
 		#Populate geneData table
 		######
@@ -641,24 +641,24 @@ loadTSS<-function(fpkmFile,
 		colnames(tssmelt)[colnames(tssmelt)=='variable']<-'sample_name'
 		#Clean up and normalize data
 		tssmelt$measurement = ""
-		
+
 		tssmelt$measurement[grepl("_FPKM$",tssmelt$sample_name)] = "fpkm"
 		tssmelt$measurement[grepl("_conf_lo$",tssmelt$sample_name)] = "conf_lo"
 		tssmelt$measurement[grepl("_conf_hi$",tssmelt$sample_name)] = "conf_hi"
 		tssmelt$measurement[grepl("_status$",tssmelt$sample_name)] = "status"
-		
+
 		tssmelt$sample_name<-gsub("_FPKM$","",tssmelt$sample_name)
 		tssmelt$sample_name<-gsub("_conf_lo$","",tssmelt$sample_name)
 		tssmelt$sample_name<-gsub("_conf_hi$","",tssmelt$sample_name)
 		tssmelt$sample_name<-gsub("_status$","",tssmelt$sample_name)
-		
+
 		#Adjust sample names with make.db.names
 		tssmelt$sample_name <- make.db.names(dbConn,as.vector(tssmelt$sample_name),unique=FALSE)
-		
+
 		#Recast
 		write("Recasting",stderr())
 		tssmelt<-as.data.frame(dcast(tssmelt,...~measurement))
-		
+
 		#Write geneData table
 		write("Writing TSSData table",stderr())
 		#dbWriteTable(dbConn,'TSSData',as.data.frame(tssmelt[,c(1:2,5,3,4,6)]),row.names=F,append=T)
@@ -672,13 +672,13 @@ loadTSS<-function(fpkmFile,
 	#######
 	#Handle tss_groups_exp.diff
 	#######
-	
+
 	if(file.exists(diffFile)){
 		#Read diff file
 		write(paste("Reading ",diffFile,sep=""),stderr())
 		diffArgs$file = diffFile
 		diff<-as.data.frame(do.call(read.table,diffArgs))
-		
+
 		if(dim(diff)[1]>0){
 			#Adjust sample names with make.db.names
 			diff$sample_1<-make.db.names(dbConn,as.vector(diff$sample_1),unique=FALSE)
@@ -693,7 +693,7 @@ loadTSS<-function(fpkmFile,
 			write(paste("No records found in",diffFile),stderr())
 		}
 	}
-	
+
 	#########
 	#TODO: Handle splicing.diff
 	########
@@ -702,7 +702,7 @@ loadTSS<-function(fpkmFile,
 		write(paste("Reading ",splicingFile,sep=""),stderr())
 		splicingArgs$file = splicingFile
 		splicing<-as.data.frame(do.call(read.table,splicingArgs))
-		
+
 		if(dim(splicing)[1]>0){
 			write("Writing splicingDiffData table",stderr())
 			splicingCols<-c(1:2,5:14)
@@ -713,52 +713,52 @@ loadTSS<-function(fpkmFile,
 			write(paste("No records found in",splicingFile),stderr())
 		}
 	}
-	
+
 	###########
 	#Handle Counts .count_tracking
 	###########
 	if(file.exists(countFile)){
-		
+
 		idCols = c(1)
-		
+
 		#Read countFile
 		write(paste("Reading ", countFile,sep=""),stderr())
 		countArgs$file = countFile
 		counts<-as.data.frame(do.call(read.table,countArgs))
-		
+
 		if(dim(counts)[1]>0){
-		
+
 			#Reshape TSSCount table
 			write("Reshaping TSSCount table",stderr())
 			countmelt<-melt(counts,id.vars=c("tracking_id"),measure.vars=-idCols)
 			colnames(countmelt)[colnames(countmelt)=='variable']<-'sample_name'
-			
+
 			countmelt$measurement = ""
-			
+
 			countmelt$measurement[grepl("_count$",countmelt$sample_name)] = "count"
 			countmelt$measurement[grepl("_count_variance$",countmelt$sample_name)] = "variance"
 			countmelt$measurement[grepl("_count_uncertainty_var$",countmelt$sample_name)] = "uncertainty"
 			countmelt$measurement[grepl("_count_dispersion_var$",countmelt$sample_name)] = "dispersion"
 			countmelt$measurement[grepl("_status$",countmelt$sample_name)] = "status"
-			
+
 			countmelt$sample_name<-gsub("_count$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_variance$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_uncertainty_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_dispersion_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_status$","",countmelt$sample_name)
-			
+
 			#Adjust sample names with make.db.names
 			countmelt$sample_name <- make.db.names(dbConn,as.vector(countmelt$sample_name),unique=FALSE)
-			
-			
+
+
 			#Recast
 			write("Recasting",stderr())
 			countmelt<-as.data.frame(dcast(countmelt,...~measurement))
-			
+
 			#debugging
 			#write(colnames(countmelt),stderr())
-			
-			
+
+
 			#Write TSSCount table
 			write("Writing TSSCount table",stderr())
 			insert_SQL<-'INSERT INTO TSSCount VALUES(:tracking_id,:sample_name,:count,:variance,:uncertainty,:dispersion,:status)'
@@ -767,29 +767,29 @@ loadTSS<-function(fpkmFile,
 			write(paste("No records found in",countFile),stderr())
 		}
 	}
-	
-	
+
+
 	###########
 	#Handle Replicates .rep_tracking
 	###########
 	if(file.exists(replicateFile)){
-		
+
 		idCols = 1
 		#Read countFile
 		write(paste("Reading read group info in ", replicateFile,sep=""),stderr())
 		replicateArgs$file = replicateFile
 		reps<-as.data.frame(do.call(read.table,replicateArgs))
 		#print(head(reps))
-		
+
 		if(dim(reps)[1]>0){
-				
+
 			#Adjust sample names with make.db.names
 			reps$condition <- make.db.names(dbConn,as.character(reps$condition),unique=FALSE)
-		
+
 			#Create unique rep name
 			reps$rep_name<-paste(reps$condition,reps$replicate,sep="_")
 			colnames(reps)[colnames(reps)=="condition"]<-"sample_name"
-			
+
 			#Write TSSReplicateData table
 			write("Writing TSSReplicateData table",stderr())
 			insert_SQL<-'INSERT INTO TSSReplicateData VALUES(:tracking_id,:sample_name,:replicate,:rep_name,:raw_frags,:internal_scaled_frags,:external_scaled_frags,:FPKM,:effective_length,:status)'
@@ -797,9 +797,9 @@ loadTSS<-function(fpkmFile,
 		}else{
 			write(paste("No records found in",replicateFile),stderr())
 		}
-		
+
 	}
-	
+
 }
 
 #CDS
@@ -823,29 +823,29 @@ loadCDS<-function(fpkmFile,
 		stringsAsFactors = FALSE,
 		row.names=NULL,
 		...) {
-	
+
 	#Error Trapping
 	if (missing(fpkmFile))
 		stop("fpkmFile cannot be missing!")
-	
+
 	if (missing(dbConn))
 		stop("Must provide a dbConn connection")
-	
+
 	#TODO test dbConn connection and database structure
-	
+
 	idCols = c(1:9)
-	
+
 	#Read primary file
 	write(paste("Reading ",fpkmFile,sep=""),stderr())
 	fpkmArgs$file = fpkmFile
 	full = as.data.frame(do.call(read.table,fpkmArgs))
-	
+
 	########
 	#Handle Sample Names
 	########
-	
-	
-	
+
+
+
 	#Check that samples table is populated
 	write("Checking samples table...",stderr())
 	samples<-getSamplesFromColnames(full)
@@ -861,7 +861,7 @@ loadCDS<-function(fpkmFile,
 		write("Populating samples table...",stderr())
 		populateSampleTable(samples,dbConn)
 	}
-	
+
 	######
 	#Populate CDS table
 	######
@@ -871,7 +871,7 @@ loadCDS<-function(fpkmFile,
 	if (nrow(cdsTable)>0){
 		insert_SQL<-"INSERT INTO CDS VALUES(?,?,?,?,?,?,?,?,?)"
 		bulk_insert(dbConn,insert_SQL,cdsTable)
-		
+
 		######
 		#Populate geneData table
 		######
@@ -880,51 +880,51 @@ loadCDS<-function(fpkmFile,
 		colnames(cdsmelt)[colnames(cdsmelt)=='variable']<-'sample_name'
 		#Clean up and normalize data
 		cdsmelt$measurement = ""
-		
+
 		cdsmelt$measurement[grepl("_FPKM$",cdsmelt$sample_name)] = "fpkm"
 		cdsmelt$measurement[grepl("_conf_lo$",cdsmelt$sample_name)] = "conf_lo"
 		cdsmelt$measurement[grepl("_conf_hi$",cdsmelt$sample_name)] = "conf_hi"
 		cdsmelt$measurement[grepl("_status$",cdsmelt$sample_name)] = "status"
-		
+
 		cdsmelt$sample_name<-gsub("_FPKM$","",cdsmelt$sample_name)
 		cdsmelt$sample_name<-gsub("_conf_lo$","",cdsmelt$sample_name)
 		cdsmelt$sample_name<-gsub("_conf_hi$","",cdsmelt$sample_name)
 		cdsmelt$sample_name<-gsub("_status$","",cdsmelt$sample_name)
-		
+
 		#Adjust sample names with make.db.names
 		cdsmelt$sample_name <- make.db.names(dbConn,as.vector(cdsmelt$sample_name),unique=FALSE)
-		
+
 		#Recast
 		write("Recasting",stderr())
 		cdsmelt<-as.data.frame(dcast(cdsmelt,...~measurement))
-		
+
 		#Write geneData table
 		write("Writing CDSData table",stderr())
 		#dbWriteTable(dbConn,'CDSData',as.data.frame(cdsmelt[,c(1:2,5,3,4,6)]),row.names=F,append=T)
 		insert_SQL<-"INSERT INTO CDSData VALUES(?,?,?,?,?,?)"
 		bulk_insert(dbConn,insert_SQL,cdsmelt[,c(1:2,5,3,4,6)])
-	
+
 	}else {
 		write(paste("No records found in",fpkmFile),stderr())
 		write("CDS FPKM tracking file was empty.",stderr())
 	}
-	
-	
+
+
 	#######
 	#Handle cds_groups_exp.diff
 	#######
-	
+
 	if(file.exists(diffFile)){
 		#Read diff file
 		write(paste("Reading ",diffFile,sep=""),stderr())
 		diffArgs$file = diffFile
 		diff<-as.data.frame(do.call(read.table,diffArgs))
-		
+
 		if(dim(diff)[1]>0){
 			#Adjust sample names with make.db.names
 			diff$sample_1<-make.db.names(dbConn,as.vector(diff$sample_1),unique=FALSE)
 			diff$sample_2<-make.db.names(dbConn,as.vector(diff$sample_2),unique=FALSE)
-			
+
 			write("Writing CDSExpDiffData table",stderr())
 			diffCols<-c(1,5:14)
 			#dbWriteTable(dbConn,'CDSExpDiffData',diff[,diffCols],row.names=F,append=T)
@@ -934,7 +934,7 @@ loadCDS<-function(fpkmFile,
 			write(paste("No records found in",diffFile),stderr())
 		}
 	}
-	
+
 	#########
 	#TODO: Handle CDS.diff
 	########
@@ -953,52 +953,52 @@ loadCDS<-function(fpkmFile,
 			write(paste("No records found in",CDSDiff),stderr())
 		}
 	}
-	
+
 	###########
 	#Handle Counts .count_tracking
 	###########
 	if(file.exists(countFile)){
-		
+
 		idCols = c(1)
-		
+
 		#Read countFile
 		write(paste("Reading ", countFile,sep=""),stderr())
 		countArgs$file = countFile
 		counts<-as.data.frame(do.call(read.table,countArgs))
-		
+
 		if(dim(counts)[1]>0){
-		
+
 			#Reshape CDSCount table
 			write("Reshaping CDSCount table",stderr())
 			countmelt<-melt(counts,id.vars=c("tracking_id"),measure.vars=-idCols)
 			colnames(countmelt)[colnames(countmelt)=='variable']<-'sample_name'
-			
+
 			countmelt$measurement = ""
-			
+
 			countmelt$measurement[grepl("_count$",countmelt$sample_name)] = "count"
 			countmelt$measurement[grepl("_count_variance$",countmelt$sample_name)] = "variance"
 			countmelt$measurement[grepl("_count_uncertainty_var$",countmelt$sample_name)] = "uncertainty"
 			countmelt$measurement[grepl("_count_dispersion_var$",countmelt$sample_name)] = "dispersion"
 			countmelt$measurement[grepl("_status$",countmelt$sample_name)] = "status"
-			
+
 			countmelt$sample_name<-gsub("_count$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_variance$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_uncertainty_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_count_dispersion_var$","",countmelt$sample_name)
 			countmelt$sample_name<-gsub("_status$","",countmelt$sample_name)
-			
+
 			#Adjust sample names with make.db.names
 			countmelt$sample_name <- make.db.names(dbConn,as.vector(countmelt$sample_name),unique=FALSE)
-			
-			
+
+
 			#Recast
 			write("Recasting",stderr())
 			countmelt<-as.data.frame(dcast(countmelt,...~measurement))
-			
+
 			#debugging
 			#write(colnames(countmelt),stderr())
-			
-			
+
+
 			#Write CDSCount table
 			write("Writing CDSCount table",stderr())
 			insert_SQL<-'INSERT INTO CDSCount VALUES(:tracking_id,:sample_name,:count,:variance,:uncertainty,:dispersion,:status)'
@@ -1007,29 +1007,29 @@ loadCDS<-function(fpkmFile,
 			write(paste("No records found in",countFile),stderr())
 		}
 	}
-	
-	
+
+
 	###########
 	#Handle Replicates .rep_tracking
 	###########
 	if(file.exists(replicateFile)){
-		
+
 		idCols = 1
 		#Read countFile
 		write(paste("Reading read group info in ", replicateFile,sep=""),stderr())
 		replicateArgs$file = replicateFile
 		reps<-as.data.frame(do.call(read.table,replicateArgs))
 		#print(head(reps))
-		
+
 		if(dim(reps)[1]>0){
-				
+
 			#Adjust sample names with make.db.names
 			reps$condition <- make.db.names(dbConn,as.character(reps$condition),unique=FALSE)
-		
+
 			#Create unique rep name
 			reps$rep_name<-paste(reps$condition,reps$replicate,sep="_")
 			colnames(reps)[colnames(reps)=="condition"]<-"sample_name"
-			
+
 			#Write CDSReplicateData table
 			write("Writing CDSReplicateData table",stderr())
 			insert_SQL<-'INSERT INTO CDSReplicateData VALUES(:tracking_id,:sample_name,:replicate,:rep_name,:raw_frags,:internal_scaled_frags,:external_scaled_frags,:FPKM,:effective_length,:status)'
@@ -1037,9 +1037,9 @@ loadCDS<-function(fpkmFile,
 		}else{
 			write(paste("No records found in",replicateFile),stderr())
 		}
-		
+
 	}
-	
+
 }
 
 ########################
@@ -1054,10 +1054,10 @@ loadCDS<-function(fpkmFile,
 createDB_noIndex<-function(dbFname="cuffData.db",driver="SQLite") {
 	#Builds sqlite db at 'dbFname' and returns a dbConnect object pointing to newly created database.
 	#No indexes are present
-	
+
 	drv<-dbDriver(driver)
 	db <- dbConnect(drv,dbname=dbFname)
-	
+
 	schema.text<-'
 -- Creator:       MySQL Workbench 5.2.33/ExportSQLite plugin 2009.12.02
 -- Author:        Loyal Goff
@@ -1636,17 +1636,17 @@ COMMIT;
 	create.sql <- paste(collapse="\n", create.sql)
 	create.sql <- strsplit(create.sql, ";")[[1]]
 	create.sql <- create.sql[-length(create.sql)] #nothing to run here
-	
-	tmp <- sapply(create.sql,function(x) sqliteQuickSQL(db,x))
+
+	tmp <- sapply(create.sql,function(x) dbGetQuery(db,x))
 	db
 }
 
 
 createIndices<-function(dbFname="cuffData.db",driver="SQLite",verbose=F){
-	
+
 	drv<-dbDriver(driver)
 	db <- dbConnect(drv,dbname=dbFname)
-	
+
 	index.text<-
 'CREATE INDEX "genes.gsn_index" ON "genes"("gene_short_name");
 CREATE INDEX "genes.cc_index" ON "genes"("class_code");
@@ -1728,15 +1728,22 @@ CREATE INDEX "features.fk_features_genes1" ON "features"("gene_id");
 CREATE INDEX "features.fk_features_isoforms1" ON "features"("isoform_id");
 CREATE INDEX "varModel.varModel_condition1" ON "varModel"("condition");
 CREATE INDEX "varModel.varModel_locus1" ON "varModel"("locus");
+CREATE INDEX "splicingDiffData.fk_splicingDiffData_allSamples" ON "splicingDiffData"("sample_1","sample_2");
+CREATE INDEX "TSSExpDiffData.fk_TSSExpDiffData_allSamples" ON "TSSExpDiffData"("sample_1","sample_2");
+CREATE INDEX "CDSDiffData.fk_CDSDiffData_allSamples" ON "CDSDiffData"("sample_1","sample_2");
+CREATE INDEX "CDSExpDiffData.fk_CDSExpDiffData_allSamples" ON "CDSExpDiffData"("sample_1","sample_2");
+CREATE INDEX "promoterDiffData.fk_promoterDiffData_allSamples" ON "promoterDiffData"("sample_1","sample_2");
+CREATE INDEX "isoformExpDiffData.fk_isoformExpDiffData_allSamples" ON "isoformExpDiffData"("sample_1","sample_2");
+CREATE INDEX "geneExpDiffData.fk_geneExpDiffData_allSamples" ON "geneExpDiffData"("sample_1","sample_2");
 '
 
 	create.sql <- strsplit(index.text,"\n")[[1]]
-	
+
 	tmp <- sapply(create.sql,function(x){
 			if (verbose){
 						write(paste(x,sep=""),stderr())
 					}
-			sqliteQuickSQL(db,x)
+			dbGetQuery(db,x)
 	})
 }
 
@@ -1758,7 +1765,7 @@ populateSampleTable<-function(samples,dbConn){
 
 bulk_insert <- function(dbConn,sql,bound.data)
 {
-	dbBeginTransaction(dbConn)
+	dbBegin(dbConn)
 	dbGetPreparedQuery(dbConn, sql, bind.data = bound.data)
 	dbCommit(dbConn)
 }
@@ -1797,7 +1804,7 @@ readCufflinks<-function(dir = getwd(),
 						rebuild = FALSE,
 						verbose = FALSE,
 						...){
-	
+
 	#Set file locations with directory
 	dbFile=file.path(dir,dbFile)
 	runInfoFile=file.path(dir,runInfoFile)
@@ -1822,53 +1829,53 @@ readCufflinks<-function(dir = getwd(),
 	promoterFile=file.path(dir,promoterFile)
 	splicingFile=file.path(dir,splicingFile)
 	varModelFile=file.path(dir,varModelFile)
-					
-					
+
+
 	#Check to see whether dbFile exists
 	if (!file.exists(dbFile) || rebuild == TRUE){
 		#if not, create it
 		write(paste("Creating database ",dbFile,sep=""),stderr())
 		dbConn<-createDB_noIndex(dbFile)
-		
+
 		#populate DB
 		if(file.exists(runInfoFile)){
 			loadRunInfo(runInfoFile,dbConn)
 		}
-		
+
 		if(file.exists(repTableFile)){
 			loadRepTable(repTableFile,dbConn)
 		}
-		
+
 		if(file.exists(varModelFile)){
 			loadVarModelTable(varModelFile,dbConn)
 		}
-		
+
 		if(!is.null(gtfFile)){
 			if(!is.null(genome)){
 				.loadGTF(gtfFile,genome,dbConn)
 			}else{
-				stop("'genome' cannot be NULL if you are supplying a .gtf file!")	
+				stop("'genome' cannot be NULL if you are supplying a .gtf file!")
 			}
 		}
-		
+
 		loadGenes(geneFPKM,geneDiff,promoterFile,countFile=geneCount,replicateFile=geneRep,dbConn)
 		loadIsoforms(isoformFPKM,isoformDiff,isoformCount,isoformRep,dbConn)
 		loadTSS(TSSFPKM,TSSDiff,splicingFile,TSSCount,TSSRep,dbConn)
 		loadCDS(CDSFPKM,CDSExpDiff,CDSDiff,CDSCount,CDSRep,dbConn)
-		
+
 		#Create Indexes on DB
 		write("Indexing Tables...",stderr())
 		createIndices(dbFile,verbose=verbose)
-		
+
 		#load Distribution Tests
 		#loadDistTests(promoterFile,splicingFile,CDSDiff)
-		
+
 	}
 	dbConn<-dbConnect(dbDriver(driver),dbFile)
 	return (
 			new("CuffSet",DB = dbConn,
 					#TODO: need to add replicate and count tables here and in AllClasses.R
-					
+
 					genes = new("CuffData",DB = dbConn, tables = list(mainTable = "genes",dataTable = "geneData",expDiffTable = "geneExpDiffData",featureTable = "geneFeatures",countTable="geneCount",replicateTable="geneReplicateData"), filters = list(),type = "genes",idField = "gene_id"),
 					isoforms = new("CuffData", DB = dbConn, tables = list(mainTable = "isoforms",dataTable = "isoformData",expDiffTable = "isoformExpDiffData",featureTable = "isoformFeatures",countTable="isoformCount",replicateTable="isoformReplicateData"), filters = list(),type="isoforms",idField = "isoform_id"),
 					TSS = new("CuffData", DB = dbConn, tables = list(mainTable = "TSS",dataTable = "TSSData",expDiffTable = "TSSExpDiffData",featureTable = "TSSFeatures",countTable="TSSCount",replicateTable="TSSReplicateData"), filters = list(),type = "TSS",idField = "TSS_group_id"),
@@ -1877,71 +1884,71 @@ readCufflinks<-function(dir = getwd(),
 					splicing = new("CuffDist", DB = dbConn, table = "splicingDiffData",type="splicing",idField="TSS_group_id"),
 					relCDS = new("CuffDist", DB = dbConn, table = "CDSDiffData",type="relCDS",idField="gene_id")
 			)
-	)	
-							
+	)
+
 }
 
 ############
 # Handle GTF file
 ############
 #loadGTF<-function(gtfFile,dbConn) {
-#	
+#
 #	#Error Trapping
 #	if (missing(gtfFile))
 #		stop("GTF file cannot be missing!")
-#	
+#
 #	if (missing(dbConn))
 #		stop("Must provide a dbConn connection")
-#	
+#
 #	write("Reading GTF file")
 #	gtf<-read.table(gtfFile,sep="\t",header=F)
-#	
+#
 #	write("Melting attributes")
 #	attributes<-melt(strsplit(as.character(gtf$V9),"; "))
 #	colnames(attributes)<-c("attribute","featureID")
 #	attributes<-paste(attributes$attribute,attributes$featureID)
 #	attributes<-strsplit(as.character(attributes)," ")
 #	attributes<-as.data.frame(do.call("rbind",attributes))
-#	
+#
 #	colnames(attributes)<-c("attribute","value","featureID")
 #	attributes<-attributes[,c(3,1,2)]
-#	
+#
 #	#Grab only gene_ID and transcript_ID to add to features table
 #	id.attributes<-attributes[attributes$attribute %in% c("gene_id","transcript_id"),]
 #	id.attributes$featureID<-as.numeric(as.character(id.attributes$featureID))
 #	id.attributes<-dcast(id.attributes,...~attribute)
-#	
+#
 #	#Main features table
 #	features<-gtf[,c(1:8)]
 #	colnames(features)<-c("seqname","source","type","start","end","score","strand","frame")
 #	features$featureID<-as.numeric(as.character(rownames(features)))
-#	
+#
 #	#Merge features and id.attributes
 #	features<-merge(features,id.attributes,by.x='featureID',by.y='featureID')
 #	features<-features[,c(1,10:11,2:9)]
-#	
+#
 #	#strip gene_id and transcript_id from attributes
 #	attributes<-attributes[!(attributes$attribute %in% c("gene_id","transcript_id")),]
-#	
+#
 #	#Write features table
 #	write("Writing features table",stderr())
 #	#dbWriteTable(dbConn,'geneData',as.data.frame(genemelt[,c(1:2,5,3,4,6)]),row.names=F,append=T)
 #	dbWriteTable(dbConn,'features',as.data.frame(features),append=T)
-#	
+#
 #	#Write features attribtues table
 #	#write("Writing feature attributes table",stderr())
 #	dbWriteTable(dbConn,'attributes',as.data.frame(attributes),append=T)
-#	
+#
 #}
 
 .loadGTF<-function(gtfFile,genomebuild,dbConn){
 	#Error Trapping
 	if (missing(gtfFile))
 		stop("GTF file cannot be missing!")
-	
+
 	if (missing(dbConn))
 		stop("Must provide a dbConn connection")
-	
+
 	write("Reading GTF file",stderr())
 	gr<-import(gtfFile,asRangedData=FALSE)
 	gr<-as(gr,"data.frame")
@@ -1951,7 +1958,7 @@ readCufflinks<-function(dir = getwd(),
 	colnames(gr)[grepl('^p_id$',colnames(gr))]<-'CDS_id'
 	write("Writing GTF features to 'features' table...",stderr())
 	#dbSendQuery(dbConn,"DROP TABLE IF EXISTS 'features'")
-	#dbBeginTransaction(dbConn)
+	#dbBegin(dbConn)
 	dbWriteTable(dbConn,'features',gr,row.names=F,overwrite=T)
 	#record Genome build
 	.recordGenome(genomebuild,dbConn)
@@ -1966,29 +1973,29 @@ readCufflinks<-function(dir = getwd(),
 }
 
 .cuff2db <- function(gtfFile, out.file = NULL, verbose = TRUE) {
-	
+
 	#require(rtracklayer)
 	#require(GenomicRanges)
 	#require(GenomicFeatures)
-	
+
 	requiredAttribs <- c("gene_id", "transcript_id", "exon_number")
-	
+
 	if (verbose) message("Importing ", gtfFile)
 	tmp <- import(gtfFile, asRangedData=FALSE)
-	
+
 	#dispose of unspliced unstranded transcripts
 	#tmp <- tmp[ which(strand(tmp) %in% c('+','-')) ]
-	
+
 	# fix the gene IDs
 	#values(tmp)$gene_id <- gsub('CUFF.', '', values(tmp)$gene_id)
-	
+
 	# fix the exon IDs
 	#values(tmp)$transcript_id <- gsub('CUFF.', '', values(tmp)$transcript_id)
-	
+
 	# split the object into transcript and exon pieces
 	#by.type = split(tmp, values(tmp)$type)
 	#browser()
-	
+
 	#make transcripts table
 	tmpT <- split(tmp,
 			values(tmp)$transcript_id)
@@ -2002,7 +2009,7 @@ readCufflinks<-function(dir = getwd(),
 			tx_end=sapply(end(ranges(tmpT)), max),
 			stringsAsFactors=FALSE
 	)
-	
+
 	#make splicings table
 	tmpS <- split(tmp, values(tmp)$transcript_id)
 	if(verbose) message('Attempting to create the splicings data.frame')
@@ -2015,7 +2022,7 @@ readCufflinks<-function(dir = getwd(),
 			exon_end=end(unlist(tmpS)),
 			stringsAsFactors=FALSE
 	)
-	
+
 	#make genes table
 	if(verbose) message('Attempting to create the genes data.frame')
 	gene_txs <- tapply(values(tmp)$transcript_id, values(tmp)$gene_id, unique)
@@ -2023,13 +2030,13 @@ readCufflinks<-function(dir = getwd(),
 			tx_name=unlist(gene_txs),
 			gene_id=rep(names(gene_txs), sapply(gene_txs, length)),
 			stringsAsFactors=FALSE)
-	
+
 	#create the db
-	if (verbose) message("Creating TxDb")
+	if (verbose) message("Creating TranscriptDb")
 	tmpdb <- makeTranscriptDb(transcripts, splicings, genes=genes)
 	if (verbose) message("Use saveFeatures() to save the database to a file")
 	return(tmpdb)
-	
+
 }
 
 #library(Gviz)
diff --git a/build/vignette.rds b/build/vignette.rds
index 0da12ed..69addd8 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/NEWS b/inst/NEWS
index 1377da1..8cdbbc6 100644
--- a/inst/NEWS
+++ b/inst/NEWS
@@ -1,3 +1,30 @@
+v2.9.3
+	Bugfix:
+		- Introduced CHECK error by adding to .Rbuildignore...this is now fixed.
+
+v2.9.2
+	- version bump to let BioC nightly build grab commit.
+
+v2.9.1
+	- version bump for BioC devel release 3.1
+
+v2.8.2
+	Bugfixes:
+		- removed reference to sqliteCloseConnection() (not exported by RSQLite 1.0.0) in vignette.
+
+v2.8.1
+	Bugfixes:
+		- Made minimal changes for compatibility with RSQLite 1.0.0
+
+v2.7.3
+	Bugfixes:
+		- Fixed sigMatrix legend argument to comply with ggplot2 deprecations. No longer throws an error.
+
+	New Features:
+
+	Notes:
+		- Trying out a few more indices to speed up queries using sampleIdList.
+
 v2.7.1
 	Bugfixes:
 		- Fixed 'fullnames' argument to cuffData::*Matrix() methods so that it does what it's supposed to do.
@@ -7,7 +34,7 @@ v2.7.1
 		- Added repFpkmMatrix() and replicates() methods to CuffFeature objects.
 		- Removed unnecessary Joins to optimize retrieval speed for several key queries.
 		- Fixed bug in csVolcano matrix that forced ylimits to be c(0,15)
-		
+
 	New Features:
 		- Added csNMF() method for CuffData and CuffFeatureSet objects to perform non-negative matrix factorization.  As of now, it's merely a wrapper around the default settings for NMFN::nnmf(), but hope to expand in the future.
 			* Does not adjust sparsity of matrices after output, must be done by user as needed.
@@ -16,19 +43,19 @@ v2.7.1
 		- Added varModel.info tracking for compatibility with cuffdiff >=2.1. Will now find varModel.info file if exists, and incorporate into database.
 		- dispersionPlot() method added for CuffSet object.  This now appropriately draws from varModel.info and is the preferred visualization for dispersion of RNA-Seq data with cummeRbund.
 		- Added diffTable() method to CuffData and CuffFeatureSet objects to allow a 'one-table' snapshot of results for all Features (CuffData) or a set of Features (CuffFeatureSet). This table outputs key values including gene name,
-			gene short name, expression estimates and per-comparison fold-change, p-value, q-value, and significance values (yes/no). A convenient 'data-dump' function to merge across several tables. 
+			gene short name, expression estimates and per-comparison fold-change, p-value, q-value, and significance values (yes/no). A convenient 'data-dump' function to merge across several tables.
 		- Added coercion methods for CuffGene objects to create GRanges and GRangeslist objects (more BioC friendly!). Will work on making this possible on CuffFeatureSet and CuffFeature objects as well.
 		- Added pass-through to select p.adjust method for getSig (method argument to getSig)
-		- Added ability to revert to cuffdiff q-values for specific paired-wise interrogations with getSig as opposed to re-calculating new ones (useCuffMTC; default=FALSE) 
-	
+		- Added ability to revert to cuffdiff q-values for specific paired-wise interrogations with getSig as opposed to re-calculating new ones (useCuffMTC; default=FALSE)
+
 	Notes:
 		- Removed generic for 'featureNames'.  Now appropriately uses featureNames generic from Biobase.  As a consequence, Biobase is now a dependency.
 		- Added passthrough to as.dist(...) in JSdist(...)
 		- Added 'logMode' argument to csClusterPlot.
 		- Added 'showPoints' argument to PCAplot to allow disabling of gene values in PCA plot. If false, only sample projections are plotted.
 		- Added 'facet' argument to expressionPlot to disable faceting by feature_id.
-		- shannon.entropy now uses log2 instead of log10 to constrain specificity scores between 0 and 1.  
-		
+		- shannon.entropy now uses log2 instead of log10 to constrain specificity scores between 0 and 1.
+
 v1.99.6
 	Notes:
 		- 'annotation' and "annotation<-" generics were moved to BiocGenerics 0.3.2. Now using appropriate generic function, but requiring BiocGenerics >= 0.3.2
@@ -38,7 +65,7 @@ v1.99.5
 		- Added replicates argument to csDistHeat to view distances between individual replicate samples.
 		- Appropriately distinguish now between 'annotation' (external attributes) and features (gene-level sub-features).
 		- csHeatmap now has 'method' argument to pass function for any dissimilarity metric you desire. You must pass a function that returns a 'dist' object applied to rows of a matrix. Default is still JS-distance.
-		
+
 v1.99.3
 	New Features:
 		- Added diffTable() method to return a table of differential results broken out by pairwise comparison. (more human-readable)
@@ -54,17 +81,17 @@ v1.99.3
 		- Added csScatterMatrix() and csVolcanoMatrix() method to CuffData objects.
 		- Added fpkmSCVPlot() as a CuffData method to visualize replicate-level coefficient of variation across fpkm range per condition.
 		- Added PCAplot() and MDSplot() for dimensionality reduction visualizations (Principle components, and multi-dimensional scaling respectively)
-		- Added csDistHeat() to create a heatmap of JS-distances between conditions.  
-		
+		- Added csDistHeat() to create a heatmap of JS-distances between conditions.
+
 	Bugfixes:
 		- Fixed diffData 'features' argument so that it now does what it's supposed to do.
 		- added DB() with signature(object="CuffSet") to NAMESPACE
-		
+
 	Notes:
 		- Once again, there have been modifications to the underlying database schema so you will have to re-run readCufflinks(rebuild=T) to re-analyze existing datasets.
 		- Importing 'defaults' from plyr instead of requiring entire package (keeps namespace cleaner).
 		- Set pseudocount=0.0 as default for csDensity() and csScatter() methods (This prevents a visual bias for genes with FPKM <1 and ggplot2 handles removing true zero values).
-		
+
 v1.99.2
 	Bugfixes:
 		- Fixed bug in replicate table that did not apply make.db.names to match samples table.
@@ -79,14 +106,14 @@ v1.99.1
 		- sample-level information such as mass and scaling factors.
 		- individual replicate fpkms and associated statistics for all features.
 		- raw and normalized count tables and associated statistics all features.
-	
+
 	New Features:
 		- Please see updated vignette for overview of new features.
 		- New dispersionPlot() to visualize model fit (mean count vs dispersion) at all feature levels.
 		- New runInfo() method returns cuffdiff run parameters.
 		- New replicates() method returns a data.frame of replicate-level parameters and information.
 		- getGene() and getGenes() can now take a list of any tracking_id or gene_short_name (not just gene_ids) to retrieve
-		a gene or geneset. 
+		a gene or geneset.
 		- Added getFeatures() method to retrieve a CuffFeatureSet independent of gene-level attributes.  This is ideal for looking at sets of features
 		outside of the context of all other gene-related information (i.e. facilitates feature-level analysis)
 		- Replicate-level fpkm data now available.
@@ -94,25 +121,25 @@ v1.99.1
 		- repFpkm(), repFpkmMatrix, count(), and countMatrix are new accessor methods to CuffData, CuffFeatureSet, and CuffFeature objects.
 		- All relevant plots now have a logical 'replicates' argument (default = F) that when set to TRUE will expose replicate FPKM values in appropriate ways.
 		- MAPlot() now has 'useCount' argument to draw MA plots using count data as opposed to fpkm estimates.
-	
+
 	Notes:
 		- Changed default csHeatmap colorscheme to the much more pleasing 'lightyellow' to 'darkred' through 'orange'.
 		- SQLite journaling is no longer disabled by default (The benefits outweigh the moderate reduction in load times).
-	
+
 	Bugfixes:
 		- Numerous random bug fixes to improve consistency and improve performance for large datasets.
 
 v1.2.1
 	Bugfixes:
 		-Fixed bug in CuffFeatureSet::expressionBarplot to make compatible with ggplot2 v0.9.
-	
+
 	New Features:
 		- Added 'distThresh' argument to findSimilar.  This allows you to retrieve all similar genes within a given JS distance as specified by distThresh.
 		- Added 'returnGeneSet' argument to findSimilar.  [default = T] If true, findSimilar returns a CuffGeneSet of genes matching criteria (default). If false, a rank-ordered data frame of JS distance values is returned.
 		- findSimilar can now take a 'sampleIdList' argument. This should be a vector of sample names across which the distance between genes should be evaluated.  This should be a subset of the output of samples(genes(cuff)).
 	Notes:
 		- Added requirement for 'fastcluster' package.  There is very little footprint, and it makes a significant improvement in speed for the clustering analyses.
-		
+
 v1.1.5 / 1.2.0
 	Bugfixes:
 		- Fixed minor bug in database setup that caused instability with cuffdiff --no-diff argument.
@@ -124,19 +151,19 @@ v1.1.4
 	Bugfixes:
 		- Finished abrupt migration to reshape2. As a result fixed a bug in which 'cast' was still required for several functions and could not be found. Now appropriately using 'dcast' or 'acast'.
 		- Fixed minor bug in CuffFeature::fpkmMatrix
-		
-v1.1.3 
+
+v1.1.3
 	New Features:
-		- getSig() has been split into two functions: getSig() now returns a vector of ids (no longer a list of vectors), and getSigTable() returns a 'testTable' of 
+		- getSig() has been split into two functions: getSig() now returns a vector of ids (no longer a list of vectors), and getSigTable() returns a 'testTable' of
 		binary values indicating whether or not a gene was significant
 		in a particular comparison.
 		- Added ability in getSig() to limit retrieval of significant genes to two provided conditions (arguments x & y). (reduces time for function call if you have a specific comparison in mind a priori)
 			* When you specify x & y with getSig(), q-values are recalculated from just those selected tests to reduce impact of multiple testing correction.
-			* If you do not specificy x & y getSig() will return a vector of tracking_ids for all comparisons (with appropriate MTC). 
+			* If you do not specificy x & y getSig() will return a vector of tracking_ids for all comparisons (with appropriate MTC).
 		- You can now specify an 'alpha' for getSig() and getSigTable() [ 0.05 by default to match cuffdiff default ] by which to filter the resulting significance calls.
-		- Added csSpecificity() method:  This method returns a feature-X-condition matrix (same shape as fpkmMatrix) that provides a 'condition-specificity' score  
+		- Added csSpecificity() method:  This method returns a feature-X-condition matrix (same shape as fpkmMatrix) that provides a 'condition-specificity' score
 			* defined as 1-(JSdist(p,q))
-			  where p is is the density of expression (probability vector of log(FPKM+1)) of a given gene across all conditions, 
+			  where p is is the density of expression (probability vector of log(FPKM+1)) of a given gene across all conditions,
 			  and q is the unit vector for that condition (ie. perfect expression in that particular condition)
 			* specificity = 1.0 if the feature is expressed exclusively in that condition
 		- Created csDendro() method: This method returns a object of class 'dendrogram' (and plots using grid) of JS distances between conditions for all genes in a CuffData, CuffGeneSet, or CuffFeatureSet object.
@@ -145,21 +172,21 @@ v1.1.3
 		to trust the expression values for a given gene under a specific condition, and may provide insight into outlier expression values.
 			* This feature can be disabled by setting showStatus=F.
 		- csDensity() is now available for CuffFeatureSet and CuffGeneSet objects
-	
+
 	Bugfixes:
 		- Fixed bug in getGenes that may have resulted in long query lag for retrieving promoter diffData. As a result all calls to getGenes should be significanly faster.
 		- CuffData fpkm argument 'features' now returns appropriate data.frame (includes previously un-reported data fields).
 		- Replaced all instances of 'ln_fold_change' with the actual 'log2_fold_change'.  Values were previously log2 fold change but database headers were not updated to reflect this.
 		- Fixed bug that could cause readCufflinks() to die with error when using reshape2::melt instead of reshape::melt.
-	
+
 	Notes:
 		- ***The structure of the underlying database has changed in this version.  As a consequence, you must rebuild you cuffData.db file to use new version. readCufflinks(rebuild=T)***
 		- Updated vignette
 		- A 'fullnames' logical argument was added to fpkmMatrix. If True, rownames for fpkmMatrix will be a concatenation of gene_short_name and tracking_id.
-		This has the added benefit of making row labels in csHeatmap easier to read, as well as preserving uniqueness. 
+		This has the added benefit of making row labels in csHeatmap easier to read, as well as preserving uniqueness.
 		- Slight speed improvements to JSdist (noticeable when using csCluster on large feature sets).
 		- 'testTable' argument to getSig() has been dropped in lieu of new getSigTable() method.
-	
+
 v1.1.1
 	Bugfixes:
 		- fixed issue in which there was no graceful error handling of missing CDS or TSS data in cuffdiff output.
@@ -168,15 +195,15 @@ v1.1.1
 		- Fixed bug that would cause diffData() to return a filtered subset of results by default.
 		- Adjusted indexing of tables to improve performance on large datasets.
 		- Fixed bug that caused diffData method to not be registered with CuffFeature and CuffGene objects.
-		- Fixed bug that sometimes caused over-plotting of axis labels in csBarplots.  
-		
-	New Features:	
-		- added getSig method to CuffSet class for rapid retrieval of significant features from all pairwise tests (as a list of IDs). 
+		- Fixed bug that sometimes caused over-plotting of axis labels in csBarplots.
+
+	New Features:
+		- added getSig method to CuffSet class for rapid retrieval of significant features from all pairwise tests (as a list of IDs).
 		By default the level is 'genes' but any feature level can be queried.
 		- csCluster now uses Jensen-Shannon distance by default (as opposed to Euclidean)
 		- Added 'xlimits' argument to csVolcano to constrain plot dimensions.
 		- Enforced requirement in csVolcano for x and y arguments (as sample names).
-	
+
 	Notes:
 		- Changed dependency 'reshape' to 'reshape2'
 		- Changed the default orientation of expressionBarplot() for CuffFeatureSet objects.
@@ -191,9 +218,9 @@ v0.99.5
 	- Significant speed improvements to readCufflinks() for large cuffdiff datasets.
 		- Tables written first then indexed.
 	- Added slot accessor methods to avoid using slots directly.
-	
+
 v0.99.4
 	- Second beta release and submission to Bioconductor
 
 v0.1.3 Release 2011-08-18:
-	- First Beta release of cummeRbund and submission to Bioconductor for review and hosting.
\ No newline at end of file
+	- First Beta release of cummeRbund and submission to Bioconductor for review and hosting.
diff --git a/inst/doc/cummeRbund-example-workflow.pdf b/inst/doc/cummeRbund-example-workflow.pdf
index f0e5c9c..c2ac74f 100644
Binary files a/inst/doc/cummeRbund-example-workflow.pdf and b/inst/doc/cummeRbund-example-workflow.pdf differ
diff --git a/inst/doc/cummeRbund-manual.R b/inst/doc/cummeRbund-manual.R
index 8917425..792dcae 100644
--- a/inst/doc/cummeRbund-manual.R
+++ b/inst/doc/cummeRbund-manual.R
@@ -550,7 +550,7 @@ plotTracks(genetrack)
 ###################################################
 ### code chunk number 68: features_3
 ###################################################
-trackList<-list() 
+trackList<-list()
 myStart<-min(features(myGene)$start)
 myEnd<-max(features(myGene)$end)
 myChr<-unique(features(myGene)$seqnames)
@@ -561,17 +561,17 @@ trackList<-c(trackList,ideoTrack)
 
 axtrack<-GenomeAxisTrack()
 trackList<-c(trackList,axtrack)
- 
+
 genetrack<-makeGeneRegionTrack(myGene)
 genetrack
- 
+
 trackList<-c(trackList,genetrack)
- 
+
 biomTrack<-BiomartGeneRegionTrack(genome=genome,chromosome=as.character(myChr),
 		start=myStart,end=myEnd,name="ENSEMBL",showId=T)
- 
+
 trackList<-c(trackList,biomTrack)
- 
+
 conservation <- UcscTrack(genome = genome, chromosome = myChr,
 		track = "Conservation", table = "phyloP100wayAll",
 		from = myStart-2000, to = myEnd+2000, trackType = "DataTrack",
@@ -581,7 +581,7 @@ conservation <- UcscTrack(genome = genome, chromosome = myChr,
 		name = "Conservation")
 
 trackList<-c(trackList,conservation)
- 
+
 plotTracks(trackList,from=myStart-2000,to=myEnd+2000)
 
 
@@ -761,7 +761,7 @@ print(mySimilar2.expression)
 ###################################################
 ### code chunk number 91: close_connection
 ###################################################
-end<-sqliteCloseConnection(cuff@DB)
+end<-dbDisconnect(cuff@DB)
 
 
 ###################################################
diff --git a/inst/doc/cummeRbund-manual.Rnw b/inst/doc/cummeRbund-manual.Rnw
index 0b146c8..7ce98d0 100644
--- a/inst/doc/cummeRbund-manual.Rnw
+++ b/inst/doc/cummeRbund-manual.Rnw
@@ -67,7 +67,7 @@
 \begin{document}
 <<init, echo=FALSE>>=
 options(width=65)
-@ 
+@
 
 \maketitle
 \tableofcontents
@@ -78,7 +78,7 @@ that you update your cufflinks installation to version $\ge$2.0 to take full
 advantage of the improvements in modeling, reporting, and visualization that have been incorporated.
 \begin{itemize}
 	\item Cufflinks $\ge$ v2.0.1
-	\item SQLite 
+	\item SQLite
 	\item R $\ge$ v3.0
 	\item Packages:
 	\begin{itemize}
@@ -96,24 +96,24 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 		\end{itemize}
 	\end{itemize}
 \end{itemize}
-	
+
 \clearpage
 
 \section{Introduction}
 	\Rpackage{cummeRbund} is a visualization package for Cufflinks high-throughput sequencing data. It is designed to help you navigate through the large amount of data produced from a Cuffdiff RNA-Seq differential expression
 	analysis. The results of this analysis are typically a large number of inter-related files that are not terribly intuitive to navigate through. cummeRbund helps promote rapid analysis of RNA-Seq data by aggregating, indexing,
 	and allowing you easily visualize and create publication-ready figures of your RNA-Seq data while maintaining appropriate relationships between connected data points.
-	CummeRbund is a multifaceted suite for streamlined analysis and visualization of massively parallel RNA differential expression data sequencing data. 
-	
+	CummeRbund is a multifaceted suite for streamlined analysis and visualization of massively parallel RNA differential expression data sequencing data.
+
 	CummeRbund begins by re-organizing output files of a cuffdiff analysis, and storing these data in a local SQLite database. CummeRbund indexes the data to speed up access to specific feature data (genes, isoforms, TSS, CDS, etc.),
 	and preserves the various relationships between these features. Access to data elements is managed via the RSQLite package and data are presented in appropriately structured R classes with various convenience functions designed
 	to streamline your workflow. This persistent database storage means that inter-connected expression values are rapidly accessible and quickly searchable in future analyses.
-	
+
 	CummeRbund defines two types of data classes, 'pointer' or reference classes describe SQL connections to the database without directly containing data, and 'data' classes that retrieve a subset of related data points such as associated
-	features from a given gene or gene set. Each class type has methods for direct access to FPKM vales, differential expression information, statistical test results, raw and normalized fragment counts, individual replicate FPKM values, and additional annotation information for features. Output formats allow 
-	for browsing and analysis of data in standard R objects (data.frame, list, etc). CummeRbund was designed to provide analysis and visualization tools analogous to microarray data. In this regard, numerous plotting methods are provided for visualization 
+	features from a given gene or gene set. Each class type has methods for direct access to FPKM vales, differential expression information, statistical test results, raw and normalized fragment counts, individual replicate FPKM values, and additional annotation information for features. Output formats allow
+	for browsing and analysis of data in standard R objects (data.frame, list, etc). CummeRbund was designed to provide analysis and visualization tools analogous to microarray data. In this regard, numerous plotting methods are provided for visualization
 	of RNA-Seq data quality and global statistics, and simple routines for plotting expression levels for one or thousands of genes, their isoforms, TSS groups, or CDS groups.
-	  
+
 	The base class, \Rclass{cuffSet} is a 'pointer' to cuffdiff data that are stored out-of-memory in a sqlite database.
 
 \clearpage
@@ -129,7 +129,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 \subsection{CuffData Class}
 	The \Rclass{CuffData} class is also a pointer class to the SQL backend, but each instance is specific for a data subtype (genes, isoforms, TSS, CDS). Again, there is an DB slot (accessible using \Rmethod{DB()}) that contains the RSQLite connection object.
 	There are several accessor, setter, and plotting methods that allow for global analysis of all features within a \Rmethod{CuffData} class.Subsetting is currently being re-written, however, it is primarily done through the 'gene\_id' field.
-	Available slots for the CuffData class are: 
+	Available slots for the CuffData class are:
 	\begin{itemize}
 		\item DB: RSQLite connection object
 		\item tables: A \Rclass{list} of tables in the SQLite DB that contain the cufflinks data.
@@ -142,7 +142,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 \subsection{CuffDist Class}
 	The \Rclass{CuffDist} class is an pointer class that contains the results of the various 'distribution tests' performed by cuffdiff.  These include differential promoter usage, differential splicing, and differential CDS usage.  These are independent tests from the differential analysis of gene-, isoform-, TSS-, and CDS-level features and therefore
 	have their own container type to distinguish them as such.  The 'promoters', 'relCDS', and 'splicing' slots of a \Rclass{CuffSet} class are all \Rclass{CuffDist} instances.
-	
+
 	Available slots for the CuffDist class are:
 	\begin{itemize}
 	\item DB: RSQLite connection object
@@ -164,7 +164,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
         \item genome: A character string indicating which build of the genome
         the associated features are derived from.  (e.g. `hg19',`mm9')
 	\end{itemize}
-	
+
 	A specialized sub-class of \Rclass{CuffFeatureSet} is the \Rclass{CuffGeneSet} class. This subclass adds additional slots to contain all isoforms, TSS, and CDS information for a given set of gene\_ids.  The \Rclass{CuffGeneSet} class is designed to aggregate all relevant
 	information for a set of genes into one object for easy analysis and/or manipulation.
 	The \Rclass{CuffGeneSet} object adds the following slots:
@@ -186,7 +186,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 		\item diff: A data frame of differential expression/regulation data for a given feature.
 		\item count: A data frame containing raw and normalized fragment counts, variance, dispersion, and uncertainty for a given feature.
 	\end{itemize}
-	
+
 	A specialized sub-class of \Rclass{CuffFeature} is the \Rclass{CuffGene} class. This subclass adds additional slots to contain all isoform, TSS, and CDS information for a given gene.
 	The \Rclass{CuffGene} object adds the following slots:
 	\begin{itemize}
@@ -242,7 +242,7 @@ In addition, cuffdiff also performs differential splicing, CDS usage, and promot
 
 All of these output files are related to each other through their various tracking\_ids, but parsing through individual files to query for important result information requires both a good deal of patience and a strong grasp of command-line text manipulation. Enter cummeRbund, an R solution to aggregate, organize, and help visualize this multi-layered dataset. \\
 One of the principle benefits of using cummeRbund is that data are stored in a SQLite database.  This allows for out-of-memory analysis of data, quick retrieval, and only a one-time cost to setup the tables. By default, cummeRbund assumes that all output files from cuffdiff are in the current working directory.
-To read these files, populate the 'cuffData.db' database backend, and return the \Rclass{CuffSet} pointer object, you can do the following. 
+To read these files, populate the 'cuffData.db' database backend, and return the \Rclass{CuffSet} pointer object, you can do the following.
 
 <<loadLib>>=
 library(cummeRbund)
@@ -264,9 +264,9 @@ cuff
 
 Again, by default $dir$ is assumed to be the current working directory and \code{cuff<-readCufflinks()} should work if all appropriate files are in the current working directory. We now also
 recommend that you use both the \Rfunarg{genome} and \Rfunarg{gtfFile} arguments to readCufflinks(). This will allow cummeRbund to archive the transcript structure information located in the .gtf file associated with
-your particular cuffdiff run, as well as associate these transcripts with an appropriate genome build (e.g. 'hg19', 'mm9', etc) so as to allow for transcript-level visualizations and future integration with other external resources. 
+your particular cuffdiff run, as well as associate these transcripts with an appropriate genome build (e.g. 'hg19', 'mm9', etc) so as to allow for transcript-level visualizations and future integration with other external resources.
 Should you need to rebuild the SQLite backend for any reason, you can add the option \Rfunarg{rebuild=T} to \Rmethod{readCufflinks}.  Once the database is created, \Rmethod{readCufflinks} will default to using the SQL backend and should not need to rebuild this database.
-Each R session should begin with a call to \Rmethod{readCufflinks} so as to initialize the database connection and create an object with the appropriate RSQLite connection information. 
+Each R session should begin with a call to \Rmethod{readCufflinks} so as to initialize the database connection and create an object with the appropriate RSQLite connection information.
 
 \subsection{Adding additional feature annotation}
 Gene- or feature-level annotation can be permanently added to the database tables for future querying. If you have a data.frame where the first column contains the 'tracking\_id' (eg. 'gene\_id' for genes, 'isoform\_id' for isoforms, etc). You can easily add feature level annotation using the \Rfunction{addFeatures()} function:
@@ -294,10 +294,10 @@ disp
 <<label=global_dispersion_plot,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(disp)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_dispersion_plot}
 	}
-	
+
 	\end{center}
 \end{figure}
 
@@ -305,11 +305,11 @@ Alternatively a call to \code{dispersionPlot(cuff)} directly will allow you to
 visualize the full model fit.
 
 The squared coefficient of variation is a normalized measure of cross-replicate
-variability that can be useful for evaluating the quality your RNA-seq data. 
+variability that can be useful for evaluating the quality your RNA-seq data.
 Differences in $CV^2$ can result in lower numbers of differentially expressed
 genes due to a higher degree of variability between replicate fpkm estimates.
 
-<<SCV_visualization,evaluate=FALSE>>= 
+<<SCV_visualization,evaluate=FALSE>>=
 genes.scv<-fpkmSCVPlot(genes(cuff))
 isoforms.scv<-fpkmSCVPlot(isoforms(cuff))
 @
@@ -319,11 +319,11 @@ isoforms.scv<-fpkmSCVPlot(isoforms(cuff))
 	\subfloat[The squared coefficient of variation allows visualization of
 	cross-replicate variability between conditions and can be a useful metric in
 	determining data quality at the gene level (left) or isoform level (right).
-	Here we demonstrate the variability of each individual ENCODE project 
+	Here we demonstrate the variability of each individual ENCODE project
     RNA-seq conditions.]{
 	\includegraphics[width=0.85\textwidth]{ENCODE_SCV}
 	}
-	
+
 	\end{center}
 \end{figure}
 
@@ -339,21 +339,21 @@ densRep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Density plot of individual conditions.]{
-	
+
 <<label=global_plots_dens,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_1>>
 	print(dens)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dens}
 	}
 	\qquad
 	\subfloat[Density plot with replicates=TRUE exposes individual replicate FPKM distributions.]{
-	
+
 <<label=global_plots_dens_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(densRep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dens_rep}}
 	\end{center}
 \end{figure}
@@ -370,21 +370,21 @@ brep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Box plot of FPKM distributions for individual conditions.]{
-	
+
 <<label=global_plots_box,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_2>>
 	print(b)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_box}
 	}
 	\qquad
 	\subfloat[Box plot with replicates=TRUE exposes individual replicate FPKM distributions.]{
-	
+
 <<label=global_plots_box_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(brep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_box_rep}}
 	\end{center}
 \end{figure}
@@ -401,12 +401,12 @@ s<-csScatterMatrix(genes(cuff))
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Scatterplots can be useful to identify global changes and trends in gene expression between pairs of conditions.]{
-	
+
 <<label=global_plots_scatter_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_3.1>>
 	print(s)
 @
-	
+
 	\includegraphics[width=0.65\textwidth]{cummeRbund-manual-global_plots_scatter_1}}
 	\end{center}
 \end{figure}
@@ -423,12 +423,12 @@ s
 	\begin{center}
 	\subfloat[Pairwise scatterplots can identify biases in gene expression between
 	two particular conditions.]{
-	
+
 <<label=global_plots_scatter_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_3.2>>
 	print(s)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_scatter_2}}
 	\end{center}
 \end{figure}
@@ -442,21 +442,21 @@ dend.rep<-csDendro(genes(cuff),replicates=T)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Dendrogram of JS distances between conditions.]{
-	
+
 <<label=global_plots_dendro,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_6>>
 	plot(dend)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dendro}
 	}
 	\qquad
 	\subfloat[Dendrogram with replicates=TRUE can identify outlier replicates.]{
-	
+
 <<label=global_plots_dendro_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 	plot(dend.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dendro_rep}}
 	\end{center}
 \end{figure}
@@ -473,26 +473,26 @@ mCount
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[MA plots can identify biases across ranges of intensity and fold-change.]{
-	
+
 <<label=global_plots_MA,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_4>>
 	print(m)
 @
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_MA}}
-	
+
 	\qquad
 	\subfloat[MA plot drawn on normalized count values instead of FPKM.]{
-	
+
 <<label=global_plots_MA_count,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(mCount)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_MA_count}}
 	\end{center}
 \end{figure}
 
 %Volcano plots
-Volcano plots are also available for the \Rclass{CuffData} objects. 
+Volcano plots are also available for the \Rclass{CuffData} objects.
 
 <<global_plots_5_1,include=FALSE>>=
 v<-csVolcanoMatrix(genes(cuff))
@@ -501,7 +501,7 @@ v
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Volcano plots explore the relationship between fold-change and significance.]{
-		
+
 <<label=global_plots_volcano_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_5_1>>
 print(v)
@@ -519,7 +519,7 @@ v
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Volcano plots explore the relationship between fold-change and significance.]{
-		
+
 <<label=global_plots_volcano_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_5_2>>
 print(v)
@@ -595,7 +595,7 @@ head(gene.count.matrix)
 @
 
 \subsection{Writing your own SQL accessors}
-Since the cuffData.db is a SQLite database backend, if you are familiar with SQL and/or RSQLite query construction, you can simply design your own SQL queries to access the data that you are after. 
+Since the cuffData.db is a SQLite database backend, if you are familiar with SQL and/or RSQLite query construction, you can simply design your own SQL queries to access the data that you are after.
 
 \begin{figure}[h]
 \centering
@@ -604,7 +604,7 @@ Since the cuffData.db is a SQLite database backend, if you are familiar with SQL
 
 \end{figure}
 
-\clearpage 
+\clearpage
 
 \section{Creating Gene Sets}
 Gene Sets (stored in a \Rclass{CuffGeneSet} object) can be created using the \Rmethod{getGenes} method on a CuffSet object.
@@ -661,16 +661,16 @@ h.rep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Heatmaps provide a convenient way to visualize the expression of entire gene sets at once.]{
-	
+
 <<label=geneset_plots_heatmap,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_1>>
 print(h)
 @
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_heatmap}}
-	
+
 	\qquad
 	\subfloat[Same heatmap, with replicates=T can help to visualize variance between replicates.]{
-	
+
 <<label=geneset_plots_heatmap_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(h.rep)
 @
@@ -686,7 +686,7 @@ b
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[A (somewhat crowded) barplot for all genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_barplot,fig=TRUE,echo=FALSE,include=FALSE,width=8,height=4>>=
 <<geneset_plots_1.5>>
 print(b)
@@ -704,7 +704,7 @@ s
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Scatterplot showing relationship between two conditions for genes in a CuffGeneSet.]{
-		
+
 <<label=geneset_plots_scatter,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_2>>
 print(s)
@@ -723,7 +723,7 @@ v
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Fold-change vs significance for genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_volcano,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_3>>
 print(v)
@@ -744,7 +744,7 @@ th
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[A heatmap of isoform-level FPKM values for all genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_isoform_heatmap,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_4>>
 print(ih)
@@ -752,12 +752,12 @@ print(ih)
 		\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_isoform_heatmap}}
 		\qquad
 		\subfloat[A heatmap of TSS-level FPKM values for all genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_TSS_heatmap,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(th)
 @
 		\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_TSS_heatmap}}
-	
+
 	\end{center}
 \end{figure}
 
@@ -769,13 +769,13 @@ den<-csDendro(myGenes)
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[A dendrogram of the relationship between conditions based on the expression of genes in a CuffGeneSet.]{
-		
+
 <<label=geneset_plots_dendro,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_5>>
 plot(den)
 @
 		\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_dendro}}
-	
+
 	\end{center}
 \end{figure}
 
@@ -811,40 +811,40 @@ gl.cds.rep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Expression plot of a single gene.]{
-	
+
 <<label=gene_plots_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_1>>
 	print(gl)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_line}
 	}
 	\qquad
 	\subfloat[Expression plot of a single gene with replicate FPKMs exposed.]{
-	
+
 <<label=gene_plots_replicate_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(gl.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_replicate_line}}
 	\qquad
 	\subfloat[Expression plot of all isoforms of a single gene with replicate FPKMs exposed.]{
-	
+
 <<label=gene_plots_iso_replicate_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(gl.iso.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_iso_replicate_line}}
 	\qquad
 	\subfloat[Expression plot of all CDS for a single gene with replicate FPKMs exposed.]{
-	
+
 <<label=gene_plots_cds_replicate_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(gl.cds.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_cds_replicate_line}}
 	\end{center}
 \end{figure}
@@ -860,22 +860,22 @@ gb.rep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Expression Barplot of a single gene.]{
-	
+
 <<label=gene_plots_bar,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_2>>
 print(gb)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_bar}
 	}
 	\qquad
 	\subfloat[Expression Barplot of a single gene with replicate FPKMs exposed.]{
-	
+
 
 <<label=gene_plots_bar_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(gb.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_bar_rep}}
 	\end{center}
 \end{figure}
@@ -889,7 +889,7 @@ igb
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Expression Barplot of all isoforms single gene with replicates exposed.]{
-	
+
 <<label=gene_plots_bar_isoforms,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_3>>
 print(igb)
@@ -908,14 +908,14 @@ gp
 	\begin{center}
 	\subfloat[Pie charts showing relative proportion of individual isoforms for a
 	single gene across conditions.]{
-	
+
 <<label=gene_plots_pie,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_4>>
 print(gp)
 @
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_pie}
 	}
-	
+
 	\end{center}
 \end{figure}
 
@@ -945,7 +945,7 @@ to add additional tracks from an external data source.
 It is still provided with cummeRbund, but is no longer supported.}
 
 <<features_3,fig=TRUE>>=
-trackList<-list() 
+trackList<-list()
 myStart<-min(features(myGene)$start)
 myEnd<-max(features(myGene)$end)
 myChr<-unique(features(myGene)$seqnames)
@@ -956,17 +956,17 @@ trackList<-c(trackList,ideoTrack)
 
 axtrack<-GenomeAxisTrack()
 trackList<-c(trackList,axtrack)
- 
+
 genetrack<-makeGeneRegionTrack(myGene)
 genetrack
- 
+
 trackList<-c(trackList,genetrack)
- 
+
 biomTrack<-BiomartGeneRegionTrack(genome=genome,chromosome=as.character(myChr),
 		start=myStart,end=myEnd,name="ENSEMBL",showId=T)
- 
+
 trackList<-c(trackList,biomTrack)
- 
+
 conservation <- UcscTrack(genome = genome, chromosome = myChr,
 		track = "Conservation", table = "phyloP100wayAll",
 		from = myStart-2000, to = myEnd+2000, trackType = "DataTrack",
@@ -976,7 +976,7 @@ conservation <- UcscTrack(genome = genome, chromosome = myChr,
 		name = "Conservation")
 
 trackList<-c(trackList,conservation)
- 
+
 plotTracks(trackList,from=myStart-2000,to=myEnd+2000)
 
 @
@@ -998,21 +998,21 @@ mySigMat<-sigMatrix(cuff,level='genes',alpha=0.05)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Significant features overview matrix. This plot describes the number of significant genes at a 5\%FDR for each pairwise interaction tested.]{
-	
+
 <<label=sig_mat_plot_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<sig_mat_1>>
 print(mySigMat)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-sig_mat_plot_1}}
-	
+
 	\end{center}
 \end{figure}
 
 \subsection{Creating gene sets from significantly regulated genes}
-One of the primary roles of a differential expression analysis is to conduct significance tests on each feature (genes, isoforms, TSS, and CDS) for appropriate pairwise comparisons of conditions. The results of these tests (after multiple testing correction of course) can be used to determine which genes are differentially regulated. 
+One of the primary roles of a differential expression analysis is to conduct significance tests on each feature (genes, isoforms, TSS, and CDS) for appropriate pairwise comparisons of conditions. The results of these tests (after multiple testing correction of course) can be used to determine which genes are differentially regulated.
 \Rpackage{cummeRbund} makes accessing the results of these significance tests simple via \Rmethod{getSig()}. This function takes a CuffSet object and will scan at various feature levels ('genes' by default) to produce a \Rclass{vector} of feature IDs.
 By default \Rmethod{getSig()} outputs a vector of tracking IDs corresponding to all \emph{genes} that reject the null hypothesis in any condition tested. The default feature type can be changed by adjusting the 'level' argument to \Rmethod{getSig()}. In addition, a alpha value can be provided on which to filter the resulting list
-(the default is $0.05$ to match the default of cuffdiff). 
+(the default is $0.05$ to match the default of cuffdiff).
 
 <<get_sig_1>>=
 mySigGeneIds<-getSig(cuff,alpha=0.05,level='genes')
@@ -1058,13 +1058,13 @@ myDistHeat<-csDistHeat(genes(cuff))
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[JS distance heatmap between conditions across all gene features.]{
-	
+
 <<label=dist_heat_plot_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<dist_heat_1>>
 print(myDistHeat)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-dist_heat_plot_1}}
-	
+
 	\end{center}
 \end{figure}
 
@@ -1080,18 +1080,18 @@ myRepDistHeat<-csDistHeat(genes(cuff),replicates=T)
 	\begin{center}
 	\subfloat[JS distance heatmap between replicate samples across all gene
 	features.]{
-	
+
 <<label=dist_heat_plot_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<dist_heat_2>>
 print(myRepDistHeat)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-dist_heat_plot_2}}
-	
+
 	\end{center}
 \end{figure}
 
 This method can be used to explore similarities between conditions for all
-features, or just those features contained within a \Rclass{CuffGeneSet} class. 
+features, or just those features contained within a \Rclass{CuffGeneSet} class.
 Additionally, the \Rfunarg{samples.not.genes=F} argument will display distances
 between individual genes or features across conditions.
 
@@ -1115,40 +1115,40 @@ genes.MDS.rep<-MDSplot(genes(cuff),replicates=T)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[PCA plot for gene-level features]{
-	
+
 <<label=gene_PCA,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<dim_reduction_1>>
 	print(genes.PCA)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_PCA}
 	}
 	\qquad
 	\subfloat[MDS plot for gene-level features]{
-	
+
 <<label=gene_MDS,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(genes.MDS)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_MDS}}
 	\qquad
 	\subfloat[Individual replicate level PCA plot for gene-level features]{
-	
+
 <<label=gene_PCA_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(genes.PCA.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_PCA_rep}}
 	\qquad
 	\subfloat[Individual replicate level MDS plot for gene-level features]{
-	
+
 <<label=gene_MDS_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(genes.MDS.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_MDS_rep}}
 	\end{center}
 \end{figure}
@@ -1160,9 +1160,9 @@ function \Rfunction{nnmf} for non-negative matrix factorization. You can use the
 \clearpage
 
 \subsection{Partitioning}
-K-means clustering is a useful tool that can be helpful in identifying clusters of genes with similar expression profiles. In fact, these profiles are learned from the data during the clustering. 
+K-means clustering is a useful tool that can be helpful in identifying clusters of genes with similar expression profiles. In fact, these profiles are learned from the data during the clustering.
 \Rmethod{csCluster()} uses the \Rmethod{pam()} method from the \Rpackage{clustering} package to perform the partitioning around medoids. In this case however, the distance metric used by default is the
-Jensen-Shannon distance instead of the default Euclidean distance. Prior to performing this particular partitioning, the user must choose the number of clusters (K) into which the expression profiles should be divided. 
+Jensen-Shannon distance instead of the default Euclidean distance. Prior to performing this particular partitioning, the user must choose the number of clusters (K) into which the expression profiles should be divided.
 
 <<geneset_cluster_1,include=FALSE>>=
 ic<-csCluster(myGenes,k=4)
@@ -1177,26 +1177,26 @@ in the \Rmethod{csClusterPlot()} method.
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[PAM clustering with JS distance for a CuffGeneSet.]{
-	
+
 <<label=geneset_plots_cluster,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(icp)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-geneset_plots_cluster}
 	}
-	
+
 	\end{center}
 \end{figure}
 
 \clearpage
 
 \subsection{Specificity}
-In some cases, a researcher may be interested in identifying features that are 'condition-specific'. Or, more likely, producing an ordered list of genes based on their specificity for a given condition. 
+In some cases, a researcher may be interested in identifying features that are 'condition-specific'. Or, more likely, producing an ordered list of genes based on their specificity for a given condition.
 We define a specificity score (S) as the following:
 \begin{equation}
 S_{g,i}=1-JSD(p_g,\hat{q_i})
 \end{equation}
 
-Where $JSD$ is the Jensen-Shannon distance, $p_g$ is the expression profile of a given gene $g$ expressed as a density (probability) of $log_{10} FPKM+1$, and $\hat{q_i}$ is the unit vector of 'perfect expression' in a particular condition $i$. 
+Where $JSD$ is the Jensen-Shannon distance, $p_g$ is the expression profile of a given gene $g$ expressed as a density (probability) of $log_{10} FPKM+1$, and $\hat{q_i}$ is the unit vector of 'perfect expression' in a particular condition $i$.
 
 We have created a method, \Rmethod{csSpecificity()} that outputs a matrix (with identical shape to that produced by \Rmethod{fpkmMatrix()}) of specificity scores (S) across all conditions for all features in a \Rclass{CuffFeatureSet} or \Rclass{CuffGeneSet}.
 <<label=specificity_1>>=
@@ -1220,17 +1220,17 @@ mySimilar.expression<-expressionPlot(mySimilar,logMode=T,showErrorbars=F)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Top 20 most similar genes to 'PINK1'.]{
-	
+
 <<label=similar_plots_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<similar_1>>
 print(mySimilar.expression)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-similar_plots_1}}
-	
+
 	\end{center}
 \end{figure}
 
-By default, findSimilar will return a CuffGeneSet of similar genes matching your criteria. 
+By default, findSimilar will return a CuffGeneSet of similar genes matching your criteria.
 Recently a few additional features have been added as well to enhance this type of exploration:
 
 \begin{itemize}
@@ -1250,18 +1250,18 @@ mySimilar2.expression<-expressionPlot(mySimilar2,logMode=T,showErrorbars=F)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Top 10 genes most similar genes to a provided expression profile.]{
-	
+
 <<label=similar_plots_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<similar_2>>
 print(mySimilar2.expression)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-similar_plots_2}}
-	
+
 	\end{center}
 \end{figure}
 
 
-\Rmethod{findSimilar()} also uses the Jensen-Shannon distance between the probability distributions of each gene across conditions to determine the similarity.  
+\Rmethod{findSimilar()} also uses the Jensen-Shannon distance between the probability distributions of each gene across conditions to determine the similarity.
 We have found this to be a more robust way to determine distance between genes using the high dynamic range of FPKM data. Future versions may allow for other dissimilarity measures to be used instead.
 
 \clearpage
@@ -1291,7 +1291,7 @@ We have found this to be a more robust way to determine distance between genes u
 
 \section{Session info}
 <<label=close_connection,echo=FALSE>>=
-end<-sqliteCloseConnection(cuff@DB)
+end<-dbDisconnect(cuff@DB)
 @
 
 <<session>>=
diff --git a/inst/doc/cummeRbund-manual.pdf b/inst/doc/cummeRbund-manual.pdf
index 4c01705..72154c9 100644
Binary files a/inst/doc/cummeRbund-manual.pdf and b/inst/doc/cummeRbund-manual.pdf differ
diff --git a/inst/extdata/.RData b/inst/extdata/.RData
deleted file mode 100644
index 1912aeb..0000000
Binary files a/inst/extdata/.RData and /dev/null differ
diff --git a/inst/reports/runReport.Rnw b/inst/reports/runReport.Rnw
deleted file mode 100644
index 6f2cb38..0000000
--- a/inst/reports/runReport.Rnw
+++ /dev/null
@@ -1,86 +0,0 @@
-%
-%
-%
-%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%\SweaveOpts{prefix.string=graphics/plot}
-\documentclass[10pt]{article}
-\usepackage{amsmath}
-\usepackage[authoryear,round]{natbib}
-\usepackage{hyperref}
-\hypersetup{
-    colorlinks,
-    citecolor=black,
-    filecolor=black,
-    linkcolor=red,
-    urlcolor=black
-}
-\usepackage{theorem}
-\usepackage{float}
-\usepackage{ifthen}
-\usepackage[OT1]{fontenc}
-
-\newcommand{\R}{{\textsf{R}}}
-\newcommand{\code}[1]{{\texttt{#1}}}
-\newcommand{\term}[1]{{\emph{#1}}}
-\newcommand{\Rpackage}[1]{\textsf{#1}}
-\newcommand{\Rfunction}[1]{\texttt{#1}}
-\newcommand{\Robject}[1]{\texttt{#1}}
-\newcommand{\Rclass}[1]{{\textit{#1}}}
-\newcommand{\Rmethod}[1]{{\textit{#1}}}
-\newcommand{\Rfunarg}[1]{{\textit{#1}}}
-
-\bibliographystyle{plainnat}
-\title{cummeRbund Run Report}
-
-\date{}
-
-\begin{document}
-<<init, echo=FALSE>>=
-options(width=65)
-library(cummeRbund)
-library(xtable)
-cuff<-readCufflinks(dir="../",dbfile="cuffData.db")
-@ 
-
-\maketitle
-
-\tableofcontents
-
-\section{Run Parameters}
-<<>>=
-runInfo(cuff)
-@
-\section{Samples}
-
-\subsection{Condition Information}
-
-\subsection{Replicate Information}
-<<results=tex>>=
-xtable(replicates(cuff),caption="Replicate information")
-@
-
-\section{Gene information}
-
-\section{Model statistics}
-
-
-\section{Differential Expression Analysis}
-
-\section{Condition Specificity}
-
-%Put \subsection loop here for specificity on each condition including top genes?
-
-
-\clearpage
-
-\section{Session info}
-<<label=close_connection,echo=FALSE>>=
-end<-sqliteCloseConnection(cuff@DB)
-@
-
-<<session>>=
-sessionInfo()
-@
-
-\end{document}
diff --git a/vignettes/cummeRbund-manual.Rnw b/vignettes/cummeRbund-manual.Rnw
index 0b146c8..7ce98d0 100644
--- a/vignettes/cummeRbund-manual.Rnw
+++ b/vignettes/cummeRbund-manual.Rnw
@@ -67,7 +67,7 @@
 \begin{document}
 <<init, echo=FALSE>>=
 options(width=65)
-@ 
+@
 
 \maketitle
 \tableofcontents
@@ -78,7 +78,7 @@ that you update your cufflinks installation to version $\ge$2.0 to take full
 advantage of the improvements in modeling, reporting, and visualization that have been incorporated.
 \begin{itemize}
 	\item Cufflinks $\ge$ v2.0.1
-	\item SQLite 
+	\item SQLite
 	\item R $\ge$ v3.0
 	\item Packages:
 	\begin{itemize}
@@ -96,24 +96,24 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 		\end{itemize}
 	\end{itemize}
 \end{itemize}
-	
+
 \clearpage
 
 \section{Introduction}
 	\Rpackage{cummeRbund} is a visualization package for Cufflinks high-throughput sequencing data. It is designed to help you navigate through the large amount of data produced from a Cuffdiff RNA-Seq differential expression
 	analysis. The results of this analysis are typically a large number of inter-related files that are not terribly intuitive to navigate through. cummeRbund helps promote rapid analysis of RNA-Seq data by aggregating, indexing,
 	and allowing you easily visualize and create publication-ready figures of your RNA-Seq data while maintaining appropriate relationships between connected data points.
-	CummeRbund is a multifaceted suite for streamlined analysis and visualization of massively parallel RNA differential expression data sequencing data. 
-	
+	CummeRbund is a multifaceted suite for streamlined analysis and visualization of massively parallel RNA differential expression data sequencing data.
+
 	CummeRbund begins by re-organizing output files of a cuffdiff analysis, and storing these data in a local SQLite database. CummeRbund indexes the data to speed up access to specific feature data (genes, isoforms, TSS, CDS, etc.),
 	and preserves the various relationships between these features. Access to data elements is managed via the RSQLite package and data are presented in appropriately structured R classes with various convenience functions designed
 	to streamline your workflow. This persistent database storage means that inter-connected expression values are rapidly accessible and quickly searchable in future analyses.
-	
+
 	CummeRbund defines two types of data classes, 'pointer' or reference classes describe SQL connections to the database without directly containing data, and 'data' classes that retrieve a subset of related data points such as associated
-	features from a given gene or gene set. Each class type has methods for direct access to FPKM vales, differential expression information, statistical test results, raw and normalized fragment counts, individual replicate FPKM values, and additional annotation information for features. Output formats allow 
-	for browsing and analysis of data in standard R objects (data.frame, list, etc). CummeRbund was designed to provide analysis and visualization tools analogous to microarray data. In this regard, numerous plotting methods are provided for visualization 
+	features from a given gene or gene set. Each class type has methods for direct access to FPKM vales, differential expression information, statistical test results, raw and normalized fragment counts, individual replicate FPKM values, and additional annotation information for features. Output formats allow
+	for browsing and analysis of data in standard R objects (data.frame, list, etc). CummeRbund was designed to provide analysis and visualization tools analogous to microarray data. In this regard, numerous plotting methods are provided for visualization
 	of RNA-Seq data quality and global statistics, and simple routines for plotting expression levels for one or thousands of genes, their isoforms, TSS groups, or CDS groups.
-	  
+
 	The base class, \Rclass{cuffSet} is a 'pointer' to cuffdiff data that are stored out-of-memory in a sqlite database.
 
 \clearpage
@@ -129,7 +129,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 \subsection{CuffData Class}
 	The \Rclass{CuffData} class is also a pointer class to the SQL backend, but each instance is specific for a data subtype (genes, isoforms, TSS, CDS). Again, there is a DB slot (accessible using \Rmethod{DB()}) that contains the RSQLite connection object.
 	There are several accessor, setter, and plotting methods that allow for global analysis of all features within a \Rclass{CuffData} class. Subsetting is currently being re-written; however, it is primarily done through the 'gene\_id' field.
-	Available slots for the CuffData class are: 
+	Available slots for the CuffData class are:
 	\begin{itemize}
 		\item DB: RSQLite connection object
 		\item tables: A \Rclass{list} of tables in the SQLite DB that contain the cufflinks data.
@@ -142,7 +142,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 \subsection{CuffDist Class}
 	The \Rclass{CuffDist} class is a pointer class that contains the results of the various 'distribution tests' performed by cuffdiff.  These include differential promoter usage, differential splicing, and differential CDS usage.  These are independent tests from the differential analysis of gene-, isoform-, TSS-, and CDS-level features and therefore
 	have their own container type to distinguish them as such.  The 'promoters', 'relCDS', and 'splicing' slots of a \Rclass{CuffSet} class are all \Rclass{CuffDist} instances.
-	
+
 	Available slots for the CuffDist class are:
 	\begin{itemize}
 	\item DB: RSQLite connection object
@@ -164,7 +164,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
         \item genome: A character string indicating which build of the genome
         the associated features are derived from.  (e.g. `hg19',`mm9')
 	\end{itemize}
-	
+
 	A specialized sub-class of \Rclass{CuffFeatureSet} is the \Rclass{CuffGeneSet} class. This subclass adds additional slots to contain all isoforms, TSS, and CDS information for a given set of gene\_ids.  The \Rclass{CuffGeneSet} class is designed to aggregate all relevant
 	information for a set of genes into one object for easy analysis and/or manipulation.
 	The \Rclass{CuffGeneSet} object adds the following slots:
@@ -186,7 +186,7 @@ advantage of the improvements in modeling, reporting, and visualization that hav
 		\item diff: A data frame of differential expression/regulation data for a given feature.
 		\item count: A data frame containing raw and normalized fragment counts, variance, dispersion, and uncertainty for a given feature.
 	\end{itemize}
-	
+
 	A specialized sub-class of \Rclass{CuffFeature} is the \Rclass{CuffGene} class. This subclass adds additional slots to contain all isoform, TSS, and CDS information for a given gene.
 	The \Rclass{CuffGene} object adds the following slots:
 	\begin{itemize}
@@ -242,7 +242,7 @@ In addition, cuffdiff also performs differential splicing, CDS usage, and promot
 
 All of these output files are related to each other through their various tracking\_ids, but parsing through individual files to query for important result information requires both a good deal of patience and a strong grasp of command-line text manipulation. Enter cummeRbund, an R solution to aggregate, organize, and help visualize this multi-layered dataset. \\
 One of the principal benefits of using cummeRbund is that data are stored in a SQLite database.  This allows for out-of-memory analysis of data, quick retrieval, and only a one-time cost to set up the tables. By default, cummeRbund assumes that all output files from cuffdiff are in the current working directory.
-To read these files, populate the 'cuffData.db' database backend, and return the \Rclass{CuffSet} pointer object, you can do the following. 
+To read these files, populate the 'cuffData.db' database backend, and return the \Rclass{CuffSet} pointer object, you can do the following.
 
 <<loadLib>>=
 library(cummeRbund)
@@ -264,9 +264,9 @@ cuff
 
 Again, by default $dir$ is assumed to be the current working directory and \code{cuff<-readCufflinks()} should work if all appropriate files are in the current working directory. We now also
 recommend that you use both the \Rfunarg{genome} and \Rfunarg{gtfFile} arguments to readCufflinks(). This will allow cummeRbund to archive the transcript structure information located in the .gtf file associated with
-your particular cuffdiff run, as well as associate these transcripts with an appropriate genome build (e.g. 'hg19', 'mm9', etc) so as to allow for transcript-level visualizations and future integration with other external resources. 
+your particular cuffdiff run, as well as associate these transcripts with an appropriate genome build (e.g. 'hg19', 'mm9', etc) so as to allow for transcript-level visualizations and future integration with other external resources.
 Should you need to rebuild the SQLite backend for any reason, you can add the option \Rfunarg{rebuild=T} to \Rmethod{readCufflinks}.  Once the database is created, \Rmethod{readCufflinks} will default to using the SQL backend and should not need to rebuild this database.
-Each R session should begin with a call to \Rmethod{readCufflinks} so as to initialize the database connection and create an object with the appropriate RSQLite connection information. 
+Each R session should begin with a call to \Rmethod{readCufflinks} so as to initialize the database connection and create an object with the appropriate RSQLite connection information.
 
 \subsection{Adding additional feature annotation}
 Gene- or feature-level annotation can be permanently added to the database tables for future querying. If you have a data.frame where the first column contains the 'tracking\_id' (e.g. 'gene\_id' for genes, 'isoform\_id' for isoforms, etc.), you can easily add feature-level annotation using the \Rfunction{addFeatures()} function:
@@ -294,10 +294,10 @@ disp
 <<label=global_dispersion_plot,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(disp)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_dispersion_plot}
 	}
-	
+
 	\end{center}
 \end{figure}
 
@@ -305,11 +305,11 @@ Alternatively a call to \code{dispersionPlot(cuff)} directly will allow you to
 visualize the full model fit.
 
 The squared coefficient of variation is a normalized measure of cross-replicate
-variability that can be useful for evaluating the quality of your RNA-seq data. 
+variability that can be useful for evaluating the quality of your RNA-seq data.
 Differences in $CV^2$ can result in lower numbers of differentially expressed
 genes due to a higher degree of variability between replicate fpkm estimates.
 
-<<SCV_visualization,evaluate=FALSE>>= 
+<<SCV_visualization,evaluate=FALSE>>=
 genes.scv<-fpkmSCVPlot(genes(cuff))
 isoforms.scv<-fpkmSCVPlot(isoforms(cuff))
 @
@@ -319,11 +319,11 @@ isoforms.scv<-fpkmSCVPlot(isoforms(cuff))
 	\subfloat[The squared coefficient of variation allows visualization of
 	cross-replicate variability between conditions and can be a useful metric in
 	determining data quality at the gene level (left) or isoform level (right).
-	Here we demonstrate the variability of each individual ENCODE project 
+	Here we demonstrate the variability of each individual ENCODE project
     RNA-seq conditions.]{
 	\includegraphics[width=0.85\textwidth]{ENCODE_SCV}
 	}
-	
+
 	\end{center}
 \end{figure}
 
@@ -339,21 +339,21 @@ densRep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Density plot of individual conditions.]{
-	
+
 <<label=global_plots_dens,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_1>>
 	print(dens)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dens}
 	}
 	\qquad
 	\subfloat[Density plot with replicates=TRUE exposes individual replicate FPKM distributions.]{
-	
+
 <<label=global_plots_dens_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(densRep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dens_rep}}
 	\end{center}
 \end{figure}
@@ -370,21 +370,21 @@ brep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Box plot of FPKM distributions for individual conditions.]{
-	
+
 <<label=global_plots_box,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_2>>
 	print(b)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_box}
 	}
 	\qquad
 	\subfloat[Box plot with replicates=TRUE exposes individual replicate FPKM distributions.]{
-	
+
 <<label=global_plots_box_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(brep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_box_rep}}
 	\end{center}
 \end{figure}
@@ -401,12 +401,12 @@ s<-csScatterMatrix(genes(cuff))
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Scatterplots can be useful to identify global changes and trends in gene expression between pairs of conditions.]{
-	
+
 <<label=global_plots_scatter_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_3.1>>
 	print(s)
 @
-	
+
 	\includegraphics[width=0.65\textwidth]{cummeRbund-manual-global_plots_scatter_1}}
 	\end{center}
 \end{figure}
@@ -423,12 +423,12 @@ s
 	\begin{center}
 	\subfloat[Pairwise scatterplots can identify biases in gene expression between
 	two particular conditions.]{
-	
+
 <<label=global_plots_scatter_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_3.2>>
 	print(s)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_scatter_2}}
 	\end{center}
 \end{figure}
@@ -442,21 +442,21 @@ dend.rep<-csDendro(genes(cuff),replicates=T)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Dendrogram of JS distances between conditions.]{
-	
+
 <<label=global_plots_dendro,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_6>>
 	plot(dend)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dendro}
 	}
 	\qquad
 	\subfloat[Dendrogram with replicates=TRUE can identify outlier replicates.]{
-	
+
 <<label=global_plots_dendro_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 	plot(dend.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_dendro_rep}}
 	\end{center}
 \end{figure}
@@ -473,26 +473,26 @@ mCount
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[MA plots can identify biases across ranges of intensity and fold-change.]{
-	
+
 <<label=global_plots_MA,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_4>>
 	print(m)
 @
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_MA}}
-	
+
 	\qquad
 	\subfloat[MA plot drawn on normalized count values instead of FPKM.]{
-	
+
 <<label=global_plots_MA_count,fig=TRUE,echo=FALSE,include=FALSE>>=
 	print(mCount)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-global_plots_MA_count}}
 	\end{center}
 \end{figure}
 
 %Volcano plots
-Volcano plots are also available for the \Rclass{CuffData} objects. 
+Volcano plots are also available for the \Rclass{CuffData} objects.
 
 <<global_plots_5_1,include=FALSE>>=
 v<-csVolcanoMatrix(genes(cuff))
@@ -501,7 +501,7 @@ v
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Volcano plots explore the relationship between fold-change and significance.]{
-		
+
 <<label=global_plots_volcano_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_5_1>>
 print(v)
@@ -519,7 +519,7 @@ v
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Volcano plots explore the relationship between fold-change and significance.]{
-		
+
 <<label=global_plots_volcano_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<global_plots_5_2>>
 print(v)
@@ -595,7 +595,7 @@ head(gene.count.matrix)
 @
 
 \subsection{Writing your own SQL accessors}
-Since the cuffData.db is a SQLite database backend, if you are familiar with SQL and/or RSQLite query construction, you can simply design your own SQL queries to access the data that you are after. 
+Since the cuffData.db is a SQLite database backend, if you are familiar with SQL and/or RSQLite query construction, you can simply design your own SQL queries to access the data that you are after.
 
 \begin{figure}[h]
 \centering
@@ -604,7 +604,7 @@ Since the cuffData.db is a SQLite database backend, if you are familiar with SQL
 
 \end{figure}
 
-\clearpage 
+\clearpage
 
 \section{Creating Gene Sets}
 Gene Sets (stored in a \Rclass{CuffGeneSet} object) can be created using the \Rmethod{getGenes} method on a CuffSet object.
@@ -661,16 +661,16 @@ h.rep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Heatmaps provide a convenient way to visualize the expression of entire gene sets at once.]{
-	
+
 <<label=geneset_plots_heatmap,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_1>>
 print(h)
 @
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_heatmap}}
-	
+
 	\qquad
 	\subfloat[Same heatmap, with replicates=T can help to visualize variance between replicates.]{
-	
+
 <<label=geneset_plots_heatmap_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(h.rep)
 @
@@ -686,7 +686,7 @@ b
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[A (somewhat crowded) barplot for all genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_barplot,fig=TRUE,echo=FALSE,include=FALSE,width=8,height=4>>=
 <<geneset_plots_1.5>>
 print(b)
@@ -704,7 +704,7 @@ s
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Scatterplot showing relationship between two conditions for genes in a CuffGeneSet.]{
-		
+
 <<label=geneset_plots_scatter,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_2>>
 print(s)
@@ -723,7 +723,7 @@ v
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[Fold-change vs significance for genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_volcano,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_3>>
 print(v)
@@ -744,7 +744,7 @@ th
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[A heatmap of isoform-level FPKM values for all genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_isoform_heatmap,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_4>>
 print(ih)
@@ -752,12 +752,12 @@ print(ih)
 		\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_isoform_heatmap}}
 		\qquad
 		\subfloat[A heatmap of TSS-level FPKM values for all genes in a CuffGeneSet object.]{
-		
+
 <<label=geneset_plots_TSS_heatmap,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(th)
 @
 		\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_TSS_heatmap}}
-	
+
 	\end{center}
 \end{figure}
 
@@ -769,13 +769,13 @@ den<-csDendro(myGenes)
 \begin{figure}[htp]
 	\begin{center}
 		\subfloat[A dendrogram of the relationship between conditions based on the expression of genes in a CuffGeneSet.]{
-		
+
 <<label=geneset_plots_dendro,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<geneset_plots_5>>
 plot(den)
 @
 		\includegraphics[width=0.4\textwidth]{cummeRbund-manual-geneset_plots_dendro}}
-	
+
 	\end{center}
 \end{figure}
 
@@ -811,40 +811,40 @@ gl.cds.rep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Expression plot of a single gene.]{
-	
+
 <<label=gene_plots_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_1>>
 	print(gl)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_line}
 	}
 	\qquad
 	\subfloat[Expression plot of a single gene with replicate FPKMs exposed.]{
-	
+
 <<label=gene_plots_replicate_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(gl.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_replicate_line}}
 	\qquad
 	\subfloat[Expression plot of all isoforms of a single gene with replicate FPKMs exposed.]{
-	
+
 <<label=gene_plots_iso_replicate_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(gl.iso.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_iso_replicate_line}}
 	\qquad
 	\subfloat[Expression plot of all CDS for a single gene with replicate FPKMs exposed.]{
-	
+
 <<label=gene_plots_cds_replicate_line,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(gl.cds.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_cds_replicate_line}}
 	\end{center}
 \end{figure}
@@ -860,22 +860,22 @@ gb.rep
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Expression Barplot of a single gene.]{
-	
+
 <<label=gene_plots_bar,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_2>>
 print(gb)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_bar}
 	}
 	\qquad
 	\subfloat[Expression Barplot of a single gene with replicate FPKMs exposed.]{
-	
+
 
 <<label=gene_plots_bar_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(gb.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_bar_rep}}
 	\end{center}
 \end{figure}
@@ -889,7 +889,7 @@ igb
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Expression Barplot of all isoforms of a single gene with replicates exposed.]{
-	
+
 <<label=gene_plots_bar_isoforms,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_3>>
 print(igb)
@@ -908,14 +908,14 @@ gp
 	\begin{center}
 	\subfloat[Pie charts showing relative proportion of individual isoforms for a
 	single gene across conditions.]{
-	
+
 <<label=gene_plots_pie,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<gene_plots_4>>
 print(gp)
 @
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_plots_pie}
 	}
-	
+
 	\end{center}
 \end{figure}
 
@@ -945,7 +945,7 @@ to add additional tracks from an external data source.
 It is still provided with cummeRbund, but is no longer supported.}
 
 <<features_3,fig=TRUE>>=
-trackList<-list() 
+trackList<-list()
 myStart<-min(features(myGene)$start)
 myEnd<-max(features(myGene)$end)
 myChr<-unique(features(myGene)$seqnames)
@@ -956,17 +956,17 @@ trackList<-c(trackList,ideoTrack)
 
 axtrack<-GenomeAxisTrack()
 trackList<-c(trackList,axtrack)
- 
+
 genetrack<-makeGeneRegionTrack(myGene)
 genetrack
- 
+
 trackList<-c(trackList,genetrack)
- 
+
 biomTrack<-BiomartGeneRegionTrack(genome=genome,chromosome=as.character(myChr),
 		start=myStart,end=myEnd,name="ENSEMBL",showId=T)
- 
+
 trackList<-c(trackList,biomTrack)
- 
+
 conservation <- UcscTrack(genome = genome, chromosome = myChr,
 		track = "Conservation", table = "phyloP100wayAll",
 		from = myStart-2000, to = myEnd+2000, trackType = "DataTrack",
@@ -976,7 +976,7 @@ conservation <- UcscTrack(genome = genome, chromosome = myChr,
 		name = "Conservation")
 
 trackList<-c(trackList,conservation)
- 
+
 plotTracks(trackList,from=myStart-2000,to=myEnd+2000)
 
 @
@@ -998,21 +998,21 @@ mySigMat<-sigMatrix(cuff,level='genes',alpha=0.05)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Significant features overview matrix. This plot describes the number of significant genes at a 5\%FDR for each pairwise interaction tested.]{
-	
+
 <<label=sig_mat_plot_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<sig_mat_1>>
 print(mySigMat)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-sig_mat_plot_1}}
-	
+
 	\end{center}
 \end{figure}
 
 \subsection{Creating gene sets from significantly regulated genes}
-One of the primary roles of a differential expression analysis is to conduct significance tests on each feature (genes, isoforms, TSS, and CDS) for appropriate pairwise comparisons of conditions. The results of these tests (after multiple testing correction of course) can be used to determine which genes are differentially regulated. 
+One of the primary roles of a differential expression analysis is to conduct significance tests on each feature (genes, isoforms, TSS, and CDS) for appropriate pairwise comparisons of conditions. The results of these tests (after multiple testing correction of course) can be used to determine which genes are differentially regulated.
 \Rpackage{cummeRbund} makes accessing the results of these significance tests simple via \Rmethod{getSig()}. This function takes a CuffSet object and will scan at various feature levels ('genes' by default) to produce a \Rclass{vector} of feature IDs.
 By default \Rmethod{getSig()} outputs a vector of tracking IDs corresponding to all \emph{genes} that reject the null hypothesis in any condition tested. The default feature type can be changed by adjusting the 'level' argument to \Rmethod{getSig()}. In addition, an alpha value can be provided on which to filter the resulting list
-(the default is $0.05$ to match the default of cuffdiff). 
+(the default is $0.05$ to match the default of cuffdiff).
 
 <<get_sig_1>>=
 mySigGeneIds<-getSig(cuff,alpha=0.05,level='genes')
@@ -1058,13 +1058,13 @@ myDistHeat<-csDistHeat(genes(cuff))
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[JS distance heatmap between conditions across all gene features.]{
-	
+
 <<label=dist_heat_plot_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<dist_heat_1>>
 print(myDistHeat)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-dist_heat_plot_1}}
-	
+
 	\end{center}
 \end{figure}
 
@@ -1080,18 +1080,18 @@ myRepDistHeat<-csDistHeat(genes(cuff),replicates=T)
 	\begin{center}
 	\subfloat[JS distance heatmap between replicate samples across all gene
 	features.]{
-	
+
 <<label=dist_heat_plot_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<dist_heat_2>>
 print(myRepDistHeat)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-dist_heat_plot_2}}
-	
+
 	\end{center}
 \end{figure}
 
 This method can be used to explore similarities between conditions for all
-features, or just those features contained within a \Rclass{CuffGeneSet} class. 
+features, or just those features contained within a \Rclass{CuffGeneSet} class.
 Additionally, the \Rfunarg{samples.not.genes=F} argument will display distances
 between individual genes or features across conditions.
 
@@ -1115,40 +1115,40 @@ genes.MDS.rep<-MDSplot(genes(cuff),replicates=T)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[PCA plot for gene-level features]{
-	
+
 <<label=gene_PCA,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<dim_reduction_1>>
 	print(genes.PCA)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_PCA}
 	}
 	\qquad
 	\subfloat[MDS plot for gene-level features]{
-	
+
 <<label=gene_MDS,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(genes.MDS)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_MDS}}
 	\qquad
 	\subfloat[Individual replicate level PCA plot for gene-level features]{
-	
+
 <<label=gene_PCA_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(genes.PCA.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_PCA_rep}}
 	\qquad
 	\subfloat[Individual replicate level MDS plot for gene-level features]{
-	
+
 <<label=gene_MDS_rep,fig=TRUE,echo=FALSE,include=FALSE>>=
 
 	print(genes.MDS.rep)
 @
-	
+
 	\includegraphics[width=0.4\textwidth]{cummeRbund-manual-gene_MDS_rep}}
 	\end{center}
 \end{figure}
@@ -1160,9 +1160,9 @@ function \Rfunction{nnmf} for non-negative matrix factorization. You can use the
 \clearpage
 
 \subsection{Partitioning}
-K-means clustering is a useful tool that can be helpful in identifying clusters of genes with similar expression profiles. In fact, these profiles are learned from the data during the clustering. 
+K-means clustering is a useful tool that can be helpful in identifying clusters of genes with similar expression profiles. In fact, these profiles are learned from the data during the clustering.
 \Rmethod{csCluster()} uses the \Rmethod{pam()} method from the \Rpackage{cluster} package to perform the partitioning around medoids. In this case, however, the distance metric used by default is the
-Jensen-Shannon distance instead of the default Euclidean distance. Prior to performing this particular partitioning, the user must choose the number of clusters (K) into which the expression profiles should be divided. 
+Jensen-Shannon distance instead of the default Euclidean distance. Prior to performing this particular partitioning, the user must choose the number of clusters (K) into which the expression profiles should be divided.
 
 <<geneset_cluster_1,include=FALSE>>=
 ic<-csCluster(myGenes,k=4)
@@ -1177,26 +1177,26 @@ in the \Rmethod{csClusterPlot()} method.
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[PAM clustering with JS distance for a CuffGeneSet.]{
-	
+
 <<label=geneset_plots_cluster,fig=TRUE,echo=FALSE,include=FALSE>>=
 print(icp)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-geneset_plots_cluster}
 	}
-	
+
 	\end{center}
 \end{figure}
 
 \clearpage
 
 \subsection{Specificity}
-In some cases, a researcher may be interested in identifying features that are 'condition-specific'. Or, more likely, producing an ordered list of genes based on their specificity for a given condition. 
+In some cases, a researcher may be interested in identifying features that are 'condition-specific'. Or, more likely, producing an ordered list of genes based on their specificity for a given condition.
 We define a specificity score (S) as the following:
 \begin{equation}
 S_{g,i}=1-JSD(p_g,\hat{q_i})
 \end{equation}
 
-Where $JSD$ is the Jensen-Shannon distance, $p_g$ is the expression profile of a given gene $g$ expressed as a density (probability) of $\log_{10}(FPKM+1)$, and $\hat{q_i}$ is the unit vector of 'perfect expression' in a particular condition $i$. 
+Where $JSD$ is the Jensen-Shannon distance, $p_g$ is the expression profile of a given gene $g$ expressed as a density (probability) of $\log_{10}(FPKM+1)$, and $\hat{q_i}$ is the unit vector of 'perfect expression' in a particular condition $i$.
 
 We have created a method, \Rmethod{csSpecificity()} that outputs a matrix (with identical shape to that produced by \Rmethod{fpkmMatrix()}) of specificity scores (S) across all conditions for all features in a \Rclass{CuffFeatureSet} or \Rclass{CuffGeneSet}.
 <<label=specificity_1>>=
@@ -1220,17 +1220,17 @@ mySimilar.expression<-expressionPlot(mySimilar,logMode=T,showErrorbars=F)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Top 20 most similar genes to 'PINK1'.]{
-	
+
 <<label=similar_plots_1,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<similar_1>>
 print(mySimilar.expression)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-similar_plots_1}}
-	
+
 	\end{center}
 \end{figure}
 
-By default, findSimilar will return a CuffGeneSet of similar genes matching your criteria. 
+By default, findSimilar will return a CuffGeneSet of similar genes matching your criteria.
 Recently a few additional features have been added as well to enhance this type of exploration:
 
 \begin{itemize}
@@ -1250,18 +1250,18 @@ mySimilar2.expression<-expressionPlot(mySimilar2,logMode=T,showErrorbars=F)
 \begin{figure}[htp]
 	\begin{center}
 	\subfloat[Top 10 most similar genes to a provided expression profile.]{
-	
+
 <<label=similar_plots_2,fig=TRUE,echo=FALSE,include=FALSE>>=
 <<similar_2>>
 print(mySimilar2.expression)
 @
 	\includegraphics[width=0.6\textwidth]{cummeRbund-manual-similar_plots_2}}
-	
+
 	\end{center}
 \end{figure}
 
 
-\Rmethod{findSimilar()} also uses the Jensen-Shannon distance between the probability distributions of each gene across conditions to determine the similarity.  
+\Rmethod{findSimilar()} also uses the Jensen-Shannon distance between the probability distributions of each gene across conditions to determine the similarity.
 We have found this to be a more robust way to determine distance between genes using the high dynamic range of FPKM data. Future versions may allow for other dissimilarity measures to be used instead.
 
 \clearpage
@@ -1291,7 +1291,7 @@ We have found this to be a more robust way to determine distance between genes u
 
 \section{Session info}
 <<label=close_connection,echo=FALSE>>=
-end<-sqliteCloseConnection(cuff@DB)
+end<-dbDisconnect(cuff@DB)
 @
 
 <<session>>=

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-cummerbund.git



More information about the debian-med-commit mailing list