[med-svn] [bowtie2] 03/09: New upstream version 2.3.3.1

Alex Mestiashvili malex-guest at moszumanska.debian.org
Fri Oct 13 13:48:17 UTC 2017


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository bowtie2.

commit 7d9948854e45b2b2f6d407cab663892068f589b3
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Thu Oct 12 16:14:52 2017 +0200

    New upstream version 2.3.3.1
---
 MANUAL                       |  2 +-
 MANUAL.markdown              |  4 ++--
 Makefile                     | 42 +++++++++++++++++++++++---------
 NEWS                         |  6 +++++
 VERSION                      |  2 +-
 bowtie2                      |  8 ++++++-
 bt2_search.cpp               | 28 ++++++++++++++++------
 doc/manual.html              |  2 +-
 doc/website/manual.ssi       |  2 +-
 doc/website/old_news.ssi     | 49 +++++++++++++++++++++++++++++++++++++
 doc/website/recent_news.ssi  | 57 +++++++-------------------------------------
 doc/website/rhsidebar.ssi    |  4 ++--
 pat.cpp                      | 36 +++++++++++++---------------
 pat.h                        | 29 ++++++++++++++--------
 read_qseq.cpp                |  6 +++--
 scripts/test/simple_tests.pl | 57 ++++++++++++++++++++++++++++++++++----------
 16 files changed, 213 insertions(+), 121 deletions(-)

diff --git a/MANUAL b/MANUAL
index 5328c74..2360447 100644
--- a/MANUAL
+++ b/MANUAL
@@ -1516,7 +1516,7 @@ by tabs; from left to right, the fields are:
     will truncate the name at the first whitespace character. This is
     similar to the behavior of other tools. The standard behavior of
     truncating at the first whitespace can be suppressed with
-    --sam-noqname-trunc at the expense of generating non-standard SAM.
+    --sam-no-qname-trunc at the expense of generating non-standard SAM.
 
 2.  Sum of all applicable flags. Flags relevant to Bowtie are:
 
diff --git a/MANUAL.markdown b/MANUAL.markdown
index 039118c..8092431 100644
--- a/MANUAL.markdown
+++ b/MANUAL.markdown
@@ -1927,8 +1927,8 @@ left to right, the fields are:
 	If the read name contains any whitespace characters, Bowtie 2 will truncate
 	the name at the first whitespace character.  This is similar to the
 	behavior of other tools. The standard behavior of truncating at the first
-    whitespace can be suppressed with `--sam-noqname-trunc` at the expense of
-    generating non-standard SAM.
+	whitespace can be suppressed with `--sam-no-qname-trunc` at the expense of
+	generating non-standard SAM.
 
 2.  Sum of all applicable flags.  Flags relevant to Bowtie are:
 
diff --git a/Makefile b/Makefile
index 37c97e9..9b4453e 100644
--- a/Makefile
+++ b/Makefile
@@ -24,8 +24,8 @@
 prefix = /usr/local
 bindir = $(prefix)/bin
 
-INC =
-LIBS = -lz
+INC = $(if $(RELEASE_BUILD),-I$(CURDIR)/.include)
+LIBS = $(LDFLAGS) $(if $(RELEASE_BUILD),-L$(CURDIR)/.lib) -lz
 GCC_PREFIX = $(shell dirname `which gcc`)
 GCC_SUFFIX =
 CC ?= $(GCC_PREFIX)/gcc$(GCC_SUFFIX)
@@ -33,11 +33,11 @@ CPP ?= $(GCC_PREFIX)/g++$(GCC_SUFFIX)
 CXX ?= $(CPP)
 HEADERS = $(wildcard *.h)
 BOWTIE_MM = 1
-BOWTIE_SHARED_MEM = 0
+BOWTIE_SHARED_MEM =
 
 # Detect Cygwin or MinGW
-WINDOWS = 0
-MINGW = 0
+WINDOWS =
+MINGW =
 ifneq (,$(findstring MINGW,$(shell uname)))
 	WINDOWS = 1
 	MINGW = 1
@@ -47,7 +47,7 @@ ifneq (,$(findstring MINGW,$(shell uname)))
 	override EXTRA_FLAGS += -ansi
 endif
 
-MACOS = 0
+MACOS =
 ifneq (,$(findstring Darwin,$(shell uname)))
 	MACOS = 1
 	ifneq (,$(findstring 13,$(shell uname -r)))
@@ -55,6 +55,9 @@ ifneq (,$(findstring Darwin,$(shell uname)))
 		CC = clang
 		override EXTRA_FLAGS += -stdlib=libstdc++
 	endif
+	ifeq (1, $(RELEASE_BUILD))
+		EXTRA_FLAGS += -mmacosx-version-min=10.9
+	endif
 endif
 
 POPCNT_CAPABILITY ?= 1
@@ -95,7 +98,7 @@ endif
 
 #default is to use Intel TBB
 ifneq (1,$(NO_TBB))
-	LIBS += $(PTHREAD_LIB) -ltbb -ltbbmalloc_proxy
+	LIBS += $(PTHREAD_LIB) -ltbb -ltbbmalloc$(if $(RELEASE_BUILD),,_proxy)
 	override EXTRA_FLAGS += -DWITH_TBB
 else
 	LIBS += $(PTHREAD_LIB)
@@ -433,9 +436,8 @@ bowtie2-src: $(SRC_PKG_LIST)
 	rm -rf .src.tmp
 
 .PHONY: bowtie2-pkg
-bowtie2-pkg: $(BIN_PKG_LIST) $(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_AUX)
-	$(eval HAS_TBB=$(shell strings bowtie2-align-l* | grep tbb))
-	$(eval PKG_DIR=bowtie2-$(VERSION)$(if $(HAS_TBB),,-legacy))
+bowtie2-pkg: static-libs $(BIN_PKG_LIST) $(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_AUX)
+	$(eval PKG_DIR=bowtie2-$(VERSION)-$(if $(MACOS),macos,$(if $(MINGW),mingw,linux))-x86_64)
 	chmod a+x scripts/*.sh scripts/*.pl
 	rm -rf .bin.tmp
 	mkdir -p .bin.tmp/$(PKG_DIR)
@@ -494,13 +496,30 @@ random-test: all perl-deps
 .PHONY: perl-deps
 perl-deps:
 	if [ ! -e .perllib.tmp ]; then \
-		DL=$$([ `which wget` ] && echo wget -O- || echo curl -L) ; \
+		DL=$$([ `which wget` ] && echo "wget --no-check-certificate -O-" || echo "curl -L") ; \
 		mkdir .perllib.tmp ; \
 		$$DL http://cpanmin.us | perl - -l $(CURDIR)/.perllib.tmp App::cpanminus local::lib ; \
 		eval `perl -I $(CURDIR)/.perllib.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.perllib.tmp` ; \
 		cpanm --force Math::Random Clone Test::Deep Sys::Info ; \
 	fi
 
+static-libs: # download and build static zlib and TBB into $(CURDIR)/.lib and $(CURDIR)/.include
+	if [ ! -d $(CURDIR)/.lib ] || [ ! -d $(CURDIR)/.include ]; then \
+		mkdir -p $(CURDIR)/.lib $(CURDIR)/.include ; \
+	fi ; \
+	if [ "`uname`" = "Darwin" ]; then \
+		export CFLAGS=-mmacosx-version-min=10.9 ; \
+		export CXXFLAGS=-mmacosx-version-min=10.9 ; \
+	fi ; \
+	DL=$$([ `which wget` ] && echo "wget --no-check-certificate" || echo "curl -LO") ; \
+	cd /tmp ; \
+	$$DL https://zlib.net/zlib-1.2.11.tar.gz && tar xzf zlib-1.2.11.tar.gz && cd zlib-1.2.11 ; \
+	$(if $(MINGW), mingw32-make -f win32/Makefile.gcc, ./configure --static && make) && cp libz.a $(CURDIR)/.lib && cp zconf.h zlib.h $(CURDIR)/.include ; \
+	cd .. ; \
+	$$DL https://github.com/01org/tbb/archive/2017_U8.tar.gz && tar xzf 2017_U8.tar.gz && cd tbb-2017_U8; \
+	$(if $(MINGW), mingw32-make compiler=gcc arch=ia64 runtime=mingw, make) extra_inc=big_iron.inc -j4 \
+	&& cp -r include/tbb $(CURDIR)/.include && cp build/*_release/*.a $(CURDIR)/.lib
+
 .PHONY: test
 test: simple-test random-test
 
@@ -512,3 +531,4 @@ clean:
 	rm -f core.* .tmp.head
 	rm -rf *.dSYM
 	rm -rf .perllib.tmp
+	rm -rf .include .lib
diff --git a/NEWS b/NEWS
index 4b0cd62..cff70a1 100644
--- a/NEWS
+++ b/NEWS
@@ -19,6 +19,12 @@ Please report any issues to the Bowtie 2 Github page or using the Sourceforge bu
 Version Release History
 =======================
 
+Version 2.3.3.1 - Oct 05, 2017
+    * Fixed an issue causing input files to be skipped when running
+    multi-threaded alignment
+    * Fixed an issue causing the first character of a read name to be
+    dropped while parsing reads split across multiple input files
+
 Version 2.3.3 - Sep 09, 2017
 From this release forward prepackaged bowtie2 binaries are now
 statically linked to the zlib compression library and, the recommended
diff --git a/VERSION b/VERSION
index 0bee604..9d71486 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.3.3
+2.3.3.1
diff --git a/bowtie2 b/bowtie2
index 8476ad3..036d425 100755
--- a/bowtie2
+++ b/bowtie2
@@ -345,7 +345,13 @@ sub Extract_IndexName_From {
             my $idx_basename = $_[$i+1];
             my @idx_filenames = glob($idx_basename . "*.bt2{,l}");
             unless (@idx_filenames) {
-                Fail("\"" . $idx_basename . "\" is not a Bowtie 2 index\n");
+                if (exists $ENV{"BOWTIE2_INDEXES"}) { # fall back to the index directory; same "*.bt2{,l}" pattern as the direct lookup
+                    @idx_filenames = glob("$ENV{'BOWTIE2_INDEXES'}/$idx_basename" . "*.bt2{,l}");
+                }
+
+                if (!@idx_filenames) {
+                    Fail("\"" . $idx_basename . "\" does not exist or is not a Bowtie 2 index\n");
+                }
             }
             return $idx_basename;
         }
diff --git a/bt2_search.cpp b/bt2_search.cpp
index 7214f57..1daa996 100644
--- a/bt2_search.cpp
+++ b/bt2_search.cpp
@@ -29,7 +29,11 @@
 #include <limits>
 #include <time.h>
 #include <dirent.h>
+
+#ifndef _WIN32
 #include <signal.h>
+#endif
+
 #include "alphabet.h"
 #include "assert_helpers.h"
 #include "endian_swap.h"
@@ -819,7 +823,7 @@ static void printUsage(ostream& out) {
 	    << "  --rg <text>        add <text> (\"lab:value\") to @RG line of SAM header." << endl
 	    << "                     Note: @RG line only printed when --rg-id is set." << endl
 	    << "  --omit-sec-seq     put '*' in SEQ and QUAL fields for secondary alignments." << endl
-	    << "  --sam-noqname-trunc Suppress standard behavior of truncating readname at first whitespace " << endl
+	    << "  --sam-no-qname-trunc Suppress standard behavior of truncating readname at first whitespace " << endl
 	    << "                      at the expense of generating non-standard SAM." << endl
 	    << "  --xeq              Use '='/'X', instead of 'M,' to specify matches/mismatches in SAM record." << endl
 	    << "  --soft-clipped-unmapped-tlen Exclude soft-clipped bases when reporting TLEN" << endl
@@ -4353,7 +4357,7 @@ static void multiseedSearchWorker_2p5(void *vp) {
 	return;
 }
 
-
+#ifndef _WIN32
 /**
  * Print friendly-ish message pertaining to failed system call.
  */
@@ -4524,7 +4528,7 @@ static void thread_monitor(int pid, int orig_threads, EList<int>& tids, EList<T*
 		}
 	}
 }
-
+#endif
 /**
  * Called once per alignment job.  Sets up global pointers to the
  * shared global data structures, creates per-thread structures, then
@@ -4564,10 +4568,12 @@ static void multiseedSearch(
 	delete _t;
 	if(!refs->loaded()) throw 1;
 	multiseed_refs = refs.get();
-    sigset_t set;
-    sigemptyset(&set);
-    sigaddset(&set, SIGPIPE);
-    pthread_sigmask(SIG_BLOCK, &set, NULL);
+#ifndef _WIN32
+	sigset_t set;
+	sigemptyset(&set);
+	sigaddset(&set, SIGPIPE);
+	pthread_sigmask(SIG_BLOCK, &set, NULL);
+#endif
 	EList<int> tids;
 #ifdef WITH_TBB
 	//tbb::task_group tbb_grp;
@@ -4614,12 +4620,14 @@ static void multiseedSearch(
 	{
 		Timer _t(cerr, "Multiseed full-index search: ", timing);
 
+#ifndef _WIN32
 		int pid = 0;
 		if(thread_stealing) {
 			pid = getpid();
 			write_pid(thread_stealing_dir.c_str(), pid);
 			thread_counter = 0;
 		}
+#endif
 		
 		for(int i = 0; i < nthreads; i++) {
 #ifdef WITH_TBB
@@ -4644,10 +4652,12 @@ static void multiseedSearch(
 #endif
 		}
 
+#ifndef _WIN32
 		if(thread_stealing) {
 			int orig_threads = nthreads;
 			thread_monitor(pid, orig_threads, tids, threads);
 		}
+#endif
 	
 #ifdef WITH_TBB
 		while(all_threads_done < nthreads) {
@@ -4659,9 +4669,11 @@ static void multiseedSearch(
 		}
 #endif
 
+#ifndef _WIN32
 		if(thread_stealing) {
 			del_pid(thread_stealing_dir.c_str(), pid);
 		}
+#endif
 	}
 	if(!metricsPerRead && (metricsOfb != NULL || metricsStderr)) {
 		metrics.reportInterval(metricsOfb, metricsStderr, true, NULL);
@@ -5012,7 +5024,9 @@ int bowtie(int argc, const char **argv) {
 				return 1;
 			}
 			
+#ifndef _WIN32
 			thread_stealing = thread_ceiling > nthreads;
+#endif
 			if(thread_stealing && thread_stealing_dir.empty()) {
 				cerr << "When --thread-ceiling is specified, must also specify --thread-piddir" << endl;
 				printUsage(cerr);
diff --git a/doc/manual.html b/doc/manual.html
index 77d2adb..7c9d65b 100644
--- a/doc/manual.html
+++ b/doc/manual.html
@@ -1058,7 +1058,7 @@ Seed 4 rc:                   TTATGCATGA</code></pre>
 <p>Each subsequent line describes an alignment or, if the read failed to align, a read. Each line is a collection of at least 12 fields separated by tabs; from left to right, the fields are:</p>
 <ol style="list-style-type: decimal">
 <li><p>Name of read that aligned.</p>
-<p>Note that the <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM specification</a> disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with <code>--sam-noqname-trunc</code> at the expense of generating non-standard SAM.</p></li>
+<p>Note that the <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM specification</a> disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with <code>--sam-no-qname-trunc</code> at the expense of generating non-standard SAM.</p></li>
 <li><p>Sum of all applicable flags. Flags relevant to Bowtie are:</p>
 <table>
 <tr>
diff --git a/doc/website/manual.ssi b/doc/website/manual.ssi
index d9c222d..37c745d 100644
--- a/doc/website/manual.ssi
+++ b/doc/website/manual.ssi
@@ -1058,7 +1058,7 @@ Seed 4 rc:                   TTATGCATGA</code></pre>
 <p>Each subsequent line describes an alignment or, if the read failed to align, a read. Each line is a collection of at least 12 fields separated by tabs; from left to right, the fields are:</p>
 <ol style="list-style-type: decimal">
 <li><p>Name of read that aligned.</p>
-<p>Note that the <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM specification</a> disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with <code>--sam-noqname-trunc</code> at the expense of generating non-standard SAM.</p></li>
+<p>Note that the <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM specification</a> disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with <code>--sam-no-qname-trunc</code> at the expense of generating non-standard SAM.</p></li>
 <li><p>Sum of all applicable flags. Flags relevant to Bowtie are:</p>
 <table>
 <tr>
diff --git a/doc/website/old_news.ssi b/doc/website/old_news.ssi
index 7df0a31..791e804 100644
--- a/doc/website/old_news.ssi
+++ b/doc/website/old_news.ssi
@@ -1,3 +1,52 @@
+<h2>Bowtie2 developers note</h2>
+<p>As of Nov 2015 we had to fix the bowtie2 github repo and relabel the entire history. Developers and contributors should re-clone the bowtie2 github repo from this current state. </p>
+<h2>Version 2.2.9 - Apr 22, 2016</h2>
+<ul>
+   <li>Fixed the multiple threads issue for the bowtie2-build.</li>
+   <li>Fixed a TBB related build issue impacting TBB v4.4.</li>
+</ul>
+<h2>Version 2.2.8 - Mar 10, 2016</h2>
+<ul>
+   <li>Various website updates.</li>
+   <li>Fixed the bowtie2-build issue that made TBB compilation fail.</li>
+   <li>Fixed the static build for Win32 platform.</li>
+</ul>
+<h2>Version 2.2.7 - Feb 10, 2016</h2>
+<ul>
+   <li>Added a parallel index build option: bowtie2-build --threads &lt;# threads&gt;.</li>
+   <li>Fixed an issue whereby IUPAC codes (other than A/C/G/T/N) in reads were converted to As. Now all non-A/C/G/T characters in reads become Ns.</li>
+   <li>Fixed some compilation issues, including for the Intel C++ Compiler.</li>
+   <li>Removed debugging code that could impede performance for many alignment threads.</li>
+   <li>Fixed a few typos in documentation.</li>
+</ul>
+<h2>Version 2.2.6 - Jul 22, 2015</h2>
+<ul>
+   <li>Switched to a stable sort to avoid some potential reproducibility confusions.</li>
+   <li>Added <tt>'install'</tt> target for *nix platforms.</li>
+   <li>Added the Intel TBB option which provides in most situations a better performance output. TBB is not present by default in the current build but can be added by compiling the source code with <tt>WITH_TBB=1</tt> option.</li>
+   <li>Fixed a bug that caused seed length to be dependent on the <tt><a href="manual.shtml#bowtie2-options-L">-L</a></tt> and <tt><a href="manual.shtml#bowtie2-options-N">-N</a></tt> parameters order.</li>
+   <li>Fixed a bug that caused <tt><a href="manual.shtml#bowtie2-options-local">--local</a></tt> followed by <tt><a href="manual.shtml#bowtie2-options-N">-N</a></tt> to reset seed length to 22 which is actually the default value for global.</li>
+   <li>Enable compilation on FreeBSD and clang, although gmake port is still required.</li>
+   <li>Fixed an issue that made bowtie2 compilation process to fail on Snow Leopard.</li>
+</ul>
+
+<h2>Version 2.2.5 - Mar 9, 2015</h2>
+<ul>
+   <li>Fixed some situations where incorrectly we could detect a Mavericks platform.</li>
+   <li>Fixed some manual issues including some bad HTML formatting.</li>
+   <li>Make sure the wrapper correctly identifies the platform under OSX.</li>
+   <li>Fixed <tt><a href="manual.shtml#bowtie2-options-rg">--rg</a></tt>/<tt><a href="manual.shtml#bowtie2-options-rg-id">--rg-id</a></tt> options where included spaces were incorrectly treated.</li>
+   <li>Various documentation fixes added by contributors.</li>
+   <li>Fixed the incorrect behavior where parameter file names may contain spaces.</li>
+   <li>Fixed bugs related with the presence of spaces in the path where bowtie binaries are stored.</li>
+   <li>Improved exception handling for misformatted quality values.</li>
+   <li>Improved redundancy checks by correctly accounting for soft clipping.</li>
+</ul>
+
+<h2>Lighter released</h2>
+<ul>
+   <li>Lighter is an extremely fast and memory-efficient program for correcting sequencing errors in DNA sequencing data.  For details on how error correction can help improve the speed and accuracy of downstream analysis tools, see the <a href="http://genomebiology.com/2014/15/11/509">paper in Genome Biology</a>.  Source and software <a href="https://github.com/mourisl/Lighter">available at GitHub</a>.</li>
+</ul>
 <h2>Version 2.2.4 - Oct 22, 2014</h2>
 <ul>
    <li>Fixed a Mavericks OSX specific bug caused by some linkage ambiguities.</li>
diff --git a/doc/website/recent_news.ssi b/doc/website/recent_news.ssi
index ed82ee7..9a41e30 100644
--- a/doc/website/recent_news.ssi
+++ b/doc/website/recent_news.ssi
@@ -1,3 +1,9 @@
+<h2>Version 2.3.3.1 - October 05, 2017</h2>
+<ul>
+    <li>Fixed an issue causing input files to be skipped when running multi-threaded alignment</li>
+    <li>Fixed an issue causing the first character of a read name to be dropped while parsing reads split across multiple input files</li>
+</ul>
+
 <h2>Version 2.3.3 - September 06, 2017</h2>
 <p>From this release forward prepackaged bowtie2 binaries are now statically linked to the zlib compression library and, the recommended threading library, TBB. Users who rely on prepackaged builds are no longer required to have these packages pre-installed. As a result of the aforementioned changes legacy packages have been discontinued.</p>
 <ul>
@@ -21,6 +27,7 @@
     <li>Fixed compilation issues caused by gzbuffer function when compiling with zlib v1.2.3.5 and earlier. Users compiling against these libraries will use the zlib default buffer size of 8Kb when decompressing read files.</li>
     <li>Fixed issue that would cause Bowtie 2 hang when aligning FASTA inputs with more than one thread</li>
 </ul>
+
 <h2>Version 2.3.1 - Mar 03, 2017</h2>
 <p>Please note that as of this release Bowtie 2 now has dependencies on zlib and readline libraries. Make sure that all dependencies are met before attempting to build from source.</p>
 <ul>
@@ -31,6 +38,7 @@
     <li>Fixed a bug whereby combining <tt><a href="manual.shtml#bowtie2-options-un-conc">-—un-conc</a></tt> with <tt><a href="manual.shtml#bowtie2-options-k">-k</a></tt> or <tt><a href="manual.shtml#bowtie2-build-options-a">-a</a></tt> would cause <tt>bowtie2</tt> to print duplicate reads in one or both of the <tt><a href="manual.shtml#bowtie2-options-un-conc">--un-conc*</a></tt> output files, causing the ends to be misaligned.</li>
     <li>The default <tt><a href="manual.shtml#bowtie2-options-score-min">--score-min</a></tt> for <tt><a href="manual.shtml#bowtie2-options-local">--local</a></tt> mode is now <tt>'G,20,8'</tt>. That was the stated default in the documentation for a while, but the actual default was <tt>'G,0,10'</tt> for many versions. Now the default matches the documentation and, we find, yields more accurate alignments than <tt>'G,0,10'</tt></li>
 </ul>
+
 <h2>Version 2.3.0 - Dec 13, 2016</h2>
 <p>This is a major release with some larger and many smaller changes. These notes emphasize the large changes. See commit history for details.</p>
 <ul>
@@ -42,52 +50,3 @@
     <li>Now detects and reports inconsistencies between <tt><a href="manual.shtml#bowtie2-options-score-min">--score-min</a></tt> and <tt><a href="manual.shtml#bowtie2-options-ma">--ma</a></tt></li>
     <li>Changed default for <tt><a href="manual.shtml#bowtie2-build-options-bmaxdivn">--bmaxdivn</a></tt> to yield better memory footprint and running time when building an index with many threads</li>
 </ul>
-<h2>Bowtie2 developers note</h2>
-<p>As of Nov 2015 we had to fix the bowtie2 github repo and relabel the entire history. Developers and contributors should re-clone the bowtie2 github repo from this current state. </p>
-<h2>Version 2.2.9 - Apr 22, 2016</h2>
-<ul>
-   <li>Fixed the multiple threads issue for the bowtie2-build.</li>
-   <li>Fixed a TBB related build issue impacting TBB v4.4.</li>
-</ul>
-<h2>Version 2.2.8 - Mar 10, 2016</h2>
-<ul>
-   <li>Various website updates.</li>
-   <li>Fixed the bowtie2-build issue that made TBB compilation fail.</li>
-   <li>Fixed the static build for Win32 platform.</li>
-</ul>
-<h2>Version 2.2.7 - Feb 10, 2016</h2>
-<ul>
-   <li>Added a parallel index build option: bowtie2-build --threads <# threads>.</li>
-   <li>Fixed an issue whereby IUPAC codes (other than A/C/G/T/N) in reads were converted to As. Now all non-A/C/G/T characters in reads become Ns.</li>
-   <li>Fixed some compilation issues, including for the Intel C++ Compiler.</li>
-   <li>Removed debugging code that could impede performance for many alignment threads.</li>
-   <li>Fixed a few typos in documentation.</li>
-</ul>
-<h2>Version 2.2.6 - Jul 22, 2015</h2>
-<ul>
-   <li>Switched to a stable sort to avoid some potential reproducibility confusions.</li>
-   <li>Added <tt>'install'</tt> target for *nix platforms.</li>
-   <li>Added the Intel TBB option which provides in most situations a better performance output. TBB is not present by default in the current build but can be added by compiling the source code with <tt>WITH_TBB=1</tt> option.</li>
-   <li>Fixed a bug that caused seed lenght to be dependent of the <tt><a href="manual.shtml#bowtie2-options-L">-L</a></tt> and <tt><a href="manual.shtml#bowtie2-options-N">-N</a></tt> parameters order.</li>
-   <li>Fixed a bug that caused <tt><a href="manual.shtml#bowtie2-options-local">--local</a></tt> followed by <tt><a href="manual.shtml#bowtie2-options-N">-N</a></tt> to reset seed lenght to 22 which is actually the default value for global.</li>
-   <li>Enable compilation on FreeBDS and clang, although gmake port is still required.</li>
-   <li>Fixed an issue that made bowtie2 compilation process to fail on Snow Leopard.</li>
-</ul>
-
-<h2>Version 2.2.5 - Mar 9, 2015</h2>
-<ul>
-   <li>Fixed some situations where incorrectly we could detect a Mavericks platform.</li>
-   <li>Fixed some manual issues including some HTML bad formating.</li>
-   <li>Make sure the wrapper correctly identifies the platform under OSX.</li>
-   <li>Fixed <tt><a href="manual.shtml#bowtie2-options-rg">--rg</a></tt>/<tt><a href="manual.shtml#bowtie2-options-rg-id">--rg-id</a></tt> options where included spaces were incorrectly treated.</li>
-   <li>Various documentation fixes added by contributors.</li>
-   <li>Fixed the incorrect behavior where parameter file names may contain spaces.</li>
-   <li>Fixed bugs related with the presence of spaces in the path where bowtie binaries are stored.</li>
-   <li>Improved exception handling for missformated quality values.</li>
-   <li>Improved redundancy checks by correctly account for soft clipping.</li>
-</ul>
-
-<h2>Lighter released</h2>
-<ul>
-   <li>Lighter is an extremely fast and memory-efficient program for correcting sequencing errors in DNA sequencing data.  For details on how error correction can help improve the speed and accuracy of downstream analysis tools, see the <a href="http://genomebiology.com/2014/15/11/509">paper in Genome Biology</a>.  Source and software <a href="https://github.com/mourisl/Lighter">available at GitHub</a></li>.
-</ul>
diff --git a/doc/website/rhsidebar.ssi b/doc/website/rhsidebar.ssi
index a490f3e..3a74fdc 100644
--- a/doc/website/rhsidebar.ssi
+++ b/doc/website/rhsidebar.ssi
@@ -18,10 +18,10 @@
         </tr>
       <tr>
       <td>
-        <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.3.3">Bowtie2 2.3.3</a>
+        <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.3.3.1">Bowtie2 2.3.3.1</a>
       </td>
       <td align="right">
-        09/06/17 
+        10/05/17 
       </td>
       </tr>
       <tr>
diff --git a/pat.cpp b/pat.cpp
index 16d6754..b86a6be 100644
--- a/pat.cpp
+++ b/pat.cpp
@@ -401,11 +401,11 @@ pair<bool, int> CFilePatternSource::nextBatchImpl(
 	bool batch_a)
 {
 	bool done = false;
-	int nread = 0;
+	unsigned nread = 0;
 	pt.setReadId(readCnt_);
 	while(true) { // loop that moves on to next file when needed
 		do {
-			pair<bool, int> ret = nextBatchFromFile(pt, batch_a);
+			pair<bool, int> ret = nextBatchFromFile(pt, batch_a, nread);
 			done = ret.first;
 			nread = ret.second;
 		} while(!done && nread == 0); // not sure why this would happen
@@ -413,9 +413,10 @@ pair<bool, int> CFilePatternSource::nextBatchImpl(
 			open();
 			resetForNextFile(); // reset state to handle a fresh file
 			filecur_++;
-			if(nread == 0) {
+			if(nread == 0 || (nread < pt.max_buf_)) {
 				continue;
 			}
+			done = false;
 		}
 		break;
 	}
@@ -678,7 +679,7 @@ bool VectorPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> FastaPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a, unsigned readi)
 {
 	int c;
 	EList<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
@@ -697,7 +698,6 @@ pair<bool, int> FastaPatternSource::nextBatchFromFile(
 		first_ = false;
 	}
 	bool done = false;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	for(; readi < pt.max_buf_ && !done; readi++) {
 		Read::TBuf& buf = readbuf[readi].readOrigBuf;
@@ -803,11 +803,10 @@ bool FastaPatternSource::parse(Read& r, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> FastaContinuousPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a, unsigned readi)
 {
 	int c = -1;
 	EList<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
-	size_t readi = 0;
 	while(readi < pt.max_buf_) {
 		c = getc_wrapper();
 		if(c < 0) {
@@ -847,18 +846,18 @@ pair<bool, int> FastaContinuousPatternSource::nextBatchFromFile(
 			}
 			if(eat_ > 0) {
 				eat_--;
-				// Try to keep readCnt_ aligned with the offset
+				// Try to keep cur_ aligned with the offset
 				// into the reference; that lets us see where
 				// the sampling gaps are by looking at the read
 				// name
 				if(!beginning_) {
-					readCnt_++;
+					cur_++;
 				}
 				continue;
 			}
 			// install name
 			readbuf[readi].readOrigBuf = name_prefix_buf_;
-			itoa10<TReadId>(readCnt_ - subReadCnt_, name_int_buf_);
+			itoa10<TReadId>(cur_ - last_, name_int_buf_);
 			readbuf[readi].readOrigBuf.append(name_int_buf_);
 			readbuf[readi].readOrigBuf.append('\t');
 			// install sequence
@@ -872,7 +871,7 @@ pair<bool, int> FastaContinuousPatternSource::nextBatchFromFile(
 				readbuf[readi].readOrigBuf.append(c);
 			}
 			eat_ = freq_-1;
-			readCnt_++;
+			cur_++;
 			beginning_ = false;
 			readi++;
 		}
@@ -947,7 +946,7 @@ bool FastaContinuousPatternSource::parse(
  */
 pair<bool, int> FastqPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a, unsigned readi)
 {
 	int c = -1;
 	EList<Read>* readbuf = batch_a ? &pt.bufa_ : &pt.bufb_;
@@ -964,15 +963,13 @@ pair<bool, int> FastqPatternSource::nextBatchFromFile(
 			throw 1;
 		}
 		first_ = false;
-		(*readbuf)[0].readOrigBuf.append('@');
+		(*readbuf)[readi].readOrigBuf.append('@');
 	}
 
 	bool done = false, aborted = false;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	while (readi < pt.max_buf_ && !done) {
 		Read::TBuf& buf = (*readbuf)[readi].readOrigBuf;
-		assert(readi == 0 || buf.empty());
 		int newlines = 4;
 		while(newlines) {
 			c = getc_wrapper();
@@ -1118,7 +1115,7 @@ bool FastqPatternSource::parse(Read &r, Read& rb, TReadId rdid) const {
 	// Set up a default name if one hasn't been set
 	if(r.name.empty()) {
 		char cbuf[20];
-		itoa10<TReadId>(static_cast<TReadId>(readCnt_), cbuf);
+		itoa10<TReadId>(static_cast<TReadId>(rdid), cbuf);
 		r.name.install(cbuf);
 	}
 	r.parsed = true;
@@ -1133,14 +1130,13 @@ bool FastqPatternSource::parse(Read &r, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> TabbedPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a, unsigned readi)
 {
 	int c = getc_wrapper();
 	while(c >= 0 && (c == '\n' || c == '\r')) {
 		c = getc_wrapper();
 	}
 	EList<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	for(; readi < pt.max_buf_ && c >= 0; readi++) {
 		readbuf[readi].readOrigBuf.clear();
@@ -1267,14 +1263,14 @@ bool TabbedPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> RawPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a,
+    unsigned readi)
 {
 	int c = getc_wrapper();
 	while(c >= 0 && (c == '\n' || c == '\r')) {
 		c = getc_wrapper();
 	}
 	EList<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	for(; readi < pt.max_buf_ && c >= 0; readi++) {
 		readbuf[readi].readOrigBuf.clear();
diff --git a/pat.h b/pat.h
index 57726a3..ce31278 100644
--- a/pat.h
+++ b/pat.h
@@ -382,7 +382,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a) = 0;
+		bool batch_a,
+		unsigned read_idx) = 0;
 
 	/**
 	 * Reset state to handle a fresh file
@@ -471,7 +472,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		unsigned read_idx);
 
 	/**
 	 * Scan to the next FASTA record (starting with >) and return the first
@@ -523,7 +525,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		unsigned read_idx);
 
 	bool secondName_;	// true if --tab6, false if --tab5
 };
@@ -568,7 +571,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		unsigned read_idx);
 
 	EList<std::string> qualToks_;
 };
@@ -588,7 +592,8 @@ public:
 		eat_(length_-1),
 		beginning_(true),
 		bufCur_(0),
-		subReadCnt_(0llu)
+		cur_(0llu),
+		last_(0llu)
 	{
 		assert_gt(freq_, 0);
 		resetForNextFile();
@@ -612,7 +617,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		unsigned read_idx);
 	
 	/**
 	 * Reset state to be read for the next file.
@@ -622,7 +628,7 @@ protected:
 		name_prefix_buf_.clear();
 		beginning_ = true;
 		bufCur_ = 0;
-		subReadCnt_ = readCnt_;
+		last_ = cur_;
 	}
 
 private:
@@ -638,7 +644,8 @@ private:
 	char name_int_buf_[20]; /// for composing offsets for names
 	size_t bufCur_;		/// buffer cursor; points to where we should
 						/// insert the next character
-	uint64_t subReadCnt_;/// number to subtract from readCnt_ to get
+	uint64_t cur_;
+	uint64_t last_;     /// number to subtract from readCnt_ to get
 						/// the pat id to output (so it resets to 0 for
 						/// each new sequence)
 };
@@ -675,7 +682,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		unsigned read_idx);
 	
 	/**
 	 * Reset state to be ready for the next file.
@@ -719,7 +727,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		unsigned read_idx);
 	
 	/**
 	 * Reset state to be ready for the next file.
diff --git a/read_qseq.cpp b/read_qseq.cpp
index f428d65..627bfe4 100644
--- a/read_qseq.cpp
+++ b/read_qseq.cpp
@@ -53,14 +53,13 @@ static int parseName(
  */
 pair<bool, int> QseqPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a, unsigned readi)
 {
 	int c = getc_wrapper();
 	while(c >= 0 && (c == '\n' || c == '\r')) {
 		c = getc_wrapper();
 	}
 	EList<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	for(; readi < pt.max_buf_ && c >= 0; readi++) {
 		readbuf[readi].readOrigBuf.clear();
@@ -72,6 +71,9 @@ pair<bool, int> QseqPatternSource::nextBatchFromFile(
 			c = getc_wrapper();
 		}
 	}
+	if (c != EOF) {
+		ungetc_wrapper(c);
+	}
 	return make_pair(c < 0, readi);
 }
 
diff --git a/scripts/test/simple_tests.pl b/scripts/test/simple_tests.pl
index f1fdde7..51ef2be 100644
--- a/scripts/test/simple_tests.pl
+++ b/scripts/test/simple_tests.pl
@@ -341,7 +341,7 @@ my @cases = (
 	  fastq1  => "\@r0\nAGCATCGATC\r\n+\nIIIIIIIIII\n".
 	             "\@r1\nTCAGTTTTTGA\r\n+\nIIIIIIIIIII\n",
 	  fastq2  => "\@r0\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n".
-	             "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII",
+	             "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII\n",
 	  pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] },
 
 	# Paired-end reads that should align
@@ -354,7 +354,7 @@ my @cases = (
 	  fastq1  => "\@r0\nAGCATCGATC\r\n+\nIIIIIIIIII\n".
 	             "\@r1\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n",
 	  fastq2  => "\@r0\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n".
-	             "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII",
+	             "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII\n",
 	  pairhits => [ { }, { "0,8" => 1 } ] },
 
 	# Paired-end reads that should align
@@ -367,7 +367,7 @@ my @cases = (
 	  fastq1  => "\@r0\nAGCATCGATC\r\n+\nIIIIIIIIII\n".
 				 "\@r1\nTCAGTTTTTGA\r\n+\nIIIIIIIIIII\n",
 	  fastq2  => "\@r0\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n".
-				 "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII",
+				 "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII\n",
 	  pairhits => [ { "0,8" => 1 }, { } ] },
 
 	# Paired-end reads with left end entirely trimmed away
@@ -4327,16 +4327,18 @@ sub writeReads($$$$$$$$$) {
 		$fq1,
 		$fq2) = @_;
 
-	open(FQ1, defined($compressed) ? "| gzip -c >$fq1.gz" : ">$fq1") || die "Could not open '$fq1' for writing";
-	open(FQ2, defined($compressed) ? "| gzip -c >$fq2.gz" : ">$fq2") || die "Could not open '$fq2' for writing";
 	my $pe = (defined($mate1s) && $mate1s ne "");
 	if($pe) {
 		for (0..scalar(@$mate1s)-1) {
+			open(FQ1, defined($compressed) ? "| gzip -c >$fq1->[$_]" : ">$fq1->[$_]") || die "Could not open '$fq1->[$_]' for writing";
+			open(FQ2, defined($compressed) ? "| gzip -c >$fq2->[$_]" : ">$fq2->[$_]") || die "Could not open '$fq2->[$_]' for writing";
+
 			my $m1 = $mate1s->[$_];
 			my $m2 = $mate2s->[$_];
 			my $q1 = $qual1s->[$_];
 			my $q2 = $qual2s->[$_];
 			my $nm = $names->[$_];
+
 			defined($m1) || die;
 			defined($m2) || die;
 			$q1 = $q1 || ("I" x length($m1));
@@ -4344,20 +4346,24 @@ sub writeReads($$$$$$$$$) {
 			$nm = $nm || "r$_";
 			print FQ1 "\@$nm/1\n$m1\n+\n$q1\n";
 			print FQ2 "\@$nm/2\n$m2\n+\n$q2\n";
+			close(FQ1);
+			close(FQ2);
 		}
 	} else {
 		for (0..scalar(@$reads)-1) {
+			open(FQ1, defined($compressed) ? "| gzip -c >$fq1->[$_]" : ">$fq1->[$_]") || die "Could not open '$fq1->[$_]' for writing";
+
 			my $read = $reads->[$_];
 			defined($read) || die;
 			my $qual = $quals->[$_];
 			my $nm = $names->[$_];
+
 			$qual = $qual || ("I" x length($read));
 			$nm = $nm || "r$_";
 			print FQ1 "\@$nm\n$read\n+\n$qual\n";
+			close(FQ1);
 		}
 	}
-	close(FQ1);
-	close(FQ2);
 }
 
 ##
@@ -4476,6 +4482,21 @@ my  $idx_type = "";
 			}
 		}
 	} else {
+		$mate1arg = [];
+		$mate2arg = [];
+		my $ext = $compressed ? ".fq.gz" : ".fq";
+		my $base_filename = ".simple_tests";
+
+		for (0 .. scalar($pe ? @$mate1s : @$reads) - 1) {
+			my $f = $base_filename . ".1" . ('a' .. 'z')[$_] . $ext;
+			push @$mate1arg, $f;
+
+			if ($pe) {
+				$f = $base_filename . ".2" . ('a' .. 'z')[$_] . $ext;
+				push @$mate2arg, $f;
+			}
+		}
+
 		writeReads(
 			$reads,
 			$quals,
@@ -4484,10 +4505,9 @@ my  $idx_type = "";
 			$mate2s,
 			$qual2s,
 			$names,
-			".simple_tests.1.fq",
-			".simple_tests.2.fq");
-		$mate1arg = defined($compressed) ? ".simple_tests.1.fq.gz" : ".simple_tests.1.fq";
-		$mate2arg = defined($compressed) ? ".simple_tests.2.fq.gz" : ".simple_tests.2.fq";
+			$mate1arg,
+			$mate2arg);
+
 		$formatarg = "-q";
 		$readarg = $mate1arg;
 	}
@@ -4495,12 +4515,23 @@ my  $idx_type = "";
 	my $debug_arg = "";
 	$debug_arg = "--debug" if $debug_mode;
 	my $cmd;
+	my $batch_size = int(rand(16) + 1);
 	if($pe) {
 		# Paired-end case
-		$cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args -x .simple_tests.tmp $formatarg -1 $mate1arg -2 $mate2arg";
+		if (ref $mate1arg eq "ARRAY") {
+			$cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg -1 " . join(",", @$mate1arg) . " -2 " . join(",", @$mate2arg);
+		}
+		else {
+			$cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg -1 $mate1arg -2 $mate2arg";
+		}
 	} else {
 		# Unpaired case
-		$cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args -x .simple_tests.tmp $formatarg $readarg";
+		if (ref $readarg eq "ARRAY") {
+			$cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg " . join(",", @$readarg);
+		}
+		else {
+			$cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg $readarg";
+		}
 	}
 	print "$cmd\n";
 	open(BT, "$cmd |") || die "Could not open pipe '$cmd |'";

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bowtie2.git



More information about the debian-med-commit mailing list