[med-svn] [jellyfish] 01/06: New upstream version 2.2.7

Andreas Tille tille at debian.org
Mon Feb 5 21:36:09 UTC 2018


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository jellyfish.

commit 37545beb31fa8e4070aa691fb6906c1577957f48
Author: Andreas Tille <tille at debian.org>
Date:   Mon Feb 5 22:20:50 2018 +0100

    New upstream version 2.2.7
---
 .gitignore                                        |   1 +
 .travis.yml                                       |   5 +
 Makefile.am                                       |  18 +-
 README.md                                         |   6 +
 config.rpath                                      | 545 ++++++++++++++++++++++
 configure.ac                                      |  24 +-
 examples/count_in_file/Makefile                   |   3 +-
 examples/jf_count_dump/Makefile                   |   3 +-
 examples/query_per_sequence/Makefile              |   3 +-
 include/jellyfish/mer_overlap_sequence_parser.hpp | 111 +++--
 include/jellyfish/parser_common.hpp               |  61 +++
 include/jellyfish/sam_format.hpp                  | 105 +++++
 include/jellyfish/stream_manager.hpp              | 106 ++++-
 include/jellyfish/whole_sequence_parser.hpp       | 108 +++--
 jellyfish/fastq2sam.cc                            |  47 ++
 jellyfish/fastq2sam_cmdline.yaggo                 |   8 +
 m4/m4-ax_check_compile_flag.m4                    |  74 +++
 m4/m4-ax_ext.m4                                   | 303 ++++++++++++
 m4/m4-ax_gcc_x86_avx_xgetbv.m4                    |  79 ++++
 m4/m4-ax_gcc_x86_cpuid.m4                         |  89 ++++
 sub_commands/count_main.cc                        | 102 ++--
 sub_commands/count_main_cmdline.yaggo             |  10 +
 swig/mer_file.i                                   |   4 +-
 tests/bloom_filter.sh                             |  29 +-
 tests/compat.sh.in                                |   1 +
 tests/generate_sequence.sh                        |   6 +
 tests/large_key.sh                                |   6 +-
 tests/quality_filter.sh                           |   6 +
 tests/sam.sh                                      |  45 ++
 unit_tests/test_mer_dna.cc                        |   1 -
 unit_tests/test_mer_dna_bloom_counter.cc          |  20 +-
 unit_tests/test_mer_iterator.cc                   |   6 +-
 unit_tests/test_mer_overlap_sequence_parser.cc    |   7 +-
 unit_tests/test_whole_sequence_parser.cc          |   6 +-
 34 files changed, 1791 insertions(+), 157 deletions(-)

diff --git a/.gitignore b/.gitignore
index ba20c38..c81fd48 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ config.h.in~
 config.sub
 ltmain.sh
 *_cmdline.hpp
+test-driver
diff --git a/.travis.yml b/.travis.yml
index d8fd723..f55f113 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,3 +4,8 @@ before_install:
   - curl -L -o ~/bin/yaggo https://github.com/gmarcais/yaggo/releases/download/v1.5.9/yaggo
   - chmod a+rx ~/bin/yaggo
 script: autoreconf -i && ./configure YAGGO=~/bin/yaggo && make && make check
+addons:
+  artifacts:
+    paths:
+      - $(git ls-files -o | grep '\.log$' | tr "\n" ":")
+  s3_region: "us-east-1"
diff --git a/Makefile.am b/Makefile.am
index 1a6ae27..91c0efc 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -6,9 +6,10 @@ man1_MANS = doc/jellyfish.man
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = jellyfish-2.0.pc
 
-AM_LDFLAGS = -lpthread # $(VALGRIND_LIBS)
-AM_CPPFLAGS = -Wall -Wnon-virtual-dtor -I$(srcdir) -I$(srcdir)/include -g -O3 $(VALGRIND_CFLAGS)
-AM_CXXFLAGS = $(ALL_CXXFLAGS) -g -O3
+AM_LDFLAGS = -pthread $(HTSLIB_LIBS) $(HTSLIB_RPATH) # $(VALGRIND_LIBS)
+AM_CPPFLAGS = -Wall -Wnon-virtual-dtor -I$(srcdir) -I$(srcdir)/include $(VALGRIND_CFLAGS)
+AM_CXXFLAGS = $(ALL_CXXFLAGS) $(HTSLIB_CFLAGS)
+#AM_LDADD    = $(HTSLIB_LIBS) -lpthread
 
 noinst_HEADERS = $(YAGGO_SOURCES)
 bin_PROGRAMS =
@@ -31,7 +32,7 @@ YAGGO_SOURCES = # Append all file to be built by yaggo
 bin_PROGRAMS += bin/jellyfish
 lib_LTLIBRARIES = libjellyfish-2.0.la
 LDADD = libjellyfish-2.0.la # $(VALGRIND_LIBS)
-check_PROGRAMS = bin/generate_sequence
+check_PROGRAMS = bin/generate_sequence bin/fastq2sam
 
 ############################
 # Build Jellyfish the exec #
@@ -50,7 +51,6 @@ bin_jellyfish_SOURCES = sub_commands/jellyfish.cc	\
                         jellyfish/merge_files.cc
 bin_jellyfish_LDFLAGS = $(AM_LDFLAGS) $(STATIC_FLAGS)
 
-
 YAGGO_SOURCES += sub_commands/count_main_cmdline.hpp	\
                  sub_commands/info_main_cmdline.hpp	\
                  sub_commands/dump_main_cmdline.hpp	\
@@ -94,6 +94,8 @@ library_include_HEADERS = $(JFI)/allocators_mmap.hpp			\
                           $(JFI)/stream_iterator.hpp			\
                           $(JFI)/mer_overlap_sequence_parser.hpp	\
                           $(JFI)/whole_sequence_parser.hpp		\
+                          $(JFI)/parser_common.hpp			\
+                          $(JFI)/sam_format.hpp				\
                           $(JFI)/binary_dumper.hpp			\
                           $(JFI)/sorted_dumper.hpp			\
                           $(JFI)/text_dumper.hpp $(JFI)/dumper.hpp	\
@@ -131,6 +133,9 @@ bin_generate_sequence_SOURCES = jellyfish/generate_sequence.cc		\
                                 jellyfish/dbg.cc
 YAGGO_SOURCES += jellyfish/generate_sequence_cmdline.hpp
 
+bin_fastq2sam_SOURCES = jellyfish/fastq2sam.cc
+YAGGO_SOURCES += jellyfish/fastq2sam_cmdline.hpp
+
 #########
 # Tests #
 #########
@@ -141,7 +146,7 @@ AM_SH_LOG_FLAGS =
 TESTS = tests/generate_sequence.sh tests/parallel_hashing.sh	\
         tests/merge.sh tests/bloom_filter.sh tests/big.sh	\
         tests/subset_hashing.sh tests/multi_file.sh		\
-        tests/bloom_counter.sh tests/large_key.sh
+        tests/bloom_counter.sh tests/large_key.sh tests/sam.sh
 
 EXTRA_DIST += $(TESTS)
 clean-local: clean-local-check
@@ -158,6 +163,7 @@ tests/merge.log: tests/generate_sequence.log
 tests/min_qual.log: tests/generate_fastq_sequence.log
 tests/large_key.log: tests/generate_sequence.log
 tests/quality_filter.log: tests/generate_sequence.log
+tests/sam.log: tests/generate_sequence.log
 
 # SWIG tests
 TESTS += tests/swig_python.sh tests/swig_ruby.sh tests/swig_perl.sh
diff --git a/README.md b/README.md
index 6c72475..7ac6951 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,12 @@ make -j 4
 sudo make install
 ```
 
+If the software is installed in system directories (hint: you needed to use `sudo` to install), like the example above, then the system library cache must be updated like such:
+
+```Shell
+sudo ldconfig
+```
+
 Extra / Examples
 ----------------
 
diff --git a/config.rpath b/config.rpath
new file mode 100644
index 0000000..c683222
--- /dev/null
+++ b/config.rpath
@@ -0,0 +1,545 @@
+#! /bin/sh
+# Output a system dependent set of variables, describing how to set the
+# run time search path of shared libraries in an executable.
+#
+#   Copyright 1996-2003 Free Software Foundation, Inc.
+#   Taken from GNU libtool, 2001
+#   Originally by Gordon Matzigkeit <gord at gnu.ai.mit.edu>, 1996
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+#   As a special exception to the GNU General Public License, if you
+#   distribute this file as part of a program that contains a
+#   configuration script generated by Autoconf, you may include it under
+#   the same distribution terms that you use for the rest of that program.
+#
+# The first argument passed to this file is the canonical host specification,
+#    CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or
+#    CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# The environment variables CC, GCC, LDFLAGS, LD, with_gnu_ld
+# should be set by the caller.
+#
+# The set of defined variables is at the end of this script.
+
+# Known limitations:
+# - On IRIX 6.5 with CC="cc", the run time search patch must not be longer
+#   than 256 bytes, otherwise the compiler driver will dump core. The only
+#   known workaround is to choose shorter directory names for the build
+#   directory and/or the installation directory.
+
+# All known linkers require a `.a' archive for static linking (except M$VC,
+# which needs '.lib').
+libext=a
+shrext=.so
+
+host="$1"
+host_cpu=`echo "$host" | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+host_vendor=`echo "$host" | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+host_os=`echo "$host" | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+# Code taken from libtool.m4's AC_LIBTOOL_PROG_COMPILER_PIC.
+
+wl=
+if test "$GCC" = yes; then
+  wl='-Wl,'
+else
+  case "$host_os" in
+    aix*)
+      wl='-Wl,'
+      ;;
+    mingw* | pw32* | os2*)
+      ;;
+    hpux9* | hpux10* | hpux11*)
+      wl='-Wl,'
+      ;;
+    irix5* | irix6* | nonstopux*)
+      wl='-Wl,'
+      ;;
+    newsos6)
+      ;;
+    linux*)
+      case $CC in
+        icc|ecc)
+          wl='-Wl,'
+          ;;
+        ccc)
+          wl='-Wl,'
+          ;;
+      esac
+      ;;
+    osf3* | osf4* | osf5*)
+      wl='-Wl,'
+      ;;
+    sco3.2v5*)
+      ;;
+    solaris*)
+      wl='-Wl,'
+      ;;
+    sunos4*)
+      wl='-Qoption ld '
+      ;;
+    sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+      wl='-Wl,'
+      ;;
+    sysv4*MP*)
+      ;;
+    uts4*)
+      ;;
+  esac
+fi
+
+# Code taken from libtool.m4's AC_LIBTOOL_PROG_LD_SHLIBS.
+
+hardcode_libdir_flag_spec=
+hardcode_libdir_separator=
+hardcode_direct=no
+hardcode_minus_L=no
+
+case "$host_os" in
+  cygwin* | mingw* | pw32*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+esac
+
+ld_shlibs=yes
+if test "$with_gnu_ld" = yes; then
+  case "$host_os" in
+    aix[3-9]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+        ld_shlibs=no
+      fi
+      ;;
+    amigaos*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_minus_L=yes
+      # Samuel A. Falvo II <kc5tja at dolphin.openprojects.net> reports
+      # that the semantics of dynamic libraries on AmigaOS, at least up
+      # to version 4, is to share data among multiple programs linked
+      # with the same dynamic library.  Since this doesn't match the
+      # behavior of shared libraries on other platforms, we can use
+      # them.
+      ld_shlibs=no
+      ;;
+    beos*)
+      if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    cygwin* | mingw* | pw32*)
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      hardcode_libdir_flag_spec='-L$libdir'
+      if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    netbsd*)
+      ;;
+    solaris* | sysv5*)
+      if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then
+        ld_shlibs=no
+      elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+    sunos4*)
+      hardcode_direct=yes
+      ;;
+    *)
+      if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+        :
+      else
+        ld_shlibs=no
+      fi
+      ;;
+  esac
+
+  if test "$ld_shlibs" = yes; then
+    # Unlike libtool, we use -rpath here, not --rpath, since the documented
+    # option of GNU ld is called -rpath, not --rpath.
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+  fi
+
+else
+  case "$host_os" in
+    aix3*)
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      hardcode_minus_L=yes
+      if test "$GCC" = yes; then
+        # Neither direct hardcoding nor static linking is supported with a
+        # broken collect2.
+        hardcode_direct=unsupported
+      fi
+      ;;
+    aix[4-9]*)
+      if test "$host_cpu" = ia64; then
+        # On IA64, the linker does run time linking by default, so we don't
+        # have to do anything special.
+        aix_use_runtimelinking=no
+      else
+        aix_use_runtimelinking=no
+        # Test if we are trying to use run time linking or normal
+        # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+        # need to do runtime linking.
+        case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+          for ld_flag in $LDFLAGS; do
+            if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+              aix_use_runtimelinking=yes
+              break
+            fi
+          done
+        esac
+      fi
+      hardcode_direct=yes
+      hardcode_libdir_separator=':'
+      if test "$GCC" = yes; then
+        case $host_os in aix4.[012]|aix4.[012].*)
+          collect2name=`${CC} -print-prog-name=collect2`
+          if test -f "$collect2name" && \
+            strings "$collect2name" | grep resolve_lib_name >/dev/null
+          then
+            # We have reworked collect2
+            hardcode_direct=yes
+          else
+            # We have old collect2
+            hardcode_direct=unsupported
+            hardcode_minus_L=yes
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_libdir_separator=
+          fi
+        esac
+      fi
+      # Begin _LT_AC_SYS_LIBPATH_AIX.
+      echo 'int main () { return 0; }' > conftest.c
+      ${CC} ${LDFLAGS} conftest.c -o conftest
+      aix_libpath=`dump -H conftest 2>/dev/null | sed -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
+}'`
+      if test -z "$aix_libpath"; then
+        aix_libpath=`dump -HX64 conftest 2>/dev/null | sed -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
+}'`
+      fi
+      if test -z "$aix_libpath"; then
+        aix_libpath="/usr/lib:/lib"
+      fi
+      rm -f conftest.c conftest
+      # End _LT_AC_SYS_LIBPATH_AIX.
+      if test "$aix_use_runtimelinking" = yes; then
+        hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+      else
+        if test "$host_cpu" = ia64; then
+          hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+        else
+          hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+        fi
+      fi
+      ;;
+    amigaos*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_minus_L=yes
+      # see comment about different semantics on the GNU ld section
+      ld_shlibs=no
+      ;;
+    bsdi4*)
+      ;;
+    cygwin* | mingw* | pw32*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      hardcode_libdir_flag_spec=' '
+      libext=lib
+      ;;
+    darwin* | rhapsody*)
+      if $CC -v 2>&1 | grep 'Apple' >/dev/null ; then
+        hardcode_direct=no
+      fi
+      ;;
+    dgux*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      ;;
+    freebsd2.2*)
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      ;;
+    freebsd2*)
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      ;;
+    freebsd*)
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      ;;
+    hpux9*)
+      hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_direct=yes
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      hardcode_minus_L=yes
+      ;;
+    hpux10* | hpux11*)
+      if test "$with_gnu_ld" = no; then
+        case "$host_cpu" in
+          hppa*64*)
+            hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+            hardcode_libdir_separator=:
+            hardcode_direct=no
+            ;;
+          ia64*)
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_direct=no
+            # hardcode_minus_L: Not really in the search PATH,
+            # but as the default location of the library.
+            hardcode_minus_L=yes
+            ;;
+          *)
+            hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+            hardcode_libdir_separator=:
+            hardcode_direct=yes
+            # hardcode_minus_L: Not really in the search PATH,
+            # but as the default location of the library.
+            hardcode_minus_L=yes
+            ;;
+        esac
+      fi
+      ;;
+    irix5* | irix6* | nonstopux*)
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+    netbsd*)
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      ;;
+    newsos6)
+      hardcode_direct=yes
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+    openbsd*)
+      hardcode_direct=yes
+      if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+        hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+      else
+        case "$host_os" in
+          openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+            hardcode_libdir_flag_spec='-R$libdir'
+            ;;
+          *)
+            hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+            ;;
+        esac
+      fi
+      ;;
+    os2*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_minus_L=yes
+      ;;
+    osf3*)
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+    osf4* | osf5*)
+      if test "$GCC" = yes; then
+        hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      else
+        # Both cc and cxx compiler support -rpath directly
+        hardcode_libdir_flag_spec='-rpath $libdir'
+      fi
+      hardcode_libdir_separator=:
+      ;;
+    sco3.2v5*)
+      ;;
+    solaris*)
+      hardcode_libdir_flag_spec='-R$libdir'
+      ;;
+    sunos4*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      ;;
+    sysv4)
+      case $host_vendor in
+        sni)
+          hardcode_direct=yes # is this really true???
+          ;;
+        siemens)
+          hardcode_direct=no
+          ;;
+        motorola)
+          hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+          ;;
+      esac
+      ;;
+    sysv4.3*)
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+        ld_shlibs=yes
+      fi
+      ;;
+    sysv4.2uw2*)
+      hardcode_direct=yes
+      hardcode_minus_L=no
+      ;;
+    sysv5OpenUNIX8* | sysv5UnixWare7* |  sysv5uw[78]* | unixware7*)
+      ;;
+    sysv5*)
+      hardcode_libdir_flag_spec=
+      ;;
+    uts4*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      ;;
+    *)
+      ld_shlibs=no
+      ;;
+  esac
+fi
+
+# Check dynamic linker characteristics
+# Code taken from libtool.m4's AC_LIBTOOL_SYS_DYNAMIC_LINKER.
+libname_spec='lib$name'
+case "$host_os" in
+  aix3*)
+    ;;
+  aix[4-9]*)
+    ;;
+  amigaos*)
+    ;;
+  beos*)
+    ;;
+  bsdi4*)
+    ;;
+  cygwin* | mingw* | pw32*)
+    shrext=.dll
+    ;;
+  darwin* | rhapsody*)
+    shrext=.dylib
+    ;;
+  dgux*)
+    ;;
+  freebsd*)
+    ;;
+  gnu*)
+    ;;
+  hpux9* | hpux10* | hpux11*)
+    case "$host_cpu" in
+      ia64*)
+        shrext=.so
+        ;;
+      hppa*64*)
+        shrext=.sl
+        ;;
+      *)
+        shrext=.sl
+        ;;
+    esac
+    ;;
+  irix5* | irix6* | nonstopux*)
+    case "$host_os" in
+      irix5* | nonstopux*)
+        libsuff= shlibsuff=
+        ;;
+      *)
+        case $LD in
+          *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= ;;
+          *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 ;;
+          *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 ;;
+          *) libsuff= shlibsuff= ;;
+        esac
+        ;;
+    esac
+    ;;
+  linux*oldld* | linux*aout* | linux*coff*)
+    ;;
+  linux*)
+    ;;
+  netbsd*)
+    ;;
+  newsos6)
+    ;;
+  nto-qnx)
+    ;;
+  openbsd*)
+    ;;
+  os2*)
+    libname_spec='$name'
+    shrext=.dll
+    ;;
+  osf3* | osf4* | osf5*)
+    ;;
+  sco3.2v5*)
+    ;;
+  solaris*)
+    ;;
+  sunos4*)
+    ;;
+  sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+    ;;
+  sysv4*MP*)
+    ;;
+  uts4*)
+    ;;
+esac
+
+sed_quote_subst='s/\(["`$\\]\)/\\\1/g'
+escaped_wl=`echo "X$wl" | sed -e 's/^X//' -e "$sed_quote_subst"`
+shlibext=`echo "$shrext" | sed -e 's,^\.,,'`
+escaped_hardcode_libdir_flag_spec=`echo "X$hardcode_libdir_flag_spec" | sed -e 's/^X//' -e "$sed_quote_subst"`
+
+sed -e 's/^\([a-zA-Z0-9_]*\)=/acl_cv_\1=/' <<EOF
+
+# How to pass a linker flag through the compiler.
+wl="$escaped_wl"
+
+# Static library suffix (normally "a").
+libext="$libext"
+
+# Shared library suffix (normally "so").
+shlibext="$shlibext"
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec="$escaped_hardcode_libdir_flag_spec"
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator="$hardcode_libdir_separator"
+
+# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct="$hardcode_direct"
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L="$hardcode_minus_L"
+
+EOF
diff --git a/configure.ac b/configure.ac
index b4c1799..a8874e7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([jellyfish], [2.2.6], [gmarcais at umd.edu])
+AC_INIT([jellyfish], [2.2.7], [gmarcais at umd.edu])
 AC_CANONICAL_HOST
 AC_CONFIG_MACRO_DIR([m4])
 AM_INIT_AUTOMAKE([subdir-objects foreign parallel-tests color-tests])
@@ -6,6 +6,8 @@ AM_SILENT_RULES([yes])
 AC_CONFIG_SRCDIR([jellyfish])
 AC_CONFIG_HEADERS([config.h])
 AC_PROG_LIBTOOL
+AC_LIB_RPATH
+PKG_PROG_PKG_CONFIG
 
 # Change default compilation flags
 AC_SUBST([ALL_CXXFLAGS], [-std=c++0x])
@@ -16,6 +18,18 @@ AC_PROG_CXX
 # Major version of the library
 AC_SUBST([PACKAGE_LIB], [2.0])
 
+# Try to find htslib to read SAM/BAM/CRAM files
+AC_ARG_ENABLE([htslib],
+              [AS_HELP_STRING([--enable-htslib], [Look for the HTS library (default=yes)])])
+echo "enable_htslib $enable_htslib"
+AS_IF([test "x$enable_htslib" = "xyes" -o "x$enable_htslib" = "x"],
+      [PKG_CHECK_MODULES([HTSLIB], [htslib], [AC_DEFINE([HAVE_HTSLIB], [1], [Defined if htslib is available])], [true])]
+      [AC_LIB_LINKFLAGS_FROM_LIBS([HTSLIB_RPATH], [$HTSLIB_LIBS], [LIBTOOL])])
+# Look for sammtools
+AC_ARG_VAR([SAMTOOLS], [Path to samtools program])
+AS_IF([test "x$SAMTOOLS" = "x"],
+      [AC_PATH_PROG([SAMTOOLS], [samtools])])
+
 # Check for md5 or md5sum
 AC_ARG_VAR([MD5], [Path to md5 hashing program])
 AS_IF([test "x$MD5" = "x"], AC_CHECK_PROG([MD5], [md5sum], [md5sum]), [])
@@ -40,7 +54,7 @@ AC_ARG_WITH([sse],
             [AS_HELP_STRING([--with-sse], [enable SSE])],
             [], [with_sse=yes])
 AS_IF([test "x$with_sse" != xno],
-      [AC_DEFINE([HAVE_SSE], [1], [Define if you have SSE])],
+      [AX_EXT],
       [])
 
 # Use valgrind to check memory allocation with mmap
@@ -101,6 +115,9 @@ AC_ARG_ENABLE([ruby-binding],
 # --enable-perl-binding
 AC_ARG_ENABLE([perl-binding],
               [AC_HELP_STRING([--enable-perl-binding@<:@=PATH@:>@], [create SWIG perl module and install in PATH])])
+# --enable-all-binding
+AC_ARG_ENABLE([all-binding],
+              [AC_HELP_STRING([--enable-all-binding@<:@=PATH@:>@], [create SWIG module for all languages and install in PATH])])
 
 # --enable-swig
 AC_ARG_ENABLE([swig],
@@ -112,18 +129,21 @@ AS_IF([test -n "$SWIG"],
 AM_CONDITIONAL([HAVE_SWIG], [test -n "$SWIG"])
 
 # Python binding setup
+AS_IF([test -z "$enable_python_binding"], [enable_python_binding="$enable_all_binding"])
 AM_CONDITIONAL(PYTHON_BINDING, [test -n "$enable_python_binding" -a x$enable_python_binding != xno])
 AM_COND_IF([PYTHON_BINDING],
            [AS_IF([test x$enable_python_binding != xyes], [PYTHON_SITE_PKG=$enable_python_binding])]
            [AX_PYTHON_DEVEL([], [$prefix])])
 
 # Ruby binding setup
+AS_IF([test -z "$enable_ruby_binding"], [enable_ruby_binding="$enable_all_binding"])
 AM_CONDITIONAL([RUBY_BINDING], [test -n "$enable_ruby_binding" -a x$enable_ruby_binding != xno])
 AM_COND_IF([RUBY_BINDING],
            [AS_IF([test x$enable_ruby_binding != xyes], [RUBY_EXT_LIB=$enable_ruby_binding])]
            [AX_RUBY_EXT([$prefix])])
 
 # Perl binding setup
+AS_IF([test -z "$enable_perl_binding"], [enable_perl_binding="$enable_all_binding"])
 AM_CONDITIONAL([PERL_BINDING], [test -n "$enable_perl_binding" -a x$enable_perl_binding != xno])
 AM_COND_IF([PERL_BINDING],
            [AS_IF([test x$enable_perl_binding != xyes], [PERL_EXT_LIB=$enable_perl_binding])]
diff --git a/examples/count_in_file/Makefile b/examples/count_in_file/Makefile
index 4b6358c..edb8c7c 100644
--- a/examples/count_in_file/Makefile
+++ b/examples/count_in_file/Makefile
@@ -1,6 +1,7 @@
 CC = g++
 CXXFLAGS = $(shell pkg-config --cflags jellyfish-2.0) -std=c++0x -Wall -O3
-LDFLAGS = $(shell pkg-config --libs jellyfish-2.0) -Wl,--rpath=$(shell pkg-config --libs-only-L jellyfish-2.0 | sed -e 's/-L//g')
+LDFLAGS = -Wl,--rpath=$(shell pkg-config --libs-only-L jellyfish-2.0 | sed -e 's/-L//g')
+LDLIBS = $(shell pkg-config --libs jellyfish-2.0)
 
 all: count_in_file
 clean:
diff --git a/examples/jf_count_dump/Makefile b/examples/jf_count_dump/Makefile
index 2880b13..b37a2c0 100644
--- a/examples/jf_count_dump/Makefile
+++ b/examples/jf_count_dump/Makefile
@@ -1,6 +1,7 @@
 CC = g++
 CXXFLAGS = $(shell pkg-config --cflags jellyfish-2.0) -std=c++0x -Wall -O3
-LDFLAGS = $(shell pkg-config --libs jellyfish-2.0) -Wl,--rpath=$(shell pkg-config --libs-only-L jellyfish-2.0 | sed -e 's/-L//g')
+LDFLAGS = -Wl,--rpath=$(shell pkg-config --libs-only-L jellyfish-2.0 | sed -e 's/-L//g')
+LDLIBS = $(shell pkg-config --libs jellyfish-2.0)
 
 all: jf_count_dump
 clean:
diff --git a/examples/query_per_sequence/Makefile b/examples/query_per_sequence/Makefile
index b9d824d..fd199c9 100644
--- a/examples/query_per_sequence/Makefile
+++ b/examples/query_per_sequence/Makefile
@@ -1,6 +1,7 @@
 CC = g++
 CXXFLAGS = $(shell pkg-config --cflags jellyfish-2.0) -std=c++0x -Wall -O3
-LDFLAGS = $(shell pkg-config --libs jellyfish-2.0) -Wl,--rpath=$(shell pkg-config --libs-only-L jellyfish-2.0 | sed -e 's/-L//g')
+LDFLAGS = -Wl,--rpath=$(shell pkg-config --libs-only-L jellyfish-2.0 | sed -e 's/-L//g')
+LDLIBS = $(shell pkg-config --libs jellyfish-2.0)
 
 all: query_per_sequence
 query_per_sequence: query_per_sequence.cc sequence_mers.hpp
diff --git a/include/jellyfish/mer_overlap_sequence_parser.hpp b/include/jellyfish/mer_overlap_sequence_parser.hpp
index 488cd83..2932042 100644
--- a/include/jellyfish/mer_overlap_sequence_parser.hpp
+++ b/include/jellyfish/mer_overlap_sequence_parser.hpp
@@ -24,6 +24,7 @@
 #include <jellyfish/err.hpp>
 #include <jellyfish/cooperative_pool2.hpp>
 #include <jellyfish/cpp_array.hpp>
+#include <jellyfish/parser_common.hpp>
 
 namespace jellyfish {
 
@@ -35,8 +36,6 @@ struct sequence_ptr {
 template<typename StreamIterator>
 class mer_overlap_sequence_parser : public jellyfish::cooperative_pool2<mer_overlap_sequence_parser<StreamIterator>, sequence_ptr> {
   typedef jellyfish::cooperative_pool2<mer_overlap_sequence_parser<StreamIterator>, sequence_ptr> super;
-  enum file_type { DONE_TYPE, FASTA_TYPE, FASTQ_TYPE };
-  typedef std::unique_ptr<std::istream> stream_type;
 
   struct stream_status {
     char*       seam;
@@ -104,11 +103,16 @@ public:
     case FASTQ_TYPE:
       read_fastq(st, buff);
       break;
+#ifdef HAVE_HTSLIB
+    case SAM_TYPE:
+      read_sam(st, buff);
+      break;
+#endif
     case DONE_TYPE:
       return true;
     }
 
-    if(st.stream->good())
+    if(st.stream.good())
       return false;
 
     // Reach the end of file, close current and try to open the next one
@@ -128,31 +132,42 @@ protected:
     // requesting a new one.
     st.stream.reset();
     st.stream = streams_iterator_.next();
-    if(!st.stream) {
+    if(!st.stream.good()) {
       st.type = DONE_TYPE;
       return false;
     }
 
     ++files_read_;
-    switch(st.stream->peek()) {
-    case EOF: return open_next_file(st);
-    case '>':
-      st.type = FASTA_TYPE;
-      ignore_line(*st.stream); // Pass header
-      ++reads_read_;
-      break;
-    case '@':
-      st.type = FASTQ_TYPE;
-      ignore_line(*st.stream); // Pass header
-      ++reads_read_;
-      break;
-    default:
-      throw std::runtime_error("Unsupported format"); // Better error management
+    if(st.stream.standard) {
+      switch(st.stream.standard->peek()) {
+      case EOF: return open_next_file(st);
+      case '>':
+        st.type = FASTA_TYPE;
+        ignore_line(*st.stream.standard); // Pass header
+        ++reads_read_;
+        break;
+      case '@':
+        st.type = FASTQ_TYPE;
+        ignore_line(*st.stream.standard); // Pass header
+        ++reads_read_;
+        break;
+      default:
+        throw std::runtime_error("Unsupported format"); // Better error management
+      }
+    }
+#ifdef HAVE_HTSLIB
+    else if(st.stream.sam) {
+      st.type = SAM_TYPE;
+    }
+#endif
+    else {
+      st.type = DONE_TYPE;
     }
     return true;
   }
 
   void read_fasta(stream_status& st, sequence_ptr& buff) {
+    auto& stream = *st.stream.standard;
     size_t read = 0;
     if(st.have_seam) {
       memcpy(buff.start, st.seam, mer_len_ - 1);
@@ -161,11 +176,12 @@ protected:
 
     // Here, the current stream is assumed to always point to some
     // sequence (or EOF). Never at header.
-    while(st.stream->good() && read < buf_size_ - mer_len_ - 1) {
-      read += read_sequence(*st.stream, read, buff.start, '>');
-      if(st.stream->peek() == '>') {
-        *(buff.start + read++) = 'N'; // Add N between reads
-        ignore_line(*st.stream); // Skip to next sequence (skip headers, quals, ...)
+    while(stream.good() && read < buf_size_ - mer_len_ - 1) {
+      read += read_sequence(stream, read, buff.start, '>');
+      if(stream.peek() == '>') {
+        if(read > 0)
+          *(buff.start + read++) = 'N'; // Add N between reads
+        ignore_line(stream); // Skip to next sequence (skip headers, quals, ...)
         ++reads_read_;
       }
     }
@@ -177,6 +193,7 @@ protected:
   }
 
   void read_fastq(stream_status& st, sequence_ptr& buff) {
+    auto& stream = *st.stream.standard;
     size_t read = 0;
     if(st.have_seam) {
       memcpy(buff.start, st.seam, mer_len_ - 1);
@@ -185,15 +202,15 @@ protected:
 
     // Here, the st.stream is assumed to always point to some
     // sequence (or EOF). Never at header.
-    while(st.stream->good() && read < buf_size_ - mer_len_ - 1) {
-      size_t nread  = read_sequence(*st.stream, read, buff.start, '+');
+    while(stream.good() && read < buf_size_ - mer_len_ - 1) {
+      size_t nread  = read_sequence(stream, read, buff.start, '+');
       read         += nread;
       st.seq_len   += nread;
-      if(st.stream->peek() == '+') {
-        skip_quals(*st.stream, st.seq_len);
-        if(st.stream->good()) {
+      if(stream.peek() == '+') {
+        skip_quals(stream, st.seq_len);
+        if(stream.good()) {
           *(buff.start + read++) = 'N'; // Add N between reads
-          ignore_line(*st.stream); // Skip sequence header
+          ignore_line(stream); // Skip sequence header
           ++reads_read_;
         }
         st.seq_len = 0;
@@ -206,6 +223,42 @@ protected:
       memcpy(st.seam, buff.end - mer_len_ + 1, mer_len_ - 1);
   }
 
+#ifdef HAVE_HTSLIB
+  void read_sam(stream_status& st, sequence_ptr& buff) {
+    auto&        stream    = *st.stream.sam;
+    size_t read = 0;
+    if(st.have_seam) {
+      memcpy(buff.start, st.seam, mer_len_ - 1);
+      read = mer_len_ - 1;
+    }
+
+    // std.seq_len is the amount of sequence left in the stream buffer
+    // to read. When st.seq_len==0, we need to get the next sequence
+    // from the stream.
+    auto seq = buff.start;
+    while(read < buf_size_ - mer_len_ - 1) {
+      if(st.seq_len == 0) {
+        if(stream.next() < 0)
+          break;
+        st.seq_len = stream.seq_len();
+        if(read > 0)
+          seq[read++] = 'N';
+        ++reads_read_;
+      }
+      const size_t start = stream.seq_len() - st.seq_len;
+      const size_t limit = std::min(st.seq_len, buf_size_ - 1 - read) + start;
+      for(size_t i = start; i < limit; ++i, ++read)
+        seq[read] = stream.base(i);
+      st.seq_len -= (limit - start);
+    }
+    buff.end = buff.start + read;
+
+    st.have_seam = read >= (size_t)(mer_len_ - 1);
+    if(st.have_seam)
+      memcpy(st.seam, buff.end - mer_len_ + 1, mer_len_ - 1);
+  }
+#endif
+
   size_t read_sequence(std::istream& is, const size_t read, char* const start, const char stop) {
     size_t nread = read;
 
diff --git a/include/jellyfish/parser_common.hpp b/include/jellyfish/parser_common.hpp
new file mode 100644
index 0000000..9741880
--- /dev/null
+++ b/include/jellyfish/parser_common.hpp
@@ -0,0 +1,61 @@
+/*  This file is part of Jellyfish.
+
+    Jellyfish is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Jellyfish is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Jellyfish.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __JELLYFISH_PARSER_COMMON_H__
+#define __JELLYFISH_PARSER_COMMON_H__
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <memory>
+
+#include <jellyfish/sam_format.hpp>
+
+namespace jellyfish {
+enum file_type { DONE_TYPE
+                 , FASTA_TYPE
+                 , FASTQ_TYPE
+#ifdef HAVE_HTSLIB
+                 , SAM_TYPE
+#endif
+};
+
+struct stream_type {
+  std::unique_ptr<std::istream> standard;
+#ifdef HAVE_HTSLIB
+  std::unique_ptr<sam_wrapper> sam;
+#endif
+
+  bool good() {
+    return (standard && standard->good())
+#ifdef HAVE_HTSLIB
+      || (sam && sam->good())
+#endif
+      ;
+  }
+
+  void reset() {
+    standard.reset();
+#ifdef HAVE_HTSLIB
+    sam.reset();
+#endif
+  }
+};
+
+}
+
+#endif // __JELLYFISH_PARSER_COMMON_H__
diff --git a/include/jellyfish/sam_format.hpp b/include/jellyfish/sam_format.hpp
new file mode 100644
index 0000000..50e1b22
--- /dev/null
+++ b/include/jellyfish/sam_format.hpp
@@ -0,0 +1,105 @@
+/*  This file is part of Jellyfish.
+
+    Jellyfish is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Jellyfish is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with Jellyfish.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __JELYFISH_SAM_FORMAT_H__
+#define __JELYFISH_SAM_FORMAT_H__
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef HAVE_HTSLIB
+#include <htslib/sam.h>
+
+namespace jellyfish {
+
+// Manage a sam_stream
+class sam_wrapper {
+protected:
+  htsFile*   m_fd;
+  bam_hdr_t* m_header;
+  bam1_t*    m_record;
+  uint8_t*   m_seq;
+  uint8_t*   m_qual;
+  bool       m_eof;
+
+public:
+  sam_wrapper()
+    : m_fd(nullptr)
+    , m_header(nullptr)
+    , m_record(nullptr)
+    , m_eof(false)
+  { }
+
+  sam_wrapper(const char* path)
+    : m_fd(sam_open(path, "r"))
+    , m_header(m_fd ? sam_hdr_read(m_fd) : nullptr)
+    , m_record(m_header ? bam_init1() : nullptr)
+    , m_eof()
+  { }
+
+  bool fail() const {
+    return !m_fd || !m_header || !m_record;
+  }
+
+  // True if stream is good
+  bool good() const {
+    return !fail() && !m_eof;
+  }
+
+  operator bool() const { return !fail(); }
+
+  // Read next record. Return >= 0 if successful. <0 otherwise.
+  int next() {
+    const auto res = sam_read1(m_fd, m_header, m_record);
+    if(res >= 0) {
+      m_seq  = bam_get_seq(m_record);
+      m_qual = bam_get_qual(m_record);
+    } else {
+      m_eof = true;
+    }
+    return res;
+  }
+
+  // Query the content of the record. Valid only after a successful
+  // next()
+  const bam1_t* record() { return m_record; }
+  ssize_t seq_len() const { return m_record->core.l_qseq; }
+  inline static char decode(int x) {
+    switch(x) {
+    case 1: return 'A';
+    case 2: return 'C';
+    case 4: return 'G';
+    case 8: return 'T';
+    default: return 'N';
+    }
+  }
+  char base(ssize_t i) const { return decode(bam_seqi(m_seq, i)); }
+  char qual(ssize_t i) const { return m_qual[i]; }
+  char* qname_str() const { return bam_get_qname(m_record); }
+  size_t qname_length() const { return m_record->core.l_qname; }
+  std::string qname() const { return std::string(qname_str(), qname_length()); }
+
+  virtual ~sam_wrapper() {
+    if(m_record) bam_destroy1(m_record);
+    if(m_header) bam_hdr_destroy(m_header);
+    if(m_fd) sam_close(m_fd);
+  }
+};
+
+}  // jellyfish
+#endif // HAVE_HTSLIB
+#endif // __JELYFISH_SAM_FORMAT_H__
diff --git a/include/jellyfish/stream_manager.hpp b/include/jellyfish/stream_manager.hpp
index 5b31708..71e7424 100644
--- a/include/jellyfish/stream_manager.hpp
+++ b/include/jellyfish/stream_manager.hpp
@@ -24,6 +24,7 @@
 
 #include <jellyfish/locks_pthread.hpp>
 #include <jellyfish/err.hpp>
+#include <jellyfish/parser_common.hpp>
 
 namespace jellyfish {
 template<typename PathIterator>
@@ -62,9 +63,28 @@ class stream_manager {
   };
   friend class pipe_stream;
 
-  typedef std::unique_ptr<std::istream> stream_type;
+#ifdef HAVE_HTSLIB
+  /// A wrapper around a SAM file wrapper.
+  class sam_stream : public sam_wrapper {
+    const char*     path_;
+    stream_manager& manager_;
+  public:
+    sam_stream(const char* path, stream_manager& manager)
+      : sam_wrapper(path)
+      , path_(path)
+      , manager_(manager)
+    {
+      manager_.take_file();
+    }
+    virtual ~sam_stream() { manager_.release_file(); }
+  };
+  friend class sam_stream;
+#endif
 
   PathIterator           paths_cur_, paths_end_;
+#ifdef HAVE_HTSLIB
+  PathIterator           sams_cur_, sams_end_;
+#endif
   int                    files_open_;
   const int              concurrent_files_;
   std::list<const char*> free_pipes_;
@@ -74,27 +94,61 @@ class stream_manager {
 public:
   define_error_class(Error);
 
-  stream_manager(PathIterator paths_begin, PathIterator paths_end, int concurrent_files = 1) :
-    paths_cur_(paths_begin), paths_end_(paths_end),
-    files_open_(0),
-    concurrent_files_(concurrent_files)
+  explicit stream_manager(int concurrent_files = 1)
+    : paths_cur_(PathIterator()), paths_end_(PathIterator())
+#ifdef HAVE_HTSLIB
+    , sams_cur_(PathIterator()), sams_end_(PathIterator())
+#endif
+    , files_open_(0)
+    , concurrent_files_(concurrent_files)
+  { }
+  stream_manager(PathIterator paths_begin, PathIterator paths_end, int concurrent_files = 1)
+    : paths_cur_(paths_begin), paths_end_(paths_end)
+#ifdef HAVE_HTSLIB
+    , sams_cur_(PathIterator()), sams_end_(PathIterator())
+#endif
+    , files_open_(0)
+    , concurrent_files_(concurrent_files)
   { }
 
   stream_manager(PathIterator paths_begin, PathIterator paths_end,
                  PathIterator pipe_begin, PathIterator pipe_end,
-                 int concurrent_files = 1) :
-    paths_cur_(paths_begin), paths_end_(paths_end),
-    files_open_(0),
-    concurrent_files_(concurrent_files),
-    free_pipes_(pipe_begin, pipe_end)
+                 int concurrent_files = 1)
+    : paths_cur_(paths_begin), paths_end_(paths_end)
+#ifdef HAVE_HTSLIB
+    , sams_cur_(PathIterator()), sams_end_(PathIterator())
+#endif
+    , files_open_(0)
+    , concurrent_files_(concurrent_files)
+    , free_pipes_(pipe_begin, pipe_end)
   { }
 
+  void paths(PathIterator paths_begin, PathIterator paths_end) {
+    paths_cur_ = paths_begin;
+    paths_end_ = paths_end;
+  }
+
+  void pipes(PathIterator pipe_begin, PathIterator pipe_end) {
+    free_pipes_.assign(pipe_begin, pipe_end);
+  }
+
+#ifdef HAVE_HTSLIB
+  void sams(PathIterator sam_begin, PathIterator sam_end) {
+    sams_cur_ = sam_begin;
+    sams_end_ = sam_end;
+  }
+#endif
+
   stream_type next() {
     locks::pthread::mutex_lock lock(mutex_);
     stream_type res;
     open_next_file(res);
-    if(!res)
-      open_next_pipe(res);
+    if(res.standard) return res;
+    open_next_pipe(res);
+    if(res.standard) return res;
+#ifdef HAVE_HTSLIB
+    open_next_sam(res);
+#endif
     return res;
   }
 
@@ -111,26 +165,42 @@ protected:
     while(paths_cur_ != paths_end_) {
       std::string path = *paths_cur_;
       ++paths_cur_;
-      res.reset(new file_stream(path.c_str(), *this));
-      if(res->good())
+      res.standard.reset(new file_stream(path.c_str(), *this));
+      if(res.standard->good())
         return;
-      res.reset();
+      res.standard.reset();
       throw std::runtime_error(err::msg() << "Can't open file '" << path << "'");
     }
   }
 
+#ifdef HAVE_HTSLIB
+  void open_next_sam(stream_type& res) {
+    if(files_open_ >= concurrent_files_)
+      return;
+    while(sams_cur_ != sams_end_) {
+      std::string path = *sams_cur_;
+      ++sams_cur_;
+      res.sam.reset(new sam_stream(path.c_str(), *this));
+      if(res.sam->good())
+        return;
+      res.sam.reset();
+      throw std::runtime_error(err::msg() << "Can't open SAM file '" << path << '\'');
+    }
+  }
+#endif
+
   void open_next_pipe(stream_type& res) {
     while(!free_pipes_.empty()) {
       const char* path = free_pipes_.front();
       free_pipes_.pop_front();
-      res.reset(new pipe_stream(path, *this));
-      if(res->good()) {
+      res.standard.reset(new pipe_stream(path, *this));
+      if(res.standard->good()) {
         busy_pipes_.insert(path);
         return;
       }
       // The pipe failed to open, so it is not marked as busy. This
       // reset will make us forget about this path.
-      res.reset();
+      res.standard.reset();
     }
   }
 
diff --git a/include/jellyfish/whole_sequence_parser.hpp b/include/jellyfish/whole_sequence_parser.hpp
index 46b0520..b12952e 100644
--- a/include/jellyfish/whole_sequence_parser.hpp
+++ b/include/jellyfish/whole_sequence_parser.hpp
@@ -7,6 +7,7 @@
 #include <jellyfish/err.hpp>
 #include <jellyfish/cooperative_pool2.hpp>
 #include <jellyfish/cpp_array.hpp>
+#include <jellyfish/parser_common.hpp>
 
 namespace jellyfish {
 struct header_sequence_qual {
@@ -22,8 +23,6 @@ struct sequence_list {
 template<typename StreamIterator>
 class whole_sequence_parser : public jellyfish::cooperative_pool2<whole_sequence_parser<StreamIterator>, sequence_list> {
   typedef jellyfish::cooperative_pool2<whole_sequence_parser<StreamIterator>, sequence_list> super;
-  typedef std::unique_ptr<std::istream> stream_type;
-  enum file_type { DONE_TYPE, FASTA_TYPE, FASTQ_TYPE };
 
   struct stream_status {
     file_type   type;
@@ -70,11 +69,16 @@ public:
     case FASTQ_TYPE:
       read_fastq(st, buff);
       break;
+#ifdef HAVE_HTSLIB
+    case SAM_TYPE:
+      read_sam(st, buff);
+      break;
+#endif
     case DONE_TYPE:
       return true;
     }
 
-    if(st.stream->good())
+    if(st.stream.good())
       return false;
 
     // Reach the end of file, close current and try to open the next one
@@ -89,7 +93,7 @@ protected:
   void open_next_file(stream_status& st) {
     st.stream.reset();
     st.stream = streams_iterator_.next();
-    if(!st.stream) {
+    if(!st.stream.good()) {
       st.type = DONE_TYPE;
       return;
     }
@@ -97,32 +101,43 @@ protected:
     ++files_read_;
     // Update the type of the current file and move past first header
     // to beginning of sequence.
-    switch(st.stream->peek()) {
-    case EOF: return open_next_file(st);
-    case '>':
-      st.type = FASTA_TYPE;
-      break;
-    case '@':
-      st.type = FASTQ_TYPE;
-      break;
-    default:
-      throw std::runtime_error("Unsupported format"); // Better error management
+    if(st.stream.standard) {
+      switch(st.stream.standard->peek()) {
+      case EOF: return open_next_file(st);
+      case '>':
+        st.type = FASTA_TYPE;
+        break;
+      case '@':
+        st.type = FASTQ_TYPE;
+        break;
+      default:
+        throw std::runtime_error("Unsupported format"); // Better error management
+      }
+    }
+#ifdef HAVE_HTSLIB
+    else if(st.stream.sam) {
+      st.type = SAM_TYPE;
+    }
+#endif
+    else {
+      st.type = DONE_TYPE;
     }
   }
 
   void read_fasta(stream_status& st, sequence_list& buff) {
     size_t&      nb_filled = buff.nb_filled;
     const size_t data_size = buff.data.size();
+    auto&        stream    = *st.stream.standard;
 
-    for(nb_filled = 0; nb_filled < data_size && st.stream->peek() != EOF; ++nb_filled) {
+    for(nb_filled = 0; nb_filled < data_size && stream.peek() != EOF; ++nb_filled) {
       ++reads_read_;
       header_sequence_qual& fill_buff = buff.data[nb_filled];
-      st.stream->get(); // Skip '>'
-      std::getline(*st.stream, fill_buff.header);
+      stream.get(); // Skip '>'
+      std::getline(stream, fill_buff.header);
       fill_buff.seq.clear();
-      for(int c = st.stream->peek(); c != '>' && c != EOF; c = st.stream->peek()) {
-        std::getline(*st.stream, st.buffer); // Wish there was an easy way to combine the
-        fill_buff.seq.append(st.buffer);             // two lines avoiding copying
+      for(int c = stream.peek(); c != '>' && c != EOF; c = stream.peek()) {
+        std::getline(stream, st.buffer); // Wish there was an easy way to combine the
+        fill_buff.seq.append(st.buffer); // two lines avoiding copying
       }
     }
   }
@@ -130,38 +145,63 @@ protected:
   void read_fastq(stream_status& st, sequence_list& buff) {
     size_t&      nb_filled = buff.nb_filled;
     const size_t data_size = buff.data.size();
+    auto&        stream    = *st.stream.standard;
 
-    for(nb_filled = 0; nb_filled < data_size && st.stream->peek() != EOF; ++nb_filled) {
+    for(nb_filled = 0; nb_filled < data_size && stream.peek() != EOF; ++nb_filled) {
       ++reads_read_;
       header_sequence_qual& fill_buff = buff.data[nb_filled];
-      st.stream->get(); // Skip '@'
-      std::getline(*st.stream, fill_buff.header);
+      stream.get(); // Skip '@'
+      std::getline(stream, fill_buff.header);
 
-      if(st.stream->peek() != '+')
-        std::getline(*st.stream, fill_buff.seq);
+      if(stream.peek() != '+')
+        std::getline(stream, fill_buff.seq);
       else
         fill_buff.seq.clear();
-      while(st.stream->peek() != '+' && st.stream->peek() != EOF) {
-        std::getline(*st.stream, st.buffer); // Wish there was an easy way to combine the
+      while(stream.peek() != '+' && stream.peek() != EOF) {
+        std::getline(stream, st.buffer); // Wish there was an easy way to combine the
         fill_buff.seq.append(st.buffer);             // two lines avoiding copying
       }
-      if(!st.stream->good())
+      if(!stream.good())
         throw std::runtime_error("Truncated fastq file");
-      st.stream->ignore(std::numeric_limits<std::streamsize>::max(), '\n');
-      if(st.stream->peek() != '+')
-        std::getline(*st.stream, fill_buff.qual);
+      stream.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
+      if(stream.peek() != '+')
+        std::getline(stream, fill_buff.qual);
       else
         fill_buff.qual.clear();
-      while(fill_buff.qual.size() < fill_buff.seq.size() && st.stream->good()) {
-        std::getline(*st.stream, st.buffer);
+      while(fill_buff.qual.size() < fill_buff.seq.size() && stream.good()) {
+        std::getline(stream, st.buffer);
         fill_buff.qual.append(st.buffer);
       }
       if(fill_buff.qual.size() != fill_buff.seq.size())
         throw std::runtime_error("Invalid fastq file: wrong number of quals");
-      if(st.stream->peek() != EOF && st.stream->peek() != '@')
+      if(stream.peek() != EOF && stream.peek() != '@')
         throw std::runtime_error("Invalid fastq file: header missing");
+      // std::cerr << nb_filled << ": " << fill_buff.seq << '\n'
+      //           << nb_filled << ": " << fill_buff.qual << '\n';
+    }
+  }
+
+#ifdef HAVE_HTSLIB
+  void read_sam(stream_status& st, sequence_list& buff) {
+    size_t&      nb_filled = buff.nb_filled;
+    const size_t data_size = buff.data.size();
+    auto&        stream    = *st.stream.sam;
+
+    for(nb_filled = 0; nb_filled < data_size && stream.next() >= 0; ++nb_filled) {
+      ++reads_read_;
+      header_sequence_qual& fill_buff = buff.data[nb_filled];
+      fill_buff.header = stream.qname();
+      auto& seq  = fill_buff.seq;
+      auto& qual = fill_buff.qual;
+      seq.resize(stream.seq_len());
+      qual.resize(stream.seq_len());
+      for(ssize_t i = 0; i < stream.seq_len(); ++i) {
+        seq[i] = stream.base(i);
+        qual[i] = stream.qual(i) + '!';
+      }
     }
   }
+#endif
 };
 } // namespace jellyfish
 
diff --git a/jellyfish/fastq2sam.cc b/jellyfish/fastq2sam.cc
new file mode 100644
index 0000000..e152ccf
--- /dev/null
+++ b/jellyfish/fastq2sam.cc
@@ -0,0 +1,47 @@
+#include <fstream>
+#include <string>
+#include <limits>
+
+#include <jellyfish/fastq2sam_cmdline.hpp>
+
+int main(int argc, char* argv[]) {
+  const std::string ext(".fastq");
+  const std::string out_ext(".sam");
+
+  fastq2sam_cmdline args(argc, argv);
+
+  for(const auto& path : args.fastq_arg) {
+    if(path.size() < ext.size() || path.substr(path.size() - ext.size()) != ext)
+      fastq2sam_cmdline::error() << "Input must have '" << ext << "' extension";
+    const std::string out_path = path.substr(0, path.size() - ext.size()) + out_ext;
+
+    std::ifstream is(path);
+    if(!is.good())
+      fastq2sam_cmdline::error() << "Failed to open '" << path << '\'';
+    std::ofstream os(out_path);
+    if(!os.good())
+      fastq2sam_cmdline::error() << "Failed to open '" << out_path << '\'';
+
+    //    os << "@PG\tID:fastq2sam\tPN:fastq2sam\n";
+
+    std::string name, seq, quals;
+    char c = is.get();
+    while(c == '@') {
+      std::getline(is, name);
+      std::getline(is, seq);
+      if((c = is.get()) != '+') break;
+      is.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
+      std::getline(is, quals);
+      os << name << "\t4\t*\t0\t0\t*\t*\t0\t0\t" << seq << '\t' << quals << '\n';
+      c = is.get();
+    }
+
+    if(c != EOF)
+      fastq2sam_cmdline::error() << "Input fastq file badly formatted. Unexpected characters '" << c << "' at position " << is.tellg();
+
+    if(!os.good())
+      fastq2sam_cmdline::error() << "Error while writing to file '" << out_path << '\'';
+  }
+
+  return 0;
+}
diff --git a/jellyfish/fastq2sam_cmdline.yaggo b/jellyfish/fastq2sam_cmdline.yaggo
new file mode 100644
index 0000000..65db334
--- /dev/null
+++ b/jellyfish/fastq2sam_cmdline.yaggo
@@ -0,0 +1,8 @@
+purpose "Convert a fastq file to a bare SAM file"
+package "fastq2sam"
+
+
+
+arg("fastq") {
+  description "Fastq files"
+  string; multiple; at_least 1 }
diff --git a/m4/m4-ax_check_compile_flag.m4 b/m4/m4-ax_check_compile_flag.m4
new file mode 100644
index 0000000..ca36397
--- /dev/null
+++ b/m4/m4-ax_check_compile_flag.m4
@@ -0,0 +1,74 @@
+# ===========================================================================
+#   http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
+#
+# DESCRIPTION
+#
+#   Check whether the given FLAG works with the current language's compiler
+#   or gives an error.  (Warnings, however, are ignored)
+#
+#   ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
+#   success/failure.
+#
+#   If EXTRA-FLAGS is defined, it is added to the current language's default
+#   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
+#   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
+#   force the compiler to issue an error when a bad flag is given.
+#
+#   INPUT gives an alternative input source to AC_COMPILE_IFELSE.
+#
+#   NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
+#   macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Guido U. Draheim <guidod at gmx.de>
+#   Copyright (c) 2011 Maarten Bosmans <mkbosmans at gmail.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 4
+
+AC_DEFUN([AX_CHECK_COMPILE_FLAG],
+[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
+AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
+AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
+  ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
+  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+  AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
+    [AS_VAR_SET(CACHEVAR,[yes])],
+    [AS_VAR_SET(CACHEVAR,[no])])
+  _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
+AS_VAR_IF(CACHEVAR,yes,
+  [m4_default([$2], :)],
+  [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl
+])dnl AX_CHECK_COMPILE_FLAGS
diff --git a/m4/m4-ax_ext.m4 b/m4/m4-ax_ext.m4
new file mode 100644
index 0000000..be9b57f
--- /dev/null
+++ b/m4/m4-ax_ext.m4
@@ -0,0 +1,303 @@
+# ===========================================================================
+#          http://www.gnu.org/software/autoconf-archive/ax_ext.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_EXT
+#
+# DESCRIPTION
+#
+#   Find supported SIMD extensions by requesting cpuid. When a SIMD
+#   extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
+#   compiler supports it. For example, if "sse2" is available then "-msse2"
+#   is added to SIMD_FLAGS.
+#
+#   Find other supported CPU extensions by requesting cpuid. When a
+#   processor extension is found, the -m"extensionname" is added to
+#   CPUEXT_FLAGS if compiler supports it. For example, if "bmi2" is
+#   available then "-mbmi2" is added to CPUEXT_FLAGS.
+#
+#   This macro calls:
+#
+#     AC_SUBST(SIMD_FLAGS)
+#     AC_SUBST(CPUEXT_FLAGS)
+#
+#   And defines:
+#
+#     HAVE_RDRND / HAVE_BMI1 / HAVE_BMI2 / HAVE_ADX / HAVE_MPX
+#     HAVE_PREFETCHWT1 / HAVE_ABM / HAVE_MMX / HAVE_SSE / HAVE_SSE2
+#     HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4_1 / HAVE_SSE4_2 / HAVE_SSE4a
+#     HAVE_SHA / HAVE_AES / HAVE_AVX / HAVE_FMA3 / HAVE_FMA4 / HAVE_XOP
+#     HAVE_AVX2 / HAVE_AVX512_F / HAVE_AVX512_CD / HAVE_AVX512_PF
+#     HAVE_AVX512_ER / HAVE_AVX512_VL / HAVE_AVX512_BW / HAVE_AVX512_DQ
+#     HAVE_AVX512_IFMA / HAVE_AVX512_VBMI
+#
+# LICENSE
+#
+#   Copyright (c) 2007 Christophe Tournayre <turn3r at users.sourceforge.net>
+#   Copyright (c) 2013,2015 Michael Petch <mpetch at capp-sysware.com>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 15
+
+AC_DEFUN([AX_EXT],
+[
+  AC_REQUIRE([AC_CANONICAL_HOST])
+  AC_REQUIRE([AC_PROG_CC])
+
+  CPUEXT_FLAGS=""
+  SIMD_FLAGS=""
+
+  case $host_cpu in
+    powerpc*)
+      AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
+          [
+            if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
+                if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
+                  ax_cv_have_altivec_ext=yes
+                fi
+            fi
+          ])
+
+          if test "$ax_cv_have_altivec_ext" = yes; then
+            AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
+            AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
+          fi
+    ;;
+
+    i[[3456]]86*|x86_64*|amd64*)
+
+      AC_REQUIRE([AX_GCC_X86_CPUID])
+      AC_REQUIRE([AX_GCC_X86_CPUID_COUNT])
+      AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
+
+      eax_cpuid0=0
+      AX_GCC_X86_CPUID(0x00000000)
+      if test "$ax_cv_gcc_x86_cpuid_0x00000000" != "unknown";
+      then
+        eax_cpuid0=`echo $ax_cv_gcc_x86_cpuid_0x00000000 | cut -d ":" -f 1`
+      fi
+
+      eax_cpuid80000000=0
+      AX_GCC_X86_CPUID(0x80000000)
+      if test "$ax_cv_gcc_x86_cpuid_0x80000000" != "unknown";
+      then
+        eax_cpuid80000000=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1`
+      fi
+
+      ecx_cpuid1=0
+      edx_cpuid1=0
+      if test "$((0x$eax_cpuid0))" -ge 1 ; then
+        AX_GCC_X86_CPUID(0x00000001)
+        if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown";
+        then
+          ecx_cpuid1=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
+          edx_cpuid1=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
+        fi
+      fi
+
+      ebx_cpuid7=0
+      ecx_cpuid7=0
+      if test "$((0x$eax_cpuid0))" -ge 7 ; then
+        AX_GCC_X86_CPUID_COUNT(0x00000007, 0x00)
+        if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown";
+        then
+          ebx_cpuid7=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2`
+          ecx_cpuid7=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 3`
+        fi
+      fi
+
+      ecx_cpuid80000001=0
+      edx_cpuid80000001=0
+      if test "$((0x$eax_cpuid80000000))" -ge "$((0x80000001))" ; then
+        AX_GCC_X86_CPUID(0x80000001)
+        if test "$ax_cv_gcc_x86_cpuid_0x80000001" != "unknown";
+        then
+          ecx_cpuid80000001=`echo $ax_cv_gcc_x86_cpuid_0x80000001 | cut -d ":" -f 3`
+          edx_cpuid80000001=`echo $ax_cv_gcc_x86_cpuid_0x80000001 | cut -d ":" -f 4`
+        fi
+      fi
+
+      AC_CACHE_VAL([ax_cv_have_mmx_os_support_ext],
+      [
+        ax_cv_have_mmx_os_support_ext=yes
+      ])
+
+      ax_cv_have_none_os_support_ext=yes
+
+      AC_CACHE_VAL([ax_cv_have_sse_os_support_ext],
+      [
+        ax_cv_have_sse_os_support_ext=no,
+        if test "$((0x$edx_cpuid1>>25&0x01))" = 1; then
+          AC_LANG_PUSH([C])
+          AC_TRY_RUN([
+#include <signal.h>
+#include <stdlib.h>
+            /* No way at ring1 to ring3 in protected mode to check the CR0 and CR4
+               control registers directly. Execute an SSE instruction.
+               If it raises SIGILL then OS doesn't support SSE based instructions */
+            void sig_handler(int signum){ exit(1); }
+            int main(){
+              signal(SIGILL, sig_handler);
+              /* SSE instruction xorps  %xmm0,%xmm0 */
+              __asm__ __volatile__ (".byte 0x0f, 0x57, 0xc0");
+              return 0;
+            }],
+            ax_cv_have_sse_os_support_ext=yes,
+            ax_cv_have_sse_os_support_ext=no,
+            ax_cv_have_sse_os_support_ext=no)
+          AC_LANG_POP([C])
+        fi
+      ])
+
+      xgetbv_eax=0
+      if test "$((0x$ecx_cpuid1>>28&0x01))" = 1; then
+        AX_GCC_X86_AVX_XGETBV(0x00000000)
+
+        if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
+          xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
+        fi
+
+        AC_CACHE_VAL([ax_cv_have_avx_os_support_ext],
+        [
+          ax_cv_have_avx_os_support_ext=no
+          if test "$((0x$ecx_cpuid1>>27&0x01))" = 1; then
+            if test "$((0x$xgetbv_eax&0x6))" = 6; then
+              ax_cv_have_avx_os_support_ext=yes
+            fi
+          fi
+        ])
+      fi
+
+      AC_CACHE_VAL([ax_cv_have_avx512_os_support_ext],
+      [
+        ax_cv_have_avx512_os_support_ext=no
+        if test "$ax_cv_have_avx_os_support_ext" = yes; then
+          if test "$((0x$xgetbv_eax&0xe6))" = "$((0xe6))"; then
+            ax_cv_have_avx512_os_support_ext=yes
+          fi
+        fi
+      ])
+
+      for ac_instr_info dnl
+      in "none;rdrnd;RDRND;ecx_cpuid1,30;-mrdrnd;HAVE_RDRND;CPUEXT_FLAGS" dnl
+         "none;bmi1;BMI1;ebx_cpuid7,3;-mbmi;HAVE_BMI1;CPUEXT_FLAGS" dnl
+         "none;bmi2;BMI2;ebx_cpuid7,8;-mbmi2;HAVE_BMI2;CPUEXT_FLAGS" dnl
+         "none;adx;ADX;ebx_cpuid7,19;-madx;HAVE_ADX;CPUEXT_FLAGS" dnl
+         "none;mpx;MPX;ebx_cpuid7,14;-mmpx;HAVE_MPX;CPUEXT_FLAGS" dnl
+         "none;prefetchwt1;PREFETCHWT1;ecx_cpuid7,0;-mprefetchwt1;HAVE_PREFETCHWT1;CPUEXT_FLAGS" dnl
+         "none;abm;ABM;ecx_cpuid80000001,5;-mabm;HAVE_ABM;CPUEXT_FLAGS" dnl
+         "mmx;mmx;MMX;edx_cpuid1,23;-mmmx;HAVE_MMX;SIMD_FLAGS" dnl
+         "sse;sse;SSE;edx_cpuid1,25;-msse;HAVE_SSE;SIMD_FLAGS" dnl
+         "sse;sse2;SSE2;edx_cpuid1,26;-msse2;HAVE_SSE2;SIMD_FLAGS" dnl
+         "sse;sse3;SSE3;ecx_cpuid1,1;-msse3;HAVE_SSE3;SIMD_FLAGS" dnl
+         "sse;ssse3;SSSE3;ecx_cpuid1,9;-mssse3;HAVE_SSSE3;SIMD_FLAGS" dnl
+         "sse;sse41;SSE4.1;ecx_cpuid1,19;-msse4.1;HAVE_SSE4_1;SIMD_FLAGS" dnl
+         "sse;sse42;SSE4.2;ecx_cpuid1,20;-msse4.2;HAVE_SSE4_2;SIMD_FLAGS" dnl
+         "sse;sse4a;SSE4a;ecx_cpuid80000001,6;-msse4a;HAVE_SSE4a;SIMD_FLAGS" dnl
+         "sse;sha;SHA;ebx_cpuid7,29;-msha;HAVE_SHA;SIMD_FLAGS" dnl
+         "sse;aes;AES;ecx_cpuid1,25;-maes;HAVE_AES;SIMD_FLAGS" dnl
+         "avx;avx;AVX;ecx_cpuid1,28;-mavx;HAVE_AVX;SIMD_FLAGS" dnl
+         "avx;fma3;FMA3;ecx_cpuid1,12;-mfma;HAVE_FMA3;SIMD_FLAGS" dnl
+         "avx;fma4;FMA4;ecx_cpuid80000001,16;-mfma4;HAVE_FMA4;SIMD_FLAGS" dnl
+         "avx;xop;XOP;ecx_cpuid80000001,11;-mxop;HAVE_XOP;SIMD_FLAGS" dnl
+         "avx;avx2;AVX2;ebx_cpuid7,5;-mavx2;HAVE_AVX2;SIMD_FLAGS" dnl
+         "avx512;avx512f;AVX512-F;ebx_cpuid7,16;-mavx512f;HAVE_AVX512_F;SIMD_FLAGS" dnl
+         "avx512;avx512cd;AVX512-CD;ebx_cpuid7,28;-mavx512cd;HAVE_AVX512_CD;SIMD_FLAGS" dnl
+         "avx512;avx512pf;AVX512-PF;ebx_cpuid7,26;-mavx512pf;HAVE_AVX512_PF;SIMD_FLAGS" dnl
+         "avx512;avx512er;AVX512-ER;ebx_cpuid7,27;-mavx512er;HAVE_AVX512_ER;SIMD_FLAGS" dnl
+         "avx512;avx512vl;AVX512-VL;ebx_cpuid7,31;-mavx512vl;HAVE_AVX512_VL;SIMD_FLAGS" dnl
+         "avx512;avx512bw;AVX512-BW;ebx_cpuid7,30;-mavx512bw;HAVE_AVX512_BW;SIMD_FLAGS" dnl
+         "avx512;avx512dq;AVX512-DQ;ebx_cpuid7,17;-mavx512dq;HAVE_AVX512_DQ;SIMD_FLAGS" dnl
+         "avx512;avx512ifma;AVX512-IFMA;ebx_cpuid7,21;-mavx512ifma;HAVE_AVX512_IFMA;SIMD_FLAGS" dnl
+         "avx512;avx512vbmi;AVX512-VBMI;ecx_cpuid7,1;-mavx512vbmi;HAVE_AVX512_VBMI;SIMD_FLAGS" dnl
+         #
+      do ac_instr_os_support=$(eval echo \$ax_cv_have_$(echo $ac_instr_info | cut -d ";" -f 1)_os_support_ext)
+         ac_instr_acvar=$(echo $ac_instr_info | cut -d ";" -f 2)
+         ac_instr_shortname=$(echo $ac_instr_info | cut -d ";" -f 3)
+         ac_instr_chk_loc=$(echo $ac_instr_info | cut -d ";" -f 4)
+         ac_instr_chk_reg=0x$(eval echo \$$(echo $ac_instr_chk_loc | cut -d "," -f 1))
+         ac_instr_chk_bit=$(echo $ac_instr_chk_loc | cut -d "," -f 2)
+         ac_instr_compiler_flags=$(echo $ac_instr_info | cut -d ";" -f 5)
+         ac_instr_have_define=$(echo $ac_instr_info | cut -d ";" -f 6)
+         ac_instr_flag_type=$(echo $ac_instr_info | cut -d ";" -f 7)
+
+         AC_CACHE_CHECK([whether ${ac_instr_shortname} is supported by the processor], [ax_cv_have_${ac_instr_acvar}_cpu_ext],
+         [
+           eval ax_cv_have_${ac_instr_acvar}_cpu_ext=no
+           if test "$((${ac_instr_chk_reg}>>${ac_instr_chk_bit}&0x01))" = 1 ; then
+             eval ax_cv_have_${ac_instr_acvar}_cpu_ext=yes
+           fi
+         ])
+
+         if test x"$(eval echo \$ax_cv_have_${ac_instr_acvar}_cpu_ext)" = x"yes"; then
+           AC_CACHE_CHECK([whether ${ac_instr_shortname} is supported by the processor and OS], [ax_cv_have_${ac_instr_acvar}_ext],
+           [
+             eval ax_cv_have_${ac_instr_acvar}_ext=no
+             if test x"${ac_instr_os_support}" = x"yes"; then
+               eval ax_cv_have_${ac_instr_acvar}_ext=yes
+             fi
+           ])
+
+           if test "$(eval echo \$ax_cv_have_${ac_instr_acvar}_ext)" = yes; then
+             AX_CHECK_COMPILE_FLAG(${ac_instr_compiler_flags}, eval ax_cv_support_${ac_instr_acvar}_ext=yes,
+                                                               eval ax_cv_support_${ac_instr_acvar}_ext=no)
+             if test x"$(eval echo \$ax_cv_support_${ac_instr_acvar}_ext)" = x"yes"; then
+               eval ${ac_instr_flag_type}=\"\$${ac_instr_flag_type} ${ac_instr_compiler_flags}\"
+               AC_DEFINE_UNQUOTED([${ac_instr_have_define}])
+             else
+               AC_MSG_WARN([Your processor and OS supports ${ac_instr_shortname} instructions but not your compiler, can you try another compiler?])
+             fi
+           else
+             if test x"${ac_instr_os_support}" = x"no"; then
+               AC_CACHE_VAL(ax_cv_support_${ac_instr_acvar}_ext, eval ax_cv_support_${ac_instr_acvar}_ext=no)
+               AC_MSG_WARN([Your processor supports ${ac_instr_shortname}, but your OS doesn't])
+             fi
+           fi
+         else
+           AC_CACHE_VAL(ax_cv_have_${ac_instr_acvar}_ext, eval ax_cv_have_${ac_instr_acvar}_ext=no)
+           AC_CACHE_VAL(ax_cv_support_${ac_instr_acvar}_ext, eval ax_cv_support_${ac_instr_acvar}_ext=no)
+         fi
+      done
+  ;;
+  esac
+
+  AH_TEMPLATE([HAVE_RDRND],[Define to 1 to support Digital Random Number Generator])
+  AH_TEMPLATE([HAVE_BMI1],[Define to 1 to support Bit Manipulation Instruction Set 1])
+  AH_TEMPLATE([HAVE_BMI2],[Define to 1 to support Bit Manipulation Instruction Set 2])
+  AH_TEMPLATE([HAVE_ADX],[Define to 1 to support Multi-Precision Add-Carry Instruction Extensions])
+  AH_TEMPLATE([HAVE_MPX],[Define to 1 to support Memory Protection Extensions])
+  AH_TEMPLATE([HAVE_PREFETCHWT1],[Define to 1 to support Prefetch Vector Data Into Caches WT1])
+  AH_TEMPLATE([HAVE_ABM],[Define to 1 to support Advanced Bit Manipulation])
+  AH_TEMPLATE([HAVE_MMX],[Define to 1 to support Multimedia Extensions])
+  AH_TEMPLATE([HAVE_SSE],[Define to 1 to support Streaming SIMD Extensions])
+  AH_TEMPLATE([HAVE_SSE2],[Define to 1 to support Streaming SIMD Extensions])
+  AH_TEMPLATE([HAVE_SSE3],[Define to 1 to support Streaming SIMD Extensions 3])
+  AH_TEMPLATE([HAVE_SSSE3],[Define to 1 to support Supplemental Streaming SIMD Extensions 3])
+  AH_TEMPLATE([HAVE_SSE4_1],[Define to 1 to support Streaming SIMD Extensions 4.1])
+  AH_TEMPLATE([HAVE_SSE4_2],[Define to 1 to support Streaming SIMD Extensions 4.2])
+  AH_TEMPLATE([HAVE_SSE4a],[Define to 1 to support AMD Streaming SIMD Extensions 4a])
+  AH_TEMPLATE([HAVE_SHA],[Define to 1 to support Secure Hash Algorithm Extension])
+  AH_TEMPLATE([HAVE_AES],[Define to 1 to support Advanced Encryption Standard New Instruction Set (AES-NI)])
+  AH_TEMPLATE([HAVE_AVX],[Define to 1 to support Advanced Vector Extensions])
+  AH_TEMPLATE([HAVE_FMA3],[Define to 1 to support  Fused Multiply-Add Extensions 3])
+  AH_TEMPLATE([HAVE_FMA4],[Define to 1 to support Fused Multiply-Add Extensions 4])
+  AH_TEMPLATE([HAVE_XOP],[Define to 1 to support eXtended Operations Extensions])
+  AH_TEMPLATE([HAVE_AVX2],[Define to 1 to support Advanced Vector Extensions 2])
+  AH_TEMPLATE([HAVE_AVX512_F],[Define to 1 to support AVX-512 Foundation Extensions])
+  AH_TEMPLATE([HAVE_AVX512_CD],[Define to 1 to support AVX-512 Conflict Detection Instructions])
+  AH_TEMPLATE([HAVE_AVX512_PF],[Define to 1 to support AVX-512 Conflict Prefetch Instructions])
+  AH_TEMPLATE([HAVE_AVX512_ER],[Define to 1 to support AVX-512 Exponential & Reciprocal Instructions])
+  AH_TEMPLATE([HAVE_AVX512_VL],[Define to 1 to support AVX-512 Vector Length Extensions])
+  AH_TEMPLATE([HAVE_AVX512_BW],[Define to 1 to support AVX-512 Byte and Word Instructions])
+  AH_TEMPLATE([HAVE_AVX512_DQ],[Define to 1 to support AVX-512 Doubleword and Quadword Instructions])
+  AH_TEMPLATE([HAVE_AVX512_IFMA],[Define to 1 to support AVX-512 Integer Fused Multiply Add Instructions])
+  AH_TEMPLATE([HAVE_AVX512_VBMI],[Define to 1 to support AVX-512 Vector Byte Manipulation Instructions])
+  AC_SUBST(SIMD_FLAGS)
+  AC_SUBST(CPUEXT_FLAGS)
+])
diff --git a/m4/m4-ax_gcc_x86_avx_xgetbv.m4 b/m4/m4-ax_gcc_x86_avx_xgetbv.m4
new file mode 100644
index 0000000..2b38bbb
--- /dev/null
+++ b/m4/m4-ax_gcc_x86_avx_xgetbv.m4
@@ -0,0 +1,79 @@
+# ===========================================================================
+#   http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_GCC_X86_AVX_XGETBV
+#
+# DESCRIPTION
+#
+#   On later x86 processors with AVX SIMD support, with gcc or a compiler
+#   that has a compatible syntax for inline assembly instructions, run a
+#   small program that executes the xgetbv instruction with input OP. This
+#   can be used to detect if the OS supports AVX instruction usage.
+#
+#   On output, the values of the eax and edx registers are stored as
+#   hexadecimal strings as "eax:edx" in the cache variable
+#   ax_cv_gcc_x86_avx_xgetbv.
+#
+#   If the xgetbv instruction fails (because you are running a
+#   cross-compiler, or because you are not using gcc, or because you are on
+#   a processor that doesn't have this instruction),
+#   ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
+#
+#   This macro mainly exists to be used in AX_EXT.
+#
+# LICENSE
+#
+#   Copyright (c) 2013 Michael Petch <mpetch at capp-sysware.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 2
+
+AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
+[AC_REQUIRE([AC_PROG_CC])
+AC_LANG_PUSH([C])
+AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
+ [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
+     int op = $1, eax, edx;
+     FILE *f;
+      /* Opcodes for xgetbv */
+      __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0"
+        : "=a" (eax), "=d" (edx)
+        : "c" (op));
+     f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
+     fprintf(f, "%x:%x\n", eax, edx);
+     fclose(f);
+     return 0;
+])],
+     [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
+     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
+     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
+AC_LANG_POP([C])
+])
diff --git a/m4/m4-ax_gcc_x86_cpuid.m4 b/m4/m4-ax_gcc_x86_cpuid.m4
new file mode 100644
index 0000000..578b3a3
--- /dev/null
+++ b/m4/m4-ax_gcc_x86_cpuid.m4
@@ -0,0 +1,89 @@
+# ===========================================================================
+#     http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_GCC_X86_CPUID(OP)
+#   AX_GCC_X86_CPUID_COUNT(OP, COUNT)
+#
+# DESCRIPTION
+#
+#   On Pentium and later x86 processors, with gcc or a compiler that has a
+#   compatible syntax for inline assembly instructions, run a small program
+#   that executes the cpuid instruction with input OP. This can be used to
+#   detect the CPU type. AX_GCC_X86_CPUID_COUNT takes an additional COUNT
+#   parameter that gets passed into register ECX before calling cpuid.
+#
+#   On output, the values of the eax, ebx, ecx, and edx registers are stored
+#   as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
+#   ax_cv_gcc_x86_cpuid_OP.
+#
+#   If the cpuid instruction fails (because you are running a
+#   cross-compiler, or because you are not using gcc, or because you are on
+#   a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
+#   is set to the string "unknown".
+#
+#   This macro mainly exists to be used in AX_GCC_ARCHFLAG.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Steven G. Johnson <stevenj at alum.mit.edu>
+#   Copyright (c) 2008 Matteo Frigo
+#   Copyright (c) 2015 Michael Petch <mpetch at capp-sysware.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 9
+
+AC_DEFUN([AX_GCC_X86_CPUID],
+[AX_GCC_X86_CPUID_COUNT($1, 0)
+])
+
+AC_DEFUN([AX_GCC_X86_CPUID_COUNT],
+[AC_REQUIRE([AC_PROG_CC])
+AC_LANG_PUSH([C])
+AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
+ [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
+     int op = $1, level = $2, eax, ebx, ecx, edx;
+     FILE *f;
+      __asm__ __volatile__ ("xchg %%ebx, %1\n"
+        "cpuid\n"
+        "xchg %%ebx, %1\n"
+        : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+        : "a" (op), "2" (level));
+
+     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
+     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
+     fclose(f);
+     return 0;
+])],
+     [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
+     [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
+     [ax_cv_gcc_x86_cpuid_$1=unknown])])
+AC_LANG_POP([C])
+])
diff --git a/sub_commands/count_main.cc b/sub_commands/count_main.cc
index 20285e0..5532823 100644
--- a/sub_commands/count_main.cc
+++ b/sub_commands/count_main.cc
@@ -19,6 +19,7 @@
 #include <assert.h>
 #include <signal.h>
 
+#include <cctype>
 #include <iostream>
 #include <fstream>
 #include <string>
@@ -28,6 +29,10 @@
 #include <memory>
 #include <chrono>
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <jellyfish/err.hpp>
 #include <jellyfish/thread_exec.hpp>
 #include <jellyfish/hash_counter.hpp>
@@ -58,9 +63,10 @@ using jellyfish::mer_dna;
 using jellyfish::mer_dna_bloom_counter;
 using jellyfish::mer_dna_bloom_filter;
 typedef std::vector<const char*> file_vector;
+typedef jellyfish::stream_manager<file_vector::const_iterator> stream_manager_type;
 
 // Types for parsing arbitrary sequence ignoring quality scores
-typedef jellyfish::mer_overlap_sequence_parser<jellyfish::stream_manager<file_vector::const_iterator> > sequence_parser;
+typedef jellyfish::mer_overlap_sequence_parser<stream_manager_type> sequence_parser;
 typedef jellyfish::mer_iterator<sequence_parser, mer_dna> mer_iterator;
 
 // Types for parsing reads with quality score. Interface match type
@@ -80,10 +86,12 @@ public:
 class mer_qual_iterator : public jellyfish::mer_qual_iterator<sequence_qual_parser, mer_dna> {
   typedef jellyfish::mer_qual_iterator<sequence_qual_parser, mer_dna> super;
 public:
+  static char min_qual;
   mer_qual_iterator(sequence_qual_parser& parser, bool canonical = false) :
-    super(parser, args.min_qual_char_arg[0], canonical)
+    super(parser, min_qual, canonical)
   { }
 };
+char mer_qual_iterator::min_qual = '!'; // Phred+33
 
 // k-mer filters. Organized in a linked list, interpreted as a &&
 // (logical and). I.e. all filter must return true for the result to
@@ -123,26 +131,21 @@ struct filter_bf : public filter {
 };
 
 enum OPERATION { COUNT, PRIME, UPDATE };
-template<typename PathIterator, typename MerIteratorType, typename ParserType>
+template<typename MerIteratorType, typename ParserType>
 class mer_counter_base : public jellyfish::thread_exec {
-  int                                     nb_threads_;
-  mer_hash&                               ary_;
-  jellyfish::stream_manager<PathIterator> streams_;
-  ParserType                              parser_;
-  filter*                                 filter_;
-  OPERATION                               op_;
+  int                  nb_threads_;
+  mer_hash&            ary_;
+  ParserType           parser_;
+  filter*              filter_;
+  OPERATION            op_;
 
 public:
-  mer_counter_base(int nb_threads, mer_hash& ary,
-                   PathIterator file_begin, PathIterator file_end,
-                   PathIterator pipe_begin, PathIterator pipe_end,
-                   uint32_t concurent_files,
-                   OPERATION op, filter* filter = new struct filter) :
-    ary_(ary),
-    streams_(file_begin, file_end, pipe_begin, pipe_end, concurent_files),
-    parser_(mer_dna::k(), streams_.nb_streams(), 3 * nb_threads, 4096, streams_),
-    filter_(filter),
-    op_(op)
+  mer_counter_base(int nb_threads, mer_hash& ary, stream_manager_type& streams,
+                   OPERATION op, filter* filter = new struct filter)
+    : ary_(ary)
+    , parser_(mer_dna::k(), streams.nb_streams(), 3 * nb_threads, 4096, streams)
+    , filter_(filter)
+    , op_(op)
   { }
 
   virtual void start(int thid) {
@@ -181,8 +184,8 @@ public:
 };
 
 // Counter with and without quality value
-typedef mer_counter_base<file_vector::const_iterator, mer_iterator, sequence_parser> mer_counter;
-typedef mer_counter_base<file_vector::const_iterator, mer_qual_iterator, sequence_qual_parser> mer_qual_counter;
+typedef mer_counter_base<mer_iterator, sequence_parser> mer_counter;
+typedef mer_counter_base<mer_qual_iterator, sequence_qual_parser> mer_qual_counter;
 
 mer_dna_bloom_counter* load_bloom_filter(const char* path) {
   std::ifstream in(path, std::ios::in|std::ios::binary);
@@ -221,8 +224,35 @@ int count_main(int argc, char *argv[])
 
   args.parse(argc, argv);
 
-  if(args.min_qual_char_given && args.min_qual_char_arg.size() != 1)
-    count_main_cmdline::error("[-Q, --min-qual-char] must be one character.");
+#ifndef HAVE_HTSLIB
+  if(args.sam_given)
+    count_main_cmdline::error() << "SAM/BAM/CRAM not supported (missing htslib).";
+#endif
+
+
+  if(args.min_qual_char_given) {
+    if(args.min_qual_char_arg.size() != 1)
+      count_main_cmdline::error("[-Q, --min-qual-char] must be one character.");
+    const char min_qual = args.min_qual_char_arg[0];
+    if(!isprint(min_qual))
+      count_main_cmdline::error () << "Invalid non-printable quality character";
+    if(min_qual < '!' || min_qual > '~')
+      count_main_cmdline::error() << "Quality character '" << min_qual
+                                  << "' is outside of the range [!, ~]";
+    mer_qual_iterator::min_qual = min_qual;
+  }
+  if(args.min_quality_given) {
+    if(args.quality_start_arg < '!' || args.quality_start_arg > '~')
+      count_main_cmdline::error() << "Quality start " << args.quality_start_arg
+                                  << " is outside the range [" << (int)'!' << ", "
+                                  << (int)'~' << ']';
+    int min_qual = args.quality_start_arg + args.min_quality_arg;
+    if(min_qual < '!' || min_qual > '~')
+      count_main_cmdline::error() << "Min quality " << args.min_quality_arg
+                                  << " is outside the range [0, "
+                                  << ((int)'~' - args.quality_start_arg) << ']';
+    mer_qual_iterator::min_qual = min_qual;
+  }
 
   mer_dna::k(args.mer_len_arg);
 
@@ -256,10 +286,9 @@ int count_main(int argc, char *argv[])
 
   OPERATION do_op = COUNT;
   if(args.if_given) {
-    mer_counter counter(args.threads_arg, ary,
-                        args.if_arg.begin(), args.if_arg.end(),
-                        args.if_arg.end(), args.if_arg.end(), // no multi pipes
-                        args.Files_arg, PRIME);
+    stream_manager_type streams(args.Files_arg);
+    streams.paths(args.if_arg.begin(), args.if_arg.end());
+    mer_counter counter(args.threads_arg, ary, streams, PRIME);
     counter.exec_join(args.threads_arg);
     do_op = UPDATE;
   }
@@ -269,6 +298,13 @@ int count_main(int argc, char *argv[])
   auto pipes_begin = generator_manager.get() ? generator_manager->pipes().begin() : args.file_arg.end();
   auto pipes_end = (bool)generator_manager ? generator_manager->pipes().end() : args.file_arg.end();
 
+  stream_manager_type streams(args.Files_arg);
+  streams.paths(args.file_arg.begin(), args.file_arg.end());
+  streams.pipes(pipes_begin, pipes_end);
+#ifdef HAVE_HTSLIB
+  streams.sams(args.sam_arg.begin(), args.sam_arg.end());
+#endif
+
   // Bloom counter read from file to filter out low frequency
   // k-mers. Two pass algorithm.
   std::unique_ptr<filter> mer_filter(new filter);
@@ -286,18 +322,12 @@ int count_main(int argc, char *argv[])
     mer_filter.reset(new filter_bf(*bf));
   }
 
-  if(args.min_qual_char_given) {
-    mer_qual_counter counter(args.threads_arg, ary,
-                             args.file_arg.begin(), args.file_arg.end(),
-                             pipes_begin, pipes_end,
-                             args.Files_arg,
+  if(args.min_qual_char_given || args.min_quality_given) {
+    mer_qual_counter counter(args.threads_arg, ary, streams,
                              do_op, mer_filter.get());
     counter.exec_join(args.threads_arg);
   } else {
-    mer_counter counter(args.threads_arg, ary,
-                        args.file_arg.begin(), args.file_arg.end(),
-                        pipes_begin, pipes_end,
-                        args.Files_arg,
+    mer_counter counter(args.threads_arg, ary, streams,
                         do_op, mer_filter.get());
     counter.exec_join(args.threads_arg);
   }
diff --git a/sub_commands/count_main_cmdline.yaggo b/sub_commands/count_main_cmdline.yaggo
index 5803194..7e9b2ac 100644
--- a/sub_commands/count_main_cmdline.yaggo
+++ b/sub_commands/count_main_cmdline.yaggo
@@ -10,6 +10,10 @@ option("size", "s") {
 option("threads", "t") {
   description "Number of threads"
   uint32; default "1" }
+option("sam") {
+  description "SAM/BAM/CRAM formatted input file"
+  multiple;
+  c_string; typestr "PATH" }
 option("F", "Files") {
   description "Number files open simultaneously"
   uint32; default "1" }
@@ -49,6 +53,12 @@ option("if") {
 option("Q", "min-qual-char") {
   description "Any base with quality below this character is changed to N"
   string }
+option("quality-start") {
+  description "ASCII for quality values"
+  int32; default 64; conflict "min-qual-char" }
+option("min-quality") {
+  description "Minimum quality. A base with lesser quality becomes an N"
+  int32; conflict "min-qual-char" }
 option("reprobes", "p") {
   description "Maximum number of reprobes"
   uint32; default "126" }
diff --git a/swig/mer_file.i b/swig/mer_file.i
index 130695a..ed1d514 100644
--- a/swig/mer_file.i
+++ b/swig/mer_file.i
@@ -8,7 +8,7 @@
     std::unique_ptr<binary_query>                    jf;
 
   public:
-    QueryMerFile(const char* path) throw(std::runtime_error) {
+    QueryMerFile(const char* path) {
       std::ifstream in(path);
       if(!in.good())
         throw std::runtime_error(std::string("Can't open file '") + path + "'");
@@ -110,7 +110,7 @@ class QueryMerFile {
     }
 
   public:
-    ReadMerFile(const char* path) throw(std::runtime_error) :
+    ReadMerFile(const char* path) :
       in(path)
     {
       if(!in.good())
diff --git a/tests/bloom_filter.sh b/tests/bloom_filter.sh
index 035a6a7..d2a944b 100644
--- a/tests/bloom_filter.sh
+++ b/tests/bloom_filter.sh
@@ -8,16 +8,25 @@ cd tests
 
 $JF count --bf-size 10M --bf-fp 0.001 -t $nCPUs -o ${pref}_10m.jf -s 1M -m 40 seq10m.fa
 $JF histo ${pref}_10m.jf > ${pref}_10m.histo
-COLLISIONS=$(cut -d\  -f2 ${pref}_10m.histo | paste -sd+ - | bc)
-[ $((COLLISIONS > 10000)) = 0 ] || {
-    echo >& "Too many collisions"
-    false
-}
+if bc < /dev/null 2> /dev/null; then
+    COLLISIONS=$(cut -d\  -f2 ${pref}_10m.histo | paste -sd+ - | bc)
+    [ $((COLLISIONS > 10000)) = 0 ] || {
+        echo >&2 "Too many collisions"
+        false
+    }
+else
+    echo > "bc missing: Skip collisions test"
+fi
 
 $JF count --bf-size 3M --bf-fp 0.001 -t $nCPUs -o ${pref}_3m.jf -s 1M -m 40 seq1m_0.fa seq1m_1.fa seq1m_0.fa seq1m_2.fa
 $JF histo ${pref}_3m.jf > ${pref}_3m.histo
-COLLISIONS=$(cut -d\  -f2 ${pref}_3m.histo | paste -sd+ - | bc)
-[ $((COLLISIONS - 1000000 > 20000)) = 0 ] || {
-    echo >& "Too many collisions"
-    false
-}
+
+if bc < /dev/null 2> /dev/null; then
+    COLLISIONS=$(cut -d\  -f2 ${pref}_3m.histo | paste -sd+ - | bc)
+    [ $((COLLISIONS - 1000000 > 20000)) = 0 ] || {
+        echo >&2 "Too many collisions"
+        false
+    }
+else
+    echo "bc missing: Skip collisions test"
+fi
diff --git a/tests/compat.sh.in b/tests/compat.sh.in
index 37bca5a..3fe1432 100644
--- a/tests/compat.sh.in
+++ b/tests/compat.sh.in
@@ -18,6 +18,7 @@ ENABLE_PYTHON_BINDING="@PYTHON@"
 PYTHON="@PYTHON@"
 ENABLE_PERL_BINDING="@PERL_EXT_LIB@"
 PERL="@PERL@"
+SAMTOOLS="@SAMTOOLS@"
 
 if [ -n "$DEBUG" ]; then
     set -x;
diff --git a/tests/generate_sequence.sh b/tests/generate_sequence.sh
index 0ff6785..96049a6 100755
--- a/tests/generate_sequence.sh
+++ b/tests/generate_sequence.sh
@@ -11,3 +11,9 @@ for i in 0 1 2 3 4; do
 done
 
 ${DIR}/generate_sequence -v -q -o seq10m -s 1473540700 10000000
+ln -sf seq10m.fq seq10m.fastq
+${DIR}/fastq2sam seq10m.fastq
+if [ -n "$SAMTOOLS" ]; then
+    $SAMTOOLS view -b -o seq10m.bam seq10m.sam
+    $SAMTOOLS view -C -o seq10m.cram seq10m.sam
+fi
diff --git a/tests/large_key.sh b/tests/large_key.sh
index c00f1d6..d1fe9cf 100644
--- a/tests/large_key.sh
+++ b/tests/large_key.sh
@@ -9,10 +9,10 @@ ded3925fe6bbaca10accc10d1bde11b5 ${pref}_m100_2k_ordered
 ded3925fe6bbaca10accc10d1bde11b5 ${pref}_m100_2k_disk_ordered
 EOF
 
-head -n 10001 seq1m_0.fa | time $JF count -t $nCPUs -o ${pref}_m100_2M.jf -s 2M -m 100 /dev/fd/0
+head -n 10001 seq1m_0.fa | $JF count -t $nCPUs -o ${pref}_m100_2M.jf -s 2M -m 100 /dev/fd/0
 $JF dump -c ${pref}_m100_2M.jf | cut -d\  -f 1 | sort > ${pref}_m100_2M_ordered
-head -n 10001 seq1m_0.fa | time $JF count -t $nCPUs -o ${pref}_m100_2k.jf -s 2k -m 100 /dev/fd/0
+head -n 10001 seq1m_0.fa | $JF count -t $nCPUs -o ${pref}_m100_2k.jf -s 2k -m 100 /dev/fd/0
 $JF dump -c ${pref}_m100_2k.jf | cut -d\  -f 1 | sort > ${pref}_m100_2k_ordered
-head -n 10001 seq1m_0.fa | time $JF count -t $nCPUs -o ${pref}_m100_2k_disk.jf -s 2k --disk -m 100 /dev/fd/0
+head -n 10001 seq1m_0.fa | $JF count -t $nCPUs -o ${pref}_m100_2k_disk.jf -s 2k --disk -m 100 /dev/fd/0
 $JF dump -c ${pref}_m100_2k_disk.jf | cut -d\  -f 1 | sort > ${pref}_m100_2k_disk_ordered
 check ${pref}.md5sum
diff --git a/tests/quality_filter.sh b/tests/quality_filter.sh
index 3144eb3..658d7c8 100644
--- a/tests/quality_filter.sh
+++ b/tests/quality_filter.sh
@@ -6,8 +6,11 @@ cd tests
 sort -k2,2 > ${pref}.md5sum <<EOF
 46c5d23c3560cf908fea997fff8abc1c ${pref}.histo
 46c5d23c3560cf908fea997fff8abc1c ${pref}_q at .histo
+46c5d23c3560cf908fea997fff8abc1c ${pref}_q33.histo
 d41d8cd98f00b204e9800998ecf8427e ${pref}_qi.histo
+d41d8cd98f00b204e9800998ecf8427e ${pref}_q105.histo
 4faa0517f41d7dc808ec0b930fe0d88e ${pref}_qC.histo
+4faa0517f41d7dc808ec0b930fe0d88e ${pref}_q67.histo
 EOF
 
 count_histo () {
@@ -21,5 +24,8 @@ count_histo ${pref}
 count_histo ${pref}_q@ -Q '@'
 count_histo ${pref}_qi -Q 'i'
 count_histo ${pref}_qC -Q 'C'
+count_histo ${pref}_q33 --min-quality 0
+count_histo ${pref}_q105 --min-quality 41
+count_histo ${pref}_q67 --min-quality 34 --quality-start 33
 
 check ${pref}.md5sum
diff --git a/tests/sam.sh b/tests/sam.sh
new file mode 100644
index 0000000..395de56
--- /dev/null
+++ b/tests/sam.sh
@@ -0,0 +1,45 @@
+#! /bin/sh
+
+cd tests
+. ./compat.sh
+
+if ! grep -q '#define HAVE_HTSLIB' ../config.h; then
+    echo "Skip SAM/BAM/CRAM file format test"
+    exit 77
+fi
+
+sort -k2,2 > ${pref}.md5sum <<EOF
+8f8a71e04c27cd88918f11d44d9b3852 ${pref}_sam.histo
+8f8a71e04c27cd88918f11d44d9b3852 ${pref}_bam.histo
+8f8a71e04c27cd88918f11d44d9b3852 ${pref}_cram.histo
+f0faf797cc55add8b6e88ab67bbcf19b ${pref}_sam_qual.histo
+f0faf797cc55add8b6e88ab67bbcf19b ${pref}_bam_qual.histo
+f0faf797cc55add8b6e88ab67bbcf19b ${pref}_cram_qual.histo
+EOF
+
+comp_histo() {
+    set -x
+    suffix=$1
+    $JF count -t $nCPUs -m 20 -s 10M -o ${pref}_${suffix}.jf -C --sam seq10m.${suffix}
+    $JF histo ${pref}_${suffix}.jf > ${pref}_${suffix}.histo
+    $JF count -t $nCPUs -m 20 -s 10M -o ${pref}_${suffix}_qual.jf -C -Q D --sam seq10m.${suffix}
+    $JF histo ${pref}_${suffix}_qual.jf > ${pref}_${suffix}_qual.histo
+}
+
+comp_histo sam
+
+if [ -f seq10m.bam ]; then
+    comp_histo bam
+else
+    cp ${pref}_sam.histo ${pref}_bam.histo
+    cp ${pref}_sam_qual.histo ${pref}_bam_qual.histo
+fi
+
+if [ -f seq10m.cram ]; then
+    comp_histo cram
+else
+    cp ${pref}_sam.histo ${pref}_cram.histo
+    cp ${pref}_sam_qual.histo ${pref}_cram_qual.histo
+fi
+
+check ${pref}.md5sum
diff --git a/unit_tests/test_mer_dna.cc b/unit_tests/test_mer_dna.cc
index 9415219..2bb591a 100644
--- a/unit_tests/test_mer_dna.cc
+++ b/unit_tests/test_mer_dna.cc
@@ -343,7 +343,6 @@ typedef ::testing::Types<VTC<mer_dna_ns::mer_base_dynamic<uint64_t>, 0>,
                          VTC<mer_dna_ns::mer_base_dynamic<unsigned __int128>, 2>,
                          VTC<mer_dna_ns::mer_base_dynamic<unsigned __int128>, 3>,
 #endif
-                         VTC<mer_dna_ns::mer_base_static<uint32_t>, 3>,
                          VTC<mer_dna_ns::mer_base_static<uint64_t>, 0>,
                          VTC<mer_dna_ns::mer_base_static<uint64_t>, 1>,
                          VTC<mer_dna_ns::mer_base_static<uint64_t>, 2>,
diff --git a/unit_tests/test_mer_dna_bloom_counter.cc b/unit_tests/test_mer_dna_bloom_counter.cc
index 7e4ba18..3e02b98 100644
--- a/unit_tests/test_mer_dna_bloom_counter.cc
+++ b/unit_tests/test_mer_dna_bloom_counter.cc
@@ -67,7 +67,10 @@ TYPED_TEST(MerDnaBloomTest, FalsePositive) {
       mer_set.insert(m);
       nb_collisions += bc.insert(m) > 0;
     }
-    EXPECT_GT(error_rate * nb_inserts, nb_collisions);
+    //    EXPECT_GT(error_rate * nb_inserts, nb_collisions);
+    if(error_rate * nb_inserts < nb_collisions)
+      std::cerr << "First insertion high error rate: " << nb_collisions << " > "
+                << (error_rate * nb_inserts) << '\n';
   }
 
   // Second insertion
@@ -80,8 +83,11 @@ TYPED_TEST(MerDnaBloomTest, FalsePositive) {
       nb_collisions += oc > 1;
       nb_errors += oc < 1;
     }
-    EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions);
     EXPECT_EQ((size_t)0, nb_errors);
+    //    EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions);
+    if(2 * error_rate * nb_inserts < nb_collisions)
+      std::cerr << "Second insertion high error rate: " << nb_collisions << " > "
+                << (2 * error_rate * nb_inserts) << '\n';
   }
 
   // Write to file and reload two different ways
@@ -119,7 +125,10 @@ TYPED_TEST(MerDnaBloomTest, FalsePositive) {
       }
     }
     EXPECT_EQ((size_t)0, nb_errors);
-    EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions);
+    //    EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions);
+    if(2 * error_rate * nb_inserts < nb_collisions)
+      std::cerr << "Check known mers high error rate: " << nb_collisions << " > "
+                << (2 * error_rate * nb_inserts) << '\n';
   }
 
   // Check unknown mers
@@ -133,7 +142,10 @@ TYPED_TEST(MerDnaBloomTest, FalsePositive) {
       EXPECT_EQ(check, bc_map.check(m));
       nb_collisions += check > 0;
     }
-    EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions);
+    //    EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions);
+    if(2 * error_rate * nb_inserts < nb_collisions)
+      std::cerr << "Check unknown mers high error rate: " << nb_collisions << " > "
+                << (2 * error_rate * nb_inserts) << '\n';
   }
 }
 
diff --git a/unit_tests/test_mer_iterator.cc b/unit_tests/test_mer_iterator.cc
index 1e1cc52..6e204b8 100644
--- a/unit_tests/test_mer_iterator.cc
+++ b/unit_tests/test_mer_iterator.cc
@@ -17,10 +17,10 @@ struct opened_streams {
   Iterator begin_, end_;
 
   opened_streams(Iterator begin, Iterator end) : begin_(begin), end_(end) { }
-  std::unique_ptr<std::istream> next() {
-    std::unique_ptr<std::istream> res;
+  jellyfish::stream_type next() {
+    jellyfish::stream_type res;
     if(begin_ != end_) {
-      res.reset(*begin_);
+      res.standard.reset(*begin_);
       ++begin_;
     }
     return res;
diff --git a/unit_tests/test_mer_overlap_sequence_parser.cc b/unit_tests/test_mer_overlap_sequence_parser.cc
index c104ac4..58ab9d7 100644
--- a/unit_tests/test_mer_overlap_sequence_parser.cc
+++ b/unit_tests/test_mer_overlap_sequence_parser.cc
@@ -3,6 +3,7 @@
 
 #include <gtest/gtest.h>
 #include <unit_tests/test_main.hpp>
+#include <jellyfish/parser_common.hpp>
 #include <jellyfish/mer_overlap_sequence_parser.hpp>
 
 namespace {
@@ -13,10 +14,10 @@ struct opened_streams {
   Iterator begin_, end_;
 
   opened_streams(Iterator begin, Iterator end) : begin_(begin), end_(end) { }
-  std::unique_ptr<std::istream> next() {
-    std::unique_ptr<std::istream> res;
+  jellyfish::stream_type next() {
+    jellyfish::stream_type res;
     if(begin_ != end_) {
-      res.reset(*begin_);
+      res.standard.reset(*begin_);
       ++begin_;
     }
     return res;
diff --git a/unit_tests/test_whole_sequence_parser.cc b/unit_tests/test_whole_sequence_parser.cc
index ddd2fc3..652d5c1 100644
--- a/unit_tests/test_whole_sequence_parser.cc
+++ b/unit_tests/test_whole_sequence_parser.cc
@@ -12,10 +12,10 @@ struct opened_streams {
   Iterator begin_, end_;
 
   opened_streams(Iterator begin, Iterator end) : begin_(begin), end_(end) { }
-  std::unique_ptr<std::istream> next() {
-    std::unique_ptr<std::istream> res;
+  jellyfish::stream_type next() {
+    jellyfish::stream_type res;
     if(begin_ != end_) {
-      res.reset(*begin_);
+      res.standard.reset(*begin_);
       ++begin_;
     }
     return res;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/jellyfish.git



More information about the debian-med-commit mailing list