[med-svn] [salmon] 02/09: Imported Upstream version 0.7.1+ds1

Tue Aug 30 10:12:29 UTC 2016

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository salmon.

commit 8a4ec888fa8da998925aea000202dc3735d719c0
Author: Andreas Tille <tille at debian.org>
Date:   Tue Aug 30 11:04:22 2016 +0200

    Imported Upstream version 0.7.1+ds1
---
 .clang-format                          |   94 +
 CMakeLists.txt                         |   55 +-
 README.md                              |   10 +-
 cmake/Modules/FindJellyfish.cmake      |   26 +
 cmake/PostInstall.cmake                |   12 +-
 doc/source/ReadLibraryIllustration.png |  Bin 43418 -> 169638 bytes
 doc/source/conf.py                     |   17 +-
 doc/source/file_formats.rst            |  182 ++
 doc/source/index.rst                   |    1 +
 doc/source/library_type.rst            |    2 +-
 doc/source/sailfish.rst                |   89 -
 doc/source/salmon.rst                  |  373 ++-
 docker/Dockerfile                      |   28 +
 docker/build_test.sh                   |    2 +
 include/AlignmentLibrary.hpp           |  121 +-
 include/BAMQueue.tpp                   |   10 +-
 include/BiasParams.hpp                 |   43 +
 include/DistributionUtils.hpp          |   66 +
 include/FASTAParser.hpp                |    4 +-
 include/FastxParser.hpp                |  150 ++
 include/GCFragModel.hpp                |  181 ++
 include/GZipWriter.hpp                 |    5 +
 include/KmerContext.hpp                |   39 +
 include/LibraryFormat.hpp              |   70 +
 include/LibraryTypeDetector.hpp        |  153 ++
 include/LightweightAlignmentDefs.hpp   |   43 +-
 include/PairSequenceParser.hpp         |    2 +-
 include/ReadExperiment.hpp             |  276 ++-
 include/ReadKmerDist.hpp               |   89 +-
 include/ReadLibrary.hpp                |   62 +-
 include/ReadPair.hpp                   |   23 +-
 include/SBModel.hpp                    |   90 +
 include/SGSmooth.hpp                   |   13 +
 include/SalmonConfig.hpp               |    6 +-
 include/SalmonIndex.hpp                |  105 +-
 include/SalmonIndexVersionInfo.hpp     |    2 +-
 include/SalmonMath.hpp                 |    3 +
 include/SalmonOpts.hpp                 |   40 +
 include/SalmonUtils.hpp                |   82 +-
 include/Sampler.hpp                    |    8 +-
 include/SimplePosBias.hpp              |   43 +
 include/Transcript.hpp                 |  352 ++-
 include/TranscriptCluster.hpp          |    3 -
 include/TryableSpinLock.hpp            |   25 +
 include/UnpairedRead.hpp               |    4 +
 include/UtilityFunctions.hpp           |   68 +-
 include/blockingconcurrentqueue.h      |  639 +++--
 include/concurrentqueue.h              |  131 +-
 include/cuckoohash_config.hh           |    6 +
 include/cuckoohash_map.hh              | 1056 +++++----
 include/cuckoohash_util.hh             |  103 +-
 include/lazy_array.hh                  |  119 +
 include/make_unique.hpp                |   45 +
 include/spline.h                       |  404 ++++
 scripts/ConvertBootstrapsToTSV.py      |   95 +
 scripts/compile.sh                     |   36 +
 scripts/fetchRapMap.sh                 |    5 +-
 scripts/make-release.sh                |    2 +-
 scripts/runner.sh                      |   24 +
 src/BuildSalmonIndex.cpp               |   44 +-
 src/CMakeLists.txt                     |  126 +-
 src/CollapsedEMOptimizer.cpp           | 1697 +++++++-------
 src/CollapsedGibbsSampler.cpp          |    2 +-
 src/DistributionUtils.cpp              |   89 +
 src/FASTAParser.cpp                    |   16 +-
 src/FastxParser.cpp                    |  306 +++
 src/GZipWriter.cpp                     |  181 +-
 src/SBModel.cpp                        |  295 +++
 src/SGSmooth.cpp                       |  555 +++++
 src/Salmon.cpp                         |   30 +-
 src/SalmonQuantify.cpp                 | 3711 ++++++++++++++++-------------
 src/SalmonQuantifyAlignments.cpp       |  428 +++-
 src/SalmonUtils.cpp                    | 3969 +++++++++++++++++++++-----------
 src/SequenceBiasModel.cpp              |    3 +-
 src/SimplePosBias.cpp                  |   78 +
 src/merge_files.cc                     |  149 --
 tests/GCSampleTests.cpp                |   61 +
 tests/KmerHistTests.cpp                |   74 +-
 tests/UnitTests.cpp                    |    4 +-
 79 files changed, 12416 insertions(+), 5139 deletions(-)

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..df7597a
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,94 @@
+---
+Language:        Cpp
+# BasedOnStyle:  LLVM
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: false
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:   
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories: 
+  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority:        2
+  - Regex:           '^(<|"(gtest|isl|json)/)'
+    Priority:        3
+  - Regex:           '.*'
+    Priority:        1
+IncludeIsMainRegex: '$'
+IndentCaseLabels: false
+IndentWidth:     2
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Left 
+ReflowComments:  true
+SortIncludes:    true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+TabWidth:        4
+UseTab:          Never
+JavaScriptQuotes: Leave
+...
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 598602b..0a6ec55 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,10 +4,11 @@ enable_testing()
 
 project (Salmon)
 
-set(CPACK_PACKAGE_VERSION "0.6.0")
+set(CPACK_PACKAGE_VERSION "0.7.1")
 set(CPACK_PACKAGE_VERSION_MAJOR "0")
-set(CPACK_PACKAGE_VERSION_MINOR "6")
-set(CPACK_PACKAGE_VERSION_PATCH "0")
+set(CPACK_PACKAGE_VERSION_MINOR "7")
+set(CPACK_PACKAGE_VERSION_PATCH "1")
+set(PROJECT_VERSION ${CPACK_PACKAGE_VERSION})
 set(CPACK_GENERATOR "TGZ")
 set(CPACK_SOURCE_GENERATOR "TGZ")
 set(CPACK_PACKAGE_VENDOR "Stony Brook University")
@@ -26,7 +27,7 @@ SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
 
 ## Set the standard required compile flags
 # Nov 18th --- removed -DHAVE_CONFIG_H
-set (CMAKE_CXX_FLAGS "-pthread -funroll-loops -fPIC -fomit-frame-pointer -Ofast -DRAPMAP_SALMON_SUPPORT -DHAVE_ANSI_TERM -DHAVE_SSTREAM -Wall -Wno-reorder -Wno-unused-variable -std=c++11 -Wreturn-type -Werror=return-type")
+set (CMAKE_CXX_FLAGS "-pthread -ftree-vectorize -funroll-loops -fPIC -fomit-frame-pointer -Ofast -DRAPMAP_SALMON_SUPPORT -DHAVE_ANSI_TERM -DHAVE_SSTREAM -Wall -Wno-unknown-pragmas -Wno-reorder -Wno-unused-variable -std=c++11 -Wreturn-type -Werror=return-type")
 
 ##
 # OSX is strange (some might say, stupid in this regard).  Deal with it's quirkines here.
@@ -235,7 +236,7 @@ endif()
 ##
 # Set the latest version and look for what we need
 ##
-set(Boost_ADDITIONAL_VERSIONS "1.53" "1.53.0" "1.54" "1.55" "1.56" "1.57.0" "1.58")
+set(Boost_ADDITIONAL_VERSIONS "1.53" "1.53.0" "1.54" "1.55" "1.56" "1.57.0" "1.58" "1.59" "1.60" "1.61")
 find_package(Boost 1.53.0 COMPONENTS iostreams filesystem system thread timer chrono program_options serialization)
 message("BOOST_INCLUDEDIR = ${BOOST_INCLUDEDIR}")
 message("BOOST_LIBRARYDIR = ${BOOST_LIBRARYDIR}")
@@ -297,7 +298,7 @@ elseif(FETCH_BOOST)
         INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
         #PATCH_COMMAND patch -p2 < ${CMAKE_CURRENT_SOURCE_DIR}/external/boost156.patch
 	CONFIGURE_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_59_0/bootstrap.sh ${BOOST_CONFIGURE_TOOLSET} ${BOOST_BUILD_LIBS} --prefix=<INSTALL_DIR>
-	BUILD_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_59_0/b2 -d0 -j2 ${BOOST_LIB_SUBSET} toolset=${BOOST_TOOLSET} ${BOOST_EXTRA_FLAGS} cxxflags=${BOOST_CXX_FLAGS} install
+	BUILD_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_59_0/b2 -d0 -j2 ${BOOST_LIB_SUBSET} toolset=${BOOST_TOOLSET} ${BOOST_EXTRA_FLAGS} cxxflags=${BOOST_CXX_FLAGS} link=static install
         BUILD_IN_SOURCE 1
         INSTALL_COMMAND ""
     )
@@ -328,7 +329,7 @@ endif()
 message("BOOST INCLUDE DIR = ${Boost_INCLUDE_DIR}")
 message("BOOST INCLUDE DIRS = ${Boost_INCLUDE_DIRS}")
 message("BOOST LIB DIR = ${Boost_LIBRARY_DIRS}")
-message("BOOST LIBRAREIS = ${Boost_LIBRARIES}")
+message("BOOST LIBRARIES = ${Boost_LIBRARIES}")
 
 set(EXTERNAL_LIBRARY_PATH $CMAKE_CURRENT_SOURCE_DIR/lib)
 
@@ -378,22 +379,24 @@ ExternalProject_Add(libbwa
     BUILD_IN_SOURCE TRUE
 )
 
+find_package(Jellyfish 2.2.6)
+
+if (NOT JELLYFISH_FOUND)
 message("Build system will fetch and build Jellyfish")
 message("==================================================================")
 ExternalProject_Add(libjellyfish
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    DOWNLOAD_COMMAND curl -k -L https://github.com/gmarcais/Jellyfish/releases/download/v2.2.3/jellyfish-2.2.3.tar.gz -o jellyfish-2.2.3.tgz &&
-        rm -fr jellyfish-2.2.3 &&
-        tar -xzvf jellyfish-2.2.3.tgz
-   SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.2.3
+    DOWNLOAD_COMMAND curl -k -L https://github.com/gmarcais/Jellyfish/releases/download/v2.2.6/jellyfish-2.2.6.tar.gz -o jellyfish-2.2.6.tgz &&
+    	rm -fr jellyfish-2.2.6 &&
+     	tar -xzvf jellyfish-2.2.6.tgz
+    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.2.6
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
-    CONFIGURE_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.2.3/configure --prefix=<INSTALL_DIR> CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${JELLYFISH_CXX_FLAGS}
+    CONFIGURE_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.2.6/configure --prefix=<INSTALL_DIR> CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${JELLYFISH_CXX_FLAGS}
     BUILD_COMMAND ${MAKE} CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${JELLYFISH_CXX_FLAGS}
     BUILD_IN_SOURCE 1
-    INSTALL_COMMAND make install &&
-                    cp config.h <INSTALL_DIR>/include/jellyfish-2.2.3/jellyfish/ &&
-                    cp config.h <INSTALL_DIR>/include/
+    INSTALL_COMMAND make install
 )
+endif()
 
 find_package(TBB)
 
@@ -415,7 +418,7 @@ endif()
 message("Build system will fetch and build Intel Threading Building Blocks")
 message("==================================================================")
 # These are useful for the custom install step we'll do later
-set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb43_20140724oss)
+set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb44_20160526oss)
 set(TBB_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
 
 if ("${TBB_COMPILER}" STREQUAL "gcc")
@@ -426,10 +429,10 @@ endif()
 
 ExternalProject_Add(libtbb
 	DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    URL http://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb43_20140724oss_src.tgz
-    DOWNLOAD_COMMAND curl -k -L  http://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb43_20140724oss_src.tgz -o tbb_20140724oss_src.tgz &&
-                     tar -xzvf tbb_20140724oss_src.tgz
-    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb43_20140724oss
+    URL https://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb44_20160526oss_src_0.tgz
+    DOWNLOAD_COMMAND curl -k -L https://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb44_20160526oss_src_0.tgz -o tbb_20160526oss_src.tgz &&
+                    tar -xzvf tbb_20160526oss_src.tgz
+    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb44_20160526oss
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     PATCH_COMMAND "${TBB_PATCH_STEP}"
     CONFIGURE_COMMAND ""
@@ -521,10 +524,10 @@ message("Build system will fetch SPDLOG")
 message("==================================================================")
 ExternalProject_Add(libspdlog
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/spdlog/archive/v1.6.tar.gz -o spdlog-v1.6.tar.gz &&
-                     tar -xzf spdlog-v1.6.tar.gz &&
+    DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/spdlog/archive/v1.0.1.tar.gz -o spdlog-v1.0.1.tar.gz &&
+                     tar -xzf spdlog-v1.0.1.tar.gz &&
                      rm -fr spdlog &&
-                     mv -f  spdlog-1.6 spdlog
+                     mv -f  spdlog-1.0.1 spdlog
     SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/spdlog
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     CONFIGURE_COMMAND ""
@@ -627,3 +630,9 @@ add_subdirectory ( src )
 
 # build a CPack driven installer package
 include (CPack)
+
+set(ARCHIVE_NAME ${CMAKE_PROJECT_NAME}-${PROJECT_VERSION})
+add_custom_target(dist
+    COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
+        | gzip > ${CMAKE_BINARY_DIR}/${ARCHIVE_NAME}.tar.gz
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
diff --git a/README.md b/README.md
index 5dd8ce4..9cc8f04 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,23 @@
 [![Build Status](https://travis-ci.org/COMBINE-lab/salmon.svg?branch=master)](https://travis-ci.org/COMBINE-lab/salmon)
 [![Documentation Status](https://readthedocs.org/projects/salmon/badge/?version=latest)](http://salmon.readthedocs.org/en/latest)
+[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat-square)](http://bioconda.github.io/recipes/salmon/README.html)
 
 What is Salmon?
 ===============
 
 Salmon is a **wicked**-fast program to produce a highly-accurate, transcript-level quantification estimates from 
 RNA-seq data.  Salmon achieves is accuracy and speed via a number of different innovations, including the 
-use of *lightweight* alignments (accurate but fast-to-compute proxies for traditional read alignments) and 
+use of *quasi-mapping* (accurate but fast-to-compute proxies for traditional read alignments), and 
 massively-parallel stochastic collapsed variational inference.  The result is a versatile tool that fits nicely
-into many differnt pipelines.  For example, you can choose to make use of our *lightweight* alignments by providing Salmon with raw sequencing reads, or, if it is more convenient, you can provide Salmon with regular alignments (e.g. 
-computed with your favorite aligner), and it will use the same **wicked**-fast, state-of-the-art inference algorithm 
+into many different pipelines.  For example, you can choose to make use of our *quasi-mapping* algorithm by providing Salmon with raw sequencing reads, or, if it is more convenient, you can provide Salmon with regular alignments (e.g. an **unsorted** BAM file produced with your favorite aligner), and it will use the same **wicked**-fast, state-of-the-art inference algorithm 
 to estimate transcript-level abundances for your experiment.
 
 Give salmon a try!  You can find the latest binary releases [here](https://github.com/COMBINE-lab/salmon/releases).
 
+The current version number of the master branch of Salmon can be found [**here**](http://combine-lab.github.io/salmon/version_info/latest)
+
+**NOTE**: Salmon works by (quasi)-mapping sequencing reads directly to the *transcriptome*.  This means the Salmon index should be built on a set of target transcripts, **not** on the *genome* of the underlying organism.  If indexing appears to be taking a very long time, or using a tremendous amount of memory (which it should not), please ensure that you are not attempting to build an index on the genome of your organism!
+
 Documentation
 ==============
 
diff --git a/cmake/Modules/FindJellyfish.cmake b/cmake/Modules/FindJellyfish.cmake
new file mode 100644
index 0000000..e12b30a
--- /dev/null
+++ b/cmake/Modules/FindJellyfish.cmake
@@ -0,0 +1,26 @@
+###############################################################################
+# Find Jellyfish 
+#
+# This sets the following variables:
+# JELLYFISH_FOUND - True if Jellyfish was found.
+# JELLYFISH_INCLUDE_DIRS - Directories containing the Jellyfish include files.
+# JELLYFISH_DEFINITIONS - Compiler flags for Jellyfish.
+
+find_path(JELLYFISH_INCLUDE_DIR jellyfish
+	HINTS "${JELLYFISH_ROOT}/include" "$ENV{JELLYFISH_ROOT}/include" "/usr/include" "$ENV{PROGRAMFILES}/jellyfish/include")
+
+set(JELLYFISH_INCLUDE_DIRS ${JELLYFISH_INCLUDE_DIR})
+
+include(FindPackageHandleStandardArgs)
+#message("Required Jellyfish version ${Jellyfish_FIND_VERSION}")
+find_package_handle_standard_args(Jellyfish 
+                                 DEFAULT_MSG 
+                                 FOUND_VAR JELLYFISH_FOUND 
+                                  REQUIRED_VARS JELLYFISH_INCLUDE_DIR 
+                                  VERSION_VAR Jellyfish_FOUND_VERSION)
+
+mark_as_advanced(JELLYFISH_INCLUDE_DIR)
+
+if(JELLYFISH_FOUND)
+    message(STATUS "Jellyfish found (include: ${JELLYFISH_INCLUDE_DIRS})")
+endif(JELLYFISH_FOUND)
diff --git a/cmake/PostInstall.cmake b/cmake/PostInstall.cmake
index 62a572d..9a44768 100644
--- a/cmake/PostInstall.cmake
+++ b/cmake/PostInstall.cmake
@@ -4,12 +4,12 @@
 message("\n\n")
 message("Installation complete. Please ensure the following paths are set properly.")	
 message("==========================================================================")
-if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-	message("Fixing library names with install_name_tool")
-	execute_process(COMMAND install_name_tool -add_rpath ${CMAKE_INSTALL_PREFIX}/bin ${CMAKE_INSTALL_PREFIX}/bin/salmon)
-	execute_process(COMMAND install_name_tool -add_rpath ${CMAKE_INSTALL_PREFIX}/lib ${CMAKE_INSTALL_PREFIX}/bin/salmon)
-	execute_process(COMMAND install_name_tool -add_rpath @executable_path ${CMAKE_INSTALL_PREFIX}/bin/salmon) 
-endif()
+#if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+#	message("Fixing library names with install_name_tool")
+#	execute_process(COMMAND install_name_tool -add_rpath ${CMAKE_INSTALL_PREFIX}/bin ${CMAKE_INSTALL_PREFIX}/bin/salmon)
+#	execute_process(COMMAND install_name_tool -add_rpath ${CMAKE_INSTALL_PREFIX}/lib ${CMAKE_INSTALL_PREFIX}/bin/salmon)
+#	execute_process(COMMAND install_name_tool -add_rpath @executable_path ${CMAKE_INSTALL_PREFIX}/bin/salmon) 
+#endif()
 message("Please add ${CMAKE_INSTALL_PREFIX}/bin to your PATH")
 if ("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin")
 	message("Please add ${CMAKE_INSTALL_PREFIX}/lib to your DYLD_FALLBACK_LIBRARY_PATH")
diff --git a/doc/source/ReadLibraryIllustration.png b/doc/source/ReadLibraryIllustration.png
index e4b1c02..3aef58b 100644
Binary files a/doc/source/ReadLibraryIllustration.png and b/doc/source/ReadLibraryIllustration.png differ
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 8909cba..5bbc8c1 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -48,16 +48,16 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'Salmon'
-copyright = u'2015, Rob Patro, Carl Kingsford and Steve Mount'
+copyright = u'2016, Rob Patro, Geet Duggal, Mike Love, Rafael Irizarry and Carl Kingsford'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = '0.5.0'
+version = '0.7.0'
 # The full version, including alpha/beta/rc tags.
-release = '0.5.0'
+release = '0.7.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -102,7 +102,10 @@ pygments_style = 'sphinx'
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'default'
+import sphinx_rtd_theme
+html_theme = "sphinx_rtd_theme"
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+#html_theme = 'classic'
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -231,7 +234,8 @@ latex_documents = [
 # (source start file, name, description, authors, manual section).
 man_pages = [
     ('index', 'salmon', u'Salmon Documentation',
-     [u'Rob Patro, Carl Kingsford and Steve Mount'], 1)
+     [u'Rob Patro, Geet Duggal, Mike Love, Rafael Irizarry and Carl Kingsford'], 1)
+
 ]
 
 # If true, show URL addresses after external links.
@@ -245,7 +249,8 @@ man_pages = [
 #  dir menu entry, description, category)
 texinfo_documents = [
   ('index', 'Salmon', u'Salmon Documentation',
-   u'Rob Patro, Carl Kingsford and Steve Mount', 'Salmon', 'One line description of project.',
+   u'Rob Patro, Geet Duggal, Mike Love, Rafael Irizarry and Carl Kingsford',
+   'Salmon', 'Quantify expression quickly and accurately.',
    'Miscellaneous'),
 ]
 
diff --git a/doc/source/file_formats.rst b/doc/source/file_formats.rst
new file mode 100644
index 0000000..831b107
--- /dev/null
+++ b/doc/source/file_formats.rst
@@ -0,0 +1,182 @@
+.. _FileFormats:
+
+Salmon Output File Formats
+==========================
+
+Quantification File
+-------------------
+
+Salmon's main output is its quantification file.  This file is a plain-text, tab-separated file
+with a single header line (which names all of the columns).  This file is named ``quant.sf`` and
+appears at the top-level of Salmon's output directory. The columns appear in the following order:
+
++------+--------+-----------------+----+----------+
+| Name | Length | EffectiveLength |TPM | NumReads |
++------+--------+-----------------+----+----------+
+
+Each subsequent row describes a single quantification record.  The columns have
+the following interpretation.
+
+* **Name** --- 
+  This is the name of the target transcript provided in the input transcript database (FASTA file). 
+
+* **Length** ---
+  This is the length of the target transcript in nucleotides.
+
+* **EffectiveLength** ---
+  This is the computed *effective* length of the target transcript.  It takes into account 
+  all factors being modeled that will affect the probability of sampling fragments from
+  this transcript, including the fragment length distribution and sequence-specific and 
+  gc-fragment bias (if they are being modeled).
+
+* **TPM** ---
+  This is salmon's estimate of the relative abundance of this transcript in units of Transcripts Per Million (TPM).
+  TPM is the recommended relative abundance measure to use for downstream analysis. 
+
+* **NumReads** --- 
+  This is salmon's estimate of the number of reads mapping to each transcript that was quantified.  It is an "estimate" 
+  insofar as it is the expected number of reads that have originated from each transcript given the structure of the uniquely 
+  mapping and multi-mapping reads and the relative abundance estimates for each transcript.
+
+
+Command Information File
+------------------------
+
+In the top-level quantification directory, there will be a file called ``cmd_info.json``.  This is a
+JSON format file that records the main command line parameters with which Salmon was invoked for the 
+run that produced the output in this directory.
+
+
+Auxiliary File
+--------------
+
+The top-level quantification directory will contain an auxiliary directory called ``aux_info`` (unless 
+the auxiliary directory name was overridden via the command line).  This directory will have a number
+of files (and subfolders) depending on how salmon was invoked.
+
+""""""""""""""""
+Meta information
+""""""""""""""""
+
+The auxiliary directory will contain a JSON format file called
+``meta_info.json`` which contains meta information about the run,
+including stats such as the number of observed and mapped fragments,
+details of the bias modeling etc.  If Salmon was run with automatic
+inference of the library type (i.e. ``--libType A``), then one
+particularly important piece of information contained in this file is
+the inferred library type.  Most of the information recorded in this
+file should be self-descriptive.
+
+
+""""""""""""""""""""""""""""
+Fragment length distribution
+""""""""""""""""""""""""""""
+
+The auxiliary directory will contain a file called ``fld.gz``.  This
+file contains an approximation of the observed fragment length
+distribution.  It is a gzipped, binary file containing integer counts.
+The number of (signed, 32-bit) integers (with machine-native
+endianness) is equal to the number of bins in the fragment length
+distribution (1,001 by default --- for fragments ranging in length
+from 0 to 1,000 nucleotides).
+
+""""""""""""""""""""""""""""
+Sequence-specific bias files
+""""""""""""""""""""""""""""
+
+If sequence-specific bias modeling was enabled, there will be 4 files
+in the auxiliary directory named ``obs5_seq.gz``, ``obs3_seq.gz``,
+``exp5_seq.gz``, ``exp3_seq.gz``.  These encode the parameters of the
+VLMM that were learned for the 5' and 3' fragment ends.  Each file
+is a gzipped, binary file with the same format.
+
+It begins with 3 32-bit signed integers which record the length of the
+context (window around the read start / end) that is modeled, followed
+by the length of the context that is to the left of the read and the
+length of the context that is to the right of the read.
+
+Next, the file contains 3 arrays of 32-bit signed integers (each of which
+has a length equal to the context length recorded above).  The first
+records the order of the VLMM used at each position; the second and third
+record the *shifts* and the *widths* required to extract each sub-context --- these
+are implementation details.
+
+Next, the file contains a matrix that encodes all VLMM probabilities.
+This starts with two signed integers of type ``std::ptrdiff_t``.  This
+is a platform-specific type, but on most 64-bit systems should
+correspond to a 64-bit signed integer.  These numbers denote the number of
+rows (*nrow*) and columns (*ncol*) in the array to follow.
+
+Next, the file contains an array of (*nrow* * *ncol*) doubles which
+represent a dense matrix encoding the probabilities of the VLMM.  Each
+row corresponds to a possible preceding sub-context, and each column
+corresponds to a position in the sequence context.  Unused values
+(values where the length of the sub-context exceeds the order of the
+model at that position) contain a 0.  This array can be re-shaped
+into a matrix of the appropriate size.
+
+Finally, the file contains the marginalized 0\ :sup:`th`-order
+probabilities (i.e. the probability of each nucleotide at each
+position in the context).  This is stored as a 4-by-context length
+matrix.  As before, this entry begins with two signed integers that
+give the number of rows and columns, followed by an array of doubles
+giving the marginal probabilities.  The rows are in lexicographic
+order.
+
+""""""""""""""""""""""
+Fragment-GC bias files
+""""""""""""""""""""""
+
+If Salmon was run with fragment-GC bias correction enabled, the
+auxiliary directory will contain two files named ``expected_gc.gz``
+and ``observed_gc.gz``.  These are gzipped binary files containing,
+respectively, the expected and observed fragment-GC content curves.
+These files both have the same form.  They consist of a 32-bit signed
+int, *dtype* which specifies if the values to follow are in
+logarithmic space or not.  Then, the file contains two signed integers
+of type ``std::ptrdiff_t`` which give the number of rows and columns of
+the matrix to follow.  Finally, there is an array of *nrow* by *ncol*
+doubles.  Each row corresponds to a conditional fragment GC
+distribution, and the number of columns is the number of bins in the
+learned (or expected) fragment-GC distribution.
+
+
+.. _eq-class-file:
+
+""""""""""""""""""""""
+Equivalence class file
+""""""""""""""""""""""
+
+If Salmon was run with the ``--dumpEq`` option, then a file called ``eq_classes.txt``
+will exist in the auxiliary directory.  The format of that file is as follows:
+
+
+::
+   
+   N (num transcripts)
+   M (num equiv classes)
+   tn_1
+   tn_2
+   ...
+   tn_N
+   eq_1_size t_11 t_12 ... count
+   eq_2_size t_21 t_22 ... count
+
+   
+That is, the file begins with a line that contains the number of
+transcripts (say N) then a line that contains the number of
+equivalence classes (say M). It is then followed by N lines that list
+the transcript names --- the order here is important, because the
+labels of the equivalence classes are given in terms of the ID's of
+the transcripts. The rank of a transcript in this list is the ID with
+which it will be labeled when it appears in the label of an
+equivalence class. Finally, the file contains M lines, each of which
+describes an equivalence class of fragments. The first entry in this
+line is the number of transcripts in the label of this equivalence
+class (the number of different transcripts to which fragments in this
+class map --- call this k). The line then contains the k transcript
+IDs. Finally, the line contains the count of fragments in this
+equivalence class (how many fragments mapped to these
+transcripts). The values in each such line are tab separated.
+
+
diff --git a/doc/source/index.rst b/doc/source/index.rst
index ec4abb5..462d033 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -13,6 +13,7 @@ Contents:
    
    building.rst
    salmon.rst
+   file_formats.rst
    library_type.rst
 
 Indices and tables
diff --git a/doc/source/library_type.rst b/doc/source/library_type.rst
index 6cfaefe..9f87076 100644
--- a/doc/source/library_type.rst
+++ b/doc/source/library_type.rst
@@ -7,7 +7,7 @@ There are numerous library preparation protocols for RNA-seq that result in
 sequencing reads with different characteristics.  For example, reads can be
 single end (only one side of a fragment is recorded as a read) or paired-end
 (reads are generated from both ends of a fragment).  Further, the sequencing
-reads themselves may be unstraned or strand-specific.  Finally, paired-end
+reads themselves may be unstranded or strand-specific.  Finally, paired-end
 protocols will have a specified relative orientation.  To characterize the
 various different typs of sequencing libraries, we've created a miniature
 "language" that allows for the succinct description of the many different types
diff --git a/doc/source/sailfish.rst b/doc/source/sailfish.rst
deleted file mode 100644
index d8cc967..0000000
--- a/doc/source/sailfish.rst
+++ /dev/null
@@ -1,89 +0,0 @@
-Sailfish
-================
-
-Sailfish is a tool for transcript quantification from RNA-seq data.  It
-requires a set of target transcripts (either from a reference or *de-novo*
-assembly) to quantify.  All you need to run sailfish is a fasta file containing
-your reference transcripts and a (set of) fasta/fastq file(s) containing your
-reads.  Sailfish runs in two phases; indexing and quantification.  The indexing
-step is independent of the reads, and only needs to be run once for a particular
-set of reference transcripts and choice of k (the k-mer size). The
-quantification step, obviously, is specific to the set of RNA-seq reads and is
-thus run more frequently. For a more complete description of all available
-options in sailfish, see the manual.
-
-
-Indexing
---------
-
-To generate the sailfish index for your reference set of transcripts, you
-should run the following command:
-
-::
-
-    > sailfish index -t <ref_transcripts> -o <out_dir> -k <kmer_len>
-
-
-This will build a sailfish index for k-mers of length ``<kmer_len>`` for the
-reference transcripts  provided in the file ``<ref_transcripts>`` and place the
-index under the directory ``<out_dir>``.  There  are additional options that can
-be passed to the sailfish indexer (e.g. the number of threads to use).  These
-can be seen by executing the command ``sailfish index -h``.
-
-Quantification
---------------
-
-Now that you have generated the sailfish index (say that it's the directory
-``<index_dir>`` --- this corresponds to the <out_dir> argument provided in the
-previous step), you can quantify the transcript expression for a given set of
-reads.  To perform the quantification, you run a command like the following:
-
-::
-
-    > sailfish quant -i <index_dir> -l "<libtype>" {-r <unmated> | -1 <mates1> -2 <mates2>} -o <quant_dir>
-
-Where ``<index_dir>`` is, as described above, the location of the sailfish
-index, ``<libtype>`` is a string describing the format of the fragment (read)
-library (see :ref:`FragLibType`), ``<unmated>`` is a list of files
-containing unmated reads, ``<mates{1,2}>`` are lists of files containg,
-respectively, the first and second mates of paired-end reads. Finally,
-``<quant_dir>`` is the directory where the output should be written. Just like the
-indexing step, additional options are available, and can be viewed by running
-``sailfish quant -h``.
-
-When the quantification step is finished, the directory ``<quant_dir>`` will
-contain a file named "quant.sf" (and, if bias correction is enabled, an
-additional file names "quant_bias_corrected.sf").  This file contains the
-result of the Sailfish quantification step.  This file contains a number of
-columns (which are listed in the last of the header lines beginning with '#').
-Specifically, the columns are (1) Transcript ID, (2) Transcript Length, (3)
-Transcripts per Million (TPM), (4) Reads Per Kilobase per Million mapped reads
-(RPKM), (5) K-mers Per Kilobase per Million mapped k-mers (KPKM), (6) Estimated
-number of k-mers (an estimate of the number of k-mers drawn from this
-transcript given the transcript's relative abundance and length) and (7)
-Estimated number of reads (an estimate of the number of reads drawn from this
-transcript given the transcript's relative abnundance and length).  The first
-two columns are self-explanatory, the next four are measures of transcript
-abundance and the final is a commonly used input for differential expression
-tools.  The Transcripts per Million quantification number is computed as
-described in [1]_, and is meant as an estimate of the number of transcripts, per
-million observed transcripts, originating from each isoform.  Its benefit over
-the K/RPKM measure is that it is independent of the mean expressed transcript
-length (i.e. if the mean expressed transcript length varies between samples,
-for example, this alone can affect differential analysis based on the K/RPKM.)
-The RPKM is a classic measure of relative transcript abundance, and is an
-estimate of the number of reads per kilobase of transcript (per million mapped
-reads) originating from each transcript. The KPKM should closely track the
-RPKM, but is defined for very short features which are larger than the chosen
-k-mer length but may be shorter than the read length. Typically, you should
-prefer the KPKM measure to the RPKM measure, since the k-mer is the most
-natural unit of coverage for Sailfish.
-
-References
-----------
-
-.. [1] Li, Bo, et al. "RNA-Seq gene expression estimation with read mapping uncertainty."
-    Bioinformatics 26.4 (2010): 493-500.
-
-.. _CMake : http://www.cmake.org
-.. _Boost: http://www.boost.org
diff --git a/doc/source/salmon.rst b/doc/source/salmon.rst
index 34dce1d..7138d34 100644
--- a/doc/source/salmon.rst
+++ b/doc/source/salmon.rst
@@ -9,7 +9,7 @@ containing your reads.  Optionally, Salmon can make use of pre-computed
 alignments (in the form of a SAM/BAM file) to the transcripts rather than the
 raw reads.
 
-The **lightweight-alignment**-based mode of Salmon runs in two phases; indexing and
+The **quasi-mapping**-based mode of Salmon runs in two phases; indexing and
 quantification. The indexing step is independent of the reads, and only need to
 be run one for a particular set of reference transcripts. The quantification
 step, obviously, is specific to the set of RNA-seq reads and is thus run more
@@ -31,7 +31,7 @@ set of alignments.
 
 .. note:: Read / alignment order
 
-    Salmon, like eXpress, uses a streaming inference method to perform 
+    Salmon, like eXpress [#express]_, uses a streaming inference method to perform 
     transcript-level quantification.  One of the fundamental assumptions 
     of such inference methods is that observations (i.e. reads or alignments)
     are made "at random".  This means, for example, that alignments should 
@@ -58,41 +58,69 @@ set of alignments.
     8 --- 12 threads results in the maximum speed, threads allocated above this
     limit will likely spend most of their time idle / sleeping.
 
-    For lightweight-alignment-based Salmon, the story is somewhat different.
+    For quasi-mapping-based Salmon, the story is somewhat different.
     Generally, performance continues to improve as more threads are made
     available.  This is because the determiniation of the potential mapping
     locations of each read is, generally, the slowest step in
-    lightweight-alignment-based quantification.  Since this process is
+    quasi-mapping-based quantification.  Since this process is
     trivially parallelizable (and well-parallelized within Salmon), more
     threads generally equates to faster quantification. However, there may
-    still be a limit to the return on invested threads. Specifically, writing
-    to the mapping cache (see `Misc`_ below) is done via a single thread.  With
-    a huge number of quantification threads or in environments with a very slow
-    disk, this may become the limiting step. If you're certain that you have
-    more than the required number of observations, or if you have reason to
-    suspect that your disk is particularly slow on writes, then you can disable
-    the mapping cache (``--disableMappingCache``), and potentially increase the
-    parallelizability of lightweight-alignment-based Salmon.
-
-Lightweight-alignment-based mode (including quasimapping)
----------------------------------------------------------
+    still be a limit to the return on invested threads, when Salmon can begin
+    to process fragments more quickly than they can be provided via the parser.
+ 
+    
+"""""""""""""""""""""""""""""""""""""""
+Providing multiple read files to Salmon
+"""""""""""""""""""""""""""""""""""""""
+
+Often, a single library may be split into multiple FASTA/Q files.  Also, sometimes one may wish
+to quantify multiple replicates or samples together, treating them as if they are one library.
+Salmon allows the user to provide a *space-separated* list of read files to all of its options
+that expect input files (i.e. ``-r``, ``-1``, ``-2``).  When the input is paired-end reads, the
+order of the files in the left and right lists must be the same.  There are a number of ways to
+provide salmon with multiple read files, and treat these as a single library.  For the examples
+below, assume we have two replicates ``lib_1`` and ``lib_2``.  The left and right reads for
+``lib_1`` are ``lib_1_1.fq`` and ``lib_1_2.fq``, respectively.  The left and right reads for
+``lib_2`` are ``lib_2_1.fq`` and ``lib_2_2.fq``, respectively.  The following are both valid
+ways to input these reads to Salmon::
+
+  > salmon quant -i index -l IU -1 lib_1_1.fq lib_2_1.fq -2 lib_1_2.fq lib_2_2.fq -o out
+
+  > salmon quant -i index -l IU -1 <(cat lib_1_1.fq lib_2_1.fq) -2 <(cat lib_1_2.fq lib_2_2.fq) -o out
+
+Similarly, both of these approaches can be adopted if the files are gzipped as well::
+
+   > salmon quant -i index -l IU -1 lib_1_1.fq.gz lib_2_1.fq.gz -2 lib_1_2.fq.gz lib_2_2.fq.gz -o out
+
+   > salmon quant -i index -l IU -1 <(gunzip -c lib_1_1.fq.gz lib_2_1.fq.gz) -2 <(gunzip -c lib_1_2.fq.gz lib_2_2.fq.gz) -o out
+
+In each pair of commands, the first command lets Salmon natively parse the files, while the latter command
+creates, on-the-fly, an input stream that consists of the concatenation of both files.  Both methods work, and
+are acceptable ways to merge the files.  The latter method (i.e. process substitution) allows more complex
+processing to be done to the reads in the substituted process before they are passed to Salmon as input, and thus,
+in some situations, is more versatile.
+
+    
+Quasi-mapping-based mode (including lightweight alignment)
+----------------------------------------------------------
 
 One of the novel and innovative features of Salmon is its ability to accurately
-quantify transcripts using *lightweight* alignments.  Lightweight alignments
+quantify transcripts using *quasi-mappings*. Quasi-mappings 
 are mappings of reads to transcript positions that are computed without
-performing a base-to-base alignment of the read to the transcript.  Lightweight 
-alignments are typically much faster to compute than traditional (or full)
+performing a base-to-base alignment of the read to the transcript.  Quasi-mapping
+is typically **much** faster to compute than traditional (or full)
 alignments, and can sometimes provide superior accuracy by being more robust 
-to errors in the read or genomic variation from the reference sequence.
+to errors in the read or genomic variation from the reference sequence.  More details
+about quasi-mappings, and how they are computed, can be found `here <http://bioinformatics.oxfordjournals.org/content/32/12/i192.full>`_.
 
-Salmon currently supports two different methods for lightweight-alignment; 
-SMEM-based mapping and quasi-mapping.  SMEM-based mapping is the original 
+Salmon currently supports two different methods for mapping reads to transcriptomes;
+(SMEM-based) lightweight-alignment and quasi-mapping.  SMEM-based mapping is the original 
 lightweight-alignment method used by Salmon, and quasi-mapping is a newer and 
 considerably faster alternative.  Both methods are currently exposed via the 
 same ``quant`` command, but the methods require different indices so that 
 SMEM-based mapping cannot be used with a quasi-mapping index and vice-versa.
 
-If you want to use Salmon in lightweight alignment-based mode, then you first
+If you want to use Salmon in quasi-mapping-based mode, then you first
 have to build an Salmon index for your transcriptome.  Assume that
 ``transcripts.fa`` contains the set of transcripts you wish to quantify. First,
 you run the Salmon indexer:
@@ -111,15 +139,18 @@ to work well for reads of 75bp or longer, but you might consider a smaller
 `k` parameter that can be passed to the ``quant`` command.  However, this has
 no effect if one is using a quasi-mapping index, as the `k` value provided
 during the index building phase overrides any `k` provided during
-quantification in this case.
+quantification in this case.  Since quasi-mapping is the default index type in 
+Salmon, you can actually leave off the ``--type quasi`` parameter when building 
+the index.  To build a lightweight-alignment (FMD-based) index instead, one
+would use the following command:
 
 ::
     
     > ./bin/salmon index -t transcripts.fa -i transcripts_index --type fmd
 
-This will build the SMEM-based mapping index.  Note that no value of `k` 
-is given here.  However, the SMEM-based mapping index makes use of a parameter 
-`k` that is passed in during the ``quant`` phase (the default value is `19`). 
+Note that no value of `k` is given here.  However, the SMEM-based mapping index
+makes use of a parameter `k` that is passed in during the ``quant`` phase (the
+default value is `19`). 
 
 Then, you can quantify any set of reads (say, paired-end reads in files
 `reads1.fq` and `reads2.fq`) directly against this index using the Salmon
@@ -221,19 +252,82 @@ bootstrapping / posterior sampling (if enabled).  Salmon is designed to work
 well with many threads, so, if you have a sufficient number of processors, larger
 values here can speed up the run substantially.
 
+""""""""""""
+``--dumpEq``
+""""""""""""
+
+If Salmon is passed the ``--dumpEq`` option, it will write a file in the auxiliary
+directory, called ``eq_classes.txt`` that contains the equivalence classes and corresponding
+counts that were computed during quasi-mapping.  The file has a format described in
+:ref:`eq-class-file`.
+
+
+"""""""""""""
+``--fldMean``
+"""""""""""""
+*Note* : This option is only important when running Salmon with single-end reads.
+
+Since the empirical fragment length distribution cannot be estimated
+from the mappings of single-end reads, the ``--fldMean`` allows the
+user to set the expected mean fragment length of the sequencing
+library.  This value will affect the effective length correction, and
+hence the estimated effective lengths of the transcripts and the TPMs.
+The value passed to ``--fldMean`` will be used as the mean of the assumed
+fragment length distribution (which is modeled as a truncated Gaussian with
+a standard deviation given by ``--fldSD``).
+
+
+"""""""""""
+``--fldSD``
+"""""""""""
+
+*Note* : This option is only important when running Salmon with single-end reads.
+
+Since the empirical fragment length distribution cannot be estimated
+from the mappings of single-end reads, the ``--fldSD`` allows the user
+to set the expected standard deviation of the fragment length
+distribution of the sequencing library.  This value will affect the
+effective length correction, and hence the estimated effective lengths
+of the transcripts and the TPMs.  The value passed to ``--fldSD`` will
+be used as the standard deviation of the assumed fragment length
+distribution (which is modeled as a truncated Gaussian with a mean
+given by ``--fldMean``).
+
 
 """"""""""""""
 ``--useVBOpt``
 """"""""""""""
 
-Use the variational Bayesian EM algorithm rather than the "standard" EM algorithm
-to optimize abundance estimates.  The details of the VBEM algorithm can be found
-in [2]_, and the details of the variant over fragment equivalence classes that
-we use can be found in [3]_.  While both the standard EM and the VBEM produce
-accurate abundance estimates, those produced by the VBEM seem, generally, to be
-a bit more accurate.  Further, the VBEM tends to converge after fewer iterations,
-so it may result in a shorter runtime; especially if you are computing many
-bootstrap samples. 
+Use the variational Bayesian EM algorithm rather than the "standard"
+EM algorithm to optimize abundance estimates.  The details of the VBEM
+algorithm can be found in [#salmon]_.  While both the standard EM and
+the VBEM produce accurate abundance estimates, there are some
+trade-offs between the approaches.  The EM algorithm tends to produce
+sparser estimates (i.e. more transcripts estimated to have 0
+abundance), while the VBEM, in part due to the prior, tends to
+estimate non-zero abundance for more transcripts.  Conversely, the
+prior used in the VBEM tends to have a regularizing effect, especially
+for low abundance transcripts, that leads to more consistent estimates
+of abundance at low expression levels.  We are currently working to
+analyze and understand all the tradeoffs between these different optimization
+approaches.  Also, the VBEM tends to converge after fewer iterations,
+so it may result in a shorter runtime; especially if you are computing
+many bootstrap samples.
+
+The default prior used in the VB optimization is a *per-nucleotide* prior
+of 0.001 per nucleotide.  This means that a transcript of length 1000 will
+have a prior count of 1 fragment, while a transcript of length 500 will have
+a prior count of 0.5 fragments, etc.  This behavior can be modified in two
+ways.  First, the prior itself can be modified via Salmon's ``--vbPrior``
+option.  The argument to this option is the value you wish to place as the
+*per-nucleotide* prior.  Additionally, you can modify the behavior to use
+a *per-transcript* rather than a *per-nucleotide* prior by passing the flag
+``--perTranscriptPrior`` to Salmon.  In this case, whatever value is set
+by ``--vbPrior`` will be used as the transcript-level prior, so that the
+prior count is no longer dependent on the transcript length.  However,
+the default behavior of a *per-nucleotide* prior is recommended when
+using VB optimization.
+
 
 """""""""""""""""""
 ``--numBootstraps``
@@ -262,17 +356,142 @@ to assess the potential trade-offs in time / accuracy.  The ``--numBootstraps``
 ``--numGibbsSamples`` options are mutually exclusive (i.e. in a given run, you must
 set at most one of these options to a positive integer.)
 
+"""""""""""""""""""""
+``--seqBias``
+"""""""""""""""""""""
+
+Passing the ``--seqBias`` flag to Salmon will enable it to learn and
+correct for sequence-specific biases in the input data.  Specifically,
+this model will attempt to correct for random hexamer priming bias,
+which results in the preferential sequencing of fragments starting
+with certain nucleotide motifs.  By default, Salmon learns the
+sequence-specific bias parameters using 1,000,000 reads from the
+beginning of the input.  If you wish to change the number of samples
+from which the model is learned, you can use the ``--numBiasSamples``
+parameter. Salmon uses a variable-length Markov Model
+(VLMM) to model the sequence specific biases at both the 5' and 3' end
+of sequenced fragments. This methodology generally follows that of
+Roberts et al. [#roberts]_, though some details of the VLMM differ.
+
+*Note*: This sequence-specific bias model is substantially different
+from the bias-correction methodology that was used in Salmon versions
+prior to 0.6.0.  This model specifically accounts for
+sequence-specific bias, and should not be prone to the over-fitting
+problem that was sometimes observed using the previous bias-correction
+methodology.
 
+"""""""""""""""""""""
+``--gcBias``
+"""""""""""""""""""""
+
+Passing the ``--gcBias`` flag to Salmon will enable it to learn and
+correct for fragment-level GC biases in the input data.  Specifically,
+this model will attempt to correct for biases in how likely a sequence
+is to be observed based on its internal GC content.  This bias is
+distinct from the primer biases learned with the ``--seqBias`` option.
+Though these biases are distinct, they are not completely independent.
+When both ``--seqBias`` and ``--gcBias`` are enabled, Salmon will
+learn a conditional fragment-GC bias model.  By default, Salmon will
+learn 3 different fragment-GC bias models based on the GC content of
+the fragment start and end contexts, though this number of conditional
+models can be changed with the (*hidden*) option
+``--conditionalGCBins``.  Likewise, the number of distinct fragment GC
+bins used to model the GC bias can be changed with the (*hidden*)
+option ``--numGCBins``.
+
+*Note* : In order to speed up the evaluation of the GC content of
+arbitrary fragments, Salmon pre-computes and stores the cumulative GC
+count for each transcript.  This requires an extra 4-bytes per
+nucleotide.  While this extra memory usage should normally be minor,
+it can nonetheless be controlled with the ``--gcSizeSamp`` option.
+This option takes a positive integer argument *i*, such that Salmon
+stores the values of the cumulative GC count only at every
+*i*:sup:`th` nucleotide.  The cumulative GC counts at positions between
+the sampled ones are recomputed on-the-fly when necessary.  Using
+this option will reduce the memory required to store the GC
+information by a factor of *i*, but will slow down the computation of
+GC-fragment content by a factor of *i*/2.  Typically, the
+``--gcSizeSamp`` can be left at its default value of 1, but larger
+values can be chosen if necessary.
+
+"""""""""""""""""""""
+``--posBias``
+"""""""""""""""""""""
+
+Passing the ``--posBias`` flag to Salmon will enable modeling of a
+position-specific fragment start distribution.  This is meant to model
+non-uniform coverage biases that are sometimes present in RNA-seq data
+(e.g. 5' or 3' positional bias).  Currently, a small and fixed number
+of models are learned for different length classes of transcripts, as
+is done in Roberts et al. [#roberts]_. *Note*: The positional bias
+model is relatively new, and is still undergoing testing.  It replaces
+the previous ``--useFSPD`` option, which is now deprecated.  This
+feature should be considered as *experimental* in the current release.
+
+
+"""""""""""""""""""
+``--biasSpeedSamp``
+"""""""""""""""""""
+
+When evaluating the bias models (the GC-fragment model specifically),
+Salmon must consider the probability of generating a fragment of every
+possible length (with a non-trivial probability) from every position
+on every transcript.  This results in a process that is quadratic in
+the length of the transcriptome --- though each evaluation itself is
+efficient and the process is highly parallelized.
+
+It is possible to speed this process up by a multiplicative factor by
+considering only every *i*:sup:`th` fragment length, and interpolating
+the intermediate results.  The ``--biasSpeedSamp`` option allows the
+user to set this sampling factor.  Larger values speed up effective
+length correction, but may decrease the fidelity of bias modeling.
+However, reasonably small values (e.g. 10 or less) should have only a
+minor effect on the computed effective lengths, and can considerably
+speed up effective length correction on large transcriptomes.
+
+""""""""""""""""""""""""
+``--writeUnmappedNames``
+""""""""""""""""""""""""
+
+Passing the ``--writeUnmappedNames`` flag to Salmon will tell Salmon to
+write out the names of reads (or mates in paired-end reads) that do not
+map to the transcriptome.  When mapping paired-end reads, the entire
+fragment (both ends of the pair) is identified by the name of the first
+read (i.e. the read appearing in the ``_1`` file).  Each line of the unmapped
+reads file contains the name of the unmapped read followed by a simple flag
+that designates *how* the read failed to map completely.  For single-end
+reads, the only valid flag is ``u`` (unmapped).  However, for paired-end
+reads, there are a number of different possibilities, outlined below:
+
+::
+   
+   u   = The entire pair was unmapped. No mappings were found for either the left or right read.
+   m1  = Left orphan (mappings were found for the left (i.e. first) read, but not the right).
+   m2  = Right orphan (mappings were found for the right read, but not the left).
+   m12 = Left and right orphans. Both the left and right read mapped, but never to the same transcript. 
+
+By reading through the file of unmapped reads and selecting the appropriate
+sequences from the input FASTA/Q files, you can build an "unmapped" file that
+can then be used to investigate why these reads may not have mapped
+(e.g. poor quality, contamination, etc.).  Currently, this process must be
+done independently, but future versions of Salmon may provide a script to
+generate this unmapped FASTA/Q file from the unmapped file and the original
+inputs.
+
+   
 What's this ``LIBTYPE``?
 ------------------------
 
-Salmon, like sailfish, has the user provide a description of the type of
-sequencing library from which the reads come, and this contains information
-about e.g. the relative orientation of paired end reads.  However, we've
-replaced the somewhat esoteric description of the library type with a simple
-set of strings; each of which represents a different type of read library. This
-new method of specifying the type of read library is being back-ported into
-Sailfish and will be available in the next release.
+Salmon has the user provide a description of the type of sequencing
+library from which the reads come, and this contains information about
+e.g. the relative orientation of paired end reads.  As of version
+0.7.0, Salmon also has the ability to automatically infer (i.e. guess)
+the library type based on how the first few thousand reads map to the
+transcriptome.  To allow Salmon to automatically infer the library
+type, simply provide ``-l A`` or ``--libType A`` to Salmon.  Even if you
+allow Salmon to infer the library type for you, you should still read
+the section below, so that you can interpret how Salmon reports the
+library type it discovers.
 
 The library type string consists of three parts: the relative orientation of
 the reads, the strandedness of the library, and the directionality of the
@@ -334,52 +553,46 @@ For more details on the library type, see :ref:`FragLibType`.
 Output
 ------
 
-Salmon writes its output in a simple tab-delimited file format.  Any line that begins 
-with a ``#`` is a comment line, and can be safely ignored.  Salmon records the files
-and options passed to it in comments at the top of its output file.  The last comment 
-line gives the names of each of the data columns. The columns appear in the following order: 
+For details of Salmon's different output files and their formats see :ref:`FileFormats`.
 
-+------+--------+-----+----------+
-| Name | Length | TPM | NumReads |
-+------+--------+-----+----------+
-
-Each subsequent row described a single quantification record.  The columns have
-the following interpretation.
-
-* **Name** --- 
-  This is the name of the target transcript provided in the input transcript database (FASTA file). 
+Misc
+----
 
-* **Length** ---
-  This is the length of the target transcript in nucleotides.
+Salmon, in *quasi-mapping*-based mode, can accept reads from FASTA/Q
+format files, or directly from gzipped FASTA/Q files (the ability to
+accept compressed files directly is a feature of Salmon 0.7.0 and
+higher).  If your reads are compressed in a different format, you can
+still stream them directly to Salmon by using process substitution.
+Say in the *quasi-mapping*-based Salmon example above, the reads were
+actually in the files ``reads1.fa.bz2`` and ``reads2.fa.bz2``, then
+you'd run the following command to decompress the reads "on-the-fly":
 
-* **TPM** ---
-  This is salmon's estimate of the relative abundance of this transcript in units of Transcripts Per Million (TPM).
-  TPM is the recommended relative abundance measure to use for downstream analysis. 
+::
 
-* **NumReads** --- 
-  This is salmon's estimate of the number of reads mapping to each transcript that was quantified.  It is an "estimate" 
-  insofar as it is the expected number of reads that have originated from each transcript given the structure of the uniquely 
-  mapping and multi-mapping reads and the relative abundance estimates for each transcript.
+    > ./bin/salmon quant -i transcripts_index -l <LIBTYPE> -1 <(bunzip2 -c reads1.fa.bz2) -2 <(bunzip2 -c reads2.fa.bz2) -o transcripts_quant
 
-Misc
-----
+and the bzipped files will be decompressed via separate processes and
+the raw reads will be fed into Salmon.  Actually, you can use this
+same process even with gzip compressed reads (replacing ``bunzip2``
+with ``gunzip`` or ``pigz -d``).  Depending on the number of threads
+and the exact configuration, this may actually improve Salmon's
+running time, since the reads are decompressed concurrently in a
+separate process when you use process substitution.
 
-Salmon deals with reading from compressed read files in the same way as
-sailfish --- by using process substitution.  Say in the
-lightweigh-alignment-based salmon example above, the reads were actually in the
-files ``reads1.fa.gz`` and ``reads2.fa.gz``, then you'd run the following
-command to decompress the reads "on-the-fly":
+**Finally**, the purpose of making this software available is for
+people to use it and provide feedback.  The
+`pre-print describing this method is on bioRxiv <http://biorxiv.org/content/early/2015/10/03/021592>`_.
+If you have something useful to report or just some interesting ideas
+or suggestions, please contact us (`rob.patro at cs.stonybrook.edu`
+and/or `carlk at cs.cmu.edu`).  If you encounter any bugs, please file a
+*detailed* bug report at the `Salmon GitHub repository <https://github.com/COMBINE-lab/salmon>`_.
 
-::
 
-    > ./bin/salmon quant -i transcripts_index -l <LIBTYPE> -1 <(gzcat reads1.fa.gz) -2 <(gzcat reads2.fa.gz) -o transcripts_quant
+References
+----------
 
-and the gzipped files will be decompressed via separate processes and the raw
-reads will be fed into salmon.
+.. [#express] Roberts, Adam, and Lior Pachter. "Streaming fragment assignment for real-time analysis of sequencing experiments." Nature methods 10.1 (2013): 71-73.
+   
+.. [#roberts] Roberts, Adam, et al. "Improving RNA-Seq expression estimates by correcting for fragment bias." Genome biology 12.3 (2011): 1.
 
-**Finally**, the purpose of making this software available is for people to use
-it and provide feedback.  The `pre-print describing this method is on bioRxiv <http://biorxiv.org/content/early/2015/10/03/021592>`_.
-If you have something useful to report or just some interesting ideas or
-suggestions, please contact us (`rob.patro at cs.stonybrook.edu` and/or
-`carlk at cs.cmu.edu`).  If you encounter any bugs, please file a *detailed*
-bug report at the `Salmon GitHub repository <https://github.com/COMBINE-lab/salmon>`_. 
+.. [#salmon] Patro, Rob, et al. "Salmon provides accurate, fast, and bias-aware transcript expression estimates using dual-phase inference." bioRxiv (2016).
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..cf5a534
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,28 @@
+# image: COMBINE-lab/salmon
+# This dockerfile is based on the one created by
+# Titus Brown (available at https://github.com/ctb/2015-docker-building/blob/master/salmon/Dockerfile)
+FROM ubuntu:15.10
+MAINTAINER salmon.maintainer at gmail.com
+
+ENV PACKAGES git gcc make g++ cmake libboost-all-dev liblzma-dev libbz2-dev \
+    ca-certificates zlib1g-dev curl unzip autoconf
+ENV SALMON_VERSION 0.7.1
+
+# salmon binary ends up under /home/salmon-${SALMON_VERSION}/bin (the directory put on PATH below)
+
+### don't modify things below here for version updates etc.
+
+WORKDIR /home
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ${PACKAGES} && \
+    apt-get clean
+# ca-certificates is installed above, so curl can verify the server certificate (no -k).
+RUN curl -L https://github.com/COMBINE-lab/salmon/archive/v${SALMON_VERSION}.tar.gz -o salmon-v${SALMON_VERSION}.tar.gz && \
+    tar xzf salmon-v${SALMON_VERSION}.tar.gz && \
+    cd salmon-${SALMON_VERSION} && \
+    mkdir build && \
+    cd build && \
+    cmake .. && make && make install
+
+ENV PATH /home/salmon-${SALMON_VERSION}/bin:${PATH}
diff --git a/docker/build_test.sh b/docker/build_test.sh
new file mode 100644
index 0000000..704ee2b
--- /dev/null
+++ b/docker/build_test.sh
@@ -0,0 +1,2 @@
+#! /bin/bash
+docker build -t combinelab/salmon:0.7.1 .
diff --git a/include/AlignmentLibrary.hpp b/include/AlignmentLibrary.hpp
index 28f094c..bd4beea 100644
--- a/include/AlignmentLibrary.hpp
+++ b/include/AlignmentLibrary.hpp
@@ -9,6 +9,9 @@ extern "C" {
 }
 
 // Our includes
+#include "DistributionUtils.hpp"
+#include "GCFragModel.hpp"
+#include "SBModel.hpp"
 #include "ClusterForest.hpp"
 #include "Transcript.hpp"
 #include "BAMQueue.hpp"
@@ -25,6 +28,7 @@ extern "C" {
 #include "EquivalenceClassBuilder.hpp"
 #include "SpinLock.hpp" // RapMap's with try_lock
 #include "ReadKmerDist.hpp"
+#include "SimplePosBias.hpp"
 
 // Boost includes
 #include <boost/filesystem.hpp>
@@ -58,10 +62,16 @@ class AlignmentLibrary {
         libFmt_(libFmt),
         transcripts_(std::vector<Transcript>()),
     	fragStartDists_(5),
+        posBiasFW_(5),
+        posBiasRC_(5),
         seqBiasModel_(1.0),
     	eqBuilder_(salmonOpts.jointLog),
         quantificationPasses_(0),
-        expectedBias_(constExprPow(4, readBias_.getK()), 1.0) {
+        expectedBias_(constExprPow(4, readBias_[0].getK()), 1.0),
+	expectedGC_( salmonOpts.numConditionalGCBins,
+		    salmonOpts.numFragGCBins, distribution_utils::DistributionSpace::LOG),
+        observedGC_( salmonOpts.numConditionalGCBins,
+		    salmonOpts.numFragGCBins, distribution_utils::DistributionSpace::LOG) {
             namespace bfs = boost::filesystem;
 
             // Make sure the alignment file exists.
@@ -111,7 +121,7 @@ class AlignmentLibrary {
 
             fmt::print(stderr, "Populating targets from aln = {}, fasta = {} . . .",
                        alnFiles.front(), transcriptFile_);
-            fp.populateTargets(transcripts_);
+            fp.populateTargets(transcripts_, salmonOpts);
 	    for (auto& txp : transcripts_) {
 		    // Length classes taken from
 		    // ======
@@ -164,15 +174,17 @@ class AlignmentLibrary {
         return eqBuilder_;
     }
 
+    // TODO: Make same as mapping-based
     void updateTranscriptLengthsAtomic(std::atomic<bool>& done) {
         if (sl_.try_lock()) {
             if (!done) {
-                auto& fld = *(flDist_.get());
+
+                auto fld = flDist_.get();
+                // Convert the PMF to non-log scale
                 std::vector<double> logPMF;
                 size_t minVal;
                 size_t maxVal;
-                double logFLDMean = fld.mean();
-                fld.dumpPMF(logPMF, minVal, maxVal);
+                fld->dumpPMF(logPMF, minVal, maxVal);
                 double sum = salmon::math::LOG_0;
                 for (auto v : logPMF) {
                     sum = salmon::math::logAdd(sum, v);
@@ -180,10 +192,28 @@ class AlignmentLibrary {
                 for (auto& v : logPMF) {
                     v -= sum;
                 }
+
+                // Create the non-logged distribution.
+                // Here, we multiply by 100 to discourage small
+                // numbers in the correctionFactorsFromMass call
+                // below.
+                std::vector<double> pmf(maxVal + 1, 0.0);
+                for (size_t i = minVal; i < maxVal; ++i) {
+                    pmf[i] = 100.0 * std::exp(logPMF[i - minVal]);
+                }
+
+		using distribution_utils::DistributionSpace;
+		// We compute the factors in linear space (since we've de-logged the pmf)
+                auto correctionFactors = distribution_utils::correctionFactorsFromMass(pmf, DistributionSpace::LINEAR);
+		// Since we'll continue treating effective lengths in log space, populate them as such
+		distribution_utils::computeSmoothedEffectiveLengths(pmf.size(), transcripts_, correctionFactors, DistributionSpace::LOG);
+		
+		/*
                 // Update the effective length of *every* transcript
                 for( auto& t : transcripts_ ) {
                     t.updateEffectiveLength(logPMF, logFLDMean, minVal, maxVal);
                 }
+		*/
                 // then declare that we are done
                 done = true;
                 sl_.unlock();
@@ -260,23 +290,74 @@ class AlignmentLibrary {
     }
 
     inline LibraryFormat format() { return libFmt_; }
+    inline const LibraryFormat format() const { return libFmt_; }
 
-    void setExpectedBias(const std::vector<double>& expectedBiasIn) {
-        expectedBias_ = expectedBiasIn;
-    }
+    void setGCFracForward(double fracForward) { gcFracFwd_ = fracForward; }
+
+    double gcFracFwd() const { return gcFracFwd_; }
+    double gcFracRC() const { return 1.0 - gcFracFwd_; }
 
-    std::vector<double>& expectedBias() {
+    std::vector<double>& expectedSeqBias() {
         return expectedBias_;
     }
 
-    const std::vector<double>& expectedBias() const {
+    const std::vector<double>& expectedSeqBias() const {
         return expectedBias_;
     }
 
-    ReadKmerDist<6, std::atomic<uint32_t>>& readBias() { return readBias_; }
-    const ReadKmerDist<6, std::atomic<uint32_t>>& readBias() const { return readBias_; }
+    void setExpectedGCBias(const GCFragModel& expectedBiasIn) {
+        expectedGC_ = expectedBiasIn;
+    }
+
+    GCFragModel& expectedGCBias() {
+        return expectedGC_;
+    }
+
+    const GCFragModel& expectedGCBias() const {
+        return expectedGC_;
+    }
 
+    const GCFragModel& observedGC() const {
+        return observedGC_;
+    }
+
+    GCFragModel& observedGC() {
+        return observedGC_;
+    }
 
+    std::vector<SimplePosBias>& posBias(salmon::utils::Direction dir) { 
+        return (dir == salmon::utils::Direction::FORWARD) ? posBiasFW_ : posBiasRC_; 
+    }
+    const std::vector<SimplePosBias>& posBias(salmon::utils::Direction dir) const { 
+        return (dir == salmon::utils::Direction::FORWARD) ? posBiasFW_ : posBiasRC_; 
+    }
+
+    ReadKmerDist<6, std::atomic<uint32_t>>& readBias(salmon::utils::Direction dir) { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBias_[0] : readBias_[1]; 
+    }
+    const ReadKmerDist<6, std::atomic<uint32_t>>& readBias(salmon::utils::Direction dir) const { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBias_[0] : readBias_[1]; 
+    }
+
+    SBModel& readBiasModelObserved(salmon::utils::Direction dir) { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelObserved_[0] : readBiasModelObserved_[1]; 
+    }
+    const SBModel& readBiasModelObserved(salmon::utils::Direction dir) const { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelObserved_[0] : readBiasModelObserved_[1]; 
+    }
+
+    SBModel& readBiasModelExpected(salmon::utils::Direction dir) { 
+	return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelExpected_[0] : readBiasModelExpected_[1]; 
+    }
+    const SBModel& readBiasModelExpected(salmon::utils::Direction dir) const { 
+	return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelExpected_[0] : readBiasModelExpected_[1]; 
+   }
+  
+    void setReadBiasModelExpected(SBModel&& model, salmon::utils::Direction dir) {
+        size_t idx = (dir == salmon::utils::Direction::FORWARD) ? 0 : 1;
+	readBiasModelExpected_[idx] = std::move(model);
+    }
+ 
     private:
     /**
      * The file from which the alignments will be read.
@@ -337,9 +418,23 @@ class AlignmentLibrary {
     SpinLock sl_;
     EquivalenceClassBuilder eqBuilder_;
 
+    /** Positional bias things**/
+    std::vector<SimplePosBias> posBiasFW_;
+    std::vector<SimplePosBias> posBiasRC_;
+ 
+    /** GC-fragment bias things **/
+    // One bin for each percentage GC content
+    double gcFracFwd_;
+    GCFragModel observedGC_;
+    GCFragModel expectedGC_;
+
     // Since multiple threads can touch this dist, we
     // need atomic counters.
-    ReadKmerDist<6, std::atomic<uint32_t>> readBias_;
+    std::array<ReadKmerDist<6, std::atomic<uint32_t>>, 2> readBias_;
+    std::array<SBModel, 2> readBiasModelObserved_;
+    std::array<SBModel, 2> readBiasModelExpected_;
+
+    //ReadKmerDist<6, std::atomic<uint32_t>> readBias_;
     std::vector<double> expectedBias_;
 };
 
diff --git a/include/BAMQueue.tpp b/include/BAMQueue.tpp
index 602d633..c1b4b84 100644
--- a/include/BAMQueue.tpp
+++ b/include/BAMQueue.tpp
@@ -85,7 +85,7 @@ void BAMQueue<FragT>::reset() {
               fmt::MemoryWriter errstr;
               errstr << "The header for file " << file.fileName.c_str() 
                      << " was deleted.  This should not happen! exiting!\n";
-              logger_->warn() << errstr.str();
+              logger_->warn(errstr.str());
               std::exit(1);
           }
       }
@@ -99,7 +99,7 @@ void BAMQueue<FragT>::reset() {
   if (file.fp == NULL) {
     fmt::MemoryWriter errstr;
     errstr << "ERROR: Failed to open file " << file.fileName.c_str() << ", exiting!\n";
-    logger_->warn() << errstr.str();
+    logger_->warn(errstr.str());
     std::exit(1);
   }
   scram_set_option(file.fp, CRAM_OPT_NTHREADS, file.numParseThreads);
@@ -131,7 +131,7 @@ BAMQueue<FragT>::~BAMQueue() {
             fmt::MemoryWriter errstr;
             errstr << "The header for file " << file.fileName.c_str() 
                 << " was deleted.  This should not happen! exiting!\n";
-            logger_->warn() << errstr.str();
+            logger_->warn(errstr.str());
             std::exit(1);
         } else {
             sam_hdr_decr_ref(file.header); 
@@ -438,7 +438,7 @@ inline bool BAMQueue<FragT>::getFrag_(ReadPair& rpair, FilterT filt) {
                     << "The read was marked as unpaired in sequencing (not just unmapped)."
                     << "The two ends of a paired-end read should be adjacent. "
                     << "Don't know how to proceed; exiting!\n\n";
-            logger_->warn() << errmsg.str();
+            logger_->warn(errmsg.str());
             std::exit(-1);
         }
         // We've observed two, consecutive paired reads; now check if our reads
@@ -480,7 +480,7 @@ inline bool BAMQueue<FragT>::getFrag_(ReadPair& rpair, FilterT filt) {
                     << ((bam_flag(rpair.read2) & BAM_FUNMAP) ? "not " : "") << "mapped; mate"
                     << ((bam_flag(rpair.read2) & BAM_FMUNMAP) ? "not " : "") << "mapped\n\n";
             }
-            logger_->warn() << errmsg.str();
+            logger_->warn(errmsg.str());
         }
 
 
diff --git a/include/BiasParams.hpp b/include/BiasParams.hpp
new file mode 100644
index 0000000..1effa00
--- /dev/null
+++ b/include/BiasParams.hpp
@@ -0,0 +1,43 @@
+#ifndef __BIAS_PARAMS__
+#define __BIAS_PARAMS__
+
+#include "SBModel.hpp"
+#include "GCFragModel.hpp"
+#include "ReadKmerDist.hpp"
+#include "SalmonMath.hpp"
+#include "SimplePosBias.hpp"
+#include "DistributionUtils.hpp"
+#include <vector>
+
+struct BiasParams {
+    // Forward / reverse-complement mass accumulators; both start at LOG_0.
+    double massFwd{salmon::math::LOG_0};
+    double massRC{salmon::math::LOG_0};
+
+    /** Positional bias, one vector per orientation (each built with 5 entries). **/
+    std::vector<SimplePosBias> posBiasFW;
+    std::vector<SimplePosBias> posBiasRC;
+
+    /** Fragment-GC bias counts. **/
+    GCFragModel observedGCMass;
+
+    /** Read k-mer distributions (see ReadKmerDist), one per orientation. **/
+    ReadKmerDist<8, uint32_t> seqBiasFW;
+    ReadKmerDist<8, uint32_t> seqBiasRC;
+
+    /** Sequence-specific bias models, one per orientation. **/
+    SBModel seqBiasModelFW;
+    SBModel seqBiasModelRC;
+
+    /**
+     * \param numCondBins        first size argument handed to the GCFragModel
+     * \param numGCBins          second size argument handed to the GCFragModel
+     * \param seqBiasPseudocount forwarded to both ReadKmerDist constructors
+     * Initializers follow declaration order, i.e. the real initialization order.
+     **/
+    BiasParams(size_t numCondBins = 3, size_t numGCBins = 101, bool seqBiasPseudocount = false)
+        : posBiasFW(5), posBiasRC(5), observedGCMass(numCondBins, numGCBins),
+          seqBiasFW(seqBiasPseudocount), seqBiasRC(seqBiasPseudocount) {}
+};
+
+#endif //__BIAS_PARAMS__
diff --git a/include/DistributionUtils.hpp b/include/DistributionUtils.hpp
new file mode 100644
index 0000000..5a5ad7d
--- /dev/null
+++ b/include/DistributionUtils.hpp
@@ -0,0 +1,66 @@
+#ifndef __DISTRIBUTION_UTILS__
+#define __DISTRIBUTION_UTILS__
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+// Do things involving distributions
+class FragmentLengthDistribution;
+class Transcript;
+
+namespace distribution_utils {
+enum class DistributionSpace : uint8_t { LOG = 0, LINEAR = 1 };
+
+  /**
+   *  Draw samples from the provided fragment length distribution. 
+   *  \param fld A pointer to the FragmentLengthDistribution from which 
+   *             samples will be drawn.
+   *  \param numSamples  The number of samples to draw.
+   */ 
+std::vector<int32_t> samplesFromLogPMF(FragmentLengthDistribution* fld,
+                                       int32_t numSamples);
+
+  /**
+   * The following two functions compute conditional means of the empirical fragment length 
+   * distribution, and apply them to the transcripts to compute the effective lengths.
+   * To the best of our knowledge, this particular approach to effective length correction was
+   * first introduced in Kallisto[1].
+   * [1] Bray, Nicolas L., et al. "Near-optimal probabilistic RNA-seq quantification." Nature biotechnology 34.5 (2016): 525-527.
+   **/
+
+
+  /**
+   * Compute the conditional mean fragment length for every length
+   * in the input fragment length distribution.  For each length i,  
+   * the conditional mean assumes that it is not possible to sample fragments
+   * of length > i, and so the probability mass is normalized by the 
+   * probability of all lengths <= i.
+   *
+   * \param mass The input fragment length distribution.  This should contain a number 
+   *             for each fragment length that is proportional to the probability of
+   *             drawing a fragment of that length.  The input need not be normalized.
+   * \param inputSpace A DistributionSpace parameter that determines whether mass should
+   *                   be interpreted as existing in log space or linear space.
+   * \returns The conditional means for each fragment length. 
+   */
+std::vector<double> correctionFactorsFromMass(std::vector<double>& mass,
+                                              DistributionSpace inputSpace);
+
+  /**
+   * Populate the effective lengths of the input transcripts based on the conditional
+   * means.
+   *
+   * \sa correctionFactorsFromMass()
+   * \param maxLength The maximum fragment length.
+   * \param transcripts The transcripts whose lengths are to be corrected.
+   * \param correctionFactors The conditional means (computed with correctionFactorsFromMass())
+   */
+void computeSmoothedEffectiveLengths(size_t maxLength,
+                                     std::vector<Transcript>& transcripts,
+                                     std::vector<double>& correctionFactors,
+                                     DistributionSpace outputSpace);
+
+}
+
+#endif // __DISTRIBUTION_UTILS__
diff --git a/include/FASTAParser.hpp b/include/FASTAParser.hpp
index 8626552..48d9a62 100644
--- a/include/FASTAParser.hpp
+++ b/include/FASTAParser.hpp
@@ -4,11 +4,13 @@
 #include <vector>
 
 class Transcript;
+class SalmonOpts;
 
 class FASTAParser {
 public:
     FASTAParser(const std::string& fname);
-    void populateTargets(std::vector<Transcript>& transcripts);
+    void populateTargets(std::vector<Transcript>& transcripts, SalmonOpts& sopt);
+
 private:
     std::string fname_;
 };
diff --git a/include/FastxParser.hpp b/include/FastxParser.hpp
new file mode 100644
index 0000000..6dfa3ca
--- /dev/null
+++ b/include/FastxParser.hpp
@@ -0,0 +1,150 @@
+#ifndef __FASTX_PARSER__
+#define __FASTX_PARSER__
+
+#include "fcntl.h"
+#include "unistd.h"
+#include <atomic>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <thread>
+#include <vector>
+
+extern "C" {
+#include "kseq.h"
+}
+
+#include "concurrentqueue.h"
+
+#ifndef __FASTX_PARSER_PRECXX14_MAKE_UNIQUE__
+#define __FASTX_PARSER_PRECXX14_MAKE_UNIQUE__
+// make_unique: taken from the standard library when compiling as C++14 or later, otherwise defined below.
+#if __cplusplus >= 201402L
+#include <memory>
+using std::make_unique;
+#else
+
+#include <cstddef>
+#include <memory>
+#include <type_traits>
+#include <utility>
+// _Unique_if exposes exactly one alias per kind of T, so only the matching overload below is viable.
+template <class T> struct _Unique_if {
+  using _Single_object = std::unique_ptr<T>;
+};
+
+template <class T> struct _Unique_if<T[]> {
+  using _Unknown_bound = std::unique_ptr<T[]>;
+};
+
+template <class T, size_t N> struct _Unique_if<T[N]> {
+  using _Known_bound = void;
+};
+// Single object: forwards the arguments to T's constructor.
+template <class T, class... Args>
+typename _Unique_if<T>::_Single_object make_unique(Args&&... args) {
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+// Array of unknown bound: allocates n value-initialized elements.
+template <class T>
+typename _Unique_if<T>::_Unknown_bound make_unique(size_t n) {
+  using U = typename std::remove_extent<T>::type;
+  return std::unique_ptr<T>(new U[n]());
+}
+// Array of known bound: disallowed.
+template <class T, class... Args>
+typename _Unique_if<T>::_Known_bound make_unique(Args&&...) = delete;
+
+#endif // C++11
+#endif //__FASTX_PARSER_PRECXX14_MAKE_UNIQUE__
+
+namespace fastx_parser {
+struct ReadSeq {
+    std::string seq;
+    std::string name;
+    // No user-declared destructor: that keeps the implicit move operations available.
+};
+
+struct ReadPair {
+  ReadSeq first;
+  ReadSeq second;
+};
+
+template <typename T> class ReadChunk {
+public:
+  ReadChunk(size_t want) : group_(want), want_(want), have_(want) {} // starts "full": have_ == want_
+  inline void have(size_t num) { have_ = num; }       // set how many leading slots are in use
+  inline size_t size() const { return have_; }        // slots in use; const, like want()
+  inline size_t want() const { return want_; }        // size requested at construction
+  T& operator[](size_t i) { return group_[i]; }       // unchecked access
+  typename std::vector<T>::iterator begin() { return group_.begin(); }
+  typename std::vector<T>::iterator end() { return group_.begin() + have_; } // one past the last used slot
+
+private:
+  std::vector<T> group_; // constructed with want elements; never resized in this class
+  size_t want_;
+  size_t have_;
+};
+
+template <typename T> class ReadGroup {
+public:
+  ReadGroup(moodycamel::ProducerToken&& pt, moodycamel::ConsumerToken&& ct)
+      : pt_(std::move(pt)), ct_(std::move(ct)) {}
+  moodycamel::ConsumerToken& consumerToken() { return ct_; }
+  moodycamel::ProducerToken& producerToken() { return pt_; }
+  // get a reference to the chunk this ReadGroup owns
+  std::unique_ptr<ReadChunk<T>>& chunkPtr() { return chunk_; }
+  // get a *moveable* reference to the chunk this ReadGroup owns
+  std::unique_ptr<ReadChunk<T>>&& takeChunkPtr() { return std::move(chunk_); }
+  inline void have(size_t num) { chunk_->have(num); }
+  inline size_t size() { return chunk_->size(); }
+  inline size_t want() const { return chunk_->want(); }
+  T& operator[](size_t i) { return (*chunk_)[i]; }
+  typename std::vector<T>::iterator begin() { return chunk_->begin(); }
+  typename std::vector<T>::iterator end() { return chunk_->begin() + chunk_->size(); }
+  // Drop the chunk.  reset() rather than release(): if ownership was not moved
+  // out beforehand, release() would hand back the raw pointer and leak it.
+  void setChunkEmpty() { chunk_.reset(); }
+  bool empty() const { return chunk_.get() == nullptr; }
+
+private:
+  std::unique_ptr<ReadChunk<T>> chunk_{nullptr};
+  moodycamel::ProducerToken pt_;
+  moodycamel::ConsumerToken ct_;
+};
+
+template <typename T> class FastxParser {
+public:
+  FastxParser(std::vector<std::string> files, uint32_t numConsumers,
+              uint32_t numParsers = 1, uint32_t chunkSize = 1000);
+
+  FastxParser(std::vector<std::string> files, std::vector<std::string> files2,
+              uint32_t numConsumers, uint32_t numParsers = 1,
+              uint32_t chunkSize = 1000);
+  ~FastxParser();
+  bool start();
+  ReadGroup<T> getReadGroup();
+  bool refill(ReadGroup<T>& rg);
+  void finishedWithGroup(ReadGroup<T>& s);
+
+private:
+  moodycamel::ProducerToken getProducerToken_();
+  moodycamel::ConsumerToken getConsumerToken_();
+
+  std::vector<std::string> inputStreams_;
+  std::vector<std::string> inputStreams2_;
+  uint32_t numParsers_;
+  std::atomic<uint32_t> numParsing_;
+  std::vector<std::unique_ptr<std::thread>> parsingThreads_;
+  size_t blockSize_;
+  moodycamel::ConcurrentQueue<std::unique_ptr<ReadChunk<T>>> readQueue_,
+      seqContainerQueue_;
+
+  // holds the indices of files (file-pairs) to be processed
+  moodycamel::ConcurrentQueue<uint32_t> workQueue_;
+
+  std::vector<std::unique_ptr<moodycamel::ProducerToken>> produceReads_;
+  std::vector<std::unique_ptr<moodycamel::ConsumerToken>> consumeContainers_;
+};
+}
+#endif // __FASTX_PARSER__
diff --git a/include/GCFragModel.hpp b/include/GCFragModel.hpp
new file mode 100644
index 0000000..67fcafa
--- /dev/null
+++ b/include/GCFragModel.hpp
@@ -0,0 +1,181 @@
+#ifndef __GC_FRAG_MODEL__
+#define __GC_FRAG_MODEL__
+
+#include "DistributionUtils.hpp"
+#include "SalmonMath.hpp"
+#include "Eigen/Dense"
+
+#include <boost/iostreams/filtering_stream.hpp>
+
+#include <vector>
+#include <iostream>
+
+struct GCDesc {
+    int32_t fragFrac;
+    int32_t contextFrac;
+
+    // No-argument forms assume 101 bins: the stored value is used as the bin directly.
+    int32_t fragBin() const { return fragFrac; }
+    int32_t contextBin() const { return contextFrac; }
+    // n-bin forms: bins have width 100/n; the result is capped at n-1.
+    int32_t fragBin(int32_t n) const {
+        double w = (100.0 / n);
+        return std::min(n-1, static_cast<int32_t>(fragFrac / w));
+    }
+    int32_t contextBin(int32_t n) const {
+        double w = (100.0 / n);
+        return std::min(n-1, static_cast<int32_t>(contextFrac / w));
+    }
+};
+
+class GCFragModel {
+public:
+    // Builds a condBins x numGCBins count matrix (rows: context bins, columns:
+    // fragment bins), filled with LOG_0 in LOG space or with zeros in LINEAR space.
+    GCFragModel(size_t condBins=3,
+                size_t numGCBins=101,
+                distribution_utils::DistributionSpace dspace=distribution_utils::DistributionSpace::LOG) :
+        condBins_(condBins),
+        numGCBins_(numGCBins),
+        dspace_(dspace),
+        normalized_(false)
+    {
+        counts_ = Eigen::MatrixXd(condBins_, numGCBins_);
+        reset(dspace_);
+    }
+
+    // Writes an int32 space tag (0 = LINEAR, 1 = LOG; note this is NOT the
+    // DistributionSpace enum value), then rows and cols as Eigen Index, then the
+    // raw matrix scalars.  Always returns true; stream state is not inspected.
+    bool writeBinary(boost::iostreams::filtering_ostream& out) const {
+        int32_t dtype = (dspace_ == distribution_utils::DistributionSpace::LINEAR) ? 0 : 1;
+        out.write(reinterpret_cast<const char*>(&dtype), sizeof(dtype));
+        typename Eigen::MatrixXd::Index rows= counts_.rows(), cols= counts_.cols();
+        out.write(reinterpret_cast<const char*>(&rows), sizeof(typename Eigen::MatrixXd::Index));
+        out.write(reinterpret_cast<const char*>(&cols), sizeof(typename Eigen::MatrixXd::Index));
+        // data() on a const matrix already yields a pointer-to-const: no const_cast needed.
+        out.write(reinterpret_cast<const char*>(counts_.data()), rows*cols*sizeof(typename Eigen::MatrixXd::Scalar));
+        return true;
+    }
+
+    GCFragModel(const GCFragModel&) = default;
+    GCFragModel(GCFragModel&&) = default;
+    GCFragModel& operator=(const GCFragModel&) = default;
+    GCFragModel& operator=(GCFragModel&&) = default;
+
+    // Discards all counts and restarts in `dspace` (LOG_0 for LOG, 0 for LINEAR).
+    void reset(distribution_utils::DistributionSpace dspace=distribution_utils::DistributionSpace::LOG) {
+        normalized_ = false;
+        dspace_ = dspace;
+        if (dspace_ == distribution_utils::DistributionSpace::LOG) {
+            counts_.setOnes();
+            counts_ *= salmon::math::LOG_0;
+        } else {
+            counts_.setZero();
+        }
+    }
+
+    // Element-wise ratio this/other, clamped to [1/maxRatio, maxRatio].  Both
+    // models are normalized first if needed, so *this and `other` may change.
+    GCFragModel ratio(GCFragModel& other, double maxRatio) {
+        if (!normalized_) { normalize(); }
+        if (!other.normalized_) { other.normalize(); }
+        double minRatio = 1.0 / maxRatio;
+        GCFragModel ratioModel(condBins_, numGCBins_, dspace_);
+        for (size_t r = 0; r < condBins_; ++r) {
+            for (size_t c = 0; c < numGCBins_; ++c) {
+                double rat = (counts_(r,c) / other.counts_(r,c));
+                if (rat > maxRatio) { rat = maxRatio; }
+                if (rat < minRatio ) { rat = minRatio; }
+                ratioModel.counts_(r,c) =  rat;
+            }
+        }
+        return ratioModel;
+    }
+
+    // Adds fragWeight to the bin chosen by desc (logAdd in LOG space, += otherwise).
+    void inc(GCDesc desc, double fragWeight) {
+        auto ctx = (condBins_ > 1) ? desc.contextBin(condBins_) : 0;
+        auto frag = (numGCBins_ != 101) ? desc.fragBin(numGCBins_) : desc.fragBin();
+        if (dspace_ == distribution_utils::DistributionSpace::LOG) {
+            counts_(ctx, frag) = salmon::math::logAdd(counts_(ctx, frag), fragWeight);
+        } else {
+            counts_(ctx, frag) += fragWeight;
+        }
+    }
+
+    // Returns the value currently stored in the bin chosen by desc.
+    double get(GCDesc desc) const {
+        auto ctx = (condBins_ > 1) ? desc.contextBin(condBins_) : 0;
+        auto frag = (numGCBins_ != 101) ? desc.fragBin(numGCBins_) : desc.fragBin();
+        return counts_(ctx, frag);
+    }
+
+    distribution_utils::DistributionSpace distributionSpace() const { return dspace_; }
+
+    // Adds the counts of `other` into this model; exits if the two spaces differ.
+    void combineCounts(const GCFragModel& other) {
+        if (dspace_ != other.dspace_) {
+            std::cerr << "Cannot combine distributions that live in a different space!\n";
+            std::exit(1);
+        }
+        if (dspace_ == distribution_utils::DistributionSpace::LOG) {
+            for (size_t r = 0; r < condBins_; ++r) {
+                for (size_t c = 0; c < numGCBins_; ++c) {
+                    counts_(r,c) = salmon::math::logAdd(counts_(r,c), other.counts_(r,c));
+                }
+            }
+        } else {
+            for (size_t r = 0; r < condBins_; ++r) {
+                for (size_t c = 0; c < numGCBins_; ++c) {
+                    counts_(r,c) += other.counts_(r,c);
+                }
+            }
+        }
+    }
+
+    /**
+     * Normalizes each row (adding `prior` to every bin); no-op once normalized.
+     * NOTE: Improve interface --- also converts out of log space
+     */
+    void normalize(double prior=0.1) {
+        if (normalized_) { return; }
+        if (dspace_ == distribution_utils::DistributionSpace::LOG) {
+            prior = std::log(prior);
+            for (size_t r = 0; r < condBins_; ++r) {
+                double rowMass{salmon::math::LOG_0};
+                for (size_t c = 0; c < numGCBins_; ++c) {
+                    rowMass = salmon::math::logAdd(prior, salmon::math::logAdd(rowMass, counts_(r,c)));
+                }
+                if (!salmon::math::isLog0(rowMass)) {
+                    for (size_t c = 0; c < numGCBins_; ++c) {
+                        counts_(r,c) = std::exp(salmon::math::logAdd(prior, counts_(r,c)) - rowMass);
+                    }
+                }
+            }
+        } else {
+            for (size_t r = 0; r < condBins_; ++r) {
+                double rowMass = 0.0;
+                for (size_t c = 0; c < numGCBins_; ++c) {
+                    rowMass += (prior + counts_(r,c));
+                }
+                if (rowMass > 0.0) {
+                    double norm = 1.0 / rowMass;
+                    for (size_t c = 0; c < numGCBins_; ++c) {
+                        counts_(r,c) = (prior + counts_(r,c)) * norm;
+                    }
+                }
+            }
+        }
+        normalized_ = true;
+        dspace_ = distribution_utils::DistributionSpace::LINEAR;
+    }
+private:
+    size_t condBins_;
+    size_t numGCBins_;
+    distribution_utils::DistributionSpace dspace_;
+    bool normalized_;
+    Eigen::MatrixXd counts_;
+};
+
+#endif //__GC_FRAG_MODEL__
diff --git a/include/GZipWriter.hpp b/include/GZipWriter.hpp
index 4708823..deb6e0e 100644
--- a/include/GZipWriter.hpp
+++ b/include/GZipWriter.hpp
@@ -21,6 +21,11 @@ class GZipWriter {
     ~GZipWriter();
 
     template <typename ExpT>
+    bool writeEquivCounts(
+	const SalmonOpts& opts,
+	ExpT& experiment);
+
+    template <typename ExpT>
     bool writeMeta(
 	const SalmonOpts& opts,
 	const ExpT& experiment,
diff --git a/include/KmerContext.hpp b/include/KmerContext.hpp
new file mode 100644
index 0000000..5d38d4d
--- /dev/null
+++ b/include/KmerContext.hpp
@@ -0,0 +1,39 @@
+#ifndef __KMER_CONTEXT_HPP__
+#define __KMER_CONTEXT_HPP__
+
+#include <iostream>
+#include <limits>
+#include "SalmonUtils.hpp"
+#include "UtilityFunctions.hpp"
+
+class KmerContext {
+public:
+    KmerContext(uint32_t K, salmon::utils::Direction dir) : _valid(false), _length(K), _dir(dir), _repr(_invalidIdx) {
+        if (K == 0) {
+            std::cerr << "Cannot create a k-mer context of size 0!\n";
+            std::exit(1);
+        }
+    }
+    bool valid() const { return _valid; }
+    uint32_t index() const { return _repr; }
+    std::string str() const { return (valid() ? kmerForIndex(_repr, _length) : std::string(_length, 'X')); }
+    // Advance the context; s is read up to s[_length-1].
+    void operator()(const char* s) {
+        if (valid()) {
+            _repr = nextKmerIndex(_repr, s[_length-1], _length, _dir);
+        } else {
+            // First call: index the whole window starting at s.
+            _repr = indexForKmer(s, _length, _dir);
+            _valid = true;
+        }
+    }
+private:
+    static constexpr uint32_t _invalidIdx{std::numeric_limits<uint32_t>::max()}; // static: must exist before _repr is initialized
+    bool _valid;
+    uint32_t _length;
+    salmon::utils::Direction _dir;
+    uint32_t _repr;
+};
+
+
+#endif //__KMER_CONTEXT_HPP__
diff --git a/include/LibraryFormat.hpp b/include/LibraryFormat.hpp
index 3db1139..6f42f5c 100644
--- a/include/LibraryFormat.hpp
+++ b/include/LibraryFormat.hpp
@@ -84,6 +84,76 @@ public:
         return LibraryFormat(rt, ro, rs);
     }
 
+    std::string toString() const {
+        std::string desc; // returned empty when type and orientation are inconsistent
+        if (type == ReadType::SINGLE_END) {
+            if (orientation != ReadOrientation::NONE) {
+                return desc;
+            }
+        }
+        if (type == ReadType::PAIRED_END) {
+            if (orientation == ReadOrientation::NONE) {
+                return desc;
+            }
+        }
+        // Otherwise build the short code: orientation letter (if any) followed by strandedness.
+        /*
+        switch (type) {
+        case ReadType::SINGLE_END:
+            desc += "single:";
+            break;
+        case ReadType::PAIRED_END:
+            desc += "paired:";
+            break;
+        }
+        */
+        // Orientation: TOWARD -> "I", AWAY -> "O", SAME -> "M", NONE -> nothing.
+        // Strandedness (second switch): SA -> "SF", AS -> "SR", S -> "F", A -> "R", U -> "U".
+        switch (orientation)  {
+        case ReadOrientation::TOWARD:
+            desc += "I";
+            break;
+        case ReadOrientation::AWAY:
+            desc += "O";
+            break;
+        case ReadOrientation::SAME:
+            desc += "M";
+            break;
+        case ReadOrientation::NONE:
+            break;
+        }
+
+        switch (strandedness)  {
+        case ReadStrandedness::SA:
+            desc += "SF";
+            break;
+        case ReadStrandedness::AS:
+            desc += "SR";
+            break;
+        case ReadStrandedness::S:
+            desc += "F";
+            break;
+        case ReadStrandedness::A:
+            desc += "R";
+            break;
+        case ReadStrandedness::U:
+            desc += "U";
+            break;
+        }
+        /*
+        if (type == ReadType::PAIRED_END) {
+            if (desc == "SF" or 
+                desc == "SR" or
+                desc == "F" or
+                desc == "R" or 
+                desc == "U" ) { 
+                desc.clear();
+            }
+        }
+        */
+        return desc;
+    }
+
     // Assigns a unique ID to each potential library
     // type.  The IDs are such that 0 <= formatID(lib) < num possible formats
     inline uint8_t formatID() const {
diff --git a/include/LibraryTypeDetector.hpp b/include/LibraryTypeDetector.hpp
new file mode 100644
index 0000000..774d711
--- /dev/null
+++ b/include/LibraryTypeDetector.hpp
@@ -0,0 +1,180 @@
+#ifndef __LIBRARY_TYPE_DETECTOR__
+#define __LIBRARY_TYPE_DETECTOR__
+
+#include "spdlog/fmt/ostr.h"
+#include "spdlog/fmt/fmt.h"
+#include "spdlog/sinks/ostream_sink.h"
+#include "spdlog/spdlog.h"
+
+#include "LibraryFormat.hpp"
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+#include <vector>
+
+/**
+ * Tallies the library formats of sampled mappings and, once enough samples
+ * have been recorded, guesses the most likely library type.
+ *
+ * addSample() updates only atomic counters; mostLikelyType() makes the guess
+ * while holding mut_ and succeeds at most once per detector.
+ */
+class LibraryTypeDetector {
+public:
+  /// Creates an active detector that counts samples of the given read type.
+  LibraryTypeDetector(ReadType type)
+      : type_(type), libTypeCounts_(LibraryFormat::maxLibTypeID() + 1) {}
+
+  /// Copies the state and counts of other; the copy gets its own fresh mutex.
+  LibraryTypeDetector(const LibraryTypeDetector& other)
+      : active_(other.active_.load()), type_(other.type_),
+        numSamplesNeeded_(other.numSamplesNeeded_.load()),
+        libTypeCounts_(other.libTypeCounts_.size()) {
+    for (size_t i = 0; i < other.libTypeCounts_.size(); ++i) {
+      libTypeCounts_[i] = other.libTypeCounts_[i].load();
+    }
+  }
+
+  /// True until mostLikelyType() has produced its guess.
+  bool isActive() const { return active_; }
+  /// True once the required number of samples has been recorded.
+  bool canGuess() const { return numSamplesNeeded_ <= 0; }
+
+  /**
+   * Guesses the most likely library type from the samples seen so far and
+   * writes it to ifmt.
+   *
+   * Returns true if this call made the guess.  Returns false, leaving ifmt
+   * untouched, if a guess was already made or if another thread currently
+   * holds the lock.
+   */
+  bool mostLikelyType(LibraryFormat& ifmt) {
+    // RAII guard: the lock is released on every return path, so the mutex is
+    // never left locked (or destroyed while still locked).
+    std::unique_lock<std::mutex> lock(mut_, std::try_to_lock);
+    if (!lock.owns_lock() or !active_) {
+      return false;
+    }
+
+    ifmt.type = type_;
+    if (type_ == ReadType::SINGLE_END) {
+      guessSingleEnd(ifmt);
+    } else {
+      guessPairedEnd(ifmt);
+    }
+
+    // spdlog::get() returns a null pointer if no such logger is registered.
+    if (auto log = spdlog::get("jointLog")) {
+      log->info("Automatically detected most likely library type as {}", ifmt.toString());
+    }
+
+    active_ = false;
+    return true;
+  }
+
+  /// Counts one sample of format f, provided it matches our read type and
+  /// numSamplesNeeded_ has not yet dropped below zero.
+  void addSample(LibraryFormat f) {
+    if (f.type == type_ and numSamplesNeeded_ >= 0) {
+      ++libTypeCounts_[f.formatID()];
+      --numSamplesNeeded_;
+    }
+  }
+
+private:
+  /// Maps the fraction of sense-strand samples to a strandedness: below 30%
+  /// gives antisense, from 30% to below 70% unstranded, otherwise sense.
+  static ReadStrandedness strandFromRatio(double ratioFW, ReadStrandedness sense,
+                                          ReadStrandedness antisense) {
+    if (ratioFW < 0.3) { return antisense; }
+    if (ratioFW < 0.7) { return ReadStrandedness::U; }
+    return sense;
+  }
+
+  /// Single-end guess: no orientation, strandedness from the S vs. A counts.
+  void guessSingleEnd(LibraryFormat& ifmt) const {
+    uint64_t nf{0};
+    uint64_t nr{0};
+    for (size_t i = 0; i <= LibraryFormat::maxLibTypeID(); ++i) {
+      auto f = LibraryFormat::formatFromID(i);
+      auto c = libTypeCounts_[i].load();
+      nf += (f.strandedness == ReadStrandedness::S) ? c : 0;
+      nr += (f.strandedness == ReadStrandedness::A) ? c : 0;
+    }
+
+    ifmt.orientation = ReadOrientation::NONE;
+    if (nf + nr == 0) {
+      // Without any stranded samples there is no ratio; leave as unstranded
+      ifmt.strandedness = ReadStrandedness::U;
+      return;
+    }
+    ifmt.strandedness = strandFromRatio(static_cast<double>(nf) / (nf + nr),
+                                        ReadStrandedness::S, ReadStrandedness::A);
+  }
+
+  /// Paired-end guess: the most frequent orientation plus the strandedness.
+  void guessPairedEnd(LibraryFormat& ifmt) const {
+    uint64_t nsf{0};
+    uint64_t nsr{0};
+    uint64_t ninward{0};
+    uint64_t noutward{0};
+    uint64_t nsame{0};
+    for (size_t i = 0; i <= LibraryFormat::maxLibTypeID(); ++i) {
+      auto f = LibraryFormat::formatFromID(i);
+      auto c = libTypeCounts_[i].load();
+      nsf += (f.strandedness == ReadStrandedness::S or
+              f.strandedness == ReadStrandedness::SA) ? c : 0;
+      nsr += (f.strandedness == ReadStrandedness::A or
+              f.strandedness == ReadStrandedness::AS) ? c : 0;
+      ninward += (f.orientation == ReadOrientation::TOWARD) ? c : 0;
+      noutward += (f.orientation == ReadOrientation::AWAY) ? c : 0;
+      nsame += (f.orientation == ReadOrientation::SAME) ? c : 0;
+    }
+
+    auto numOrient = ninward + noutward + nsame;
+    auto numStrand = nsf + nsr;
+    if (numOrient == 0 or numStrand == 0) {
+      // Nothing to base a guess on; fall back to inward and unstranded
+      ifmt.orientation = ReadOrientation::TOWARD;
+      ifmt.strandedness = ReadStrandedness::U;
+      return;
+    }
+
+    double ratioIn = static_cast<double>(ninward) / numOrient;
+    double ratioOut = static_cast<double>(noutward) / numOrient;
+    double ratioSame = static_cast<double>(nsame) / numOrient;
+    // Ties are resolved in favor of TOWARD first, then AWAY
+    bool same{false};
+    if (ratioIn >= ratioOut and ratioIn >= ratioSame) {
+      ifmt.orientation = ReadOrientation::TOWARD;
+    } else if (ratioOut >= ratioIn and ratioOut >= ratioSame) {
+      ifmt.orientation = ReadOrientation::AWAY;
+    } else {
+      ifmt.orientation = ReadOrientation::SAME;
+      same = true;
+    }
+
+    // Same-orientation pairs use the S/A labels, all others SA/AS
+    double ratioFW = static_cast<double>(nsf) / numStrand;
+    ifmt.strandedness = same
+        ? strandFromRatio(ratioFW, ReadStrandedness::S, ReadStrandedness::A)
+        : strandFromRatio(ratioFW, ReadStrandedness::SA, ReadStrandedness::AS);
+  }
+
+  // set to false once we have guessed the type; atomic because isActive()
+  // reads it without taking mut_
+  std::atomic<bool> active_{true};
+  std::mutex mut_;
+
+  // single or paired-end
+  ReadType type_;
+  // number of samples needed before we can guess a type
+  std::atomic<int64_t> numSamplesNeeded_{50000};
+
+  // the counts for each library type
+  std::vector<std::atomic<uint64_t>> libTypeCounts_;
+};
+
+#endif //__LIBRARY_TYPE_DETECTOR__
diff --git a/include/LightweightAlignmentDefs.hpp b/include/LightweightAlignmentDefs.hpp
index 637ba20..d7f637f 100644
--- a/include/LightweightAlignmentDefs.hpp
+++ b/include/LightweightAlignmentDefs.hpp
@@ -25,7 +25,7 @@ class SMEMAlignment {
                   double logProbIn = salmon::math::LOG_0) :
             pos(hitPosIn), fwd(false), mateIsFwd(false), transcriptID_(transcriptIDIn),
             format_(format), score_(scoreIn),
-            fragLength_(fragLengthIn), logProb(logProbIn) {}
+            fragLength_(fragLengthIn), fragLen(fragLengthIn), logProb(logProbIn) {}
 
         SMEMAlignment(const SMEMAlignment& o) = default;
         SMEMAlignment(SMEMAlignment&& o) = default;
@@ -35,6 +35,7 @@ class SMEMAlignment {
 
         inline TranscriptID transcriptID() const { return transcriptID_; }
         inline uint32_t fragLength() const { return fragLength_; }
+        inline uint32_t fragLengthPedantic(uint32_t txpLen) const { return fragLength_; }
         inline LibraryFormat libFormat() const { return format_; }
         inline double score() const { return score_; }
         inline int32_t hitPos() const { return pos; }
@@ -56,8 +57,12 @@ class SMEMAlignment {
 
         rapmap::utils::MateStatus mateStatus;
         int32_t pos;
+        int32_t matePos; // JUST FOR COMPATIBILITY WITH QUASI!
         bool fwd;
         bool mateIsFwd;
+        uint32_t readLen;
+        uint32_t mateLen;
+        uint32_t fragLen;
     private:
         TranscriptID transcriptID_;
         LibraryFormat format_;
@@ -537,10 +542,12 @@ void processMiniBatch(
         std::vector<Transcript>& transcripts,
         ClusterForest& clusterForest,
         FragmentLengthDistribution& fragLengthDist,
+        BiasParams& observedGCParams,
         std::atomic<uint64_t>& numAssignedFragments,
         std::default_random_engine& randEng,
         bool initialRound,
-        std::atomic<bool>& burnedIn
+        std::atomic<bool>& burnedIn,
+        double& maxZeroFrac
         );
 
 template <typename CoverageCalculator>
@@ -781,7 +788,9 @@ inline bool nearEndOfTranscript(
 }
 
 template <typename CoverageCalculator>
-inline void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& frag,
+inline void getHitsForFragment(
+                               fastx_parser::ReadPair& frag,
+                               //std::pair<header_sequence_qual, header_sequence_qual>& frag,
                         SalmonIndex* sidx,
                         smem_i *itr,
                         const bwtintv_v *a,
@@ -1192,7 +1201,8 @@ inline void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_q
   *
   */
 template <typename CoverageCalculator>
-inline void getHitsForFragment(jellyfish::header_sequence_qual& frag,
+inline void getHitsForFragment(fastx_parser::ReadSeq& frag,
+                               //jellyfish::header_sequence_qual& frag,
                         SalmonIndex* sidx,
                         smem_i *itr,
                         const bwtintv_v *a,
@@ -1345,6 +1355,7 @@ void processReadsMEM(ParserT* parser,
                ForgettingMassCalculator& fmCalc,
                ClusterForest& clusterForest,
                FragmentLengthDistribution& fragLengthDist,
+               BiasParams& observedGCParams,
                mem_opt_t* memOptions,
                const SalmonOpts& salmonOpts,
                double coverageThresh,
@@ -1371,6 +1382,7 @@ void processReadsMEM(ParserT* parser,
                ForgettingMassCalculator& fmCalc,
                ClusterForest& clusterForest,
                FragmentLengthDistribution& fragLengthDist,
+               BiasParams& observedGCParams,
                mem_opt_t* memOptions,
                const SalmonOpts& salmonOpts,
                double coverageThresh,
@@ -1401,23 +1413,31 @@ void processReadsMEM(ParserT* parser,
   size_t locRead{0};
   uint64_t localUpperBoundHits{0};
   size_t rangeSize{0};
+  double maxZeroFrac{0.0};
+  auto rg = parser->getReadGroup();
+  while (parser->refill(rg)) {
+      rangeSize = rg.size();
 
+      /*
   while(true) {
+      
     typename ParserT::job j(*parser); // Get a job from the parser: a bunch of read (at most max_read_group)
     if(j.is_empty()) break;           // If got nothing, quit
-
     rangeSize = j->nb_filled;
+      */
     if (rangeSize > structureVec.size()) {
         salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} --- this shouldn't happen.\n"
                                    "Please report this bug on GitHub", rangeSize, structureVec.size());
         std::exit(1);
     }
 
-    for(size_t i = 0; i < j->nb_filled; ++i) { // For all the read in this batch
+    for(size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
         localUpperBoundHits = 0;
 
         auto& hitList = structureVec[i];
-        getHitsForFragment<CoverageCalculator>(j->data[i], sidx, itr, a,
+        getHitsForFragment<CoverageCalculator>(rg[i],
+                                               //j->data[i], 
+                                               sidx, itr, a,
                                                auxHits,
                                                memOptions,
                                                readExp,
@@ -1459,8 +1479,15 @@ void processReadsMEM(ParserT* parser,
     prevObservedFrags = numObservedFragments;
     AlnGroupVecRange<SMEMAlignment> hitLists = boost::make_iterator_range(structureVec.begin(), structureVec.begin() + rangeSize);
     processMiniBatch<SMEMAlignment>(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
-                     fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
+                                    fragLengthDist, observedGCParams, numAssignedFragments, eng, initialRound, burnedIn, maxZeroFrac);
+    
+  }
+
+  if (maxZeroFrac > 0.0) {
+      salmonOpts.jointLog->info("Thread saw mini-batch with a maximum of {0:.2f}\% zero probability fragments", 
+                                maxZeroFrac);
   }
+
   smem_aux_destroy(auxHits);
   smem_itr_destroy(itr);
 }
diff --git a/include/PairSequenceParser.hpp b/include/PairSequenceParser.hpp
index 6b0f4dd..f013509 100644
--- a/include/PairSequenceParser.hpp
+++ b/include/PairSequenceParser.hpp
@@ -126,7 +126,7 @@ protected:
     // Update the type of the current file and move past first header
     // to beginning of sequence.
     file_type type1 = peek_file_type(*st.stream1);
-    file_type type2 = peek_file_type(*st.stream1);
+    file_type type2 = peek_file_type(*st.stream2);
     if(type1 == DONE_TYPE || type2 == DONE_TYPE)
       return open_next_files(st);
     if(type1 != type2)
diff --git a/include/ReadExperiment.hpp b/include/ReadExperiment.hpp
index 21bc7c6..3d333d0 100644
--- a/include/ReadExperiment.hpp
+++ b/include/ReadExperiment.hpp
@@ -10,6 +10,8 @@ extern "C" {
 
 // Our includes
 #include "ClusterForest.hpp"
+#include "DistributionUtils.hpp"
+#include "GCFragModel.hpp"
 #include "Transcript.hpp"
 #include "ReadLibrary.hpp"
 #include "FragmentLengthDistribution.hpp"
@@ -17,10 +19,13 @@ extern "C" {
 #include "SequenceBiasModel.hpp"
 #include "SalmonOpts.hpp"
 #include "SalmonIndex.hpp"
+#include "SalmonUtils.hpp"
 #include "EquivalenceClassBuilder.hpp"
 #include "SpinLock.hpp" // RapMap's with try_lock
 #include "UtilityFunctions.hpp"
 #include "ReadKmerDist.hpp"
+#include "SBModel.hpp"
+#include "SimplePosBias.hpp"
 
 // Logger includes
 #include "spdlog/spdlog.h"
@@ -29,6 +34,9 @@ extern "C" {
 #include <boost/filesystem.hpp>
 #include <boost/range/irange.hpp>
 
+// Cereal includes
+#include "cereal/archives/json.hpp"
+
 // Standard includes
 #include <vector>
 #include <memory>
@@ -55,9 +63,16 @@ class ReadExperiment {
         transcripts_(std::vector<Transcript>()),
         totalAssignedFragments_(0),
         fragStartDists_(5),
+        posBiasFW_(5),
+        posBiasRC_(5),
         seqBiasModel_(1.0),
 	eqBuilder_(sopt.jointLog),
-        expectedBias_(constExprPow(4, readBias_.getK()), 1.0) {
+        expectedBias_(constExprPow(4, readBias_[0].getK()), 1.0),
+	expectedGC_( sopt.numConditionalGCBins,
+		    sopt.numFragGCBins, distribution_utils::DistributionSpace::LOG),
+        observedGC_( sopt.numConditionalGCBins,
+		    sopt.numFragGCBins, distribution_utils::DistributionSpace::LOG)
+    {
             namespace bfs = boost::filesystem;
 
             // Make sure the read libraries are valid.
@@ -107,9 +122,17 @@ class ReadExperiment {
 	    switch (salmonIndex_->indexType()) {
             case SalmonIndexType::QUASI:
                 if (salmonIndex_->is64BitQuasi()) {
-                  loadTranscriptsFromQuasi(salmonIndex_->quasiIndex64());
+                    if (salmonIndex_->isPerfectHashQuasi()) {
+                        loadTranscriptsFromQuasi(salmonIndex_->quasiIndexPerfectHash64(), sopt);
+                    } else {
+                        loadTranscriptsFromQuasi(salmonIndex_->quasiIndex64(), sopt);
+                    }
                 } else {
-                  loadTranscriptsFromQuasi(salmonIndex_->quasiIndex32());
+                    if (salmonIndex_->isPerfectHashQuasi()) {
+                        loadTranscriptsFromQuasi(salmonIndex_->quasiIndexPerfectHash32(), sopt);
+                    } else {
+                        loadTranscriptsFromQuasi(salmonIndex_->quasiIndex32(), sopt);
+                    }
                 }
                 break;
             case SalmonIndexType::FMD:
@@ -132,13 +155,12 @@ class ReadExperiment {
     void updateTranscriptLengthsAtomic(std::atomic<bool>& done) {
         if (sl_.try_lock()) {
             if (!done) {
-                auto& fld = *(fragLengthDist_.get());
-
+                auto fld = fragLengthDist_.get();
+                // Convert the PMF to non-log scale
                 std::vector<double> logPMF;
                 size_t minVal;
                 size_t maxVal;
-                double logFLDMean = fld.mean();
-                fld.dumpPMF(logPMF, minVal, maxVal);
+                fld->dumpPMF(logPMF, minVal, maxVal);
                 double sum = salmon::math::LOG_0;
                 for (auto v : logPMF) {
                     sum = salmon::math::logAdd(sum, v);
@@ -146,10 +168,28 @@ class ReadExperiment {
                 for (auto& v : logPMF) {
                     v -= sum;
                 }
+
+                // Create the non-logged distribution.
+                // Here, we multiply by 100 to discourage small
+                // numbers in the correctionFactorsfromCounts call
+                // below.
+                std::vector<double> pmf(maxVal + 1, 0.0);
+                for (size_t i = minVal; i < maxVal; ++i) {
+                    pmf[i] = 100.0 * std::exp(logPMF[i - minVal]);
+                }
+
+		using distribution_utils::DistributionSpace;
+		// We compute the factors in linear space (since we've de-logged the pmf)
+                auto correctionFactors = distribution_utils::correctionFactorsFromMass(pmf, DistributionSpace::LINEAR);
+		// Since we'll continue treating effective lengths in log space, populate them as such
+		distribution_utils::computeSmoothedEffectiveLengths(pmf.size(), transcripts_, correctionFactors, DistributionSpace::LOG);
+
+                /*
                 // Update the effective length of *every* transcript
                 for( auto& t : transcripts_ ) {
                     t.updateEffectiveLength(logPMF, logFLDMean, minVal, maxVal);
                 }
+                */
                 // then declare that we are done
                 done = true;
                 sl_.unlock();
@@ -166,6 +206,15 @@ class ReadExperiment {
     std::atomic<uint64_t>& numAssignedFragmentsAtomic() { return numAssignedFragments_; }
 
     void setNumObservedFragments(uint64_t numObserved) { numObservedFragments_ = numObserved; }
+    
+    void updateShortFrags(salmon::utils::ShortFragStats& fs) {
+        sl_.lock(); // merge fs into shortFragStats_ while holding sl_
+        if (fs.shortest < shortFragStats_.shortest) { shortFragStats_.shortest = fs.shortest; }
+        shortFragStats_.numTooShort += fs.numTooShort;
+        sl_.unlock();
+    }
+
+    salmon::utils::ShortFragStats getShortFragStats() const { return shortFragStats_; }
 
     uint64_t numObservedFragments() const {
         return numObservedFragments_;
@@ -182,10 +231,11 @@ class ReadExperiment {
     SalmonIndex* getIndex() { return salmonIndex_.get(); }
 
     template <typename QuasiIndexT>
-    void loadTranscriptsFromQuasi(QuasiIndexT* idx_) {
+    void loadTranscriptsFromQuasi(QuasiIndexT* idx_, const SalmonOpts& sopt) {
 	    size_t numRecords = idx_->txpNames.size();
+        auto log = spdlog::get("jointLog");
 
-	    fmt::print(stderr, "Index contained {} targets\n", numRecords);
+	    log->info("Index contained {} targets", numRecords);
 	    //transcripts_.resize(numRecords);
 	    double alpha = 0.005;
 	    for (auto i : boost::irange(size_t(0), numRecords)) {
@@ -199,25 +249,27 @@ class ReadExperiment {
 		    //auto txpSeq = idx_->seq.substr(idx_->txpOffsets[i], len);
 
 		    // Set the transcript sequence
-		    txp.Sequence = idx_->seq.c_str() + idx_->txpOffsets[i];
+		    txp.setSequenceBorrowed(idx_->seq.c_str() + idx_->txpOffsets[i],
+                                    sopt.gcBiasCorrect, sopt.gcSampFactor);
 		    // Length classes taken from
+            // https://github.com/cole-trapnell-lab/cufflinks/blob/master/src/biascorrection.cpp
 		    // ======
 		    // Roberts, Adam, et al.
 		    // "Improving RNA-Seq expression estimates by correcting for fragment bias."
 		    // Genome Biol 12.3 (2011): R22.
 		    // ======
 		    // perhaps, define these in a more data-driven way
-        if (txp.RefLength <= 1334) {
-          txp.lengthClassIndex(0);
-        } else if (txp.RefLength <= 2104) {
-          txp.lengthClassIndex(0);
-        } else if (txp.RefLength <= 2988) {
-          txp.lengthClassIndex(0);
-        } else if (txp.RefLength <= 4389) {
-          txp.lengthClassIndex(0);
-        } else {
-          txp.lengthClassIndex(0);
-        }
+            if (txp.RefLength <= 791) {
+                txp.lengthClassIndex(0);
+            } else if (txp.RefLength <= 1265) {
+                txp.lengthClassIndex(1);
+            } else if (txp.RefLength <= 1707) {
+                txp.lengthClassIndex(2);
+            } else if (txp.RefLength <= 2433) {
+                txp.lengthClassIndex(3);
+            } else {
+                txp.lengthClassIndex(4);
+            }
       }
 	    // ====== Done loading the transcripts from file
     }
@@ -226,10 +278,11 @@ class ReadExperiment {
 	    bwaidx_t* idx_ = salmonIndex_->bwaIndex();
 	    size_t numRecords = idx_->bns->n_seqs;
 	    std::vector<Transcript> transcripts_tmp;
+        auto log = spdlog::get("jointLog");
         //transcripts_tmp.reserve(numRecords);
         //transcripts_.reserve(numRecords);
 
-	    fmt::print(stderr, "Index contained {} targets\n", numRecords);
+	    log->info("Index contained {} targets", numRecords);
 	    //transcripts_.resize(numRecords);
 	    for (auto i : boost::irange(size_t(0), numRecords)) {
 		    uint32_t id = i;
@@ -276,36 +329,28 @@ class ReadExperiment {
             // allocate space for the new copy
             char* seqCopy = new char[seq.length()+1];
             std::strcpy(seqCopy, seq.c_str());
-            txp.Sequence = seqCopy;
-            txp.freeSeqOnDestruct = false;
+            txp.setSequenceOwned(seqCopy);
+		    txp.setSAMSequenceOwned(salmon::stringtools::encodeSequenceInSAM(seq.c_str(), t.RefLength));
 
-		    txp.SAMSequence = salmon::stringtools::encodeSequenceInSAM(seq.c_str(), t.RefLength);
-		    // Length classes taken from
+            // Length classes taken from
+            // https://github.com/cole-trapnell-lab/cufflinks/blob/master/src/biascorrection.cpp
 		    // ======
 		    // Roberts, Adam, et al.
 		    // "Improving RNA-Seq expression estimates by correcting for fragment bias."
 		    // Genome Biol 12.3 (2011): R22.
 		    // ======
 		    // perhaps, define these in a more data-driven way
-		    if (t.RefLength <= 1334) {
-			    txp.lengthClassIndex(0);
-		    } else if (t.RefLength <= 2104) {
-			    txp.lengthClassIndex(0);
-		    } else if (t.RefLength <= 2988) {
-			    txp.lengthClassIndex(0);
-		    } else if (t.RefLength <= 4389) {
-			    txp.lengthClassIndex(0);
-		    } else {
-			    txp.lengthClassIndex(0);
-		    }
-		    /*
-		       std::cerr << "TS = " << t.RefName << " : \n";
-		       std::cerr << seq << "\n VS \n";
-		       for (size_t i = 0; i < t.RefLength; ++i) {
-		       std::cerr << transcripts_.back().charBaseAt(i);
-		       }
-		       std::cerr << "\n\n";
-		       */
+            if (txp.RefLength <= 791) {
+                txp.lengthClassIndex(0);
+            } else if (txp.RefLength <= 1265) {
+                txp.lengthClassIndex(1);
+            } else if (txp.RefLength <= 1707) {
+                txp.lengthClassIndex(2);
+            } else if (txp.RefLength <= 2433) {
+                txp.lengthClassIndex(3);
+            } else {
+                txp.lengthClassIndex(4);
+            }
 		    free(rseq);
 		    /* end BWA code */
             ++tnum;
@@ -409,7 +454,10 @@ class ReadExperiment {
         LibraryFormat fmt1(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U);
         LibraryFormat fmt2(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U);
 
-        std::ofstream ofile(opath.string());
+        std::ofstream os(opath.string());
+        cereal::JSONOutputArchive oa(os);
+
+        //std::ofstream ofile(opath.string());
 
         fmt::MemoryWriter errstr;
 
@@ -467,13 +515,27 @@ class ReadExperiment {
 
                 if ( std::abs(ratio - 0.5) > 0.01) {
                     errstr << "NOTE: Read Lib [" << rl.readFilesAsString() << "] :\n";
-                    errstr << "\nDetected a strand bias > 1\% in an unstranded protocol "
+                    errstr << "\nDetected a *potential* strand bias > 1\% in an unstranded protocol "
                            << "check the file: " << opath.string() << " for details\n";
 
-                    log->warn() << errstr.str();
+                    log->warn(errstr.str());
                     errstr.clear();
                 }
+                
+
+                oa(cereal::make_nvp("read_files", rl.readFilesAsString()));
+                std::string expectedFormat = rl.format().toString();
+                oa(cereal::make_nvp("expected_format", expectedFormat));
+
+                double compatFragmentRatio = rl.numCompat() / static_cast<double>(numAssignedFragments_);
+                oa(cereal::make_nvp("compatible_fragment_ratio", compatFragmentRatio));
+                oa(cereal::make_nvp("num_compatible_fragments", rl.numCompat()));
+                oa(cereal::make_nvp("num_assigned_fragments", numAssignedFragments_.load()));
 
+                oa(cereal::make_nvp("num_consistent_mappings", numAgree));
+                oa(cereal::make_nvp("num_inconsistent_mappings", numDisagree));
+                oa(cereal::make_nvp("strand_mapping_bias", ratio));
+                    /*
                 ofile << "========\n"
                       << "Read library consisting of files: "
                       << rl.readFilesAsString()
@@ -486,6 +548,7 @@ class ReadExperiment {
                       << "# alignments with format " << fmt1 << ": " << numFmt1 << "\n"
                       << "# alignments with format " << fmt2 << ": " << numFmt2 << "\n"
                       << "\n========\n";
+                    */
             } else {
                 numAgree = 0;
                 numDisagree = 0;
@@ -498,6 +561,19 @@ class ReadExperiment {
                     }
                 } // end for
 
+                oa(cereal::make_nvp("read_files", rl.readFilesAsString()));
+                std::string expectedFormat = rl.format().toString();
+                oa(cereal::make_nvp("expected_format", expectedFormat));
+
+                double compatFragmentRatio = rl.numCompat() / static_cast<double>(numAssignedFragments_);
+                oa(cereal::make_nvp("compatible_fragment_ratio", compatFragmentRatio));
+                oa(cereal::make_nvp("num_compatible_fragments", rl.numCompat()));
+                oa(cereal::make_nvp("num_assigned_fragments", numAssignedFragments_.load()));
+
+                oa(cereal::make_nvp("num_consistent_mappings", numAgree));
+                oa(cereal::make_nvp("num_inconsistent_mappings", numDisagree));
+
+                /*
                 ofile << "========\n"
                       << "Read library consisting of files: "
                       << rl.readFilesAsString()
@@ -507,47 +583,105 @@ class ReadExperiment {
                       << "# of consistent alignments: " << numAgree << "\n"
                       << "# of inconsistent alignments: " << numDisagree << "\n"
                       << "\n========\n";
-
+                */
             } //end else
 
-            double disagreeRatio = static_cast<double>(numDisagree) / (numAgree + numDisagree);
+
+            double compatFragmentRatio = rl.numCompat() / static_cast<double>(numAssignedFragments_);
+            double disagreeRatio = 1.0 - compatFragmentRatio;
             if (disagreeRatio > 0.05) {
                 errstr << "NOTE: Read Lib [" << rl.readFilesAsString() << "] :\n";
-                errstr << "\nGreater than 5\% of the alignments (but not, necessarily reads) "
+                errstr << "\nGreater than 5\% of the fragments "
                        << "disagreed with the provided library type; "
                        << "check the file: " << opath.string() << " for details\n";
 
-                log->warn() << errstr.str();
+                log->warn(errstr.str());
                 errstr.clear();
             }
 
-            ofile << "---- counts for each format type ---\n";
+            //ofile << "---- counts for each format type ---\n";
             for (size_t i = 0; i < counts.size(); ++i) {
-                ofile << LibraryFormat::formatFromID(i) << " : " << counts[i] << "\n";
+                //ofile << LibraryFormat::formatFromID(i) << " : " << counts[i] << "\n";
+                std::string desc = LibraryFormat::formatFromID(i).toString();
+                if (!desc.empty()) {
+                    oa(cereal::make_nvp(desc, counts[i].load()));
+                }
             }
-            ofile << "------------------------------------\n\n";
+            //ofile << "------------------------------------\n\n";
         }
-        ofile.close();
+        //ofile.close();
     }
 
     std::vector<ReadLibrary>& readLibraries() { return readLibraries_; }
+    const std::vector<ReadLibrary>& readLibraries() const { return readLibraries_; }
     FragmentLengthDistribution* fragmentLengthDistribution() const { return fragLengthDist_.get(); }
 
-    void setExpectedBias(const std::vector<double>& expectedBiasIn) {
-        expectedBias_ = expectedBiasIn;
-    }
+    void setGCFracForward(double fracForward) { gcFracFwd_ = fracForward; }
 
-    std::vector<double>& expectedBias() {
+    double gcFracFwd() const { return gcFracFwd_; }
+    double gcFracRC() const { return 1.0 - gcFracFwd_; }
+
+
+    std::vector<double>& expectedSeqBias() {
         return expectedBias_;
     }
 
-    const std::vector<double>& expectedBias() const {
+    const std::vector<double>& expectedSeqBias() const {
         return expectedBias_;
     }
 
-    ReadKmerDist<6, std::atomic<uint32_t>>& readBias() { return readBias_; }
-    const ReadKmerDist<6, std::atomic<uint32_t>>& readBias() const { return readBias_; }
+    void setExpectedGCBias(const GCFragModel& expectedBiasIn) {
+        expectedGC_ = expectedBiasIn;
+    }
+
+    GCFragModel& expectedGCBias() {
+        return expectedGC_;
+    }
+
+    const GCFragModel& expectedGCBias() const {
+        return expectedGC_;
+    }
 
+    const GCFragModel& observedGC() const {
+        return observedGC_;
+    }
+
+    GCFragModel& observedGC() {
+        return observedGC_;
+    }
+
+    std::vector<SimplePosBias>& posBias(salmon::utils::Direction dir) { 
+        return (dir == salmon::utils::Direction::FORWARD) ? posBiasFW_ : posBiasRC_; 
+    }
+    const std::vector<SimplePosBias>& posBias(salmon::utils::Direction dir) const { 
+        return (dir == salmon::utils::Direction::FORWARD) ? posBiasFW_ : posBiasRC_; 
+    }
+
+    ReadKmerDist<6, std::atomic<uint32_t>>& readBias(salmon::utils::Direction dir) { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBias_[0] : readBias_[1]; 
+    }
+    const ReadKmerDist<6, std::atomic<uint32_t>>& readBias(salmon::utils::Direction dir) const { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBias_[0] : readBias_[1]; 
+    }
+
+    SBModel& readBiasModelObserved(salmon::utils::Direction dir) { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelObserved_[0] : readBiasModelObserved_[1]; 
+    }
+    const SBModel& readBiasModelObserved(salmon::utils::Direction dir) const { 
+        return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelObserved_[0] : readBiasModelObserved_[1]; 
+    }
+
+    SBModel& readBiasModelExpected(salmon::utils::Direction dir) { 
+	return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelExpected_[0] : readBiasModelExpected_[1]; 
+    }
+    const SBModel& readBiasModelExpected(salmon::utils::Direction dir) const { 
+	return (dir == salmon::utils::Direction::FORWARD) ? readBiasModelExpected_[0] : readBiasModelExpected_[1]; 
+   }
+    void setReadBiasModelExpected(SBModel&& model, salmon::utils::Direction dir) {
+        size_t idx = (dir == salmon::utils::Direction::FORWARD) ? 0 : 1;
+	readBiasModelExpected_[idx] = std::move(model);
+    }
+  
     private:
     /**
      * The file from which the alignments will be read.
@@ -587,6 +721,7 @@ class ReadExperiment {
     /** Keeps track of the number of passes that have been
      *  made through the alignment file.
      */
+    salmon::utils::ShortFragStats shortFragStats_;
     std::atomic<uint64_t> numObservedFragments_{0};
     std::atomic<uint64_t> numAssignedFragments_{0};
     uint64_t totalAssignedFragments_{0};
@@ -599,10 +734,23 @@ class ReadExperiment {
     std::unique_ptr<FragmentLengthDistribution> fragLengthDist_;
     EquivalenceClassBuilder eqBuilder_;
 
+    /** Positional bias things**/
+    std::vector<SimplePosBias> posBiasFW_;
+    std::vector<SimplePosBias> posBiasRC_;
+ 
+    /** GC-fragment bias things **/
+    // One bin for each percentage GC content
+    double gcFracFwd_{-1.0};
+    GCFragModel observedGC_;
+    GCFragModel expectedGC_;
+
     /** Sequence specific bias things **/
     // Since multiple threads can touch this dist, we
     // need atomic counters.
-    ReadKmerDist<6, std::atomic<uint32_t>> readBias_;
+    std::array<ReadKmerDist<6, std::atomic<uint32_t>>, 2> readBias_;
+    std::array<SBModel, 2> readBiasModelObserved_;
+    std::array<SBModel, 2> readBiasModelExpected_;
+    //std::array<std::vector<double>, 2> expectedBias_;
     std::vector<double> expectedBias_;
 };
 
diff --git a/include/ReadKmerDist.hpp b/include/ReadKmerDist.hpp
index e4c900b..7eaed91 100644
--- a/include/ReadKmerDist.hpp
+++ b/include/ReadKmerDist.hpp
@@ -1,6 +1,7 @@
 #ifndef READ_KMER_DIST_HPP
 #define READ_KMER_DIST_HPP
 
+#include <limits>
 #include <fstream>
 #include <iostream>
 #include <unordered_map>
@@ -14,12 +15,16 @@ template <uint32_t K, typename CountT = uint32_t>
 class ReadKmerDist {
   public:
     std::array<CountT, constExprPow(4,K)> counts;
+    std::unordered_map<std::string, CountT> strCounts;
+    std::map<char, uint32_t> startMap;
 
-    ReadKmerDist() {
-      // set a pseudo-count of 1
-      for (size_t i = 0; i < counts.size(); ++i) {
-	counts[i] = 1;
-      }
+    ReadKmerDist(bool pseudoCount=true) {
+        // Give every bin a defined starting value: 1 when a pseudo-count is
+        // requested, 0 otherwise (a default-initialized std::array is not zeroed).
+        const int initial = pseudoCount ? 1 : 0;
+        for (size_t i = 0; i < counts.size(); ++i) {
+            counts[i] = initial;
+        }
     }
 
     inline constexpr uint32_t getK() { return K; }
@@ -32,44 +37,42 @@ class ReadKmerDist {
 
     // update the k-mer context for the hit at position p.
     // The underlying transcript is from [start, end)
-    inline bool update(const char* start, const char *p, const char *end,
-	salmon::utils::Direction dir) {
-      using salmon::utils::Direction;
-      int posBeforeHit = 2;
-      int posAfterHit = 4;
-      bool success{false};
-      switch (dir) {
-	case Direction::FORWARD :
-	  {
-	    // If we can fit the window before and after the read
-	    if ((p - start) >= posBeforeHit and
-		((p - posBeforeHit + K) < end) ) {
-	      p -= posBeforeHit;
-	      // If the read matches in the forward direction, we take
-	      // the RC sequence.
-	      auto idx = indexForKmer(p, K, Direction::REVERSE_COMPLEMENT);
-	      if (idx > counts.size()) { return false; }
-	      counts[idx]++;
-	      success = true;
-	    }
-	  }
-	  break;
-	case Direction::REVERSE_COMPLEMENT :
-	  {
-	    if ((p - start) >= posAfterHit and
-		((p - posAfterHit + K) < end) ) {
-	      p -= posAfterHit;
-	      auto idx = indexForKmer(p, K, Direction::FORWARD);
-	      if (idx > counts.size()) { return false; }
-	      counts[idx]++;
-	      success = true;
-	    }
-	  }
-	  break;
-	default:
-	  break;
-      }
-      return success;
+    inline bool update(
+		       const char* start,
+		       const char* p,
+		       const char* end,
+		       salmon::utils::Direction dir
+		       ){
+        using salmon::utils::Direction;
+        // Returns true only if a k-mer count was incremented.
+        // Offsets subtracted from the hit position `p` before the k-mer is
+        // indexed, for forward and reverse-complement hits respectively.
+        // (Earlier values, kept for reference: 3 and 2.)
+        constexpr int posBeforeHit = 4;
+        constexpr int posAfterHit = 3;
+
+        // Pick the shift for this direction; any other direction has no window.
+        int shift = -1;
+        if (dir == Direction::FORWARD) {
+            shift = posBeforeHit;
+        } else if (dir == Direction::REVERSE_COMPLEMENT) {
+            shift = posAfterHit;
+        }
+        if (shift < 0) { return false; }
+
+        // The window must start at or after `start` and end strictly before `end`.
+        if ((p - start) < shift or ((p - shift + K) >= end)) {
+            return false;
+        }
+        p -= shift;
+
+        // Count the k-mer only when its index falls inside the table.
+        auto idx = indexForKmer(p, K, dir);
+        if (idx < counts.size()) {
+            counts[idx]++;
+            return true;
+        }
+        return false;
     }
 
 };
diff --git a/include/ReadLibrary.hpp b/include/ReadLibrary.hpp
index 834f40d..378953e 100644
--- a/include/ReadLibrary.hpp
+++ b/include/ReadLibrary.hpp
@@ -8,6 +8,7 @@
 #include <boost/filesystem.hpp>
 
 #include "LibraryFormat.hpp"
+#include "LibraryTypeDetector.hpp"
 
 /**
  * This class represents the basic information about a library of reads, like
@@ -20,7 +21,9 @@ public:
      * Construct a new ReadLibrary of the given format
      */
     ReadLibrary(LibraryFormat& fmt) : fmt_(fmt),
-        libTypeCounts_(std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1)){}
+                                      libTypeCounts_(std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1)),
+                                      numCompat_(0)
+    {}
 
     /**
      * Copy constructor
@@ -33,6 +36,8 @@ public:
         libTypeCounts_(std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1)) {
             size_t mc = LibraryFormat::maxLibTypeID() + 1;
             for (size_t i = 0; i < mc; ++i) { libTypeCounts_[i].store(rl.libTypeCounts_[i].load()); }
+            numCompat_.store(rl.numCompat());
+	    if (rl.detector_) { detector_.reset(new LibraryTypeDetector(*(rl.detector_.get()))); }
         }
 
     /**
@@ -46,6 +51,8 @@ public:
         libTypeCounts_(std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1)) {
             size_t mc = LibraryFormat::maxLibTypeID() + 1;
             for (size_t i = 0; i < mc; ++i) { libTypeCounts_[i].store(rl.libTypeCounts_[i].load()); }
+            numCompat_.store(rl.numCompat());
+	    if (rl.detector_) { detector_ = std::move(rl.detector_); } // take over rl's detector (moving from detector_ itself would leave it empty)
         }
 
     /**
@@ -76,12 +83,42 @@ public:
         return (fmt_.type == ReadType::PAIRED_END);
     }
 
+    /**
+    * Enable automatic detection of this library's type.  Calling this
+    * again once a detector exists leaves the existing detector untouched.
+    */
+    void enableAutodetect() {
+      if (detector_) { return; }
+      // The detector is constructed from this library's read type.
+      detector_.reset(new LibraryTypeDetector(fmt_.type));
+    }
+
+    bool autoDetect() const { return (detector_.get() != nullptr);} // true once a detector has been created
+
+    LibraryTypeDetector* getDetector() { return detector_.get(); } // non-owning pointer; nullptr when no detector exists
+    
+    LibraryFormat& getFormat() { return fmt_; } // mutable access to this library's format
+    const LibraryFormat& getFormat() const { return fmt_; } // read-only access to this library's format
+  
+    // Returns true only when every entry of `filenames` exists; each missing
+    // entry gets its own message in `errorStream`, so all of them are reported.
+    bool allExist_(std::vector<std::string>& filenames, std::stringstream& errorStream) {
+        size_t numMissing{0};
+        for (const auto& path : filenames) {
+            if (boost::filesystem::exists(path)) { continue; }
+            errorStream << "ERROR: file [" << path << "] does not appear to exist!\n\n";
+            ++numMissing;
+        }
+        return numMissing == 0;
+    }
 
     bool checkFileExtensions_(std::vector<std::string>& filenames, std::stringstream& errorStream) {
         namespace bfs = boost::filesystem;
 
         std::set<std::string> acceptableExensions = {".FASTA", ".FASTQ", ".FA", ".FQ",
-                                                     ".fasta", ".fastq", ".fa", ".fq"};
+                                                     ".fasta", ".fastq", ".fa", ".fq",
+                                                     ".GZ", ".gz"};
+
         bool extensionsOK{true};
         for (auto& fn : filenames) {
             auto fpath = bfs::path(fn);
@@ -89,8 +126,8 @@ public:
             if (bfs::is_regular_file(fpath)) {
                 if (acceptableExensions.find(ext) == acceptableExensions.end()) {
                     errorStream << "ERROR: file [" << fn << "] has extension " << ext << ", "
-                        << "which suggests it is neither a fasta nor a fastq file.\n"
-                        << "Is this a compressed file?  If so, consider replacing: \n\n"
+                        << "which suggests it is neither a fasta nor a fastq file (or gzip compressed fasta/q).\n"
+                        << "Is this file compressed in some other way?  If so, consider replacing: \n\n"
                         << fn << "\n\nwith\n\n"
                         << "<(decompressor " << fn << ")\n\n"
                         << "which will decompress the reads \"on-the-fly\"\n\n";
@@ -185,9 +222,9 @@ public:
         // (i.e. named-pipes).  If the user passed in a non-regular file, we should
         // have some other mechanism to check if it's of an expected format and provide
         // a reasonable error message otherwise.
-        readsOK = readsOK && checkFileExtensions_(mateOneFilenames_, errorStream);
-        readsOK = readsOK && checkFileExtensions_(mateTwoFilenames_, errorStream);
-        readsOK = readsOK && checkFileExtensions_(unmatedFilenames_, errorStream);
+        readsOK = readsOK && allExist_(mateOneFilenames_, errorStream) && checkFileExtensions_(mateOneFilenames_, errorStream);
+        readsOK = readsOK && allExist_(mateTwoFilenames_, errorStream) && checkFileExtensions_(mateTwoFilenames_, errorStream);
+        readsOK = readsOK && allExist_(unmatedFilenames_, errorStream) && checkFileExtensions_(unmatedFilenames_, errorStream);
 
         if (!readsOK) {
             throw std::invalid_argument(errorStream.str());
@@ -215,6 +252,15 @@ public:
     const LibraryFormat& format() const { return fmt_; }
 
     /**
+     * Update the number of fragments compatible with this library type
+     */
+    inline void updateCompatCounts(uint64_t numCompat) {
+        numCompat_.fetch_add(numCompat); // atomic add, same default ordering as +=
+    }
+
+    uint64_t numCompat() const { return numCompat_.load(); }
+
+    /**
     * Update the library type counts for this read library given the counts
     * in the vector `counts` which has been passed in.
     */
@@ -233,6 +279,8 @@ private:
     std::vector<std::string> mateOneFilenames_;
     std::vector<std::string> mateTwoFilenames_;
     std::vector<std::atomic<uint64_t>> libTypeCounts_;
+    std::atomic<uint64_t> numCompat_;
+    std::unique_ptr<LibraryTypeDetector> detector_{nullptr};
 };
 
 #endif // READ_LIBRARY_HPP
diff --git a/include/ReadPair.hpp b/include/ReadPair.hpp
index 1a06f5c..1939907 100644
--- a/include/ReadPair.hpp
+++ b/include/ReadPair.hpp
@@ -7,7 +7,7 @@
 #include "SalmonUtils.hpp"
 #include "RapMapUtils.hpp"
 
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 
 struct ReadPair {
     bam_seq_t* read1 = nullptr;
@@ -108,6 +108,27 @@ struct ReadPair {
         return l;//bam_name_len(read1);
     }
 
+    // Distance from the leftmost end of the 5' read to the rightmost end of
+    // the 3' read, with both ends clamped to [0, txpLen]; may be below a read length.
+    inline uint32_t fragLengthPedantic(uint32_t txpLen) const {
+        if (!isPaired()) { return 0; }
+
+        const bool fw1 = !bam_strand(read1);
+        const bool fw2 = !bam_strand(read2);
+        if (fw1 == fw2) { return 0; } // both mates report the same strand: no length is computed
+
+        // Restrict a coordinate to the transcript's extent [0, txpLen].
+        auto clampToTxp = [txpLen](int32_t pos) -> int32_t {
+            if (pos < 0) { return 0; }
+            return (pos > txpLen) ? txpLen : pos;
+        };
+        bam_seq_t* fwRead = fw1 ? read1 : read2;
+        bam_seq_t* rcRead = fw1 ? read2 : read1;
+        const int32_t p1 = clampToTxp(bam_pos(fwRead));
+        const int32_t p2 = clampToTxp(bam_pos(rcRead) + bam_seq_len(rcRead));
+        return (p1 > p2) ? p1 - p2 : p2 - p1;
+    }
+    
     inline uint32_t fragLen() const {
         if (!isPaired()) { return 0; }
         auto leftmost1 = bam_pos(read1);
diff --git a/include/SBModel.hpp b/include/SBModel.hpp
new file mode 100644
index 0000000..a081472
--- /dev/null
+++ b/include/SBModel.hpp
@@ -0,0 +1,90 @@
+#ifndef __SB_MODEL_HPP__
+#define __SB_MODEL_HPP__
+
+#include <boost/iostreams/filtering_stream.hpp>
+
+#include "jellyfish/mer_dna.hpp"
+#include "UtilityFunctions.hpp"
+#include <Eigen/Dense>
+#include <cmath>
+
+using Mer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 4>;
+
+class SBModel {
+public:
+  SBModel();
+  // Copy/move construction and assignment all use the compiler-generated versions.
+  SBModel(const SBModel&) = default;
+  SBModel(SBModel&&) = default;
+  SBModel& operator=(const SBModel&) = default;
+  SBModel& operator=(SBModel&&) = default;
+
+  bool writeBinary(boost::iostreams::filtering_ostream& out) const;
+  // Context sizes on each side; passing rc == true swaps the left and right values.
+  inline int32_t contextBefore(bool rc) { return rc ? _contextRight : _contextLeft; }
+  inline int32_t contextAfter(bool rc) { return rc ? _contextLeft : _contextRight; }
+
+    bool addSequence(const char* seqIn, bool revCmp, double weight = 1.0);
+    bool addSequence(const Mer& mer, double weight);
+
+    Eigen::MatrixXd& counts();
+    Eigen::MatrixXd& marginals();
+
+    double evaluateLog(const char* seqIn);
+    double evaluateLog(const Mer& mer);
+
+  bool normalize();
+
+  bool checkTransitionProbabilities();
+
+  void combineCounts(const SBModel& other);
+
+  void dumpConditionalProbabilities(std::ostream& os);
+
+  int32_t getContextLength();
+
+  template <typename CountVecT>
+  bool train(CountVecT& kmerCounts, const uint32_t K);
+  // Multiplies the _probs entries picked for positions 0 .. K-3 of `kmer`; the table has six entries, so K is presumably 6 — TODO confirm.
+  inline double evaluate(uint32_t kmer, uint32_t K) {
+    static const uint32_t order[] = {0, 0, 2, 2, 2, 2}; // fixed per-position order table: no per-call allocation, no shadowing of the _order member
+    double p{1.0};
+    for (uint32_t pos = 0; pos < K - order[5]; ++pos) {
+      uint32_t offset = 2 * (K - (pos + 1) - order[pos]);
+      auto idx = _getIndex(kmer, offset, order[pos]);
+      p *= _probs(idx, pos);
+    }
+    return p;
+  }
+
+private:
+  inline uint32_t _getIndex(uint32_t kmer, uint32_t offset, uint32_t _order) { // low bits of (kmer >> offset), width chosen by _order
+    kmer >>= offset;
+    switch (_order) {
+    case 0:
+      return kmer & 0x3; // low 2 bits
+    case 1:
+      return kmer & 0xF; // low 4 bits
+    case 2:
+      return kmer & 0x3F; // low 6 bits
+    default:
+      return 0; // any other order yields index 0
+    }
+    return 0;
+  }
+  bool _trained;
+
+  int32_t _contextLength;
+  int32_t _contextLeft;
+  int32_t _contextRight;
+
+  Eigen::MatrixXd _probs;
+  Eigen::MatrixXd _marginals;
+
+  Mer _mer;
+  std::vector<int32_t> _order;
+  std::vector<int32_t> _shifts;
+  std::vector<int32_t> _widths;
+};
+
+#endif //__SB_MODEL_HPP__
diff --git a/include/SGSmooth.hpp b/include/SGSmooth.hpp
new file mode 100644
index 0000000..ceacd95
--- /dev/null
+++ b/include/SGSmooth.hpp
@@ -0,0 +1,13 @@
+#ifndef __SGSMOOTH_HPP__
+#define __SGSMOOTH_HPP__
+
+#include <vector>
+
+// Savitzky-Golay smoothing of v (w and deg are presumably the window size and polynomial degree — confirm against the implementation).
+std::vector<double> sg_smooth(const std::vector<double> &v, const int w, const int deg);
+//! Numerical derivative based on Savitzky-Golay smoothing; h defaults to 1.0.
+std::vector<double> sg_derivative(const std::vector<double> &v, const int w, 
+                                const int deg, const double h=1.0);
+
+#endif // __SGSMOOTH_HPP__
+
diff --git a/include/SalmonConfig.hpp b/include/SalmonConfig.hpp
index 62f0568..1e42435 100644
--- a/include/SalmonConfig.hpp
+++ b/include/SalmonConfig.hpp
@@ -27,9 +27,9 @@
 
 namespace salmon {
 	constexpr char majorVersion[] = "0";
-	constexpr char minorVersion[] = "6";
-	constexpr char patchVersion[] = "0";
-	constexpr char version[] = "0.6.0";
+	constexpr char minorVersion[] = "7";
+	constexpr char patchVersion[] = "1";
+	constexpr char version[] = "0.7.1";
     constexpr uint32_t indexVersion = 2;
 }
 
diff --git a/include/SalmonIndex.hpp b/include/SalmonIndex.hpp
index 7e65d64..d3b2b53 100644
--- a/include/SalmonIndex.hpp
+++ b/include/SalmonIndex.hpp
@@ -17,6 +17,7 @@ extern "C" {
 #include "cereal/archives/json.hpp"
 #include "cereal/types/vector.hpp"
 
+#include "BooMap.hpp"
 #include "RapMapSAIndex.hpp"
 #include "IndexHeader.hpp"
 #include "BWAUtils.hpp"
@@ -30,6 +31,12 @@ int bwa_index(int argc, char* argv[]);
 // declaration of quasi index function
 int rapMapSAIndex(int argc, char* argv[]);
 
+template <typename IndexT> 
+using DenseHash = google::dense_hash_map<uint64_t, 
+                                         rapmap::utils::SAInterval<IndexT>, 
+                                         rapmap::utils::KmerKeyHasher>;
+template <typename IndexT> 
+using PerfectHash = BooMap<uint64_t, rapmap::utils::SAInterval<IndexT>>;
 
 class SalmonIndex{
         public:
@@ -93,7 +100,7 @@ class SalmonIndex{
                               tend = tstart + len;
                               rseq = bns_get_seq(l_pac, idx_->pac, tstart, tend, &compLen);
                               if (compLen != len) {
-                                  fmt::print(stderr,
+                                  logger_->error(
                                           "For transcript {}, stored length ({}) != computed length ({}) --- index may be corrupt. exiting\n",
                                           name, compLen, len);
                                   std::exit(1);
@@ -144,8 +151,13 @@ class SalmonIndex{
             bwaidx_t* bwaIndex() { return idx_; }
 
             bool is64BitQuasi() { return largeQuasi_; }
-            RapMapSAIndex<int32_t>* quasiIndex32() { return quasiIndex32_.get(); }
-            RapMapSAIndex<int64_t>* quasiIndex64() { return quasiIndex64_.get(); }
+            bool isPerfectHashQuasi() { return perfectHashQuasi_;} 
+
+            RapMapSAIndex<int32_t, DenseHash<int32_t>>* quasiIndex32() { return quasiIndex32_.get(); }
+            RapMapSAIndex<int64_t, DenseHash<int64_t>>* quasiIndex64() { return quasiIndex64_.get(); }
+
+            RapMapSAIndex<int32_t, PerfectHash<int32_t>>* quasiIndexPerfectHash32() { return quasiIndexPerfectHash32_.get(); }
+            RapMapSAIndex<int64_t, PerfectHash<int64_t>>* quasiIndexPerfectHash64() { return quasiIndexPerfectHash64_.get(); }
 
             bool hasAuxKmerIndex() { return versionInfo_.hasAuxKmerIndex(); }
             KmerIntervalMap& auxIndex() { return auxIdx_; }
@@ -209,16 +221,13 @@ class SalmonIndex{
                                   std::vector<std::string>& quasiArgVec,
                                   uint32_t k) {
                 namespace bfs = boost::filesystem;
-                char* quasiArgv[] = {
-                    const_cast<char*>(quasiArgVec[0].c_str()),
-                    const_cast<char*>(quasiArgVec[1].c_str()),
-                    const_cast<char*>(quasiArgVec[2].c_str()),
-                    const_cast<char*>(quasiArgVec[3].c_str()),
-                    const_cast<char*>(quasiArgVec[4].c_str()),
-                    const_cast<char*>(quasiArgVec[5].c_str()),
-                    const_cast<char*>(quasiArgVec[6].c_str())
-                };
-                int quasiArgc = 7;
+		int quasiArgc = static_cast<int>(quasiArgVec.size());
+		char** quasiArgv = new char*[quasiArgc];
+		for (size_t i = 0; i < quasiArgc; ++i) {
+		  auto& arg = quasiArgVec[i];
+		  quasiArgv[i] = new char[arg.size() + 1];
+		  std::strcpy(quasiArgv[i], arg.c_str());
+		}
 
                 int ret = rapMapSAIndex(quasiArgc, quasiArgv);
 
@@ -228,7 +237,14 @@ class SalmonIndex{
                 versionInfo_.auxKmerLength(k);
                 versionInfo_.indexType(SalmonIndexType::QUASI);
                 versionInfo_.save(versionFile);
-                return (ret == 0);
+	
+		// Free the memory used for the arg vector; each entry came from new char[], so it needs delete []
+		for (int i = 0; i < quasiArgc; ++i) {
+		  delete [] quasiArgv[i];
+		}
+		delete [] quasiArgv;
+	
+		return (ret == 0);
             }
 
           bool loadFMDIndex_(const boost::filesystem::path& indexDir) {
@@ -249,8 +265,8 @@ class SalmonIndex{
                   boost::filesystem::path indexPath = indexDir / "bwaidx";
                   //if ((idx_ = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_BWT|BWA_IDX_BNS|BWA_IDX_PAC)) == 0) {
                   if ((idx_ = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_ALL)) == 0) {
-                      fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
-                      fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                      logger_->error("Couldn't open index [{}] --- ", indexPath);
+                      logger_->error("Please make sure that 'salmon index' has been run successfully");
                       std::exit(1);
                   }
               }
@@ -281,22 +297,44 @@ class SalmonIndex{
                     std::exit(1);
                   }
 
+                  // Is the quasi-index using a perfect hash
+                  perfectHashQuasi_ = h.perfectHash();
+
                   if (h.bigSA()) {
                     largeQuasi_ = true;
-                    fmt::print(stderr, "Loading 64-bit quasi index");
-                    quasiIndex64_.reset(new RapMapSAIndex<int64_t>);
-                    if (!quasiIndex64_->load(indexStr)) {
-                      fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
-                      fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
-                      std::exit(1);
+                    logger_->info("Loading 64-bit quasi index");
+                    if (perfectHashQuasi_) {
+                        quasiIndexPerfectHash64_.reset(new RapMapSAIndex<int64_t, PerfectHash<int64_t>>);
+                        if (!quasiIndexPerfectHash64_->load(indexStr)) {
+                            fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
+                            fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                            std::exit(1);
+                        }
+                    } else {
+                        quasiIndex64_.reset(new RapMapSAIndex<int64_t, DenseHash<int64_t>>);
+                        if (!quasiIndex64_->load(indexStr)) {
+                            fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
+                            fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                            std::exit(1);
+                        }
                     }
-                  } else {
-                    fmt::print(stderr, "Loading 32-bit quasi index");
-                    quasiIndex32_.reset(new RapMapSAIndex<int32_t>);
-                    if(!quasiIndex32_->load(indexStr)) {
-                      fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
-                      fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
-                      std::exit(1);
+                  } else { // 32-bit index
+                    logger_->info("Loading 32-bit quasi index");
+                    
+                    if (perfectHashQuasi_) {
+                        quasiIndexPerfectHash32_.reset(new RapMapSAIndex<int32_t, PerfectHash<int32_t>>);
+                        if (!quasiIndexPerfectHash32_->load(indexStr)) {
+                            fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
+                            fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                            std::exit(1);
+                        }
+                    } else {
+                        quasiIndex32_.reset(new RapMapSAIndex<int32_t, DenseHash<int32_t>>);
+                        if (!quasiIndex32_->load(indexStr)) {
+                            fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
+                            fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                            std::exit(1);
+                        }
                     }
                   }
               }
@@ -310,8 +348,13 @@ class SalmonIndex{
           // Can't think of a generally better way to do this now
           // without making the entire code-base look crazy
           bool largeQuasi_{false};
-      	  std::unique_ptr<RapMapSAIndex<int32_t>> quasiIndex32_{nullptr};
-      	  std::unique_ptr<RapMapSAIndex<int64_t>> quasiIndex64_{nullptr};
+          bool perfectHashQuasi_{false};
+
+          std::unique_ptr<RapMapSAIndex<int32_t, DenseHash<int32_t>>> quasiIndex32_{nullptr};
+          std::unique_ptr<RapMapSAIndex<int64_t, DenseHash<int64_t>>> quasiIndex64_{nullptr};
+
+          std::unique_ptr<RapMapSAIndex<int32_t, PerfectHash<int32_t>>> quasiIndexPerfectHash32_{nullptr};
+          std::unique_ptr<RapMapSAIndex<int64_t, PerfectHash<int64_t>>> quasiIndexPerfectHash64_{nullptr};
 
           bwaidx_t *idx_{nullptr};
           KmerIntervalMap auxIdx_;
diff --git a/include/SalmonIndexVersionInfo.hpp b/include/SalmonIndexVersionInfo.hpp
index ea885bb..925097e 100644
--- a/include/SalmonIndexVersionInfo.hpp
+++ b/include/SalmonIndexVersionInfo.hpp
@@ -1,7 +1,7 @@
 #ifndef __SALMON_INDEX_VERSION_INFO_HPP__
 #define __SALMON_INDEX_VERSION_INFO_HPP__
 
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 #include "boost/filesystem.hpp"
 #include "cereal/archives/json.hpp"
 
diff --git a/include/SalmonMath.hpp b/include/SalmonMath.hpp
index 6729232..fb2db58 100644
--- a/include/SalmonMath.hpp
+++ b/include/SalmonMath.hpp
@@ -42,6 +42,9 @@ namespace salmon {
         constexpr double EPSILON = 0.375e-10;
         const double LOG_EPSILON = log(EPSILON);
 
+        inline double log(double v) { return (v > 0) ? std::log(v) : LOG_0; } // LOG_0 for any v that is not > 0 (zero, negative, NaN)
+        inline double exp(double v) { return std::exp(v); } // thin wrapper over std::exp
+        inline bool isLog0(double v) { return v == LOG_0; } // exact equality test against LOG_0
         // Taken from https://github.com/adarob/eXpress/blob/master/src/main.h
         inline bool approxEqual(double a, double b, double eps=EPSILON) {
             return std::abs(a-b) <= eps;
diff --git a/include/SalmonOpts.hpp b/include/SalmonOpts.hpp
index f3cd122..332c7d2 100644
--- a/include/SalmonOpts.hpp
+++ b/include/SalmonOpts.hpp
@@ -33,6 +33,14 @@ struct SalmonOpts {
 
     bool allowOrphans; // Consider orphaned reads when performing lightweight alignemnt.
 
+    std::string auxDir; // The directory where auxiliary files will be written.
+
+    std::string runStartTime; // String representation of the date / time at which the run began.
+
+    bool consistentHits;  // Enforce consistency of hits gathered during quasi-mapping.
+
+    bool dumpEq; 	     // Dump the equivalence classes and counts to file
+
     bool splitSpanningSeeds; // Attempt to split seeds that span multiple transcripts.
 
     bool noFragLengthDist ; // Don't give a fragment assignment a likelihood based on an emperically
@@ -42,6 +50,14 @@ struct SalmonOpts {
                                       // account when computing the probability that a
                                      // fragment was generated from a transcript.
 
+    bool noBiasLengthThreshold; // Don't require that the recomputed effective length for a target
+                                // be above a threshold before applying it.
+    bool useBiasLengthThreshold; // NOTE(review): presumably the inverse of noBiasLengthThreshold, i.e. require the recomputed
+                                // effective length to be above a threshold before applying it — confirm against the option parser.
+
+    bool rankEqClasses; // Keep a separate equivalence class for each ordering (by conditional probability)
+                        // of the transcript labels
+    
     bool useFSPD; // Learn a non-uniform start distribution
 
     bool noSeqBiasModel; // Don't learn and use a sequence-specific bias model.
@@ -61,6 +77,9 @@ struct SalmonOpts {
                           // the provided library type could correspond to the true
                           // fragment origin.
 
+    bool ignoreIncompat; // If incompatPrior is 0, this flag is set to true and we completely 
+                         // ignore incompatible fragments.
+
     bool useErrorModel; // Learn and apply the error model when computing the likelihood
                         // of a given alignment.
 
@@ -79,6 +98,13 @@ struct SalmonOpts {
 
     uint32_t maxExpectedReadLen; // Maximum expected length of an observed read.
 
+    uint64_t numRequiredFragments; //
+
+    uint32_t gcSampFactor; // The factor by which to down-sample the GC distribution of transcripts
+    uint32_t pdfSampFactor; // The factor by which to down-sample the fragment length pmf when
+                            // evaluating gc-bias for effective length correction.
+
+    bool strictIntersect; // Use strict rather than fuzzy intersection in quasi-mapping
     bool useMassBanking; // DEPRECATED
 
     bool sensitive; // Perform splitting of long SMEMs into MEMs
@@ -91,21 +117,35 @@ struct SalmonOpts {
 
     boost::filesystem::path indexDirectory; // Index directory
 
+    boost::filesystem::path geneMapPath; // Gene map path 
+    
+    bool quiet; // Be quiet during quantification.
+
     bool useVBOpt; // Use Variational Bayesian EM instead of "regular" EM in the batch passes
 
     bool useQuasi; // Are we using the quasi-mapping based index or not.
 
+  std::unique_ptr<std::ofstream> unmappedFile{nullptr};
+    bool writeUnmappedNames; // write the names of unmapped reads
     bool sampleOutput; // Sample alignments according to posterior estimates of transcript abundance.
     bool sampleUnaligned; // Pass along un-aligned reads in the sampling.
 
     uint32_t numGibbsSamples; // Number of rounds of Gibbs sampling to perform
     uint32_t numBootstraps; // Number of bootstrap samples to draw
 
+    bool initUniform{false}; // initialize offline optimization parameters uniformly, rather than with online estimates.
     bool alnMode{false};     // true if we're in alignment based mode, false otherwise
     bool biasCorrect{false}; // Perform sequence-specific bias correction
+    bool gcBiasCorrect{false}; // Perform gc-fragment bias correction
+    bool posBiasCorrect{false}; // Perform positional bias correction
+    size_t numConditionalGCBins{3};
+    size_t numFragGCBins{20};
     std::atomic<int32_t> numBiasSamples{1000000}; // The number of fragment mappings to consider when building
 						  // the sequence-specific "foreground" distribution.
 
+    // Related to the prior of the VBEM algorithm
+    double vbPrior{1e-3};
+    bool perTranscriptPrior{false};
     // Related to the fragment length distribution
     size_t fragLenDistMax;
     size_t fragLenDistPriorMean;
diff --git a/include/SalmonUtils.hpp b/include/SalmonUtils.hpp
index 6fd8ed5..2fb7279 100644
--- a/include/SalmonUtils.hpp
+++ b/include/SalmonUtils.hpp
@@ -20,7 +20,7 @@ extern "C" {
 
 #include <Eigen/Dense>
 
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 
 #include "SalmonOpts.hpp"
 #include "SalmonMath.hpp"
@@ -43,10 +43,25 @@ using NameVector = std::vector<string>;
 using IndexVector = std::vector<size_t>;
 using KmerVector = std::vector<uint64_t>;
 using MateStatus = rapmap::utils::MateStatus;
+    
+// The kind of mapping that was obtained for a read; stored in a single byte (uint8_t).
+enum class MappingType : uint8_t { 
+    UNMAPPED = 0, LEFT_ORPHAN = 1, RIGHT_ORPHAN = 2, BOTH_ORPHAN = 3, 
+        PAIRED_MAPPED = 4,  SINGLE_MAPPED = 5 };
+
+std::string str(const MappingType& mt);
+
+
+// Tracks fragments that are shorter than the k-mer length with which
+// the index was built.
+struct ShortFragStats {
+    size_t numTooShort{0}; // starts at zero
+    size_t shortest{std::numeric_limits<size_t>::max()}; // starts at the largest size_t value
+};
 
 // An enum class for direction to avoid potential errors
 // with keeping everything as a bool
-enum class Direction { FORWARD = 0, REVERSE_COMPLEMENT = 1 };
+enum class Direction { FORWARD = 0, REVERSE_COMPLEMENT = 1, REVERSE = 2 };
 
 // Returns FORWARD if isFwd is true and REVERSE_COMPLEMENT otherwise
 constexpr inline Direction boolToDirection(bool isFwd) {
@@ -88,11 +103,21 @@ TranscriptGeneMap readTranscriptToGeneMap( std::ifstream &ifile );
 
 TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFile );
 
+/*
+template <typename AbundanceVecT, typename ReadExpT>
+Eigen::VectorXd updateEffectiveLengths(
+        SalmonOpts& sopt,
+        ReadExpT& readExp,
+        Eigen::VectorXd& effLensIn,
+        AbundanceVecT& alphas,
+	bool finalRound = false);
+*/
+
 template <typename AbundanceVecT, typename ReadExpT>
-Eigen::VectorXd updateEffectiveLengths(ReadExpT& readExp,
-    Eigen::VectorXd& effLensIn,
-    AbundanceVecT& alphas,
-    std::vector<double>& transcriptKmerDist);
+Eigen::VectorXd updateEffectiveLengths(SalmonOpts& sopt, ReadExpT& readExp,
+                                                      Eigen::VectorXd& effLensIn,
+                                                      AbundanceVecT& alphas, bool finalRound=false);
+
 
 /*
  * Use atomic compare-and-swap to update val to
@@ -135,10 +160,11 @@ inline void incLoop(tbb::atomic<double>& val, double inc) {
         } while (returnedMass != oldMass);
 }
 
+bool processQuantOptions(SalmonOpts& sopt, boost::program_options::variables_map& vm, int32_t numBiasSamples);
 
-void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath);
 
-std::vector<int32_t> samplesFromLogPMF(FragmentLengthDistribution* fld, int32_t numSamples);
+
+void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath);
 
 // NOTE: Throws an invalid_argument exception of the quant or quant_bias_corrected files do
 // not exist!
@@ -151,6 +177,40 @@ void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
 
     bool headersAreConsistent(std::vector<SAM_hdr*>&& headers);
 
+    inline void reverseComplement(const char* s, int32_t l, std::string& o) {
+        if (l > o.size()) { o.resize(l, 'A'); }
+        int32_t j = 0;
+        for (int32_t i = l-1; i >= 0; --i, ++j) {
+            switch(s[i]) {
+            case 'A':
+            case 'a':
+                o[j] = 'T';
+                break;
+            case 'C':
+            case 'c':
+                o[j] = 'G';
+                break;
+            case 'T':
+            case 't':
+                o[j] = 'A';
+                break;
+            case 'G':
+            case 'g':
+                o[j] = 'C';
+                break;
+            default:
+                o[j] = 'N';
+                break;
+            } 
+        }
+    }
+
+    inline std::string reverseComplement(const char* s, int32_t l) {
+        std::string o(l, 'A');
+        reverseComplement(s, l, o);
+        return o;
+    }
+   
     template <typename AlnLibT>
     void writeAbundances(const SalmonOpts& sopt,
                          AlnLibT& alnLib,
@@ -167,6 +227,12 @@ void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
     void normalizeAlphas(const SalmonOpts& sopt,
                          AlnLibT& alnLib);
 
+    bool isCompatible(const LibraryFormat observed,
+                      const LibraryFormat expected,
+                      int32_t start,
+                      bool isForward,
+                      rapmap::utils::MateStatus ms);
+
     double logAlignFormatProb(const LibraryFormat observed,
                               const LibraryFormat expected,
                               int32_t start, bool isForward,
diff --git a/include/Sampler.hpp b/include/Sampler.hpp
index c71310c..4ed6ec1 100644
--- a/include/Sampler.hpp
+++ b/include/Sampler.hpp
@@ -10,7 +10,7 @@ extern "C" {
 
 // for cpp-format
 #include "spdlog/spdlog.h"
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 
 #include <tbb/atomic.h>
 #include <iostream>
@@ -300,7 +300,7 @@ namespace salmon {
                 msgStr << "Sampling alignments; outputting results to "
                        << sampleFilePath.string() << "\n";
 
-                log->info() << msgStr.str();
+                log->info(msgStr.str());
 
                 auto& refs = alnLib.transcripts();
                 size_t numTranscripts = refs.size();
@@ -375,7 +375,7 @@ namespace salmon {
                                        << ioutils::RESET_COLOR
                                        << "Couldn't open output bam file "
                                        << sampleFilePath.string() << ". Exiting\n";
-                                log->warn() << errstr.str();
+                                log->warn(errstr.str());
                                 std::exit(-1);
                             }
 
@@ -393,7 +393,7 @@ namespace salmon {
                                                 << "file. Please check that the file can "
                                                 << "be created properly and that the disk "
                                                 << "is not full.  Exiting.\n";
-                                            log->warn() << errstr.str();
+                                            log->warn(errstr.str());
                                             std::exit(-1);
                                         }
                                         // Eventually, as we do in BAMQueue, we should
diff --git a/include/SimplePosBias.hpp b/include/SimplePosBias.hpp
new file mode 100644
index 0000000..d619ff4
--- /dev/null
+++ b/include/SimplePosBias.hpp
@@ -0,0 +1,43 @@
+#ifndef SIMPLE_POS_BIAS_HPP
+#define SIMPLE_POS_BIAS_HPP
+
+#include "spline.h"
+#include <array>
+#include <vector>
+
+class SimplePosBias {
+public:
+  SimplePosBias(int32_t numBins = 20, bool logSpace = true);
+
+  // Add a mass of @mass to bin @bin
+  void addMass(int32_t bin, double mass);
+
+  // Compute the bin for @pos on a transcript of length @length,
+  // and add @mass to the appropriate bin
+  void addMass(int32_t pos, int32_t length, double mass);
+
+  // Project, via linear interpolation, the weights contained in "bins"
+  // into the vector @out.
+  void projectWeights(std::vector<double>& out);
+
+  // Combine the distribution @other
+  // with this distribution
+  void combine(const SimplePosBias& other);
+
+  // We're finished updating this distribution, so
+  // compute the cdf etc.
+  void finalize();
+
+private:
+  int32_t numBins_;
+  std::vector<double> masses_;
+  bool isLogged_{true};
+  tk::spline s_;
+  // position bins taken from Cufflinks:
+  // https://github.com/cole-trapnell-lab/cufflinks/blob/master/src/biascorrection.cpp
+  const std::vector<double> positionBins_{{.02, .04, .06, .08, .10, .15, .2,
+                                           .3,  .4,  .5,  .6,  .7,  .8,  .85,
+                                           .9,  .92, .94, .96, .98, 1.0}};
+};
+
+#endif // SIMPLE_POS_BIAS_HPP
diff --git a/include/Transcript.hpp b/include/Transcript.hpp
index 7923230..5250be5 100644
--- a/include/Transcript.hpp
+++ b/include/Transcript.hpp
@@ -4,6 +4,8 @@
 #include <atomic>
 #include <cmath>
 #include <limits>
+#include <memory>
+#include "GCFragModel.hpp"
 #include "SalmonStringUtils.hpp"
 #include "SalmonUtils.hpp"
 #include "SalmonMath.hpp"
@@ -13,55 +15,52 @@
 
 class Transcript {
 public:
-    Transcript() : RefName(""), RefLength(0), EffectiveLength(-1.0), id(std::numeric_limits<uint32_t>::max()), 
-	SAMSequence(nullptr), Sequence(nullptr),
+
+    Transcript() :
+        RefName(nullptr), RefLength(std::numeric_limits<uint32_t>::max()),
+        EffectiveLength(-1.0), id(std::numeric_limits<uint32_t>::max()),
         logPerBasePrior_(salmon::math::LOG_0),
         priorMass_(salmon::math::LOG_0),
         mass_(salmon::math::LOG_0), sharedCount_(0.0),
         avgMassBias_(salmon::math::LOG_0),
-        active_(false),
-    	freeSeqOnDestruct(false){}
-    
+        active_(false) {
+            uniqueCount_.store(0);
+            lastUpdate_.store(0);
+            lastTimestepUpdated_.store(0);
+            cachedEffectiveLength_.store(salmon::math::LOG_0);
+        }
+
+
     Transcript(size_t idIn, const char* name, uint32_t len, double alpha = 0.05) :
-        RefName(name), RefLength(len), EffectiveLength(-1.0), id(idIn), SAMSequence(nullptr), Sequence(nullptr),
+        RefName(name), RefLength(len), EffectiveLength(-1.0), id(idIn),
         logPerBasePrior_(std::log(alpha)),
         priorMass_(std::log(alpha*len)),
         mass_(salmon::math::LOG_0), sharedCount_(0.0),
         avgMassBias_(salmon::math::LOG_0),
-        active_(false),
-    	freeSeqOnDestruct(false){
+        active_(false) {
             uniqueCount_.store(0);
             lastUpdate_.store(0);
             lastTimestepUpdated_.store(0);
             cachedEffectiveLength_.store(std::log(static_cast<double>(RefLength)));
         }
 
-    ~Transcript() {
-      // Free the sequence if it belongs to us
-      if (freeSeqOnDestruct) { delete [] Sequence; }
-      // Free the SAMSequence if it exists
-      if (SAMSequence) { delete [] SAMSequence; }
-    }
+    // We cannot copy; only move
+    Transcript(Transcript& other) = delete;
+    Transcript& operator=(Transcript& other) = delete;
 
     Transcript(Transcript&& other) {
         id = other.id;
-        //std::swap(RefName, other.RefName);
+
         RefName = std::move(other.RefName);
         RefLength = other.RefLength;
         EffectiveLength = other.EffectiveLength;
-        SAMSequence = other.SAMSequence;
-        // If this is an owned-resource, then move it
-        if (other.SAMSequence) {
-            other.SAMSequence = nullptr;
-        }
 
-        Sequence = other.Sequence;
-        // If this is an owned-resource, then move it
-        if (other.freeSeqOnDestruct) {
-            freeSeqOnDestruct = true;
-            other.freeSeqOnDestruct = false;
-            other.Sequence = nullptr;
-        }
+        SAMSequence_ = std::move(other.SAMSequence_);
+        Sequence_ = std::move(other.Sequence_);
+        GCCount_ = std::move(other.GCCount_);
+        gcStep_ = other.gcStep_;
+        gcFracLen_ = other.gcFracLen_;
+        lastRegularSample_ = other.lastRegularSample_;
 
         uniqueCount_.store(other.uniqueCount_);
         totalCount_.store(other.totalCount_.load());
@@ -80,23 +79,16 @@ public:
 
     Transcript& operator=(Transcript&& other) {
         id = other.id;
-        //std::swap(RefName, other.RefName);
+
         RefName = std::move(other.RefName);
         RefLength = other.RefLength;
         EffectiveLength = other.EffectiveLength;
-        SAMSequence = other.SAMSequence;
-        // If this is an owned-resource, then move it
-        if (other.SAMSequence) {
-            other.SAMSequence = nullptr;
-        }
-
-        Sequence = other.Sequence;
-        // If this is an owned-resource, then move it
-        if (other.freeSeqOnDestruct) {
-            freeSeqOnDestruct = true;
-            other.freeSeqOnDestruct = false;
-            other.Sequence = nullptr;
-        }
+        SAMSequence_ = std::move(other.SAMSequence_);
+        Sequence_ = std::move(other.Sequence_);
+        GCCount_ = std::move(other.GCCount_);
+        gcStep_ = other.gcStep_;
+        gcFracLen_ = other.gcFracLen_;
+        lastRegularSample_ = other.lastRegularSample_;
 
         uniqueCount_.store(other.uniqueCount_);
         totalCount_.store(other.totalCount_.load());
@@ -122,7 +114,7 @@ public:
     inline void addUniqueCount(size_t newCount) { uniqueCount_ += newCount; }
     inline void addTotalCount(size_t newCount) { totalCount_ += newCount; }
 
-    inline double uniqueUpdateFraction() {
+    inline double uniqueUpdateFraction() const {
         double ambigCount = static_cast<double>(totalCount_ - uniqueCount_);
         return uniqueCount_ / ambigCount;
     }
@@ -138,20 +130,21 @@ public:
         using salmon::stringtools::encodedRevComp;
         size_t byte = idx >> 1;
         size_t nibble = idx & 0x1;
+        uint8_t* sseq = SAMSequence_.get();
 
         switch(dir) {
         case strand::forward:
             if (nibble) {
-                return SAMSequence[byte] & 0x0F;
+                return sseq[byte] & 0x0F;
             } else {
-                return ((SAMSequence[byte] & 0xF0) >> 4) & 0x0F;
+                return ((sseq[byte] & 0xF0) >> 4) & 0x0F;
             }
             break;
         case strand::reverse:
             if (nibble) {
-                return encodedRevComp[SAMSequence[byte] & 0x0F];
+                return encodedRevComp[sseq[byte] & 0x0F];
             } else {
-                return encodedRevComp[((SAMSequence[byte] & 0xF0) >> 4) & 0x0F];
+                return encodedRevComp[((sseq[byte] & 0xF0) >> 4) & 0x0F];
             }
             break;
         }
@@ -225,9 +218,10 @@ public:
         double refLen = static_cast<double>(RefLength);
         double logRefLength = std::log(refLen);
 
-        if (logRefLength <= logFLDMean) {
-            effectiveLength = logRefLength;
-        } else {
+	// JUNE 17 (just ensure it's >= 1)
+        //if (logRefLength <= logFLDMean) {
+        //    effectiveLength = logRefLength;
+        //} else {
             uint32_t mval = maxVal;
             size_t clen = minVal;
             size_t maxLen = std::min(RefLength, mval);
@@ -238,10 +232,11 @@ public:
                         logPMF[i] + std::log(refLen - clen + 1));
                 ++clen;
             }
-        }
-        if (std::exp(effectiveLength) <= 1.0) {
-            effectiveLength = salmon::math::LOG_1;
-        }
+	//}
+	if (salmon::math::isLog0(effectiveLength) or std::exp(effectiveLength) < 1.0) {
+	  effectiveLength = logRefLength;
+	  //effectiveLength = //salmon::math::LOG_1;
+    }
 
         return effectiveLength;
     }
@@ -253,6 +248,10 @@ public:
         return cachedEffectiveLength_.load();
     }
 
+    void setCachedLogEffectiveLength(double l) {
+        cachedEffectiveLength_.store(l);
+    }
+
     void updateEffectiveLength(
             std::vector<double>& logPMF,
             double logFLDMean,
@@ -290,7 +289,7 @@ public:
     inline size_t lastTimestepUpdated() { return lastTimestepUpdated_.load(); }
 
     void lengthClassIndex(uint32_t ind) { lengthClassIndex_ = ind; }
-    uint32_t lengthClassIndex() { return lengthClassIndex_; }
+    uint32_t lengthClassIndex() const { return lengthClassIndex_; }
 
     void setAnchorFragment() {
         hasAnchorFragment_.store(true);
@@ -300,6 +299,112 @@ public:
         return hasAnchorFragment_.load();
     }
 
+    inline GCDesc gcDesc(int32_t s, int32_t e) const {
+        int outsideContext{3};
+        int insideContext{2};
+        
+        int outside5p = outsideContext + 1;
+        int outside3p = outsideContext;
+
+        int inside5p = insideContext - 1;
+        int inside3p = insideContext;
+
+        int contextSize = outsideContext + insideContext;
+        int lastPos = RefLength - 1;
+        if (gcStep_ == 1) {
+            auto cs = GCCount_[s];
+            auto ce = GCCount_[e];
+
+            auto fps = (s >= outside5p) ? GCCount_[s-outside5p] : 0;
+            auto fpe = (inside5p > 0) ? GCCount_[std::min(s+inside5p, lastPos)] : cs;
+            auto tps = (inside3p > 0) ? 
+                ((e >= inside3p) ? GCCount_[e-inside3p] : 0) : ce;
+            auto tpe = GCCount_[std::min(e+outside3p, lastPos)];
+            
+            int32_t fragFrac = std::lrint((100.0 * (ce - cs)) / (e - s + 1));
+            int32_t contextFrac = std::lrint((100.0 * (((fpe - fps) + (tpe - tps)) / (2.0 * contextSize))));
+            GCDesc desc = {fragFrac, contextFrac};
+            return desc;
+        } else {
+            auto cs = gcCountInterp_(s);
+            auto ce = gcCountInterp_(e);
+
+	    auto fps = (s >= outside5p) ? gcCountInterp_(s-outside5p) : 0;
+	    auto fpe = (inside5p > 0) ? gcCountInterp_(std::min(s+inside5p, lastPos)) : cs;
+	    auto tps = (inside3p > 0) ? 
+	      ((e >= inside3p) ? gcCountInterp_(e-inside3p) : 0) : ce;
+	    auto tpe = gcCountInterp_(std::min(e+outside3p, lastPos));
+	    
+            int32_t fragFrac = std::lrint((100.0 * (ce - cs)) / (e - s + 1));
+            int32_t contextFrac = std::lrint((100.0 * (((fpe - fps) + (tpe - tps)) / (10.0))));
+            GCDesc desc = {fragFrac, contextFrac};
+            return desc;
+        }
+
+    }
+    inline double gcAt(int32_t s) const {
+        return (s < 0) ? 0.0 : ((s >= RefLength) ? gcCount_(RefLength) : gcCount_(s));
+    }
+
+    // Return the GC content along this transcript, as a rounded integer percentage,
+    // in the interval [s,e] (note: this interval is closed on both sides).
+    inline int32_t gcFrac(int32_t s, int32_t e) const {
+        if (gcStep_ == 1) {
+            auto cs = GCCount_[s];
+            auto ce = GCCount_[e];
+            return std::lrint((100.0 * (ce - cs)) / (e - s + 1));
+        } else {
+            auto cs = gcCountInterp_(s);
+            auto ce = gcCountInterp_(e);
+            return std::lrint((100.0 * (ce - cs)) / (e - s + 1));
+        }
+    }
+
+    // Will *not* delete seq on destruction
+    void setSequenceBorrowed(const char* seq, bool needGC=false, uint32_t gcSampFactor=1) {
+        Sequence_ = std::unique_ptr<const char, void(*)(const char*)>(
+                seq,                 // store seq
+                [](const char* p) {} // do nothing deleter
+                );
+        if (needGC) { computeGCContent_(gcSampFactor); }
+    }
+
+    // Will delete seq on destruction
+    void setSequenceOwned(const char* seq, bool needGC=false, uint32_t gcSampFactor=1) {
+        Sequence_ = std::unique_ptr<const char, void(*)(const char*)>(
+                seq,                 // store seq
+                [](const char* p) { delete [] p; } // deleter frees the array with delete[]
+                );
+        if (needGC) { computeGCContent_(gcSampFactor); }
+    }
+
+    // Will *not* delete seq on destruction
+    void setSAMSequenceBorrowed(uint8_t* seq, bool needGC=false, uint32_t gcSampFactor=1) {
+        SAMSequence_ = std::unique_ptr<uint8_t, void(*)(uint8_t*)>(
+                seq,                 // store seq
+                [](uint8_t* p) {} // do nothing deleter
+                );
+        if (needGC) { computeGCContent_(gcSampFactor); }
+    }
+
+    // Will delete seq on destruction
+    void setSAMSequenceOwned(uint8_t* seq, bool needGC=false,  uint32_t gcSampFactor=1) {
+        SAMSequence_ = std::unique_ptr<uint8_t, void(*)(uint8_t*)>(
+                seq,                 // store seq
+                [](uint8_t* p) { delete [] p; } // deleter frees the array with delete[]
+                );
+        if (needGC) { computeGCContent_(gcSampFactor); }
+    }
+
+    const char* Sequence() const {
+        return Sequence_.get();
+    }
+
+    uint8_t* SAMSequence() const {
+        return SAMSequence_.get();
+    }
+
+
     std::string RefName;
     uint32_t RefLength;
     double EffectiveLength;
@@ -310,11 +415,137 @@ public:
     double projectedCounts{0.0};
     double sharedCounts{0.0};
 
-    uint8_t* SAMSequence;
-    const char* Sequence;
-    bool freeSeqOnDestruct;
-
 private:
+    // NOTE: Is it worth it to check if we have GC here?
+    // we should never access these without bias correction.
+    inline double gcCount_(int32_t p) {
+        return (gcStep_ == 1) ? static_cast<double>(GCCount_[p]) : gcCountInterp_(p);
+    }
+    inline double gcCount_(int32_t p) const {
+        return (gcStep_ == 1) ? static_cast<double>(GCCount_[p]) : gcCountInterp_(p);
+    }
+
+    inline int32_t closestBin_(int32_t p) const {
+      return static_cast<int32_t>(std::round( static_cast<double>(p) / gcStep_ )); 
+    }
+
+    inline double gcCountInterp_(int32_t p) const {
+        //std::cerr << "in gcCountInterp\n";
+        if (p == RefLength - 1) {
+            // If p is the last position, just return the last value
+            return static_cast<double>(GCCount_.back());
+        }
+
+	// The index of the closest bin
+	auto cb = closestBin_(p);
+	// The actual position to which this bin corresponds
+	int32_t binPos = cb * gcStep_;
+	// Can't go past the end
+	if (binPos > RefLength - 1) {
+	  binPos = RefLength - 1;
+	  cb = GCCount_.size() - 1;
+	}
+
+	// The count of {G,C} at the checkpoint
+	auto binCount = GCCount_[cb];
+	// The count before or after the bin, until p
+	int32_t count{0};
+        const char* seq = Sequence_.get();
+
+	// we hit a sampled position
+	if (binPos == p) {
+	} else if (binPos > p) {
+	  for (size_t i = binPos; i > p; --i) {
+	    auto c = seq[i];
+	    // If the character is a G or C, we subtract 1
+	    count -= (c == 'G' or c == 'C') ? 1 : 0;
+	  }
+	} else {
+	  for (size_t i = binPos + 1; i <= p; ++i) {
+	    auto c = seq[i];
+	    // If the character is a G or C, we add 1
+	    count += (c == 'G' or c == 'C') ? 1 : 0;
+	  }
+	}
+	return  binCount + count;
+	/*
+        // The fractional sampling factor position p would have
+        double fracP = static_cast<double>(p) / gcStep_;
+
+        // The largest sampled index for some position <= p
+        uint32_t sampInd = std::floor(fracP);
+
+        // The fraction sampling factor for the largest sampled
+        // position <= p
+        double fracSample = static_cast<double>(sampInd);
+
+        int32_t nextSample{0};
+        double fracNextSample{0.0};
+
+        // special case: The last bin may not be evenly spaced.
+        if (sampInd >= lastRegularSample_) {
+            nextSample = GCCount_.size() - 1;
+            fracNextSample = gcFracLen_;
+        } else {
+            nextSample = sampInd + 1;
+            fracNextSample = static_cast<double>(nextSample);
+        }
+        double lambda = (fracP - fracSample) / (fracNextSample - fracSample);
+        return lambda * GCCount_[sampInd] + (1.0 - lambda) * GCCount_[nextSample];
+	*/
+    }
+
+    void computeGCContentSampled_(uint32_t step) {
+        gcStep_ = step;
+        const char* seq = Sequence_.get();
+        size_t nsamp = std::ceil(static_cast<double>(RefLength) / step);
+        GCCount_.reserve(nsamp + 2);
+
+        size_t lastSamp{0};
+        size_t totGC{0};
+        for (size_t i = 0; i < RefLength; ++i) {
+            auto c = std::toupper(seq[i]);
+            if (c == 'G' or c == 'C') {
+                totGC++;
+            }
+            if (i % step == 0) {
+                GCCount_.push_back(totGC);
+                lastSamp = i;
+            }
+        }
+
+        if (lastSamp < RefLength - 1) {
+            GCCount_.push_back(totGC);
+        }
+
+        gcFracLen_ = static_cast<double>(RefLength - 1) / gcStep_;
+        lastRegularSample_ = std::ceil(gcFracLen_);
+    }
+
+    void computeGCContent_(uint32_t gcSampFactor) {
+        const char* seq = Sequence_.get();
+        GCCount_.clear();
+        if (gcSampFactor == 1) {
+            GCCount_.resize(RefLength, 0);
+            size_t totGC{0};
+            for (size_t i = 0; i < RefLength; ++i) {
+                auto c = std::toupper(seq[i]);
+                if (c == 'G' or c == 'C') {
+                    totGC++;
+                }
+                GCCount_[i] = totGC;
+            }
+        } else {
+            computeGCContentSampled_(gcSampFactor);
+        }
+    }
+
+    std::unique_ptr<uint8_t, void(*)(uint8_t*)> SAMSequence_ =
+        std::unique_ptr<uint8_t, void(*)(uint8_t*)> (nullptr, [](uint8_t*){});
+
+    std::unique_ptr<const char, void(*)(const char*)> Sequence_ =
+        std::unique_ptr<const char, void(*)(const char*)> (nullptr, [](const char*){});
+
     std::atomic<size_t> uniqueCount_;
     std::atomic<size_t> totalCount_;
     // The most recent timestep at which this transcript's mass was updated.
@@ -332,6 +563,11 @@ private:
     // pair of reads mapping to it.
     std::atomic<bool> hasAnchorFragment_{false};
     bool active_;
+
+    uint32_t gcStep_{1};
+    double gcFracLen_{0.0};
+    uint32_t lastRegularSample_{0};
+    std::vector<uint32_t> GCCount_;
 };
 
 #endif //TRANSCRIPT
diff --git a/include/TranscriptCluster.hpp b/include/TranscriptCluster.hpp
index 9394a3c..fbd8dee 100644
--- a/include/TranscriptCluster.hpp
+++ b/include/TranscriptCluster.hpp
@@ -90,9 +90,6 @@ public:
                 }
             }
             ++round;
-            if (round % 100 == 0) {
-                std::cerr << "\r\rproject to polytope: " << round;
-            }
             if (round > 5000) {
                 return;
             }
diff --git a/include/TryableSpinLock.hpp b/include/TryableSpinLock.hpp
new file mode 100644
index 0000000..5ed3fe3
--- /dev/null
+++ b/include/TryableSpinLock.hpp
@@ -0,0 +1,25 @@
+#ifndef __TRYABLE_SPIN_LOCK_HPP__
+#define __TRYABLE_SPIN_LOCK_HPP__
+
+#include <atomic>
+
+// Taken from http://stackoverflow.com/questions/26583433/c11-implementation-of-spinlock-using-atomic
+class TryableSpinLock {
+    std::atomic_flag locked = ATOMIC_FLAG_INIT ;
+public:
+    void lock() {
+        while (locked.test_and_set(std::memory_order_acquire)) { ; }
+    }
+
+    // from http://stackoverflow.com/questions/19742993/implementing-a-spinlock-in-boost-example-needed
+    // is this legit?
+    bool try_lock() {
+        return !locked.test_and_set(std::memory_order_acquire);
+    }
+
+    void unlock() {
+        locked.clear(std::memory_order_release);
+    }
+};
+
+#endif //__TRYABLE_SPIN_LOCK_HPP__
diff --git a/include/UnpairedRead.hpp b/include/UnpairedRead.hpp
index 7bd8e37..d855d5b 100644
--- a/include/UnpairedRead.hpp
+++ b/include/UnpairedRead.hpp
@@ -81,7 +81,11 @@ struct UnpairedRead {
    inline bool isLeft()  const { return !isRight(); }
    inline int32_t left() const { return bam_pos(read); }
    inline int32_t right() const { return left() + bam_seq_len(read); }
+   // will always be at least the length of a single read
    inline uint32_t fragLen() const { return 0; }
+   // from the leftmost end of the 5' read to the rightmost 
+   // end of the 3' read (can be less than the length of a single read)
+   inline uint32_t fragLengthPedantic(uint32_t txpLen) const { return 0; }
    inline ReadType fragType() const { return ReadType::SINGLE_END; }
    inline int32_t transcriptID() const { return bam_ref(read); }
 
diff --git a/include/UtilityFunctions.hpp b/include/UtilityFunctions.hpp
index cf5ce23..9a35c7e 100644
--- a/include/UtilityFunctions.hpp
+++ b/include/UtilityFunctions.hpp
@@ -40,55 +40,67 @@ inline std::string kmerForIndex(uint32_t idx, uint32_t K) {
 inline uint32_t nextKmerIndex(uint32_t idx, char n, uint32_t K,
                               salmon::utils::Direction dir) {
     using salmon::utils::Direction;
-    idx = idx << 2;
-    if(dir == Direction::REVERSE_COMPLEMENT) {
+    if(dir == Direction::REVERSE or dir == Direction::REVERSE_COMPLEMENT) {
+      // shift out the lowest-order (rightmost) character, and insert the
+      // complement of the new one at the highest-order (leftmost) position.
+      idx = idx >> 2;
         switch(n) {
             case 'A':
             case 'a':
-                n='T';
+              //  n='T';
+	      // complement is 'T';
+	      idx = idx | (3 << 2*(K-1));
                 break;
             case 'C':
             case 'c':
-                n='G';
+              //  n='G';
+	      // complement is 'G';
+	      idx = idx | (2 << 2*(K-1));
                 break;
             case 'g':
             case 'G':
-                n='C';
+	      // n='C';
+	      // complement is 'C';
+	      idx = idx | (1 << 2*(K-1));
                 break;
             case 'T':
             case 't':
             case 'U':
             case 'u':
-                n='A';
+	      // n='A';
+	      // complement is 'A';
                 break;
         }
+	return idx;
+    } else {
+      // drop the highest-order (leftmost) character and append the new one on the right.
+      idx = idx << 2;
+      switch(n) {
+      case 'A':
+      case 'a': break;
+      case 'C':
+      case 'c': idx = idx + 1;
+	break;
+      case 'G':
+      case 'g': idx = idx + 2;
+	break;
+      case 'T':
+      case 't':
+      case 'U':
+      case 'u':
+	idx = idx + 3;
+	break;
+      }
+      // Clear the top 32 - 2*K bits.
+      uint32_t clearShift = (32 - 2*K);
+      return idx & (0xFFFFFFFF >> clearShift);
     }
-
-    switch(n) {
-        case 'A':
-        case 'a': break;
-        case 'C':
-        case 'c': idx = idx + 1;
-                  break;
-        case 'G':
-        case 'g': idx = idx + 2;
-                  break;
-        case 'T':
-        case 't':
-        case 'U':
-        case 'u':
-                  idx = idx + 3;
-                  break;
-    }
-    // Clear the top 32 - 2*K bits.
-    uint32_t clearShift = (32 - 2*K);
-    return idx & (0xFFFFFFFF >> clearShift);
 }
 
 
 inline uint32_t indexForKmer(const char* s,
-        uint32_t K,
-        salmon::utils::Direction dir) {
+	uint32_t K,
+	salmon::utils::Direction dir) {
     using salmon::utils::Direction;
     // The index we'll return
     uint32_t idx{0};
diff --git a/include/blockingconcurrentqueue.h b/include/blockingconcurrentqueue.h
index a0412a7..325a32b 100644
--- a/include/blockingconcurrentqueue.h
+++ b/include/blockingconcurrentqueue.h
@@ -1,5 +1,5 @@
 // Provides an efficient blocking version of moodycamel::ConcurrentQueue.
-// ©2015 Cameron Desrochers. Distributed under the terms of the simplified
+// ©2015-2016 Cameron Desrochers. Distributed under the terms of the simplified
 // BSD license, available at the top of concurrentqueue.h.
 // Uses Jeff Preshing's semaphore implementation (under the terms of its
 // separate zlib license, embedded below).
@@ -8,7 +8,10 @@
 
 #include "concurrentqueue.h"
 #include <type_traits>
+#include <cerrno>
 #include <memory>
+#include <chrono>
+#include <ctime>
 
 #if defined(_WIN32)
 // Avoid including windows.h in a header; we only need a handful of
@@ -48,11 +51,11 @@ namespace details
 	// freely, subject to the following restrictions:
 	//
 	// 1. The origin of this software must not be misrepresented; you must not
-	//    claim that you wrote the original software. If you use this software
-	//    in a product, an acknowledgement in the product documentation would be
-	//    appreciated but is not required.
+	//	claim that you wrote the original software. If you use this software
+	//	in a product, an acknowledgement in the product documentation would be
+	//	appreciated but is not required.
 	// 2. Altered source versions must be plainly marked as such, and must not be
-	//    misrepresented as being the original software.
+	//	misrepresented as being the original software.
 	// 3. This notice may not be removed or altered from any source distribution.
 	namespace mpmc_sema
 	{
@@ -60,34 +63,46 @@ namespace details
 		class Semaphore
 		{
 		private:
-		    void* m_hSema;
-		    
-		    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-		    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+			void* m_hSema;
+			
+			Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+			Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
 
 		public:
-		    Semaphore(int initialCount = 0)
-		    {
-		        assert(initialCount >= 0);
-		        const long maxLong = 0x7fffffff;
-		        m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
-		    }
-
-		    ~Semaphore()
-		    {
-		        CloseHandle(m_hSema);
-		    }
-
-		    void wait()
-		    {
-		    	const unsigned long infinite = 0xffffffff;
-		        WaitForSingleObject(m_hSema, infinite);
-		    }
-
-		    void signal(int count = 1)
-		    {
-		        ReleaseSemaphore(m_hSema, count, nullptr);
-		    }
+			Semaphore(int initialCount = 0)
+			{
+				assert(initialCount >= 0);
+				const long maxLong = 0x7fffffff;
+				m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
+			}
+
+			~Semaphore()
+			{
+				CloseHandle(m_hSema);
+			}
+
+			void wait()
+			{
+				const unsigned long infinite = 0xffffffff;
+				WaitForSingleObject(m_hSema, infinite);
+			}
+			
+			bool try_wait()
+			{
+				const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
+				return WaitForSingleObject(m_hSema, 0) != RC_WAIT_TIMEOUT;
+			}
+			
+			bool timed_wait(std::uint64_t usecs)
+			{
+				const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
+				return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) != RC_WAIT_TIMEOUT;
+			}
+
+			void signal(int count = 1)
+			{
+				ReleaseSemaphore(m_hSema, count, nullptr);
+			}
 		};
 #elif defined(__MACH__)
 		//---------------------------------------------------------
@@ -97,40 +112,57 @@ namespace details
 		class Semaphore
 		{
 		private:
-		    semaphore_t m_sema;
+			semaphore_t m_sema;
 
-		    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-		    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+			Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+			Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
 
 		public:
-		    Semaphore(int initialCount = 0)
-		    {
-		        assert(initialCount >= 0);
-		        semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
-		    }
-
-		    ~Semaphore()
-		    {
-		        semaphore_destroy(mach_task_self(), m_sema);
-		    }
-
-		    void wait()
-		    {
-		        semaphore_wait(m_sema);
-		    }
-
-		    void signal()
-		    {
-		        semaphore_signal(m_sema);
-		    }
-
-		    void signal(int count)
-		    {
-		        while (count-- > 0)
-		        {
-		            semaphore_signal(m_sema);
-		        }
-		    }
+			Semaphore(int initialCount = 0)
+			{
+				assert(initialCount >= 0);
+				semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
+			}
+
+			~Semaphore()
+			{
+				semaphore_destroy(mach_task_self(), m_sema);
+			}
+
+			void wait()
+			{
+				semaphore_wait(m_sema);
+			}
+			
+			bool try_wait()
+			{
+				return timed_wait(0);
+			}
+			
+			bool timed_wait(std::uint64_t timeout_usecs)
+			{
+				mach_timespec_t ts;
+				ts.tv_sec = timeout_usecs / 1000000;
+				ts.tv_nsec = (timeout_usecs % 1000000) * 1000;
+
+				// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
+				kern_return_t rc = semaphore_timedwait(m_sema, ts);
+
+				return rc != KERN_OPERATION_TIMED_OUT;
+			}
+
+			void signal()
+			{
+				semaphore_signal(m_sema);
+			}
+
+			void signal(int count)
+			{
+				while (count-- > 0)
+				{
+					semaphore_signal(m_sema);
+				}
+			}
 		};
 #elif defined(__unix__)
 		//---------------------------------------------------------
@@ -139,46 +171,75 @@ namespace details
 		class Semaphore
 		{
 		private:
-		    sem_t m_sema;
+			sem_t m_sema;
 
-		    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-		    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+			Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+			Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
 
 		public:
-		    Semaphore(int initialCount = 0)
-		    {
-		        assert(initialCount >= 0);
-		        sem_init(&m_sema, 0, initialCount);
-		    }
-
-		    ~Semaphore()
-		    {
-		        sem_destroy(&m_sema);
-		    }
-
-		    void wait()
-		    {
-		        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
-		        int rc;
-		        do
-		        {
-		            rc = sem_wait(&m_sema);
-		        }
-		        while (rc == -1 && errno == EINTR);
-		    }
-
-		    void signal()
-		    {
-		        sem_post(&m_sema);
-		    }
-
-		    void signal(int count)
-		    {
-		        while (count-- > 0)
-		        {
-		            sem_post(&m_sema);
-		        }
-		    }
+			Semaphore(int initialCount = 0)
+			{
+				assert(initialCount >= 0);
+				sem_init(&m_sema, 0, initialCount);
+			}
+
+			~Semaphore()
+			{
+				sem_destroy(&m_sema);
+			}
+
+			void wait()
+			{
+				// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
+				int rc;
+				do {
+					rc = sem_wait(&m_sema);
+				} while (rc == -1 && errno == EINTR);
+			}
+
+			bool try_wait()
+			{
+				int rc;
+				do {
+					rc = sem_trywait(&m_sema);	// non-blocking attempt; retried only when interrupted
+				} while (rc == -1 && errno == EINTR);
+				return rc == 0;	// acquired only on success, not merely "anything but EAGAIN"
+			}
+
+			// Waits at most `usecs` microseconds; returns true only if the semaphore was acquired.
+			bool timed_wait(std::uint64_t usecs)
+			{
+				struct timespec ts;
+				const int usecs_in_1_sec = 1000000;
+				const int nsecs_in_1_sec = 1000000000;
+				clock_gettime(CLOCK_REALTIME, &ts);
+				ts.tv_sec += usecs / usecs_in_1_sec;
+				ts.tv_nsec += (usecs % usecs_in_1_sec) * 1000;
+				// sem_timedwait fails with EINVAL unless 0 <= tv_nsec < 1e9, so carry
+				// a whole second over; tv_nsec == 1e9 exactly must be normalized too
+				if (ts.tv_nsec >= nsecs_in_1_sec) {
+					ts.tv_nsec -= nsecs_in_1_sec;
+					++ts.tv_sec;
+				}
+				int rc;
+				do {
+					rc = sem_timedwait(&m_sema, &ts);
+				} while (rc == -1 && errno == EINTR);	// retry if interrupted by a signal
+				return rc == 0;	// any failure (timeout or error) means not acquired
+			}
+
+			void signal()
+			{
+				sem_post(&m_sema);
+			}
+
+			void signal(int count)
+			{
+				while (count-- > 0)
+				{
+					sem_post(&m_sema);
+				}
+			}
 		};
 #else
 #error Unsupported platform! (No semaphore wrapper available)
@@ -191,119 +252,163 @@ namespace details
 		{
 		public:
 			typedef std::make_signed<std::size_t>::type ssize_t;
-			
+
 		private:
-		    std::atomic<ssize_t> m_count;
-		    Semaphore m_sema;
-
-		    void waitWithPartialSpinning()
-		    {
-		        ssize_t oldCount;
-		        // Is there a better way to set the initial spin count?
-		        // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
-		        // as threads start hitting the kernel semaphore.
-		        int spin = 10000;
-		        while (--spin >= 0)
-		        {
-		            oldCount = m_count.load(std::memory_order_relaxed);
-		            if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
-		                return;
-		            std::atomic_signal_fence(std::memory_order_acquire);     // Prevent the compiler from collapsing the loop.
-		        }
-		        oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
-		        if (oldCount <= 0)
-		        {
-		            m_sema.wait();
-		        }
-		    }
-
-		    ssize_t waitManyWithPartialSpinning(ssize_t max)
-		    {
-		    	assert(max > 0);
-		        ssize_t oldCount;
-		        int spin = 10000;
-		        while (--spin >= 0)
-		        {
-		            oldCount = m_count.load(std::memory_order_relaxed);
-		            if (oldCount > 0)
-	            	{
-	            		ssize_t newCount = oldCount > max ? oldCount - max : 0;
-			        	if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
-			        		return oldCount - newCount;
-		            }
-		            std::atomic_signal_fence(std::memory_order_acquire);
-		        }
-		        oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
-		        if (oldCount <= 0)
-		            m_sema.wait();
-		        if (max > 1)
-		        	return 1 + tryWaitMany(max - 1);
-		        return 1;
-		    }
+			std::atomic<ssize_t> m_count;
+			Semaphore m_sema;
+
+			bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
+			{
+				ssize_t oldCount;
+				// Is there a better way to set the initial spin count?
+				// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
+				// as threads start hitting the kernel semaphore.
+				int spin = 10000;
+				while (--spin >= 0)
+				{
+					oldCount = m_count.load(std::memory_order_relaxed);
+					if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
+						return true;
+					std::atomic_signal_fence(std::memory_order_acquire);	 // Prevent the compiler from collapsing the loop.
+				}
+				oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
+				if (oldCount > 0)
+					return true;
+				if (timeout_usecs < 0)
+				{
+					m_sema.wait();
+					return true;
+				}
+				if (m_sema.timed_wait((std::uint64_t)timeout_usecs))
+					return true;
+				// At this point, we've timed out waiting for the semaphore, but the
+				// count is still decremented, indicating we may still be waiting on
+				// it. So we have to re-adjust the count, but only if the semaphore
+				// wasn't signaled for us in the meantime. If it was (count >= 0), we
+				// must consume that pending signal with try_wait() instead.
+				while (true)
+				{
+					oldCount = m_count.load(std::memory_order_acquire);
+					if (oldCount >= 0 && m_sema.try_wait())
+						return true;
+					if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed))
+						return false;
+				}
+			}
+
+			ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1)
+			{
+				assert(max > 0);
+				ssize_t oldCount;
+				int spin = 10000;
+				while (--spin >= 0)
+				{
+					oldCount = m_count.load(std::memory_order_relaxed);
+					if (oldCount > 0)
+					{
+						ssize_t newCount = oldCount > max ? oldCount - max : 0;
+						if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+							return oldCount - newCount;
+					}
+					std::atomic_signal_fence(std::memory_order_acquire);
+				}
+				oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
+				if (oldCount <= 0)
+				{
+					if (timeout_usecs < 0)
+						m_sema.wait();
+					else if (!m_sema.timed_wait((std::uint64_t)timeout_usecs))
+					{
+						while (true)
+						{
+							oldCount = m_count.load(std::memory_order_acquire);
+							if (oldCount >= 0 && m_sema.try_wait())
+								break;
+							if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed))
+								return 0;
+						}
+					}
+				}
+				if (max > 1)
+					return 1 + tryWaitMany(max - 1);
+				return 1;
+			}
 
 		public:
-		    LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
-		    {
-		        assert(initialCount >= 0);
-		    }
-
-		    bool tryWait()
-		    {
-		        ssize_t oldCount = m_count.load(std::memory_order_relaxed);
-		        while (oldCount > 0)
-		        {
-		        	if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
-		        		return true;
-		        }
-		        return false;
-		    }
-
-		    void wait()
-		    {
-		        if (!tryWait())
-		            waitWithPartialSpinning();
-		    }
-
-		    // Acquires between 0 and (greedily) max, inclusive
-		    ssize_t tryWaitMany(ssize_t max)
-		    {
-		    	assert(max >= 0);
-		    	ssize_t oldCount = m_count.load(std::memory_order_relaxed);
-		        while (oldCount > 0)
-		        {
-		        	ssize_t newCount = oldCount > max ? oldCount - max : 0;
-		        	if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
-		        		return oldCount - newCount;
-		        }
-		        return 0;
-		    }
-
-		    // Acquires at least one, and (greedily) at most max
-		    ssize_t waitMany(ssize_t max)
-		    {
-		    	assert(max >= 0);
-		    	ssize_t result = tryWaitMany(max);
-		    	if (result == 0 && max > 0)
-		            result = waitManyWithPartialSpinning(max);
-		        return result;
-		    }
-
-		    void signal(ssize_t count = 1)
-		    {
-		    	assert(count >= 0);
-		        ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
-		        ssize_t toRelease = -oldCount < count ? -oldCount : count;
-		        if (toRelease > 0)
-		        {
-		            m_sema.signal((int)toRelease);
-		        }
-		    }
-		    
-		    ssize_t availableApprox() const
-		    {
-		    	ssize_t count = m_count.load(std::memory_order_relaxed);
-		    	return count > 0 ? count : 0;
-		    }
+			LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
+			{
+				assert(initialCount >= 0);
+			}
+
+			bool tryWait()
+			{
+				ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+				while (oldCount > 0)
+				{
+					if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
+						return true;
+				}
+				return false;
+			}
+
+			void wait()
+			{
+				if (!tryWait())
+					waitWithPartialSpinning();
+			}
+
+			bool wait(std::int64_t timeout_usecs)
+			{
+				return tryWait() || waitWithPartialSpinning(timeout_usecs);
+			}
+
+			// Acquires between 0 and (greedily) max, inclusive
+			ssize_t tryWaitMany(ssize_t max)
+			{
+				assert(max >= 0);
+				ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+				while (oldCount > 0)
+				{
+					ssize_t newCount = oldCount > max ? oldCount - max : 0;
+					if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+						return oldCount - newCount;
+				}
+				return 0;
+			}
+
+			// Acquires (greedily) up to max; returns 0 only if max == 0 or the timeout expired (negative timeout_usecs waits indefinitely)
+			ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs)
+			{
+				assert(max >= 0);
+				ssize_t result = tryWaitMany(max);
+				if (result == 0 && max > 0)
+					result = waitManyWithPartialSpinning(max, timeout_usecs);
+				return result;
+			}
+			
+			ssize_t waitMany(ssize_t max)
+			{
+				ssize_t result = waitMany(max, -1);
+				assert(result > 0);
+				return result;
+			}
+
+			void signal(ssize_t count = 1)
+			{
+				assert(count >= 0);
+				ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
+				ssize_t toRelease = -oldCount < count ? -oldCount : count;
+				if (toRelease > 0)
+				{
+					m_sema.signal((int)toRelease);
+				}
+			}
+			
+			ssize_t availableApprox() const
+			{
+				ssize_t count = m_count.load(std::memory_order_relaxed);
+				return count > 0 ? count : 0;
+			}
 		};
 	}	// end namespace mpmc_sema
 }	// end namespace details
@@ -654,6 +759,35 @@ public:
 			continue;
 		}
 	}
+
+	// Blocks the current thread until either there's something to dequeue
+	// or the timeout (specified in microseconds) expires. Returns false
+	// without setting `item` if the timeout expires, otherwise assigns
+	// to `item` and returns true.
+	// Using a negative timeout indicates an indefinite timeout,
+	// and is thus functionally equivalent to calling wait_dequeue.
+	// Never allocates. Thread-safe.
+	template<typename U>
+	inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs)
+	{
+		if (!sema->wait(timeout_usecs)) {
+			return false;
+		}
+		while (!inner.try_dequeue(item)) {
+			continue;
+		}
+		return true;
+	}
+    
+    // Blocks the current thread until either there's something to dequeue
+	// or the timeout expires. Returns false without setting `item` if the
+    // timeout expires, otherwise assigns to `item` and returns true.
+	// Never allocates. Thread-safe.
+	template<typename U, typename Rep, typename Period>
+	inline bool wait_dequeue_timed(U& item, std::chrono::duration<Rep, Period> const& timeout)
+    {
+        return wait_dequeue_timed(item, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
+    }
 	
 	// Blocks the current thread until there's something to dequeue, then
 	// dequeues it using an explicit consumer token.
@@ -667,6 +801,35 @@ public:
 		}
 	}
 	
+	// Blocks the current thread until either there's something to dequeue
+	// or the timeout (specified in microseconds) expires. Returns false
+	// without setting `item` if the timeout expires, otherwise assigns
+	// to `item` and returns true.
+	// Using a negative timeout indicates an indefinite timeout,
+	// and is thus functionally equivalent to calling wait_dequeue.
+	// Never allocates. Thread-safe.
+	template<typename U>
+	inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs)
+	{
+		if (!sema->wait(timeout_usecs)) {
+			return false;
+		}
+		while (!inner.try_dequeue(token, item)) {
+			continue;
+		}
+		return true;
+	}
+    
+    // Blocks the current thread until either there's something to dequeue
+	// or the timeout expires. Returns false without setting `item` if the
+    // timeout expires, otherwise assigns to `item` and returns true.
+	// Never allocates. Thread-safe.
+	template<typename U, typename Rep, typename Period>
+	inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration<Rep, Period> const& timeout)
+    {
+        return wait_dequeue_timed(token, item, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
+    }
+	
 	// Attempts to dequeue several elements from the queue.
 	// Returns the number of items actually dequeued, which will
 	// always be at least one (this method blocks until the queue
@@ -683,6 +846,35 @@ public:
 		return count;
 	}
 	
+	// Attempts to dequeue several elements from the queue.
+	// Returns the number of items actually dequeued, which can
+	// be 0 if the timeout expires while waiting for elements,
+	// and at most max.
+	// Using a negative timeout indicates an indefinite timeout,
+	// and is thus functionally equivalent to calling wait_dequeue_bulk.
+	// Never allocates. Thread-safe.
+	template<typename It>
+	inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs)
+	{
+		size_t count = 0;
+		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs);
+		while (count != max) {
+			count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
+		}
+		return count;
+	}
+    
+    // Attempts to dequeue several elements from the queue.
+	// Returns the number of items actually dequeued, which can
+	// be 0 if the timeout expires while waiting for elements,
+	// and at most max.
+	// Never allocates. Thread-safe.
+	template<typename It, typename Rep, typename Period>
+	inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration<Rep, Period> const& timeout)
+    {
+        return wait_dequeue_bulk_timed<It&>(itemFirst, max, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
+    }
+	
 	// Attempts to dequeue several elements from the queue using an explicit consumer token.
 	// Returns the number of items actually dequeued, which will
 	// always be at least one (this method blocks until the queue
@@ -699,6 +891,35 @@ public:
 		return count;
 	}
 	
+	// Attempts to dequeue several elements from the queue using an explicit consumer token.
+	// Returns the number of items actually dequeued, which can
+	// be 0 if the timeout expires while waiting for elements,
+	// and at most max.
+	// Using a negative timeout indicates an indefinite timeout,
+	// and is thus functionally equivalent to calling wait_dequeue_bulk.
+	// Never allocates. Thread-safe.
+	template<typename It>
+	inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs)
+	{
+		size_t count = 0;
+		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs);
+		while (count != max) {
+			count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count);
+		}
+		return count;
+	}
+	
+	// Attempts to dequeue several elements from the queue using an explicit consumer token.
+	// Returns the number of items actually dequeued, which can
+	// be 0 if the timeout expires while waiting for elements,
+	// and at most max.
+	// Never allocates. Thread-safe.
+	template<typename It, typename Rep, typename Period>
+	inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration<Rep, Period> const& timeout)
+    {
+        return wait_dequeue_bulk_timed<It&>(token, itemFirst, max, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
+    }
+	
 	
 	// Returns an estimate of the total number of elements currently in the queue. This
 	// estimate is only accurate if the queue has completely stabilized before it is called
diff --git a/include/concurrentqueue.h b/include/concurrentqueue.h
index 6983974..2eb54d6 100644
--- a/include/concurrentqueue.h
+++ b/include/concurrentqueue.h
@@ -5,7 +5,7 @@
 //    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
 
 // Simplified BSD license:
-// Copyright (c) 2013-2015, Cameron Desrochers.
+// Copyright (c) 2013-2016, Cameron Desrochers.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -42,6 +42,10 @@
 #endif
 #endif
 
+#if defined(__APPLE__)
+#include "TargetConditionals.h"
+#endif
+
 #ifdef MCDBGQ_USE_RELACY
 #include "relacy/relacy_std.hpp"
 #include "relacy_shims.h"
@@ -55,6 +59,7 @@
 #include <atomic>		// Requires C++11. Sorry VS2010.
 #include <cassert>
 #endif
+#include <cstddef>              // for max_align_t
 #include <cstdint>
 #include <cstdlib>
 #include <type_traits>
@@ -63,9 +68,16 @@
 #include <limits>
 #include <climits>		// for CHAR_BIT
 #include <array>
-#include <thread>		// for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
+#include <thread>		// partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
 
 // Platform-specific definitions of a numeric thread ID type and an invalid value
+namespace moodycamel { namespace details {
+	template<typename thread_id_t> struct thread_id_converter {
+		typedef thread_id_t thread_id_numeric_size_t;
+		typedef thread_id_t thread_id_hash_t;
+		static thread_id_hash_t prehash(thread_id_t const& x) { return x; }
+	};
+} }
 #if defined(MCDBGQ_USE_RELACY)
 namespace moodycamel { namespace details {
 	typedef std::uint32_t thread_id_t;
@@ -84,6 +96,40 @@ namespace moodycamel { namespace details {
 	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;	// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
 	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
 } }
+#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
+namespace moodycamel { namespace details {
+	static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
+	
+	typedef std::thread::id thread_id_t;
+	static const thread_id_t invalid_thread_id;         // Default ctor creates invalid ID
+
+	// Note we don't define an invalid_thread_id2 since std::thread::id doesn't have one; it's
+	// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
+	// be.
+	static inline thread_id_t thread_id() { return std::this_thread::get_id(); }
+
+	template<std::size_t> struct thread_id_size { };
+	template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; };
+	template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; };
+
+	template<> struct thread_id_converter<thread_id_t> {
+		typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t;
+#ifndef __APPLE__
+		typedef std::size_t thread_id_hash_t;
+#else
+		typedef thread_id_numeric_size_t thread_id_hash_t;
+#endif
+
+		static thread_id_hash_t prehash(thread_id_t const& x)
+		{
+#ifndef __APPLE__
+			return std::hash<std::thread::id>()(x);
+#else
+			return *reinterpret_cast<thread_id_hash_t const*>(&x);
+#endif
+		}
+	};
+} }
 #else
 // Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
 // In order to get a numeric thread ID in a platform-independent way, we use a thread-local
@@ -146,12 +192,13 @@ namespace moodycamel { namespace details {
 #ifdef MCDBGQ_USE_RELACY
 #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 #else
-//// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
-//// g++ <=4.7 doesn't support thread_local either
-//#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-//// Assume `thread_local` is fully supported in all other C++11 compilers/runtimes
-//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-//#endif
+// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
+// g++ <=4.7 doesn't support thread_local either.
+// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
+// Assume `thread_local` is fully supported in all other C++11 compilers/platforms
+//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED    // always disabled for now since several users report having problems with it on
+#endif
 #endif
 #endif
 
@@ -189,6 +236,12 @@ namespace details {
 			? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1)
 			: static_cast<T>(-1);
 	};
+
+#if (defined(__GNUC__) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)))
+	typedef ::max_align_t max_align_t;      // GCC forgot to add it to std:: for a while
+#else
+	typedef std::max_align_t max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::
+#endif
 }
 
 // Default traits for the ConcurrentQueue. To change some of the
@@ -301,7 +354,7 @@ namespace details
 		ProducerToken* token;
 		
 		ConcurrentQueueProducerTypelessBase()
-			: inactive(false), token(nullptr)
+			: next(nullptr), inactive(false), token(nullptr)
 		{
 		}
 	};
@@ -335,7 +388,8 @@ namespace details
 	static inline size_t hash_thread_id(thread_id_t id)
 	{
 		static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
-		return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_t)>::hash(id));
+		return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
+			thread_id_converter<thread_id_t>::prehash(id)));
 	}
 	
 	template<typename T>
@@ -889,7 +943,7 @@ public:
 	bool enqueue_bulk(It itemFirst, size_t count)
 	{
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue_bulk<CanAlloc>(std::forward<It>(itemFirst), count);
+		return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
 	}
 	
 	// Enqueues several items using an explicit producer token.
@@ -901,7 +955,7 @@ public:
 	template<typename It>
 	bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
 	{
-		return inner_enqueue_bulk<CanAlloc>(token, std::forward<It>(itemFirst), count);
+		return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
 	}
 	
 	// Enqueues a single item (by copying it).
@@ -953,7 +1007,7 @@ public:
 	bool try_enqueue_bulk(It itemFirst, size_t count)
 	{
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue_bulk<CannotAlloc>(std::forward<It>(itemFirst), count);
+		return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
 	}
 	
 	// Enqueues several items using an explicit producer token.
@@ -964,7 +1018,7 @@ public:
 	template<typename It>
 	bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
 	{
-		return inner_enqueue_bulk<CannotAlloc>(token, std::forward<It>(itemFirst), count);
+		return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
 	}
 	
 	
@@ -1102,7 +1156,7 @@ public:
 	{
 		if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
 			if (!update_current_producer_after_rotation(token)) {
-				return false;
+				return 0;
 			}
 		}
 		
@@ -1195,7 +1249,7 @@ public:
 			details::static_is_lock_free<std::uint32_t>::value == 2 &&
 			details::static_is_lock_free<index_t>::value == 2 &&
 			details::static_is_lock_free<void*>::value == 2 &&
-			details::static_is_lock_free<details::thread_id_t>::value == 2;
+			details::static_is_lock_free<typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_size_t>::value == 2;
 	}
 
 
@@ -1228,14 +1282,14 @@ private:
 	template<AllocationMode canAlloc, typename It>
 	inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
 	{
-		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(std::forward<It>(itemFirst), count);
+		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
 	}
 	
 	template<AllocationMode canAlloc, typename It>
 	inline bool inner_enqueue_bulk(It itemFirst, size_t count)
 	{
 		auto producer = get_or_add_implicit_producer();
-		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(std::forward<It>(itemFirst), count);
+		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
 	}
 	
 	inline bool update_current_producer_after_rotation(consumer_token_t& token)
@@ -1408,7 +1462,7 @@ private:
 	struct Block
 	{
 		Block()
-			: elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true)
+			: next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true)
 		{
 #if MCDBGQ_TRACKMEM
 			owner = nullptr;
@@ -1512,15 +1566,28 @@ private:
 			}
 		}
 		
-		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return reinterpret_cast<T*>(elements) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
-		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return reinterpret_cast<T const*>(elements) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
+		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
+		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
 		
+	private:
+		// IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of
+		// addresses returned by malloc, that alignment will be preserved. Apparently clang actually
+		// generates code that uses this assumption for AVX instructions in some cases. Ideally, we
+		// should also align Block to the alignment of T in case it's higher than malloc's 16-byte
+		// alignment, but this is hard to do in a cross-platform way. Assert for this case:
+		static_assert(std::alignment_of<T>::value <= std::alignment_of<details::max_align_t>::value, "The queue does not support super-aligned types at this time");
+		// Additionally, we need the alignment of Block itself to be a multiple of max_align_t since
+		// otherwise the appropriate padding will not be added at the end of Block in order to make
+		// arrays of Blocks all be properly aligned (not just the first one). We use a union to force
+		// this.
+		union {
+			char elements[sizeof(T) * BLOCK_SIZE];
+			details::max_align_t dummy;
+		};
 	public:
 		Block* next;
 		std::atomic<size_t> elementsCompletelyDequeued;
 		std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1];
-	private:
-		char elements[sizeof(T) * BLOCK_SIZE];
 	public:
 		std::atomic<std::uint32_t> freeListRefs;
 		std::atomic<Block*> freeListNext;
@@ -1531,6 +1598,7 @@ private:
 		void* owner;
 #endif
 	};
+	static_assert(std::alignment_of<Block>::value >= std::alignment_of<details::max_align_t>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
 
 
 #if MCDBGQ_TRACKMEM
@@ -1677,11 +1745,14 @@ private:
 			if (this->tailBlock != nullptr) {
 				auto block = this->tailBlock;
 				do {
-					auto next = block->next;
+					auto nextBlock = block->next;
 					if (block->dynamicallyAllocated) {
 						destroy(block);
 					}
-					block = next;
+					else {
+						this->parent->add_block_to_free_list(block);
+					}
+					block = nextBlock;
 				} while (block != this->tailBlock);
 			}
 			
@@ -2289,9 +2360,9 @@ private:
 			bool forceFreeLastBlock = index != tail;		// If we enter the loop, then the last (tail) block will not be freed
 			while (index != tail) {
 				if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {
-					if (block != nullptr && block->dynamicallyAllocated) {
+					if (block != nullptr) {
 						// Free the old block
-						this->parent->destroy(block);
+						this->parent->add_block_to_free_list(block);
 					}
 					
 					block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
@@ -2303,8 +2374,8 @@ private:
 			// Even if the queue is empty, there's still one block that's not on the free list
 			// (unless the head index reached the end of it, in which case the tail will be poised
 			// to create a new block).
-			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) && this->tailBlock->dynamicallyAllocated) {
-				this->parent->destroy(this->tailBlock);
+			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
+				this->parent->add_block_to_free_list(this->tailBlock);
 			}
 			
 			// Destroy block index
@@ -3123,7 +3194,7 @@ private:
 		std::atomic<details::thread_id_t> key;
 		ImplicitProducer* value;		// No need for atomicity since it's only read by the thread that sets it in the first place
 		
-		ImplicitProducerKVP() { }
+		ImplicitProducerKVP() : value(nullptr) { }
 		
 		ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
 		{
diff --git a/include/cuckoohash_config.hh b/include/cuckoohash_config.hh
index fa6ca4d..bfa091a 100644
--- a/include/cuckoohash_config.hh
+++ b/include/cuckoohash_config.hh
@@ -11,6 +11,12 @@ const size_t DEFAULT_SLOT_PER_BUCKET = 4;
 //! The default number of elements in an empty hash table
 const size_t DEFAULT_SIZE = (1U << 16) * DEFAULT_SLOT_PER_BUCKET;
 
+//! On a scale of 0 to 16, the memory granularity of the locks array. 0 is the
+//! least granular, meaning the array is a contiguous array and thus offers the
+//! best performance but the greatest memory overhead. 16 is the most granular,
+//! offering the least memory overhead but worse performance.
+const size_t LOCK_ARRAY_GRANULARITY = 0;
+
 //! The default minimum load factor that the table allows for automatic
 //! expansion. It must be a number between 0.0 and 1.0. The table will throw
 //! libcuckoo_load_factor_too_low if the load factor falls below this value
diff --git a/include/cuckoohash_map.hh b/include/cuckoohash_map.hh
index 3ef2db4..fdc0735 100644
--- a/include/cuckoohash_map.hh
+++ b/include/cuckoohash_map.hh
@@ -23,12 +23,12 @@
 #include <thread>
 #include <tuple>
 #include <type_traits>
-#include <unistd.h>
 #include <utility>
 #include <vector>
 
 #include "cuckoohash_config.hh"
 #include "cuckoohash_util.hh"
+#include "lazy_array.hh"
 #include "default_hasher.hh"
 
 //! cuckoohash_map is the hash table class.
@@ -145,13 +145,13 @@ private:
 
     // number of cores on the machine
     static size_t kNumCores() {
-        static size_t cores = std::thread::hardware_concurrency() == 0 ?
-            sysconf(_SC_NPROCESSORS_ONLN) : std::thread::hardware_concurrency();
+        static size_t cores = std::thread::hardware_concurrency();
         return cores;
     }
 
     // A fast, lightweight spinlock
-    class spinlock {
+    LIBCUCKOO_SQUELCH_PADDING_WARNING
+    class LIBCUCKOO_ALIGNAS(64) spinlock {
         std::atomic_flag lock_;
     public:
         spinlock() {
@@ -170,7 +170,7 @@ private:
             return !lock_.test_and_set(std::memory_order_acquire);
         }
 
-    } __attribute__((aligned(64)));
+    };
 
     typedef enum {
         ok,
@@ -185,13 +185,16 @@ private:
     // a occupied bitset, which indicates whether the slot at the given bit
     // index is in the table or not. It uses aligned_storage arrays to store the
     // keys and values to allow constructing and destroying key-value pairs in
-    // place.
+    // place. Internally, the values are stored without the const qualifier in
+    // the key, to enable modifying bucket memory.
+    typedef std::pair<Key, T> storage_value_type;
     class Bucket {
     private:
         std::array<partial_t, slot_per_bucket> partials_;
         std::bitset<slot_per_bucket> occupied_;
         std::array<typename std::aligned_storage<
-                       sizeof(value_type), alignof(value_type)>::type,
+                       sizeof(storage_value_type),
+                       alignof(storage_value_type)>::type,
                    slot_per_bucket> kvpairs_;
 
     public:
@@ -213,8 +216,13 @@ private:
                 static_cast<void*>(&kvpairs_[ind]));
         }
 
+        storage_value_type& storage_kvpair(size_t ind) {
+            return *static_cast<storage_value_type*>(
+                static_cast<void*>(&kvpairs_[ind]));
+        }
+
         bool occupied(size_t ind) const {
-            return occupied_.test(ind);
+            return occupied_[ind];
         }
 
         const key_type& key(size_t ind) const {
@@ -229,15 +237,19 @@ private:
             return kvpair(ind).second;
         }
 
-        template <class... Args>
-        void setKV(size_t ind, Args&&... args) {
+        template <typename K, typename... Args>
+        void setKV(size_t ind, K&& k, Args&&... args) {
             static allocator_type pair_allocator;
-            occupied_.set(ind);
-            pair_allocator.construct(&kvpair(ind), std::forward<Args>(args)...);
+            occupied_[ind] = true;
+            pair_allocator.construct(
+                &storage_kvpair(ind),
+                std::piecewise_construct,
+                std::forward_as_tuple(std::forward<K>(k)),
+                std::forward_as_tuple(std::forward<Args>(args)...));
         }
 
         void eraseKV(size_t ind) {
-            occupied_.reset(ind);
+            occupied_[ind] = false;
             (&kvpair(ind))->~value_type();
         }
 
@@ -249,13 +261,40 @@ private:
             }
         }
 
-        ~Bucket() {
-            clear();
+        // Moves the item in b1[slot1] into b2[slot2] without copying
+        static void move_to_bucket(
+            Bucket& b1, size_t slot1,
+            Bucket& b2, size_t slot2) {
+            assert(b1.occupied(slot1));
+            assert(!b2.occupied(slot2));
+            storage_value_type& tomove = b1.storage_kvpair(slot1);
+            b2.setKV(slot2, std::move(tomove.first), std::move(tomove.second));
+            b2.partial(slot2) = b1.partial(slot1);
+            b1.occupied_.reset(slot1);
+            b2.occupied_.set(slot2);
         }
     };
 
+    // The type of the buckets container
+    typedef std::vector<
+        Bucket, typename allocator_type::template rebind<Bucket>::other>
+    buckets_t;
+
+    // The type of the locks container
+    static_assert(LOCK_ARRAY_GRANULARITY >= 0 && LOCK_ARRAY_GRANULARITY <= 16,
+                  "LOCK_ARRAY_GRANULARITY constant must be between 0 and 16,"
+                  " inclusive");
+    typedef lazy_array<
+        16 - LOCK_ARRAY_GRANULARITY, LOCK_ARRAY_GRANULARITY,
+        spinlock,
+        typename allocator_type::template rebind<spinlock>::other> locks_t;
+
+    // The type of the expansion lock
+    typedef std::mutex expansion_lock_t;
+
     // cacheint is a cache-aligned atomic integer type.
-    struct cacheint {
+    LIBCUCKOO_SQUELCH_PADDING_WARNING
+    struct LIBCUCKOO_ALIGNAS(64) cacheint {
         std::atomic<size_t> num;
         cacheint(): num(0) {}
         cacheint(size_t x): num(x) {}
@@ -269,14 +308,7 @@ private:
             num = x.num.load();
             return *this;
         }
-    } __attribute__((aligned(64)));
-
-
-    // 2**hashpower is the number of buckets. This cannot be changed unless all
-    // the locks are taken on the table. Since it is still read and written by
-    // multiple threads not necessarily synchronized by a lock, we keep it
-    // atomic
-    std::atomic<size_t> hashpower_;
+    };
 
     // Helper methods to read and write hashpower_ with the correct memory
     // barriers
@@ -288,41 +320,11 @@ private:
         hashpower_.store(val, std::memory_order_release);
     }
 
-    // vector of buckets. The size or memory location of the buckets cannot be
-    // changed unless al the locks are taken on the table. Thus, it is only safe
-    // to access the buckets_ vector when you have at least one lock held.
-    typedef std::vector<
-        Bucket, typename allocator_type::template rebind<Bucket>::other>
-    buckets_t;
-    buckets_t buckets_;
-
-    // array of locks. marked mutable, so that const methods can take locks.
-    // Even though it's a vector, it should not ever change in size after the
-    // initial allocation.
-    typedef std::vector<spinlock> locks_t;
-    mutable locks_t locks_;
-
-    // per-core counters for the number of inserts and deletes
-    std::vector<
-        cacheint, typename allocator_type::template rebind<cacheint>::other>
-    num_inserts_, num_deletes_;
-
-    // stores the minimum load factor allowed for automatic expansions. Whenever
-    // an automatic expansion is triggered (during an insertion where cuckoo
-    // hashing fails, for example), we check the load factor against this
-    // double, and throw an exception if it's lower than this value. It can be
-    // used to signal when the hash function is bad or the input adversarial.
-    std::atomic<double> minimum_load_factor_;
-
-    // stores the maximum hashpower allowed for any expansions. If set to
-    // NO_MAXIMUM_HASHPOWER, this limit will be disregarded.
-    std::atomic<size_t> maximum_hashpower_;
-
     // get_counterid returns the counterid for the current thread.
     static inline int get_counterid() {
         // counterid stores the per-thread counter index of each thread. Each
         // counter value corresponds to a core on the machine.
-        static __thread int counterid = -1;
+        static LIBCUCKOO_THREAD_LOCAL int counterid = -1;
 
         if (counterid < 0) {
             counterid = rand() % kNumCores();
@@ -332,23 +334,22 @@ private:
 
     // reserve_calc takes in a parameter specifying a certain number of slots
     // for a table and returns the smallest hashpower that will hold n elements.
-    static size_t reserve_calc(size_t n) {
-        double nhd = ceil(log2((double)n / (double)slot_per_bucket));
-        size_t new_hp = (size_t) (nhd <= 0 ? 1.0 : nhd);
-        assert(n <= hashsize(new_hp) * slot_per_bucket);
-        return new_hp;
-    }
-
-    // hashfn returns an instance of the hash function
-    static hasher hashfn() {
-        static hasher hash;
-        return hash;
-    }
-
-    // eqfn returns an instance of the equality predicate
-    static key_equal eqfn() {
-        static key_equal eq;
-        return eq;
+    static size_t reserve_calc(const size_t n) {
+        size_t buckets = (n + slot_per_bucket - 1) / slot_per_bucket;
+        size_t blog2;
+        if (buckets <= 1) {
+            blog2 = 1;
+        } else {
+            blog2 = 0;
+            for (size_t bcounter = buckets; bcounter > 1; bcounter >>= 1) {
+                ++blog2;
+            }
+            if (hashsize(blog2) < buckets) {
+                ++blog2;
+            }
+        }
+        assert(n <= hashsize(blog2) * slot_per_bucket);
+        return blog2;
     }
 
 public:
@@ -365,8 +366,10 @@ public:
      */
     cuckoohash_map(size_t n = DEFAULT_SIZE,
                    double mlf = DEFAULT_MINIMUM_LOAD_FACTOR,
-                   size_t mhp = NO_MAXIMUM_HASHPOWER)
-        : locks_(kNumLocks) {
+                   size_t mhp = NO_MAXIMUM_HASHPOWER,
+                   const hasher& hf = hasher(),
+                   const key_equal eql = key_equal())
+        : hash_fn(hf), eq_fn(eql) {
         minimum_load_factor(mlf);
         maximum_hashpower(mhp);
         size_t hp = reserve_calc(n);
@@ -377,10 +380,15 @@ public:
         }
         set_hashpower(hp);
         buckets_.resize(hashsize(hp));
+        locks_.allocate(std::min(locks_t::size(), hashsize(hp)));
         num_inserts_.resize(kNumCores(), 0);
         num_deletes_.resize(kNumCores(), 0);
     }
 
+    ~cuckoohash_map() {
+        cuckoo_clear();
+    }
+
     //! clear removes all the elements in the hash table, calling their
     //! destructors.
     void clear() noexcept {
@@ -436,14 +444,14 @@ public:
                 "load factor " + std::to_string(mlf) + " cannot be "
                 " greater than 1");
         }
-        minimum_load_factor_ = mlf;
+        minimum_load_factor_.store(mlf, std::memory_order_release);
     }
 
     /**
      * @return the minimum load factor of the table
      */
     double minimum_load_factor() noexcept {
-        return minimum_load_factor_;
+        return minimum_load_factor_.load(std::memory_order_acquire);
     }
 
     /**
@@ -453,32 +461,29 @@ public:
      * @param mhp the hashpower to set the maximum to
      */
     void maximum_hashpower(size_t mhp) noexcept {
-        maximum_hashpower_ = mhp;
+        maximum_hashpower_.store(mhp, std::memory_order_release);
     }
 
     /**
      * @return the maximum hashpower of the table
      */
     size_t maximum_hashpower() noexcept {
-        return maximum_hashpower_;
+        return maximum_hashpower_.load(std::memory_order_acquire);
     }
 
     //! find searches through the table for \p key, and stores the associated
-    //! value it finds in \p val.
-    ENABLE_IF(, value_copy_assignable, bool)
-    find(const key_type& key, mapped_type& val) const {
+    //! value it finds in \p val. mapped_type must be copy assignable.
+    bool find(const key_type& key, mapped_type& val) const {
         size_t hv = hashed_key(key);
         auto b = snapshot_and_lock_two(hv);
-        const cuckoo_status st = cuckoo_find(key, val, hv, b.first, b.second);
-        unlock_two(b.first, b.second);
+        const cuckoo_status st = cuckoo_find(key, val, hv, b.i[0], b.i[1]);
         return (st == ok);
     }
 
     //! This version of find does the same thing as the two-argument version,
     //! except it returns the value it finds, throwing an \p std::out_of_range
     //! exception if the key isn't in the table.
-    ENABLE_IF(, value_copy_assignable, mapped_type)
-    find(const key_type& key) const {
+    mapped_type find(const key_type& key) const {
         mapped_type val;
         bool done = find(key, val);
         if (done) {
@@ -493,8 +498,7 @@ public:
     bool contains(const key_type& key) const {
         size_t hv = hashed_key(key);
         auto b = snapshot_and_lock_two(hv);
-        const bool result = cuckoo_contains(key, hv, b.first, b.second);
-        unlock_two(b.first, b.second);
+        const bool result = cuckoo_contains(key, hv, b.i[0], b.i[1]);
         return result;
     }
 
@@ -511,9 +515,10 @@ public:
      * @throw libcuckoo_maximum_hashpower_exceeded if expansion is required
      * beyond the maximum hash power, if one was set
      */
-    template <class V>
-    bool insert(const key_type& key, V&& val) {
-        return cuckoo_insert_loop(key, std::forward<V>(val), hashed_key(key));
+    template <typename K, typename... Args>
+    bool insert(K&& key, Args&&... val) {
+        return cuckoo_insert_loop(hashed_key(key), std::forward<K>(key),
+                                  std::forward<Args>(val)...);
     }
 
     //! erase removes \p key and it's associated value from the table, calling
@@ -522,20 +527,18 @@ public:
     bool erase(const key_type& key) {
         size_t hv = hashed_key(key);
         auto b = snapshot_and_lock_two(hv);
-        const cuckoo_status st = cuckoo_delete(key, hv, b.first, b.second);
-        unlock_two(b.first, b.second);
+        const cuckoo_status st = cuckoo_delete(key, hv, b.i[0], b.i[1]);
         return (st == ok);
     }
 
     //! update changes the value associated with \p key to \p val. If \p key is
     //! not there, it returns false, otherwise it returns true.
-    ENABLE_IF(, value_copy_assignable, bool)
-    update(const key_type& key, const mapped_type& val) {
+    template <typename V>
+    bool update(const key_type& key, V&& val) {
         size_t hv = hashed_key(key);
         auto b = snapshot_and_lock_two(hv);
-        const cuckoo_status st = cuckoo_update(key, val, hv, b.first,
-                                               b.second);
-        unlock_two(b.first, b.second);
+        const cuckoo_status st = cuckoo_update(hv, b.i[0], b.i[1],
+                                               key, std::forward<V>(val));
         return (st == ok);
     }
 
@@ -549,9 +552,7 @@ public:
         bool>::type update_fn(const key_type& key, Updater fn) {
         size_t hv = hashed_key(key);
         auto b = snapshot_and_lock_two(hv);
-        const cuckoo_status st = cuckoo_update_fn(key, fn, hv, b.first,
-                                                  b.second);
-        unlock_two(b.first, b.second);
+        const cuckoo_status st = cuckoo_update_fn(key, fn, hv, b.i[0], b.i[1]);
         return (st == ok);
     }
 
@@ -560,18 +561,17 @@ public:
     //! table, then it runs an insert with \p key and \p val. It will always
     //! succeed, since if the update fails and the insert finds the key already
     //! inserted, it can retry the update.
-    template <typename Updater, typename V>
+    template <typename Updater, typename K, typename... Args>
     typename std::enable_if<
         std::is_convertible<Updater, updater_type>::value,
-        void>::type upsert(const key_type& key, Updater fn, V val) {
+        void>::type upsert(K&& key, Updater fn, Args&&... val) {
         size_t hv = hashed_key(key);
         cuckoo_status st;
         do {
             auto b = snapshot_and_lock_two(hv);
             size_t hp = get_hashpower();
-            st = cuckoo_update_fn(key, fn, hv, b.first, b.second);
+            st = cuckoo_update_fn(key, fn, hv, b.i[0], b.i[1]);
             if (st == ok) {
-                unlock_two(b.first, b.second);
                 break;
             }
 
@@ -579,12 +579,13 @@ public:
             // the locks, we don't run cuckoo_insert_loop immediately, to avoid
             // releasing and re-grabbing the locks. Recall, that the locks will
             // be released at the end of this call to cuckoo_insert.
-            st = cuckoo_insert(key, std::forward<V>(val), hv,
-                               b.first, b.second);
+            st = cuckoo_insert(hv, std::move(b), std::forward<K>(key),
+                               std::forward<Args>(val)...);
             if (st == failure_table_full) {
-                cuckoo_expand_simple(hp + 1, true);
+                cuckoo_fast_double(hp);
                 // Retry until the insert doesn't fail due to expansion.
-                if (cuckoo_insert_loop(key, val, hv)) {
+                if (cuckoo_insert_loop(hv, std::forward<K>(key),
+                                       std::forward<Args>(val)...)) {
                     break;
                 }
                 // The only valid reason for failure is a duplicate key. In this
@@ -634,12 +635,12 @@ public:
 
     //! hash_function returns the hash function object used by the table.
     hasher hash_function() const noexcept {
-        return hashfn();
+        return hash_fn;
     }
 
     //! key_eq returns the equality predicate object used by the table.
     key_equal key_eq() const noexcept {
-        return eqfn();
+        return eq_fn;
     }
 
     //! Returns a \ref reference to the mapped value stored at the given key.
@@ -658,6 +659,88 @@ public:
 
 private:
 
+    template <size_t N>
+    struct BucketContainer {
+        static_assert(N >= 1 && N <= 3, "BucketContainer should only be used"
+                      " for between 1 and 3 locks");
+        const cuckoohash_map* map;
+        std::array<size_t, N> i;
+
+        BucketContainer() : map(nullptr), i() {}
+
+        template <typename... Args>
+        BucketContainer(const cuckoohash_map* _map, Args&&... inds)
+            : map(_map), i{{inds...}} {}
+
+        BucketContainer(const cuckoohash_map* _map, std::array<size_t, N> _i)
+            : map(_map), i(_i) {}
+
+        BucketContainer(const BucketContainer&) = delete;
+        BucketContainer& operator=(const BucketContainer&) = delete;
+
+        // Moving will not invalidate the bucket indices
+        BucketContainer(BucketContainer&& bp) {
+            *this = std::move(bp);
+        }
+
+        BucketContainer& operator=(BucketContainer&& bp) {
+            map = bp.map;
+            i = bp.i;
+            bp.map = nullptr;
+            return *this;
+        }
+
+        void release() {
+            this->~BucketContainer();
+            map = nullptr;
+        }
+
+        bool is_active() const {
+            return map != nullptr;
+        }
+
+        ~BucketContainer() {
+            if (map) {
+                unlock(i);
+            }
+        }
+
+    private:
+        // unlocks the given bucket index.
+        void unlock(std::array<size_t, 1> inds) const {
+            map->locks_[lock_ind(inds[0])].unlock();
+        }
+
+        // unlocks both of the given bucket indexes, or only one if they are
+        // equal. Order doesn't matter here.
+        void unlock(std::array<size_t, 2> inds) const {
+            const size_t l0 = lock_ind(inds[0]);
+            const size_t l1 = lock_ind(inds[1]);
+            map->locks_[l0].unlock();
+            if (l0 != l1) {
+                map->locks_[l1].unlock();
+            }
+        }
+
+        // unlocks the three given buckets
+        void unlock(std::array<size_t, 3> inds) const {
+            const size_t l0 = lock_ind(inds[0]);
+            const size_t l1 = lock_ind(inds[1]);
+            const size_t l2 = lock_ind(inds[2]);
+            map->locks_[l0].unlock();
+            if (l1 != l0) {
+                map->locks_[l1].unlock();
+            }
+            if (l2 != l0 && l2 != l1) {
+                map->locks_[l2].unlock();
+            }
+        }
+    };
+
+    typedef BucketContainer<1> OneBucket;
+    typedef BucketContainer<2> TwoBuckets;
+    typedef BucketContainer<3> ThreeBuckets;
+
     // This exception is thrown whenever we try to lock a bucket, but the
     // hashpower is not what was expected
     class hashpower_changed {};
@@ -669,7 +752,7 @@ private:
     inline void check_hashpower(const size_t hp, const size_t lock) const {
         if (get_hashpower() != hp) {
             locks_[lock].unlock();
-            LIBCUCKOO_DBG("hashpower changed\n");
+            LIBCUCKOO_DBG("%s", "hashpower changed\n");
             throw hashpower_changed();
         }
     }
@@ -677,89 +760,57 @@ private:
     // locks the given bucket index.
     //
     // throws hashpower_changed if it changed after taking the lock.
-    inline void lock_one(const size_t hp, size_t i) const {
-        i = lock_ind(i);
-        locks_[i].lock();
-        check_hashpower(hp, i);
-    }
-
-    // unlocks the given bucket index.
-    inline void unlock_one(const size_t i) const {
-        locks_[lock_ind(i)].unlock();
+    inline OneBucket lock_one(const size_t hp, const size_t i) const {
+        const size_t l = lock_ind(i);
+        locks_[l].lock();
+        check_hashpower(hp, l);
+        return OneBucket{this, i};
     }
 
     // locks the two bucket indexes, always locking the earlier index first to
     // avoid deadlock. If the two indexes are the same, it just locks one.
     //
     // throws hashpower_changed if it changed after taking the lock.
-    void lock_two(const size_t hp, size_t i1, size_t i2) const {
-        i1 = lock_ind(i1);
-        i2 = lock_ind(i2);
-        if (i2 < i1) {
-            std::swap(i1, i2);
-        }
-        locks_[i1].lock();
-        check_hashpower(hp, i1);
-        if (i2 != i1) {
-            locks_[i2].lock();
+    TwoBuckets lock_two(const size_t hp, const size_t i1,
+                        const size_t i2) const {
+        size_t l1 = lock_ind(i1);
+        size_t l2 = lock_ind(i2);
+        if (l2 < l1) {
+            std::swap(l1, l2);
         }
-    }
-
-    // unlock_two unlocks both of the given bucket indexes, or only one if they
-    // are equal. Order doesn't matter here.
-    void unlock_two(size_t i1, size_t i2) const {
-        i1 = lock_ind(i1);
-        i2 = lock_ind(i2);
-        locks_[i1].unlock();
-        if (i1 != i2) {
-            locks_[i2].unlock();
+        locks_[l1].lock();
+        check_hashpower(hp, l1);
+        if (l2 != l1) {
+            locks_[l2].lock();
         }
+        return TwoBuckets{this, i1, i2};
     }
 
-    // lock_three locks the three bucket indexes in numerical order.
+    // lock_three locks the three bucket indexes in numerical order, returning
+    // the containers as a two (i1 and i2) and a one (i3). The one will not be
+    // active if i3 shares a lock index with i1 or i2.
     //
     // throws hashpower_changed if it changed after taking the lock.
-    void lock_three(const size_t hp, size_t i1, size_t i2, size_t i3) const {
-        i1 = lock_ind(i1);
-        i2 = lock_ind(i2);
-        i3 = lock_ind(i3);
-        // If any are the same, we just run lock_two
-        if (i1 == i2) {
-            lock_two(hp, i1, i3);
-        } else if (i2 == i3) {
-            lock_two(hp, i1, i3);
-        } else if (i1 == i3) {
-            lock_two(hp, i1, i2);
-        } else {
-            if (i2 < i1) {
-                std::swap(i1, i2);
-            }
-            if (i3 < i2) {
-                std::swap(i2, i3);
-            }
-            // Now i3 is the largest, but i2 could now be less than i1
-            if (i2 < i1) {
-                std::swap(i1, i2);
-            }
-            locks_[i1].lock();
-            check_hashpower(hp, i1);
-            locks_[i2].lock();
-            locks_[i3].lock();
-        }
-    }
-
-    // unlock_three unlocks the three given buckets
-    void unlock_three(size_t i1, size_t i2, size_t i3) const {
-        i1 = lock_ind(i1);
-        i2 = lock_ind(i2);
-        i3 = lock_ind(i3);
-        locks_[i1].unlock();
-        if (i2 != i1) {
-            locks_[i2].unlock();
-        }
-        if (i3 != i1 && i3 != i2) {
-            locks_[i3].unlock();
-        }
+    std::pair<TwoBuckets, OneBucket>
+    lock_three(const size_t hp, const size_t i1,
+               const size_t i2, const size_t i3) const {
+        std::array<size_t, 3> l{{
+                lock_ind(i1), lock_ind(i2), lock_ind(i3)}};
+        std::sort(l.begin(), l.end());
+        locks_[l[0]].lock();
+        check_hashpower(hp, l[0]);
+        if (l[1] != l[0]) {
+            locks_[l[1]].lock();
+        }
+        if (l[2] != l[1]) {
+            locks_[l[2]].lock();
+        }
+        return std::make_pair(
+            TwoBuckets{this, i1, i2},
+            OneBucket{
+                (lock_ind(i3) == lock_ind(i1) ||
+                 lock_ind(i3) == lock_ind(i2)) ?
+                    nullptr : this, i3});
     }
 
     // snapshot_and_lock_two loads locks the buckets associated with the given
@@ -768,21 +819,19 @@ private:
     // hash value will stay correct as long as the locks are held. It returns
     // the bucket indices associated with the hash value and the current
     // hashpower.
-    std::pair<size_t, size_t>
+    TwoBuckets
     snapshot_and_lock_two(const size_t hv) const noexcept {
-        size_t i1, i2;
         while (true) {
             // Store the current hashpower we're using to compute the buckets
             size_t hp = get_hashpower();
-            i1 = index_hash(hp, hv);
-            i2 = alt_index(hp, partial_key(hv), i1);
+            size_t i1 = index_hash(hp, hv);
+            size_t i2 = alt_index(hp, partial_key(hv), i1);
             try {
-                lock_two(hp, i1, i2);
+                return lock_two(hp, i1, i2);
             } catch (hashpower_changed&) {
                 // The hashpower changed while taking the locks. Try again.
                 continue;
             }
-            return {i1, i2};
         }
     }
 
@@ -812,8 +861,8 @@ private:
 
         void release() {
             if (locks_) {
-                for (auto& lock : *locks_) {
-                    lock.unlock();
+                for (size_t i = 0; i < locks_->allocated_size(); ++i) {
+                    (*locks_)[i].unlock();
                 }
                 deactivate();
             }
@@ -829,8 +878,8 @@ private:
     // locks, it is okay to change the buckets_ vector and the hashpower_, since
     // no other threads should be accessing the buckets.
     AllUnlocker snapshot_and_lock_all() const noexcept {
-        for (auto& lock : locks_) {
-            lock.lock();
+        for (size_t i = 0; i < locks_.allocated_size(); ++i) {
+            locks_[i].lock();
         }
         return AllUnlocker(&locks_);
     }
@@ -843,7 +892,7 @@ private:
     // hashsize returns the number of buckets corresponding to a given
     // hashpower.
     static inline size_t hashsize(const size_t hp) {
-        return 1U << hp;
+        return size_t(1) << hp;
     }
 
     // hashmask returns the bitmask for the buckets array corresponding to a
@@ -853,8 +902,8 @@ private:
     }
 
     // hashed_key hashes the given key.
-    static inline size_t hashed_key(const key_type &key) {
-        return hashfn()(key);
+    inline size_t hashed_key(const key_type &key) const {
+        return hash_function()(key);
     }
 
     // index_hash returns the first possible bucket that the given hashed key
@@ -873,7 +922,8 @@ private:
         // ensure tag is nonzero for the multiply.
         const partial_t nonzero_tag = (partial >> 1 << 1) + 1;
         // 0xc6a4a7935bd1e995 is the hash constant from 64-bit MurmurHash2
-        const size_t hash_of_tag = nonzero_tag * 0xc6a4a7935bd1e995;
+        const size_t hash_of_tag =
+            static_cast<size_t>(nonzero_tag * 0xc6a4a7935bd1e995);
         return (index ^ hash_of_tag) & hashmask(hp);
     }
 
@@ -892,17 +942,20 @@ private:
     // The maximum number of items in a BFS path.
     static const uint8_t MAX_BFS_PATH_LEN = 5;
 
-    // CuckooRecord holds one position in a cuckoo path.
-    typedef struct  {
+    // CuckooRecord holds one position in a cuckoo path. Since cuckoopath
+    // elements only define a sequence of alternate hashings for different hash
+    // values, we only need to keep track of the hash values being moved, rather
+    // than the keys themselves.
+    typedef struct {
         size_t bucket;
         size_t slot;
-        key_type key;
-        partial_t partial;
-    }  CuckooRecord;
+        size_t hv;
+    } CuckooRecord;
 
     typedef std::array<CuckooRecord, MAX_BFS_PATH_LEN> CuckooRecords;
 
     // b_slot holds the information for a BFS path through the table
+    #pragma pack(push,1)
     struct b_slot {
         // The bucket of the last item in the path
         size_t bucket;
@@ -929,9 +982,11 @@ private:
             : bucket(b), pathcode(p), depth(d) {
             assert(d < MAX_BFS_PATH_LEN);
         }
-    } __attribute__((__packed__));
+    };
+    #pragma pack(pop)
 
     // b_queue is the queue used to store b_slots for BFS cuckoo hashing.
+    #pragma pack(push,1)
     class b_queue {
         // The maximum size of the BFS queue. Note that unless it's less than
         // SLOT_PER_BUCKET^MAX_BFS_PATH_LEN, it won't really mean anything.
@@ -974,7 +1029,8 @@ private:
         bool full() {
             return increment(last) == first;
         }
-    } __attribute__((__packed__));
+    };
+    #pragma pack(pop)
 
     // slot_search searches for a cuckoo path using breadth-first search. It
     // starts with the i1 and i2 buckets, and, until it finds a bucket with an
@@ -996,21 +1052,19 @@ private:
             for (size_t i = 0; i < slot_per_bucket && !q.full();
                  ++i) {
                 size_t slot = (starting_slot + i) % slot_per_bucket;
-                lock_one(hp, x.bucket);
-                if (!buckets_[x.bucket].occupied(slot)) {
+                OneBucket ob = lock_one(hp, x.bucket);
+                Bucket& b = buckets_[x.bucket];
+                if (!b.occupied(slot)) {
                     // We can terminate the search here
                     x.pathcode = x.pathcode * slot_per_bucket + slot;
-                    unlock_one(x.bucket);
                     return x;
                 }
 
                 // If x has less than the maximum number of path components,
                 // create a new b_slot item, that represents the bucket we would
                 // have come from if we kicked out the item at this slot.
+                const partial_t partial = b.partial(slot);
                 if (x.depth < MAX_BFS_PATH_LEN - 1) {
-                    const partial_t partial =
-                        buckets_[x.bucket].partial(slot);
-                    unlock_one(x.bucket);
                     b_slot y(alt_index(hp, partial, x.bucket),
                              x.pathcode * slot_per_bucket + slot, x.depth+1);
                     q.enqueue(y);
@@ -1050,47 +1104,35 @@ private:
         CuckooRecord& first = cuckoo_path[0];
         if (x.pathcode == 0) {
             first.bucket = i1;
-            lock_one(hp, first.bucket);
-            if (!buckets_[first.bucket].occupied(first.slot)) {
-                // We can terminate here
-                unlock_one(first.bucket);
-                return 0;
-            }
-            first.partial = buckets_[first.bucket].partial(first.slot);
-            first.key = buckets_[first.bucket].key(first.slot);
-            unlock_one(first.bucket);
         } else {
             assert(x.pathcode == 1);
             first.bucket = i2;
-            lock_one(hp, first.bucket);
-            if (!buckets_[first.bucket].occupied(first.slot)) {
+        }
+        {
+            OneBucket ob = lock_one(hp, first.bucket);
+            Bucket& b = buckets_[first.bucket];
+            if (!b.occupied(first.slot)) {
                 // We can terminate here
-                unlock_one(first.bucket);
                 return 0;
             }
-            first.partial = buckets_[first.bucket].partial(first.slot);
-            first.key = buckets_[first.bucket].key(first.slot);
-            unlock_one(first.bucket);
+            first.hv = hashed_key(b.key(first.slot));
         }
         for (int i = 1; i <= x.depth; ++i) {
             CuckooRecord& curr = cuckoo_path[i];
             CuckooRecord& prev = cuckoo_path[i-1];
-            const size_t prevhv = hashed_key(prev.key);
-            assert(prev.bucket == index_hash(hp, prevhv) ||
-                   prev.bucket == alt_index(hp, prev.partial,
-                                            index_hash(hp, prevhv)));
+            assert(prev.bucket == index_hash(hp, prev.hv) ||
+                   prev.bucket == alt_index(hp, partial_key(prev.hv),
+                                            index_hash(hp, prev.hv)));
             // We get the bucket that this slot is on by computing the alternate
             // index of the previous bucket
-            curr.bucket = alt_index(hp, prev.partial, prev.bucket);
-            lock_one(hp, curr.bucket);
-            if (!buckets_[curr.bucket].occupied(curr.slot)) {
+            curr.bucket = alt_index(hp, partial_key(prev.hv), prev.bucket);
+            OneBucket ob = lock_one(hp, curr.bucket);
+            Bucket& b = buckets_[curr.bucket];
+            if (!b.occupied(curr.slot)) {
                 // We can terminate here
-                unlock_one(curr.bucket);
                 return i;
             }
-            curr.partial = buckets_[curr.bucket].partial(curr.slot);
-            curr.key = buckets_[curr.bucket].key(curr.slot);
-            unlock_one(curr.bucket);
+            curr.hv = hashed_key(b.key(curr.slot));
         }
         return x.depth;
     }
@@ -1099,29 +1141,27 @@ private:
     // an empty slot in one of the buckets in cuckoo_insert. Before the start of
     // this function, the two insert-locked buckets were unlocked in run_cuckoo.
     // At the end of the function, if the function returns true (success), then
-    // the last bucket it looks at (which is either i1 or i2 in run_cuckoo)
-    // remains locked. If the function is unsuccessful, then both insert-locked
-    // buckets will be unlocked.
+    // both insert-locked buckets remain locked. If the function is
+    // unsuccessful, then both insert-locked buckets will be unlocked.
     //
-
     // throws hashpower_changed if it changed during the move
-    bool cuckoopath_move(const size_t hp,
-                         CuckooRecords& cuckoo_path, size_t depth,
-                         const size_t i1, const size_t i2) {
+    bool cuckoopath_move(const size_t hp, CuckooRecords& cuckoo_path,
+                         size_t depth, TwoBuckets& b) {
+        assert(!b.is_active());
         if (depth == 0) {
-            // There is a chance that depth == 0, when try_add_to_bucket sees i1
-            // and i2 as full and cuckoopath_search finds one empty. In this
-            // case, we lock both buckets. If the bucket that cuckoopath_search
-            // found empty isn't empty anymore, we unlock them and return false.
-            // Otherwise, the bucket is empty and insertable, so we hold the
-            // locks and return true.
+            // There is a chance that depth == 0, when try_add_to_bucket sees
+            // both buckets as full and cuckoopath_search finds one empty. In
+            // this case, we lock both buckets. If the slot that
+            // cuckoopath_search found empty isn't empty anymore, we unlock them
+            // and return false. Otherwise, the bucket is empty and insertable,
+            // so we hold the locks and return true.
             const size_t bucket = cuckoo_path[0].bucket;
-            assert(bucket == i1 || bucket == i2);
-            lock_two(hp, i1, i2);
+            assert(bucket == b.i[0] || bucket == b.i[1]);
+            b = lock_two(hp, b.i[0], b.i[1]);
             if (!buckets_[bucket].occupied(cuckoo_path[0].slot)) {
                 return true;
             } else {
-                unlock_two(i1, i2);
+                b.release();
                 return false;
             }
         }
@@ -1129,20 +1169,22 @@ private:
         while (depth > 0) {
             CuckooRecord& from = cuckoo_path[depth-1];
             CuckooRecord& to   = cuckoo_path[depth];
-            size_t fb = from.bucket;
-            size_t fs = from.slot;
-            size_t tb = to.bucket;
-            size_t ts = to.slot;
-
-            size_t ob = 0;
+            Bucket& fb = buckets_[from.bucket];
+            const size_t fs = from.slot;
+            Bucket& tb = buckets_[to.bucket];
+            const size_t ts = to.slot;
+            TwoBuckets twob;
+            OneBucket extrab;
             if (depth == 1) {
-                // Even though we are only swapping out of i1 or i2, we have to
-                // lock both of them along with the slot we are swapping to,
-                // since at the end of this function, i1 and i2 must be locked.
-                ob = (fb == i1) ? i2 : i1;
-                lock_three(hp, fb, tb, ob);
+                // Even though we are only swapping out of one of the original
+                // buckets, we have to lock both of them along with the slot we
+                // are swapping to, since at the end of this function, they both
+                // must be locked. We store tb inside the extrab container so it
+                // is unlocked at the end of the loop.
+                std::tie(twob, extrab) = lock_three(hp, b.i[0], b.i[1],
+                                                    to.bucket);
             } else {
-                lock_two(hp, fb, tb);
+                twob = lock_two(hp, from.bucket, to.bucket);
             }
 
             // We plan to kick out fs, but let's check if it is still there;
@@ -1150,31 +1192,18 @@ private:
             // that happened, just... try again. Also the slot we are filling in
             // may have already been filled in by another thread, or the slot we
             // are moving from may be empty, both of which invalidate the swap.
-            if (!eqfn()(buckets_[fb].key(fs), from.key) ||
-                buckets_[tb].occupied(ts) ||
-                !buckets_[fb].occupied(fs)) {
-                if (depth == 1) {
-                    unlock_three(fb, tb, ob);
-                } else {
-                    unlock_two(fb, tb);
-                }
+            // We only need to check that the hash value is the same, because
+            // even if the keys are different but have the same hash value, the
+            // cuckoopath is still valid.
+            if (hashed_key(fb.key(fs)) != from.hv || tb.occupied(ts) ||
+                !fb.occupied(fs)) {
                 return false;
             }
 
-            buckets_[tb].partial(ts) = buckets_[fb].partial(fs);
-            buckets_[tb].setKV(ts, buckets_[fb].key(fs),
-                               std::move(buckets_[fb].val(fs)));
-            buckets_[fb].eraseKV(fs);
+            Bucket::move_to_bucket(fb, fs, tb, ts);
             if (depth == 1) {
-                // Don't unlock fb or ob, since they are needed in
-                // cuckoo_insert. Only unlock tb if it doesn't unlock the same
-                // bucket as fb or ob.
-                if (lock_ind(tb) != lock_ind(fb) &&
-                    lock_ind(tb) != lock_ind(ob)) {
-                    unlock_one(tb);
-                }
-            } else {
-                unlock_two(fb, tb);
+                // Hold onto the locks contained in twob
+                b = std::move(twob);
             }
             depth--;
         }
@@ -1182,50 +1211,50 @@ private:
     }
 
     // run_cuckoo performs cuckoo hashing on the table in an attempt to free up
-    // a slot on either i1 or i2, which are assumed to be locked before the
-    // start. On success, the bucket and slot that was freed up is stored in
-    // insert_bucket and insert_slot. In order to perform the search and the
-    // swaps, it has to unlock both i1 and i2, which can lead to certain
+    // a slot on either of the insert buckets, which are assumed to be locked
+    // before the start. On success, the bucket and slot that were freed up are
+    // stored in insert_bucket and insert_slot. In order to perform the search
+    // and the swaps, it has to release the locks, which can lead to certain
     // concurrency issues, the details of which are explained in the function.
-    // If run_cuckoo returns ok (success), then the slot it freed up is still
-    // locked. Otherwise it is unlocked.
-    cuckoo_status run_cuckoo(const size_t i1, const size_t i2,
-                             size_t &insert_bucket, size_t &insert_slot) {
-        // We must unlock i1 and i2 here, so that cuckoopath_search and
+    // If run_cuckoo returns ok (success), then the bucket container will be
+    // active, otherwise it will not.
+    cuckoo_status run_cuckoo(TwoBuckets& b, size_t &insert_bucket,
+                             size_t &insert_slot) {
+        // We must unlock the buckets here, so that cuckoopath_search and
         // cuckoopath_move can lock buckets as desired without deadlock.
-        // cuckoopath_move has to look at either i1 or i2 as its last slot, and
-        // it will lock both buckets and leave them locked after finishing. This
-        // way, we know that if cuckoopath_move succeeds, then the buckets
-        // needed for insertion are still locked. If cuckoopath_move fails, the
-        // buckets are unlocked and we try again. This unlocking does present
-        // two problems. The first is that another insert on the same key runs
-        // and, finding that the key isn't in the table, inserts the key into
-        // the table. Then we insert the key into the table, causing a
-        // duplication. To check for this, we search i1 and i2 for the key we
-        // are trying to insert before doing so (this is done in cuckoo_insert,
-        // and requires that both i1 and i2 are locked). Another problem is that
-        // an expansion runs and changes the hashpower, meaning the buckets may
-        // not be valid anymore. In this case, the cuckoopath functions will
-        // have thrown a hashpower_changed exception, which we catch and handle
-        // here.
+        // cuckoopath_move has to move something out of one of the original
+        // buckets as its last operation, and it will lock both buckets and
+        // leave them locked after finishing. This way, we know that if
+        // cuckoopath_move succeeds, then the buckets needed for insertion are
+        // still locked. If cuckoopath_move fails, the buckets are unlocked and
+        // we try again. This unlocking does present two problems. The first is
+        // that another insert on the same key runs and, finding that the key
+        // isn't in the table, inserts the key into the table. Then we insert
+        // the key into the table, causing a duplication. To check for this, we
+        // search the buckets for the key we are trying to insert before doing
+        // so (this is done in cuckoo_insert, and requires that both buckets are
+        // locked). Another problem is that an expansion runs and changes the
+        // hashpower, meaning the buckets may not be valid anymore. In this
+        // case, the cuckoopath functions will have thrown a hashpower_changed
+        // exception, which we catch and handle here.
         size_t hp = get_hashpower();
-        unlock_two(i1, i2);
-
+        assert(b.is_active());
+        b.release();
         CuckooRecords cuckoo_path;
         bool done = false;
         try {
             while (!done) {
-                int depth = cuckoopath_search(hp, cuckoo_path, i1, i2);
+                int depth = cuckoopath_search(hp, cuckoo_path, b.i[0], b.i[1]);
                 if (depth < 0) {
                     break;
                 }
 
-                if (cuckoopath_move(hp, cuckoo_path, depth, i1, i2)) {
+                if (cuckoopath_move(hp, cuckoo_path, depth, b)) {
                     insert_bucket = cuckoo_path[0].bucket;
                     insert_slot = cuckoo_path[0].slot;
-                    assert(insert_bucket == i1 || insert_bucket == i2);
-                    assert(!locks_[lock_ind(i1)].try_lock());
-                    assert(!locks_[lock_ind(i2)].try_lock());
+                    assert(insert_bucket == b.i[0] || insert_bucket == b.i[1]);
+                    assert(!locks_[lock_ind(b.i[0])].try_lock());
+                    assert(!locks_[lock_ind(b.i[1])].try_lock());
                     assert(!buckets_[insert_bucket].occupied(insert_slot));
                     done = true;
                     break;
@@ -1233,7 +1262,8 @@ private:
             }
         } catch (hashpower_changed&) {
             // The hashpower changed while we were trying to cuckoo, which means
-            // we want to retry. i1 and i2 should not be locked in this case.
+            // we want to retry. b.i[0] and b.i[1] should not be locked in this
+            // case.
             return failure_under_expansion;
         }
         return done ? ok : failure;
@@ -1241,18 +1271,19 @@ private:
 
     // try_read_from_bucket will search the bucket for the given key and store
     // the associated value if it finds it.
-    ENABLE_IF(, value_copy_assignable, bool)
-    try_read_from_bucket(const partial_t partial, const key_type &key,
-                         mapped_type &val, const size_t i) const {
-        for (size_t j = 0; j < slot_per_bucket; ++j) {
-            if (!buckets_[i].occupied(j)) {
+    bool try_read_from_bucket(const partial_t partial, const key_type &key,
+                              mapped_type &val, const Bucket& b) const {
+        // Silence a warning from MSVC about partial being unused if is_simple.
+        (void)partial;
+        for (size_t i = 0; i < slot_per_bucket; ++i) {
+            if (!b.occupied(i)) {
                 continue;
             }
-            if (!is_simple && partial != buckets_[i].partial(j)) {
+            if (!is_simple && partial != b.partial(i)) {
                 continue;
             }
-            if (eqfn()(key, buckets_[i].key(j))) {
-                val = buckets_[i].val(j);
+            if (key_eq()(key, b.key(i))) {
+                val = b.val(i);
                 return true;
             }
         }
@@ -1262,28 +1293,32 @@ private:
     // check_in_bucket will search the bucket for the given key and return true
     // if the key is in the bucket, and false if it isn't.
     bool check_in_bucket(const partial_t partial, const key_type &key,
-                         const size_t i) const {
-        for (size_t j = 0; j < slot_per_bucket; ++j) {
-            if (!buckets_[i].occupied(j)) {
+                         const Bucket& b) const {
+        // Silence a warning from MSVC about partial being unused if is_simple.
+        (void)partial;
+        for (size_t i = 0; i < slot_per_bucket; ++i) {
+            if (!b.occupied(i)) {
                 continue;
             }
-            if (!is_simple && partial != buckets_[i].partial(j)) {
+            if (!is_simple && partial != b.partial(i)) {
                 continue;
             }
-            if (eqfn()(key, buckets_[i].key(j))) {
+            if (key_eq()(key, b.key(i))) {
                 return true;
             }
         }
         return false;
     }
 
-    // add_to_bucket will insert the given key-value pair into the slot.
-    template <class V>
-    void add_to_bucket(const partial_t partial, const key_type &key,
-                       V&& val, const size_t i, const size_t j) {
-        assert(!buckets_[i].occupied(j));
-        buckets_[i].partial(j) = partial;
-        buckets_[i].setKV(j, key, std::forward<V>(val));
+    // add_to_bucket will insert the given key-value pair into the slot. The key
+    // and value will be move-constructed into the table, so they are not valid
+    // for use afterwards.
+    template <typename K, typename... Args>
+    void add_to_bucket(const partial_t partial, Bucket& b,
+                       const size_t slot, K&& key, Args&&... val) {
+        assert(!b.occupied(slot));
+        b.partial(slot) = partial;
+        b.setKV(slot, std::forward<K>(key), std::forward<Args>(val)...);
         num_inserts_[get_counterid()].num.fetch_add(
             1, std::memory_order_relaxed);
     }
@@ -1293,21 +1328,23 @@ private:
     // search the entire bucket and return false if it finds the key already in
     // the table (duplicate key error) and true otherwise.
     bool try_find_insert_bucket(const partial_t partial, const key_type &key,
-                                const size_t i, int& j) const {
-        j = -1;
+                                const Bucket& b, int& slot) const {
+        // Silence a warning from MSVC about partial being unused if is_simple.
+        (void)partial;
+        slot = -1;
         bool found_empty = false;
-        for (size_t k = 0; k < slot_per_bucket; ++k) {
-            if (buckets_[i].occupied(k)) {
-                if (!is_simple && partial != buckets_[i].partial(k)) {
+        for (int i = 0; i < static_cast<int>(slot_per_bucket); ++i) {
+            if (b.occupied(i)) {
+                if (!is_simple && partial != b.partial(i)) {
                     continue;
                 }
-                if (eqfn()(key, buckets_[i].key(k))) {
+                if (key_eq()(key, b.key(i))) {
                     return false;
                 }
             } else {
                 if (!found_empty) {
                     found_empty = true;
-                    j = k;
+                    slot = i;
                 }
             }
         }
@@ -1317,16 +1354,16 @@ private:
     // try_del_from_bucket will search the bucket for the given key, and set the
     // slot of the key to empty if it finds it.
     bool try_del_from_bucket(const partial_t partial,
-                             const key_type &key, const size_t i) {
-        for (size_t j = 0; j < slot_per_bucket; ++j) {
-            if (!buckets_[i].occupied(j)) {
+                             const key_type &key, Bucket& b) {
+        for (size_t i = 0; i < slot_per_bucket; ++i) {
+            if (!b.occupied(i)) {
                 continue;
             }
-            if (!is_simple && buckets_[i].partial(j) != partial) {
+            if (!is_simple && b.partial(i) != partial) {
                 continue;
             }
-            if (eqfn()(buckets_[i].key(j), key)) {
-                buckets_[i].eraseKV(j);
+            if (key_eq()(b.key(i), key)) {
+                b.eraseKV(i);
                 num_deletes_[get_counterid()].num.fetch_add(
                     1, std::memory_order_relaxed);
                 return true;
@@ -1337,18 +1374,18 @@ private:
 
     // try_update_bucket will search the bucket for the given key and change its
     // associated value if it finds it.
-    ENABLE_IF(, value_copy_assignable, bool)
-    try_update_bucket(const partial_t partial, const key_type &key,
-                      const mapped_type &value, const size_t i) {
-        for (size_t j = 0; j < slot_per_bucket; ++j) {
-            if (!buckets_[i].occupied(j)) {
+    template <typename V>
+    bool try_update_bucket(const partial_t partial, Bucket& b,
+                           const key_type &key, V&& val) {
+        for (size_t i = 0; i < slot_per_bucket; ++i) {
+            if (!b.occupied(i)) {
                 continue;
             }
-            if (!is_simple && buckets_[i].partial(j) != partial) {
+            if (!is_simple && b.partial(i) != partial) {
                 continue;
             }
-            if (eqfn()(buckets_[i].key(j), key)) {
-                buckets_[i].val(j) = value;
+            if (key_eq()(b.key(i), key)) {
+                b.val(i) = std::forward<V>(val);
                 return true;
             }
         }
@@ -1359,16 +1396,18 @@ private:
     // its associated value with the given function if it finds it.
     template <typename Updater>
     bool try_update_bucket_fn(const partial_t partial, const key_type &key,
-                              Updater fn, const size_t i) {
-        for (size_t j = 0; j < slot_per_bucket; ++j) {
-            if (!buckets_[i].occupied(j)) {
+                              Updater fn, Bucket& b) {
+        // Silence a warning from MSVC about partial being unused if is_simple.
+        (void)partial;
+        for (size_t i = 0; i < slot_per_bucket; ++i) {
+            if (!b.occupied(i)) {
                 continue;
             }
-            if (!is_simple && buckets_[i].partial(j) != partial) {
+            if (!is_simple && b.partial(i) != partial) {
                 continue;
             }
-            if (eqfn()(buckets_[i].key(j), key)) {
-                fn(buckets_[i].val(j));
+            if (key_eq()(b.key(i), key)) {
+                fn(b.val(i));
                 return true;
             }
         }
@@ -1378,14 +1417,14 @@ private:
     // cuckoo_find searches the table for the given key and value, storing the
     // value in the val if it finds the key. It expects the locks to be taken
     // and released outside the function.
-    ENABLE_IF(, value_copy_assignable, cuckoo_status)
-    cuckoo_find(const key_type& key, mapped_type& val,
-                const size_t hv, const size_t i1, const size_t i2) const {
+    cuckoo_status cuckoo_find(const key_type& key, mapped_type& val,
+                              const size_t hv, const size_t i1,
+                              const size_t i2) const {
         const partial_t partial = partial_key(hv);
-        if (try_read_from_bucket(partial, key, val, i1)) {
+        if (try_read_from_bucket(partial, key, val, buckets_[i1])) {
             return ok;
         }
-        if (try_read_from_bucket(partial, key, val, i2)) {
+        if (try_read_from_bucket(partial, key, val, buckets_[i2])) {
             return ok;
         }
         return failure_key_not_found;
@@ -1394,75 +1433,74 @@ private:
     // cuckoo_contains searches the table for the given key, returning true if
     // it's in the table and false otherwise. It expects the locks to be taken
     // and released outside the function.
-    bool cuckoo_contains(const key_type& key, const size_t hv,
-                         const size_t i1, const size_t i2) const {
+    bool cuckoo_contains(const key_type& key, const size_t hv, const size_t i1,
+                         const size_t i2) const {
         const partial_t partial = partial_key(hv);
-        if (check_in_bucket(partial, key, i1)) {
+        if (check_in_bucket(partial, key, buckets_[i1])) {
             return true;
         }
-        if (check_in_bucket(partial, key, i2)) {
+        if (check_in_bucket(partial, key, buckets_[i2])) {
             return true;
         }
         return false;
     }
 
     // cuckoo_insert tries to insert the given key-value pair into an empty slot
-    // in i1 or i2, performing cuckoo hashing if necessary. It expects the locks
-    // to be taken outside the function, but they are released here, since
-    // different scenarios require different handling of the locks. Before
-    // inserting, it checks that the key isn't already in the table. cuckoo
-    // hashing presents multiple concurrency issues, which are explained in the
-    // function.
-    template <class V>
-    cuckoo_status cuckoo_insert(const key_type &key, V&& val, const size_t hv,
-                                const size_t i1, const size_t i2) {
+    // in either of the buckets, performing cuckoo hashing if necessary. It
+    // expects the locks to be taken outside the function, but they are released
+    // here, since different scenarios require different handling of the locks.
+    // Before inserting, it checks that the key isn't already in the table.
+    // cuckoo hashing presents multiple concurrency issues, which are explained
+    // in the function. If the insert fails, the key and value won't be
+    // move-constructed, so they can be retried.
+    template <typename K, typename... Args>
+    cuckoo_status cuckoo_insert(const size_t hv, TwoBuckets b,
+                                K&& key, Args&&... val) {
         int res1, res2;
         const partial_t partial = partial_key(hv);
-        if (!try_find_insert_bucket(partial, key, i1, res1)) {
-            unlock_two(i1, i2);
+        Bucket& b0 = buckets_[b.i[0]];
+        if (!try_find_insert_bucket(partial, key, b0, res1)) {
             return failure_key_duplicated;
         }
-        if (!try_find_insert_bucket(partial, key, i2, res2)) {
-            unlock_two(i1, i2);
+        Bucket& b1 = buckets_[b.i[1]];
+        if (!try_find_insert_bucket(partial, key, b1, res2)) {
             return failure_key_duplicated;
         }
         if (res1 != -1) {
-            add_to_bucket(partial, key, std::forward<V>(val), i1, res1);
-            unlock_two(i1, i2);
+            add_to_bucket(partial, b0, res1, std::forward<K>(key),
+                          std::forward<Args>(val)...);
             return ok;
         }
         if (res2 != -1) {
-            add_to_bucket(partial, key, std::forward<V>(val), i2, res2);
-            unlock_two(i1, i2);
+            add_to_bucket(partial, b1, res2, std::forward<K>(key),
+                          std::forward<Args>(val)...);
             return ok;
         }
 
         // we are unlucky, so let's perform cuckoo hashing
         size_t insert_bucket = 0;
         size_t insert_slot = 0;
-        cuckoo_status st = run_cuckoo(i1, i2, insert_bucket, insert_slot);
+        cuckoo_status st = run_cuckoo(b, insert_bucket, insert_slot);
         if (st == failure_under_expansion) {
             // The run_cuckoo operation operated on an old version of the table,
             // so we have to try again. We signal to the calling insert method
             // to try again by returning failure_under_expansion.
             return failure_under_expansion;
         } else if (st == ok) {
-            assert(!locks_[lock_ind(i1)].try_lock());
-            assert(!locks_[lock_ind(i2)].try_lock());
+            assert(!locks_[lock_ind(b.i[0])].try_lock());
+            assert(!locks_[lock_ind(b.i[1])].try_lock());
             assert(!buckets_[insert_bucket].occupied(insert_slot));
             assert(insert_bucket == index_hash(get_hashpower(), hv) ||
                    insert_bucket == alt_index(get_hashpower(), partial,
                                               index_hash(get_hashpower(), hv)));
             // Since we unlocked the buckets during run_cuckoo, another insert
-            // could have inserted the same key into either i1 or i2, so we
-            // check for that before doing the insert.
-            if (cuckoo_contains(key, hv, i1, i2)) {
-                unlock_two(i1, i2);
+            // could have inserted the same key into either b.i[0] or b.i[1], so
+            // we check for that before doing the insert.
+            if (cuckoo_contains(key, hv, b.i[0], b.i[1])) {
                 return failure_key_duplicated;
             }
-            add_to_bucket(partial, key, std::forward<V>(val),
-                          insert_bucket, insert_slot);
-            unlock_two(i1, i2);
+            add_to_bucket(partial, buckets_[insert_bucket], insert_slot,
+                          std::forward<K>(key), std::forward<Args>(val)...);
             return ok;
         }
         assert(st == failure);
@@ -1484,14 +1522,14 @@ private:
      * @throw libcuckoo_load_factor_too_low if expansion is necessary, but the
      * load factor of the table is below the threshold
      */
-    template <class V>
-    bool cuckoo_insert_loop(const key_type& key, V&& val, size_t hv) {
+    template <typename K, typename... Args>
+    bool cuckoo_insert_loop(size_t hv, K&& key, Args&&... val) {
         cuckoo_status st;
         do {
             auto b = snapshot_and_lock_two(hv);
             size_t hp = get_hashpower();
-            st = cuckoo_insert(key, std::forward<V>(val), hv,
-                               b.first, b.second);
+            st = cuckoo_insert(hv, std::move(b), std::forward<K>(key),
+                               std::forward<Args>(val)...);
             if (st == failure_key_duplicated) {
                 return false;
             } else if (st == failure_table_full) {
@@ -1499,7 +1537,7 @@ private:
                     throw libcuckoo_load_factor_too_low(minimum_load_factor());
                 }
                 // Expand the table and try again
-                cuckoo_expand_simple(hp + 1, true);
+                cuckoo_fast_double(hp);
             }
         } while (st != ok);
         return true;
@@ -1511,10 +1549,10 @@ private:
     cuckoo_status cuckoo_delete(const key_type &key, const size_t hv,
                                 const size_t i1, const size_t i2) {
         const partial_t partial = partial_key(hv);
-        if (try_del_from_bucket(partial, key, i1)) {
+        if (try_del_from_bucket(partial, key, buckets_[i1])) {
             return ok;
         }
-        if (try_del_from_bucket(partial, key, i2)) {
+        if (try_del_from_bucket(partial, key, buckets_[i2])) {
             return ok;
         }
         return failure_key_not_found;
@@ -1523,14 +1561,16 @@ private:
     // cuckoo_update searches the table for the given key and updates its value
     // if it finds it. It expects the locks to be taken and released outside the
     // function.
-    ENABLE_IF(, value_copy_assignable, cuckoo_status)
-    cuckoo_update(const key_type &key, const mapped_type &val, const size_t hv,
-                  const size_t i1, const size_t i2) {
+    template <typename V>
+    cuckoo_status cuckoo_update(const size_t hv, const size_t i1,
+                                const size_t i2, const key_type &key, V&& val) {
         const partial_t partial = partial_key(hv);
-        if (try_update_bucket(partial, key, val, i1)) {
+        if (try_update_bucket(partial, buckets_[i1], key,
+                              std::forward<V>(val))) {
             return ok;
         }
-        if (try_update_bucket(partial, key, val, i2)) {
+        if (try_update_bucket(partial, buckets_[i2], key,
+                              std::forward<V>(val))) {
             return ok;
         }
         return failure_key_not_found;
@@ -1542,13 +1582,13 @@ private:
     // outside the function.
     template <typename Updater>
     cuckoo_status cuckoo_update_fn(const key_type &key, Updater fn,
-                                   const size_t hv,
-                                   const size_t i1, const size_t i2) {
+                                   const size_t hv, const size_t i1,
+                                   const size_t i2) {
         const partial_t partial = partial_key(hv);
-        if (try_update_bucket_fn(partial, key, fn, i1)) {
+        if (try_update_bucket_fn(partial, key, fn, buckets_[i1])) {
             return ok;
         }
-        if (try_update_bucket_fn(partial, key, fn, i2)) {
+        if (try_update_bucket_fn(partial, key, fn, buckets_[i2])) {
             return ok;
         }
         return failure_key_not_found;
@@ -1585,17 +1625,116 @@ private:
                 hashsize(hp));
     }
 
+    void move_buckets(size_t current_hp, size_t new_hp,
+                      size_t start_lock_ind, size_t end_lock_ind) {
+        for (; start_lock_ind < end_lock_ind; ++start_lock_ind) {
+            for (size_t bucket_i = start_lock_ind;
+                 bucket_i < hashsize(current_hp);
+                 bucket_i += locks_t::size()) {
+                // By doubling the table size, the index_hash and alt_index of
+                // each key got one bit added to the top, at position
+                // current_hp, which means anything we have to move will either
+                // be at the same bucket position, or exactly
+                // hashsize(current_hp) later than the current bucket
+                Bucket& old_bucket = buckets_[bucket_i];
+                const size_t new_bucket_i = bucket_i + hashsize(current_hp);
+                Bucket& new_bucket = buckets_[new_bucket_i];
+                size_t new_bucket_slot = 0;
+
+                // Move each item from the old bucket that needs moving into the
+                // new bucket
+                for (size_t slot = 0; slot < slot_per_bucket; ++slot) {
+                    if (!old_bucket.occupied(slot)) {
+                        continue;
+                    }
+                    const size_t hv = hashed_key(old_bucket.key(slot));
+                    const size_t old_ihash = index_hash(current_hp, hv);
+                    const size_t old_ahash = alt_index(
+                        current_hp, old_bucket.partial(slot), old_ihash);
+                    const size_t new_ihash = index_hash(new_hp, hv);
+                    const size_t new_ahash = alt_index(
+                        new_hp, old_bucket.partial(slot), new_ihash);
+                    if ((bucket_i == old_ihash && new_ihash == new_bucket_i) ||
+                        (bucket_i == old_ahash && new_ahash == new_bucket_i)) {
+                        // We're moving the key from the old bucket to the new
+                        // one
+                        Bucket::move_to_bucket(
+                            old_bucket, slot, new_bucket, new_bucket_slot++);
+                    } else {
+                        // Check that we don't want to move the new key
+                        assert(
+                            (bucket_i == old_ihash && new_ihash == old_ihash) ||
+                            (bucket_i == old_ahash && new_ahash == old_ahash));
+                    }
+                }
+            }
+            // Now we can unlock the lock, because all the buckets corresponding
+            // to it have been processed
+            locks_[start_lock_ind].unlock();
+        }
+    }
+
+    // cuckoo_fast_double will double the size of the table by taking advantage
+    // of the properties of index_hash and alt_index.
+    cuckoo_status cuckoo_fast_double(size_t current_hp) {
+        size_t new_hp = current_hp + 1;
+        size_t mhp = maximum_hashpower();
+        if (mhp != NO_MAXIMUM_HASHPOWER && new_hp > mhp) {
+            throw libcuckoo_maximum_hashpower_exceeded(new_hp);
+        }
+
+        std::lock_guard<expansion_lock_t> l(expansion_lock_);
+        if (get_hashpower() != current_hp) {
+            // Most likely another expansion ran before this one could grab the
+            // locks
+            LIBCUCKOO_DBG("%s", "another expansion is on-going\n");
+            return failure_under_expansion;
+        }
+
+        locks_.allocate(std::min(locks_t::size(), hashsize(new_hp)));
+        auto unlocker = snapshot_and_lock_all();
+        buckets_.resize(buckets_.size() * 2);
+        set_hashpower(new_hp);
+
+        // We gradually unlock the new table, by processing each of the buckets
+        // corresponding to each lock we took. For each slot in an old bucket,
+        // we either leave it in the old bucket, or move it to the corresponding
+        // new bucket. After we're done with the bucket, we release the lock on
+        // it and the new bucket, letting other threads use the new map
+        // gradually. We only unlock the locks being used by the old table,
+        // because unlocking new locks would enable operations on the table
+        // before we want them.
+        const size_t locks_to_move = std::min(locks_t::size(),
+                                              hashsize(current_hp));
+        parallel_exec(0, locks_to_move, kNumCores(),
+                      [this, current_hp, new_hp](size_t start, size_t end) {
+                          move_buckets(current_hp, new_hp, start, end);
+                      });
+        parallel_exec(locks_to_move, locks_.allocated_size(), kNumCores(),
+                      [this](size_t i, size_t end) {
+                          for (; i < end; ++i) {
+                              locks_[i].unlock();
+                          }
+                      });
+        // Since we've unlocked the buckets ourselves, we don't need the
+        // unlocker to do it for us.
+        unlocker.deactivate();
+        return ok;
+    }
+
     // insert_into_table is a helper function used by cuckoo_expand_simple to
-    // fill up the new table.
+    // fill up the new table. It moves data out of the original table into the
+    // new one.
     static void insert_into_table(
         cuckoohash_map<Key, T, Hash, Pred, Alloc, slot_per_bucket>& new_map,
         buckets_t& buckets, size_t i, size_t end) {
         for (; i < end; ++i) {
             for (size_t j = 0; j < slot_per_bucket; ++j) {
                 if (buckets[i].occupied(j)) {
+                    storage_value_type& kvpair = buckets[i].storage_kvpair(j);
                     new_map.insert(
-                        buckets[i].key(j),
-                        std::move((mapped_type&)buckets[i].val(j)));
+                        std::move(kvpair.first),
+                        std::move(kvpair.second));
                 }
             }
         }
@@ -1622,7 +1761,7 @@ private:
             (!is_expansion && new_hp >= hp)) {
             // Most likely another expansion ran before this one could grab the
             // locks
-            LIBCUCKOO_DBG("another expansion is on-going\n");
+            LIBCUCKOO_DBG("%s", "another expansion is on-going\n");
             return failure_under_expansion;
         }
 
@@ -1725,11 +1864,13 @@ public:
         class templated_iterator :
             public std::iterator<std::bidirectional_iterator_tag, value_type> {
 
+            typedef typename std::conditional<
+                IS_CONST, const buckets_t, buckets_t>::type
+            maybe_const_buckets_t;
+
             // The buckets locked and owned by the locked table being iterated
             // over.
-            std::reference_wrapper<
-                typename std::conditional<
-                IS_CONST, const buckets_t, buckets_t>::type> buckets_;
+            std::reference_wrapper<maybe_const_buckets_t> buckets_;
 
             // The shared boolean indicating whether the iterator points to a
             // still-locked table or not. It should never be nullptr.
@@ -1773,7 +1914,8 @@ public:
             //! iterator is at the end.
             ENABLE_IF(, !IS_CONST, value_type&) operator*() {
                 check_iterator();
-                return buckets_.get()[index_].kvpair(slot_);
+                return buckets_.get()[static_cast<size_t>(index_)].
+                    kvpair(static_cast<size_t>(slot_));
             }
 
             //! Return a pointer to the immutable key-value pair pointed to by
@@ -1800,9 +1942,11 @@ public:
                 // Move forward until we get to a slot that is occupied, or we
                 // get to the end
                 check_iterator();
-                for (; (size_t)index_ < buckets_.get().size(); ++index_) {
-                    while ((size_t)++slot_ < SLOT_PER_BUCKET) {
-                        if (buckets_.get()[index_].occupied(slot_)) {
+                for (; static_cast<size_t>(index_) < buckets_.get().size();
+                     ++index_) {
+                    while (static_cast<size_t>(++slot_) < SLOT_PER_BUCKET) {
+                        if (buckets_.get()[static_cast<size_t>(index_)].
+                            occupied(static_cast<size_t>(slot_))) {
                             return *this;
                         }
                     }
@@ -1831,7 +1975,8 @@ public:
                 check_iterator();
                 for (; index_ >= 0; --index_) {
                     while (--slot_ >= 0) {
-                        if (buckets_.get()[index_].occupied(slot_)) {
+                        if (buckets_.get()[static_cast<size_t>(index_)]
+                            .occupied(static_cast<size_t>(slot_))) {
                             return *this;
                         }
                     }
@@ -1871,12 +2016,14 @@ public:
             // end of the table, or that spot is occupied, stay. Otherwise, step
             // forward to the next data item, or to the end of the table.
             templated_iterator(
-                typename decltype(buckets_)::type& buckets,
+                maybe_const_buckets_t& buckets,
                 std::shared_ptr<bool> has_table_lock, size_t index, size_t slot)
                 : buckets_(buckets), has_table_lock_(has_table_lock),
-                  index_(index), slot_(slot) {
+                  index_(static_cast<intmax_t>(index)),
+                  slot_(static_cast<intmax_t>(slot)) {
                 if (std::make_pair(index_, slot_) != end_pos(buckets) &&
-                    !buckets[index_].occupied(slot_)) {
+                    !buckets[static_cast<size_t>(index_)]
+                    .occupied(static_cast<size_t>(slot_))) {
                     operator++();
                 }
             }
@@ -1919,7 +2066,8 @@ public:
             check_table();
             const auto end_pos = const_iterator::end_pos(buckets_.get());
             return iterator(buckets_.get(), has_table_lock_,
-                            end_pos.first, end_pos.second);
+                            static_cast<size_t>(end_pos.first),
+                            static_cast<size_t>(end_pos.second));
         }
 
         //! end returns a const_iterator to the end of the table
@@ -1927,7 +2075,8 @@ public:
             check_table();
             const auto end_pos = const_iterator::end_pos(buckets_.get());
             return const_iterator(buckets_.get(), has_table_lock_,
-                                  end_pos.first, end_pos.second);
+                                  static_cast<size_t>(end_pos.first),
+                                  static_cast<size_t>(end_pos.second));
         }
 
         //! cend returns a const_iterator to the end of the table
@@ -1956,6 +2105,49 @@ public:
 
     // This class is a friend for unit testing
     friend class UnitTestInternalAccess;
+
+    // Member variables
+private:
+    // 2**hashpower is the number of buckets. This cannot be changed unless all
+    // the locks are taken on the table. Since it is still read and written by
+    // multiple threads not necessarily synchronized by a lock, we keep it
+    // atomic
+    std::atomic<size_t> hashpower_;
+
+    // vector of buckets. The size or memory location of the buckets cannot be
+    // changed unless all the locks are taken on the table. Thus, it is only safe
+    // to access the buckets_ vector when you have at least one lock held.
+    buckets_t buckets_;
+
+    // array of locks. marked mutable, so that const methods can take locks.
+    // Even though it's a vector, it should not ever change in size after the
+    // initial allocation.
+    mutable locks_t locks_;
+
+    // a lock to synchronize expansions
+    expansion_lock_t expansion_lock_;
+
+    // per-core counters for the number of inserts and deletes
+    std::vector<
+        cacheint, typename allocator_type::template rebind<cacheint>::other>
+    num_inserts_, num_deletes_;
+
+    // stores the minimum load factor allowed for automatic expansions. Whenever
+    // an automatic expansion is triggered (during an insertion where cuckoo
+    // hashing fails, for example), we check the load factor against this
+    // double, and throw an exception if it's lower than this value. It can be
+    // used to signal when the hash function is bad or the input adversarial.
+    std::atomic<double> minimum_load_factor_;
+
+    // stores the maximum hashpower allowed for any expansions. If set to
+    // NO_MAXIMUM_HASHPOWER, this limit will be disregarded.
+    std::atomic<size_t> maximum_hashpower_;
+
+    // The hash function
+    hasher hash_fn;
+
+    // The equality function
+    key_equal eq_fn;
 };
 
 #endif // _CUCKOOHASH_MAP_HH
diff --git a/include/cuckoohash_util.hh b/include/cuckoohash_util.hh
index 7fe43d6..38521af 100644
--- a/include/cuckoohash_util.hh
+++ b/include/cuckoohash_util.hh
@@ -4,15 +4,54 @@
 #define _CUCKOOHASH_UTIL_HH
 
 #include <exception>
-#include <pthread.h>
+#include <thread>
+#include <vector>
 #include "cuckoohash_config.hh" // for LIBCUCKOO_DEBUG
 
 #if LIBCUCKOO_DEBUG
-#  define LIBCUCKOO_DBG(fmt, args...)                                   \
-     fprintf(stderr, "\x1b[32m""[libcuckoo:%s:%d:%lu] " fmt"" "\x1b[0m", \
-             __FILE__,__LINE__, (unsigned long)pthread_self(), ##args)
+#  define LIBCUCKOO_DBG(fmt, ...)                                          \
+     fprintf(stderr, "\x1b[32m""[libcuckoo:%s:%d:%lu] " fmt"" "\x1b[0m",   \
+             __FILE__,__LINE__, (unsigned long)std::hash<std::thread::id>()( \
+                 std::this_thread::get_id()), /* id is opaque: log its hash */ __VA_ARGS__)
 #else
-#  define LIBCUCKOO_DBG(fmt, args...)  do {} while (0)
+#  define LIBCUCKOO_DBG(fmt, ...)  do {} while (0)
+#endif
+
+/**
+ * alignas() requires GCC >= 4.9, so we stick with the alignment attribute for
+ * GCC.
+ */
+#ifdef __GNUC__
+#define LIBCUCKOO_ALIGNAS(x) __attribute__((aligned(x)))
+#else
+#define LIBCUCKOO_ALIGNAS(x) alignas(x)
+#endif
+
+/**
+ * At higher warning levels, MSVC produces an annoying warning that alignment
+ * may cause wasted space: "structure was padded due to __declspec(align())".
+ */
+#ifdef _MSC_VER
+#define LIBCUCKOO_SQUELCH_PADDING_WARNING __pragma(warning(suppress : 4324))
+#else
+#define LIBCUCKOO_SQUELCH_PADDING_WARNING
+#endif
+
+/**
+ * thread_local requires GCC >= 4.8 and is not supported in some clang versions,
+ * so we use __thread if thread_local is not supported
+ */
+#define LIBCUCKOO_THREAD_LOCAL thread_local
+#if defined(__clang__)
+#  if !__has_feature(cxx_thread_local)
+#    undef LIBCUCKOO_THREAD_LOCAL
+#    define LIBCUCKOO_THREAD_LOCAL __thread
+#  endif
+#elif defined(__GNUC__)
+#  if __GNUC__ == 4 && __GNUC_MINOR__ < 8
+#    undef LIBCUCKOO_THREAD_LOCAL
+#    define LIBCUCKOO_THREAD_LOCAL __thread
+#  endif
 #endif
 
 // For enabling certain methods based on a condition. Here's an example.
@@ -41,7 +80,7 @@ public:
     libcuckoo_load_factor_too_low(const double lf)
         : load_factor_(lf) {}
 
-    virtual const char* what() const noexcept {
+    virtual const char* what() const noexcept override {
         return "Automatic expansion triggered when load factor was below "
             "minimum threshold";
     }
@@ -71,7 +110,7 @@ public:
     libcuckoo_maximum_hashpower_exceeded(const size_t hp)
         : hashpower_(hp) {}
 
-    virtual const char* what() const noexcept {
+    virtual const char* what() const noexcept override {
         return "Expansion beyond maximum hashpower";
     }
 
@@ -85,4 +124,54 @@ private:
     const size_t hashpower_;
 };
 
+// Allocates an array of the given size and value-initializes each element with
+// the 0-argument constructor
+template <class T, class Alloc>
+T* create_array(const size_t size) {
+    Alloc allocator;
+    T* arr = allocator.allocate(size);
+    // Initialize all the elements, safely deallocating and destroying
+    // everything in case of error.
+    size_t i;
+    try {
+        for (i = 0; i < size; ++i) {
+            allocator.construct(&arr[i]);
+        }
+    } catch (...) {
+        for (size_t j = 0; j < i; ++j) {
+            allocator.destroy(&arr[j]);
+        }
+        allocator.deallocate(arr, size);
+        throw;
+    }
+    return arr;
+}
+
+// Destroys every element of an array of the given size and then deallocates the
+// memory.
+template <class T, class Alloc>
+void destroy_array(T* arr, const size_t size) {
+    Alloc allocator;
+    for (size_t i = 0; i < size; ++i) {
+        allocator.destroy(&arr[i]);
+    }
+    allocator.deallocate(arr, size);
+}
+
+// executes the function over the given range split over num_threads threads
+template <class F>
+static void parallel_exec(size_t start, size_t end,
+                          size_t num_threads, F func) {
+    size_t work_per_thread = (end - start) / num_threads;
+    std::vector<std::thread> threads(num_threads);
+    for (size_t i = 0; i < num_threads - 1; ++i) {
+        threads[i] = std::thread(func, start, start + work_per_thread);
+        start += work_per_thread;
+    }
+    threads[num_threads - 1] = std::thread(func, start, end);
+    for (std::thread& t : threads) {
+        t.join();
+    }
+}
+
 #endif // _CUCKOOHASH_UTIL_HH
diff --git a/include/lazy_array.hh b/include/lazy_array.hh
new file mode 100644
index 0000000..80191e7
--- /dev/null
+++ b/include/lazy_array.hh
@@ -0,0 +1,119 @@
+/** \file */
+
+#ifndef _LAZY_ARRAY_HH
+#define _LAZY_ARRAY_HH
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+
+#include "cuckoohash_util.hh"
+
+// lazy array. A fixed-size array, broken up into segments that are dynamically
+// allocated, only when requested. The array size and segment size are
+// pre-defined, and are powers of two. The user must make sure the necessary
+// segments are allocated before accessing the array.
+template <uint8_t OFFSET_BITS, uint8_t SEGMENT_BITS,
+          class T, class Alloc = std::allocator<T>
+          >
+class lazy_array {
+    static_assert(SEGMENT_BITS + OFFSET_BITS <= sizeof(size_t)*8,
+                  "The number of segment and offset bits cannot exceed "
+                  " the number of bits in a size_t");
+private:
+    static const size_t SEGMENT_SIZE = size_t(1) << OFFSET_BITS; // size_t-wide shift
+    static const size_t NUM_SEGMENTS = size_t(1) << SEGMENT_BITS;
+    // The segments array itself is mutable, so that the const subscript
+    // operator can still add segments
+    mutable std::array<T*, NUM_SEGMENTS> segments_;
+
+    void move_other_array(lazy_array&& arr) {
+        clear(); // release our own segments before taking over arr's
+        std::copy(arr.segments_.begin(), arr.segments_.end(),
+                  segments_.begin());
+        std::fill(arr.segments_.begin(), arr.segments_.end(), nullptr);
+    }
+
+    static size_t get_segment(size_t i) { // static: also called from const operator[]
+        return i >> OFFSET_BITS;
+    }
+
+    static const size_t OFFSET_MASK = ((size_t(1) << OFFSET_BITS) - 1);
+    static size_t get_offset(size_t i) { // static: also called from const operator[]
+        return i & OFFSET_MASK;
+    }
+
+public:
+    lazy_array(): segments_{{nullptr}} {}
+
+    // No copying
+    lazy_array(const lazy_array&) = delete;
+    lazy_array& operator=(const lazy_array&) = delete;
+
+    // Moving is allowed
+    lazy_array(lazy_array&& arr) : segments_{{nullptr}} {
+        move_other_array(std::move(arr));
+    }
+    lazy_array& operator=(lazy_array&& arr) {
+        move_other_array(std::move(arr)); // was a call to a non-existent helper
+        return *this;
+    }
+
+    ~lazy_array() {
+        clear();
+    }
+
+    void clear() {
+        for (size_t i = 0; i < segments_.size(); ++i) {
+            if (segments_[i] != nullptr) {
+                destroy_array<T, Alloc>(segments_[i], SEGMENT_SIZE);
+                segments_[i] = nullptr;
+            }
+        }
+    }
+
+    T& operator[](size_t i) {
+        assert(segments_[get_segment(i)] != nullptr);
+        return segments_[get_segment(i)][get_offset(i)];
+    }
+
+    const T& operator[](size_t i) const {
+        assert(segments_[get_segment(i)] != nullptr);
+        return segments_[get_segment(i)][get_offset(i)];
+    }
+
+    // Ensures that the array has enough segments to index target elements, not
+    // exceeding the total size. The user must ensure that the array is properly
+    // allocated before accessing a certain index. This saves having to check
+    // every index operation.
+    void allocate(size_t target) {
+        assert(target <= size());
+        if (target == 0) {
+            return;
+        }
+        const size_t last_segment = get_segment(target - 1);
+        for (size_t i = 0; i <= last_segment; ++i) {
+            if (segments_[i] == nullptr) {
+                segments_[i] = create_array<T, Alloc>(SEGMENT_SIZE);
+            }
+        }
+    }
+
+    // Returns the number of elements in the array that can be indexed, starting
+    // contiguously from the beginning.
+    size_t allocated_size() const {
+        size_t num_allocated_segments = 0;
+        for (;
+             (num_allocated_segments < NUM_SEGMENTS &&
+              segments_[num_allocated_segments] != nullptr);
+             ++num_allocated_segments) {}
+        return num_allocated_segments * SEGMENT_SIZE;
+    }
+
+    static constexpr size_t size() {
+        return size_t(1) << (OFFSET_BITS + SEGMENT_BITS);
+    }
+};
+
+#endif // _LAZY_ARRAY_HH
diff --git a/include/make_unique.hpp b/include/make_unique.hpp
new file mode 100644
index 0000000..1ace46b
--- /dev/null
+++ b/include/make_unique.hpp
@@ -0,0 +1,45 @@
+#ifndef __PRECXX14_MAKE_UNIQUE__
+#define __PRECXX14_MAKE_UNIQUE__
+
+#if __cplusplus >= 201402L  // C++14 and later: reuse the standard facility
+    #include <memory>
+    using std::make_unique;
+#else  // older standards: provide make_unique at global scope ourselves
+
+#include <cstddef>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+    template<class T> struct _Unique_if {  // non-array T
+        typedef std::unique_ptr<T> _Single_object;
+    };
+
+    template<class T> struct _Unique_if<T[]> {  // array of unknown bound
+        typedef std::unique_ptr<T[]> _Unknown_bound;
+    };
+
+    template<class T, std::size_t N> struct _Unique_if<T[N]> {  // fixed-size array
+        typedef void _Known_bound;
+    };
+
+    template<class T, class... Args>
+        typename _Unique_if<T>::_Single_object
+        make_unique(Args&&... args) {
+            return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+        }
+
+    template<class T>
+        typename _Unique_if<T>::_Unknown_bound
+        make_unique(std::size_t n) {
+            typedef typename std::remove_extent<T>::type U;
+            return std::unique_ptr<T>(new U[n]());  // value-initialized elements
+        }
+
+    template<class T, class... Args>
+        typename _Unique_if<T>::_Known_bound
+        make_unique(Args&&...) = delete;
+
+#endif // __cplusplus >= 201402L
+#endif //__PRECXX14_MAKE_UNIQUE__
+
diff --git a/include/spline.h b/include/spline.h
new file mode 100644
index 0000000..28e3dea
--- /dev/null
+++ b/include/spline.h
@@ -0,0 +1,404 @@
+/*
+ * spline.h
+ *
+ * simple cubic spline interpolation library without external
+ * dependencies
+ *
+ * ---------------------------------------------------------------------
+ * Copyright (C) 2011, 2014 Tino Kluge (ttk448 at gmail.com)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * ---------------------------------------------------------------------
+ *
+ */
+
+
+#ifndef TK_SPLINE_H
+#define TK_SPLINE_H
+
+#include <cstdio>
+#include <cassert>
+#include <vector>
+#include <algorithm>
+
+
+// unnamed namespace only because the implementation is in this
+// header file and we don't want to export symbols to the obj files
+namespace
+{
+
+namespace tk
+{
+
+// band matrix solver
+class band_matrix
+{
+private:
+    std::vector< std::vector<double> > m_upper;  // upper band
+    std::vector< std::vector<double> > m_lower;  // lower band
+public:
+    band_matrix() {};                             // constructor
+    band_matrix(int dim, int n_u, int n_l);       // constructor
+    ~band_matrix() {};                            // destructor
+    void resize(int dim, int n_u, int n_l);      // init with dim,n_u,n_l
+    int dim() const;                             // matrix dimension
+    int num_upper() const
+    {
+        return m_upper.size()-1;
+    }
+    int num_lower() const
+    {
+        return m_lower.size()-1;
+    }
+    // access operator
+    double & operator () (int i, int j);            // write
+    double   operator () (int i, int j) const;      // read
+    // we can store an additional diagonal (in m_lower)
+    double& saved_diag(int i);
+    double  saved_diag(int i) const;
+    void lu_decompose();
+    std::vector<double> r_solve(const std::vector<double>& b) const;
+    std::vector<double> l_solve(const std::vector<double>& b) const;
+    std::vector<double> lu_solve(const std::vector<double>& b,
+                                 bool is_lu_decomposed=false);
+
+};
+
+
+// spline interpolation
+class spline
+{
+public:
+    enum bd_type {
+        first_deriv = 1,
+        second_deriv = 2
+    };
+
+private:
+    std::vector<double> m_x,m_y;            // x,y coordinates of points
+    // interpolation parameters
+    // f(x) = a*(x-x_i)^3 + b*(x-x_i)^2 + c*(x-x_i) + y_i
+    std::vector<double> m_a,m_b,m_c;        // spline coefficients
+    double  m_b0, m_c0;                     // for left extrapol
+    bd_type m_left, m_right;
+    double  m_left_value, m_right_value;
+    bool    m_force_linear_extrapolation;
+
+public:
+    // set default boundary condition to be zero curvature at both ends
+    spline(): m_left(second_deriv), m_right(second_deriv),
+        m_left_value(0.0), m_right_value(0.0),
+        m_force_linear_extrapolation(false)
+    {
+        ;
+    }
+
+    // optional, but if called it has to come before set_points()
+    void set_boundary(bd_type left, double left_value,
+                      bd_type right, double right_value,
+                      bool force_linear_extrapolation=false);
+    void set_points(const std::vector<double>& x,
+                    const std::vector<double>& y, bool cubic_spline=true);
+    double operator() (double x) const;
+};
+
+
+
+// ---------------------------------------------------------------------
+// implementation part, which could be separated into a cpp file
+// ---------------------------------------------------------------------
+
+
+// band_matrix implementation
+// -------------------------
+
+band_matrix::band_matrix(int dim, int n_u, int n_l)
+{
+    resize(dim, n_u, n_l);
+}
+void band_matrix::resize(int dim, int n_u, int n_l)
+{
+    assert(dim>0);
+    assert(n_u>=0);
+    assert(n_l>=0);
+    m_upper.resize(n_u+1);
+    m_lower.resize(n_l+1);
+    for(size_t i=0; i<m_upper.size(); i++) {
+        m_upper[i].resize(dim);
+    }
+    for(size_t i=0; i<m_lower.size(); i++) {
+        m_lower[i].resize(dim);
+    }
+}
+int band_matrix::dim() const
+{
+    if(m_upper.size()>0) {
+        return m_upper[0].size();
+    } else {
+        return 0;
+    }
+}
+
+
+// defines the new operator (), so that we can access the elements
+// by A(i,j), index going from i=0,...,dim()-1
+double & band_matrix::operator () (int i, int j)
+{
+    int k=j-i;       // what band is the entry
+    assert( (i>=0) && (i<dim()) && (j>=0) && (j<dim()) );
+    assert( (-num_lower()<=k) && (k<=num_upper()) );
+    // k=0 -> diagonal, k<0 lower left part, k>0 upper right part
+    if(k>=0)   return m_upper[k][i];
+    else	    return m_lower[-k][i];
+}
+double band_matrix::operator () (int i, int j) const
+{
+    int k=j-i;       // what band is the entry
+    assert( (i>=0) && (i<dim()) && (j>=0) && (j<dim()) );
+    assert( (-num_lower()<=k) && (k<=num_upper()) );
+    // k=0 -> diagonal, k<0 lower left part, k>0 upper right part
+    if(k>=0)   return m_upper[k][i];
+    else	    return m_lower[-k][i];
+}
+// second diag (used in LU decomposition), saved in m_lower
+double band_matrix::saved_diag(int i) const
+{
+    assert( (i>=0) && (i<dim()) );
+    return m_lower[0][i];
+}
+double & band_matrix::saved_diag(int i)
+{
+    assert( (i>=0) && (i<dim()) );
+    return m_lower[0][i];
+}
+
+// LR-Decomposition of a band matrix
+void band_matrix::lu_decompose()
+{
+    int  i_max,j_max;
+    int  j_min;
+    double x;
+
+    // preconditioning
+    // normalize column i so that a_ii=1
+    for(int i=0; i<this->dim(); i++) {
+        assert(this->operator()(i,i)!=0.0);
+        this->saved_diag(i)=1.0/this->operator()(i,i);
+        j_min=std::max(0,i-this->num_lower());
+        j_max=std::min(this->dim()-1,i+this->num_upper());
+        for(int j=j_min; j<=j_max; j++) {
+            this->operator()(i,j) *= this->saved_diag(i);
+        }
+        this->operator()(i,i)=1.0;          // prevents rounding errors
+    }
+
+    // Gauss LR-Decomposition
+    for(int k=0; k<this->dim(); k++) {
+        i_max=std::min(this->dim()-1,k+this->num_lower());  // num_lower not a mistake!
+        for(int i=k+1; i<=i_max; i++) {
+            assert(this->operator()(k,k)!=0.0);
+            x=-this->operator()(i,k)/this->operator()(k,k);
+            this->operator()(i,k)=-x;                         // assembly part of L
+            j_max=std::min(this->dim()-1,k+this->num_upper());
+            for(int j=k+1; j<=j_max; j++) {
+                // assembly part of R
+                this->operator()(i,j)=this->operator()(i,j)+x*this->operator()(k,j);
+            }
+        }
+    }
+}
+// solves Ly=b
+std::vector<double> band_matrix::l_solve(const std::vector<double>& b) const
+{
+    assert( this->dim()==(int)b.size() );
+    std::vector<double> x(this->dim());
+    int j_start;
+    double sum;
+    for(int i=0; i<this->dim(); i++) {
+        sum=0;
+        j_start=std::max(0,i-this->num_lower());
+        for(int j=j_start; j<i; j++) sum += this->operator()(i,j)*x[j];
+        x[i]=(b[i]*this->saved_diag(i)) - sum;
+    }
+    return x;
+}
+// solves Rx=y
+std::vector<double> band_matrix::r_solve(const std::vector<double>& b) const
+{
+    assert( this->dim()==(int)b.size() );
+    std::vector<double> x(this->dim());
+    int j_stop;
+    double sum;
+    for(int i=this->dim()-1; i>=0; i--) {
+        sum=0;
+        j_stop=std::min(this->dim()-1,i+this->num_upper());
+        for(int j=i+1; j<=j_stop; j++) sum += this->operator()(i,j)*x[j];
+        x[i]=( b[i] - sum ) / this->operator()(i,i);
+    }
+    return x;
+}
+
+std::vector<double> band_matrix::lu_solve(const std::vector<double>& b,
+        bool is_lu_decomposed)
+{
+    // Solves Ax=b. Unless the caller states that the matrix already holds
+    // its decomposition, lu_decompose() is run first (it rewrites the entries).
+    assert( this->dim()==(int)b.size() );
+    if(!is_lu_decomposed) {
+        this->lu_decompose();
+    }
+    // forward substitution, then back substitution on its result
+    return this->r_solve(this->l_solve(b));
+}
+
+
+
+
+// spline implementation
+// -----------------------
+
+void spline::set_boundary(spline::bd_type left, double left_value,
+                          spline::bd_type right, double right_value,
+                          bool force_linear_extrapolation)
+{
+    // Only records the settings; set_points() is what reads them, which is
+    // why no points may be stored yet when this is called.
+    assert(m_x.empty());
+    m_left=left;   m_left_value=left_value;       // left end: type and value
+    m_right=right; m_right_value=right_value;     // right end: type and value
+    m_force_linear_extrapolation=force_linear_extrapolation;
+}
+
+
+void spline::set_points(const std::vector<double>& x,
+                        const std::vector<double>& y, bool cubic_spline)
+{   // stores the points and fills the per-segment coefficients m_a, m_b, m_c
+    assert(x.size()==y.size());     // one y value per x value
+    assert(x.size()>2);             // at least three points are required
+    m_x=x;
+    m_y=y;
+    int   n=x.size();
+    // TODO: maybe sort x and y, rather than asserting that x is already sorted
+    for(int i=0; i<n-1; i++) {
+        assert(m_x[i]<m_x[i+1]);    // x must be strictly increasing
+    }
+
+    if(cubic_spline==true) { // cubic spline interpolation
+        // setting up the matrix and right hand side of the equation system
+        // for the parameters b[] (one equation per point)
+        band_matrix A(n,1,1);       // only the diagonal and its two neighbours are filled below
+        std::vector<double>  rhs(n);
+        for(int i=1; i<n-1; i++) {  // interior points; rows 0 and n-1 come from the boundary conditions
+            A(i,i-1)=1.0/3.0*(x[i]-x[i-1]);
+            A(i,i)=2.0/3.0*(x[i+1]-x[i-1]);
+            A(i,i+1)=1.0/3.0*(x[i+1]-x[i]);
+            rhs[i]=(y[i+1]-y[i])/(x[i+1]-x[i]) - (y[i]-y[i-1])/(x[i]-x[i-1]);
+        }
+        // boundary conditions (m_left/m_right and their values were stored by set_boundary)
+        if(m_left == spline::second_deriv) {
+            // 2*b[0] = f''
+            A(0,0)=2.0;
+            A(0,1)=0.0;
+            rhs[0]=m_left_value;
+        } else if(m_left == spline::first_deriv) {
+            // c[0] = f', needs to be re-expressed in terms of b:
+            // (2b[0]+b[1])(x[1]-x[0]) = 3 ((y[1]-y[0])/(x[1]-x[0]) - f')
+            A(0,0)=2.0*(x[1]-x[0]);
+            A(0,1)=1.0*(x[1]-x[0]);
+            rhs[0]=3.0*((y[1]-y[0])/(x[1]-x[0])-m_left_value);
+        } else {
+            assert(false);          // any other left boundary type is rejected
+        }
+        if(m_right == spline::second_deriv) {
+            // 2*b[n-1] = f''
+            A(n-1,n-1)=2.0;
+            A(n-1,n-2)=0.0;
+            rhs[n-1]=m_right_value;
+        } else if(m_right == spline::first_deriv) {
+            // c[n-1] = f', needs to be re-expressed in terms of b:
+            // (b[n-2]+2b[n-1])(x[n-1]-x[n-2])
+            // = 3 (f' - (y[n-1]-y[n-2])/(x[n-1]-x[n-2]))
+            A(n-1,n-1)=2.0*(x[n-1]-x[n-2]);
+            A(n-1,n-2)=1.0*(x[n-1]-x[n-2]);
+            rhs[n-1]=3.0*(m_right_value-(y[n-1]-y[n-2])/(x[n-1]-x[n-2]));
+        } else {
+            assert(false);          // any other right boundary type is rejected
+        }
+
+        // solve the equation system to obtain the parameters b[]
+        m_b=A.lu_solve(rhs);
+
+        // calculate parameters a[] and c[] based on b[] (segments 0..n-2)
+        m_a.resize(n);
+        m_c.resize(n);
+        for(int i=0; i<n-1; i++) {
+            m_a[i]=1.0/3.0*(m_b[i+1]-m_b[i])/(x[i+1]-x[i]);
+            m_c[i]=(y[i+1]-y[i])/(x[i+1]-x[i])
+                   - 1.0/3.0*(2.0*m_b[i]+m_b[i+1])*(x[i+1]-x[i]);
+        }
+    } else { // linear interpolation: a=b=0, c is the slope of each segment
+        m_a.resize(n);
+        m_b.resize(n);              // NOTE(review): index n-1 is not assigned in this
+        m_c.resize(n);              // branch; confirm it cannot keep a stale value
+        for(int i=0; i<n-1; i++) {
+            m_a[i]=0.0;
+            m_b[i]=0.0;
+            m_c[i]=(m_y[i+1]-m_y[i])/(m_x[i+1]-m_x[i]);
+        }
+    }
+
+    // for left extrapolation coefficients (read by operator() when x<m_x[0])
+    m_b0 = (m_force_linear_extrapolation==false) ? m_b[0] : 0.0;
+    m_c0 = m_c[0];
+
+    // for the right extrapolation coefficients
+    // f_{n-1}(x) = b*(x-x_{n-1})^2 + c*(x-x_{n-1}) + y_{n-1}
+    double h=x[n-1]-x[n-2];
+    // m_b[n-1] is determined by the boundary condition
+    m_a[n-1]=0.0;
+    m_c[n-1]=3.0*m_a[n-2]*h*h+2.0*m_b[n-2]*h+m_c[n-2];   // = f'_{n-2}(x_{n-1})
+    if(m_force_linear_extrapolation==true)
+        m_b[n-1]=0.0;               // drops the quadratic term of the right extrapolation
+}
+
+double spline::operator() (double x) const
+{
+    // Evaluates the spline at x; outside the stored x range it extrapolates.
+    const size_t last=m_x.size()-1;
+    // lower_bound yields the first knot >= x; one step back is the knot
+    // below x, clamped to 0 so that x<=m_x[0] is measured from m_x[0]
+    const int below=int(std::lower_bound(m_x.begin(),m_x.end(),x)-m_x.begin())-1;
+    const int seg=std::max(below, 0);
+    const double dx=x-m_x[seg];
+
+    if(x<m_x[0]) {
+        // left of the first knot: quadratic with the coefficients m_b0, m_c0
+        return (m_b0*dx + m_c0)*dx + m_y[0];
+    }
+    if(x>m_x[last]) {
+        // right of the last knot: quadratic with the coefficients stored at n-1
+        return (m_b[last]*dx + m_c[last])*dx + m_y[last];
+    }
+
+    // between the knots: cubic of segment seg, evaluated in Horner form
+    return ((m_a[seg]*dx + m_b[seg])*dx + m_c[seg])*dx + m_y[seg];
+}
+
+
+} // namespace tk
+
+
+} // namespace
+
+#endif /* TK_SPLINE_H */
diff --git a/scripts/ConvertBootstrapsToTSV.py b/scripts/ConvertBootstrapsToTSV.py
new file mode 100644
index 0000000..12bb878
--- /dev/null
+++ b/scripts/ConvertBootstrapsToTSV.py
@@ -0,0 +1,95 @@
+import gzip
+import struct
+import argparse
+import os
+import logging
+import logging.handlers
+import sys
+import errno
+import json
+
+# from: http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError as exc:  # Python >2.5
+        if exc.errno == errno.EEXIST and os.path.isdir(path):
+            pass
+        else:
+            raise
+
+def main(args):
+    logging.basicConfig(level=logging.INFO)
+    quantDir = args.quantDir
+    auxDir = "aux"
+
+    # Check for a custom auxDir
+    with open(os.path.sep.join([quantDir, "cmd_info.json"])) as cmdFile:
+        dat = json.load(cmdFile)
+        if 'auxDir' in dat:
+            auxDir = dat['auxDir']
+
+    bootstrapFile = os.path.sep.join([quantDir, "aux", "bootstrap", "bootstraps.gz"])
+    nameFile = os.path.sep.join([quantDir, "aux", "bootstrap", "names.tsv.gz"])
+    if not os.path.isfile(bootstrapFile):
+       logging.error("The required bootstrap file {} doesn't appear to exist".format(bootstrapFile)) 
+       sys.exit(1)
+    if not os.path.isfile(nameFile):
+       logging.error("The required transcript name file {} doesn't appear to exist".format(nameFile)) 
+       sys.exit(1)
+    
+    txpNames = None
+    with gzip.open(nameFile) as nf:
+        txpNames = nf.read().strip().split('\t')
+    
+    ntxp = len(txpNames)
+    logging.info("Expecting bootstrap info for {} transcripts".format(ntxp))
+    
+    with open(os.path.sep.join([quantDir, "aux", "meta_info.json"])) as fh:
+        meta_info = json.load(fh)
+        
+    if meta_info['samp_type'] == 'gibbs':
+        s = struct.Struct('<' + 'i' * ntxp)
+    elif meta_info['samp_type'] == 'bootstrap':
+        s = struct.Struct('@' + 'd' * ntxp)
+    else:
+        logging.error("Unknown sampling method: {}".format(meta_info['samp_type']))
+        sys.exit(1)
+        
+    numBoot = 0
+    outDir = args.outDir
+    if os.path.exists(outDir):
+        if os.path.isfile(outDir):
+            logging.error("The requested output directory {} already exists, but is a file".format(outDir))
+            sys.exit(1)
+        else:
+            logging.warn("The requested output directory {} already exists --- any existing bootstraps may be overwritten".format(outDir))
+    else:
+        mkdir_p(outDir)
+    
+    outFile = os.path.sep.join([outDir, 'quant_bootstraps.tsv'])
+    with open(outFile,'w') as ofile:
+        # write the header
+        ofile.write('\t'.join(txpNames) + '\n')
+        
+        # Now, iterate over the bootstrap samples and write each
+        with gzip.open(bootstrapFile) as bf:
+            while True:
+                try:
+                    x = s.unpack_from(bf.read(s.size))
+                    xs = map(str, x)
+                    ofile.write('\t'.join(xs) + '\n')
+                    numBoot += 1
+                except:
+                    logging.info("read all bootstrap values")
+                    break
+
+    logging.info("wrote {} bootstrap samples".format(numBoot))
+    logging.info("converted bootstraps successfully.")
+
+if __name__ == "__main__":
+   parser = argparse.ArgumentParser(description="Convert bootstrap results to text format") 
+   parser.add_argument('quantDir', type=str, help="path to (sailfish / salmon) quantification directory")
+   parser.add_argument('outDir', type=str, help="path to directory where results should be written")
+   args = parser.parse_args()
+   main(args)
diff --git a/scripts/compile.sh b/scripts/compile.sh
new file mode 100644
index 0000000..5a4005d
--- /dev/null
+++ b/scripts/compile.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+set -e
+
+branch=$1
+version=$2
+
+echo "Building salmon [branch = ${branch}]. Tagging version as ${version}"
+
+# Activate Holy Build Box environment.
+source /hbb_exe/activate
+
+set -x
+
+# Install things we need
+yum install -y --quiet wget
+wget http://download.fedoraproject.org/pub/epel/5/x86_64/epel-release-5-4.noarch.rpm
+rpm -i --quiet epel-release-5-4.noarch.rpm
+#yum install -y --quiet git
+yum install -y --quiet unzip
+yum install -y --quiet bzip2-devel.x86_64
+yum install -y --quiet xz-devel.x86_64
+
+curl -k -L https://github.com/COMBINE-lab/salmon/archive/${branch}.zip -o ${branch}.zip
+unzip ${branch}.zip
+mv salmon-${branch} salmon
+cd salmon
+mkdir build
+cd build
+cmake -DFETCH_BOOST=TRUE ..
+make
+make install
+make test
+cd ../scripts
+bash make-release.sh -v ${version} -n linux_x86_64
+cd ../RELEASES
+cp *.tar.gz /io/
diff --git a/scripts/fetchRapMap.sh b/scripts/fetchRapMap.sh
index 68f49f2..a148f05 100755
--- a/scripts/fetchRapMap.sh
+++ b/scripts/fetchRapMap.sh
@@ -17,10 +17,10 @@ if [ -d ${INSTALL_DIR}/src/rapmap ] ; then
 fi
 
 mkdir -p ${EXTERNAL_DIR}
-curl -k -L https://github.com/COMBINE-lab/RapMap/archive/large-index.zip -o ${EXTERNAL_DIR}/rapmap.zip
+curl -k -L https://github.com/COMBINE-lab/RapMap/archive/develop-salmon.zip -o ${EXTERNAL_DIR}/rapmap.zip
 rm -fr ${EXTERNAL_DIR}/RapMap
 unzip ${EXTERNAL_DIR}/rapmap.zip -d ${EXTERNAL_DIR}
-mv ${EXTERNAL_DIR}/RapMap-large-index ${EXTERNAL_DIR}/RapMap
+mv ${EXTERNAL_DIR}/RapMap-develop-salmon ${EXTERNAL_DIR}/RapMap
 
 mkdir -p ${INSTALL_DIR}/include/rapmap
 mkdir -p ${INSTALL_DIR}/src/rapmap
@@ -34,3 +34,4 @@ cp -r ${EXTERNAL_DIR}/RapMap/src/*.cpp ${INSTALL_DIR}/src/rapmap
 cp -r ${EXTERNAL_DIR}/RapMap/include/tclap ${INSTALL_DIR}/include/rapmap
 cp -r ${EXTERNAL_DIR}/RapMap/include/*.h ${INSTALL_DIR}/include/rapmap
 cp -r ${EXTERNAL_DIR}/RapMap/include/*.hpp ${INSTALL_DIR}/include/rapmap
+cp -r ${EXTERNAL_DIR}/RapMap/include/emphf ${INSTALL_DIR}/include/rapmap
diff --git a/scripts/make-release.sh b/scripts/make-release.sh
index 7014feb..77fa60c 100755
--- a/scripts/make-release.sh
+++ b/scripts/make-release.sh
@@ -33,7 +33,7 @@ echo -e "Version = ${version}"
 echo -e "Host = ${host}"
 
 # create the binary directory 
-betaname=SalmonBeta-${version}_${host}
+betaname=Salmon-${version}_${host}
 mkdir ${DIR}/../RELEASES
 mkdir ${DIR}/../RELEASES/${betaname}
 mkdir ${DIR}/../RELEASES/${betaname}/bin
diff --git a/scripts/runner.sh b/scripts/runner.sh
new file mode 100644
index 0000000..5fa6736
--- /dev/null
+++ b/scripts/runner.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Runs the given command; an `--interleaved <file>` FASTQ argument is first split into -1/-2 FIFOs.
+cmd="$*"
+interleaved_file=$(printf '%s\n' "$cmd" | sed -n 's/.*--interleaved\s\+\(\S\+\)\s\+.*/\1/p')
+
+if [ -z "$interleaved_file" ]
+then
+    # Run normally in this branch; "$@" keeps each argument intact (no re-splitting)
+    "$@"
+else
+    new_cmd=$(printf '%s\n' "$cmd" | sed 's/--interleaved\s\+\S\+\s\+//')
+    tmpdir=$(mktemp -d)
+    # Cleanup on exit
+    trap 'rm -rf "$tmpdir"' EXIT INT TERM HUP
+    p1="$tmpdir/p1.fq"
+    p2="$tmpdir/p2.fq"
+    mkfifo "$p1"
+    mkfifo "$p2"
+    # The following interleaved to split conversion is courtesy of
+    # https://gist.github.com/nathanhaigh/3521724
+    (paste - - - - - - - - | tee >(cut -f 1-4 | tr '\t' '\n' > "$p1") | cut -f 5-8 | tr '\t' '\n' > "$p2") < "$interleaved_file" &
+    echo "Running command [${new_cmd} -1 $p1 -2 $p2]"
+    ${new_cmd} -1 "$p1" -2 "$p2"
+fi
\ No newline at end of file
diff --git a/src/BuildSalmonIndex.cpp b/src/BuildSalmonIndex.cpp
index ca65ade..6f6ef3d 100644
--- a/src/BuildSalmonIndex.cpp
+++ b/src/BuildSalmonIndex.cpp
@@ -21,7 +21,6 @@
 #include "cereal/types/vector.hpp"
 #include "cereal/archives/binary.hpp"
 
-#include "jellyfish/config.h"
 #include "jellyfish/err.hpp"
 #include "jellyfish/misc.hpp"
 #include "jellyfish/jellyfish.hpp"
@@ -45,8 +44,9 @@
 #include "SalmonUtils.hpp"
 #include "SalmonIndex.hpp"
 #include "GenomicFeature.hpp"
+#include "spdlog/fmt/ostr.h"
+#include "spdlog/fmt/fmt.h"
 #include "spdlog/spdlog.h"
-#include "spdlog/details/format.h"
 
 using my_mer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 1>;
 
@@ -74,9 +74,10 @@ int salmonIndex(int argc, char* argv[]) {
     string indexTypeStr = "fmd";
     uint32_t saSampInterval = 1;
     uint32_t auxKmerLen = 0;
-    uint32_t maxThreads = std::thread::hardware_concurrency();
     uint32_t numThreads;
     bool useQuasi{false};
+    bool perfectHash{false};
+    bool gencodeRef{false};
 
     po::options_description generic("Command Line Options");
     generic.add_options()
@@ -86,8 +87,15 @@ int salmonIndex(int argc, char* argv[]) {
     ("kmerLen,k", po::value<uint32_t>(&auxKmerLen)->default_value(31)->required(),
                     "The size of k-mers that should be used for the quasi index.")
     ("index,i", po::value<string>()->required(), "Salmon index.")
-    ("threads,p", po::value<uint32_t>(&numThreads)->default_value(maxThreads)->required(),
+    ("gencode", po::bool_switch(&gencodeRef)->default_value(false), 
+         "This flag will expect the input transcript fasta to be in GENCODE format, and will split "
+         "the transcript name at the first \'|\' character.  These reduced names will be used in the "
+         "output and when looking for these transcripts in a gene to transcript GTF.")
+    ("threads,p", po::value<uint32_t>(&numThreads)->default_value(2)->required(),
                             "Number of threads to use (only used for computing bias features)")
+    ("perfectHash", po::bool_switch(&perfectHash)->default_value(false), 
+                             "[quasi index only] Build the index using a perfect hash rather than a dense hash.  This "
+                             "will require less memory (especially during quantification), but will take longer to construct")
     ("type", po::value<string>(&indexTypeStr)->default_value("quasi")->required(), "The type of index to build; options are \"fmd\" and \"quasi\" "
     							   			   "\"quasi\" is recommended, and \"fmd\" may be removed in the future")
     ("sasamp,s", po::value<uint32_t>(&saSampInterval)->default_value(1)->required(),
@@ -98,7 +106,7 @@ int salmonIndex(int argc, char* argv[]) {
     ;
 
     po::variables_map vm;
-    int ret = 1;
+    int ret = 0;
     try {
 
         po::store(po::command_line_parser(argc, argv).options(generic).run(), vm);
@@ -111,7 +119,7 @@ Creates a salmon index.
 )";
             std::cout << hstring << std::endl;
             std::cout << generic << std::endl;
-            std::exit(1);
+            std::exit(0);
         }
         po::notify(vm);
 
@@ -149,8 +157,8 @@ Creates a salmon index.
         auto fileSink = std::make_shared<spdlog::sinks::simple_file_sink_mt>(logPath.string(), true);
         auto consoleSink = std::make_shared<spdlog::sinks::stderr_sink_mt>();
         auto consoleLog = spdlog::create("consoleLog", {consoleSink});
-        auto fileLog = spdlog::create("fileLog", {fileSink});
-        auto jointLog = spdlog::create("jointLog", {fileSink, consoleSink});
+        auto fileLog = spdlog::create("fLog", {fileSink});
+        auto jointLog = spdlog::create("jLog", {fileSink, consoleSink});
 
         std::vector<std::string> transcriptFiles = {transcriptFile};
         fmt::MemoryWriter infostr;
@@ -174,10 +182,26 @@ Creates a salmon index.
 
             optWriter << auxKmerLen;
             argVec->push_back(optWriter.str());
+	    optWriter.clear();
+	    
             argVec->push_back("-t");
             argVec->push_back(transcriptFile);
             argVec->push_back("-i");
             argVec->push_back(outputPrefix.string());
+
+	    argVec->push_back("-x");
+	    optWriter << numThreads;
+	    argVec->push_back(optWriter.str());
+	    optWriter.clear();
+	    
+            if (perfectHash) {
+                argVec->push_back("--perfectHash");
+            }
+            if (gencodeRef) {
+                argVec->push_back("-s");
+                argVec->push_back("\"|\"");
+            }
+
             sidx.reset(new SalmonIndex(jointLog, SalmonIndexType::QUASI));
         } else {
             // Build the FMD-based index
@@ -208,11 +232,13 @@ Creates a salmon index.
         std::cerr << "exception : [" << e.what() << "]. Exiting.\n";
         std::exit(1);
     } catch (const spdlog::spdlog_ex& ex) {
-        std::cout << "logger failed with : [" << ex.what() << "]. Exiting.\n";
+        std::cerr << "logger failed with : [" << ex.what() << "]. Exiting.\n";
+        ret = 1;
     } catch (std::exception& e) {
         std::cerr << "Exception : [" << e.what() << "]\n";
         std::cerr << argv[0] << " index was invoked improperly.\n";
         std::cerr << "For usage information, try " << argv[0] << " index --help\nExiting.\n";
+        ret = 1;
     }
     return ret;
 }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1190e95..f31a927 100755
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,7 +4,7 @@ ${GAT_SOURCE_DIR}/include/eigen3
 ${GAT_SOURCE_DIR}/external
 ${GAT_SOURCE_DIR}/external/cereal/include
 ${GAT_SOURCE_DIR}/external/install/include
-${GAT_SOURCE_DIR}/external/install/include/jellyfish-2.2.3
+${GAT_SOURCE_DIR}/external/install/include/jellyfish-2.2.6
 ${GAT_SOURCE_DIR}/external/install/include/bwa
 ${ZLIB_INCLUDE_DIR}
 ${TBB_INCLUDE_DIRS}
@@ -12,12 +12,26 @@ ${Boost_INCLUDE_DIRS}
 ${GAT_SOURCE_DIR}/external/install/include/rapmap
 )
 
+if (JELLYFISH_FOUND)
+    include_directories(${JELLYFISH_INCLUDE_DIR})
+else()
+    include_directories(${GAT_SOURCE_DIR}/external/install/include/jellyfish-2.2.6)
+endif()
+
 set ( SALMON_MAIN_SRCS
 QSufSort.c
 is.c
 bwt_gen.c
 bwtindex.c
 xxhash.c
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapFileSystem.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAIndexer.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAIndex.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAMapper.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapUtils.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/HitManager.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/rank9b.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/bit_array.c
 CollapsedEMOptimizer.cpp
 CollapsedGibbsSampler.cpp
 Salmon.cpp
@@ -29,14 +43,6 @@ SequenceBiasModel.cpp
 StadenUtils.cpp
 TranscriptGroup.cpp
 GZipWriter.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapFileSystem.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAIndexer.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAIndex.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAMapper.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapUtils.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/HitManager.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/rank9b.cpp
-${GAT_SOURCE_DIR}/external/install/src/rapmap/bit_array.c
 #${GAT_SOURCE_DIR}/external/install/src/rapmap/sais.c
 )
 
@@ -53,8 +59,13 @@ BWAUtils.cpp
 LibraryFormat.cpp
 GenomicFeature.cpp
 VersionChecker.cpp
+SBModel.cpp
+FastxParser.cpp
 SalmonUtils.cpp
+DistributionUtils.cpp
 SalmonStringUtils.cpp
+SimplePosBias.cpp
+SGSmooth.cpp
 )
 
 set ( UNIT_TESTS_SRCS
@@ -76,16 +87,24 @@ message("TBB_LIBRARIES = ${TBB_LIBRARIES}")
 message("Boost_LIBRARIES = ${Boost_LIBRARIES}")
 
 # Set the RPATH
-if (APPLE)
-    ## This DOES NOT do what I / any one sane, expects.  Setting the
-    ## linker path on OSX is messed up.  Just tell the user to use
-    ## DYLD_FALLBACK_LIBRARY_PATH for now
-    set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
-else()
+if (NOT APPLE)
     set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/../../lib:$ORIGIN/:$ORIGIN/../../external/install/lib")
-endif()
+    set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+else()
+  # use, i.e. don't skip the full RPATH for the build tree
+  set(CMAKE_SKIP_BUILD_RPATH  FALSE)
 
-set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+  # when building, don't use the install RPATH already
+  # (but later on when installing)
+  set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) 
+
+  # the RPATH to be used when installing
+  set(CMAKE_INSTALL_RPATH "")
+
+  # don't add the automatically determined parts of the RPATH
+  # which point to directories outside the build tree to the install RPATH
+  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
+endif()
 
 # Build the Salmon library
 add_library(salmon_core STATIC ${SALMON_LIB_SRCS} )
@@ -101,22 +120,6 @@ add_executable(unitTests ${UNIT_TESTS_SRCS})
 
 #set_target_properties(salmon_core salmon PROPERTIES LINK_SEARCH_END_STATIC TRUE)
 
-# Grumble grumble . . . OSX
-if (APPLE)
-    # only attempt install_name_tool for tbb if we installed it
-    if (${TBB_LIBRARY_DIRS} MATCHES ${GAT_SOURCE_DIR}/external/install/lib)
-        add_custom_command(TARGET salmon
-            PRE_LINK
-            COMMAND install_name_tool -id @rpath/libtbb.dylib ${TBB_LIBRARY_DIRS}/libtbb.dylib
-            COMMAND install_name_tool -id @rpath/libtbbmalloc.dylib ${TBB_LIBRARY_DIRS}/libtbbmalloc.dylib
-        )
-    endif()
-
-else()
-    # related to complete static linking --- on hold
-    set (BOOST_THREAD_LIBRARY)
-endif()
-
 # our suffix array construction libraries
 set (SUFFARRAY_LIB ${GAT_SOURCE_DIR}/external/install/lib/libdivsufsort.a)
 set (SUFFARRAY64_LIB ${GAT_SOURCE_DIR}/external/install/lib/libdivsufsort64.a)
@@ -166,32 +169,32 @@ target_link_libraries(unitTests
     ${FAST_MALLOC_LIB}
     )
 
-##
-#  This ensures that the salmon executable should work with or without `make install`
-##
-# Grumble grumble . . . OSX
-if (APPLE)
-    # only attempt install_name_tool for tbb if we installed it
-    if (${TBB_LIBRARY_DIRS} MATCHES ${GAT_SOURCE_DIR}/external/install/lib)
-        add_custom_command(TARGET salmon
-            POST_BUILD
-            COMMAND install_name_tool -change libtbb.dylib @rpath/libtbb.dylib ${GAT_SOURCE_DIR}/build/src/salmon
-            COMMAND install_name_tool -change libtbbmalloc.dylib @rpath/libtbbmalloc.dylib ${GAT_SOURCE_DIR}/build/src/salmon
-            COMMAND install_name_tool -change libtbbmalloc_proxy.dylib @rpath/libtbbmalloc_proxy.dylib ${GAT_SOURCE_DIR}/build/src/salmon
-            COMMAND install_name_tool -add_rpath  ${GAT_SOURCE_DIR}/external/install/lib ${GAT_SOURCE_DIR}/build/src/salmon
-            )
-        add_custom_command(TARGET unitTests
-            POST_BUILD
-            COMMAND install_name_tool -change libtbb.dylib @rpath/libtbb.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
-            COMMAND install_name_tool -change libtbbmalloc.dylib @rpath/libtbbmalloc.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
-            COMMAND install_name_tool -change libtbbmalloc_proxy.dylib @rpath/libtbbmalloc_proxy.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
-            COMMAND install_name_tool -add_rpath  ${GAT_SOURCE_DIR}/external/install/lib ${GAT_SOURCE_DIR}/build/src/unitTests
-            )
-    endif()
-else()
-    # related to complete static linking --- on hold    
-    set (BOOST_THREAD_LIBRARY)
-endif()
+### No need for this, I think
+##  This ensures that the salmon executable should work with or without `make install`
+###
+## Grumble grumble . . . OSX
+#if (APPLE)
+#    # only attempt install_name_tool for tbb if we installed it
+#    if (${TBB_LIBRARY_DIRS} MATCHES ${GAT_SOURCE_DIR}/external/install/lib)
+#        add_custom_command(TARGET salmon
+#            POST_BUILD
+#            COMMAND install_name_tool -change libtbb.dylib @rpath/libtbb.dylib ${GAT_SOURCE_DIR}/build/src/salmon
+#            COMMAND install_name_tool -change libtbbmalloc.dylib @rpath/libtbbmalloc.dylib ${GAT_SOURCE_DIR}/build/src/salmon
+#            COMMAND install_name_tool -change libtbbmalloc_proxy.dylib @rpath/libtbbmalloc_proxy.dylib ${GAT_SOURCE_DIR}/build/src/salmon
+#            COMMAND install_name_tool -add_rpath  ${GAT_SOURCE_DIR}/external/install/lib ${GAT_SOURCE_DIR}/build/src/salmon
+#            )
+#        add_custom_command(TARGET unitTests
+#            POST_BUILD
+#            COMMAND install_name_tool -change libtbb.dylib @rpath/libtbb.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
+#            COMMAND install_name_tool -change libtbbmalloc.dylib @rpath/libtbbmalloc.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
+#            COMMAND install_name_tool -change libtbbmalloc_proxy.dylib @rpath/libtbbmalloc_proxy.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
+#            COMMAND install_name_tool -add_rpath  ${GAT_SOURCE_DIR}/external/install/lib ${GAT_SOURCE_DIR}/build/src/unitTests
+#            )
+#    endif()
+#else()
+#    # related to complete static linking --- on hold    
+#    set (BOOST_THREAD_LIBRARY)
+#endif()
 
 #if (APPLE)
 #	add_custom_command(TARGET salmon
@@ -228,8 +231,9 @@ install(TARGETS salmon salmon_core
                 ARCHIVE DESTINATION lib
         )
 
-install(TARGETS unitTests
-        RUNTIME DESTINATION tests
+add_custom_command(TARGET unitTests POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:unitTests> ${GAT_SOURCE_DIR}/tests/$<TARGET_FILE_NAME:unitTests>
+    COMMENT "Copying unitTests"
 )
 
 set(POST_INSTALL_SCRIPT ${GAT_SOURCE_DIR}/cmake/PostInstall.cmake)
diff --git a/src/CollapsedEMOptimizer.cpp b/src/CollapsedEMOptimizer.cpp
index 1ae4bab..7ce4ed7 100644
--- a/src/CollapsedEMOptimizer.cpp
+++ b/src/CollapsedEMOptimizer.cpp
@@ -1,227 +1,234 @@
-#include <vector>
-#include <unordered_map>
 #include <atomic>
+#include <unordered_map>
+#include <vector>
 
-#include "tbb/task_scheduler_init.h"
+#include "tbb/blocked_range.h"
 #include "tbb/parallel_for.h"
 #include "tbb/parallel_for_each.h"
 #include "tbb/parallel_reduce.h"
-#include "tbb/blocked_range.h"
 #include "tbb/partitioner.h"
+#include "tbb/task_scheduler_init.h"
 
 //#include "fastapprox.h"
 #include <boost/math/special_functions/digamma.hpp>
 
 // C++ string formatting library
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 
-#include "cuckoohash_map.hh"
 #include "Eigen/Dense"
+#include "cuckoohash_map.hh"
 
+#include "AlignmentLibrary.hpp"
+#include "BootstrapWriter.hpp"
 #include "CollapsedEMOptimizer.hpp"
+#include "MultinomialSampler.hpp"
+#include "ReadExperiment.hpp"
+#include "ReadPair.hpp"
+#include "SalmonMath.hpp"
 #include "Transcript.hpp"
 #include "TranscriptGroup.hpp"
-#include "SalmonMath.hpp"
-#include "AlignmentLibrary.hpp"
-#include "ReadPair.hpp"
 #include "UnpairedRead.hpp"
-#include "ReadExperiment.hpp"
-#include "MultinomialSampler.hpp"
-#include "BootstrapWriter.hpp"
 
-using BlockedIndexRange =  tbb::blocked_range<size_t>;
+using BlockedIndexRange = tbb::blocked_range<size_t>;
 
 // intelligently chosen value adopted from
 // https://github.com/pachterlab/kallisto/blob/master/src/EMAlgorithm.h#L18
 constexpr double minEQClassWeight = std::numeric_limits<double>::denorm_min();
 constexpr double minWeight = std::numeric_limits<double>::denorm_min();
+// A bit more conservative of a minimum as an argument to the digamma function.
+constexpr double digammaMin = 1e-10;
 
 double normalize(std::vector<tbb::atomic<double>>& vec) {
-    double sum{0.0};
-    for (auto& v : vec) {
-        sum += v;
-    }
+  double sum{0.0};
+  for (auto& v : vec) {
+    sum += v;
+  }
 
-    // too small!
-    if (sum < minWeight) {
-        return sum;
-    }
+  // too small!
+  if (sum < minWeight) {
+    return sum;
+  }
 
-    double invSum  = 1.0 / sum;
-    for (auto& v : vec) {
-        v.store(v.load() * invSum);
-    }
+  double invSum = 1.0 / sum;
+  for (auto& v : vec) {
+    v.store(v.load() * invSum);
+  }
 
-    return sum;
+  return sum;
 }
 
-
 template <typename VecT>
 double truncateCountVector(VecT& alphas, double cutoff) {
-    // Truncate tiny expression values
-    double alphaSum = 0.0;
+  // Truncate tiny expression values
+  double alphaSum = 0.0;
 
-    for (size_t i = 0; i < alphas.size(); ++i) {
-        if (alphas[i] <= cutoff) { alphas[i] = 0.0; }
-        alphaSum += alphas[i];
+  for (size_t i = 0; i < alphas.size(); ++i) {
+    if (alphas[i] <= cutoff) {
+      alphas[i] = 0.0;
     }
-    return alphaSum;
+    alphaSum += alphas[i];
+  }
+  return alphaSum;
+}
+
+template <typename VecT>
+double truncateCountVector(VecT& alphas, std::vector<double>& cutoff) {
+  // Truncate tiny expression values in place, using a separate cutoff per element
+  double alphaSum = 0.0;  // running sum of the values left after truncation
+
+  for (size_t i = 0; i < alphas.size(); ++i) {
+    if (alphas[i] <= cutoff[i]) {  // cutoff is indexed like alphas; its size is not checked here
+      alphas[i] = 0.0;
+    }
+    alphaSum += alphas[i];
+  }
+  return alphaSum;  // sum of alphas after truncation
+}
 
 /**
  * Single-threaded EM-update routine for use in bootstrapping
  */
 template <typename VecT>
-void EMUpdate_(
-        std::vector<std::vector<uint32_t>>& txpGroupLabels,
-        std::vector<std::vector<double>>& txpGroupCombinedWeights,
-        std::vector<uint64_t>& txpGroupCounts,
-        std::vector<Transcript>& transcripts,
-        Eigen::VectorXd& effLens,
-        const VecT& alphaIn,
-        VecT& alphaOut) {
-
-    assert(alphaIn.size() == alphaOut.size());
-
-    size_t numEqClasses = txpGroupLabels.size();
-    for (size_t eqID = 0; eqID < numEqClasses; ++eqID) {
-        uint64_t count = txpGroupCounts[eqID];
-        // for each transcript in this class
-        const std::vector<uint32_t>& txps = txpGroupLabels[eqID];
-        const auto& auxs = txpGroupCombinedWeights[eqID];
-
-        double denom = 0.0;
-        size_t groupSize = txps.size();
-        // If this is a single-transcript group,
-        // then it gets the full count.  Otherwise,
-        // update according to our VBEM rule.
-        if (BOOST_LIKELY(groupSize > 1)) {
-           for (size_t i = 0; i < groupSize; ++i) {
-               auto tid = txps[i];
-               auto aux = auxs[i]; 
-               double v = alphaIn[tid] * aux;
-               denom += v;
-            }
-
-            if (denom <= ::minEQClassWeight) {
-                // tgroup.setValid(false);
-            } else {
-                double invDenom = count / denom;
-                for (size_t i = 0; i < groupSize; ++i) {
-                    auto tid = txps[i];
-                    auto aux = auxs[i]; 
-                    double v = alphaIn[tid] * aux;
-                    if (!std::isnan(v)) {
-                        salmon::utils::incLoop(alphaOut[tid], v * invDenom);
-                    }
-                }
-            }
-        } else {
-            salmon::utils::incLoop(alphaOut[txps.front()], count);
+void EMUpdate_(std::vector<std::vector<uint32_t>>& txpGroupLabels,
+               std::vector<std::vector<double>>& txpGroupCombinedWeights,
+               std::vector<uint64_t>& txpGroupCounts,
+               std::vector<Transcript>& transcripts, const VecT& alphaIn,
+               VecT& alphaOut) {
+
+  assert(alphaIn.size() == alphaOut.size());
+
+  size_t numEqClasses = txpGroupLabels.size();
+  for (size_t eqID = 0; eqID < numEqClasses; ++eqID) {
+    uint64_t count = txpGroupCounts[eqID];
+    // for each transcript in this class
+    const std::vector<uint32_t>& txps = txpGroupLabels[eqID];
+    const auto& auxs = txpGroupCombinedWeights[eqID];
+
+    double denom = 0.0;
+    size_t groupSize = txps.size();
+    // If this is a single-transcript group,
+    // then it gets the full count.  Otherwise,
+    // update according to our VBEM rule.
+    if (BOOST_LIKELY(groupSize > 1)) {
+      for (size_t i = 0; i < groupSize; ++i) {
+        auto tid = txps[i];
+        auto aux = auxs[i];
+        double v = alphaIn[tid] * aux;
+        denom += v;
+      }
+
+      if (denom <= ::minEQClassWeight) {
+        // tgroup.setValid(false);
+      } else {
+        double invDenom = count / denom;
+        for (size_t i = 0; i < groupSize; ++i) {
+          auto tid = txps[i];
+          auto aux = auxs[i];
+          double v = alphaIn[tid] * aux;
+          if (!std::isnan(v)) {
+            salmon::utils::incLoop(alphaOut[tid], v * invDenom);
+          }
         }
+      }
+    } else {
+      salmon::utils::incLoop(alphaOut[txps.front()], count);
     }
+  }
 }
 
 /**
  * Single-threaded VBEM-update routine for use in bootstrapping
  */
 template <typename VecT>
-void VBEMUpdate_(
-		std::vector<std::vector<uint32_t>>& txpGroupLabels,
-		std::vector<std::vector<double>>& txpGroupCombinedWeights,
-		std::vector<uint64_t>& txpGroupCounts,
-		std::vector<Transcript>& transcripts,
-		Eigen::VectorXd& effLens,
-		double priorAlpha,
-		double totLen,
-		const VecT& alphaIn,
-		VecT& alphaOut,
-		VecT& expTheta) {
-
-	assert(alphaIn.size() == alphaOut.size());
-
-	size_t numEQClasses = txpGroupLabels.size();
-	double alphaSum = {0.0};
-	for (auto& e : alphaIn) { alphaSum += e; }
-
-	double logNorm = boost::math::digamma(alphaSum);
-
-
-	double prior = priorAlpha;
-	double priorNorm = prior * totLen;
-
-	for (size_t i = 0; i < transcripts.size(); ++i) {
-	  if (alphaIn[i] > ::minWeight) {
-	    expTheta[i] = std::exp(boost::math::digamma(alphaIn[i]) - logNorm);
-	  } else {
-	    expTheta[i] = 0.0;
-	  }
-	  alphaOut[i] = prior;
-	}
-
-	for (size_t eqID = 0; eqID < numEQClasses; ++eqID) {
-	  uint64_t count = txpGroupCounts[eqID];
-	  const std::vector<uint32_t>& txps = txpGroupLabels[eqID];
-	  const auto& auxs = txpGroupCombinedWeights[eqID];
-
-	  double denom = 0.0;
-	  size_t groupSize = txps.size();
-	  // If this is a single-transcript group,
-	  // then it gets the full count.  Otherwise,
-	  // update according to our VBEM rule.
-	  if (BOOST_LIKELY(groupSize > 1)) {
-	    for (size_t i = 0; i < groupSize; ++i) {
-	      auto tid = txps[i];
-	      auto aux = auxs[i]; 
-	      if (expTheta[tid] > 0.0) {
-		double v = expTheta[tid] * aux;
-		denom += v;
-	      }
-	    }
-	    if (denom <= ::minEQClassWeight) {
-	      // tgroup.setValid(false);
-	    } else {
-	      double invDenom = count / denom;
-	      for (size_t i = 0; i < groupSize; ++i) {
-		auto tid = txps[i];
-		auto aux = auxs[i];
-		if (expTheta[tid] > 0.0) {
-		  double v = expTheta[tid] * aux;
-		  salmon::utils::incLoop(alphaOut[tid], v * invDenom);
-		}
-	      }
-	    }
-
-	  } else {
-	    salmon::utils::incLoop(alphaOut[txps.front()], count);
-	  }
-	}
-}
+void VBEMUpdate_(std::vector<std::vector<uint32_t>>& txpGroupLabels,
+                 std::vector<std::vector<double>>& txpGroupCombinedWeights,
+                 std::vector<uint64_t>& txpGroupCounts,
+                 std::vector<Transcript>& transcripts, std::vector<double>& priorAlphas,
+                 double totLen, const VecT& alphaIn, VecT& alphaOut,
+                 VecT& expTheta) {
+
+  assert(alphaIn.size() == alphaOut.size());
+
+  size_t numEQClasses = txpGroupLabels.size();
+  double alphaSum = {0.0};
+  for (auto& e : alphaIn) {
+    alphaSum += e;
+  }
+
+  double logNorm = boost::math::digamma(alphaSum);
+
+  //double prior = priorAlpha;
+
+  for (size_t i = 0; i < transcripts.size(); ++i) {
+    if (alphaIn[i] > ::digammaMin) {
+      expTheta[i] = std::exp(boost::math::digamma(alphaIn[i]) - logNorm);
+    } else {
+      expTheta[i] = 0.0;
+    }
+    alphaOut[i] = priorAlphas[i];
+  }
+
+  for (size_t eqID = 0; eqID < numEQClasses; ++eqID) {
+    uint64_t count = txpGroupCounts[eqID];
+    const std::vector<uint32_t>& txps = txpGroupLabels[eqID];
+    const auto& auxs = txpGroupCombinedWeights[eqID];
+
+    double denom = 0.0;
+    size_t groupSize = txps.size();
+    // If this is a single-transcript group,
+    // then it gets the full count.  Otherwise,
+    // update according to our VBEM rule.
+    if (BOOST_LIKELY(groupSize > 1)) {
+      for (size_t i = 0; i < groupSize; ++i) {
+        auto tid = txps[i];
+        auto aux = auxs[i];
+        if (expTheta[tid] > 0.0) {
+          double v = expTheta[tid] * aux;
+          denom += v;
+        }
+      }
+      if (denom <= ::minEQClassWeight) {
+        // tgroup.setValid(false);
+      } else {
+        double invDenom = count / denom;
+        for (size_t i = 0; i < groupSize; ++i) {
+          auto tid = txps[i];
+          auto aux = auxs[i];
+          if (expTheta[tid] > 0.0) {
+            double v = expTheta[tid] * aux;
+            salmon::utils::incLoop(alphaOut[tid], v * invDenom);
+          }
+        }
+      }
 
+    } else {
+      salmon::utils::incLoop(alphaOut[txps.front()], count);
+    }
+  }
+}
 
 /*
  * Use the "standard" EM algorithm over equivalence
  * classes to estimate the latent variables (alphaOut)
  * given the current estimates (alphaIn).
  */
-void EMUpdate_(
-        std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
-        std::vector<Transcript>& transcripts,
-        Eigen::VectorXd& effLens,
-        const CollapsedEMOptimizer::VecType& alphaIn,
-        CollapsedEMOptimizer::VecType& alphaOut) {
-
-    assert(alphaIn.size() == alphaOut.size());
-
-    tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
-            [&eqVec, &alphaIn, &effLens, &alphaOut](const BlockedIndexRange& range) -> void {
-            for (auto eqID : boost::irange(range.begin(), range.end())) {
-            auto& kv = eqVec[eqID];
-
-            uint64_t count = kv.second.count;
-            // for each transcript in this class
-            const TranscriptGroup& tgroup = kv.first;
-            if (tgroup.valid) {
+void EMUpdate_(std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
+               std::vector<Transcript>& transcripts,
+               const CollapsedEMOptimizer::VecType& alphaIn,
+               CollapsedEMOptimizer::VecType& alphaOut) {
+
+  assert(alphaIn.size() == alphaOut.size());
+
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(eqVec.size())),
+      [&eqVec, &alphaIn, &alphaOut](const BlockedIndexRange& range) -> void {
+        for (auto eqID : boost::irange(range.begin(), range.end())) {
+          auto& kv = eqVec[eqID];
+
+          uint64_t count = kv.second.count;
+          // for each transcript in this class
+          const TranscriptGroup& tgroup = kv.first;
+          if (tgroup.valid) {
             const std::vector<uint32_t>& txps = tgroup.txps;
             const auto& auxs = kv.second.combinedWeights;
 
@@ -231,33 +238,32 @@ void EMUpdate_(
             // then it gets the full count.  Otherwise,
             // update according to our VBEM rule.
             if (BOOST_LIKELY(groupSize > 1)) {
-            for (size_t i = 0; i < groupSize; ++i) {
-            auto tid = txps[i];
-            auto aux = auxs[i]; 
-            double v = alphaIn[tid] * aux;
-            denom += v;
-            }
+              for (size_t i = 0; i < groupSize; ++i) {
+                auto tid = txps[i];
+                auto aux = auxs[i];
+                double v = alphaIn[tid] * aux;
+                denom += v;
+              }
 
-            if (denom <= ::minEQClassWeight) {
+              if (denom <= ::minEQClassWeight) {
                 // tgroup.setValid(false);
-            } else {
+              } else {
                 double invDenom = count / denom;
                 for (size_t i = 0; i < groupSize; ++i) {
-                    auto tid = txps[i];
-                    auto aux = auxs[i]; 
-                    double v = alphaIn[tid] * aux;
-                    if (!std::isnan(v)) {
-                        salmon::utils::incLoop(alphaOut[tid], v * invDenom);
-                    }
+                  auto tid = txps[i];
+                  auto aux = auxs[i];
+                  double v = alphaIn[tid] * aux;
+                  if (!std::isnan(v)) {
+                    salmon::utils::incLoop(alphaOut[tid], v * invDenom);
+                  }
                 }
-            }
+              }
             } else {
-                salmon::utils::incLoop(alphaOut[txps.front()], count);
-            }
+              salmon::utils::incLoop(alphaOut[txps.front()], count);
             }
-    }
-    });
-
+          }
+        }
+      });
 }
 
 /*
@@ -265,694 +271,765 @@ void EMUpdate_(
  * classes to estimate the latent variables (alphaOut)
  * given the current estimates (alphaIn).
  */
-void VBEMUpdate_(
-        std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
-        std::vector<Transcript>& transcripts,
-        Eigen::VectorXd& effLens,
-        double priorAlpha,
-        double totLen,
-        const CollapsedEMOptimizer::VecType& alphaIn,
-        CollapsedEMOptimizer::VecType& alphaOut,
-	    CollapsedEMOptimizer::VecType& expTheta) {
-
-    assert(alphaIn.size() == alphaOut.size());
-
-    double alphaSum = {0.0};
-    for (auto& e : alphaIn) { alphaSum += e; }
-
-    double logNorm = boost::math::digamma(alphaSum);
-
-    tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts.size())),
-            [logNorm, priorAlpha, totLen, &effLens, &alphaIn,
-             &alphaOut, &expTheta]( const BlockedIndexRange& range) -> void {
-
-             double prior = priorAlpha;
-             double priorNorm = prior * totLen;
-
-             for (auto i : boost::irange(range.begin(), range.end())) {
-                if (alphaIn[i] > ::minWeight) {
-                    expTheta[i] = std::exp(boost::math::digamma(alphaIn[i].load()) - logNorm);
-                } else {
-                    expTheta[i] = 0.0;
-                }
-                alphaOut[i] = prior;
-            }
-        });
-
-    tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
-            [&eqVec, &alphaIn,
-             &alphaOut, &effLens, 
-	     &expTheta]( const BlockedIndexRange& range) -> void {
-            for (auto eqID : boost::irange(range.begin(), range.end())) {
-            auto& kv = eqVec[eqID];
-
-            uint64_t count = kv.second.count;
-            // for each transcript in this class
-            const TranscriptGroup& tgroup = kv.first;
-            if (tgroup.valid) {
-                const std::vector<uint32_t>& txps = tgroup.txps;
-                const auto& auxs = kv.second.combinedWeights;
-
-                double denom = 0.0;
-                size_t groupSize = txps.size();
-                // If this is a single-transcript group,
-                // then it gets the full count.  Otherwise,
-                // update according to our VBEM rule.
-                if (BOOST_LIKELY(groupSize > 1)) {
-                    for (size_t i = 0; i < groupSize; ++i) {
-                        auto tid = txps[i];
-                        auto aux = auxs[i]; 
-                        if (expTheta[tid] > 0.0) {
-                            double v = expTheta[tid] * aux;
-                            denom += v;
-                       }
-                    }
-                    if (denom <= ::minEQClassWeight) {
-                        // tgroup.setValid(false);
-                    } else {
-                        double invDenom = count / denom;
-                        for (size_t i = 0; i < groupSize; ++i) {
-                            auto tid = txps[i];
-                            auto aux = auxs[i];
-                            if (expTheta[tid] > 0.0) {
-                              double v = expTheta[tid] * aux;
-			      salmon::utils::incLoop(alphaOut[tid], v * invDenom);
-                            }
+void VBEMUpdate_(std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
+                 std::vector<Transcript>& transcripts, std::vector<double>& priorAlphas,
+                 double totLen, const CollapsedEMOptimizer::VecType& alphaIn,
+                 CollapsedEMOptimizer::VecType& alphaOut,
+                 CollapsedEMOptimizer::VecType& expTheta) {
+
+  assert(alphaIn.size() == alphaOut.size());
+
+  double alphaSum = {0.0};
+  for (auto& e : alphaIn) {
+    alphaSum += e;
+  }
+
+  double logNorm = boost::math::digamma(alphaSum);
+
+  tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts.size())),
+                    [logNorm, totLen, &priorAlphas, &alphaIn, &alphaOut,
+                     &expTheta](const BlockedIndexRange& range) -> void {
+
+                      //double prior = priorAlpha;
+
+                      for (auto i : boost::irange(range.begin(), range.end())) {
+                        if (alphaIn[i] > ::digammaMin) {
+                          expTheta[i] =
+                              std::exp(boost::math::digamma(alphaIn[i].load()) -
+                                       logNorm);
+                        } else {
+                          expTheta[i] = 0.0;
                         }
-                    }
+                        //alphaOut[i] = prior * transcripts[i].RefLength;
+                        alphaOut[i] = priorAlphas[i];
+                      }
+                    });
+
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(eqVec.size())),
+      [&eqVec, &alphaIn, &alphaOut,
+       &expTheta](const BlockedIndexRange& range) -> void {
+        for (auto eqID : boost::irange(range.begin(), range.end())) {
+          auto& kv = eqVec[eqID];
+
+          uint64_t count = kv.second.count;
+          // for each transcript in this class
+          const TranscriptGroup& tgroup = kv.first;
+          if (tgroup.valid) {
+            const std::vector<uint32_t>& txps = tgroup.txps;
+            const auto& auxs = kv.second.combinedWeights;
 
-                } else {
-                    salmon::utils::incLoop(alphaOut[txps.front()], count);
+            double denom = 0.0;
+            size_t groupSize = txps.size();
+            // If this is a single-transcript group,
+            // then it gets the full count.  Otherwise,
+            // update according to our VBEM rule.
+            if (BOOST_LIKELY(groupSize > 1)) {
+              for (size_t i = 0; i < groupSize; ++i) {
+                auto tid = txps[i];
+                auto aux = auxs[i];
+                if (expTheta[tid] > 0.0) {
+                  double v = expTheta[tid] * aux;
+                  denom += v;
                 }
-            }
-        }});
-
-}
+              }
+              if (denom <= ::minEQClassWeight) {
+                // tgroup.setValid(false);
+              } else {
+                double invDenom = count / denom;
+                for (size_t i = 0; i < groupSize; ++i) {
+                  auto tid = txps[i];
+                  auto aux = auxs[i];
+                  if (expTheta[tid] > 0.0) {
+                    double v = expTheta[tid] * aux;
+                    salmon::utils::incLoop(alphaOut[tid], v * invDenom);
+                  }
+                }
+              }
 
-template <typename VecT>
-size_t markDegenerateClasses(
-        std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
-        VecT& alphaIn,
-        Eigen::VectorXd& effLens,
-        std::shared_ptr<spdlog::logger> jointLog,
-        bool verbose=false) {
-
-    size_t numDropped{0};
-    size_t idx{0};
-    for (auto& kv : eqVec) {
-        uint64_t count = kv.second.count;
-        // for each transcript in this class
-        const TranscriptGroup& tgroup = kv.first;
-        const std::vector<uint32_t>& txps = tgroup.txps;
-        const auto& auxs = kv.second.combinedWeights;
-
-        double denom = 0.0;
-        for (size_t i = 0; i < txps.size(); ++i) {
-            auto tid = txps[i];
-            auto aux = auxs[i]; 
-            double v = alphaIn[tid] * aux;
-            if (!std::isnan(v)) {
-                denom += v;
             } else {
-                std::cerr << "val is NAN; alpha( "
-                          << tid << " ) = " << alphaIn[tid]
-                          << ", aux = " << aux << "\n";
+              salmon::utils::incLoop(alphaOut[txps.front()], count);
             }
+          }
         }
-        if (denom <= minEQClassWeight) {
-            fmt::MemoryWriter errstream;
-
-            errstream << "\nDropping weighted eq class\n";
-            errstream << "============================\n";
-
-            errstream << "denom = 0, count = " << count << "\n";
-            errstream << "class = { ";
-            for (auto e : txps) {
-                errstream << e << " ";
-            }
-            errstream << "}\n";
-            errstream << "alphas = { ";
-            for (auto e : txps) {
-                errstream << alphaIn[e] << " ";
-            }
-            errstream << "}\n";
-            errstream << "weights = { ";
-            for (auto e : auxs) {
-                errstream << e << " ";
-            }
-            errstream << "}\n";
-            errstream << "============================\n\n";
+      });
+}
 
-            bool verbose{false};
-            if (verbose) {
-                jointLog->info(errstream.str());
-            }
-            ++numDropped;
-            kv.first.setValid(false);
-        }
+template <typename VecT>
+size_t markDegenerateClasses(
+    std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
+    VecT& alphaIn, Eigen::VectorXd& effLens,
+    std::shared_ptr<spdlog::logger> jointLog, bool verbose = false) {
+
+  size_t numDropped{0};
+  size_t idx{0};
+  for (auto& kv : eqVec) {
+    uint64_t count = kv.second.count;
+    // for each transcript in this class
+    const TranscriptGroup& tgroup = kv.first;
+    const std::vector<uint32_t>& txps = tgroup.txps;
+    const auto& auxs = kv.second.combinedWeights;
+
+    double denom = 0.0;
+    for (size_t i = 0; i < txps.size(); ++i) {
+      auto tid = txps[i];
+      auto aux = auxs[i];
+      double v = alphaIn[tid] * aux;
+      if (!std::isnan(v)) {
+        denom += v;
+      } else {
+        std::cerr << "val is NAN; alpha( " << tid << " ) = " << alphaIn[tid]
+                  << ", aux = " << aux << "\n";
+      }
     }
-    return numDropped;
+    if (denom <= minEQClassWeight) {
+      fmt::MemoryWriter errstream;
+
+      errstream << "\nDropping weighted eq class\n";
+      errstream << "============================\n";
+
+      errstream << "denom = 0, count = " << count << "\n";
+      errstream << "class = { ";
+      for (auto e : txps) {
+        errstream << e << " ";
+      }
+      errstream << "}\n";
+      errstream << "alphas = { ";
+      for (auto e : txps) {
+        errstream << alphaIn[e] << " ";
+      }
+      errstream << "}\n";
+      errstream << "weights = { ";
+      for (auto e : auxs) {
+        errstream << e << " ";
+      }
+      errstream << "}\n";
+      errstream << "============================\n\n";
+
+      bool verbose{false};
+      if (verbose) {
+        jointLog->info(errstream.str());
+      }
+      ++numDropped;
+      kv.first.setValid(false);
+    }
+  }
+  return numDropped;
 }
 
-
 CollapsedEMOptimizer::CollapsedEMOptimizer() {}
 
-
 bool doBootstrap(
-        std::vector<std::vector<uint32_t>>& txpGroups,
-        std::vector<std::vector<double>>& txpGroupCombinedWeights,
-        std::vector<Transcript>& transcripts,
-        Eigen::VectorXd& effLens,
-        std::vector<double>& sampleWeights,
-        uint64_t totalNumFrags,
-        uint64_t numMappedFrags,
-        double uniformTxpWeight,
-        std::atomic<uint32_t>& bsNum,
-        SalmonOpts& sopt,
-        std::function<bool(const std::vector<double>&)>& writeBootstrap,
-        double relDiffTolerance,
-        uint32_t maxIter) {
-
-    uint32_t minIter = 50;
-
-    // Determine up front if we're going to use scaled counts.
-    bool useScaledCounts = !(sopt.useQuasi or sopt.allowOrphans);
-    bool useVBEM{sopt.useVBOpt};
-    size_t numClasses = txpGroups.size();
-    CollapsedEMOptimizer::SerialVecType alphas(transcripts.size(), 0.0);
-    CollapsedEMOptimizer::SerialVecType alphasPrime(transcripts.size(), 0.0);
-    CollapsedEMOptimizer::SerialVecType expTheta(transcripts.size(), 0.0);
-    std::vector<uint64_t> sampCounts(numClasses, 0);
-
-    uint32_t numBootstraps = sopt.numBootstraps;
-
-    auto& jointLog = sopt.jointLog;
-
-    std::random_device rd;
-    MultinomialSampler msamp(rd);
-
-    while (bsNum++ < numBootstraps) {
-        // Do a new bootstrap
-        msamp(sampCounts.begin(), totalNumFrags, numClasses, sampleWeights.begin());
-
-	double totalLen{0.0};
-        for (size_t i = 0; i < transcripts.size(); ++i) {
-            alphas[i] = transcripts[i].getActive() ? uniformTxpWeight * totalNumFrags : 0.0;
-            totalLen += effLens(i);
-        }
+    std::vector<std::vector<uint32_t>>& txpGroups,
+    std::vector<std::vector<double>>& txpGroupCombinedWeights,
+    std::vector<Transcript>& transcripts, Eigen::VectorXd& effLens,
+    std::vector<double>& sampleWeights, uint64_t totalNumFrags,
+    uint64_t numMappedFrags, double uniformTxpWeight,
+    std::atomic<uint32_t>& bsNum, SalmonOpts& sopt,
+    std::vector<double>& priorAlphas,
+    std::function<bool(const std::vector<double>&)>& writeBootstrap,
+    double relDiffTolerance, uint32_t maxIter) {
+
+  uint32_t minIter = 50;
+
+  // Determine up front if we're going to use scaled counts.
+  bool useScaledCounts = !(sopt.useQuasi or sopt.allowOrphans);
+  bool useVBEM{sopt.useVBOpt};
+  size_t numClasses = txpGroups.size();
+  CollapsedEMOptimizer::SerialVecType alphas(transcripts.size(), 0.0);
+  CollapsedEMOptimizer::SerialVecType alphasPrime(transcripts.size(), 0.0);
+  CollapsedEMOptimizer::SerialVecType expTheta(transcripts.size(), 0.0);
+  std::vector<uint64_t> sampCounts(numClasses, 0);
+
+  uint32_t numBootstraps = sopt.numBootstraps;
+  bool perTranscriptPrior{sopt.perTranscriptPrior};
+
+  auto& jointLog = sopt.jointLog;
+
+  std::random_device rd;
+  MultinomialSampler msamp(rd);
+
+  while (bsNum++ < numBootstraps) {
+    // Do a new bootstrap
+    msamp(sampCounts.begin(), totalNumFrags, numClasses, sampleWeights.begin());
 
-        bool converged{false};
-        double maxRelDiff = -std::numeric_limits<double>::max();
-        size_t itNum = 0;
-
-        // If we use VBEM, we'll need the prior parameters
-        double priorAlpha = 0.01;
-        double minAlpha = 1e-8;
-        double alphaCheckCutoff = 1e-2;
-        double cutoff = (useVBEM) ? (priorAlpha + minAlpha) : minAlpha;
-
-        while (itNum < minIter or (itNum < maxIter and !converged)) {
-
-            if (useVBEM) {
-                VBEMUpdate_(txpGroups, txpGroupCombinedWeights, sampCounts, transcripts,
-                        effLens, priorAlpha, totalLen, alphas, alphasPrime, expTheta);
-            } else {
-                EMUpdate_(txpGroups, txpGroupCombinedWeights, sampCounts, transcripts,
-                        effLens, alphas, alphasPrime);
-            }
+    double totalLen{0.0};
+    for (size_t i = 0; i < transcripts.size(); ++i) {
+      alphas[i] =
+          transcripts[i].getActive() ? uniformTxpWeight * totalNumFrags : 0.0;
+      totalLen += effLens(i);
+    }
 
-            converged = true;
-            maxRelDiff = -std::numeric_limits<double>::max();
-            for (size_t i = 0; i < transcripts.size(); ++i) {
-                if (alphasPrime[i] > alphaCheckCutoff) {
-                    double relDiff = std::abs(alphas[i] - alphasPrime[i]) / alphasPrime[i];
-                    maxRelDiff = (relDiff > maxRelDiff) ? relDiff : maxRelDiff;
-                    if (relDiff > relDiffTolerance) {
-                        converged = false;
-                    }
-                }
-                alphas[i] = alphasPrime[i];
-                alphasPrime[i] = 0.0;
-            }
+    bool converged{false};
+    double maxRelDiff = -std::numeric_limits<double>::max();
+    size_t itNum = 0;
 
-            ++itNum;
-        }
+    // If we use VBEM, we'll need the prior parameters
+    //double priorAlpha = 1.00;
+    double minAlpha = 1e-8;
+    double alphaCheckCutoff = 1e-2;
+    double cutoff = minAlpha;
 
-        double alphaSum = truncateCountVector(alphas, cutoff);
+    while (itNum < minIter or (itNum < maxIter and !converged)) {
 
-        if (alphaSum < minWeight) {
-            jointLog->error("Total alpha weight was too small! "
-                    "Make sure you ran salmon correclty.");
-            return false;
+      if (useVBEM) {
+        VBEMUpdate_(txpGroups, txpGroupCombinedWeights, sampCounts, transcripts,
+                    priorAlphas, totalLen, alphas, alphasPrime, expTheta);
+      } else {
+        EMUpdate_(txpGroups, txpGroupCombinedWeights, sampCounts, transcripts,
+                  alphas, alphasPrime);
+      }
+
+      converged = true;
+      maxRelDiff = -std::numeric_limits<double>::max();
+      for (size_t i = 0; i < transcripts.size(); ++i) {
+        if (alphasPrime[i] > alphaCheckCutoff) {
+          double relDiff =
+              std::abs(alphas[i] - alphasPrime[i]) / alphasPrime[i];
+          maxRelDiff = (relDiff > maxRelDiff) ? relDiff : maxRelDiff;
+          if (relDiff > relDiffTolerance) {
+            converged = false;
+          }
         }
+        alphas[i] = alphasPrime[i];
+        alphasPrime[i] = 0.0;
+      }
 
-        if (useScaledCounts) {
-            double mappedFragsDouble = static_cast<double>(numMappedFrags);
-            double alphaSum = 0.0;
-            for (auto a : alphas) { alphaSum += a; }
-            if (alphaSum > ::minWeight) {
-                double scaleFrac = 1.0 / alphaSum;
-                // scaleFrac converts alpha to nucleotide fraction,
-                // and multiplying by numMappedFrags scales by the total
-                // number of mapped fragments to provide an estimated count.
-                for (auto& a : alphas) { a = mappedFragsDouble * (a * scaleFrac); }
-            } else { // This shouldn't happen!
-                sopt.jointLog->error("Bootstrap had insufficient number of fragments!"
-                                     "Something is probably wrong; please check that you "
-                                     "have run salmon correctly and report this to GitHub.");
-            }
-        }
-        writeBootstrap(alphas);
+      ++itNum;
     }
-    return true;
-}
 
-template <typename ExpT>
-bool CollapsedEMOptimizer::gatherBootstraps(
-        ExpT& readExp,
-        SalmonOpts& sopt,
-        std::function<bool(const std::vector<double>&)>& writeBootstrap,
-        double relDiffTolerance,
-        uint32_t maxIter) {
-
-    std::vector<Transcript>& transcripts = readExp.transcripts();
-    using VecT = CollapsedEMOptimizer::SerialVecType;
-    // With atomics
-    VecT alphas(transcripts.size(), 0.0);
-    VecT alphasPrime(transcripts.size(), 0.0);
-    VecT expTheta(transcripts.size());
-    Eigen::VectorXd effLens(transcripts.size());
-
-    bool scaleCounts = (!sopt.useQuasi and !sopt.allowOrphans);
-
-    auto& fragStartDists = readExp.fragmentStartPositionDistributions();
-    uint64_t numMappedFrags = scaleCounts ? readExp.upperBoundHits() : readExp.numMappedFragments();
-
-    uint32_t numBootstraps = sopt.numBootstraps;
-
-    std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
-        readExp.equivalenceClassBuilder().eqVec();
-
-    std::unordered_set<uint32_t> activeTranscriptIDs;
-    for (auto& kv : eqVec) {
-        auto& tg = kv.first;
-        for (auto& t : tg.txps) {
-            transcripts[t].setActive();
-            activeTranscriptIDs.insert(t);
+    // Truncate tiny expression values
+    double alphaSum = 0.0;
+    if (useVBEM and !perTranscriptPrior) {
+        std::vector<double> cutoffs(transcripts.size(), 0.0);
+        for (size_t i = 0; i < transcripts.size(); ++i) {
+            cutoffs[i] = priorAlphas[i] + minAlpha;
         }
+        //alphaSum = truncateCountVector(alphas, cutoffs);
+        alphaSum = truncateCountVector(alphas, cutoffs);
+    } else {
+        // Truncate tiny expression values
+        alphaSum = truncateCountVector(alphas, cutoff);
     }
 
-    bool useVBEM{sopt.useVBOpt};
-    // If we use VBEM, we'll need the prior parameters
-    double priorAlpha = 0.01;
-
-    auto jointLog = sopt.jointLog;
-
-    jointLog->info("Will draw {} bootstrap samples", numBootstraps);
-    jointLog->info("Optimizing over {} equivalence classes", eqVec.size());
-
-    double totalNumFrags{static_cast<double>(readExp.numMappedFragments())};
-    double totalLen{0.0};
-
-    if (activeTranscriptIDs.size() == 0) {
-        jointLog->error("It seems that no transcripts are expressed; something is likely wrong!");
-        std::exit(1);
-    }
 
-    double scale = 1.0 / activeTranscriptIDs.size();
-    for (size_t i = 0; i < transcripts.size(); ++i) {
-        //double m = transcripts[i].mass(false);
-        alphas[i] = transcripts[i].getActive() ? scale * totalNumFrags : 0.0;
-        effLens(i) = (sopt.noEffectiveLengthCorrection) ?
-                      transcripts[i].RefLength :
-					  std::exp(transcripts[i].getCachedLogEffectiveLength());
-        totalLen += effLens(i);
+    if (alphaSum < minWeight) {
+      jointLog->error("Total alpha weight was too small! "
+                      "Make sure you ran salmon correclty.");
+      return false;
     }
 
-    auto numRemoved = markDegenerateClasses(eqVec, alphas, effLens, sopt.jointLog);
-    sopt.jointLog->info("Marked {} weighted equivalence classes as degenerate",
-            numRemoved);
-
-    size_t itNum{0};
-    double minAlpha = 1e-8;
-    double cutoff = (useVBEM) ? (priorAlpha + minAlpha) : minAlpha;
-
-    // Since we will use the same weights and transcript groups for each
-    // of the bootstrap samples (only the count vector will change), it
-    // makes sense to keep only one copy of these.
-    using TGroupLabelT = std::vector<uint32_t>;
-    using TGroupWeightVec = std::vector<double>;
-    std::vector<TGroupLabelT> txpGroups;
-    std::vector<TGroupWeightVec> txpGroupCombinedWeights;
-    std::vector<uint64_t> origCounts;
-    uint64_t totalCount{0};
-
-    for (auto& kv : eqVec) {
-        uint64_t count = kv.second.count;
-        // for each transcript in this class
-        const TranscriptGroup& tgroup = kv.first;
-        if (tgroup.valid) {
-            const std::vector<uint32_t>& txps = tgroup.txps;
-            const auto& auxs = kv.second.combinedWeights;
-            txpGroups.push_back(txps);
-	    // Convert to non-atomic
-            txpGroupCombinedWeights.emplace_back(auxs.begin(), auxs.end());
-            origCounts.push_back(count);
-            totalCount += count;
+    if (useScaledCounts) {
+      double mappedFragsDouble = static_cast<double>(numMappedFrags);
+      double alphaSum = 0.0;
+      for (auto a : alphas) {
+        alphaSum += a;
+      }
+      if (alphaSum > ::minWeight) {
+        double scaleFrac = 1.0 / alphaSum;
+        // scaleFrac converts alpha to nucleotide fraction,
+        // and multiplying by numMappedFrags scales by the total
+        // number of mapped fragments to provide an estimated count.
+        for (auto& a : alphas) {
+          a = mappedFragsDouble * (a * scaleFrac);
         }
+      } else { // This shouldn't happen!
+        sopt.jointLog->error(
+            "Bootstrap had insufficient number of fragments!"
+            "Something is probably wrong; please check that you "
+            "have run salmon correctly and report this to GitHub.");
+      }
     }
+    writeBootstrap(alphas);
+  }
+  return true;
+}
 
-    double floatCount = totalCount;
-    std::vector<double> samplingWeights(txpGroups.size(), 0.0);
-    for (size_t i = 0; i < origCounts.size(); ++i) {
-        samplingWeights[i] = origCounts[i] / floatCount;
-    }
-
-    size_t numWorkerThreads{1};
-    if (sopt.numThreads > 1 and numBootstraps > 1) {
-        numWorkerThreads = std::min(sopt.numThreads - 1, numBootstraps - 1);
+template <typename ExpT>
+bool CollapsedEMOptimizer::gatherBootstraps(
+    ExpT& readExp, SalmonOpts& sopt,
+    std::function<bool(const std::vector<double>&)>& writeBootstrap,
+    double relDiffTolerance, uint32_t maxIter) {
+
+  std::vector<Transcript>& transcripts = readExp.transcripts();
+  using VecT = CollapsedEMOptimizer::SerialVecType;
+  // With atomics
+  VecT alphas(transcripts.size(), 0.0);
+  VecT alphasPrime(transcripts.size(), 0.0);
+  VecT expTheta(transcripts.size());
+  Eigen::VectorXd effLens(transcripts.size());
+
+  bool scaleCounts = (!sopt.useQuasi and !sopt.allowOrphans);
+
+  auto& fragStartDists = readExp.fragmentStartPositionDistributions();
+  uint64_t numMappedFrags =
+      scaleCounts ? readExp.upperBoundHits() : readExp.numMappedFragments();
+
+  uint32_t numBootstraps = sopt.numBootstraps;
+
+  std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
+      readExp.equivalenceClassBuilder().eqVec();
+
+  std::unordered_set<uint32_t> activeTranscriptIDs;
+  for (auto& kv : eqVec) {
+    auto& tg = kv.first;
+    for (auto& t : tg.txps) {
+      transcripts[t].setActive();
+      activeTranscriptIDs.insert(t);
     }
-
-    std::atomic<uint32_t> bsCounter{0};
-    std::vector<std::thread> workerThreads;
-    for (size_t tn = 0; tn < numWorkerThreads; ++tn) {
-        workerThreads.emplace_back(doBootstrap,
-                std::ref(txpGroups),
-                std::ref(txpGroupCombinedWeights),
-                std::ref(transcripts),
-                std::ref(effLens),
-                std::ref(samplingWeights),
-                totalCount,
-                numMappedFrags,
-                scale,
-                std::ref(bsCounter),
-                std::ref(sopt),
-                std::ref(writeBootstrap),
-                relDiffTolerance,
-                maxIter);
+  }
+
+  bool useVBEM{sopt.useVBOpt};
+  bool perTranscriptPrior{sopt.perTranscriptPrior};
+  double priorValue{sopt.vbPrior};
+  
+  // If we use VBEM, we'll need the prior parameters
+  std::vector<double> priorAlphas(transcripts.size(), priorValue);
+  // If the prior is per-nucleotide (the default), then we need a potentially
+  // different value for each transcript, based on its length.
+  if (!perTranscriptPrior) {
+    for (size_t i = 0; i < transcripts.size(); ++i) {
+      priorAlphas[i] = priorValue * transcripts[i].RefLength;
     }
-
-    for (auto& t : workerThreads) {
-        t.join();
+  }
+  //double priorAlpha = 1e-3;//1.00;
+
+  auto jointLog = sopt.jointLog;
+
+  jointLog->info("Will draw {} bootstrap samples", numBootstraps);
+  jointLog->info("Optimizing over {} equivalence classes", eqVec.size());
+
+  double totalNumFrags{static_cast<double>(readExp.numMappedFragments())};
+  double totalLen{0.0};
+
+  if (activeTranscriptIDs.size() == 0) {
+    jointLog->error("It seems that no transcripts are expressed; something is "
+                    "likely wrong!");
+    std::exit(1);
+  }
+
+  double scale = 1.0 / activeTranscriptIDs.size();
+  for (size_t i = 0; i < transcripts.size(); ++i) {
+    // double m = transcripts[i].mass(false);
+    alphas[i] = transcripts[i].getActive() ? scale * totalNumFrags : 0.0;
+    effLens(i) = (sopt.noEffectiveLengthCorrection)
+                     ? transcripts[i].RefLength
+                     : std::exp(transcripts[i].getCachedLogEffectiveLength());
+    totalLen += effLens(i);
+  }
+
+  auto numRemoved =
+      markDegenerateClasses(eqVec, alphas, effLens, sopt.jointLog);
+  sopt.jointLog->info("Marked {} weighted equivalence classes as degenerate",
+                      numRemoved);
+
+  size_t itNum{0};
+  double minAlpha = 1e-8;
+  double cutoff = minAlpha;
+
+  // Since we will use the same weights and transcript groups for each
+  // of the bootstrap samples (only the count vector will change), it
+  // makes sense to keep only one copy of these.
+  using TGroupLabelT = std::vector<uint32_t>;
+  using TGroupWeightVec = std::vector<double>;
+  std::vector<TGroupLabelT> txpGroups;
+  std::vector<TGroupWeightVec> txpGroupCombinedWeights;
+  std::vector<uint64_t> origCounts;
+  uint64_t totalCount{0};
+
+  for (auto& kv : eqVec) {
+    uint64_t count = kv.second.count;
+    // for each transcript in this class
+    const TranscriptGroup& tgroup = kv.first;
+    if (tgroup.valid) {
+      const std::vector<uint32_t>& txps = tgroup.txps;
+      const auto& auxs = kv.second.combinedWeights;
+      txpGroups.push_back(txps);
+      // Convert to non-atomic
+      txpGroupCombinedWeights.emplace_back(auxs.begin(), auxs.end());
+      origCounts.push_back(count);
+      totalCount += count;
     }
-    return true;
+  }
+
+  double floatCount = totalCount;
+  std::vector<double> samplingWeights(txpGroups.size(), 0.0);
+  for (size_t i = 0; i < origCounts.size(); ++i) {
+    samplingWeights[i] = origCounts[i] / floatCount;
+  }
+
+  size_t numWorkerThreads{1};
+  if (sopt.numThreads > 1 and numBootstraps > 1) {
+    numWorkerThreads = std::min(sopt.numThreads - 1, numBootstraps - 1);
+  }
+
+  std::atomic<uint32_t> bsCounter{0};
+  std::vector<std::thread> workerThreads;
+  for (size_t tn = 0; tn < numWorkerThreads; ++tn) {
+    workerThreads.emplace_back(
+        doBootstrap, std::ref(txpGroups), std::ref(txpGroupCombinedWeights),
+        std::ref(transcripts), std::ref(effLens), std::ref(samplingWeights),
+        totalCount, numMappedFrags, scale, std::ref(bsCounter), std::ref(sopt),
+	std::ref(priorAlphas), std::ref(writeBootstrap), relDiffTolerance, maxIter);
+  }
+
+  for (auto& t : workerThreads) {
+    t.join();
+  }
+  return true;
 }
 
-void updateEqClassWeights(std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
-                          Eigen::VectorXd& posWeightInvDenoms) {
-    tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
-            [&eqVec, &posWeightInvDenoms]( const BlockedIndexRange& range) -> void {
-                // For each index in the equivalence class vector
-                for (auto eqID : boost::irange(range.begin(), range.end())) {
-                    // The vector entry
-                    auto& kv = eqVec[eqID];
-                    // The label of the equivalence class
-                    const TranscriptGroup& k = kv.first;
-                    // The size of the label
-                    size_t classSize = k.txps.size();
-                    // The weights of the label
-                    TGValue& v = kv.second;
-
-                    // Iterate over each weight and set it equal to
-                    // 1 / effLen of the corresponding transcript
-                    double wsum{0.0};
-                    for (size_t i = 0; i < classSize; ++i) {
-		      auto tid = k.txps[i]; 
-                      v.combinedWeights[i] = kv.second.count * (v.weights[i] * v.posWeights[i] * posWeightInvDenoms[tid]); 
-                      wsum += v.combinedWeights[i];
-                    }
-                    double wnorm = 1.0 / wsum;
-                    for (size_t i = 0; i < classSize; ++i) {
-                        v.combinedWeights[i] *= wnorm;
-                    }
-                }
-            });
+void updateEqClassWeights(
+    std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
+    Eigen::VectorXd& posWeightInvDenoms, Eigen::VectorXd& effLens) {
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(eqVec.size())),
+      [&eqVec, &effLens,
+       &posWeightInvDenoms](const BlockedIndexRange& range) -> void {
+        // For each index in the equivalence class vector
+        for (auto eqID : boost::irange(range.begin(), range.end())) {
+          // The vector entry
+          auto& kv = eqVec[eqID];
+          // The label of the equivalence class
+          const TranscriptGroup& k = kv.first;
+          // The size of the label
+          size_t classSize = k.txps.size();
+          // The weights of the label
+          TGValue& v = kv.second;
+
+          // Iterate over each weight and set it equal to
+          // 1 / effLen of the corresponding transcript
+          double wsum{0.0};
+          for (size_t i = 0; i < classSize; ++i) {
+            auto tid = k.txps[i];
+            v.posWeights[i] = 1.0 / effLens(tid);
+            v.combinedWeights[i] =
+                kv.second.count *
+                (v.weights[i] * v.posWeights[i] * posWeightInvDenoms[tid]);
+            wsum += v.combinedWeights[i];
+          }
+          double wnorm = 1.0 / wsum;
+          for (size_t i = 0; i < classSize; ++i) {
+            v.combinedWeights[i] *= wnorm;
+          }
+        }
+      });
 }
 
 template <typename ExpT>
-bool CollapsedEMOptimizer::optimize(ExpT& readExp,
-        SalmonOpts& sopt,
-        double relDiffTolerance,
-        uint32_t maxIter) {
-
-    tbb::task_scheduler_init tbbScheduler(sopt.numThreads);
-    std::vector<Transcript>& transcripts = readExp.transcripts();
-
-    uint32_t minIter = 50;
-    bool doBiasCorrect = sopt.biasCorrect;
-    auto& expectedDist = readExp.expectedBias();
-
-    using VecT = CollapsedEMOptimizer::VecType;
-    // With atomics
-    VecType alphas(transcripts.size(), 0.0);
-    VecType alphasPrime(transcripts.size(), 0.0);
-    VecType expTheta(transcripts.size());
+bool CollapsedEMOptimizer::optimize(ExpT& readExp, SalmonOpts& sopt,
+                                    double relDiffTolerance, uint32_t maxIter) {
+
+  tbb::task_scheduler_init tbbScheduler(sopt.numThreads);
+  std::vector<Transcript>& transcripts = readExp.transcripts();
+
+  uint32_t minIter = 50;
+  bool seqBiasCorrect = sopt.biasCorrect;
+  bool gcBiasCorrect = sopt.gcBiasCorrect;
+  bool doBiasCorrect = seqBiasCorrect or gcBiasCorrect;
+
+  using VecT = CollapsedEMOptimizer::VecType;
+  // With atomics
+  VecType alphas(transcripts.size(), 0.0);
+  VecType alphasPrime(transcripts.size(), 0.0);
+  VecType expTheta(transcripts.size());
+
+  Eigen::VectorXd effLens(transcripts.size());
+  Eigen::VectorXd posWeightInvDenoms(transcripts.size());
+
+  std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
+      readExp.equivalenceClassBuilder().eqVec();
+
+  bool noRichEq = sopt.noRichEqClasses;
+  bool useFSPD{sopt.useFSPD};
+
+  bool useVBEM{sopt.useVBOpt};
+  bool perTranscriptPrior{sopt.perTranscriptPrior};
+  double priorValue{sopt.vbPrior};
+  
+  // If we use VBEM, we'll need the prior parameters
+  std::vector<double> priorAlphas(transcripts.size(), priorValue);
+  // If the prior is per-nucleotide (the default), then we need a potentially
+  // different value for each transcript, based on its length.
+  if (!perTranscriptPrior) {
+    for (size_t i = 0; i < transcripts.size(); ++i) {
+      priorAlphas[i] = priorValue * transcripts[i].RefLength;
+    }
+  }
 
-    Eigen::VectorXd effLens(transcripts.size());
-    Eigen::VectorXd posWeightInvDenoms(transcripts.size());
+  // If we use VBEM, we'll need the prior parameters
+  //double priorAlpha = 1e-3;//0.01;
+  //double priorAlpha = 1.0;
 
-    std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
-        readExp.equivalenceClassBuilder().eqVec();
+  auto jointLog = sopt.jointLog;
 
-    bool noRichEq = sopt.noRichEqClasses;
-    bool useFSPD{sopt.useFSPD};
-    bool useVBEM{sopt.useVBOpt};
+  auto& fragStartDists = readExp.fragmentStartPositionDistributions();
+  double totalNumFrags{static_cast<double>(readExp.numMappedFragments())};
+  double totalLen{0.0};
 
-    // If we use VBEM, we'll need the prior parameters
-    double priorAlpha = 0.01;
+  // If effective length correction isn't turned off, then use effective
+  // lengths rather than reference lengths.
+  bool useEffectiveLengths = !sopt.noEffectiveLengthCorrection;
 
-    auto jointLog = sopt.jointLog;
+  int64_t numActive{0};
+  double totalWeight{0.0};
 
-    auto& fragStartDists = readExp.fragmentStartPositionDistributions();
-    double totalNumFrags{static_cast<double>(readExp.numMappedFragments())};
-    double totalLen{0.0};
+  for (size_t i = 0; i < transcripts.size(); ++i) {
+    auto& txp = transcripts[i];
+    alphas[i] = txp.projectedCounts;
 
-    // If effective length correction isn't turned off, then use effective
-    // lengths rather than reference lengths.
-    bool useEffectiveLengths = !sopt.noEffectiveLengthCorrection;
 
-    double uniformPrior = 1.0 / transcripts.size();
+    effLens(i) = useEffectiveLengths
+                     ? std::exp(txp.getCachedLogEffectiveLength())
+                     : txp.RefLength;
+    txp.EffectiveLength = effLens(i);
 
-    for (size_t i = 0; i < transcripts.size(); ++i) {
-        auto& txp = transcripts[i];
-	alphas[i] = txp.projectedCounts;
-        effLens(i) = useEffectiveLengths ? std::exp(txp.getCachedLogEffectiveLength()) : txp.RefLength;
-        txp.EffectiveLength = effLens(i);
-
-	if (noRichEq or !useFSPD) {
-	  posWeightInvDenoms(i) = 1.0;
-	} else {
-	  auto& fragStartDist = fragStartDists[txp.lengthClassIndex()];
-	  double denomFactor = fragStartDist.evalCDF(static_cast<int32_t>(txp.EffectiveLength), txp.RefLength);
-	  posWeightInvDenoms(i) = (denomFactor >= salmon::math::LOG_EPSILON) ? 
-	    std::exp(-denomFactor) : (1e-5);
-	}
-
-        totalLen += effLens(i);
+    if (txp.uniqueCount() > 0) {
+      totalWeight += txp.uniqueCount();
+      alphasPrime[i] = 1.0;
+      ++numActive;
+    } else {
+      totalWeight += 1e-3 * effLens(i);
+      alphasPrime[i] = 1.0;
+      ++numActive;
     }
 
-    // If the user requested *not* to use "rich" equivalence classes,
-    // then wipe out all of the weight information here and simply replace
-    // the weights with the effective length terms (here, the *inverse* of
-    // the effective length).  Otherwise, multiply the existing weight terms
-    // by the effective length term.
-    tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
-            [&eqVec, &effLens, &posWeightInvDenoms, useFSPD, noRichEq]( const BlockedIndexRange& range) -> void {
-            // For each index in the equivalence class vector
-            for (auto eqID : boost::irange(range.begin(), range.end())) {
-                // The vector entry
-                auto& kv = eqVec[eqID];
-                // The label of the equivalence class
-                const TranscriptGroup& k = kv.first;
-                // The size of the label
-                size_t classSize = k.txps.size();
-                // The weights of the label
-                TGValue& v = kv.second;
-
-                // Iterate over each weight and set it
-                double wsum{0.0};
-
-		// If we don't have positional weights, then 
-		// create them here.
-		bool createdPosWeights{false};
-		if (v.weights.size() != v.posWeights.size()) {
-		  createdPosWeights = true;
-		  v.posWeights = std::vector<tbb::atomic<double>>(v.weights.size()); 
-		}
-
-                for (size_t i = 0; i < classSize; ++i) {
-        	    auto tid = k.txps[i];
-                    double el = effLens(tid);
-                    if (el <= 1.0) { el = 1.0; }
-                    if (noRichEq) {
-                        // Keep length factor separate for the time being
-                        v.weights[i] = 1.0;
-			// Pos weight
-			v.posWeights[i] = 1.0 / el;	
-                    } else if (createdPosWeights or !useFSPD) {
-		    // If the positional weights are new, then give them 
-		    // meaningful values.
-			v.posWeights[i] = 1.0 / el;	
-		    } 
-
-		    // combined weight
-		    v.combinedWeights.push_back( 
-			v.weights[i].load() * (v.posWeights[i].load() * posWeightInvDenoms[tid]));
-		    wsum += v.combinedWeights.back();
-                }
+    if (noRichEq or !useFSPD) {
+      posWeightInvDenoms(i) = 1.0;
+    } else {
+      auto& fragStartDist = fragStartDists[txp.lengthClassIndex()];
+      double denomFactor = fragStartDist.evalCDF(
+          static_cast<int32_t>(txp.EffectiveLength), txp.RefLength);
+      posWeightInvDenoms(i) = (denomFactor >= salmon::math::LOG_EPSILON)
+                                  ? std::exp(-denomFactor)
+                                  : (1e-5);
+    }
 
-                double wnorm = 1.0 / wsum;
-                for (size_t i = 0; i < classSize; ++i) {
-                  v.combinedWeights[i] = v.combinedWeights[i] * wnorm;
-                }
+    totalLen += effLens(i);
+  }
+
+  // Based on the number of observed reads, use
+  // a linear combination of the online estimates
+  // and the uniform distribution.
+  double uniformPrior = totalWeight / static_cast<double>(numActive);
+  // double fracObserved = 1.0;
+  double fracObserved = std::min(1.0, totalWeight / sopt.numRequiredFragments);
+  if (sopt.initUniform) { fracObserved = 0.0; }
+  for (size_t i = 0; i < alphas.size(); ++i) {
+    alphas[i] = (alphasPrime[i] == 1.0)
+                    ? ((alphas[i] * fracObserved) +
+                       (uniformPrior * (1.0 - fracObserved)))
+                    : 0.0;
+  }
+
+  // If the user requested *not* to use "rich" equivalence classes,
+  // then wipe out all of the weight information here and simply replace
+  // the weights with the effective length terms (here, the *inverse* of
+  // the effective length).  Otherwise, multiply the existing weight terms
+  // by the effective length term.
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(eqVec.size())),
+      [&eqVec, &effLens, &posWeightInvDenoms, useFSPD,
+       noRichEq](const BlockedIndexRange& range) -> void {
+        // For each index in the equivalence class vector
+        for (auto eqID : boost::irange(range.begin(), range.end())) {
+          // The vector entry
+          auto& kv = eqVec[eqID];
+          // The label of the equivalence class
+          const TranscriptGroup& k = kv.first;
+          // The size of the label
+          size_t classSize = k.txps.size();
+          // The weights of the label
+          TGValue& v = kv.second;
+
+          // Iterate over each weight and set it
+          double wsum{0.0};
+
+          // If we don't have positional weights, then
+          // create them here.
+          bool createdPosWeights{false};
+          if (v.weights.size() != v.posWeights.size()) {
+            createdPosWeights = true;
+            v.posWeights = std::vector<tbb::atomic<double>>(v.weights.size());
+          }
+
+          for (size_t i = 0; i < classSize; ++i) {
+            auto tid = k.txps[i];
+            double el = effLens(tid);
+            if (el <= 1.0) {
+              el = 1.0;
             }
-    });
-
-    auto numRemoved = markDegenerateClasses(eqVec, alphas, effLens, sopt.jointLog);
-    sopt.jointLog->info("Marked {} weighted equivalence classes as degenerate",
-            numRemoved);
-
-    size_t itNum{0};
-    double minAlpha = 1e-8;
-    double alphaCheckCutoff = 1e-2;
-    double cutoff = (useVBEM) ? (priorAlpha + minAlpha) : minAlpha;
-
-    // Iterations in which we will allow re-computing the effective lengths
-    // if bias-correction is enabled.
-    std::vector<uint32_t> recomputeIt{50, 500, 1000};
-
-    bool converged{false};
-    double maxRelDiff = -std::numeric_limits<double>::max();
-    while (itNum < minIter or (itNum < maxIter and !converged)) {
-        if (doBiasCorrect and
-            (find(recomputeIt.begin(), recomputeIt.end(), itNum) != recomputeIt.end())) {
-
-            jointLog->info("iteration {}, recomputing effective lengths", itNum);
-            effLens = salmon::utils::updateEffectiveLengths(
-                    readExp,
-                    effLens,
-                    alphas,
-                    expectedDist
-                    );
-            // Check for strangeness with the lengths.
-            for (size_t i = 0; i < effLens.size(); ++i) {
-                if (effLens(i) <= 0.0) {
-                    jointLog->warn("Transcript {} had length {}", i, effLens(i));
-                }
-		if (noRichEq or !useFSPD) {
-		  posWeightInvDenoms(i) = 1.0;
-		} else {
-		  auto& txp = transcripts[i];
-		  auto& fragStartDist = fragStartDists[txp.lengthClassIndex()];
-		  double denomFactor = fragStartDist.evalCDF(static_cast<int32_t>(effLens(i)), txp.RefLength);
-		  posWeightInvDenoms(i) = (denomFactor >= salmon::math::LOG_EPSILON) ? 
-		    std::exp(-denomFactor) : 1e-5;
-		}
+            if (noRichEq) {
+              // Keep length factor separate for the time being
+              v.weights[i] = 1.0;
+              // Pos weight
+              v.posWeights[i] = 1.0 / el;
+            } else if (createdPosWeights or !useFSPD) {
+              // If the positional weights are new, then give them
+              // meaningful values.
+              v.posWeights[i] = 1.0 / el;
             }
-	   updateEqClassWeights(eqVec, posWeightInvDenoms);
-        }
 
-        if (useVBEM) {
-            VBEMUpdate_(eqVec, transcripts, effLens, 
-                        priorAlpha, totalLen, alphas, alphasPrime, expTheta);
-        } else {
-            EMUpdate_(eqVec, transcripts, effLens, alphas, alphasPrime);
+            // combined weight
+            v.combinedWeights.push_back(
+                v.weights[i].load() *
+                (v.posWeights[i].load() * posWeightInvDenoms[tid]));
+            wsum += v.combinedWeights.back();
+          }
+
+          double wnorm = 1.0 / wsum;
+          for (size_t i = 0; i < classSize; ++i) {
+            v.combinedWeights[i] = v.combinedWeights[i] * wnorm;
+          }
         }
-
-        converged = true;
-        maxRelDiff = -std::numeric_limits<double>::max();
-        for (size_t i = 0; i < transcripts.size(); ++i) {
-            if (alphasPrime[i] > alphaCheckCutoff) {
-                double relDiff = std::abs(alphas[i] - alphasPrime[i]) / alphasPrime[i];
-                maxRelDiff = (relDiff > maxRelDiff) ? relDiff : maxRelDiff;
-                if (relDiff > relDiffTolerance) {
-                    converged = false;
-                }
-            }
-            alphas[i] = alphasPrime[i];
-            alphasPrime[i] = 0.0;
+      });
+
+  auto numRemoved =
+      markDegenerateClasses(eqVec, alphas, effLens, sopt.jointLog);
+  sopt.jointLog->info("Marked {} weighted equivalence classes as degenerate",
+                      numRemoved);
+
+  size_t itNum{0};
+  double minAlpha = 1e-8;
+  double alphaCheckCutoff = 1e-2;
+  double cutoff = minAlpha;
+
+  // Iterations in which we will allow re-computing the effective lengths
+  // if bias-correction is enabled.
+  // std::vector<uint32_t> recomputeIt{100, 500, 1000};
+
+  minIter = 100;
+
+  bool converged{false};
+  double maxRelDiff = -std::numeric_limits<double>::max();
+  bool needBias = doBiasCorrect;
+  //bool secondPass = false;
+  size_t targetIt{10};
+  
+  while (itNum < minIter or (itNum < maxIter and !converged) or needBias) {
+    if (needBias and (itNum > targetIt or converged)) {
+
+      jointLog->info("iteration {}, adjusting effective lengths to account for biases", itNum);
+      effLens = salmon::utils::updateEffectiveLengths(sopt, readExp, effLens,
+                                                      alphas, true);
+      //(itNum == recomputeIt.front()));
+
+      // Check for strangeness with the lengths.
+      for (size_t i = 0; i < effLens.size(); ++i) {
+        if (effLens(i) <= 0.0) {
+          jointLog->warn("Transcript {} had length {}", i, effLens(i));
         }
-
-        if (itNum % 100 == 0) {
-            jointLog->info("iteration = {} | max rel diff. = {}",
-                            itNum, maxRelDiff);
+        if (noRichEq or !useFSPD) {
+          posWeightInvDenoms(i) = 1.0;
+        } else {
+          auto& txp = transcripts[i];
+          auto& fragStartDist = fragStartDists[txp.lengthClassIndex()];
+          double denomFactor = fragStartDist.evalCDF(
+              static_cast<int32_t>(effLens(i)), txp.RefLength);
+          posWeightInvDenoms(i) = (denomFactor >= salmon::math::LOG_EPSILON)
+                                      ? std::exp(-denomFactor)
+                                      : 1e-5;
         }
-
-        ++itNum;
+      }
+      updateEqClassWeights(eqVec, posWeightInvDenoms, effLens);
+      needBias = false;
     }
 
-    jointLog->info("iteration = {} | max rel diff. = {}",
-                    itNum, maxRelDiff);
+    if (useVBEM) {
+      VBEMUpdate_(eqVec, transcripts, priorAlphas, totalLen, alphas, alphasPrime,
+                  expTheta);
+    } else {
+      EMUpdate_(eqVec, transcripts, alphas, alphasPrime);
+    }
 
-    // Truncate tiny expression values
-    double alphaSum = truncateCountVector(alphas, cutoff);
+    converged = true;
+    maxRelDiff = -std::numeric_limits<double>::max();
+    for (size_t i = 0; i < transcripts.size(); ++i) {
+      if (alphasPrime[i] > alphaCheckCutoff) {
+        double relDiff = std::abs(alphas[i] - alphasPrime[i]) / alphasPrime[i];
+        maxRelDiff = (relDiff > maxRelDiff) ? relDiff : maxRelDiff;
+        if (relDiff > relDiffTolerance) {
+          converged = false;
+        }
+      }
+      alphas[i] = alphasPrime[i];
+      alphasPrime[i] = 0.0;
+    }
 
-    if (alphaSum < minWeight) {
-        jointLog->error("Total alpha weight was too small! "
-                        "Make sure you ran salmon correclty.");
-        return false;
+    if (itNum % 100 == 0) {
+      jointLog->info("iteration = {} | max rel diff. = {}", itNum, maxRelDiff);
     }
 
-    // Set the mass of each transcript using the
-    // computed alphas.
-    for (size_t i = 0; i < transcripts.size(); ++i) {
-        // Set the mass to the normalized (after truncation)
-        // relative abundance
-        // If we changed the effective lengths, copy them over here
-        if (doBiasCorrect) { transcripts[i].EffectiveLength = effLens(i); }
-        transcripts[i].setSharedCount(alphas[i]);
-        transcripts[i].setMass(alphas[i] / alphaSum);
+    ++itNum;
+  }
+
+  // Reset the original bias correction options
+  sopt.gcBiasCorrect = gcBiasCorrect;
+  sopt.biasCorrect = seqBiasCorrect;
+
+  jointLog->info("iteration = {} | max rel diff. = {}", itNum, maxRelDiff);
+
+  double alphaSum = 0.0;
+  if (useVBEM and !perTranscriptPrior) {
+      std::vector<double> cutoffs(transcripts.size(), 0.0);
+      for (size_t i = 0; i < transcripts.size(); ++i) {
+	cutoffs[i] = priorAlphas[i] + minAlpha;
+      }
+      //alphaSum = truncateCountVector(alphas, cutoffs);
+      alphaSum = truncateCountVector(alphas, cutoffs);
+  } else {
+      // Truncate tiny expression values
+      alphaSum = truncateCountVector(alphas, cutoff);
+  }
+
+  if (alphaSum < minWeight) {
+    jointLog->error("Total alpha weight was too small! "
+                    "Make sure you ran salmon correclty.");
+    return false;
+  }
+
+  // Set the mass of each transcript using the
+  // computed alphas.
+  for (size_t i = 0; i < transcripts.size(); ++i) {
+    // Set the mass to the normalized (after truncation)
+    // relative abundance
+    // If we changed the effective lengths, copy them over here
+    if (doBiasCorrect) {
+      transcripts[i].EffectiveLength = effLens(i);
     }
-    return true;
+    transcripts[i].setSharedCount(alphas[i]);
+    transcripts[i].setMass(alphas[i] / alphaSum);
+  }
+  return true;
 }
 
-template
-bool CollapsedEMOptimizer::optimize<ReadExperiment>(ReadExperiment& readExp,
-        SalmonOpts& sopt,
-        double relDiffTolerance,
-        uint32_t maxIter);
-
-template
-bool CollapsedEMOptimizer::optimize<AlignmentLibrary<UnpairedRead>>(
-        AlignmentLibrary<UnpairedRead>& readExp,
-        SalmonOpts& sopt,
-        double relDiffTolerance,
-        uint32_t maxIter);
-
-
-template
-bool CollapsedEMOptimizer::optimize<AlignmentLibrary<ReadPair>>(
-        AlignmentLibrary<ReadPair>& readExp,
-        SalmonOpts& sopt,
-        double relDiffTolerance,
-        uint32_t maxIter);
-
-
-template
-bool CollapsedEMOptimizer::gatherBootstraps<ReadExperiment>(
-        ReadExperiment& readExp,
-        SalmonOpts& sopt,
-        std::function<bool(const std::vector<double>&)>& writeBootstrap,
-        double relDiffTolerance,
-        uint32_t maxIter);
-
-
-template
-bool CollapsedEMOptimizer::gatherBootstraps<AlignmentLibrary<UnpairedRead>>(
-        AlignmentLibrary<UnpairedRead>& readExp,
-        SalmonOpts& sopt,
-        std::function<bool(const std::vector<double>&)>& writeBootstrap,
-        double relDiffTolerance,
-        uint32_t maxIter);
-
-
-template
-bool CollapsedEMOptimizer::gatherBootstraps<AlignmentLibrary<ReadPair>>(
-        AlignmentLibrary<ReadPair>& readExp,
-        SalmonOpts& sopt,
-        std::function<bool(const std::vector<double>&)>& writeBootstrap,
-        double relDiffTolerance,
-        uint32_t maxIter);
+template bool CollapsedEMOptimizer::optimize<ReadExperiment>(
+    ReadExperiment& readExp, SalmonOpts& sopt, double relDiffTolerance,
+    uint32_t maxIter);
 
-// Unused / old
+template bool CollapsedEMOptimizer::optimize<AlignmentLibrary<UnpairedRead>>(
+    AlignmentLibrary<UnpairedRead>& readExp, SalmonOpts& sopt,
+    double relDiffTolerance, uint32_t maxIter);
 
+template bool CollapsedEMOptimizer::optimize<AlignmentLibrary<ReadPair>>(
+    AlignmentLibrary<ReadPair>& readExp, SalmonOpts& sopt,
+    double relDiffTolerance, uint32_t maxIter);
+
+template bool CollapsedEMOptimizer::gatherBootstraps<ReadExperiment>(
+    ReadExperiment& readExp, SalmonOpts& sopt,
+    std::function<bool(const std::vector<double>&)>& writeBootstrap,
+    double relDiffTolerance, uint32_t maxIter);
+
+template bool
+CollapsedEMOptimizer::gatherBootstraps<AlignmentLibrary<UnpairedRead>>(
+    AlignmentLibrary<UnpairedRead>& readExp, SalmonOpts& sopt,
+    std::function<bool(const std::vector<double>&)>& writeBootstrap,
+    double relDiffTolerance, uint32_t maxIter);
+
+template bool
+CollapsedEMOptimizer::gatherBootstraps<AlignmentLibrary<ReadPair>>(
+    AlignmentLibrary<ReadPair>& readExp, SalmonOpts& sopt,
+    std::function<bool(const std::vector<double>&)>& writeBootstrap,
+    double relDiffTolerance, uint32_t maxIter);
+
+// Unused / old
diff --git a/src/CollapsedGibbsSampler.cpp b/src/CollapsedGibbsSampler.cpp
index 56aa007..96b10c8 100644
--- a/src/CollapsedGibbsSampler.cpp
+++ b/src/CollapsedGibbsSampler.cpp
@@ -15,7 +15,7 @@
 #include <boost/filesystem.hpp>
 
 // C++ string formatting library
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 
 #include "cuckoohash_map.hh"
 #include "Eigen/Dense"
diff --git a/src/DistributionUtils.cpp b/src/DistributionUtils.cpp
new file mode 100644
index 0000000..ce97547
--- /dev/null
+++ b/src/DistributionUtils.cpp
@@ -0,0 +1,155 @@
+#include "DistributionUtils.hpp"
+#include "FragmentLengthDistribution.hpp"
+#include "Transcript.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <random>
+#include <vector>
+
+namespace distribution_utils {
+
+/**
+ * Build a table of running weighted means from a per-length mass vector.
+ *
+ * For every index i >= 1 whose cumulative mass is positive, entry i of the
+ * result is sum_{j=1..i}(j * mass[j]) / sum_{j=0..i}(mass[j]). Entry 0 and
+ * every entry whose cumulative mass is not positive are left at 0.
+ *
+ * @param mass       mass[i] is the weight associated with length i.
+ * @param inputSpace not consulted by this implementation; mass is used as-is.
+ * @return a vector with the same size as mass (empty when mass is empty).
+ */
+std::vector<double> correctionFactorsFromMass(std::vector<double>& mass,
+                                              DistributionSpace inputSpace) {
+  const auto maxLen = mass.size();
+
+  std::vector<double> correctionFactors(maxLen, 0.0);
+  // An empty input has no mass[0] to seed the running total with.
+  if (maxLen == 0) {
+    return correctionFactors;
+  }
+
+  // Running totals of (length * mass) and of mass; only the previous value of
+  // each is ever needed, so scalars replace the former per-length arrays.
+  double weightedSum{0.0};
+  double cumulativeMass{mass[0]};
+
+  for (size_t i = 1; i < maxLen; ++i) {
+    const double v = mass[i];
+    weightedSum = static_cast<double>(v * i) + weightedSum;
+    cumulativeMass = v + cumulativeMass;
+    if (cumulativeMass > 0) {
+      correctionFactors[i] = weightedSum / cumulativeMass;
+    }
+  }
+  return correctionFactors;
+}
+
+/**
+ * Assign a smoothed effective length to every transcript.
+ *
+ * The value is RefLength - correctionFactors[min(RefLength, maxLength - 1)]
+ * + 1; if that is below 1, or if no correction table entry is available, the
+ * reference length is used instead.
+ *
+ * @param maxLength         number of leading table entries that may be used.
+ * @param transcripts       transcripts to update in place.
+ * @param correctionFactors per-length correction table.
+ * @param outputSpace       DistributionSpace::LOG stores the natural log via
+ *                          setCachedLogEffectiveLength(); any other value
+ *                          stores the plain length in EffectiveLength.
+ */
+void computeSmoothedEffectiveLengths(size_t maxLength,
+                                     std::vector<Transcript>& transcripts,
+                                     std::vector<double>& correctionFactors,
+                                     DistributionSpace outputSpace) {
+  // Usable table length: never index past the vector, even if maxLength
+  // overstates its size.
+  const size_t tableLen = std::min(maxLength, correctionFactors.size());
+
+  for (auto& txp : transcripts) {
+    const auto origLen = static_cast<double>(txp.RefLength);
+    double effLen = origLen;
+
+    // With an empty table, tableLen - 1 would wrap around; keep origLen.
+    if (tableLen > 0) {
+      // Index with an integer (not the double) and clamp to the last entry.
+      const size_t idx =
+          std::min(static_cast<size_t>(txp.RefLength), tableLen - 1);
+      effLen = origLen - correctionFactors[idx] + 1.0;
+      if (effLen < 1.0) {
+        effLen = origLen;
+      }
+    }
+
+    if (outputSpace == DistributionSpace::LOG) {
+      txp.setCachedLogEffectiveLength(std::log(effLen));
+    } else {
+      txp.EffectiveLength = effLen;
+    }
+  }
+}
+
+/**
+ * Draw numSamples values from the fragment length distribution and return
+ * them as a histogram.
+ *
+ * The log-space PMF reported by fld->dumpPMF() is normalized, exponentiated
+ * and used as the weights of a std::discrete_distribution driven by a
+ * std::mt19937 seeded from std::random_device.
+ *
+ * @param fld        distribution to sample from; dereferenced unconditionally.
+ * @param numSamples number of draws; no draws are made when it is <= 0.
+ * @return a vector of size maxVal + 1 whose entry i counts the draws equal to
+ *         i. All entries are 0 if the PMF has no positive total weight.
+ */
+std::vector<int32_t> samplesFromLogPMF(FragmentLengthDistribution* fld,
+                                       int32_t numSamples) {
+  std::vector<double> logPMF;
+  size_t minVal{0};
+  size_t maxVal{0};
+  fld->dumpPMF(logPMF, minVal, maxVal);
+
+  // Normalize in log space.
+  double sum = salmon::math::LOG_0;
+  for (auto v : logPMF) {
+    sum = salmon::math::logAdd(sum, v);
+  }
+  for (auto& v : logPMF) {
+    v -= sum;
+  }
+
+  // Create the non-logged pmf, indexed by absolute value.
+  std::vector<double> pmf(maxVal + 1, 0.0);
+  // Stop at maxVal or at the end of logPMF, whichever comes first, so that a
+  // short logPMF is never read out of bounds.
+  // NOTE(review): maxVal itself is excluded, exactly as before; confirm
+  // against dumpPMF() whether the last bin is meant to be dropped.
+  const size_t last = std::min(maxVal, minVal + logPMF.size());
+  double totalWeight{0.0};
+  for (size_t i = minVal; i < last; ++i) {
+    pmf[i] = std::exp(logPMF[i - minVal]);
+    totalWeight += pmf[i];
+  }
+
+  std::vector<int32_t> samples(pmf.size());
+  // std::discrete_distribution requires a positive total weight; written as
+  // a negated comparison so that a NaN total is rejected too.
+  if (!(totalWeight > 0.0)) {
+    return samples;
+  }
+
+  // generate samples
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::discrete_distribution<int32_t> dist(pmf.begin(), pmf.end());
+
+  for (int32_t i = 0; i < numSamples; ++i) {
+    ++samples[dist(gen)];
+  }
+  return samples;
+}
+}
diff --git a/src/FASTAParser.cpp b/src/FASTAParser.cpp
index b5409cc..bfb5236 100644
--- a/src/FASTAParser.cpp
+++ b/src/FASTAParser.cpp
@@ -11,10 +11,11 @@
 #include "FASTAParser.hpp"
 #include "Transcript.hpp"
 #include "SalmonStringUtils.hpp"
+#include "SalmonOpts.hpp"
 
 FASTAParser::FASTAParser(const std::string& fname): fname_(fname) {}
 
-void FASTAParser::populateTargets(std::vector<Transcript>& refs) {
+void FASTAParser::populateTargets(std::vector<Transcript>& refs, SalmonOpts& sopt) {
     using stream_manager = jellyfish::stream_manager<std::vector<std::string>::const_iterator>;
     using single_parser = jellyfish::whole_sequence_parser<stream_manager>;
 
@@ -51,12 +52,12 @@ void FASTAParser::populateTargets(std::vector<Transcript>& refs) {
             if (it == nameToID.end()) {
                 std::cerr << "WARNING: Transcript " << name << " appears in the reference but did not appear in the BAM\n";
             } else {
-	      
+
 	      std::string& seq = j->data[i].seq;
               size_t readLen = seq.length();
 
-	      refs[it->second].SAMSequence = salmon::stringtools::encodeSequenceInSAM(seq.c_str(), readLen);
-	      
+	      refs[it->second].setSAMSequenceOwned(salmon::stringtools::encodeSequenceInSAM(seq.c_str(), readLen));
+
 	      // Replace non-ACGT bases
 	      for (size_t b = 0; b < readLen; ++b) {
 		seq[b] = ::toupper(seq[b]);
@@ -71,10 +72,9 @@ void FASTAParser::populateTargets(std::vector<Transcript>& refs) {
 	      }
 
 	      // allocate space for the new copy
-	      char* seqCopy = new char[seq.length()+1]; 
-	      std::strcpy(seqCopy, seq.c_str()); 
-	      refs[it->second].Sequence = seqCopy;
-	      refs[it->second].freeSeqOnDestruct = true;
+	      char* seqCopy = new char[seq.length()+1];
+	      std::strcpy(seqCopy, seq.c_str());
+	      refs[it->second].setSequenceOwned(seqCopy, sopt.gcBiasCorrect, sopt.gcSampFactor);
 	      // seqCopy will only be freed when the transcript is destructed!
             }
         }
diff --git a/src/FastxParser.cpp b/src/FastxParser.cpp
new file mode 100644
index 0000000..7c2faec
--- /dev/null
+++ b/src/FastxParser.cpp
@@ -0,0 +1,306 @@
+#include "FastxParser.hpp"
+
+#include "fcntl.h"
+#include "unistd.h"
+#include <atomic>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <poll.h>
+#include <thread>
+#include <vector>
+#include <zlib.h>
+
+// STEP 1: declare the type of file handler and the read() function
+KSEQ_INIT(gzFile, gzread)
+
+namespace fastx_parser {
+template <typename T> // Single-file-list constructor: delegates to the two-list constructor with an empty second list.
+FastxParser<T>::FastxParser(std::vector<std::string> files,
+                            uint32_t numConsumers, uint32_t numParsers,
+                            uint32_t chunkSize)
+    : FastxParser(files, {}, numConsumers, numParsers, chunkSize) {}
+
+// Two-list constructor.  Caps the number of parser threads at the number of
+// input files, builds the work/read/container queues, creates one token pair
+// per parser thread and pre-fills the container queue with empty read chunks.
+template <typename T>
+FastxParser<T>::FastxParser(std::vector<std::string> files,
+                            std::vector<std::string> files2,
+                            uint32_t numConsumers, uint32_t numParsers,
+                            uint32_t chunkSize)
+    : inputStreams_(files), inputStreams2_(files2), numParsing_(0),
+      blockSize_(chunkSize) {
+  if (numParsers > files.size()) {
+    // Typos fixed and newline added so the warning ends its own stderr line.
+    std::cerr << "Can't make use of more parsing threads than files (pairs); "
+                 "setting # of parsing threads to "
+              << files.size() << '\n';
+    numParsers = static_cast<uint32_t>(files.size());
+  }
+  numParsers_ = numParsers;
+
+  readQueue_ = moodycamel::ConcurrentQueue<std::unique_ptr<ReadChunk<T>>>(
+      4 * numConsumers, numParsers, 0);
+
+  seqContainerQueue_ =
+      moodycamel::ConcurrentQueue<std::unique_ptr<ReadChunk<T>>>(
+          4 * numConsumers, 1 + numConsumers, 0);
+
+  workQueue_ = moodycamel::ConcurrentQueue<uint32_t>(numParsers_);
+
+  // push all file ids on the queue
+  for (size_t i = 0; i < files.size(); ++i) {
+    workQueue_.enqueue(i);
+  }
+
+  // every parsing thread gets a consumer token for the seqContainerQueue
+  // and a producer token for the readQueue.
+  for (size_t i = 0; i < numParsers_; ++i) {
+    consumeContainers_.emplace_back(
+        new moodycamel::ConsumerToken(seqContainerQueue_));
+    produceReads_.emplace_back(new moodycamel::ProducerToken(readQueue_));
+  }
+
+  // enqueue the appropriate number of read chunks so that we can start
+  // filling them once the parser has been started.
+  moodycamel::ProducerToken produceContainer(seqContainerQueue_);
+  for (size_t i = 0; i < 4 * numConsumers; ++i) {
+    auto chunk = make_unique<ReadChunk<T>>(blockSize_);
+    seqContainerQueue_.enqueue(produceContainer, std::move(chunk));
+  }
+}
+
+template <typename T> ReadGroup<T> FastxParser<T>::getReadGroup() { // builds a ReadGroup from a fresh producer token and a fresh consumer token
+  return ReadGroup<T>(getProducerToken_(), getConsumerToken_());
+}
+
+template <typename T> // Creates a producer token bound to seqContainerQueue_ (the queue of empty chunks).
+moodycamel::ProducerToken FastxParser<T>::getProducerToken_() {
+  return moodycamel::ProducerToken(seqContainerQueue_);
+}
+
+template <typename T> // Creates a consumer token bound to readQueue_ (the queue of filled chunks).
+moodycamel::ConsumerToken FastxParser<T>::getConsumerToken_() {
+  return moodycamel::ConsumerToken(readQueue_);
+}
+
+template <typename T> FastxParser<T>::~FastxParser() { // blocks until every thread in parsingThreads_ has finished
+  for (auto& t : parsingThreads_) {
+    t->join();
+  }
+}
+
+inline void copyRecord(kseq_t* seq, ReadSeq* s) { // copies one kseq record into s; only the sequence and name fields are taken
+  // Copy over the sequence and read name
+  s->seq.assign(seq->seq.s, seq->seq.l); // pointer + length form of assign
+  s->name.assign(seq->name.s, seq->name.l);
+}
+
+// Parser-thread body for single-end input: reads each file taken from workQueue
+// into pooled chunks and publishes them on readQueue_; decrements numParsing at exit.
+template <typename T>
+void parseReads(
+    std::vector<std::string>& inputStreams, std::atomic<uint32_t>& numParsing,
+    moodycamel::ConsumerToken* cCont, moodycamel::ProducerToken* pRead,
+    moodycamel::ConcurrentQueue<uint32_t>& workQueue,
+    moodycamel::ConcurrentQueue<std::unique_ptr<ReadChunk<T>>>&
+        seqContainerQueue_,
+    moodycamel::ConcurrentQueue<std::unique_ptr<ReadChunk<T>>>& readQueue_) {
+  // Kept across files so a chunk that was fetched but never filled is reused
+  // for the next file rather than destroyed and lost from the pool.
+  std::unique_ptr<ReadChunk<T>> local;
+  uint32_t fn{0};
+  while (workQueue.try_dequeue(fn)) {
+    const auto& file = inputStreams[fn];
+    // gzopen returns NULL on failure; report it instead of parsing nothing.
+    auto fp = gzopen(file.c_str(), "r");
+    if (fp == nullptr) {
+      std::cerr << "ERROR: could not open read file " << file << '\n';
+      continue;
+    }
+    if (!local) {
+      while (!seqContainerQueue_.try_dequeue(*cCont, local)) {
+        std::cerr << "couldn't dequeue read chunk\n";
+      }
+    }
+    size_t numObtained{local->size()};
+    // The number of reads we have in the local vector
+    size_t numWaiting{0};
+    kseq_t* seq = kseq_init(fp);
+    int ksv = kseq_read(seq);
+    while (ksv >= 0) {
+      T* s = &((*local)[numWaiting++]);
+      copyRecord(seq, s);
+      // If we've filled the local vector, then dump to the concurrent queue
+      if (numWaiting == numObtained) {
+        while (!readQueue_.try_enqueue(std::move(local))) {
+        }
+        numWaiting = 0;
+        // And get more empty reads
+        while (!seqContainerQueue_.try_dequeue(*cCont, local)) {
+        }
+        numObtained = local->size();
+      }
+      ksv = kseq_read(seq);
+    }
+    // If we hit the end of the file and have any reads in our local buffer
+    // then dump them here.
+    if (numWaiting > 0) {
+      local->have(numWaiting);
+      while (!readQueue_.try_enqueue(*pRead, std::move(local))) {
+      }
+    }
+    // destroy the parser and close the file
+    kseq_destroy(seq);
+    gzclose(fp);
+  }
+
+  --numParsing;
+}
+
+// Parser-thread body for paired-end input: reads file fn of both lists in lock
+// step (stopping at the shorter one) and publishes filled chunks on readQueue_.
+template <typename T>
+void parseReadPair(
+    std::vector<std::string>& inputStreams,
+    std::vector<std::string>& inputStreams2, std::atomic<uint32_t>& numParsing,
+    moodycamel::ConsumerToken* cCont, moodycamel::ProducerToken* pRead,
+    moodycamel::ConcurrentQueue<uint32_t>& workQueue,
+    moodycamel::ConcurrentQueue<std::unique_ptr<ReadChunk<T>>>&
+        seqContainerQueue_,
+    moodycamel::ConcurrentQueue<std::unique_ptr<ReadChunk<T>>>& readQueue_) {
+  // Kept across files so a fetched-but-unfilled chunk is reused rather than
+  // destroyed and lost from the pool.
+  std::unique_ptr<ReadChunk<T>> local;
+  uint32_t fn{0};
+  while (workQueue.try_dequeue(fn)) {
+    auto& file = inputStreams[fn];
+    auto& file2 = inputStreams2[fn];
+    // gzopen returns NULL on failure; report it instead of parsing nothing.
+    auto fp = gzopen(file.c_str(), "r");
+    auto fp2 = gzopen(file2.c_str(), "r");
+    if (fp == nullptr or fp2 == nullptr) {
+      std::cerr << "ERROR: could not open read file pair " << file << " / "
+                << file2 << '\n';
+      if (fp != nullptr) { gzclose(fp); }
+      if (fp2 != nullptr) { gzclose(fp2); }
+      continue;
+    }
+    if (!local) {
+      while (!seqContainerQueue_.try_dequeue(*cCont, local)) {
+        std::cerr << "couldn't dequeue read chunk\n";
+      }
+    }
+    size_t numObtained{local->size()};
+    // The number of reads we have in the local vector
+    size_t numWaiting{0};
+    kseq_t* seq = kseq_init(fp);
+    kseq_t* seq2 = kseq_init(fp2);
+    int ksv = kseq_read(seq);
+    int ksv2 = kseq_read(seq2);
+    while (ksv >= 0 and ksv2 >= 0) {
+      T* s = &((*local)[numWaiting++]);
+      copyRecord(seq, &s->first);
+      copyRecord(seq2, &s->second);
+
+      // If we've filled the local vector, then dump to the concurrent queue
+      if (numWaiting == numObtained) {
+        while (!readQueue_.try_enqueue(std::move(local))) {
+        }
+        numWaiting = 0;
+        // And get more empty reads
+        while (!seqContainerQueue_.try_dequeue(*cCont, local)) {
+        }
+        numObtained = local->size();
+      }
+      ksv = kseq_read(seq);
+      ksv2 = kseq_read(seq2);
+    }
+
+    // If we hit the end of the file and have any reads in our local buffer
+    // then dump them here.
+    if (numWaiting > 0) {
+      local->have(numWaiting);
+      while (!readQueue_.try_enqueue(*pRead, std::move(local))) {
+      }
+    }
+    // destroy the parser and close the file
+    kseq_destroy(seq);
+    gzclose(fp);
+    kseq_destroy(seq2);
+    gzclose(fp2);
+  }
+
+  --numParsing;
+}
+
+template <> bool FastxParser<ReadSeq>::start() { // launches numParsers_ parseReads threads; returns false (and does nothing) when numParsing_ is non-zero
+  if (numParsing_ == 0) {
+    for (size_t i = 0; i < numParsers_; ++i) {
+      ++numParsing_; // counted before the thread is created; parseReads decrements it when it runs out of files
+      parsingThreads_.emplace_back(new std::thread([this, i]() {
+        parseReads(this->inputStreams_, this->numParsing_,
+                   this->consumeContainers_[i].get(), // thread i uses the i-th token pair
+                   this->produceReads_[i].get(), this->workQueue_,
+                   this->seqContainerQueue_, this->readQueue_);
+      }));
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
+template <> bool FastxParser<ReadPair>::start() { // validates the two file lists, then launches numParsers_ parseReadPair threads; false when numParsing_ is non-zero
+  if (numParsing_ == 0) {
+
+    // Some basic checking to ensure the read files look "sane".
+    if (inputStreams_.size() != inputStreams2_.size()) {
+      throw std::invalid_argument("There should be the same number "
+                                  "of files for the left and right reads");
+    }
+    for (size_t i = 0; i < inputStreams_.size(); ++i) {
+      auto& s1 = inputStreams_[i];
+      auto& s2 = inputStreams2_[i];
+      if (s1 == s2) { // compares the path strings only, at the same list index
+        throw std::invalid_argument("You provided the same file " + s1 +
+                                    " as both a left and right file");
+      }
+    }
+    for (size_t i = 0; i < numParsers_; ++i) {
+      ++numParsing_; // counted before the thread is created; parseReadPair decrements it when it runs out of files
+      parsingThreads_.emplace_back(new std::thread([this, i]() {
+        parseReadPair(this->inputStreams_, this->inputStreams2_,
+                      this->numParsing_, this->consumeContainers_[i].get(),
+                      this->produceReads_[i].get(), this->workQueue_,
+                      this->seqContainerQueue_, this->readQueue_);
+      }));
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
+template <typename T> bool FastxParser<T>::refill(ReadGroup<T>& seqs) { // hands back seqs' current chunk, then tries to fetch the next filled one; false when none could be fetched
+  finishedWithGroup(seqs);
+  while (numParsing_ > 0) { // busy-waits (no sleep or yield) while any parser thread is still counted as running
+    if (readQueue_.try_dequeue(seqs.consumerToken(), seqs.chunkPtr())) {
+      return true;
+    }
+  }
+  return readQueue_.try_dequeue(seqs.consumerToken(), seqs.chunkPtr()); // parsers have finished: one final attempt
+}
+
+template <typename T> void FastxParser<T>::finishedWithGroup(ReadGroup<T>& s) { // returns s's chunk to seqContainerQueue_ for reuse; no-op when s is empty
+  // If this read group is holding a valid chunk, then give it back
+  if (!s.empty()) {
+    seqContainerQueue_.enqueue(s.producerToken(), std::move(s.takeChunkPtr()));
+    s.setChunkEmpty();
+  }
+}
+
+template class FastxParser<ReadSeq>;
+template class FastxParser<ReadPair>;
+}
diff --git a/src/GZipWriter.cpp b/src/GZipWriter.cpp
index 651d452..b8b94d1 100644
--- a/src/GZipWriter.cpp
+++ b/src/GZipWriter.cpp
@@ -3,6 +3,7 @@
 
 #include "cereal/archives/json.hpp"
 
+#include "DistributionUtils.hpp"
 #include "GZipWriter.hpp"
 #include "SalmonOpts.hpp"
 #include "ReadExperiment.hpp"
@@ -47,6 +48,72 @@ bool writeVectorToFile(boost::filesystem::path path,
 }
 
 /**
+ * Write the equivalence class information to file.
+ * The header will contain the transcript / target ids in
+ * a fixed order, then each equivalence class will consist
+ * of a line / row.
+ */
+template <typename ExpT>
+bool GZipWriter::writeEquivCounts(
+    const SalmonOpts& opts,
+    ExpT& experiment) {
+  // Layout of eq_classes.txt: transcript count, class count, one transcript
+  // name per line, then one line per class: "size<TAB>ids...<TAB>count".
+  namespace bfs = boost::filesystem;
+  bfs::path auxDir = path_ / opts.auxDir;
+  // The result is false when the directory already exists, so it is not checked.
+  boost::filesystem::create_directories(auxDir);
+  bfs::path eqFilePath = auxDir / "eq_classes.txt";
+  std::ofstream equivFile(eqFilePath.string());
+  if (!equivFile.is_open()) {
+    // Previously a file that could not be opened was reported as success.
+    return false;
+  }
+  auto& transcripts = experiment.transcripts();
+  std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
+        experiment.equivalenceClassBuilder().eqVec();
+
+  // Number of transcripts
+  equivFile << transcripts.size() << '\n';
+  // Number of equivalence classes
+  equivFile << eqVec.size() << '\n';
+  for (auto& t : transcripts) {
+    equivFile << t.RefName << '\n';
+  }
+
+  for (auto& eq : eqVec) {
+    uint64_t count = eq.second.count;
+    // for each transcript in this class
+    const std::vector<uint32_t>& txps = eq.first.txps;
+    // group size
+    equivFile << txps.size() << '\t';
+    // each group member
+    for (auto tid : txps) { equivFile << tid << '\t'; }
+    // count for this class
+    equivFile << count << '\n';
+  }
+  equivFile.close();
+  // Report write/close failures instead of always returning true.
+  return !equivFile.fail();
+}
+
+// Collects the string form of every read library's format, in library order.
+std::vector<std::string> getLibTypeStrings(const ReadExperiment& experiment) {
+  std::vector<std::string> formatNames;
+  for (auto& lib : experiment.readLibraries()) {
+    formatNames.push_back(lib.getFormat().toString());
+  }
+  return formatNames;
+}
+
+// An alignment library exposes a single format, so exactly one string is returned.
+template <typename AlnT>
+std::vector<std::string> getLibTypeStrings(const AlignmentLibrary<AlnT>& experiment) {
+  std::vector<std::string> formatNames{experiment.format().toString()};
+  return formatNames;
+}
+
+/**
  * Write the ``main'' metadata to file.  Currently this includes:
  *   -- Names of the target id's if bootstrapping / gibbs is performed
  *   -- The fragment length distribution
@@ -62,7 +129,7 @@ bool GZipWriter::writeMeta(
 
   namespace bfs = boost::filesystem;
 
-  bfs::path auxDir = path_ / "aux";
+  bfs::path auxDir = path_ / opts.auxDir;
   bool auxSuccess = boost::filesystem::create_directories(auxDir);
 
   auto numBootstraps = opts.numBootstraps;
@@ -95,19 +162,103 @@ bool GZipWriter::writeMeta(
 
   bfs::path fldPath = auxDir / "fld.gz";
   int32_t numFLDSamples{10000};
-  auto fldSamples = salmon::utils::samplesFromLogPMF(
+  auto fldSamples = distribution_utils::samplesFromLogPMF(
                         experiment.fragmentLengthDistribution(), numFLDSamples);
   writeVectorToFile(fldPath, fldSamples);
 
   bfs::path normBiasPath = auxDir / "expected_bias.gz";
-  writeVectorToFile(normBiasPath, experiment.expectedBias());
+  writeVectorToFile(normBiasPath, experiment.expectedSeqBias());
 
   bfs::path obsBiasPath = auxDir / "observed_bias.gz";
-  const auto& bcounts = experiment.readBias().counts;
+  // TODO: dump both sense and anti-sense models
+  const auto& bcounts = experiment.readBias(salmon::utils::Direction::FORWARD).counts;
   std::vector<int32_t> observedBias(bcounts.size(), 0);
   std::copy(bcounts.begin(), bcounts.end(), observedBias.begin());
   writeVectorToFile(obsBiasPath, observedBias);
+  
+  bfs::path obsBiasPath3p = auxDir / "observed_bias_3p.gz";
+  const auto& bcounts3p = experiment.readBias(salmon::utils::Direction::REVERSE_COMPLEMENT).counts;
+  std::vector<int32_t> observedBias3p(bcounts3p.size(), 0);
+  std::copy(bcounts3p.begin(), bcounts3p.end(), observedBias3p.begin());
+  writeVectorToFile(obsBiasPath3p, observedBias3p);
+
+  if (opts.biasCorrect) {
+    // 5' observed
+    {
+      bfs::path obs5Path = auxDir / "obs5_seq.gz";
+      auto flags = std::ios_base::out | std::ios_base::binary;
+      boost::iostreams::filtering_ostream out;
+      out.push(boost::iostreams::gzip_compressor(6));
+      out.push(boost::iostreams::file_sink(obs5Path.string(), flags));
+      auto& obs5 = experiment.readBiasModelObserved(salmon::utils::Direction::FORWARD);
+      obs5.writeBinary(out);
+    }
+    // 3' observed
+    {
+      bfs::path obs3Path = auxDir / "obs3_seq.gz";
+      auto flags = std::ios_base::out | std::ios_base::binary;
+      boost::iostreams::filtering_ostream out;
+      out.push(boost::iostreams::gzip_compressor(6));
+      out.push(boost::iostreams::file_sink(obs3Path.string(), flags));
+      auto& obs3 = experiment.readBiasModelObserved(salmon::utils::Direction::REVERSE_COMPLEMENT);
+      obs3.writeBinary(out);
+    }
+
+    // 5' expected
+    {
+      bfs::path exp5Path = auxDir / "exp5_seq.gz";
+      auto flags = std::ios_base::out | std::ios_base::binary;
+      boost::iostreams::filtering_ostream out;
+      out.push(boost::iostreams::gzip_compressor(6));
+      out.push(boost::iostreams::file_sink(exp5Path.string(), flags));
+      auto& exp5 = experiment.readBiasModelExpected(salmon::utils::Direction::FORWARD);
+      exp5.writeBinary(out);
+    }
+    // 3' expected
+    {
+      bfs::path exp3Path = auxDir / "exp3_seq.gz";
+      auto flags = std::ios_base::out | std::ios_base::binary;
+      boost::iostreams::filtering_ostream out;
+      out.push(boost::iostreams::gzip_compressor(6));
+      out.push(boost::iostreams::file_sink(exp3Path.string(), flags));
+      auto& exp3 = experiment.readBiasModelExpected(salmon::utils::Direction::REVERSE_COMPLEMENT);
+      exp3.writeBinary(out);
+    }
+  }
 
+  if (opts.gcBiasCorrect) {
+      // GC observed 
+      {
+          bfs::path obsGCPath = auxDir / "obs_gc.gz";
+          auto flags = std::ios_base::out | std::ios_base::binary;
+          boost::iostreams::filtering_ostream out;
+          out.push(boost::iostreams::gzip_compressor(6));
+          out.push(boost::iostreams::file_sink(obsGCPath.string(), flags));
+          auto& obsgc = experiment.observedGC();
+          obsgc.writeBinary(out);
+      }
+      // GC expected 
+      {
+          bfs::path expGCPath = auxDir / "exp_gc.gz";
+          auto flags = std::ios_base::out | std::ios_base::binary;
+          boost::iostreams::filtering_ostream out;
+          out.push(boost::iostreams::gzip_compressor(6));
+          out.push(boost::iostreams::file_sink(expGCPath.string(), flags));
+          auto& expgc = experiment.expectedGCBias();
+          expgc.writeBinary(out);
+      }
+  }
+  /*
+  bfs::path normGCPath = auxDir / "expected_gc.gz";
+  writeVectorToFile(normGCPath, experiment.expectedGCBias());
+
+  bfs::path obsGCPath = auxDir / "observed_gc.gz";
+  const auto& gcCounts = experiment.observedGC();
+  std::vector<double> observedGC(gcCounts.size(), 0.0);
+  std::copy(gcCounts.begin(), gcCounts.end(), observedGC.begin());
+  writeVectorToFile(obsGCPath, observedGC);
+  */
+  
   bfs::path info = auxDir / "meta_info.json";
 
   {
@@ -125,8 +276,14 @@ bool GZipWriter::writeMeta(
       auto& transcripts = experiment.transcripts();
       oa(cereal::make_nvp("salmon_version", std::string(salmon::version)));
       oa(cereal::make_nvp("samp_type", sampType));
+
+      auto libStrings = getLibTypeStrings(experiment);
+      oa(cereal::make_nvp("num_libraries", libStrings.size())); 
+      oa(cereal::make_nvp("library_types", libStrings));
+
       oa(cereal::make_nvp("frag_dist_length", fldSamples.size()));
-      oa(cereal::make_nvp("bias_correct", opts.biasCorrect));
+      oa(cereal::make_nvp("seq_bias_correct", opts.biasCorrect));
+      oa(cereal::make_nvp("gc_bias_correct", opts.gcBiasCorrect));
       oa(cereal::make_nvp("num_bias_bins", bcounts.size()));
 
       std::string mapTypeStr = opts.alnMode ? "alignment" : "mapping";
@@ -224,6 +381,15 @@ template
 bool GZipWriter::writeBootstrap<int>(const std::vector<int>& abund);
 
 template
+bool GZipWriter::writeEquivCounts<ReadExperiment>(const SalmonOpts& sopt,
+                                                 ReadExperiment& readExp);
+template
+bool GZipWriter::writeEquivCounts<AlignmentLibrary<UnpairedRead>>(const SalmonOpts& sopt,
+                                                 AlignmentLibrary<UnpairedRead>& readExp);
+template
+bool GZipWriter::writeEquivCounts<AlignmentLibrary<ReadPair>>(const SalmonOpts& sopt,
+                                                 AlignmentLibrary<ReadPair>& readExp);
+template
 bool GZipWriter::writeAbundances<ReadExperiment>(const SalmonOpts& sopt,
                                                  ReadExperiment& readExp);
 template
@@ -251,3 +417,8 @@ bool GZipWriter::writeMeta<AlignmentLibrary<ReadPair>>(
     const AlignmentLibrary<ReadPair>& experiment,
     const std::string& tstring);
 
+template 
+std::vector<std::string> getLibTypeStrings(const AlignmentLibrary<UnpairedRead>& experiment);
+
+template 
+std::vector<std::string> getLibTypeStrings(const AlignmentLibrary<ReadPair>& experiment);
diff --git a/src/SBModel.cpp b/src/SBModel.cpp
new file mode 100644
index 0000000..36c2bdb
--- /dev/null
+++ b/src/SBModel.cpp
@@ -0,0 +1,295 @@
+#include "SBModel.hpp"
+#include <sstream>
+#include <utility>
+
+SBModel::SBModel() : _trained(false) { // builds an untrained model: fixed per-position orders, zeroed _probs and _marginals
+  // Roberts et al. model
+  // _order = {0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0};
+  //       -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8  9  10 11 12
+
+  // Roberts et al. model (eXpress)
+  // _order = {0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
+  //      -10 -9 -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8  9 10
+  //_order = {0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
+
+  //_order = {0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1};
+  //         -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8  9
+      
+  // Simple model
+  // _order = {0, 0, 1, 2, 2, 2, 2, 1, 0};
+   _order = {0, 1, 2, 2, 2, 2, 2, 2, 2}; // the only active assignment; the commented-out arrays above and below are alternatives
+  //        -3 -2 -1  0  1  2  3  4  5
+
+  // Short model
+  //_order = {0, 1, 2, 2, 2, 2};
+  //       -2 -1  0 1  2  3    
+
+  // The total length of the contexts we'll consider
+  _contextLength = _order.size();
+  // The number of bases before the read start position.
+  _contextLeft = 3;
+  // The number of bases after the read start position.
+  _contextRight = 5;
+
+  if ((_contextLeft + _contextRight + 1) !=  _contextLength) { // sanity check: left + right + 1 must equal the size of _order
+    std::cerr << "The left (" << _contextLeft << ") and right (" << _contextRight << ") "
+                 "context length (+1) didn't match the size of the model provided "
+                 "(" << _contextLength << "); something is wrong!\n";
+    std::exit(1); // terminates the whole process from inside the constructor
+  }
+
+  _marginals = Eigen::MatrixXd(4, _contextLength);
+  _marginals.setZero();
+
+  _shifts.clear();
+  _widths.clear();
+  _shifts.reserve(_order.size());
+  _widths.reserve(_order.size());
+  for (int32_t i = 0; i < _contextLength; ++i) {
+    _shifts.push_back((2 * _contextLength) - 2 * (i + 1)); // offset later passed to get_bits for position i
+    _widths.push_back(2 * (_order[i] + 1)); // bit count for (order + 1) bases; presumably 2 bits per base — confirm against Mer
+  }
+  
+  // Find the maximum order present in our model
+  int32_t maxOrder{0};
+  for (auto e : _order) { maxOrder = std::max(maxOrder, e); }
+
+  // Set k equal to the size of the contexts we'll parse. 
+  _mer.k(_contextLength);
+
+  // To hold all probabilities the matrix must be 4^{max_order + 1} by context-length
+  _probs = Eigen::MatrixXd(constExprPow(4, maxOrder + 1), _contextLength); 
+  // We have no intial observations
+  _probs.setZero();
+}
+
+// Writes the model to out as raw in-memory bytes: the three context sizes, the
+// order/shift/width arrays, then _probs and _marginals, each matrix preceded by
+// its row and column counts.  Always returns true.
+bool SBModel::writeBinary(boost::iostreams::filtering_ostream& out) const {
+  using Index = Eigen::MatrixXd::Index;
+  using Scalar = Eigen::MatrixXd::Scalar;
+  // ostream::write takes const char*, so the const members can be written as-is.
+  auto put = [&out](const void* src, size_t numBytes) {
+    out.write(static_cast<const char*>(src), numBytes);
+  };
+  auto putMatrix = [&put](const Eigen::MatrixXd& m) {
+    // Following adopted from: http://stackoverflow.com/questions/25389480/how-to-write-read-an-eigen-matrix-from-binary-file
+    Index rows = m.rows(), cols = m.cols();
+    put(&rows, sizeof(Index));
+    put(&cols, sizeof(Index));
+    put(m.data(), rows * cols * sizeof(Scalar));
+  };
+  put(&_contextLength, sizeof(int32_t));
+  put(&_contextLeft, sizeof(int32_t));
+  put(&_contextRight, sizeof(int32_t));
+  put(_order.data(), _contextLength * sizeof(int32_t));   // the orders
+  put(_shifts.data(), _contextLength * sizeof(int32_t));  // the shifts
+  put(_widths.data(), _contextLength * sizeof(int32_t));  // the widths
+  putMatrix(_probs);      // all probabilities
+  putMatrix(_marginals);  // marginal probabilities
+  return true;
+}
+ 
+// Score of the context spelled by seqIn: sums the _probs entry that each position's bits select.
+double SBModel::evaluateLog(const char* seqIn) {
+    Mer context;
+    context.from_chars(seqIn);
+    double logScore = 0;
+    for (int32_t pos = 0; pos < _contextLength; ++pos) {
+        const uint64_t row = context.get_bits(_shifts[pos], _widths[pos]);
+        logScore += _probs(row, pos);
+    }
+    return logScore;
+}
+
+// Sums, over all context positions, the _probs entry selected by mer's bits.
+double SBModel::evaluateLog(const Mer& mer) {
+    double logScore = 0;
+    for (int32_t pos = 0; pos < _contextLength; ++pos) {
+        const uint64_t row = mer.get_bits(_shifts[pos], _widths[pos]);
+        logScore += _probs(row, pos);
+    }
+    return logScore;
+}
+
+ 
+/** inlined member functions 
+
+inline int32_t SBModel::contextBefore(bool rc); 
+inline int32_t SBModel::contextAfter(bool rc);
+inline bool SBModel::addSequence(const char* seqIn, bool revCmp, double weight = 1.0);
+inline double SBModel::evaluate(const char* seqIn);
+inline double SBModel::evaluate(uint32_t kmer, uint32_t K);
+inline uint32_t SBModel::_getIndex(uint32_t kmer, uint32_t offset, uint32_t _order);
+
+**/
+
+Eigen::MatrixXd& SBModel::counts() { return _probs; } // mutable access to _probs (raw counts until normalize() turns them into logs)
+
+Eigen::MatrixXd& SBModel::marginals() { return _marginals; } // mutable access to the 4 x contextLength _marginals matrix
+
+void SBModel::dumpConditionalProbabilities(std::ostream& os) { // writes, per position, a tab-separated header row ("prefix -> base") and a row of exp(_probs) values
+    typedef jellyfish::mer_dna_ns::mer_base_dynamic<uint64_t> mer64;
+    // For each position
+    for (size_t i = 0; i < _contextLength; ++i) {
+        mer64 k(_order[i]+1);
+        size_t nbit = 2 * (_order[i] + 1);
+        uint32_t N = constExprPow(4, _order[i] + 1); // number of rows used at this position
+        // header
+        for (size_t j = 0; j < N; ++j) {
+            k.set_bits(0, nbit, j);
+            std::string s = k.to_str();
+            if (s.length() > 1) {
+                os << s.substr(0, s.length()-1) << " -> "; 
+                os << s.back();
+            } else {
+                os << "\'\' -> " << s.front(); // order-0 position: empty prefix
+            }
+            if (j < N-1) { os << '\t'; }
+        }
+        os << '\n';
+        // probs 
+        for (size_t j = 0; j < N; ++j) {
+            auto p = _probs(j, i);
+            os << std::exp(p); // NOTE(review): exp() assumes _probs already holds logs, i.e. normalize() has run — confirm callers
+            if (j < N-1) { os << '\t'; }
+        }
+        os << '\n';
+    }
+}
+
+bool SBModel::addSequence(const char* seqIn, bool revCmp, double weight) { // encodes seqIn into the member _mer (reverse-complemented when revCmp) and adds it with the given weight
+    _mer.from_chars(seqIn); // reuses the shared member _mer as scratch space
+    if (revCmp) { _mer.reverse_complement(); }
+    return addSequence(_mer, weight);
+}
+
+bool SBModel::addSequence(const Mer& mer, double weight) { // adds weight to one _probs cell per context position; always returns true
+    for (int32_t i = 0; i < _contextLength; ++i) {
+        uint64_t idx = mer.get_bits(_shifts[i], _widths[i]); // row selected by this position's sub-context bits
+        _probs(idx, i) += weight;
+    }
+    return true;
+}
+
+/**
+ * Once the _prob matrix has been filled out with observations, calling
+ * this function will normalize all counts so that the entries of _prob
+ * represent proper transition probabilities, stored as logarithms.
+ * NOTE: The _prob matrix can only be normalized once.
+ *
+ * returns : true if the matrix was normalized and false otherwise.
+ **/
+bool SBModel::normalize() {
+  if (_trained) { return false; }
+
+  // now normalize the rest of the sub-contexts in groups
+  // each consecutive group of 4 rows shares the same prefix
+  for (int32_t pos = 0; pos < _contextLength; ++pos) {
+    size_t numStates = constExprPow(4, _order[pos]);
+    size_t rowsPerNode = 4;
+    size_t nodeStart = 0;
+    for (size_t i = 0; i < numStates; ++i) {
+        // Turn the counts of the current context into conditional
+        // probabilities.  A context with a zero total is left at zero:
+        // dividing would give NaN, which then leaked into _marginals.
+        auto tot = _probs.col(pos).segment(nodeStart, rowsPerNode).sum();
+        if (tot != 0.0) {
+          _probs.col(pos).segment(nodeStart, rowsPerNode) /= tot;
+        }
+        _marginals(0, pos) += _probs(nodeStart, pos);
+        _marginals(1, pos) += _probs(nodeStart+1, pos);
+        _marginals(2, pos) += _probs(nodeStart+2, pos);
+        _marginals(3, pos) += _probs(nodeStart+3, pos);
+        nodeStart += rowsPerNode;
+    }
+    _marginals.col(pos) /= numStates;
+  }
+
+  double logSmall = std::log(1e-5);
+  auto takeLog = [logSmall](double x) -> double {
+    return (x > 0.0) ? std::log(x) : logSmall;
+  };
+  _probs = _probs.unaryExpr(takeLog);
+  _trained = true;
+  return true;
+}
+
+// For a trained model, checks that each group of 4 rows (one group per context)
+// exponentiates to a total within [0.98, 1.02]; untrained models pass trivially.
+bool SBModel::checkTransitionProbabilities() {
+  if (!_trained) { return true; }
+
+  for (int32_t pos = 0; pos < _contextLength; ++pos) {
+    size_t numStates = constExprPow(4, _order[pos]);
+    size_t rowsPerNode = 4;
+    size_t nodeStart = 0;
+    for (size_t i = 0; i < numStates; ++i) {
+      auto tot = 0.0;
+      for (size_t j = nodeStart; j < nodeStart + rowsPerNode; ++j) {
+        tot += std::exp(_probs(j, pos));
+      }
+      if (tot < 0.98 or tot > 1.02) {
+        // Report the model position (pos); the old message printed the context index instead.
+        std::cerr << "Transition probabilities for position " << pos << ", rows[" << nodeStart << ", " << nodeStart + rowsPerNode << "] = " << tot << '\n';
+        return false;
+      }
+      nodeStart += rowsPerNode;
+    }
+  }
+  return true;
+}
+
+void SBModel::combineCounts(const SBModel& other) { // element-wise adds other's _probs into this model's
+  _probs += other._probs; // NOTE(review): no _trained check — after normalize() these are logs, not counts; confirm callers only merge untrained models
+}
+
+int32_t SBModel::getContextLength() { return _contextLength; } // number of positions in the context, i.e. the size of _order
+
+template <typename CountVecT> // Fills _probs from a table of k-mer counts and normalizes it; does nothing if already trained.
+bool SBModel::train(CountVecT& kmerCounts, const uint32_t K) { // kmerCounts is indexed by k-mer value in [0, 4^K); always returns true
+  // The _order of the model *ending at* positions 2, 3, 4, and 5 (0-based)
+  std::vector<uint32_t> _order{0, 0, 2, 2, 2, 2}; // NOTE(review): this local shadows the member _order set in the constructor
+  const auto numKmers = constExprPow(4, K);
+
+  if (!_trained) {
+    // For each starting position 
+    for (int32_t pos = 0; pos < K - _order.back(); ++pos) {
+      uint32_t offset = 2 * (K - (pos + 1) - _order[pos]);
+
+      // See how frequently sub-contexts starting at this position appear 
+      for (uint32_t kmer = 0; kmer < numKmers; ++kmer) {
+	auto idx = _getIndex(kmer, offset, _order[pos]);
+	_probs(idx, pos) += static_cast<double>(kmerCounts[kmer]);
+      }
+    }
+
+    // Normalize columns 0, 1 and 2, each by its own column sum
+    int32_t startIdx = 0; // NOTE(review): never read
+    _probs.col(0) /= _probs.col(0).sum();
+    _probs.col(1) /= _probs.col(1).sum();
+    _probs.col(2) /= _probs.col(2).sum();
+    // now normalize the rest of the sub-contexts in groups
+    // each consecutive group of 4 rows shares the same 2-mer prefix
+    for (int32_t pos = 3; pos < K - _order.back(); ++pos) {
+      size_t numStates = constExprPow(4, _order[pos]);
+      size_t rowsPerNode = 4; 
+      size_t nodeStart = 0;
+      for (size_t i = 0; i < numStates; ++i) {
+	auto tot = _probs.col(pos).segment(nodeStart, rowsPerNode).sum();
+	_probs.col(pos).segment(nodeStart, rowsPerNode) /= tot;
+	nodeStart += rowsPerNode;
+      }
+    }
+    _trained = true; // unlike normalize(), no logarithm is taken here
+  }
+  return true;
+}
+
+
+template
+bool SBModel::train<std::array<std::atomic<uint32_t>, constExprPow(4, 6)>>(std::array<std::atomic<uint32_t>, constExprPow(4, 6)>& counts, const uint32_t K);					      
+
+template
+bool SBModel::train<std::vector<double>>(std::vector<double>& counts, const uint32_t K);
diff --git a/src/SGSmooth.cpp b/src/SGSmooth.cpp
new file mode 100644
index 0000000..a8e50de
--- /dev/null
+++ b/src/SGSmooth.cpp
@@ -0,0 +1,555 @@
+// From : https://raw.githubusercontent.com/thatchristoph/vmd-cvs-github/master/plugins/signalproc/src/sgsmooth.C
+//! 
+// Sliding window signal processing (and linear algebra toolkit).
+//
+// supported operations:
+// <ul>
+// <li> Savitzky-Golay smoothing.
+// <li> computing a numerical derivative based on Savitzky-Golay smoothing.
+// <li> required linear algebra support for SG smoothing using STL based
+//      vector/matrix classes 
+// </ul>
+//
+// \brief Linear Algebra "Toolkit".
+//
+// modified by Rob Patro, 2016
+
+// system headers
+#include <cstdio>
+#include <cstddef>             // for size_t
+#include <cmath>               // for fabs  
+#include <vector>
+
+//! default convergence
+static const double TINY_FLOAT = 1.0e-300;
+
+//! comfortable array of doubles
+using float_vect = std::vector<double>;
+//! comfortable array of ints;
+using int_vect = std::vector<int>;
+
+/*! matrix class.
+ *
+ * This is a matrix class derived from a vector of float_vects.  Note that
+ * the matrix elements are indexed [row][column] with indices starting at 0 (c
+ * style). Also note that because of its design looping through rows should
+ * be faster than looping through columns.  
+ *
+ * \brief two dimensional floating point array
+ */
+class float_mat : public std::vector<float_vect> {
+public:
+    //! no default construction: a matrix is always built with explicit contents
+    float_mat() = delete;
+    //! assignment is not implemented; deleting it turns accidental use into
+    //! a compile error instead of a silent no-op.
+    float_mat &operator =(const float_mat &) = delete;
+    //! build a rows x cols matrix with every element set to def
+    float_mat(const size_t rows, const size_t cols, const double def=0.0);
+    //! copy constructor for matrix
+    float_mat(const float_mat &m);
+    //! build a single-row matrix holding a copy of the vector v
+    float_mat(const float_vect &v);
+
+    //! use default destructor
+    // ~float_mat() {};
+
+    //! number of rows
+    size_t nr_rows(void) const { return size(); }
+    //! number of columns (length of the first row); 0 for an empty matrix
+    size_t nr_cols(void) const { return empty() ? 0 : front().size(); }
+};
+
+
+
+// constructor with sizes
+float_mat::float_mat(const size_t rows,const size_t cols,const double defval) 
+        : std::vector<float_vect>(rows, float_vect(cols, defval)) {
+    // The base-class fill constructor creates all rows at once, each holding
+    // cols copies of defval, so no index loop is required.
+    //
+    // Degenerate dimensions are accepted silently: the upstream error hook
+    // (sgs_error) is disabled in this port, so there is nothing to report
+    // to.  No diagnostic text is formatted either, because a message nobody
+    // reads is dead code (and printing size_t through "%d" is undefined
+    // behaviour).
+    if ((rows < 1) || (cols < 1)) {
+        //sgs_error("cannot build matrix with zero rows or columns\n");
+    }
+}
+
+// copy constructor for matrix
+float_mat::float_mat(const float_mat &m) : std::vector<float_vect>(m.size()) {
+    // The base vector starts with m.size() empty rows; each one is then
+    // filled with the elements of the matching row of m, so the result
+    // is a deep copy.
+    const size_t nrows = m.size();
+    for (size_t r = 0; r < nrows; ++r) {
+        const float_vect &src = m[r];
+        float_vect &dst = (*this)[r];
+        dst.assign(src.begin(), src.end());
+    }
+}
+
+// copy constructor for vector
+float_mat::float_mat(const float_vect &v) 
+        : std::vector<float_vect>(1) {
+    // The matrix consists of exactly one row, which receives a copy of v;
+    // the result therefore has shape 1 x v.size().
+    float_vect &row = (*this)[0];
+    row.assign(v.begin(), v.end());
+}
+
+//////////////////////
+// Helper functions //
+//////////////////////
+
+//! permute() orders the rows of A to match the integers in the index array.
+void permute(float_mat &A, int_vect &idx) 
+{
+    const int nrows = static_cast<int>(A.nr_rows());
+    // current[r] is the original index of the row now stored at position r
+    int_vect current(idx.size());
+    for (int r = 0; r < nrows; ++r) {
+        current[r] = r;
+    }
+    for (int dst = 0; dst < nrows; ++dst) {
+        const int wanted = idx[dst];
+        if (current[dst] == wanted) {
+            continue;               // row is already in place
+        }
+        // the wanted row can only sit further down: locate it, swap it up
+        int src = dst + 1;
+        while ((src < nrows) && (current[src] != wanted)) {
+            ++src;
+        }
+        if (src < nrows) {
+            std::swap(A[dst], A[src]);
+            current[src] = current[dst];
+            current[dst] = wanted;
+        }
+    }
+}
+
+/*! \brief Implicit partial pivoting.  
+ *
+ * The function looks for pivot element only in rows below the current
+ * element, A[idx[row]][column], then swaps that row with the current one in
+ * the index map. The algorithm is for implicit pivoting (i.e., the pivot is
+ * chosen as if the max coefficient in each row is set to 1) based on the
+ * scaling information in the vector scale. The map of swapped indices is
+ * recorded in swp. The return value is +1 or -1 depending on whether the
+ * number of row swaps was even or odd respectively. */
+static int partial_pivot(float_mat &A, const size_t row, const size_t col, 
+                         float_vect &scale, int_vect &idx, double tol)
+{
+    // tol is clamped to a positive value here, but the only code that would
+    // read it is the zero-pivot check below, which is compiled out.
+    if (tol <= 0.0)
+        tol = TINY_FLOAT;
+
+    const size_t nrows = A.nr_rows();
+    // the diagonal position [row,col] is the first candidate
+    size_t best = row;
+    double best_val = fabs(A[idx[row]][col]) * scale[idx[row]];
+
+    // scan the rows below it for a larger scaled magnitude
+    for (size_t cand = row + 1; cand < nrows; ++cand) {
+        const int mapped = idx[cand];
+        const double val = fabs(A[mapped][col]) * scale[mapped];
+        if (val > best_val) {
+            best = cand;
+            best_val = val;
+        }
+    }
+
+#if 0
+    if(best_val < tol) {
+      //sgs_error("partial_pivot(): Zero pivot encountered.\n")
+#endif
+
+    // no better row was found: nothing is exchanged (even swap count)
+    if (best == row)
+        return 1;
+
+    // bring the pivot to the diagonal by exchanging the two map entries
+    const int displaced = idx[row];
+    idx[row] = idx[best];
+    idx[best] = displaced;
+    return -1;              // exactly one swap was performed (odd count)
+}
+
+/*! \brief Perform backward substitution.
+ *
+ * Solves the system of equations A*b=a, ASSUMING that A is upper
+ * triangular. If diag==1, then the diagonal elements are additionally
+ * assumed to be 1.  Note that the lower triangular elements are never
+ * checked, so this function is valid to use after a LU-decomposition in
+ * place.  A is not modified, and the solution, b, is returned in a. */
+static void lu_backsubst(float_mat &A, float_mat &a, bool diag=false) 
+{
+    // signed indices: the row loop counts down past zero
+    const int n = static_cast<int>(A.nr_rows());
+    // columns of the right-hand side itself, which need not match A's
+    const int nrhs = a.empty() ? 0 : static_cast<int>(a.nr_cols());
+    for (int r = n - 1; r >= 0; --r) {
+        for (int c = static_cast<int>(A.nr_cols()) - 1; c > r; --c) {
+            for (int k = 0; k < nrhs; ++k)
+                a[r][k] -= A[r][c] * a[c][k];
+        }
+        if (!diag) {        // general case: divide by the diagonal element
+            for (int k = 0; k < nrhs; ++k)
+                a[r][k] /= A[r][r];
+        }
+    }
+}
+
+/*! \brief Perform forward substitution.
+ *
+ * Solves the system of equations A*b=a, ASSUMING that A is lower
+ * triangular. If diag==1, then the diagonal elements are additionally
+ * assumed to be 1.  Note that the upper triangular elements are never
+ * checked, so this function is valid to use after a LU-decomposition in
+ * place.  A is not modified, and the solution, b, is returned in a. */
+static void lu_forwsubst(float_mat &A, float_mat &a, bool diag=true) 
+{
+    const int n = static_cast<int>(A.nr_rows());
+    // columns of the right-hand side itself, which need not match A's
+    const int nrhs = a.empty() ? 0 : static_cast<int>(a.nr_cols());
+    for (int r = 0; r < n; ++r) {
+        for (int c = 0; c < r; ++c) {
+            for (int k = 0; k < nrhs; ++k)
+                a[r][k] -= A[r][c] * a[c][k];
+        }
+        if (!diag) {        // general case: divide by the diagonal element
+            for (int k = 0; k < nrhs; ++k)
+                a[r][k] /= A[r][r];
+        }
+    }
+}
+
+/*! \brief Performs LU factorization in place.  
+ *
+ * This is Crout's algorithm (cf., Num. Rec. in C, Section 2.3).  The map of
+ * swapped indices is recorded in idx. The return value is +1 or -1
+ * depending on whether the number of row swaps was even or odd
+ * respectively.  idx must be preinitialized to a valid set of 0-based row
+ * indices (e.g., {0,1, ... ,A.nr_rows()-1}). */
+static int lu_factorize(float_mat &A, int_vect &idx, double tol=TINY_FLOAT)
+{
+    if ( tol <= 0.0) 
+        tol = TINY_FLOAT;           // tol is only forwarded to partial_pivot()
+
+    if ((A.nr_rows() == 0) || (A.nr_rows() != A.nr_cols())) {
+      //sgs_error("lu_factorize(): cannot handle empty "
+      //           "or nonsquare matrices.\n");
+        // failure is signalled only through the 0 return value
+        return 0;
+    }
+
+    float_vect scale(A.nr_rows());  // implicit pivot scaling
+    int i,j;
+    for (i = 0; i < A.nr_rows(); ++i) {
+        double maxval = 0.0;        // largest magnitude found in row i
+        for (j = 0; j < A.nr_cols(); ++j) {
+            if (fabs(A[i][j]) > maxval)
+                maxval = fabs(A[i][j]);
+        }
+        if (maxval == 0.0) {        // row i is entirely zero
+	  //sgs_error("lu_factorize(): zero pivot found.\n");
+            return 0;
+        }
+        scale[i] = 1.0 / maxval;    // reciprocal of the row's largest magnitude
+    }
+
+    int swapNum = 1;                // sign of the accumulated row swaps
+    int c,r;
+    for (c = 0; c < A.nr_cols() ; ++c) {            // loop over columns
+        swapNum *= partial_pivot(A, c, c, scale, idx, tol); // bring pivot to diagonal
+        for(r = 0; r < A.nr_rows(); ++r) {      //  loop over rows
+            int lim = (r < c) ? r : c;          // min(r, c) earlier terms to subtract
+            for (j = 0; j < lim; ++j) {
+                A[idx[r]][c] -= A[idx[r]][j] * A[idx[j]][c];
+            }
+            if (r > c)                          // below the diagonal: divide by the pivot
+                A[idx[r]][c] /= A[idx[c]][c];
+        }
+    }
+    permute(A,idx);                 // physically reorder the rows of A to match idx
+    return swapNum;
+}
+
+/*! \brief Solve a system of linear equations. 
+ * Solves the inhomogeneous matrix problem with lu-decomposition. Note that
+ * inversion may be accomplished by setting a to the identity_matrix. */
+static float_mat lin_solve(const float_mat &A, const float_mat &a, 
+                           double tol=TINY_FLOAT) 
+{
+    // work on copies so that neither argument is modified
+    float_mat lu(A);
+    float_mat rhs(a);
+    const size_t n = lu.nr_rows();
+    int_vect order(n);              // row swap labels, identity to start
+    for (size_t r = 0; r < n; ++r)
+        order[r] = static_cast<int>(r);
+    // NOTE(review): a 0 (failure) result from lu_factorize() is not checked
+    lu_factorize(lu, order, tol);   // get the lu-decomp.
+    permute(rhs, order);            // sort the inhomogeneity to match it
+    lu_forwsubst(lu, rhs);          // solve the forward problem
+    lu_backsubst(lu, rhs);          // solve the backward problem
+    return rhs;
+}
+
+///////////////////////
+// related functions //
+///////////////////////
+
+//! Returns the inverse of a matrix using LU-decomposition. 
+static float_mat invert(const float_mat &A) 
+{
+    // Solving A * X = I yields X = inverse(A).  A is handed to lin_solve()
+    // directly: lin_solve() already works on its own private copy, so a
+    // second copy made here would only cost time and memory.
+    const size_t n = A.size();
+    float_mat identity(n, n, 0.0);
+    for (size_t d = 0; d < n; ++d) {
+        identity[d][d] = 1.0;
+    }
+
+    return lin_solve(A, identity);
+}
+
+//! returns the transposed matrix.
+static float_mat transpose(const float_mat &a)
+{
+    const size_t nrows = a.nr_rows();
+    const size_t ncols = a.nr_cols();
+    float_mat flipped(ncols, nrows);    // dimensions of a, exchanged
+    for (size_t r = 0; r < nrows; ++r) {
+        const float_vect &src = a[r];
+        for (size_t c = 0; c < ncols; ++c)
+            flipped[c][r] = src[c];
+    }
+    return flipped;
+}
+
+//! matrix multiplication.
+float_mat operator *(const float_mat &a, const float_mat &b)
+{
+    const size_t nrows = a.nr_rows();
+    const size_t ncols = b.nr_cols();
+    const size_t inner = a.nr_cols();
+    // the product starts out as an all-zero nrows x ncols matrix
+    float_mat product(nrows, ncols);
+    if (inner != b.nr_rows()) {
+      //sgs_error("incompatible matrices in multiplication\n");
+        return product;     // shapes do not match: hand back the zeros
+    }
+    for (size_t r = 0; r < nrows; ++r) {
+        for (size_t c = 0; c < ncols; ++c) {
+            double acc = 0.0;
+            for (size_t k = 0; k < inner; ++k)
+                acc += a[r][k] * b[k][c];
+            product[r][c] = acc;
+        }
+    }
+    return product;
+}
+
+
+//! calculate savitzky golay coefficients.
+static float_vect sg_coeff(const float_vect &b, const size_t deg)
+{
+    const size_t rows(b.size());
+    const size_t cols(deg + 1);
+    float_mat A(rows, cols);
+    float_vect res(rows);
+    // design matrix of the least squares fit: A[i][j] = i^j
+    for (size_t i = 0; i < rows; ++i) {
+        for (size_t j = 0; j < cols; ++j) {
+            A[i][j] = pow(double(i), double(j));
+        }
+    }
+    // normal equations c = (A^T A)^-1 A^T b; A^T is built only once
+    const float_mat At(transpose(A));
+    float_mat c(invert(At * A) * (At * transpose(b)));
+    // evaluate the fitted polynomial at every sample position; the powers
+    // i^j are already stored in A, so they are not recomputed
+    for (size_t i = 0; i < rows; ++i) {
+        res[i] = c[0][0];
+        for (size_t j = 1; j <= deg; ++j) {
+            res[i] += c[j][0] * A[i][j];
+        }
+    }
+    return res;
+}
+
+/*! \brief savitzky golay smoothing.  
+ *
+ * This method fits a polynomial of degree 'deg' to a sliding window
+ * of width 2w+1 throughout the data.  The needed coefficients are
+ * generated dynamically by doing a least squares fit on a "symmetric" unit
+ * vector of size 2w+1, e.g. for w=2 b=(0,0,1,0,0). Evaluating the polynomial
+ * yields the sg-coefficients.  At the border, non-symmetric vectors b are
+ * used. */
+float_vect sg_smooth(const float_vect &v, const int width, const int deg)
+{
+    float_vect res(v.size(), 0.0);
+    if ((width < 1) || (deg < 0) || (v.size() < (2 * width + 2))) {
+      //sgs_error("sgsmooth: parameter error.\n");
+        return res;                 // invalid parameters: all-zero result
+    }
+
+    const int window = 2 * width + 1;   // full window: width samples on each side
+    const int endidx = v.size() - 1;    // index of the last sample
+
+    // deg == 0: do a regular sliding window average
+    int i,j;
+    if (deg == 0) {
+        // handle border cases first because we need different coefficients:
+        // res[i] is the mean of the first i+1 samples, and its mirror image
+        // res[endidx - i] is the mean of the last i+1 samples.
+#if defined(_OPENMP)
+#pragma omp parallel for private(i,j) schedule(static)
+#endif    
+        for (i = 0; i < width; ++i) {
+	    const double scale = 1.0/double(i+1);
+            const float_vect c1(width, scale);
+            for (j = 0; j <= i; ++j) {
+                res[i]          += c1[j] * v[j];
+                res[endidx - i] += c1[j] * v[endidx - j];
+            } 
+        }
+
+        // now loop over rest of data. reusing the "symmetric" coefficients.
+	const double scale = 1.0/double(window);
+        const  float_vect c2(window, scale);    // uniform weights 1/window
+#if defined(_OPENMP)
+#pragma omp parallel for private(i,j) schedule(static)
+#endif    
+        for (i = 0; i <= (v.size() - window); ++i) {
+            for (j = 0; j < window; ++j) {
+                res[i + width] += c2[j] * v[i + j];
+            } 
+        }
+        return res;
+    }
+
+    // handle border cases first because we need different coefficients
+#if defined(_OPENMP)
+#pragma omp parallel for private(i,j) schedule(static)
+#endif    
+    for (i = 0; i < width; ++i) {
+        float_vect b1(window, 0.0);
+        b1[i] = 1.0;                // unit vector with its 1 at border position i
+
+        const float_vect c1(sg_coeff(b1, deg));
+        for (j = 0; j < window; ++j) {
+            res[i]          += c1[j] * v[j];
+            res[endidx - i] += c1[j] * v[endidx - j];
+        } 
+    }
+
+    // now loop over rest of data. reusing the "symmetric" coefficients.
+    float_vect b2(window, 0.0);
+    b2[width] = 1.0;                // unit vector with its 1 at the window centre
+    const float_vect c2(sg_coeff(b2, deg));
+
+#if defined(_OPENMP)
+#pragma omp parallel for private(i,j) schedule(static)
+#endif    
+    for (i = 0; i <= (v.size() - window); ++i) {
+        for (j = 0; j < window; ++j) {
+            res[i + width] += c2[j] * v[i + j];
+        } 
+    }
+    return res;
+}
+
+/*! least squares fit a polynomial of degree 'deg' to data in 'b'.
+ *  then calculate the first derivative and return it. */
+static float_vect lsqr_fprime(const float_vect &b, const int deg)
+{
+    const int rows(b.size());
+    const int cols(deg + 1);
+    float_mat A(rows, cols);
+    float_vect res(rows);
+    // design matrix of the least squares fit: A[i][j] = i^j
+    for (int i = 0; i < rows; ++i) {
+        for (int j = 0; j < cols; ++j) {
+            A[i][j] = pow(double(i), double(j));
+        }
+    }
+    // normal equations c = (A^T A)^-1 A^T b; A^T is built only once
+    const float_mat At(transpose(A));
+    float_mat c(invert(At * A) * (At * transpose(b)));
+    // first derivative of the fitted polynomial, sum_k k * c_k * i^(k-1);
+    // the k = 1 term is the constant c[1][0], and the powers i^j are read
+    // back from A instead of being recomputed
+    for (int i = 0; i < rows; ++i) {
+        res[i] = c[1][0];
+        for (int j = 1; j < deg; ++j) {
+            res[i] += c[j + 1][0] * double(j+1) * A[i][j];
+        }
+    }
+    return res;
+}
+
+/*! \brief savitzky golay smoothed numerical derivative.  
+ *
+ * This method fits a polynomial of degree 'deg' to a sliding window
+ * of width 2w+1 throughout the data.  
+ *
+ * In contrast to the sg_smooth function we do a brute force attempt by
+ * always fitting the data to a polynomial of degree 'deg' and using the
+ * result. */
+float_vect sg_derivative(const float_vect &v, const int width, 
+                         const int deg, const double h)
+{
+    float_vect res(v.size(), 0.0);
+    if ((width < 1) || (deg < 1) || (v.size() < (2 * width + 2))) {
+      //sgs_error("sgsderiv: parameter error.\n");
+        return res;     // invalid parameters: all-zero result
+    }
+
+    const int window = 2 * width + 1;
+    int i,j;
+    // handle border cases first because we do not repeat the fit
+    // lower part: a single fit over the first window yields res[0..width]
+    float_vect b(window, 0.0);
+    for (i = 0; i < window; ++i) {
+        b[i] = v[i] / h;
+    }
+    const float_vect c(lsqr_fprime(b, deg));
+    for (j = 0; j <= width; ++j) {
+        res[j] = c[j];
+    }
+    // upper part. direction of fit is reversed, hence the negated result
+    for (i = 0; i < window; ++i) {
+        b[i] = v[v.size() - 1 - i] / h;
+    }
+    const float_vect d(lsqr_fprime(b, deg));
+    for (i = 0; i <= width; ++i) {
+        res[v.size() - 1 - i] = -d[i];
+    }
+    // now loop over rest of data, using only the middle value of each fit.
+    // Each iteration fills its own window buffer: a single buffer shared by
+    // all OpenMP threads would be written concurrently (a data race).
+    const int nfits = static_cast<int>(v.size()) - window;
+#if defined(_OPENMP)
+#pragma omp parallel for private(i,j) schedule(static)
+#endif    
+    for (i = 1; i < nfits; ++i) {
+        float_vect win(window);
+        for (j = 0; j < window; ++j) {
+            win[j] = v[i + j] / h;
+        }
+        res[i + width] = lsqr_fprime(win, deg)[width];
+    }
+    return res;
+}
+
+// Local Variables:
+// mode: c++
+// c-basic-offset: 4
+// fill-column: 76
+// indent-tabs-mode: nil
+// End:
diff --git a/src/Salmon.cpp b/src/Salmon.cpp
index 201e814..ffaf8c2 100644
--- a/src/Salmon.cpp
+++ b/src/Salmon.cpp
@@ -40,6 +40,9 @@
 #include <boost/range/irange.hpp>
 #include <boost/filesystem.hpp>
 
+// C++ string formatting library
+#include "spdlog/fmt/fmt.h"
+
 #include "BiasIndex.hpp"
 #include "SailfishUtils.hpp"
 #include "GenomicFeature.hpp"
@@ -47,7 +50,18 @@
 #include "VersionChecker.hpp"
 
 int help(int argc, char* argv[]) {
-    auto helpmsg = R"(
+    fmt::MemoryWriter helpMsg;
+    helpMsg.write("Salmon v{}\n\n", salmon::version);
+    helpMsg.write("Usage:  salmon -h|--help or \n"
+                  "        salmon -v|--version or \n"
+                  "        salmon [--no-version-check] <COMMAND> [-h | options]\n\n");
+    helpMsg.write("Commands:\n");
+    helpMsg.write("     index Create a salmon index\n");
+    helpMsg.write("     quant Quantify a sample\n");
+    helpMsg.write("     swim  Perform super-secret operation\n");
+
+    /*
+    auto orighelpmsg = R"(
     ===============
 
     Please invoke salmon with one of the following commands {index, quant, swim}.
@@ -58,8 +72,10 @@ int help(int argc, char* argv[]) {
 
     will give you detailed help information about the index command.
     )";
+    */
 
-    std::cerr << "    Salmon v" << salmon::version << helpmsg << "\n";
+    std::cerr << helpMsg.str();
+//std::cerr << "    Salmon v" << salmon::version << helpmsg << "\n";
     return 1;
 }
 
@@ -117,6 +133,12 @@ int main( int argc, char* argv[] ) {
   using std::string;
   namespace po = boost::program_options;
 
+  // With no arguments, print help
+  if (argc == 1) {
+      help(argc, argv);
+      std::exit(1);
+  }
+
   try {
 
     po::options_description hidden("hidden");
@@ -154,7 +176,7 @@ int main( int argc, char* argv[] ) {
     }
 
     if (vm.count("help") and !vm.count("command")) {
-        std::cout << sfopts << std::endl;
+        //std::cout << sfopts << std::endl;
         help(argc, argv);
         std::exit(0);
     }
@@ -163,7 +185,7 @@ int main( int argc, char* argv[] ) {
       std::string versionMessage = getVersionMessage();
       std::cerr << versionMessage;
     }
-
+    
     po::notify(vm);
 
     std::unordered_map<string, std::function<int(int, char*[])>> cmds({
diff --git a/src/SalmonQuantify.cpp b/src/SalmonQuantify.cpp
index 2d9c9f1..c88940e 100644
--- a/src/SalmonQuantify.cpp
+++ b/src/SalmonQuantify.cpp
@@ -1,6 +1,6 @@
 /**
 >HEADER
-    Copyright (c) 2013, 2014, 2015 Rob Patro rob.patro at cs.stonybrook.edu
+    Copyright (c) 2013, 2014, 2015, 2016 Rob Patro rob.patro at cs.stonybrook.edu
 
     This file is part of Salmon.
 
@@ -19,37 +19,36 @@
 <HEADER
 **/
 
-
+#include "btree_map.h"
+#include "btree_set.h"
 #include <algorithm>
 #include <atomic>
 #include <cassert>
 #include <cmath>
 #include <cstdio>
-#include <unordered_map>
-#include <map>
-#include <vector>
-#include <unordered_set>
+#include <exception>
+#include <functional>
 #include <iterator>
+#include <map>
 #include <mutex>
-#include <thread>
-#include <sstream>
-#include <exception>
-#include <random>
 #include <queue>
+#include <random>
+#include <sstream>
+#include <thread>
 #include <unordered_map>
-#include <functional>
-#include "btree_map.h"
-#include "btree_set.h"
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
 
 // C++ string formatting library
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 
 // C Includes for BWA
+#include <cctype>
 #include <cstdio>
-#include <unistd.h>
 #include <cstdlib>
 #include <cstring>
-#include <cctype>
+#include <unistd.h>
 
 extern "C" {
 #include "bwa.h"
@@ -65,63 +64,65 @@ extern "C" {
 #include "jellyfish/whole_sequence_parser.hpp"
 
 // Boost Includes
-#include <boost/filesystem.hpp>
 #include <boost/container/flat_map.hpp>
 #include <boost/dynamic_bitset/dynamic_bitset.hpp>
-#include <boost/range/irange.hpp>
-#include <boost/program_options.hpp>
+#include <boost/filesystem.hpp>
 #include <boost/lockfree/queue.hpp>
-#include <boost/thread/thread.hpp>
+#include <boost/program_options.hpp>
+#include <boost/range/irange.hpp>
 #include <boost/range/iterator_range.hpp>
+#include <boost/thread/thread.hpp>
 
 // TBB Includes
+#include "tbb/blocked_range.h"
+#include "tbb/concurrent_queue.h"
+#include "tbb/concurrent_unordered_map.h"
 #include "tbb/concurrent_unordered_set.h"
 #include "tbb/concurrent_vector.h"
-#include "tbb/concurrent_unordered_map.h"
-#include "tbb/concurrent_queue.h"
 #include "tbb/parallel_for.h"
 #include "tbb/parallel_for_each.h"
 #include "tbb/parallel_reduce.h"
-#include "tbb/blocked_range.h"
-#include "tbb/task_scheduler_init.h"
 #include "tbb/partitioner.h"
+#include "tbb/task_scheduler_init.h"
 
 // logger includes
 #include "spdlog/spdlog.h"
 
 // Cereal includes
-#include "cereal/types/vector.hpp"
 #include "cereal/archives/binary.hpp"
+#include "cereal/types/vector.hpp"
 
 #include "concurrentqueue.h"
 
-// salmon / Salmon includes
+// salmon includes
 #include "ClusterForest.hpp"
-#include "SalmonMath.hpp"
-#include "Transcript.hpp"
+#include "FastxParser.hpp"
+#include "IOUtils.hpp"
 #include "LibraryFormat.hpp"
-#include "SalmonUtils.hpp"
 #include "ReadLibrary.hpp"
 #include "SalmonConfig.hpp"
-#include "IOUtils.hpp"
 #include "SalmonIndex.hpp"
+#include "SalmonMath.hpp"
+#include "SalmonUtils.hpp"
+#include "Transcript.hpp"
 
-#include "BWAUtils.hpp"
-#include "KmerIntervalMap.hpp"
 #include "AlignmentGroup.hpp"
-#include "PairSequenceParser.hpp"
-#include "ForgettingMassCalculator.hpp"
-#include "FragmentLengthDistribution.hpp"
-#include "ReadExperiment.hpp"
-#include "SalmonOpts.hpp"
-#include "EquivalenceClassBuilder.hpp"
+#include "BWAUtils.hpp"
+#include "BiasParams.hpp"
 #include "CollapsedEMOptimizer.hpp"
 #include "CollapsedGibbsSampler.hpp"
-#include "RapMapUtils.hpp"
+#include "EquivalenceClassBuilder.hpp"
+#include "ForgettingMassCalculator.hpp"
+#include "FragmentLengthDistribution.hpp"
+#include "GZipWriter.hpp"
 #include "HitManager.hpp"
-#include "SASearcher.hpp"
+#include "KmerIntervalMap.hpp"
+#include "PairSequenceParser.hpp"
+#include "RapMapUtils.hpp"
+#include "ReadExperiment.hpp"
 #include "SACollector.hpp"
-#include "GZipWriter.hpp"
+#include "SASearcher.hpp"
+#include "SalmonOpts.hpp"
 //#include "TextBootstrapWriter.hpp"
 
 /****** QUASI MAPPING DECLARATIONS *********/
@@ -129,9 +130,10 @@ using MateStatus = rapmap::utils::MateStatus;
 using QuasiAlignment = rapmap::utils::QuasiAlignment;
 /****** QUASI MAPPING DECLARATIONS  *******/
 
-using paired_parser = pair_sequence_parser<char**>;
-using stream_manager = jellyfish::stream_manager<std::vector<std::string>::const_iterator>;
-using single_parser = jellyfish::whole_sequence_parser<stream_manager>;
+using paired_parser = fastx_parser::FastxParser<fastx_parser::ReadPair>;
+using stream_manager =
+    jellyfish::stream_manager<std::vector<std::string>::const_iterator>;
+using single_parser = fastx_parser::FastxParser<fastx_parser::ReadSeq>;
 
 using TranscriptID = uint32_t;
 using TranscriptIDVector = std::vector<TranscriptID>;
@@ -140,347 +142,497 @@ using my_mer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 1>;
 
 constexpr uint32_t miniBatchSize{5000};
 
-template <typename AlnT>
-using AlnGroupVec = std::vector<AlignmentGroup<AlnT>>;
+template <typename AlnT> using AlnGroupVec = std::vector<AlignmentGroup<AlnT>>;
 
 template <typename AlnT>
-using AlnGroupVecRange = boost::iterator_range<typename AlnGroupVec<AlnT>::iterator>;
+using AlnGroupVecRange =
+    boost::iterator_range<typename AlnGroupVec<AlnT>::iterator>;
 
 #define __MOODYCAMEL__
 #if defined(__MOODYCAMEL__)
- template <typename AlnT>
- using AlnGroupQueue = moodycamel::ConcurrentQueue<AlignmentGroup<AlnT>*>;
+template <typename AlnT>
+using AlnGroupQueue = moodycamel::ConcurrentQueue<AlignmentGroup<AlnT>*>;
 #else
- template <typename AlnT>
- using AlnGroupQueue = tbb::concurrent_queue<AlignmentGroup<AlnT>*>;
+template <typename AlnT>
+using AlnGroupQueue = tbb::concurrent_queue<AlignmentGroup<AlnT>*>;
 #endif
 
 #include "LightweightAlignmentDefs.hpp"
 
 template <typename AlnT>
-void processMiniBatch(
-        ReadExperiment& readExp,
-        ForgettingMassCalculator& fmCalc,
-        uint64_t firstTimestepOfRound,
-        ReadLibrary& readLib,
-        const SalmonOpts& salmonOpts,
-        AlnGroupVecRange<AlnT> batchHits,
-        std::vector<Transcript>& transcripts,
-        ClusterForest& clusterForest,
-        FragmentLengthDistribution& fragLengthDist,
-        std::atomic<uint64_t>& numAssignedFragments,
-        std::default_random_engine& randEng,
-        bool initialRound,
-        std::atomic<bool>& burnedIn
-        ) {
-
-    using salmon::math::LOG_0;
-    using salmon::math::LOG_1;
-    using salmon::math::LOG_ONEHALF;
-    using salmon::math::logAdd;
-    using salmon::math::logSub;
-
-    const uint64_t numBurninFrags = salmonOpts.numBurninFrags;
-
-    auto log = spdlog::get("jointLog");
-    size_t numTranscripts{transcripts.size()};
-    size_t localNumAssignedFragments{0};
-    size_t priorNumAssignedFragments{numAssignedFragments};
-    std::uniform_real_distribution<> uni(0.0, 1.0 + std::numeric_limits<double>::min());
-    std::vector<uint64_t> libTypeCounts(LibraryFormat::maxLibTypeID() + 1);
-
-    std::vector<FragmentStartPositionDistribution>& fragStartDists =
-        readExp.fragmentStartPositionDistributions();
-    auto& biasModel = readExp.sequenceBiasModel();
-
-    bool updateCounts = initialRound;
-    bool useReadCompat = salmonOpts.incompatPrior != salmon::math::LOG_1;
-    bool useFSPD{salmonOpts.useFSPD};
-    bool useFragLengthDist{!salmonOpts.noFragLengthDist};
-    bool noFragLenFactor{salmonOpts.noFragLenFactor};
-
-    const auto expectedLibraryFormat = readLib.format();
-    uint64_t zeroProbFrags{0};
-
-    //EQClass
-    EquivalenceClassBuilder& eqBuilder = readExp.equivalenceClassBuilder();
-
-    // Build reverse map from transcriptID => hit id
-    using HitID = uint32_t;
-
-    double logForgettingMass{0.0};
-    uint64_t currentMinibatchTimestep{0};
-
-    // logForgettingMass and currentMinibatchTimestep are OUT parameters!
-    fmCalc.getLogMassAndTimestep(logForgettingMass, currentMinibatchTimestep);
-
-    double startingCumulativeMass = fmCalc.cumulativeLogMassAt(firstTimestepOfRound);
-    int i{0};
-    {
-        // Iterate over each group of alignments (a group consists of all alignments reported
-        // for a single read).  Distribute the read's mass to the transcripts
-        // where it potentially aligns.
-        for (auto& alnGroup : batchHits) {
-	    // If we had no alignments for this read, then skip it
-            if (alnGroup.size() == 0) { continue; }
-
-            // We start out with probability 0
-            double sumOfAlignProbs{LOG_0};
-
-            // Record whether or not this read is unique to a single transcript.
-            bool transcriptUnique{true};
-
-            auto firstTranscriptID = alnGroup.alignments().front().transcriptID();
-            std::unordered_set<size_t> observedTranscripts;
-
-            // New incompat. handling.
-            /**
-            // The equivalence class information for
-            // compatible fragments
-            std::vector<uint32_t> txpIDsCompat;
-            std::vector<double> auxProbsCompat;
-	        std::vector<double> posProbsCompat;
-            double auxDenomCompat = salmon::math::LOG_0;
-
-            // The equivalence class information for
-            // all fragments (if there is no compatible fragment)
-            std::vector<uint32_t> txpIDsAll;
-            std::vector<double> auxProbsAll;
-            std::vector<double> posProbsAll;
-            double auxDenomAll = salmon::math::LOG_0;
-
-            std::vector<uint32_t>* txpIDsFinal = nullptr;
-            std::vector<uint32_t>* txpIDsFinal = nullptr;
-            std::vector<uint32_t>* txpIDsFinal = nullptr;
-            double auxDenomFinal = salmon::math::LOG_0;
-            **/
-
-            std::vector<uint32_t> txpIDs;
-            std::vector<double> auxProbs;
-	        std::vector<double> posProbs;
-            double auxDenom= salmon::math::LOG_0;
-
-            uint32_t numInGroup{0};
-            uint32_t prevTxpID{0};
-
-            // For each alignment of this read
-            for (auto& aln : alnGroup.alignments()) {
-                auto transcriptID = aln.transcriptID();
-                auto& transcript = transcripts[transcriptID];
-                transcriptUnique = transcriptUnique and (transcriptID == firstTranscriptID);
-
-                double refLength = transcript.RefLength > 0 ? transcript.RefLength : 1.0;
-                double coverage = aln.score();
-                double logFragCov = (coverage > 0) ? std::log(coverage) : LOG_1;
-
-                // The alignment probability is the product of a
-                // transcript-level term (based on abundance and) an
-                // alignment-level term.
-                double logRefLength{salmon::math::LOG_0};
-                if (salmonOpts.noEffectiveLengthCorrection or !burnedIn) {
-                    logRefLength = std::log(transcript.RefLength);
-                } else {
-                    logRefLength = transcript.getCachedLogEffectiveLength();
-                }
+void processMiniBatch(ReadExperiment& readExp, ForgettingMassCalculator& fmCalc,
+                      uint64_t firstTimestepOfRound, ReadLibrary& readLib,
+                      const SalmonOpts& salmonOpts,
+                      AlnGroupVecRange<AlnT> batchHits,
+                      std::vector<Transcript>& transcripts,
+                      ClusterForest& clusterForest,
+                      FragmentLengthDistribution& fragLengthDist,
+                      BiasParams& observedBiasParams,
+                      std::atomic<uint64_t>& numAssignedFragments,
+                      std::default_random_engine& randEng, bool initialRound,
+                      std::atomic<bool>& burnedIn, double& maxZeroFrac) {
+
+  using salmon::math::LOG_0;
+  using salmon::math::LOG_1;
+  using salmon::math::LOG_ONEHALF;
+  using salmon::math::logAdd;
+  using salmon::math::logSub;
+
+  const uint64_t numBurninFrags = salmonOpts.numBurninFrags;
+
+  auto log = spdlog::get("jointLog");
+  size_t numTranscripts{transcripts.size()};
+  size_t localNumAssignedFragments{0};
+  size_t priorNumAssignedFragments{numAssignedFragments};
+  std::uniform_real_distribution<> uni(
+      0.0, 1.0 + std::numeric_limits<double>::min());
+  std::vector<uint64_t> libTypeCounts(LibraryFormat::maxLibTypeID() + 1);
+  bool hasCompatibleMapping{false};
+  uint64_t numCompatibleFragments{0};
+
+  std::vector<FragmentStartPositionDistribution>& fragStartDists =
+      readExp.fragmentStartPositionDistributions();
+  auto& biasModel = readExp.sequenceBiasModel();
+  auto& observedGCMass = observedBiasParams.observedGCMass;
+  auto& obsFwd = observedBiasParams.massFwd;
+  auto& obsRC = observedBiasParams.massRC;
+  auto& observedPosBiasFwd = observedBiasParams.posBiasFW;
+  auto& observedPosBiasRC = observedBiasParams.posBiasRC;
+
+  bool posBiasCorrect = salmonOpts.posBiasCorrect;
+  bool gcBiasCorrect = salmonOpts.gcBiasCorrect;
+  bool updateCounts = initialRound;
+  bool useReadCompat = salmonOpts.incompatPrior != salmon::math::LOG_1;
+  bool useFSPD{salmonOpts.useFSPD};
+  bool useFragLengthDist{!salmonOpts.noFragLengthDist};
+  bool noFragLenFactor{salmonOpts.noFragLenFactor};
+  bool useRankEqClasses{salmonOpts.rankEqClasses};
+
+  // If we're auto detecting the library type
+  auto* detector = readLib.getDetector();
+  bool autoDetect = (detector != nullptr) ? detector->isActive() : false;
+  const auto expectedLibraryFormat = readLib.format();
+  uint64_t zeroProbFrags{0};
+
+  // EQClass
+  EquivalenceClassBuilder& eqBuilder = readExp.equivalenceClassBuilder();
+
+  // Build reverse map from transcriptID => hit id
+  using HitID = uint32_t;
+
+  double logForgettingMass{0.0};
+  uint64_t currentMinibatchTimestep{0};
+
+  // logForgettingMass and currentMinibatchTimestep are OUT parameters!
+  fmCalc.getLogMassAndTimestep(logForgettingMass, currentMinibatchTimestep);
+
+  double startingCumulativeMass =
+      fmCalc.cumulativeLogMassAt(firstTimestepOfRound);
+  int i{0};
+  {
+    // Iterate over each group of alignments (a group consists of all
+    // alignments reported for a single read).
+    // Distribute the read's mass to the transcripts
+    // where it potentially aligns.
+    for (auto& alnGroup : batchHits) {
+      // If we had no alignments for this read, then skip it
+      if (alnGroup.size() == 0) {
+        continue;
+      }
+
+      // We start out with probability 0
+      double sumOfAlignProbs{LOG_0};
+
+      // Record whether or not this read is unique to a single transcript.
+      bool transcriptUnique{true};
+
+      auto firstTranscriptID = alnGroup.alignments().front().transcriptID();
+      std::unordered_set<size_t> observedTranscripts;
+
+      // New incompat. handling.
+      /**
+      // The equivalence class information for
+      // compatible fragments
+      std::vector<uint32_t> txpIDsCompat;
+      std::vector<double> auxProbsCompat;
+      std::vector<double> posProbsCompat;
+      double auxDenomCompat = salmon::math::LOG_0;
+
+      // The equivalence class information for
+      // all fragments (if there is no compatible fragment)
+      std::vector<uint32_t> txpIDsAll;
+      std::vector<double> auxProbsAll;
+      std::vector<double> posProbsAll;
+      double auxDenomAll = salmon::math::LOG_0;
+
+      std::vector<uint32_t>* txpIDsFinal = nullptr;
+      std::vector<double>* auxProbsFinal = nullptr;
+      std::vector<double>* posProbsFinal = nullptr;
+      double auxDenomFinal = salmon::math::LOG_0;
+      **/
+
+      std::vector<uint32_t> txpIDs;
+      std::vector<double> auxProbs;
+      std::vector<double> posProbs;
+      double auxDenom = salmon::math::LOG_0;
+
+      uint32_t numInGroup{0};
+      uint32_t prevTxpID{0};
+
+      hasCompatibleMapping = false;
+      // For each alignment of this read
+      for (auto& aln : alnGroup.alignments()) {
+        auto transcriptID = aln.transcriptID();
+        auto& transcript = transcripts[transcriptID];
+        transcriptUnique =
+            transcriptUnique and (transcriptID == firstTranscriptID);
+
+        double refLength =
+            transcript.RefLength > 0 ? transcript.RefLength : 1.0;
+        double coverage = aln.score();
+        double logFragCov = (coverage > 0) ? std::log(coverage) : LOG_1;
+
+        // The alignment probability is the product of a
+        // transcript-level term (based on abundance) and an
+        // alignment-level term.
+        double logRefLength{salmon::math::LOG_0};
+        if (salmonOpts.noEffectiveLengthCorrection or !burnedIn) {
+          logRefLength = std::log(transcript.RefLength);
+        } else {
+          logRefLength = transcript.getCachedLogEffectiveLength();
+        }
 
-                double transcriptLogCount = transcript.mass(initialRound);
-
-                // If the transcript had a non-zero count (including pseudocount)
-                if (std::abs(transcriptLogCount) != LOG_0 ) {
-
-                    // The probability of drawing a fragment of this length;
-                    double logFragProb = LOG_1;
-                    if (burnedIn and useFragLengthDist and aln.fragLength() > 0) {
-                        logFragProb = fragLengthDist.pmf(static_cast<size_t>(aln.fragLength()));
-                    }
-
-                    // TESTING
-                    if (noFragLenFactor) { logFragProb = LOG_1; }
-
-                    // TODO: Maybe take the fragment length distribution into account
-                    // for single-end fragments?
-
-                    // The probability that the fragments align to the given strands in the
-                    // given orientations.
-                    double logAlignCompatProb =
-                        (useReadCompat) ?
-                        (salmon::utils::logAlignFormatProb(
-                            aln.libFormat(),
-                            expectedLibraryFormat,
-                            static_cast<int32_t>(aln.pos),
-                            aln.fwd, aln.mateStatus, salmonOpts.incompatPrior)
-                         ) : LOG_1;
-
-                    /** New compat handling
-                    // True if the read is compatible with the
-                    // expected library type; false otherwise.
-                    bool compat = ignoreCompat;
-                    if (!compat) {
-                        if (aln.mateStatus == rapmap::utils::MateStatus::PAIRED_END_PAIRED) {
-                            compat = salmon::utils::compatibleHit(
-                                    expectedLibType, observedLibType);
-                        } else {
-                            int32_t pos = static_cast<int32_t>(aln.pos);
-                            compat = salmon::utils::compatibleHit(
-                                    expectedLibraryFormat, pos,
-                                    aln.fwd, aln.mateStatus);
-                        }
-                    }
-                    **/
-
-                    // Allow for a non-uniform fragment start position distribution
-                    double startPosProb{-logRefLength};
-                    double fragStartLogNumerator{salmon::math::LOG_1};
-                    double fragStartLogDenominator{salmon::math::LOG_1};
-
-                    auto hitPos = aln.hitPos();
-                    if (useFSPD and burnedIn and hitPos < refLength) {
-                        auto& fragStartDist = fragStartDists[transcript.lengthClassIndex()];
-                        // Get the log(numerator) and log(denominator) for the fragment start position
-                        // probability.
-                        bool nonZeroProb = fragStartDist.logNumDenomMass(hitPos, refLength, logRefLength,
-                                fragStartLogNumerator, fragStartLogDenominator);
-                        // Set the overall probability.
-                        startPosProb = (nonZeroProb) ?
-                            fragStartLogNumerator - fragStartLogDenominator :
-                            salmon::math::LOG_0;
-                    }
-
-                    // Increment the count of this type of read that we've seen
-                    ++libTypeCounts[aln.libFormat().formatID()];
-
-                    // The total auxiliary probabilty is the product (sum in log-space) of
-                    // The start position probability
-                    // The fragment length probabilty
-                    // The mapping score (coverage) probability
-                    // The fragment compatibility probability
-                    // The bias probability
-                    double auxProb =  logFragProb + logFragCov +
-                                      logAlignCompatProb;
-
-                    aln.logProb = transcriptLogCount + auxProb + startPosProb;
-
-                    // If this alignment had a zero probability, then skip it
-                    if (std::abs(aln.logProb) == LOG_0) { continue; }
-
-                    sumOfAlignProbs = logAdd(sumOfAlignProbs, aln.logProb);
-
-                    if (updateCounts and
-                        observedTranscripts.find(transcriptID) == observedTranscripts.end()) {
-                        transcripts[transcriptID].addTotalCount(1);
-                        observedTranscripts.insert(transcriptID);
-                    }
-                    // EQCLASS
-                    if (transcriptID < prevTxpID) { std::cerr << "[ERROR] Transcript IDs are not in sorted order; please report this bug on GitHub!\n"; }
-                    prevTxpID = transcriptID;
-                    txpIDs.push_back(transcriptID);
-                    auxProbs.push_back(auxProb);
-                    auxDenom = salmon::math::logAdd(auxDenom, auxProb);
-
-                    // If we're using the fragment start position distribution
-                    // remember *the numerator* of (x / cdf(effLen / len)) where
-                    // x = cdf(p+1 / len) - cdf(p / len)
-                    if (useFSPD) { posProbs.push_back(std::exp(fragStartLogNumerator)); }
-                } else {
-                    aln.logProb = LOG_0;
+        double transcriptLogCount = transcript.mass(initialRound);
 
-                }
-            }
+        // If the transcript had a non-zero count (including pseudocount)
+        if (std::abs(transcriptLogCount) != LOG_0) {
 
-            // If this fragment has a zero probability,
-            // go to the next one
-            if (sumOfAlignProbs == LOG_0) {
-                ++zeroProbFrags;
-                continue;
-            } else { // otherwise, count it as assigned
-                ++localNumAssignedFragments;
-            }
+          // The probability of drawing a fragment of this length;
+          double logFragProb = LOG_1;
+          if (burnedIn and useFragLengthDist and aln.fragLength() > 0) {
+            logFragProb =
+                fragLengthDist.pmf(static_cast<size_t>(aln.fragLength()));
+          }
 
-            // EQCLASS
-            double auxProbSum{0.0};
-            for (auto& p : auxProbs) {
-                p = std::exp(p - auxDenom);
-                auxProbSum += p;
-            }
-            if (txpIDs.size() > 0) {
-               TranscriptGroup tg(txpIDs);
-               eqBuilder.addGroup(std::move(tg), auxProbs, posProbs);
-            }
+          // TESTING
+          if (noFragLenFactor) {
+            logFragProb = LOG_1;
+          }
 
-            // normalize the hits
-            for (auto& aln : alnGroup.alignments()) {
-                if (std::abs(aln.logProb) == LOG_0) { continue; }
-                // Normalize the log-probability of this alignment
-                aln.logProb -= sumOfAlignProbs;
-                // Get the transcript referenced in this alignment
-                auto transcriptID = aln.transcriptID();
-                auto& transcript = transcripts[transcriptID];
-
-                // Add the new mass to this transcript
-                double newMass = logForgettingMass + aln.logProb;
-                transcript.addMass( newMass );
-
-                double r = uni(randEng);
-                if (!burnedIn and r < std::exp(aln.logProb)) {
-                    //errMod.update(aln, transcript, aln.logProb, logForgettingMass);
-                    double fragLength = aln.fragLength();
-                    if (useFragLengthDist and fragLength > 0.0) {
-                        //if (aln.fragType() == ReadType::PAIRED_END) {
-                        fragLengthDist.addVal(fragLength, logForgettingMass);
-                    }
-                    if (useFSPD) {
-                        auto hitPos = aln.hitPos();
-                        auto& fragStartDist = fragStartDists[transcript.lengthClassIndex()];
-                        fragStartDist.addVal(hitPos,
-                                             transcript.RefLength,
-                                             logForgettingMass);
-                    }
+	  if (autoDetect) {
+	    detector->addSample(aln.libFormat());
+	    if (detector->canGuess()) {
+	      detector->mostLikelyType(readLib.getFormat());
+	      autoDetect = false;
+	    }
+	  }
+	  
+          // TODO: Maybe take the fragment length distribution into account
+          // for single-end fragments?
+
+          // The probability that the fragments align
+          // to the given strands in the given
+          // orientations.
+          double logAlignCompatProb =
+              (useReadCompat) ? (salmon::utils::logAlignFormatProb(
+                                    aln.libFormat(), expectedLibraryFormat,
+                                    static_cast<int32_t>(aln.pos), aln.fwd,
+                                    aln.mateStatus, salmonOpts.incompatPrior))
+                              : LOG_1;
+
+          /** New compat handling
+          // True if the read is compatible with the
+          // expected library type; false otherwise.
+          bool compat = ignoreCompat;
+          if (!compat) {
+              if (aln.mateStatus ==
+          rapmap::utils::MateStatus::PAIRED_END_PAIRED) {
+                  compat = salmon::utils::compatibleHit(
+                          expectedLibType, observedLibType);
+              } else {
+                  int32_t pos = static_cast<int32_t>(aln.pos);
+                  compat = salmon::utils::compatibleHit(
+                          expectedLibraryFormat, pos,
+                          aln.fwd, aln.mateStatus);
+              }
+          }
+          **/
+
+          // Allow for a non-uniform fragment start position distribution
+          double startPosProb{-logRefLength};
+          double fragStartLogNumerator{salmon::math::LOG_1};
+          double fragStartLogDenominator{salmon::math::LOG_1};
+
+          auto hitPos = aln.hitPos();
+          if (useFSPD and burnedIn and hitPos < refLength) {
+            auto& fragStartDist = fragStartDists[transcript.lengthClassIndex()];
+            // Get the log(numerator) and log(denominator)
+            // for the fragment start position
+            // probability.
+            bool nonZeroProb = fragStartDist.logNumDenomMass(
+                hitPos, refLength, logRefLength, fragStartLogNumerator,
+                fragStartLogDenominator);
+            // Set the overall probability.
+            startPosProb = (nonZeroProb)
+                               ? fragStartLogNumerator - fragStartLogDenominator
+                               : salmon::math::LOG_0;
+          }
+
+          // Increment the count of this type of read that we've seen
+          ++libTypeCounts[aln.libFormat().formatID()];
+          // Remember whether any alignment of this read had a compatibility log-probability of LOG_1
+          if (!hasCompatibleMapping and logAlignCompatProb == LOG_1) { hasCompatibleMapping = true; }
+
+          // The total auxiliary probability is the product (sum in log-space) of
+          // The start position probability
+          // The fragment length probability
+          // The mapping score (coverage) probability
+          // The fragment compatibility probability
+          // The bias probability
+          double auxProb = logFragProb + logFragCov + logAlignCompatProb;
+
+          aln.logProb = transcriptLogCount + auxProb + startPosProb;
+
+          // If this alignment had a zero probability, then skip it
+          if (std::abs(aln.logProb) == LOG_0) {
+            continue;
+          }
+
+          sumOfAlignProbs = logAdd(sumOfAlignProbs, aln.logProb);
+
+          if (updateCounts and
+              observedTranscripts.find(transcriptID) ==
+                  observedTranscripts.end()) {
+            transcripts[transcriptID].addTotalCount(1);
+            observedTranscripts.insert(transcriptID);
+          }
+          // EQCLASS
+          if (transcriptID < prevTxpID) {
+            std::cerr << "[ERROR] Transcript IDs are not in sorted order; "
+                         "please report this bug on GitHub!\n";
+          }
+          prevTxpID = transcriptID;
+          txpIDs.push_back(transcriptID);
+          auxProbs.push_back(auxProb);
+          auxDenom = salmon::math::logAdd(auxDenom, auxProb);
+
+          // If we're using the fragment start position distribution
+          // remember *the numerator* of (x / cdf(effLen / len)) where
+          // x = cdf(p+1 / len) - cdf(p / len)
+          if (useFSPD) {
+            posProbs.push_back(std::exp(fragStartLogNumerator));
+          }
+        } else {
+          aln.logProb = LOG_0;
+        }
+      }
+
+      // If this fragment has a zero probability,
+      // go to the next one
+      if (sumOfAlignProbs == LOG_0) {
+        ++zeroProbFrags;
+        continue;
+      } else { // otherwise, count it as assigned
+        ++localNumAssignedFragments;
+        if (hasCompatibleMapping) { ++numCompatibleFragments; }
+      }
+
+      // EQCLASS
+      double auxProbSum{0.0};
+      for (auto& p : auxProbs) {
+        p = std::exp(p - auxDenom);
+        auxProbSum += p;
+      }
+      
+      auto eqSize = txpIDs.size();
+      if (eqSize > 0) {
+        if (useRankEqClasses and eqSize > 1) {
+            std::vector<int> inds(eqSize);
+            std::iota(inds.begin(), inds.end(), 0);
+            // Get the indices in order by conditional probability
+            std::sort(inds.begin(), inds.end(), 
+                      [&auxProbs](int i, int j) -> bool { return auxProbs[i] < auxProbs[j]; });
+            // Reorder the other vectors
+            if (useFSPD) {
+                decltype(txpIDs) txpIDsNew(txpIDs.size());
+                decltype(auxProbs) auxProbsNew(auxProbs.size());
+                decltype(posProbs) posProbsNew(posProbs.size());
+                for (size_t r = 0; r < eqSize; ++r) {
+                    auto ind = inds[r];
+                    txpIDsNew[r] = txpIDs[ind];
+                    auxProbsNew[r] = auxProbs[ind];
+                    posProbsNew[r] = posProbs[ind];
                 }
-            } // end normalize
-
-            // update the single target transcript
-            if (transcriptUnique) {
-                if (updateCounts) {
-                    transcripts[firstTranscriptID].addUniqueCount(1);
+                std::swap(txpIDsNew, txpIDs);
+                std::swap(auxProbsNew, auxProbs);
+                std::swap(posProbsNew, posProbs);
+            } else {
+                decltype(txpIDs) txpIDsNew(txpIDs.size());
+                decltype(auxProbs) auxProbsNew(auxProbs.size());
+                for (size_t r = 0; r < eqSize; ++r) {
+                    auto ind = inds[r];
+                    txpIDsNew[r] = txpIDs[ind];
+                    auxProbsNew[r] = auxProbs[ind];
                 }
-                clusterForest.updateCluster(
-                        firstTranscriptID,
-                        1.0,
-                        logForgettingMass, updateCounts);
-            } else { // or the appropriate clusters
-                clusterForest.mergeClusters<AlnT>(alnGroup.alignments().begin(), alnGroup.alignments().end());
-                clusterForest.updateCluster(
-                        alnGroup.alignments().front().transcriptID(),
-                        1.0,
-                        logForgettingMass, updateCounts);
+                std::swap(txpIDsNew, txpIDs);
+                std::swap(auxProbsNew, auxProbs);
             }
+        }
+        
+        TranscriptGroup tg(txpIDs);
+        eqBuilder.addGroup(std::move(tg), auxProbs, posProbs);
+      }
+
+      // normalize the hits
+      for (auto& aln : alnGroup.alignments()) {
+        if (std::abs(aln.logProb) == LOG_0) {
+          continue;
+        }
+        // Normalize the log-probability of this alignment
+        aln.logProb -= sumOfAlignProbs;
+        // Get the transcript referenced in this alignment
+        auto transcriptID = aln.transcriptID();
+        auto& transcript = transcripts[transcriptID];
+
+        // Add the new mass to this transcript
+        double newMass = logForgettingMass + aln.logProb;
+        transcript.addMass(newMass);
+
+        // Paired-end
+        if (aln.libFormat().type == ReadType::PAIRED_END) {
+          // TODO: Is this right for *all* library types?
+          if (aln.fwd) {
+            obsFwd = salmon::math::logAdd(obsFwd, aln.logProb);
+          } else {
+            obsRC = salmon::math::logAdd(obsRC, aln.logProb);
+          }
+        } else if (aln.libFormat().type == ReadType::SINGLE_END) {
+          int32_t p = (aln.pos < 0) ? 0 : aln.pos;
+          if (p >= transcript.RefLength) {
+            p = transcript.RefLength - 1;
+          }
+          // Single-end or orphan
+          if (aln.libFormat().strandedness == ReadStrandedness::S) {
+            obsFwd = salmon::math::logAdd(obsFwd, aln.logProb);
+          } else {
+            obsRC = salmon::math::logAdd(obsRC, aln.logProb);
+          }
+        }
 
-            } // end read group
-        }// end timer
-
-	if (zeroProbFrags > 0) {
-            log->warn("Minibatch contained {} "
-                      "0 probability fragments", zeroProbFrags);
+        if (posBiasCorrect) {
+          auto lengthClassIndex = transcript.lengthClassIndex();
+          switch (aln.mateStatus) {
+          case rapmap::utils::MateStatus::PAIRED_END_PAIRED: {
+            // TODO: Handle the non opposite strand case
+            if (aln.fwd != aln.mateIsFwd) {
+              int32_t posFW = aln.fwd ? aln.pos : aln.matePos;
+              int32_t posRC = aln.fwd ? aln.matePos : aln.pos;
+              posFW = posFW < 0 ? 0 : posFW;
+              posFW = posFW >= transcript.RefLength ? transcript.RefLength - 1
+                                                    : posFW;
+              posRC = posRC < 0 ? 0 : posRC;
+              posRC = posRC >= transcript.RefLength ? transcript.RefLength - 1
+                                                    : posRC;
+              observedPosBiasFwd[lengthClassIndex].addMass(
+                  posFW, transcript.RefLength, aln.logProb);
+              observedPosBiasRC[lengthClassIndex].addMass(
+                  posRC, transcript.RefLength, aln.logProb);
+            }
+          } break;
+          case rapmap::utils::MateStatus::PAIRED_END_LEFT:
+          case rapmap::utils::MateStatus::PAIRED_END_RIGHT:
+          case rapmap::utils::MateStatus::SINGLE_END: {
+            int32_t pos = aln.pos;
+            pos = pos < 0 ? 0 : pos;
+            pos = pos >= transcript.RefLength ? transcript.RefLength - 1 : pos;
+            if (aln.fwd) {
+              observedPosBiasFwd[lengthClassIndex].addMass(
+                  pos, transcript.RefLength, aln.logProb);
+            } else {
+              observedPosBiasRC[lengthClassIndex].addMass(
+                  pos, transcript.RefLength, aln.logProb);
+            }
+          } break;
+          default:
+            break;
+          }
         }
 
-        numAssignedFragments += localNumAssignedFragments;
-        if (numAssignedFragments >= numBurninFrags and !burnedIn) {
-            if (useFSPD) {
-                // update all of the fragment start position
-                // distributions
-                for (auto& fspd : fragStartDists) {
-                    fspd.update();
-                }
+        if (gcBiasCorrect and aln.libFormat().type == ReadType::PAIRED_END) {
+          int32_t start = std::min(aln.pos, aln.matePos);
+          int32_t stop = start + aln.fragLen - 1;
+
+          // WITH CONTEXT
+          if (start >= 0 and stop < transcript.RefLength) {
+              auto desc = transcript.gcDesc(start, stop);
+              observedGCMass.inc(desc, aln.logProb);
+            /*
+            int32_t gcFrac = transcript.gcFrac(start, stop);
+            // Add this fragment's contribution
+            observedGCMass[gcFrac] =
+                salmon::math::logAdd(observedGCMass[gcFrac], aln.logProb);
+            */
+          }
+        }
+        double r = uni(randEng);
+        if (!burnedIn and r < std::exp(aln.logProb)) {
+            
+            //Old fragment length calc: double fragLength = aln.fragLength();
+            auto fragLength = aln.fragLengthPedantic(transcript.RefLength);
+            if (fragLength > 0) {
+                fragLengthDist.addVal(fragLength, logForgettingMass);
             }
-            // NOTE: only one thread should succeed here, and that
-            // thread will set burnedIn to true.
-            readExp.updateTranscriptLengthsAtomic(burnedIn);
+
+          if (useFSPD) {
+            auto hitPos = aln.hitPos();
+            auto& fragStartDist = fragStartDists[transcript.lengthClassIndex()];
+            fragStartDist.addVal(hitPos, transcript.RefLength,
+                                 logForgettingMass);
+          }
         }
-        if (initialRound) {
-            readLib.updateLibTypeCounts(libTypeCounts);
+      } // end normalize
+
+      // update the single target transcript
+      if (transcriptUnique) {
+        if (updateCounts) {
+          transcripts[firstTranscriptID].addUniqueCount(1);
         }
-}
+        clusterForest.updateCluster(firstTranscriptID, 1.0, logForgettingMass,
+                                    updateCounts);
+      } else { // or the appropriate clusters
+        clusterForest.mergeClusters<AlnT>(alnGroup.alignments().begin(),
+                                          alnGroup.alignments().end());
+        clusterForest.updateCluster(
+            alnGroup.alignments().front().transcriptID(), 1.0,
+            logForgettingMass, updateCounts);
+      }
+
+    } // end read group
+  }   // end timer
+
+  if (zeroProbFrags > 0) {
+      auto batchReads = batchHits.size();
+      maxZeroFrac = std::max(maxZeroFrac, static_cast<double>(100.0 * zeroProbFrags) / batchReads);
+  }
 
+  numAssignedFragments += localNumAssignedFragments;
+  if (numAssignedFragments >= numBurninFrags and !burnedIn) {
+    if (useFSPD) {
+      // update all of the fragment start position
+      // distributions
+      for (auto& fspd : fragStartDists) {
+        fspd.update();
+      }
+    }
+    // NOTE: only one thread should succeed here, and that
+    // thread will set burnedIn to true.
+    readExp.updateTranscriptLengthsAtomic(burnedIn);
+  }
+  if (initialRound) {
+    readLib.updateLibTypeCounts(libTypeCounts);
+    readLib.updateCompatCounts(numCompatibleFragments);
+  }
+}
 
 /// START QUASI
 
@@ -488,79 +640,57 @@ void processMiniBatch(
 // available. A job behaves like a pointer to the type
 // jellyfish::sequence_list (see whole_sequence_parser.hpp).
 template <typename RapMapIndexT>
-void processReadsQuasi(paired_parser* parser,
-               ReadExperiment& readExp,
-               ReadLibrary& rl,
-               AlnGroupVec<SMEMAlignment>& structureVec,
-               std::atomic<uint64_t>& numObservedFragments,
-               std::atomic<uint64_t>& numAssignedFragments,
-               std::atomic<uint64_t>& validHits,
-               std::atomic<uint64_t>& upperBoundHits,
-               RapMapIndexT* idx,
-               std::vector<Transcript>& transcripts,
-               ForgettingMassCalculator& fmCalc,
-               ClusterForest& clusterForest,
-               FragmentLengthDistribution& fragLengthDist,
-               mem_opt_t* memOptions,
-               SalmonOpts& salmonOpts,
-               double coverageThresh,
-	           std::mutex& iomutex,
-               bool initialRound,
-               std::atomic<bool>& burnedIn,
-               volatile bool& writeToCache) {
-
-    	// ERROR
-	salmonOpts.jointLog->error("MEM-mapping cannot be used with the Quasi index --- please report this bug on GitHub");
-	std::exit(1);
+void processReadsQuasi(
+    paired_parser* parser, ReadExperiment& readExp, ReadLibrary& rl,
+    AlnGroupVec<SMEMAlignment>& structureVec,
+    std::atomic<uint64_t>& numObservedFragments,
+    std::atomic<uint64_t>& numAssignedFragments,
+    std::atomic<uint64_t>& validHits, std::atomic<uint64_t>& upperBoundHits,
+    RapMapIndexT* idx, std::vector<Transcript>& transcripts,
+    ForgettingMassCalculator& fmCalc, ClusterForest& clusterForest,
+    FragmentLengthDistribution& fragLengthDist, BiasParams& observedBiasParams,
+    mem_opt_t* memOptions, SalmonOpts& salmonOpts, double coverageThresh,
+    std::mutex& iomutex, bool initialRound, std::atomic<bool>& burnedIn,
+    volatile bool& writeToCache) {
+
+  // ERROR
+  salmonOpts.jointLog->error("MEM-mapping cannot be used with the Quasi index "
+                             "--- please report this bug on GitHub");
+  std::exit(1);
 }
 
 template <typename RapMapIndexT>
-void processReadsQuasi(single_parser* parser,
-               ReadExperiment& readExp,
-               ReadLibrary& rl,
-               AlnGroupVec<SMEMAlignment>& structureVec,
-               std::atomic<uint64_t>& numObservedFragments,
-               std::atomic<uint64_t>& numAssignedFragments,
-               std::atomic<uint64_t>& validHits,
-               std::atomic<uint64_t>& upperBoundHits,
-               RapMapIndexT* sidx,
-               std::vector<Transcript>& transcripts,
-               ForgettingMassCalculator& fmCalc,
-               ClusterForest& clusterForest,
-               FragmentLengthDistribution& fragLengthDist,
-               mem_opt_t* memOptions,
-               SalmonOpts& salmonOpts,
-               double coverageThresh,
-	           std::mutex& iomutex,
-               bool initialRound,
-               std::atomic<bool>& burnedIn,
-               volatile bool& writeToCache) {
-    	// ERROR
-	salmonOpts.jointLog->error("MEM-mapping cannot be used with the Quasi index --- please report this bug on GitHub");
-	std::exit(1);
+void processReadsQuasi(
+    single_parser* parser, ReadExperiment& readExp, ReadLibrary& rl,
+    AlnGroupVec<SMEMAlignment>& structureVec,
+    std::atomic<uint64_t>& numObservedFragments,
+    std::atomic<uint64_t>& numAssignedFragments,
+    std::atomic<uint64_t>& validHits, std::atomic<uint64_t>& upperBoundHits,
+    RapMapIndexT* sidx, std::vector<Transcript>& transcripts,
+    ForgettingMassCalculator& fmCalc, ClusterForest& clusterForest,
+    FragmentLengthDistribution& fragLengthDist, BiasParams& observedBiasParams,
+    mem_opt_t* memOptions, SalmonOpts& salmonOpts, double coverageThresh,
+    std::mutex& iomutex, bool initialRound, std::atomic<bool>& burnedIn,
+    volatile bool& writeToCache) {
+  // ERROR
+  salmonOpts.jointLog->error("MEM-mapping cannot be used with the Quasi index "
+                             "--- please report this bug on GitHub");
+  std::exit(1);
 }
 
 template <typename RapMapIndexT>
-void processReadsQuasi(paired_parser* parser,
-               ReadExperiment& readExp,
-               ReadLibrary& rl,
-               AlnGroupVec<QuasiAlignment>& structureVec,
-               std::atomic<uint64_t>& numObservedFragments,
-               std::atomic<uint64_t>& numAssignedFragments,
-               std::atomic<uint64_t>& validHits,
-               std::atomic<uint64_t>& upperBoundHits,
-               RapMapIndexT* qidx,
-               std::vector<Transcript>& transcripts,
-               ForgettingMassCalculator& fmCalc,
-               ClusterForest& clusterForest,
-               FragmentLengthDistribution& fragLengthDist,
-               mem_opt_t* memOptions,
-               SalmonOpts& salmonOpts,
-               double coverageThresh,
-	           std::mutex& iomutex,
-               bool initialRound,
-               std::atomic<bool>& burnedIn,
-               volatile bool& writeToCache) {
+void processReadsQuasi(
+    paired_parser* parser, ReadExperiment& readExp, ReadLibrary& rl,
+    AlnGroupVec<QuasiAlignment>& structureVec,
+    std::atomic<uint64_t>& numObservedFragments,
+    std::atomic<uint64_t>& numAssignedFragments,
+    std::atomic<uint64_t>& validHits, std::atomic<uint64_t>& upperBoundHits,
+    RapMapIndexT* qidx, std::vector<Transcript>& transcripts,
+    ForgettingMassCalculator& fmCalc, ClusterForest& clusterForest,
+    FragmentLengthDistribution& fragLengthDist, BiasParams& observedBiasParams,
+    mem_opt_t* memOptions, SalmonOpts& salmonOpts, double coverageThresh,
+    std::mutex& iomutex, bool initialRound, std::atomic<bool>& burnedIn,
+    volatile bool& writeToCache) {
   uint64_t count_fwd = 0, count_bwd = 0;
   // Seed with a real random value, if available
   std::random_device rd;
@@ -571,183 +701,336 @@ void processReadsQuasi(paired_parser* parser,
   uint64_t prevObservedFrags{1};
   uint64_t leftHitCount{0};
   uint64_t hitListCount{0};
-
-  auto& readBias = readExp.readBias();
+  salmon::utils::ShortFragStats shortFragStats;
+  double maxZeroFrac{0.0};
+
+  // Write unmapped reads
+  fmt::MemoryWriter unmappedNames;
+  auto unmappedLogger = spdlog::get("unmappedLog");
+  bool writeUnmapped = (unmappedLogger.get() == nullptr) ? false : true;
+
+  auto& readBiasFW =
+      observedBiasParams
+          .seqBiasModelFW; // readExp.readBias(salmon::utils::Direction::FORWARD);
+  auto& readBiasRC =
+      observedBiasParams
+          .seqBiasModelRC; // readExp.readBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+  // k-mers for sequence bias context
+  Mer leftMer;
+  Mer rightMer;
 
   auto expectedLibType = rl.format();
 
   uint64_t firstTimestepOfRound = fmCalc.getCurrentTimestep();
+  size_t minK = rapmap::utils::my_mer::k();
 
   size_t locRead{0};
   uint64_t localUpperBoundHits{0};
   size_t rangeSize{0};
-  uint64_t  localNumAssignedFragments{0};
-
+  uint64_t localNumAssignedFragments{0};
+  bool strictIntersect = salmonOpts.strictIntersect;
+  bool consistentHits = salmonOpts.consistentHits;
+  bool quiet = salmonOpts.quiet;
+  
   bool tooManyHits{false};
   size_t maxNumHits{salmonOpts.maxReadOccs};
-  size_t readLen{0};
+  size_t readLenLeft{0};
+  size_t readLenRight{0};
   SACollector<RapMapIndexT> hitCollector(qidx);
   SASearcher<RapMapIndexT> saSearcher(qidx);
   std::vector<QuasiAlignment> leftHits;
   std::vector<QuasiAlignment> rightHits;
   rapmap::utils::HitCounters hctr;
+  salmon::utils::MappingType mapType{salmon::utils::MappingType::UNMAPPED};
+  
+  auto rg = parser->getReadGroup();
+  while (parser->refill(rg)) {
+      rangeSize = rg.size();
 
-  while(true) {
-    typename paired_parser::job j(*parser); // Get a job from the parser: a bunch of reads (at most max_read_group)
-    if(j.is_empty()) break;           // If got nothing, quit
-
-    rangeSize = j->nb_filled;
     if (rangeSize > structureVec.size()) {
-        salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} --- this shouldn't happen.\n"
-                                   "Please report this bug on GitHub", rangeSize, structureVec.size());
-        std::exit(1);
+      salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} "
+                                 "--- this shouldn't happen.\n"
+                                 "Please report this bug on GitHub",
+                                 rangeSize, structureVec.size());
+      std::exit(1);
     }
 
-    for(size_t i = 0; i < j->nb_filled; ++i) { // For all the read in this batch
-        readLen = j->data[i].first.seq.length();
-        tooManyHits = false;
-        localUpperBoundHits = 0;
-        auto& jointHitGroup = structureVec[i];
-        jointHitGroup.clearAlignments();
-        auto& jointHits = jointHitGroup.alignments();
-        leftHits.clear();
-        rightHits.clear();
-
-        bool lh = hitCollector(j->data[i].first.seq,
-                               leftHits, saSearcher,
-                               MateStatus::PAIRED_END_LEFT,
-                               true);
-        bool rh = hitCollector(j->data[i].second.seq,
-                               rightHits, saSearcher,
-                               MateStatus::PAIRED_END_RIGHT,
-                               true);
-
-        rapmap::utils::mergeLeftRightHits(
-                               leftHits, rightHits, jointHits,
-                               readLen, maxNumHits, tooManyHits, hctr);
+    for (size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
+        auto& rp = rg[i];
+        readLenLeft = rp.first.seq.length();
+        readLenRight= rp.second.seq.length();
+      bool tooShortLeft = (readLenLeft < minK);
+      bool tooShortRight = (readLenRight < minK);
+      tooManyHits = false;
+      localUpperBoundHits = 0;
+      auto& jointHitGroup = structureVec[i];
+      jointHitGroup.clearAlignments();
+      auto& jointHits = jointHitGroup.alignments();
+      leftHits.clear();
+      rightHits.clear();
+      mapType = salmon::utils::MappingType::UNMAPPED;
+
+      bool lh = tooShortLeft ? false : hitCollector(rp.first.seq,
+                                                    leftHits, saSearcher,
+                                                    MateStatus::PAIRED_END_LEFT,
+                                                    true, consistentHits);
+
+      bool rh = tooShortRight ? false : hitCollector(rp.second.seq,
+                                   rightHits, saSearcher,
+                                   MateStatus::PAIRED_END_RIGHT, true,
+                                   consistentHits);
+
+      // Consider a read as too short if both ends are too short
+      if (tooShortLeft and tooShortRight) {
+        ++shortFragStats.numTooShort;
+        shortFragStats.shortest = std::min(shortFragStats.shortest,
+                                           std::max(readLenLeft, readLenRight));
+      } else {
+        // If we actually attempted to map the fragment (i.e. at least one
+        // end was long enough), then merge the left and right hits
+        // (strict intersection or fuzzy merge, depending on strictIntersect).
+        if (strictIntersect) {
+          rapmap::utils::mergeLeftRightHits(leftHits, rightHits, jointHits,
+                                            readLenLeft, maxNumHits,
+                                            tooManyHits, hctr);
+        } else {
+          rapmap::utils::mergeLeftRightHitsFuzzy(lh, rh, leftHits, rightHits,
+                                                 jointHits, readLenLeft,
+                                                 maxNumHits, tooManyHits, hctr);
+        }
 
         if (initialRound) {
-            upperBoundHits += (jointHits.size() > 0);
+          upperBoundHits += (jointHits.size() > 0);
         }
 
         // If the read mapped to > maxReadOccs places, discard it
-        if (jointHits.size() > salmonOpts.maxReadOccs ) { jointHitGroup.clearAlignments(); }
-
-
-	// If we have mappings, then process them.
-	if (jointHits.size() > 0) {
-	  bool isPaired = jointHits.front().mateStatus == rapmap::utils::MateStatus::PAIRED_END_PAIRED;
-	  // If we are ignoring orphans
-	  if (!salmonOpts.allowOrphans) {
-	    // If the mappings for the current read are not properly-paired (i.e. are orphans)
-	    // then just clear the group.
-	    if (!isPaired) { jointHitGroup.clearAlignments(); }
-	  } else {
-	    // If these aren't paired-end reads --- so that
-	    // we have orphans --- make sure we sort the
-	    // mappings so that they are in transcript order
-	    if (!isPaired) {
-	      // Find the end of the hits for the left read
-	      auto leftHitEndIt = std::partition_point(
-		  jointHits.begin(), jointHits.end(),
-		  [](const QuasiAlignment& q) -> bool {
-		  return q.mateStatus == rapmap::utils::MateStatus::PAIRED_END_LEFT;
-		  });
-	      // Merge the hits so that the entire list is in order
-	      // by transcript ID.
-	      std::inplace_merge(jointHits.begin(), leftHitEndIt, jointHits.end(),
-		  [](const QuasiAlignment& a, const QuasiAlignment& b) -> bool {
-		  return a.transcriptID() < b.transcriptID();
-		  });
-	    }
-	  }
-
-	  bool needBiasSample = salmonOpts.biasCorrect;
-
-	  for (auto& h : jointHits) {
-
-	    // ---- Collect bias samples ------ //
-	    int32_t pos = static_cast<int32_t>(h.pos);
-	    auto dir = salmon::utils::boolToDirection(h.fwd);
-
-	    // If bias correction is turned on, and we haven't sampled a mapping
-	    // for this read yet, and we haven't collected the required number of
-	    // samples overall.
-	    if(needBiasSample and salmonOpts.numBiasSamples > 0){
-	      // the "start" position is the leftmost position if
-	      // we hit the forward strand, and the leftmost
-	      // position + the read length if we hit the reverse complement
-	      int32_t startPos = h.fwd ? pos : pos + h.readLen;
-
-	      auto& t = transcripts[h.tid];
-	      if (startPos > 0 and startPos < t.RefLength) {
-		const char* txpStart = t.Sequence;
-		const char* readStart = txpStart + startPos;
-		const char* txpEnd = txpStart + t.RefLength;
-		bool success = readBias.update(txpStart, readStart, txpEnd, dir);
-		if (success) {
-		  salmonOpts.numBiasSamples -= 1;
-		  needBiasSample = false;
-		}
-	      }
-	    }
-	    // ---- Collect bias samples ------ //
-
-
-	    switch (h.mateStatus) {
-	      case MateStatus::PAIRED_END_LEFT:
-		{
-		  h.format = salmon::utils::hitType(h.pos, h.fwd);
-		}
-		break;
-	      case MateStatus::PAIRED_END_RIGHT:
-		{
-		  h.format = salmon::utils::hitType(h.pos, h.fwd);
-		}
-		break;
-	      case MateStatus::PAIRED_END_PAIRED:
-		{
-		  uint32_t end1Pos = (h.fwd) ? h.pos : h.pos + h.readLen;
-		  uint32_t end2Pos = (h.mateIsFwd) ? h.matePos : h.matePos + h.mateLen;
-		  bool canDovetail = false;
-		  h.format = salmon::utils::hitType(end1Pos, h.fwd, h.readLen,
-		      end2Pos, h.mateIsFwd, h.mateLen, canDovetail);
-		}
-		break;
-	    }
-	  }
-	} // If we have no mappings --- then there's nothing to do
-
-        validHits += jointHits.size();
-        localNumAssignedFragments += (jointHits.size() > 0);
-        locRead++;
-        ++numObservedFragments;
-        if (numObservedFragments % 500000 == 0) {
-    	    iomutex.lock();
-            const char RESET_COLOR[] = "\x1b[0m";
-            char green[] = "\x1b[30m";
-            green[3] = '0' + static_cast<char>(fmt::GREEN);
-            char red[] = "\x1b[30m";
-            red[3] = '0' + static_cast<char>(fmt::RED);
-            if (initialRound) {
-                fmt::print(stderr, "\033[A\r\r{}processed{} {} {}fragments{}\n", green, red, numObservedFragments, green, RESET_COLOR);
-                fmt::print(stderr, "hits: {}, hits per frag:  {}",
-                        validHits,
-                        validHits / static_cast<float>(prevObservedFrags));
-            } else {
-                fmt::print(stderr, "\r\r{}processed{} {} {}fragments{}", green, red, numObservedFragments, green, RESET_COLOR);
+        if (jointHits.size() > salmonOpts.maxReadOccs) {
+          jointHitGroup.clearAlignments();
+        }
+      }
+
+      // If we have mappings, then process them.
+      bool isPaired{false};
+      if (jointHits.size() > 0) {
+        bool isPaired = jointHits.front().mateStatus ==
+                        rapmap::utils::MateStatus::PAIRED_END_PAIRED;
+        if (isPaired) { 
+            mapType = salmon::utils::MappingType::PAIRED_MAPPED; 
+        }
+        // If we are ignoring orphans
+        if (!salmonOpts.allowOrphans) {
+          // If the mappings for the current read are not properly-paired (i.e.
+          // are orphans)
+          // then just clear the group.
+          if (!isPaired) {
+            jointHitGroup.clearAlignments();
+          }
+        } else {
+          // If these aren't paired-end reads --- so that
+          // we have orphans --- make sure we sort the
+          // mappings so that they are in transcript order
+          if (!isPaired) {
+            // Find the end of the hits for the left read
+            auto leftHitEndIt = std::partition_point(
+                jointHits.begin(), jointHits.end(),
+                [](const QuasiAlignment& q) -> bool {
+                  return q.mateStatus ==
+                         rapmap::utils::MateStatus::PAIRED_END_LEFT;
+                });
+            // If we found left hits
+            bool foundLeftMappings = (leftHitEndIt > jointHits.begin());
+            // If we found right hits
+            bool foundRightMappings = (leftHitEndIt  < jointHits.end());
+
+            if (foundLeftMappings and foundRightMappings) {
+                mapType = salmon::utils::MappingType::BOTH_ORPHAN;
+            } else if (foundLeftMappings) { 
+                mapType = salmon::utils::MappingType::LEFT_ORPHAN;
+            } else if (foundRightMappings) { 
+                mapType = salmon::utils::MappingType::RIGHT_ORPHAN;
             }
-    	    iomutex.unlock();
+
+            // Merge the hits so that the entire list is in order
+            // by transcript ID.
+            std::inplace_merge(
+                jointHits.begin(), leftHitEndIt, jointHits.end(),
+                [](const QuasiAlignment& a, const QuasiAlignment& b) -> bool {
+                  return a.transcriptID() < b.transcriptID();
+                });
+          }
         }
 
+        bool needBiasSample = salmonOpts.biasCorrect;
+
+        std::uniform_int_distribution<> dis(0, jointHits.size());
+        // Randomly select a hit from which to draw the bias sample.
+        int32_t hitSamp{dis(eng)};
+        int32_t hn{0};
+
+        for (auto& h : jointHits) {
+
+          // ---- Collect bias samples ------ //
+
+          // If bias correction is turned on, and we haven't sampled a mapping
+          // for this read yet, and we haven't collected the required number of
+          // samples overall.
+          if (needBiasSample and salmonOpts.numBiasSamples > 0 and isPaired and
+              hn == hitSamp) {
+            auto& t = transcripts[h.tid];
+
+            // The "start" position is the leftmost position if we
+            // map to the forward strand, and the leftmost position
+            // + the read length - 1 if we map to the reverse complement.
+
+            // read 1
+            int32_t pos1 = static_cast<int32_t>(h.pos);
+            auto dir1 = salmon::utils::boolToDirection(h.fwd);
+            int32_t startPos1 = h.fwd ? pos1 : (pos1 + h.readLen - 1);
+
+            // read 2
+            int32_t pos2 = static_cast<int32_t>(h.matePos);
+            auto dir2 = salmon::utils::boolToDirection(h.mateIsFwd);
+            int32_t startPos2 = h.mateIsFwd ? pos2 : (pos2 + h.mateLen - 1);
+
+            bool success = false;
+
+            if ((dir1 != dir2) and // Shouldn't be from the same strand
+                (startPos1 > 0 and startPos1 < t.RefLength) and
+                (startPos2 > 0 and startPos2 < t.RefLength)) {
+
+              const char* txpStart = t.Sequence();
+              const char* txpEnd = txpStart + t.RefLength;
+
+              const char* readStart1 = txpStart + startPos1;
+              auto& readBias1 = (h.fwd) ? readBiasFW : readBiasRC;
+
+              const char* readStart2 = txpStart + startPos2;
+              auto& readBias2 = (h.mateIsFwd) ? readBiasFW : readBiasRC;
+
+              int32_t fwPre = readBias1.contextBefore(!h.fwd);
+              int32_t fwPost = readBias1.contextAfter(!h.fwd);
+
+              int32_t rcPre = readBias2.contextBefore(!h.mateIsFwd);
+              int32_t rcPost = readBias2.contextAfter(!h.mateIsFwd);
+
+              bool read1RC = !h.fwd;
+              bool read2RC = !h.mateIsFwd;
+
+              if ((startPos1 >= readBias1.contextBefore(read1RC) and
+                   startPos1 + readBias1.contextAfter(read1RC) <
+                       t.RefLength) and
+                  (startPos2 >= readBias2.contextBefore(read2RC) and
+                   startPos2 + readBias2.contextAfter(read2RC) < t.RefLength)) {
+
+                int32_t fwPos = (h.fwd) ? startPos1 : startPos2;
+                int32_t rcPos = (h.fwd) ? startPos2 : startPos1;
+                if (fwPos < rcPos) {
+                  leftMer.from_chars(txpStart + startPos1 -
+                                     readBias1.contextBefore(read1RC));
+                  rightMer.from_chars(txpStart + startPos2 -
+                                      readBias2.contextBefore(read2RC));
+                  if (read1RC) {
+                    leftMer.reverse_complement();
+                  } else {
+                    rightMer.reverse_complement();
+                  }
+
+                  success = readBias1.addSequence(leftMer, 1.0);
+                  success = readBias2.addSequence(rightMer, 1.0);
+                }
+              }
+
+              if (success) {
+                salmonOpts.numBiasSamples -= 1;
+                needBiasSample = false;
+              }
+            }
+          }
+          // ---- Collect bias samples ------ //
+          ++hn;
+
+          switch (h.mateStatus) {
+          case MateStatus::PAIRED_END_LEFT: {
+            h.format = salmon::utils::hitType(h.pos, h.fwd);
+          } break;
+          case MateStatus::PAIRED_END_RIGHT: {
+            // we pass in !h.fwd here because the right read
+            // will have the opposite orientation from its mate.
+            h.format = salmon::utils::hitType(h.pos, !h.fwd);
+          } break;
+          case MateStatus::PAIRED_END_PAIRED: {
+            uint32_t end1Pos = (h.fwd) ? h.pos : h.pos + h.readLen;
+            uint32_t end2Pos =
+                (h.mateIsFwd) ? h.matePos : h.matePos + h.mateLen;
+            bool canDovetail = false;
+            h.format =
+                salmon::utils::hitType(end1Pos, h.fwd, h.readLen, end2Pos,
+                                       h.mateIsFwd, h.mateLen, canDovetail);
+          } break;
+          }
+        }
+      } else {
+          // This read was completely unmapped.
+          mapType = salmon::utils::MappingType::UNMAPPED;
+      } 
+      
+      if (writeUnmapped and mapType != salmon::utils::MappingType::PAIRED_MAPPED) {
+          // If the fragment was not mapped as a proper pair, there's nothing
+          // more to do unless we're outputting names for un-mapped reads
+          unmappedNames << rp.first.name << ' ' << salmon::utils::str(mapType) << '\n';
+      }
+
+      validHits += jointHits.size();
+      localNumAssignedFragments += (jointHits.size() > 0);
+      locRead++;
+      ++numObservedFragments;
+      if (!quiet and numObservedFragments % 500000 == 0) {
+        iomutex.lock();
+        const char RESET_COLOR[] = "\x1b[0m";
+        char green[] = "\x1b[30m";
+        green[3] = '0' + static_cast<char>(fmt::GREEN);
+        char red[] = "\x1b[30m";
+        red[3] = '0' + static_cast<char>(fmt::RED);
+        if (initialRound) {
+          fmt::print(stderr, "\033[A\r\r{}processed{} {} {}fragments{}\n",
+                     green, red, numObservedFragments, green, RESET_COLOR);
+          fmt::print(stderr, "hits: {}, hits per frag:  {}", validHits,
+                     validHits / static_cast<float>(prevObservedFrags));
+        } else {
+          fmt::print(stderr, "\r\r{}processed{} {} {}fragments{}", green, red,
+                     numObservedFragments, green, RESET_COLOR);
+        }
+        iomutex.unlock();
+      }
 
     } // end for i < j->nb_filled
 
+    if (writeUnmapped) {
+        std::string outStr(unmappedNames.str());
+        // Get rid of last newline
+        if (!outStr.empty()) {
+            outStr.pop_back();
+            unmappedLogger->info(std::move(outStr));
+        }
+        unmappedNames.clear();
+    }
+	    
+
     prevObservedFrags = numObservedFragments;
-    AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(structureVec.begin(), structureVec.begin() + rangeSize);
-    processMiniBatch<QuasiAlignment>(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
-                     fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
+    AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(
+        structureVec.begin(), structureVec.begin() + rangeSize);
+    processMiniBatch<QuasiAlignment>(
+        readExp, fmCalc, firstTimestepOfRound, rl, salmonOpts, hitLists,
+        transcripts, clusterForest, fragLengthDist, observedBiasParams,
+        numAssignedFragments, eng, initialRound, burnedIn, maxZeroFrac);
   }
+
+  if (maxZeroFrac > 0.0) {
+      salmonOpts.jointLog->info("Thread saw mini-batch with a maximum of {0:.2f}\% zero probability fragments", 
+                                maxZeroFrac);
+  }
+
+  readExp.updateShortFrags(shortFragStats);
 }
 
 // SINGLE END
@@ -756,26 +1039,18 @@ void processReadsQuasi(paired_parser* parser,
 // available. A job behaves like a pointer to the type
 // jellyfish::sequence_list (see whole_sequence_parser.hpp).
 template <typename RapMapIndexT>
-void processReadsQuasi(single_parser* parser,
-               ReadExperiment& readExp,
-               ReadLibrary& rl,
-               AlnGroupVec<QuasiAlignment>& structureVec,
-               std::atomic<uint64_t>& numObservedFragments,
-               std::atomic<uint64_t>& numAssignedFragments,
-               std::atomic<uint64_t>& validHits,
-               std::atomic<uint64_t>& upperBoundHits,
-               RapMapIndexT* qidx,
-               std::vector<Transcript>& transcripts,
-               ForgettingMassCalculator& fmCalc,
-               ClusterForest& clusterForest,
-               FragmentLengthDistribution& fragLengthDist,
-               mem_opt_t* memOptions,
-               SalmonOpts& salmonOpts,
-               double coverageThresh,
-	           std::mutex& iomutex,
-               bool initialRound,
-               std::atomic<bool>& burnedIn,
-               volatile bool& writeToCache) {
+void processReadsQuasi(
+    single_parser* parser, ReadExperiment& readExp, ReadLibrary& rl,
+    AlnGroupVec<QuasiAlignment>& structureVec,
+    std::atomic<uint64_t>& numObservedFragments,
+    std::atomic<uint64_t>& numAssignedFragments,
+    std::atomic<uint64_t>& validHits, std::atomic<uint64_t>& upperBoundHits,
+    RapMapIndexT* qidx, std::vector<Transcript>& transcripts,
+    ForgettingMassCalculator& fmCalc, ClusterForest& clusterForest,
+    FragmentLengthDistribution& fragLengthDist, BiasParams& observedBiasParams,
+    mem_opt_t* memOptions, SalmonOpts& salmonOpts, double coverageThresh,
+    std::mutex& iomutex, bool initialRound, std::atomic<bool>& burnedIn,
+    volatile bool& writeToCache) {
   uint64_t count_fwd = 0, count_bwd = 0;
   // Seed with a real random value, if available
   std::random_device rd;
@@ -786,14 +1061,25 @@ void processReadsQuasi(single_parser* parser,
   uint64_t prevObservedFrags{1};
   uint64_t leftHitCount{0};
   uint64_t hitListCount{0};
+  salmon::utils::ShortFragStats shortFragStats;
+  bool tooShort{false};
+  double maxZeroFrac{0.0};
+
+  // Write unmapped reads
+  fmt::MemoryWriter unmappedNames;
+  auto unmappedLogger = spdlog::get("unmappedLog");
+  bool writeUnmapped = (unmappedLogger.get() == nullptr) ? false : true;
+
+  auto& readBiasFW = observedBiasParams.seqBiasModelFW;
+  auto& readBiasRC = observedBiasParams.seqBiasModelRC;
+  Mer context;
 
-  auto& readBias = readExp.readBias();
   const char* txomeStr = qidx->seq.c_str();
 
   auto expectedLibType = rl.format();
 
-
   uint64_t firstTimestepOfRound = fmCalc.getCurrentTimestep();
+  size_t minK = rapmap::utils::my_mer::k();
 
   size_t locRead{0};
   uint64_t localUpperBoundHits{0};
@@ -802,388 +1088,500 @@ void processReadsQuasi(single_parser* parser,
   bool tooManyHits{false};
   size_t readLen{0};
   size_t maxNumHits{salmonOpts.maxReadOccs};
+  bool consistentHits{salmonOpts.consistentHits};
+  bool quiet{salmonOpts.quiet};
+
   SACollector<RapMapIndexT> hitCollector(qidx);
   SASearcher<RapMapIndexT> saSearcher(qidx);
   rapmap::utils::HitCounters hctr;
-
-  while(true) {
-    typename single_parser::job j(*parser); // Get a job from the parser: a bunch of read (at most max_read_group)
-    if(j.is_empty()) break;           // If got nothing, quit
-
-    rangeSize = j->nb_filled;
+ auto rg = parser->getReadGroup();
+  while (parser->refill(rg)) {
+      rangeSize = rg.size();
     if (rangeSize > structureVec.size()) {
-        salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} --- this shouldn't happen.\n"
-                                   "Please report this bug on GitHub", rangeSize, structureVec.size());
-        std::exit(1);
+      salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} "
+                                 "--- this shouldn't happen.\n"
+                                 "Please report this bug on GitHub",
+                                 rangeSize, structureVec.size());
+      std::exit(1);
     }
 
-    for(size_t i = 0; i < j->nb_filled; ++i) { // For all the read in this batch
-        readLen = j->data[i].seq.length();
-        tooManyHits = false;
-        localUpperBoundHits = 0;
-        auto& jointHitGroup = structureVec[i];
-        auto& jointHits = jointHitGroup.alignments();
+    for (size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
+        auto& rp = rg[i];
+      readLen = rp.seq.length();
+      tooShort = (readLen < minK);
+      tooManyHits = false;
+      localUpperBoundHits = 0;
+      auto& jointHitGroup = structureVec[i];
+      auto& jointHits = jointHitGroup.alignments();
+      jointHitGroup.clearAlignments();
+
+      bool lh =
+          tooShort ? false
+          : hitCollector(rp.seq,
+                                  jointHits, saSearcher,
+                                  MateStatus::SINGLE_END, true, consistentHits);
+
+      // If the fragment was too short, record it
+      if (tooShort) {
+        ++shortFragStats.numTooShort;
+        shortFragStats.shortest = std::min(shortFragStats.shortest, readLen);
+      }
+
+      if (initialRound) {
+        upperBoundHits += (jointHits.size() > 0);
+      }
+
+      // If the read mapped to > maxReadOccs places, discard it
+      if (jointHits.size() > salmonOpts.maxReadOccs) {
         jointHitGroup.clearAlignments();
+      }
+
+      bool needBiasSample = salmonOpts.biasCorrect;
+
+      for (auto& h : jointHits) {
+
+        // ---- Collect bias samples ------ //
+        int32_t pos = static_cast<int32_t>(h.pos);
+        auto dir = salmon::utils::boolToDirection(h.fwd);
+
+        // If bias correction is turned on, and we haven't sampled a mapping
+        // for this read yet, and we haven't collected the required number of
+        // samples overall.
+        if (needBiasSample and salmonOpts.numBiasSamples > 0) {
+          // the "start" position is the leftmost position if
+          // we hit the forward strand, and the leftmost
+          // position + the read length if we hit the reverse complement
+          int32_t startPos = h.fwd ? pos : pos + h.readLen;
+
+          auto& t = transcripts[h.tid];
+          if (startPos > 0 and startPos < t.RefLength) {
+            auto& readBias = (h.fwd) ? readBiasFW : readBiasRC;
+            const char* txpStart = t.Sequence();
+            const char* readStart = txpStart + startPos;
+            const char* txpEnd = txpStart + t.RefLength;
+
+            bool success{false};
+            // If the context exists around the read, add it to the observed
+            // read start sequences.
+            if (startPos >= readBias.contextBefore(!h.fwd) and
+                startPos + readBias.contextAfter(!h.fwd) < t.RefLength) {
+              context.from_chars(txpStart + startPos -
+                                 readBias.contextBefore(!h.fwd));
+              if (!h.fwd) {
+                context.reverse_complement();
+              }
+              success = readBias.addSequence(context, 1.0);
+            }
 
-        bool lh = hitCollector(j->data[i].seq,
-                               jointHits, saSearcher,
-                               MateStatus::SINGLE_END,
-                               true);
+            if (success) {
+              salmonOpts.numBiasSamples -= 1;
+              needBiasSample = false;
+            }
+          }
+        }
+        // ---- Collect bias samples ------ //
 
+        switch (h.mateStatus) {
+        case MateStatus::SINGLE_END: {
+          h.format = salmon::utils::hitType(h.pos, h.fwd);
+        } break;
+        }
+      }
+
+      if (writeUnmapped and jointHits.empty()) {
+          // If we have no mappings --- then there's nothing to do
+          // unless we're outputting names for un-mapped reads
+          unmappedNames << rp.name << " u\n";
+      }
+
+      validHits += jointHits.size();
+      locRead++;
+      ++numObservedFragments;
+      if (!quiet and numObservedFragments % 500000 == 0) {
+        iomutex.lock();
+        const char RESET_COLOR[] = "\x1b[0m";
+        char green[] = "\x1b[30m";
+        green[3] = '0' + static_cast<char>(fmt::GREEN);
+        char red[] = "\x1b[30m";
+        red[3] = '0' + static_cast<char>(fmt::RED);
         if (initialRound) {
-            upperBoundHits += (jointHits.size() > 0);
+          fmt::print(stderr, "\033[A\r\r{}processed{} {} {}fragments{}\n",
+                     green, red, numObservedFragments, green, RESET_COLOR);
+          fmt::print(stderr, "hits: {}; hits per frag:  {}", validHits,
+                     validHits / static_cast<float>(prevObservedFrags));
+        } else {
+          fmt::print(stderr, "\r\r{}processed{} {} {}fragments{}", green, red,
+                     numObservedFragments, green, RESET_COLOR);
         }
+        iomutex.unlock();
+      }
 
-        // If the read mapped to > maxReadOccs places, discard it
-        if (jointHits.size() > salmonOpts.maxReadOccs ) { jointHitGroup.clearAlignments(); }
-
-	bool needBiasSample = salmonOpts.biasCorrect;
+    } // end for i < j->nb_filled
 
-        for (auto& h : jointHits) {
+    if (writeUnmapped) {
+        std::string outStr(unmappedNames.str());
+        // Get rid of last newline
+        if (!outStr.empty()) {
+            outStr.pop_back();
+            unmappedLogger->info(std::move(outStr));
+        }
+        unmappedNames.clear();
+    }
 
-	    // ---- Collect bias samples ------ //
-	    int32_t pos = static_cast<int32_t>(h.pos);
-	    auto dir = salmon::utils::boolToDirection(h.fwd);
-
-	    // If bias correction is turned on, and we haven't sampled a mapping
-	    // for this read yet, and we haven't collected the required number of
-	    // samples overall.
-	    if(needBiasSample and salmonOpts.numBiasSamples > 0){
-	      // the "start" position is the leftmost position if
-	      // we hit the forward strand, and the leftmost
-	      // position + the read length if we hit the reverse complement
-	      int32_t startPos = h.fwd ? pos : pos + h.readLen;
-
-
-	      auto& t = transcripts[h.tid];
-	      if (startPos > 0 and startPos < t.RefLength) {
-		const char* txpStart = t.Sequence;
-		const char* readStart = txpStart + startPos;
-		const char* txpEnd = txpStart + t.RefLength;
-		bool success = readBias.update(txpStart, readStart, txpEnd, dir);
-		if (success) {
-		  salmonOpts.numBiasSamples -= 1;
-		  needBiasSample = false;
-		}
-	      }
-	    }
-	    // ---- Collect bias samples ------ //
+    prevObservedFrags = numObservedFragments;
+    AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(
+        structureVec.begin(), structureVec.begin() + rangeSize);
+    processMiniBatch<QuasiAlignment>(
+        readExp, fmCalc, firstTimestepOfRound, rl, salmonOpts, hitLists,
+        transcripts, clusterForest, fragLengthDist, observedBiasParams,
+        numAssignedFragments, eng, initialRound, burnedIn, maxZeroFrac);
+  }
+  readExp.updateShortFrags(shortFragStats);
 
+  if (maxZeroFrac > 0.0) {
+      salmonOpts.jointLog->info("Thread saw mini-batch with a maximum of {0:.2f}\% zero probability fragments", 
+                                maxZeroFrac);
+  }
+}
 
+/// DONE QUASI
 
-            switch (h.mateStatus) {
-                case MateStatus::SINGLE_END:
-                    {
-                        h.format = salmon::utils::hitType(h.pos, h.fwd);
-                    }
-                    break;
-            }
-        }
+template <typename AlnT>
+void processReadLibrary(
+    ReadExperiment& readExp, ReadLibrary& rl, SalmonIndex* sidx,
+    std::vector<Transcript>& transcripts, ClusterForest& clusterForest,
+    std::atomic<uint64_t>&
+        numObservedFragments, // total number of reads we've looked at
+    std::atomic<uint64_t>&
+        numAssignedFragments,              // total number of assigned reads
+    std::atomic<uint64_t>& upperBoundHits, // upper bound on # of mapped frags
+    bool initialRound,
+    std::atomic<bool>& burnedIn, ForgettingMassCalculator& fmCalc,
+    FragmentLengthDistribution& fragLengthDist, mem_opt_t* memOptions,
+    SalmonOpts& salmonOpts, double coverageThresh, bool greedyChain,
+    std::mutex& iomutex, size_t numThreads,
+    std::vector<AlnGroupVec<AlnT>>& structureVec, volatile bool& writeToCache) {
+
+  std::vector<std::thread> threads;
+
+  std::atomic<uint64_t> numValidHits{0};
+  rl.checkValid();
+
+  auto indexType = sidx->indexType();
+
+  std::unique_ptr<paired_parser> pairedParserPtr{nullptr};
+  std::unique_ptr<single_parser> singleParserPtr{nullptr};
+
+  /** sequence-specific and GC-fragment bias vectors --- each thread gets its
+   * own **/
+  std::vector<BiasParams> observedBiasParams(numThreads,
+					     BiasParams(salmonOpts.numConditionalGCBins, salmonOpts.numFragGCBins, false));
+
+  // If the read library is paired-end
+  // ------ Paired-end --------
+  if (rl.format().type == ReadType::PAIRED_END) {
+
+    if (rl.mates1().size() != rl.mates2().size()) {
+      salmonOpts.jointLog->error("The number of provided files for "
+                                 "-1 and -2 must be the same!");
+      std::exit(1);
+    }
 
-        validHits += jointHits.size();
-        locRead++;
-        ++numObservedFragments;
-        if (numObservedFragments % 500000 == 0) {
-    	    iomutex.lock();
-            const char RESET_COLOR[] = "\x1b[0m";
-            char green[] = "\x1b[30m";
-            green[3] = '0' + static_cast<char>(fmt::GREEN);
-            char red[] = "\x1b[30m";
-            red[3] = '0' + static_cast<char>(fmt::RED);
-            if (initialRound) {
-                fmt::print(stderr, "\033[A\r\r{}processed{} {} {}fragments{}\n", green, red, numObservedFragments, green, RESET_COLOR);
-                fmt::print(stderr, "hits: {}; hits per frag:  {}",
-                        validHits,
-                        validHits / static_cast<float>(prevObservedFrags));
-            } else {
-                fmt::print(stderr, "\r\r{}processed{} {} {}fragments{}", green, red, numObservedFragments, green, RESET_COLOR);
-            }
-    	    iomutex.unlock();
-        }
+    size_t numFiles = rl.mates1().size() + rl.mates2().size();
+    uint32_t numParsingThreads{1};
+    // HACK!
+    if (rl.mates1().size() > 1 and numThreads > 8) { numParsingThreads = 2; }
+    pairedParserPtr.reset(new paired_parser(rl.mates1(), rl.mates2(), numThreads, numParsingThreads, miniBatchSize));
+    pairedParserPtr->start();
+    
+    switch (indexType) {
+    case SalmonIndexType::FMD: {
+      for (int i = 0; i < numThreads; ++i) {
+        // NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
+        // change value before the lambda below is evaluated --- crazy!
+        auto threadFun = [&, i]() -> void {
+          processReadsMEM<paired_parser, TranscriptHitList>(
+              pairedParserPtr.get(), readExp, rl, structureVec[i],
+              numObservedFragments, numAssignedFragments, numValidHits,
+              upperBoundHits, sidx, transcripts, fmCalc, clusterForest,
+              fragLengthDist, observedBiasParams[i], memOptions, salmonOpts,
+              coverageThresh, iomutex, initialRound, burnedIn, writeToCache);
+        };
+        threads.emplace_back(threadFun);
+      }
+      break;
+    case SalmonIndexType::QUASI: {
+      // True if we have a 64-bit SA index, false otherwise
+      bool largeIndex = sidx->is64BitQuasi();
+      bool perfectHashIndex = sidx->isPerfectHashQuasi();
+      for (int i = 0; i < numThreads; ++i) {
+        // NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
+        // change value before the lambda below is evaluated --- crazy!
+        if (largeIndex) {
+          if (perfectHashIndex) { // Perfect Hash
+            auto threadFun = [&, i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int64_t, PerfectHash<int64_t>>>(
+                  pairedParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndexPerfectHash64(), transcripts,
+                  fmCalc, clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          } else { // Dense Hash
+            auto threadFun = [&, i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int64_t, DenseHash<int64_t>>>(
+                  pairedParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndex64(), transcripts, fmCalc,
+                  clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          }
+        } else {
+          if (perfectHashIndex) { // Perfect Hash
+            auto threadFun = [&, i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int32_t, PerfectHash<int32_t>>>(
+                  pairedParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndexPerfectHash32(), transcripts,
+                  fmCalc, clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          } else { // Dense Hash
+            auto threadFun = [&, i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int32_t, DenseHash<int32_t>>>(
+                  pairedParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndex32(), transcripts, fmCalc,
+                  clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          }
+
+        } // End spawn current thread
+
+      } // End spawn all threads
+    }   // End Quasi index
+    break;
+    } // end switch
+    }
+    for (int i = 0; i < numThreads; ++i) {
+      threads[i].join();
+    }
 
+    /** GC-fragment bias **/
+    // Set the global distribution based on the sum of local
+    // distributions.
+    double gcFracFwd{0.0};
+    double globalMass{salmon::math::LOG_0};
+    double globalFwdMass{salmon::math::LOG_0};
+    auto& globalGCMass = readExp.observedGC();
+    for (auto& gcp : observedBiasParams) {
+      auto& gcm = gcp.observedGCMass;
+      globalGCMass.combineCounts(gcm);
+
+      auto& fw = readExp.readBiasModelObserved(salmon::utils::Direction::FORWARD);
+      auto& rc =
+          readExp.readBiasModelObserved(salmon::utils::Direction::REVERSE_COMPLEMENT);
+
+      auto& fwloc = gcp.seqBiasModelFW;
+      auto& rcloc = gcp.seqBiasModelRC;
+      fw.combineCounts(fwloc);
+      rc.combineCounts(rcloc);
+
+      /**
+       * positional biases
+       **/
+      auto& posBiasesFW = readExp.posBias(salmon::utils::Direction::FORWARD);
+      auto& posBiasesRC =
+          readExp.posBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+      for (size_t i = 0; i < posBiasesFW.size(); ++i) {
+        posBiasesFW[i].combine(gcp.posBiasFW[i]);
+        posBiasesRC[i].combine(gcp.posBiasRC[i]);
+      }
+      /*
+              for (size_t i = 0; i < fwloc.counts.size(); ++i) {
+                  fw.counts[i] += fwloc.counts[i];
+                  rc.counts[i] += rcloc.counts[i];
+              }
+      */
+
+      globalMass = salmon::math::logAdd(globalMass, gcp.massFwd);
+      globalMass = salmon::math::logAdd(globalMass, gcp.massRC);
+      globalFwdMass = salmon::math::logAdd(globalFwdMass, gcp.massFwd);
+    }
+    globalGCMass.normalize();
 
-    } // end for i < j->nb_filled
+    if (globalMass != salmon::math::LOG_0) {
+      if (globalFwdMass != salmon::math::LOG_0) {
+        gcFracFwd = std::exp(globalFwdMass - globalMass);
+      }
+      readExp.setGCFracForward(gcFracFwd);
+    }
 
-    prevObservedFrags = numObservedFragments;
-    AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(structureVec.begin(), structureVec.begin() + rangeSize);
-    processMiniBatch<QuasiAlignment>(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
-                     fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
-  }
-}
+    // finalize the positional biases
+    if (salmonOpts.posBiasCorrect) {
+      auto& posBiasesFW = readExp.posBias(salmon::utils::Direction::FORWARD);
+      auto& posBiasesRC =
+          readExp.posBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+      for (size_t i = 0; i < posBiasesFW.size(); ++i) {
+        posBiasesFW[i].finalize();
+        posBiasesRC[i].finalize();
+      }
+    }
 
-/// DONE QUASI
+    /** END GC-fragment bias **/
+
+  } // ------ Single-end --------
+  else if (rl.format().type == ReadType::SINGLE_END) {
+
+    uint32_t numParsingThreads{1};
+    // HACK!
+    if (rl.unmated().size() > 1 and numThreads > 8) { numParsingThreads = 2; }
+    singleParserPtr.reset(new single_parser(rl.unmated(), numThreads, numParsingThreads, miniBatchSize));
+    singleParserPtr->start();
+    switch (indexType) {
+    case SalmonIndexType::FMD: {
+      for (int i = 0; i < numThreads; ++i) {
+        // NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
+        // change value before the lambda below is evaluated --- crazy!
+        auto threadFun = [&, i]() -> void {
+          processReadsMEM<single_parser, TranscriptHitList>(
+              singleParserPtr.get(), readExp, rl, structureVec[i],
+              numObservedFragments, numAssignedFragments, numValidHits,
+              upperBoundHits, sidx, transcripts, fmCalc, clusterForest,
+              fragLengthDist, observedBiasParams[i], memOptions, salmonOpts,
+              coverageThresh, iomutex, initialRound, burnedIn, writeToCache);
+        };
+        threads.emplace_back(threadFun);
+      }
+    } break;
+
+    case SalmonIndexType::QUASI: {
+      // True if we have a 64-bit SA index, false otherwise
+      bool largeIndex = sidx->is64BitQuasi();
+      bool perfectHashIndex = sidx->isPerfectHashQuasi();
+      for (int i = 0; i < numThreads; ++i) {
+        // NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
+        // change value before the lambda below is evaluated --- crazy!
+        if (largeIndex) {
+          if (perfectHashIndex) { // Perfect Hash
+            auto threadFun = [&, i]() -> void { // single-end: pairedParserPtr is still null here
+              processReadsQuasi<RapMapSAIndex<int64_t, PerfectHash<int64_t>>>(
+                  singleParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndexPerfectHash64(), transcripts,
+                  fmCalc, clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          } else { // Dense Hash
+            auto threadFun = [&, i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int64_t, DenseHash<int64_t>>>(
+                  singleParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndex64(), transcripts, fmCalc,
+                  clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          }
+        } else {
+          if (perfectHashIndex) { // Perfect Hash
+            auto threadFun = [&, i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int32_t, PerfectHash<int32_t>>>(
+                  singleParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndexPerfectHash32(), transcripts,
+                  fmCalc, clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          } else { // Dense Hash
+            auto threadFun = [&, i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int32_t, DenseHash<int32_t>>>(
+                  singleParserPtr.get(), readExp, rl, structureVec[i],
+                  numObservedFragments, numAssignedFragments, numValidHits,
+                  upperBoundHits, sidx->quasiIndex32(), transcripts, fmCalc,
+                  clusterForest, fragLengthDist, observedBiasParams[i],
+                  memOptions, salmonOpts, coverageThresh, iomutex, initialRound,
+                  burnedIn, writeToCache);
+            };
+            threads.emplace_back(threadFun);
+          }
+
+        } // End spawn current thread
+
+      } // End spawn all threads
+    }   // End Quasi index
+    break;
+    }
+    for (int i = 0; i < numThreads; ++i) {
+      threads[i].join();
+    }
 
+    // Set the global distribution based on the sum of local
+    // distributions.
+    for (auto& gcp : observedBiasParams) {
+      /*
+              auto& fw = readExp.readBias(salmon::utils::Direction::FORWARD);
+              auto& rc =
+         readExp.readBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+
+              auto& fwloc = gcp.seqBiasFW;
+              auto& rcloc = gcp.seqBiasRC;
+              for (size_t i = 0; i < fwloc.counts.size(); ++i) {
+                  fw.counts[i] += fwloc.counts[i];
+                  rc.counts[i] += rcloc.counts[i];
+              }
+      */
+      auto& fw = readExp.readBiasModelObserved(salmon::utils::Direction::FORWARD);
+      auto& rc =
+          readExp.readBiasModelObserved(salmon::utils::Direction::REVERSE_COMPLEMENT);
+
+      auto& fwloc = gcp.seqBiasModelFW;
+      auto& rcloc = gcp.seqBiasModelRC;
+      fw.combineCounts(fwloc);
+      rc.combineCounts(rcloc);
+
+      /**
+       * positional biases
+       **/
+      auto& posBiasesFW = readExp.posBias(salmon::utils::Direction::FORWARD);
+      auto& posBiasesRC =
+          readExp.posBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+      for (size_t i = 0; i < posBiasesFW.size(); ++i) {
+        posBiasesFW[i].combine(gcp.posBiasFW[i]);
+        posBiasesRC[i].combine(gcp.posBiasRC[i]);
+      }
+    }
+    // finalize the positional biases
+    if (salmonOpts.posBiasCorrect) {
+      auto& posBiasesFW = readExp.posBias(salmon::utils::Direction::FORWARD);
+      auto& posBiasesRC =
+          readExp.posBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+      for (size_t i = 0; i < posBiasesFW.size(); ++i) {
+        posBiasesFW[i].finalize();
+        posBiasesRC[i].finalize();
+      }
+    }
 
-template <typename AlnT>
-void processReadLibrary(
-        ReadExperiment& readExp,
-        ReadLibrary& rl,
-        SalmonIndex* sidx,
-        std::vector<Transcript>& transcripts,
-        ClusterForest& clusterForest,
-        std::atomic<uint64_t>& numObservedFragments, // total number of reads we've looked at
-        std::atomic<uint64_t>& numAssignedFragments, // total number of assigned reads
-        std::atomic<uint64_t>& upperBoundHits, // upper bound on # of mapped frags
-        bool initialRound,
-        std::atomic<bool>& burnedIn,
-        ForgettingMassCalculator& fmCalc,
-        FragmentLengthDistribution& fragLengthDist,
-        mem_opt_t* memOptions,
-        SalmonOpts& salmonOpts,
-        double coverageThresh,
-        bool greedyChain,
-        std::mutex& iomutex,
-        size_t numThreads,
-        std::vector<AlnGroupVec<AlnT>>& structureVec,
-        volatile bool& writeToCache){
-
-            std::vector<std::thread> threads;
-
-            std::atomic<uint64_t> numValidHits{0};
-            rl.checkValid();
-
-            auto indexType = sidx->indexType();
-
-            std::unique_ptr<paired_parser> pairedParserPtr{nullptr};
-            std::unique_ptr<single_parser> singleParserPtr{nullptr};
-            // If the read library is paired-end
-            // ------ Paired-end --------
-            if (rl.format().type == ReadType::PAIRED_END) {
-
-
-		    if (rl.mates1().size() != rl.mates2().size()) {
-			    salmonOpts.jointLog->error("The number of provided files for "
-					    "-1 and -2 must be the same!");
-			    std::exit(1);
-		    }
-
-		    size_t numFiles = rl.mates1().size() + rl.mates2().size();
-		    char** pairFileList = new char*[numFiles];
-		    for (size_t i = 0; i < rl.mates1().size(); ++i) {
-			    pairFileList[2*i] = const_cast<char*>(rl.mates1()[i].c_str());
-			    pairFileList[2*i+1] = const_cast<char*>(rl.mates2()[i].c_str());
-		    }
-
-		    size_t maxReadGroup{miniBatchSize}; // Number of reads in each "job"
-		    size_t concurrentFile{2}; // Number of files to read simultaneously
-		    pairedParserPtr.reset(new
-				    paired_parser(4 * numThreads, maxReadGroup,
-					    concurrentFile, pairFileList, pairFileList+numFiles));
-
-		    switch (indexType) {
-			case SalmonIndexType::FMD:
-			    {
-				for(int i = 0; i < numThreads; ++i)  {
-				    // NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
-				    // change value before the lambda below is evaluated --- crazy!
-				    auto threadFun = [&,i]() -> void {
-					processReadsMEM<paired_parser, TranscriptHitList>(
-						pairedParserPtr.get(),
-						readExp,
-						rl,
-						structureVec[i],
-						numObservedFragments,
-						numAssignedFragments,
-						numValidHits,
-						upperBoundHits,
-						sidx,
-						transcripts,
-						fmCalc,
-						clusterForest,
-						fragLengthDist,
-						memOptions,
-						salmonOpts,
-						coverageThresh,
-						iomutex,
-						initialRound,
-						burnedIn,
-						writeToCache);
-				    };
-				    threads.emplace_back(threadFun);
-				}
-				break;
-				case SalmonIndexType::QUASI:
-				{
-            // True if we have a 64-bit SA index, false otherwise
-            bool largeIndex = sidx->is64BitQuasi();
-				    for(int i = 0; i < numThreads; ++i)  {
-					// NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
-					// change value before the lambda below is evaluated --- crazy!
-          if (largeIndex) {
-            auto threadFun = [&,i]() -> void {
-              processReadsQuasi<RapMapSAIndex<int64_t>>(
-                pairedParserPtr.get(),
-                readExp,
-                rl,
-                structureVec[i],
-                numObservedFragments,
-                numAssignedFragments,
-                numValidHits,
-                upperBoundHits,
-                sidx->quasiIndex64(),
-                transcripts,
-                fmCalc,
-                clusterForest,
-                fragLengthDist,
-                memOptions,
-                salmonOpts,
-                coverageThresh,
-                iomutex,
-                initialRound,
-                burnedIn,
-                writeToCache);
-              };
-              threads.emplace_back(threadFun);
-            } else {
-              auto threadFun = [&,i]() -> void {
-              processReadsQuasi<RapMapSAIndex<int32_t>>(
-                pairedParserPtr.get(),
-                readExp,
-                rl,
-                structureVec[i],
-                numObservedFragments,
-                numAssignedFragments,
-                numValidHits,
-                upperBoundHits,
-                sidx->quasiIndex32(),
-                transcripts,
-                fmCalc,
-                clusterForest,
-                fragLengthDist,
-                memOptions,
-                salmonOpts,
-                coverageThresh,
-                iomutex,
-                initialRound,
-                burnedIn,
-                writeToCache);
-              };
-              threads.emplace_back(threadFun);
-            }
+    /** END: bias models **/
 
-				    }
-				}
-				break;
-			    } // end switch
-		    }
-		    for(int i = 0; i < numThreads; ++i) { threads[i].join(); }
-
-            } // ------ Single-end --------
-            else if (rl.format().type == ReadType::SINGLE_END) {
-
-                char* readFiles[] = { const_cast<char*>(rl.unmated().front().c_str()) };
-                size_t maxReadGroup{miniBatchSize}; // Number of files to read simultaneously
-                size_t concurrentFile{1}; // Number of reads in each "job"
-                stream_manager streams( rl.unmated().begin(),
-                        rl.unmated().end(), concurrentFile);
-
-                singleParserPtr.reset(new single_parser(4 * numThreads,
-                                      maxReadGroup,
-                                      concurrentFile,
-                                      streams));
-
-                switch (indexType) {
-                    case SalmonIndexType::FMD:
-                        {
-                            for(int i = 0; i < numThreads; ++i)  {
-                                // NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
-                                // change value before the lambda below is evaluated --- crazy!
-                                auto threadFun = [&,i]() -> void {
-                                    processReadsMEM<single_parser, TranscriptHitList>(
-                                            singleParserPtr.get(),
-                                            readExp,
-                                            rl,
-                                            structureVec[i],
-                                            numObservedFragments,
-                                            numAssignedFragments,
-                                            numValidHits,
-                                            upperBoundHits,
-                                            sidx,
-                                            transcripts,
-                                            fmCalc,
-                                            clusterForest,
-                                            fragLengthDist,
-                                            memOptions,
-                                            salmonOpts,
-                                            coverageThresh,
-                                            iomutex,
-                                            initialRound,
-                                            burnedIn,
-                                            writeToCache);
-                                };
-                                threads.emplace_back(threadFun);
-                            }
-                        }
-                        break;
-
-                    case SalmonIndexType::QUASI:
-                    {
-                      // True if we have a 64-bit SA index, false otherwise
-                      bool largeIndex = sidx->is64BitQuasi();
-                      for(int i = 0; i < numThreads; ++i)  {
-                        // NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
-                        // change value before the lambda below is evaluated --- crazy!
-                        if (largeIndex) {
-                          auto threadFun = [&,i]() -> void {
-                            processReadsQuasi<RapMapSAIndex<int64_t>>(
-                              singleParserPtr.get(),
-                              readExp,
-                              rl,
-                              structureVec[i],
-                              numObservedFragments,
-                              numAssignedFragments,
-                              numValidHits,
-                              upperBoundHits,
-                              sidx->quasiIndex64(),
-                              transcripts,
-                              fmCalc,
-                              clusterForest,
-                              fragLengthDist,
-                              memOptions,
-                              salmonOpts,
-                              coverageThresh,
-                              iomutex,
-                              initialRound,
-                              burnedIn,
-                              writeToCache);
-                            };
-                            threads.emplace_back(threadFun);
-                          } else {
-                            auto threadFun = [&,i]() -> void {
-                              processReadsQuasi<RapMapSAIndex<int32_t>>(
-                                singleParserPtr.get(),
-                                readExp,
-                                rl,
-                                structureVec[i],
-                                numObservedFragments,
-                                numAssignedFragments,
-                                numValidHits,
-                                upperBoundHits,
-                                sidx->quasiIndex32(),
-                                transcripts,
-                                fmCalc,
-                                clusterForest,
-                                fragLengthDist,
-                                memOptions,
-                                salmonOpts,
-                                coverageThresh,
-                                iomutex,
-                                initialRound,
-                                burnedIn,
-                                writeToCache);
-                              };
-                              threads.emplace_back(threadFun);
-                            }
-
-                        }
-                      }
-                      break;
-                }
-                for(int i = 0; i < numThreads; ++i) { threads[i].join(); }
-            } // ------ END Single-end --------
+  } // ------ END Single-end --------
 }
 
-
 /**
   *  Quantify the targets given in the file `transcriptFile` using the
   *  reads in the given set of `readLibraries`, and write the results
@@ -1192,653 +1590,872 @@ void processReadLibrary(
   *
   */
 template <typename AlnT>
-void quantifyLibrary(
-        ReadExperiment& experiment,
-        bool greedyChain,
-        mem_opt_t* memOptions,
-        SalmonOpts& salmonOpts,
-        double coverageThresh,
-        size_t numRequiredFragments,
-        uint32_t numQuantThreads) {
-
-    bool burnedIn{false};
-    std::atomic<uint64_t> upperBoundHits{0};
-    //ErrorModel errMod(1.00);
-    auto& refs = experiment.transcripts();
-    size_t numTranscripts = refs.size();
-    // The *total* number of fragments observed so far (over all passes through the data).
-    std::atomic<uint64_t> numObservedFragments{0};
-    uint64_t prevNumObservedFragments{0};
-    // The *total* number of fragments assigned so far (over all passes through the data).
-    std::atomic<uint64_t> totalAssignedFragments{0};
-    uint64_t prevNumAssignedFragments{0};
-
-    auto jointLog = spdlog::get("jointLog");
-
-    ForgettingMassCalculator fmCalc(salmonOpts.forgettingFactor);
-    size_t prefillSize = 1000000000 / miniBatchSize;
-    fmCalc.prefill(prefillSize);
-
-    bool initialRound{true};
-    uint32_t roundNum{0};
-
-    std::mutex ffMutex;
-    std::mutex ioMutex;
-
-    size_t numPrevObservedFragments = 0;
-
-    size_t maxReadGroup{miniBatchSize};
-    uint32_t structCacheSize = numQuantThreads * maxReadGroup * 10;
-
-    // EQCLASS
-    bool terminate{false};
-
-    while (numObservedFragments < numRequiredFragments and !terminate) {
-        prevNumObservedFragments = numObservedFragments;
-        if (!initialRound) {
-            bool didReset = (salmonOpts.disableMappingCache) ?
-                            (experiment.reset()) :
-                            (experiment.softReset());
-
-            if (!didReset) {
-                std::string errmsg = fmt::sprintf(
-                  "\n\n======== WARNING ========\n"
-                  "One of the provided read files: [{}] "
-                  "is not a regular file and therefore can't be read from "
-                  "more than once.\n\n"
-                  "We observed only {} mapping fragments when we wanted at least {}.\n\n"
-                  "Please consider re-running Salmon with these reads "
-                  "as a regular file!\n"
-                  "NOTE: If you received this warning from salmon but did not "
-                  "disable the mapping cache (--disableMappingCache), then there \n"
-                  "was some other problem. Please make sure, e.g., that you have not "
-                  "run out of disk space.\n"
-                  "==========================\n\n",
-                  experiment.readFilesAsString(), numObservedFragments, numRequiredFragments);
-                jointLog->warn() << errmsg;
-                break;
-            }
-
-            numPrevObservedFragments = numObservedFragments;
-        }
-
-        // This structure is a vector of vectors of alignment
-        // groups.  Each thread will get its own vector, so we
-        // allocate these up front to save time and allow
-        // reuse.
-        std::vector<AlnGroupVec<AlnT>> groupVec;
-        for (size_t i = 0; i < numQuantThreads; ++i) {
-            groupVec.emplace_back(maxReadGroup);
-        }
+void quantifyLibrary(ReadExperiment& experiment, bool greedyChain,
+                     mem_opt_t* memOptions, SalmonOpts& salmonOpts,
+                     double coverageThresh, uint32_t numQuantThreads) {
+
+  bool burnedIn{false};
+  uint64_t numRequiredFragments = salmonOpts.numRequiredFragments;
+  std::atomic<uint64_t> upperBoundHits{0};
+  // ErrorModel errMod(1.00);
+  auto& refs = experiment.transcripts();
+  size_t numTranscripts = refs.size();
+  // The *total* number of fragments observed so far (over all passes through
+  // the data).
+  std::atomic<uint64_t> numObservedFragments{0};
+  uint64_t prevNumObservedFragments{0};
+  // The *total* number of fragments assigned so far (over all passes through
+  // the data).
+  std::atomic<uint64_t> totalAssignedFragments{0};
+  uint64_t prevNumAssignedFragments{0};
+
+  auto jointLog = spdlog::get("jointLog");
+
+  ForgettingMassCalculator fmCalc(salmonOpts.forgettingFactor);
+  size_t prefillSize = 1000000000 / miniBatchSize;
+  fmCalc.prefill(prefillSize);
+
+  bool initialRound{true};
+  uint32_t roundNum{0};
+
+  std::mutex ffMutex;
+  std::mutex ioMutex;
+
+  size_t numPrevObservedFragments = 0;
+
+  size_t maxReadGroup{miniBatchSize};
+  uint32_t structCacheSize = numQuantThreads * maxReadGroup * 10;
+
+  // EQCLASS
+  bool terminate{false};
+
+  while (numObservedFragments < numRequiredFragments and !terminate) {
+    prevNumObservedFragments = numObservedFragments;
+    if (!initialRound) {
+      bool didReset = (salmonOpts.disableMappingCache)
+                          ? (experiment.reset())
+                          : (experiment.softReset());
+
+      if (!didReset) {
+        std::string errmsg = fmt::format(
+            "\n\n======== WARNING ========\n"
+            "One of the provided read files: [{}] "
+            "is not a regular file and therefore can't be read from "
+            "more than once.\n\n"
+            "We observed only {} mapping fragments when we wanted at least "
+            "{}.\n\n"
+            "Please consider re-running Salmon with these reads "
+            "as a regular file!\n"
+            "NOTE: If you received this warning from salmon but did not "
+            "disable the mapping cache (--disableMappingCache), then there \n"
+            "was some other problem. Please make sure, e.g., that you have not "
+            "run out of disk space.\n"
+            "==========================\n\n",
+            experiment.readFilesAsString(), numObservedFragments,
+            numRequiredFragments);
+        jointLog->warn(errmsg);
+        break;
+      }
+
+      numPrevObservedFragments = numObservedFragments;
+    }
 
+    // This structure is a vector of vectors of alignment
+    // groups.  Each thread will get its own vector, so we
+    // allocate these up front to save time and allow
+    // reuse.
+    std::vector<AlnGroupVec<AlnT>> groupVec;
+    for (size_t i = 0; i < numQuantThreads; ++i) {
+      groupVec.emplace_back(maxReadGroup);
+    }
 
-        bool writeToCache = !salmonOpts.disableMappingCache;
-        auto processReadLibraryCallback =  [&](
-                ReadLibrary& rl, SalmonIndex* sidx,
-                std::vector<Transcript>& transcripts, ClusterForest& clusterForest,
-                FragmentLengthDistribution& fragLengthDist,
-                std::atomic<uint64_t>& numAssignedFragments,
-                size_t numQuantThreads, std::atomic<bool>& burnedIn) -> void  {
+    bool writeToCache = !salmonOpts.disableMappingCache;
+    auto processReadLibraryCallback =
+        [&](ReadLibrary& rl, SalmonIndex* sidx,
+            std::vector<Transcript>& transcripts, ClusterForest& clusterForest,
+            FragmentLengthDistribution& fragLengthDist,
+            std::atomic<uint64_t>& numAssignedFragments, size_t numQuantThreads,
+            std::atomic<bool>& burnedIn) -> void {
+
+      processReadLibrary<AlnT>(experiment, rl, sidx, transcripts, clusterForest,
+                               numObservedFragments, totalAssignedFragments,
+                               upperBoundHits, initialRound, burnedIn, fmCalc,
+                               fragLengthDist, memOptions, salmonOpts,
+                               coverageThresh, greedyChain, ioMutex,
+                               numQuantThreads, groupVec, writeToCache);
+
+      numAssignedFragments = totalAssignedFragments - prevNumAssignedFragments;
+      prevNumAssignedFragments = totalAssignedFragments;
+    };
+
+    // Process all of the reads
+    if (!salmonOpts.quiet) {
+      fmt::print(stderr, "\n\n\n\n");
+    }
+    experiment.processReads(numQuantThreads, salmonOpts,
+                            processReadLibraryCallback);
+    experiment.setNumObservedFragments(numObservedFragments);
 
-            processReadLibrary<AlnT>(experiment, rl, sidx, transcripts, clusterForest,
-                    numObservedFragments, totalAssignedFragments, upperBoundHits,
-                    initialRound, burnedIn, fmCalc, fragLengthDist,
-                    memOptions, salmonOpts, coverageThresh, greedyChain,
-                    ioMutex, numQuantThreads,
-                    groupVec, writeToCache);
+    // EQCLASS
+    bool done = experiment.equivalenceClassBuilder().finish();
+    // skip the extra online rounds
+    terminate = true;
 
-            numAssignedFragments = totalAssignedFragments - prevNumAssignedFragments;
-            prevNumAssignedFragments = totalAssignedFragments;
-        };
+    initialRound = false;
+    ++roundNum;
 
-        // Process all of the reads
-        fmt::print(stderr, "\n\n\n\n");
-        experiment.processReads(numQuantThreads, salmonOpts, processReadLibraryCallback);
-        experiment.setNumObservedFragments(numObservedFragments);
-
-        //EQCLASS
-        bool done = experiment.equivalenceClassBuilder().finish();
-        // skip the extra online rounds
-        terminate = true;
-
-        initialRound = false;
-        ++roundNum;
-        fmt::print(stderr, "\n\n\n\n");
-        /*
-        fmt::print(stderr, "\n# observed = {} / # required = {}\n",
-                   numObservedFragments, numRequiredFragments);
-        fmt::print(stderr, "hard # assigned = {} / # observed (this round) = {} : "
-                           "upper bound assigned = {} \033[A\033[A",
-                   experiment.numAssignedFragments(),
-                   numObservedFragments - numPrevObservedFragments,
-                   upperBoundHits);
-        */
-        salmonOpts.fileLog->info("\nAt end of round {}\n"
-                                   "==================\n"
-                                   "Observed {} total fragments ({} in most recent round)\n",
-                                   roundNum - 1,
-                                   numObservedFragments,
-                                   numObservedFragments - numPrevObservedFragments);
+    if (!salmonOpts.quiet) {
+      fmt::print(stderr, "\n\n\n\n");
     }
+    /*
+    fmt::print(stderr, "\n# observed = {} / # required = {}\n",
+               numObservedFragments, numRequiredFragments);
+    fmt::print(stderr, "hard # assigned = {} / # observed (this round) = {} : "
+                       "upper bound assigned = {} \033[A\033[A",
+               experiment.numAssignedFragments(),
+               numObservedFragments - numPrevObservedFragments,
+               upperBoundHits);
+    */
+    salmonOpts.fileLog->info(
+        "\nAt end of round {}\n"
+        "==================\n"
+        "Observed {} total fragments ({} in most recent round)\n",
+        roundNum - 1, numObservedFragments,
+        numObservedFragments - numPrevObservedFragments);
+  }
+  if (!salmonOpts.quiet) {
     fmt::print(stderr, "\n\n\n\n");
+  }
 
-    // If we didn't achieve burnin, then at least compute effective
-    // lengths and mention this to the user.
-    if (totalAssignedFragments < salmonOpts.numBurninFrags) {
-        std::atomic<bool> dummyBool{false};
-        experiment.updateTranscriptLengthsAtomic(dummyBool);
-
-        jointLog->warn("Only {} fragments were mapped, but the number of burn-in fragments was set to {}.\n"
-                "The effective lengths have been computed using the observed mappings.\n",
-                totalAssignedFragments, salmonOpts.numBurninFrags);
-
-	// If we didn't have a sufficient number of samples for burnin,
-	// then also ignore modeling of the fragment start position
-	// distribution.
-	if (salmonOpts.useFSPD) {
-	  salmonOpts.useFSPD = false;
-	  jointLog->warn("Since only {} (< {}) fragments were observed, modeling of the fragment start position "
-			 "distribution has been disabled", totalAssignedFragments, salmonOpts.numBurninFrags);
-
-	}
+  // Report statistics about short fragments
+  salmon::utils::ShortFragStats shortFragStats = experiment.getShortFragStats();
+  if (shortFragStats.numTooShort > 0) {
+    double tooShortFrac =
+        (numObservedFragments > 0)
+            ? (static_cast<double>(shortFragStats.numTooShort) /
+               numObservedFragments)
+            : 0.0;
+    if (tooShortFrac > 0.0) {
+      size_t minK = rapmap::utils::my_mer::k();
+      fmt::print(stderr, "\n\n");
+      salmonOpts.jointLog->warn("{}% of fragments were shorter than the k used "
+                                "to build the index ({}).\n"
+                                "If this fraction is too large, consider "
+                                "re-building the index with a smaller k.\n"
+                                "The minimum read size found was {}.\n\n",
+                                tooShortFrac * 100.0, minK,
+                                shortFragStats.shortest);
+
+      // If *all* fragments were too short, then halt now
+      if (shortFragStats.numTooShort == numObservedFragments) {
+        salmonOpts.jointLog->error(
+            "All fragments were too short to quasi-map.  I won't proceed.");
+        std::exit(1);
+      }
+    } // end tooShortFrac > 0.0
+  }
+
+  // If we didn't achieve burnin, then at least compute effective
+  // lengths and mention this to the user.
+  if (totalAssignedFragments < salmonOpts.numBurninFrags) {
+    std::atomic<bool> dummyBool{false};
+    experiment.updateTranscriptLengthsAtomic(dummyBool);
+
+    jointLog->warn("Only {} fragments were mapped, but the number of burn-in "
+                   "fragments was set to {}.\n"
+                   "The effective lengths have been computed using the "
+                   "observed mappings.\n",
+                   totalAssignedFragments, salmonOpts.numBurninFrags);
+
+    // If we didn't have a sufficient number of samples for burnin,
+    // then also ignore modeling of the fragment start position
+    // distribution.
+    if (salmonOpts.useFSPD) {
+      salmonOpts.useFSPD = false;
+      jointLog->warn("Since only {} (< {}) fragments were observed, modeling "
+                     "of the fragment start position "
+                     "distribution has been disabled",
+                     totalAssignedFragments, salmonOpts.numBurninFrags);
     }
+  }
 
-    if (numObservedFragments <= prevNumObservedFragments) {
-        jointLog->warn() << "Something seems to be wrong with the calculation "
-            "of the mapping rate.  The recorded ratio is likely wrong.  Please "
-            "file this as a bug report.\n";
+  if (numObservedFragments <= prevNumObservedFragments) {
+    jointLog->warn(
+        "Something seems to be wrong with the calculation "
+           "of the mapping rate.  The recorded ratio is likely wrong.  Please "
+	"file this as a bug report.\n");
+  } else {
+    double upperBoundMappingRate =
+        upperBoundHits.load() /
+        static_cast<double>(numObservedFragments.load());
+    experiment.setNumObservedFragments(numObservedFragments -
+                                       prevNumObservedFragments);
+    experiment.setUpperBoundHits(upperBoundHits.load());
+    if (salmonOpts.allowOrphans) {
+      double mappingRate = totalAssignedFragments.load() /
+                           static_cast<double>(numObservedFragments.load());
+      experiment.setEffectiveMappingRate(mappingRate);
     } else {
-        double upperBoundMappingRate =
-            upperBoundHits.load() /
-            static_cast<double>(numObservedFragments.load());
-        experiment.setNumObservedFragments(numObservedFragments - prevNumObservedFragments);
-        experiment.setUpperBoundHits(upperBoundHits.load());
-        if (salmonOpts.allowOrphans) {
-           double mappingRate = totalAssignedFragments.load() /
-               static_cast<double>(numObservedFragments.load());
-           experiment.setEffectiveMappingRate(mappingRate);
-        } else {
-            experiment.setEffectiveMappingRate(upperBoundMappingRate);
-        }
+      experiment.setEffectiveMappingRate(upperBoundMappingRate);
     }
+  }
 
-        jointLog->info("Mapping rate = {}\%\n",
-                   experiment.effectiveMappingRate() * 100.0);
-    jointLog->info("finished quantifyLibrary()");
+  jointLog->info("Mapping rate = {}\%\n",
+                 experiment.effectiveMappingRate() * 100.0);
+  jointLog->info("finished quantifyLibrary()");
 }
 
-int salmonQuantify(int argc, char *argv[]) {
-    using std::cerr;
-    using std::vector;
-    using std::string;
-    namespace bfs = boost::filesystem;
-    namespace po = boost::program_options;
-
-    bool optChain{false};
-    size_t requiredObservations;
-    int32_t numBiasSamples{0};
-
-    SalmonOpts sopt;
-    mem_opt_t* memOptions = mem_opt_init();
-    memOptions->split_factor = 1.5;
-
-    sopt.numThreads = std::thread::hardware_concurrency();
-
-    double coverageThresh;
-    vector<string> unmatedReadFiles;
-    vector<string> mate1ReadFiles;
-    vector<string> mate2ReadFiles;
-
-    po::options_description generic("\n"
-		    		    "basic options");
-    generic.add_options()
-    ("version,v", "print version string")
-    ("help,h", "produce help message")
-    ("index,i", po::value<string>()->required(), "Salmon index")
-    ("libType,l", po::value<std::string>()->required(), "Format string describing the library type")
-    ("unmatedReads,r", po::value<vector<string>>(&unmatedReadFiles)->multitoken(),
-     "List of files containing unmated reads of (e.g. single-end reads)")
-    ("mates1,1", po::value<vector<string>>(&mate1ReadFiles)->multitoken(),
-        "File containing the #1 mates")
-    ("mates2,2", po::value<vector<string>>(&mate2ReadFiles)->multitoken(),
-        "File containing the #2 mates")
-    ("allowOrphans", po::bool_switch(&(sopt.allowOrphans))->default_value(false), "Consider orphaned reads as valid hits when "
-                        "performing lightweight-alignment.  This option will increase sensitivity (allow more reads to map and "
-                        "more transcripts to be detected), but may decrease specificity as orphaned alignments are more likely "
-                        "to be spurious -- this option is *always* set to true when using quasi-mapping.")
-    ("biasCorrect", po::value(&(sopt.biasCorrect))->zero_tokens(), "Perform sequence-specific bias correction.")
-    ("threads,p", po::value<uint32_t>(&(sopt.numThreads))->default_value(sopt.numThreads), "The number of threads to use concurrently.")
-    ("incompatPrior", po::value<double>(&(sopt.incompatPrior))->default_value(1e-20), "This option "
-                        "sets the prior probability that an alignment that disagrees with the specified "
-                        "library type (--libType) results from the true fragment origin.  Setting this to 0 "
-                        "specifies that alignments that disagree with the library type should be \"impossible\", "
-                        "while setting it to 1 says that alignments that disagree with the library type are no "
-                        "less likely than those that do")
-    ("minLen,k", po::value<int>(&(memOptions->min_seed_len))->default_value(19), "(S)MEMs smaller than this size won't be considered.")
-    ("sensitive", po::bool_switch(&(sopt.sensitive))->default_value(false), "Setting this option enables the splitting of SMEMs that are larger "
-                                        "than 1.5 times the minimum seed length (minLen/k above).  This may reveal high scoring chains of MEMs "
-                                        "that are masked by long SMEMs.  However, this option makes lightweight-alignment a bit slower and is "
-                                        "usually not necessary if the reference is of reasonable quality.")
-    ("extraSensitive", po::bool_switch(&(sopt.extraSeedPass))->default_value(false), "Setting this option enables an extra pass of \"seed\" search. "
-                                        "Enabling this option may improve sensitivity (the number of reads having sufficient coverage), but will "
-                                        "typically slow down quantification by ~40%.  Consider enabling this option if you find the mapping rate to "
-                                        "be significantly lower than expected.")
-    ("coverage,c", po::value<double>(&coverageThresh)->default_value(0.70), "required coverage of read by union of SMEMs to consider it a \"hit\".")
-    ("output,o", po::value<std::string>()->required(), "Output quantification file.")
-    ("geneMap,g", po::value<string>(), "File containing a mapping of transcripts to genes.  If this file is provided "
-                                        "Salmon will output both quant.sf and quant.genes.sf files, where the latter "
-                                        "contains aggregated gene-level abundance estimates.  The transcript to gene mapping "
-                                        "should be provided as either a GTF file, or a in a simple tab-delimited format "
-                                        "where each line contains the name of a transcript and the gene to which it belongs "
-                                        "separated by a tab.  The extension of the file is used to determine how the file "
-                                        "should be parsed.  Files ending in \'.gtf\' or \'.gff\' are assumed to be in GTF "
-                                        "format; files with any other extension are assumed to be in the simple format.");
-    //("optChain", po::bool_switch(&optChain)->default_value(false), "Chain MEMs optimally rather than greedily")
-
-    sopt.noRichEqClasses = false;
-    // mapping cache has been deprecated
-    sopt.disableMappingCache = true;
-
-    po::options_description advanced("\n"
-		    		     "advanced options");
-    advanced.add_options()
-    /*
-    ("disableMappingCache", po::bool_switch(&(sopt.disableMappingCache))->default_value(false), "Setting this option disables the creation and use "
-                                        "of the \"mapping cache\" file.  The mapping cache can speed up quantification significantly for smaller read "
-                                        "libraries (i.e. where the number of mapped fragments is less than the required number of observations). However, "
-                                        "for very large read libraries, the mapping cache is unnecessary, and disabling it may allow salmon to more effectively "
-                                        "make use of a very large number of threads.")
-    */
-    ("fldMax" , po::value<size_t>(&(sopt.fragLenDistMax))->default_value(800), "The maximum fragment length to consider when building the empirical "
-     											      "distribution")
-    ("fldMean", po::value<size_t>(&(sopt.fragLenDistPriorMean))->default_value(200), "The mean used in the fragment length distribution prior")
-    ("fldSD" , po::value<size_t>(&(sopt.fragLenDistPriorSD))->default_value(80), "The standard deviation used in the fragment length distribution prior")
-    ("forgettingFactor,f", po::value<double>(&(sopt.forgettingFactor))->default_value(0.65), "The forgetting factor used "
-                        "in the online learning schedule.  A smaller value results in quicker learning, but higher variance "
-                        "and may be unstable.  A larger value results in slower learning but may be more stable.  Value should "
-                        "be in the interval (0.5, 1.0].")
-    ("maxOcc,m", po::value<int>(&(memOptions->max_occ))->default_value(200), "(S)MEMs occuring more than this many times won't be considered.")
-    ("maxReadOcc,w", po::value<uint32_t>(&(sopt.maxReadOccs))->default_value(100), "Reads \"mapping\" to more than this many places won't be considered.")
-    ("noEffectiveLengthCorrection", po::bool_switch(&(sopt.noEffectiveLengthCorrection))->default_value(false), "Disables "
-                        "effective length correction when computing the probability that a fragment was generated "
-                        "from a transcript.  If this flag is passed in, the fragment length distribution is not taken "
-                        "into account when computing this probability.")
-    ("noFragLengthDist", po::bool_switch(&(sopt.noFragLengthDist))->default_value(false), "[experimental] : "
-                        "Don't consider concordance with the learned fragment length distribution when trying to determine "
-                        "the probability that a fragment has originated from a specified location.  Normally, Fragments with "
-                         "unlikely lengths will be assigned a smaller relative probability than those with more likely "
-                        "lengths.  When this flag is passed in, the observed fragment length has no effect on that fragment's "
-                        "a priori probability.")
-    ("useFSPD", po::bool_switch(&(sopt.useFSPD))->default_value(false), "[experimental] : "
-                        "Consider / model non-uniformity in the fragment start positions across the transcript.")
-    ("numBiasSamples", po::value<int32_t>(&numBiasSamples)->default_value(1000000),
-            "Number of fragment mappings to use when learning the sequence-specific bias model.")
-    ("numAuxModelSamples", po::value<uint32_t>(&(sopt.numBurninFrags))->default_value(5000000), "The first <numAuxModelSamples> are used to train the "
-     			"auxiliary model parameters (e.g. fragment length distribution, bias, etc.).  After ther first <numAuxModelSamples> observations "
-			"the auxiliary model parameters will be assumed to have converged and will be fixed.")
-    ("numPreAuxModelSamples", po::value<uint32_t>(&(sopt.numPreBurninFrags))->default_value(1000000), "The first <numPreAuxModelSamples> will have their "
-     			"assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models.  The purpose "
-			"of ignoring the auxiliary models for the first <numPreAuxModelSamples> observations is to avoid applying these models before thier "
-			"parameters have been learned sufficiently well.")
-    ("numRequiredObs,n", po::value(&requiredObservations)->default_value(50000000),
-                                        "[Deprecated]: The minimum number of observations (mapped reads) that must be observed before "
-                                        "the inference procedure will terminate.  If fewer mapped reads exist in the "
-                                        "input file, then it will be read through multiple times.")
-    ("splitWidth,s", po::value<int>(&(memOptions->split_width))->default_value(0), "If (S)MEM occurs fewer than this many times, search for smaller, contained MEMs. "
-                                        "The default value will not split (S)MEMs, a higher value will result in more MEMs being explore and, thus, will "
-                                        "result in increased running time.")
-    ("splitSpanningSeeds,b", po::bool_switch(&(sopt.splitSpanningSeeds))->default_value(false), "Attempt to split seeds that happen to fall on the "
-                                        "boundary between two transcripts.  This can improve the  fragment hit-rate, but is usually not necessary.")
-    ("useVBOpt", po::bool_switch(&(sopt.useVBOpt))->default_value(false), "Use the Variational Bayesian EM rather than the "
-     			"traditional EM algorithm for optimization in the batch passes.")
-    ("numGibbsSamples", po::value<uint32_t>(&(sopt.numGibbsSamples))->default_value(0), "Number of Gibbs sampling rounds to "
+int salmonQuantify(int argc, char* argv[]) {
+  using std::cerr;
+  using std::vector;
+  using std::string;
+  namespace bfs = boost::filesystem;
+  namespace po = boost::program_options;
+
+  bool optChain{false};
+  int32_t numBiasSamples{0};
+
+  SalmonOpts sopt;
+  mem_opt_t* memOptions = mem_opt_init();
+  memOptions->split_factor = 1.5;
+
+  sopt.numThreads = std::thread::hardware_concurrency();
+
+  double coverageThresh;
+  vector<string> unmatedReadFiles;
+  vector<string> mate1ReadFiles;
+  vector<string> mate2ReadFiles;
+
+  po::options_description generic("\n"
+                                  "basic options");
+  generic.add_options()("version,v", "print version string")
+    (
+      "help,h", "produce help message")
+    (
+      "index,i", po::value<string>()->required(),
+      "Salmon index")("libType,l", po::value<std::string>()->required(),
+                      "Format string describing the library type")
+    (
+      "unmatedReads,r",
+      po::value<vector<string>>(&unmatedReadFiles)->multitoken(),
+      "List of files containing unmated reads of (e.g. single-end reads)")(
+      "mates1,1", po::value<vector<string>>(&mate1ReadFiles)->multitoken(),
+      "File containing the #1 mates")
+    (
+      "mates2,2", po::value<vector<string>>(&mate2ReadFiles)->multitoken(),
+      "File containing the #2 mates")
+    (
+
+      "output,o", po::value<std::string>()->required(),
+      "Output quantification file.")
+    (
+      "allowOrphans",
+      po::bool_switch(&(sopt.allowOrphans))->default_value(false),
+      "Consider orphaned reads as valid hits when "
+      "performing lightweight-alignment.  This option will increase "
+      "sensitivity (allow more reads to map and "
+      "more transcripts to be detected), but may decrease specificity as "
+      "orphaned alignments are more likely "
+      "to be spurious -- this option is *always* set to true when using "
+      "quasi-mapping.")
+    (
+     "seqBias",
+     po::value(&(sopt.biasCorrect))->zero_tokens(),
+     "Perform sequence-specific bias correction.")
+    (
+      "gcBias", po::value(&(sopt.gcBiasCorrect))->zero_tokens(),
+      "[beta] Perform fragment GC bias correction")
+    (
+      "threads,p",
+      po::value<uint32_t>(&(sopt.numThreads))->default_value(sopt.numThreads),
+      "The number of threads to use concurrently.")
+    (
+      "incompatPrior",
+      po::value<double>(&(sopt.incompatPrior))->default_value(1e-20),
+      "This option "
+      "sets the prior probability that an alignment that disagrees with the "
+      "specified "
+      "library type (--libType) results from the true fragment origin.  "
+      "Setting this to 0 "
+      "specifies that alignments that disagree with the library type should be "
+      "\"impossible\", "
+      "while setting it to 1 says that alignments that disagree with the "
+      "library type are no "
+      "less likely than those that do")
+    (
+      "geneMap,g", po::value<string>(),
+      "File containing a mapping of transcripts to genes.  If this file is "
+      "provided "
+      "Salmon will output both quant.sf and quant.genes.sf files, where the "
+      "latter "
+      "contains aggregated gene-level abundance estimates.  The transcript to "
+      "gene mapping "
+      "should be provided as either a GTF file, or a in a simple tab-delimited "
+      "format "
+      "where each line contains the name of a transcript and the gene to which "
+      "it belongs "
+      "separated by a tab.  The extension of the file is used to determine how "
+      "the file "
+      "should be parsed.  Files ending in \'.gtf\' or \'.gff\' are assumed to "
+      "be in GTF "
+      "format; files with any other extension are assumed to be in the simple "
+      "format.");
+
+  sopt.noRichEqClasses = false;
+  // mapping cache has been deprecated
+  sopt.disableMappingCache = true;
+
+  po::options_description advanced("\n"
+                                   "advanced options");
+  advanced.add_options()
+      /*
+      ("disableMappingCache",
+      po::bool_switch(&(sopt.disableMappingCache))->default_value(false),
+      "Setting this option disables the creation and use "
+                                          "of the \"mapping cache\" file.  The
+      mapping cache can speed up quantification significantly for smaller read "
+                                          "libraries (i.e. where the number of
+      mapped fragments is less than the required number of observations).
+      However, "
+                                          "for very large read libraries, the
+      mapping cache is unnecessary, and disabling it may allow salmon to more
+      effectively "
+                                          "make use of a very large number of
+      threads.")
+      */
+    (
+     "auxDir", po::value<std::string>(&(sopt.auxDir))->default_value("aux_info"),
+     "The sub-directory of the quantification directory where auxiliary "
+     "information "
+     "e.g. bootstraps, bias parameters, etc. will be written.")
+    (
+     "consistentHits,c",
+     po::bool_switch(&(sopt.consistentHits))->default_value(false),
+     "Force hits gathered during "
+     "quasi-mapping to be \"consistent\" (i.e. co-linear and "
+     "approximately the right distance apart).")(
+						 "dumpEq", po::bool_switch(&(sopt.dumpEq))->default_value(false),
+						 "Dump the equivalence class counts "
+						 "that were computed during quasi-mapping")
+    (
+     "gcSizeSamp",
+     po::value<std::uint32_t>(&(sopt.gcSampFactor))->default_value(1),
+     "The value by which to down-sample transcripts when representing the "
+     "GC content.  Larger values will reduce memory usage, but may "
+     "decrease the fidelity of bias modeling results.")
+    (
+     "biasSpeedSamp",
+     po::value<std::uint32_t>(&(sopt.pdfSampFactor))->default_value(1),
+     "The value at which the fragment length PMF is down-sampled "
+     "when evaluating sequence-specific & GC fragment bias.  Larger values speed up effective "
+     "length correction, but may decrease the fidelity of bias modeling "
+     "results.")
+    (
+     "strictIntersect",
+     po::bool_switch(&(sopt.strictIntersect))->default_value(false),
+     "Modifies how orphans are "
+     "assigned.  When this flag is set, if the intersection of the "
+     "quasi-mappings for the left and right "
+     "is empty, then all mappings for the left and all mappings for the "
+     "right read are reported as orphaned "
+     "quasi-mappings")
+    (
+     "fldMax",
+     po::value<size_t>(&(sopt.fragLenDistMax))->default_value(1000),
+     "The maximum fragment length to consider when building the empirical "
+     "distribution")
+    (
+     "fldMean",
+     po::value<size_t>(&(sopt.fragLenDistPriorMean))->default_value(200),
+     "The mean used in the fragment length distribution prior")
+    (
+     "fldSD",
+     po::value<size_t>(&(sopt.fragLenDistPriorSD))->default_value(80),
+     "The standard deviation used in the fragment length distribution "
+     "prior")
+    (
+     "forgettingFactor,f",
+     po::value<double>(&(sopt.forgettingFactor))->default_value(0.65),
+     "The forgetting factor used "
+     "in the online learning schedule.  A smaller value results in "
+     "quicker learning, but higher variance "
+     "and may be unstable.  A larger value results in slower learning but "
+     "may be more stable.  Value should "
+     "be in the interval (0.5, 1.0].")
+    (
+     "maxOcc,m", 
+     po::value<int>(&(memOptions->max_occ))->default_value(200),
+     "(S)MEMs occuring more than this many times won't be considered.")
+    (
+     "initUniform", po::bool_switch(&(sopt.initUniform))->default_value(false),
+     "initialize the offline inference with uniform parameters, rather than seeding with online parameters.")
+    (
+     "maxReadOcc,w",
+     po::value<uint32_t>(&(sopt.maxReadOccs))->default_value(100),
+     "Reads \"mapping\" to more than this many places won't be "
+     "considered.")
+    (
+     "noEffectiveLengthCorrection",
+     po::bool_switch(&(sopt.noEffectiveLengthCorrection))
+     ->default_value(false),
+     "Disables "
+     "effective length correction when computing the "
+     "probability that a fragment was generated "
+     "from a transcript.  If this flag is passed in, the "
+     "fragment length distribution is not taken "
+     "into account when computing this probability.")
+    (
+     "noFragLengthDist",
+     po::bool_switch(&(sopt.noFragLengthDist))->default_value(false),
+     "[experimental] : "
+     "Don't consider concordance with the learned fragment length "
+     "distribution when trying to determine "
+     "the probability that a fragment has originated from a specified "
+     "location.  Normally, Fragments with "
+     "unlikely lengths will be assigned a smaller relative probability "
+     "than those with more likely "
+     "lengths.  When this flag is passed in, the observed fragment length "
+     "has no effect on that fragment's "
+     "a priori probability.")
+    (
+     "noBiasLengthThreshold",
+     po::bool_switch(&(sopt.noBiasLengthThreshold))->default_value(false),
+     "[experimental] : "
+     "If this option is enabled, then no (lower) threshold will be set on "
+     "how short bias correction can make effective lengths. This can increase the precision "
+     "of bias correction, but harm robustness.  The default correction applies a threshold")
+    (
+     "numBiasSamples",
+     po::value<int32_t>(&numBiasSamples)->default_value(2000000),
+     "Number of fragment mappings to use when learning the "
+     "sequence-specific bias model.")
+    (
+     "numAuxModelSamples",
+     po::value<uint32_t>(&(sopt.numBurninFrags))->default_value(5000000),
+     "The first <numAuxModelSamples> are used to train the "
+     "auxiliary model parameters (e.g. fragment length distribution, "
+     "bias, etc.).  After ther first <numAuxModelSamples> observations "
+     "the auxiliary model parameters will be assumed to have converged "
+     "and will be fixed.")
+    (
+     "numPreAuxModelSamples",
+     po::value<uint32_t>(&(sopt.numPreBurninFrags))
+     ->default_value(1000000),
+     "The first <numPreAuxModelSamples> will have their "
+     "assignment likelihoods and contributions to the transcript "
+     "abundances computed without applying any auxiliary models.  The "
+     "purpose "
+     "of ignoring the auxiliary models for the first "
+     "<numPreAuxModelSamples> observations is to avoid applying these "
+     "models before thier "
+     "parameters have been learned sufficiently well.")
+    (
+     "useVBOpt", po::bool_switch(&(sopt.useVBOpt))->default_value(false),
+     "Use the Variational Bayesian EM rather than the "
+     "traditional EM algorithm for optimization in the batch passes.")
+    (
+     "numGibbsSamples",
+     po::value<uint32_t>(&(sopt.numGibbsSamples))->default_value(0),
+     "Number of Gibbs sampling rounds to "
      "perform.")
-    ("numBootstraps", po::value<uint32_t>(&(sopt.numBootstraps))->default_value(0), "Number of bootstrap samples to generate. Note: "
-      "This is mutually exclusive with Gibbs sampling.");
-
-    po::options_description testing("\n"
-            "testing options");
-    testing.add_options()
-        ("noRichEqClasses", po::bool_switch(&(sopt.noRichEqClasses))->default_value(false),
-                        "[TESTING OPTION]: Disable \"rich\" equivalent classes.  If this flag is passed, then "
-                        "all information about the relative weights for each transcript in the "
-                        "label of an equivalence class will be ignored, and only the relative "
-                        "abundance and effective length of each transcript will be considered.")
-        ("noFragLenFactor", po::bool_switch(&(sopt.noFragLenFactor))->default_value(false),
-                        "[TESTING OPTION]: Disable the factor in the likelihood that takes into account the "
-                        "goodness-of-fit of an alignment with the empirical fragment length "
-                        "distribution");
-
-    po::options_description all("salmon quant options");
-    all.add(generic).add(advanced).add(testing);
-
-    po::options_description visible("salmon quant options");
-    visible.add(generic).add(advanced);
-
-    po::variables_map vm;
-    try {
-        auto orderedOptions = po::command_line_parser(argc,argv).
-            options(all).run();
-
-        po::store(orderedOptions, vm);
-
-        if ( vm.count("help") ) {
-            auto hstring = R"(
+    (
+     "numBootstraps",
+     po::value<uint32_t>(&(sopt.numBootstraps))->default_value(0),
+     "Number of bootstrap samples to generate. Note: "
+     "This is mutually exclusive with Gibbs sampling.")
+    (
+     "quiet,q", po::bool_switch(&(sopt.quiet))->default_value(false),
+     "Be quiet while doing quantification (don't write informative "
+     "output to the console unless something goes wrong).")
+    (
+     "perTranscriptPrior", po::bool_switch(&(sopt.perTranscriptPrior)), "The "
+     "prior (either the default or the argument provided via --vbPrior) will "
+     "be interpreted as a transcript-level prior (i.e. each transcript will "
+     "be given a prior read count of this value)")
+    (
+     "vbPrior", po::value<double>(&(sopt.vbPrior))->default_value(1e-3),
+     "The prior that will be used in the VBEM algorithm.  This is interpreted "
+     "as a per-nucleotide prior, unless the --perTranscriptPrior flag "
+     "is also given, in which case this is used as a transcript-level prior")
+    (
+     "writeUnmappedNames",
+     po::bool_switch(&(sopt.writeUnmappedNames))->default_value(false),
+     "Write the names of un-mapped reads to the file unmapped.txt in the auxiliary directory.");
+
+
+  po::options_description fmd("\noptions that apply to the old FMD index");
+  fmd.add_options()
+    (
+     "minLen,k",
+     po::value<int>(&(memOptions->min_seed_len))->default_value(19),
+     "(S)MEMs smaller than this size won't be considered.")
+    (
+     "sensitive", po::bool_switch(&(sopt.sensitive))->default_value(false),
+     "Setting this option enables the splitting of SMEMs that are larger "
+     "than 1.5 times the minimum seed length (minLen/k above).  This may "
+     "reveal high scoring chains of MEMs "
+     "that are masked by long SMEMs.  However, this option makes "
+     "lightweight-alignment a bit slower and is "
+     "usually not necessary if the reference is of reasonable quality.")
+    (
+     "extraSensitive",
+     po::bool_switch(&(sopt.extraSeedPass))->default_value(false),
+     "Setting this option enables an extra pass of \"seed\" search. "
+     "Enabling this option may improve sensitivity (the number of reads "
+     "having sufficient coverage), but will "
+     "typically slow down quantification by ~40%.  Consider enabling this "
+     "option if you find the mapping rate to "
+     "be significantly lower than expected.")
+    (
+     "coverage,c", po::value<double>(&coverageThresh)->default_value(0.70),
+     "required coverage of read by union of SMEMs to consider it a \"hit\".")
+    (
+     "splitWidth,s",
+     po::value<int>(&(memOptions->split_width))->default_value(0),
+     "If (S)MEM occurs fewer than this many times, search for smaller, "
+     "contained MEMs. "
+     "The default value will not split (S)MEMs, a higher value will "
+     "result in more MEMs being explore and, thus, will "
+     "result in increased running time.")
+    (
+     "splitSpanningSeeds,b",
+     po::bool_switch(&(sopt.splitSpanningSeeds))->default_value(false),
+     "Attempt to split seeds that happen to fall on the "
+     "boundary between two transcripts.  This can improve the  fragment "
+     "hit-rate, but is usually not necessary.");
+    
+  po::options_description hidden("\nhidden options");
+  hidden.add_options()
+    ("numGCBins", po::value<size_t>(&(sopt.numFragGCBins))->default_value(25),
+     "Number of bins to use when modeling fragment GC bias")
+    (
+     "conditionalGCBins", po::value<size_t>(&(sopt.numConditionalGCBins))->default_value(3),
+     "Number of different fragment GC models to learn based on read start/end context")
+    (
+     "numRequiredObs,n",
+     po::value(&(sopt.numRequiredFragments))->default_value(50000000),
+     "[Deprecated]: The minimum number of observations (mapped reads) "
+     "that must be observed before "
+     "the inference procedure will terminate.");
+
+  po::options_description testing("\n"
+                                  "testing options");
+  testing.add_options()
+    (
+     "posBias", po::value(&(sopt.posBiasCorrect))->zero_tokens(),
+     "[experimental] Perform positional bias correction")
+    (
+      "noRichEqClasses",
+      po::bool_switch(&(sopt.noRichEqClasses))->default_value(false),
+      "[TESTING OPTION]: Disable \"rich\" equivalent classes.  If this flag is "
+      "passed, then "
+      "all information about the relative weights for each transcript in the "
+      "label of an equivalence class will be ignored, and only the relative "
+      "abundance and effective length of each transcript will be considered.")(
+      "noFragLenFactor",
+      po::bool_switch(&(sopt.noFragLenFactor))->default_value(false),
+      "[TESTING OPTION]: Disable the factor in the likelihood that takes into "
+      "account the "
+      "goodness-of-fit of an alignment with the empirical fragment length "
+      "distribution")(
+      "rankEqClasses",
+      po::bool_switch(&(sopt.rankEqClasses))->default_value(false),
+      "[TESTING OPTION]: Keep separate equivalence classes for each distinct "
+      "ordering of transcripts in the label.");
+
+  po::options_description deprecated("\ndeprecated options about which to inform the user");
+  deprecated.add_options() (
+     "useFSPD", po::bool_switch(&(sopt.useFSPD))->default_value(false),
+     "[deprecated] : "
+     "Consider / model non-uniformity in the fragment start positions "
+     "across the transcript.");
+
+  po::options_description all("salmon quant options");
+  all.add(generic).add(advanced).add(testing).add(hidden).add(fmd).add(deprecated);
+
+  po::options_description visible("salmon quant options");
+  visible.add(generic).add(advanced);
+
+  po::variables_map vm;
+  try {
+    auto orderedOptions =
+        po::command_line_parser(argc, argv).options(all).run();
+
+    po::store(orderedOptions, vm);
+
+    if (vm.count("help")) {
+      auto hstring = R"(
 Quant
 ==========
 Perform streaming mapping-based estimation of
 transcript abundance from RNA-seq reads
 )";
-            std::cout << hstring << std::endl;
-            std::cout << visible << std::endl;
-            std::exit(1);
-        }
-
-        po::notify(vm);
-
+      std::cout << hstring << std::endl;
+      std::cout << visible << std::endl;
+      std::exit(0);
+    }
 
+    po::notify(vm);
+    
+    // If we're supposed to be quiet, set the global logger level to >= warn
+    if (sopt.quiet) {
+        spdlog::set_level(spdlog::level::warn); //Set global log level to warn
+    }
 
-        std::stringstream commentStream;
-        commentStream << "# salmon (mapping-based) v" << salmon::version << "\n";
-        commentStream << "# [ program ] => salmon \n";
-        commentStream << "# [ command ] => quant \n";
-        for (auto& opt : orderedOptions.options) {
-            commentStream << "# [ " << opt.string_key << " ] => {";
-            for (auto& val : opt.value) {
-                commentStream << " " << val;
-            }
-            commentStream << " }\n";
-        }
-        std::string commentString = commentStream.str();
+    std::stringstream commentStream;
+    commentStream << "### salmon (mapping-based) v" << salmon::version << "\n";
+    commentStream << "### [ program ] => salmon \n";
+    commentStream << "### [ command ] => quant \n";
+    for (auto& opt : orderedOptions.options) {
+      commentStream << "### [ " << opt.string_key << " ] => {";
+      for (auto& val : opt.value) {
+        commentStream << " " << val;
+      }
+      commentStream << " }\n";
+    }
+    std::string commentString = commentStream.str();
+    if (!sopt.quiet) {
         fmt::print(stderr, "{}", commentString);
+    }
 
-        // TODO: Fix fragment start pos dist
-        // sopt.useFSPD = false;
-
-	// Set the atomic variable numBiasSamples from the local version
-	sopt.numBiasSamples.store(numBiasSamples);
-
-        // Get the time at the start of the run
-        std::time_t result = std::time(NULL);
-        std::string runStartTime(std::asctime(std::localtime(&result)));
-        runStartTime.pop_back(); // remove the newline
-
-        // Verify the geneMap before we start doing any real work.
-        bfs::path geneMapPath;
-        if (vm.count("geneMap")) {
-            // Make sure the provided file exists
-            geneMapPath = vm["geneMap"].as<std::string>();
-            if (!bfs::exists(geneMapPath)) {
-                std::cerr << "Could not find transcript <=> gene map file " << geneMapPath << "\n";
-                std::cerr << "Exiting now: please either omit the \'geneMap\' option or provide a valid file\n";
-                std::exit(1);
-            }
-        }
-
-        bool greedyChain = !optChain;
-        bfs::path outputDirectory(vm["output"].as<std::string>());
-        bfs::create_directories(outputDirectory);
-        if (!(bfs::exists(outputDirectory) and bfs::is_directory(outputDirectory))) {
-            std::cerr << "Couldn't create output directory " << outputDirectory << "\n";
-            std::cerr << "exiting\n";
-            std::exit(1);
-        }
+    // TODO: Fix fragment start pos dist
+    // sopt.useFSPD = false;
+    bool optionsOK =
+        salmon::utils::processQuantOptions(sopt, vm, numBiasSamples);
+    if (!optionsOK) {
+      std::exit(1);
+    }
+ 
+    auto fileLog = sopt.fileLog;
+    auto jointLog = sopt.jointLog;
+    auto indexDirectory = sopt.indexDirectory;
+    auto outputDirectory = sopt.outputDirectory;
+    bool greedyChain = true;
+    
+    // If the user is enabling *just* GC bias correction
+    // i.e. without seq-specific bias correction, then disable
+    // the conditional model.
+    if (sopt.gcBiasCorrect and !sopt.biasCorrect) {
+      sopt.numConditionalGCBins = 1;
+    }
 
-        bfs::path indexDirectory(vm["index"].as<string>());
-        bfs::path logDirectory = outputDirectory / "logs";
+    jointLog->info("parsing read library format");
 
-        sopt.indexDirectory = indexDirectory;
-        sopt.outputDirectory = outputDirectory;
+    vector<ReadLibrary> readLibraries =
+        salmon::utils::extractReadLibraries(orderedOptions);
+    
+    if (readLibraries.size() == 0) {
+        jointLog->error("Failed to successfully parse any complete read libraries."
+                        " Please make sure you provided arguments properly to -1, -2 (for paired-end libraries)"
+                        " or -r (for single-end libraries).");
+        std::exit(1);
+    }
 
-        // Create the logger and the logging directory
-        bfs::create_directories(logDirectory);
-        if (!(bfs::exists(logDirectory) and bfs::is_directory(logDirectory))) {
-            std::cerr << "Couldn't create log directory " << logDirectory << "\n";
-            std::cerr << "exiting\n";
-            std::exit(1);
-        }
-        std::cerr << "Logs will be written to " << logDirectory.string() << "\n";
-
-        bfs::path logPath = logDirectory / "salmon_quant.log";
-	    // must be a power-of-two
-        size_t max_q_size = 2097152;
-        spdlog::set_async_mode(max_q_size);
-
-        auto fileSink = std::make_shared<spdlog::sinks::simple_file_sink_mt>(logPath.string(), true);
-        auto consoleSink = std::make_shared<spdlog::sinks::stderr_sink_mt>();
-        auto consoleLog = spdlog::create("stderrLog", {consoleSink});
-        auto fileLog = spdlog::create("fileLog", {fileSink});
-        auto jointLog = spdlog::create("jointLog", {fileSink, consoleSink});
-
-        sopt.jointLog = jointLog;
-        sopt.fileLog = fileLog;
-
-        // Verify that no inconsistent options were provided
-        if (sopt.numGibbsSamples > 0 and sopt.numBootstraps > 0) {
-            jointLog->error("You cannot perform both Gibbs sampling and bootstrapping. "
-                            "Please choose one.");
-            jointLog->flush();
-            std::exit(1);
+    SalmonIndexVersionInfo versionInfo;
+    boost::filesystem::path versionPath = indexDirectory / "versionInfo.json";
+    versionInfo.load(versionPath);
+    auto idxType = versionInfo.indexType();
+
+    ReadExperiment experiment(readLibraries, indexDirectory, sopt);
+
+    // This will be the class in charge of maintaining our
+    // rich equivalence classes
+    experiment.equivalenceClassBuilder().start();
+
+    auto indexType = experiment.getIndex()->indexType();
+
+    switch (indexType) {
+    case SalmonIndexType::FMD: {
+      /** Currently no seq-specific or fragment GC bias
+       *  correction with the FMD index.
+       */
+      if (sopt.biasCorrect or sopt.gcBiasCorrect) {
+        sopt.biasCorrect = false;
+        sopt.gcBiasCorrect = false;
+        jointLog->warn(
+            "Sequence-specific or fragment GC bias correction require "
+            "use of the quasi-index. Disabling all bias correction");
+      }
+      quantifyLibrary<SMEMAlignment>(experiment, greedyChain, memOptions, sopt,
+                                     coverageThresh, sopt.numThreads);
+    } break;
+    case SalmonIndexType::QUASI: {
+      // We can only do fragment GC bias correction, for the time being, with
+      // paired-end reads
+      if (sopt.gcBiasCorrect) {
+        for (auto& rl : readLibraries) {
+          if (rl.format().type != ReadType::PAIRED_END) {
+            jointLog->warn("Fragment GC bias correction is currently only "
+                           "implemented for paired-end libraries.  Disabling "
+                           "fragment GC bias correction for this run");
+            sopt.gcBiasCorrect = false;
+          }
         }
+      }
 
-        {
-            if (sopt.noFragLengthDist and !sopt.noEffectiveLengthCorrection) {
-                jointLog->info() << "Error: You cannot enable --noFragLengthDist without "
-                                 << "also enabling --noEffectiveLengthCorrection; exiting!\n";
-                jointLog->flush();
-                std::exit(1);
-            }
-        }
+      sopt.allowOrphans = true;
+      sopt.useQuasi = true;
+      quantifyLibrary<QuasiAlignment>(experiment, greedyChain, memOptions, sopt,
+                                      coverageThresh, sopt.numThreads);
+    } break;
+    }
 
-        // maybe arbitrary, but if it's smaller than this, consider it
-        // equal to LOG_0
-        if (sopt.incompatPrior < 1e-320) {
-            sopt.incompatPrior = salmon::math::LOG_0;
+    // Write out information about the command / run
+    {
+      bfs::path cmdInfoPath = outputDirectory / "cmd_info.json";
+      std::ofstream os(cmdInfoPath.string());
+      cereal::JSONOutputArchive oa(os);
+      oa(cereal::make_nvp("salmon_version", std::string(salmon::version)));
+      for (auto& opt : orderedOptions.options) {
+        if (opt.value.size() == 1) {
+          oa(cereal::make_nvp(opt.string_key, opt.value.front()));
         } else {
-            sopt.incompatPrior = std::log(sopt.incompatPrior);
-        }
-        // END: option checking
-
-        // Write out information about the command / run
-        {
-            bfs::path cmdInfoPath = outputDirectory / "cmd_info.json";
-            std::ofstream os(cmdInfoPath.string());
-            cereal::JSONOutputArchive oa(os);
-            oa(cereal::make_nvp("salmon_version", std::string(salmon::version)));
-            for (auto& opt : orderedOptions.options) {
-                if (opt.value.size() == 1) {
-                    oa(cereal::make_nvp(opt.string_key, opt.value.front()));
-                } else {
-                    oa(cereal::make_nvp(opt.string_key, opt.value));
-                }
-            }
+          oa(cereal::make_nvp(opt.string_key, opt.value));
         }
+      }
+      // explicitly output the aux directory as well
+      oa(cereal::make_nvp("auxDir", sopt.auxDir));
+    }
 
-        jointLog->info() << "parsing read library format";
+    GZipWriter gzw(outputDirectory, jointLog);
 
-        vector<ReadLibrary> readLibraries = salmon::utils::extractReadLibraries(orderedOptions);
+    // If we are dumping the equivalence classes, then
+    // do it here.
+    if (sopt.dumpEq) {
+      gzw.writeEquivCounts(sopt, experiment);
+    }
 
-        SalmonIndexVersionInfo versionInfo;
-        boost::filesystem::path versionPath = indexDirectory / "versionInfo.json";
-        versionInfo.load(versionPath);
-        versionInfo.indexType();
+    // Now that the streaming pass is complete, we have
+    // our initial estimates, and our rich equivalence
+    // classes.  Perform further optimization until
+    // convergence.
+    // NOTE: A side-effect of calling the optimizer is that
+    // the `EffectiveLength` field of each transcript is
+    // set to its final value.
+    CollapsedEMOptimizer optimizer;
+    jointLog->info("Starting optimizer");
+    salmon::utils::normalizeAlphas(sopt, experiment);
+    bool optSuccess = optimizer.optimize(experiment, sopt, 0.01, 10000);
+
+    if (!optSuccess) {
+      jointLog->error(
+          "The optimization algorithm failed. This is likely the result of "
+          "bad input (or a bug). If you cannot track down the cause, please "
+          "report this issue on GitHub.");
+      return 1;
+    }
+    jointLog->info("Finished optimizer");
 
-        ReadExperiment experiment(readLibraries, indexDirectory, sopt);
+    free(memOptions);
+    size_t tnum{0};
 
-        // Parameter validation
-        // If we're allowing orphans, make sure that the read libraries are paired-end.
-        // Otherwise, this option makes no sense.
-        /*
-        if (sopt.allowOrphans) {
-            for (auto& rl : readLibraries) {
-                if (!rl.isPairedEnd()) {
-                    jointLog->error("You cannot specify the --allowOrphans argument "
-                                    "for single-end libraries; exiting!");
-                    std::exit(1);
-                }
-            }
-        }
-        */
-        // end parameter validation
-
-
-        // This will be the class in charge of maintaining our
-    	// rich equivalence classes
-        experiment.equivalenceClassBuilder().start();
-
-        auto indexType = experiment.getIndex()->indexType();
-
-        switch (indexType) {
-            case SalmonIndexType::FMD:
-                {
-                    /** Currently no seq-specific bias correction with
-                     *  FMD index.
-                     */
-                    if (sopt.biasCorrect) {
-                        sopt.biasCorrect = false;
-                        jointLog->warn("Sequence-specific bias correction requires "
-                                "use of the quasi-index. Disabling bias correction");
-                    }
-                    quantifyLibrary<SMEMAlignment>(experiment, greedyChain, memOptions, sopt, coverageThresh,
-                            requiredObservations, sopt.numThreads);
-                }
-                break;
-            case SalmonIndexType::QUASI:
-                {
-                    sopt.allowOrphans = true;
-                    sopt.useQuasi = true;
-                     quantifyLibrary<QuasiAlignment>(experiment, greedyChain, memOptions, sopt, coverageThresh,
-                                                     requiredObservations, sopt.numThreads);
-                }
-                break;
-        }
+    jointLog->info("writing output \n");
+
+    bfs::path estFilePath = outputDirectory / "quant.sf";
+
+    // Write the main results
+    gzw.writeAbundances(sopt, experiment);
+    // Write meta-information about the run
+    gzw.writeMeta(sopt, experiment, sopt.runStartTime);
 
-        // Now that the streaming pass is complete, we have
-        // our initial estimates, and our rich equivalence
-        // classes.  Perform further optimization until
-        // convergence.
-        // NOTE: A side-effect of calling the optimizer is that
-        // the `EffectiveLength` field of each transcript is
-        // set to its final value.
-        CollapsedEMOptimizer optimizer;
-        jointLog->info("Starting optimizer");
-    	salmon::utils::normalizeAlphas(sopt, experiment);
-        bool optSuccess = optimizer.optimize(experiment, sopt, 0.01, 10000);
-
-	if (!optSuccess) {
-	  jointLog->error("The optimization algorithm failed. This is likely the result of "
-			  "bad input (or a bug). If you cannot track down the cause, please "
-			  "report this issue on GitHub.");
-	  return 1;
-	}
-        jointLog->info("Finished optimizer");
-
-        free(memOptions);
-        size_t tnum{0};
-
-        jointLog->info("writing output \n");
-
-        bfs::path estFilePath = outputDirectory / "quant.sf";
-
-        commentStream << "# [ mapping rate ] => { " << experiment.effectiveMappingRate() * 100.0 << "\% }\n";
-        commentString = commentStream.str();
-
-        GZipWriter gzw(outputDirectory, jointLog);
-        // Write the main results
-        gzw.writeAbundances(sopt, experiment);
-        // Write meta-information about the run
-        gzw.writeMeta(sopt, experiment, runStartTime);
-
-        if (sopt.numGibbsSamples > 0) {
-
-            jointLog->info("Starting Gibbs Sampler");
-            CollapsedGibbsSampler sampler;
-            // The function we'll use as a callback to write samples
-            std::function<bool(const std::vector<int>&)> bsWriter =
-                [&gzw](const std::vector<int>& alphas) -> bool {
-                    return gzw.writeBootstrap(alphas);
-                };
-
-            bool sampleSuccess = sampler.sample(experiment, sopt,
-                    bsWriter,
-                    sopt.numGibbsSamples);
-            if (!sampleSuccess) {
-                jointLog->error("Encountered error during Gibb sampling .\n"
+    if (sopt.numGibbsSamples > 0) {
+
+      jointLog->info("Starting Gibbs Sampler");
+      CollapsedGibbsSampler sampler;
+      // The function we'll use as a callback to write samples
+      std::function<bool(const std::vector<int>&)> bsWriter =
+          [&gzw](const std::vector<int>& alphas) -> bool {
+        return gzw.writeBootstrap(alphas);
+      };
+
+      bool sampleSuccess =
+          sampler.sample(experiment, sopt, bsWriter, sopt.numGibbsSamples);
+      if (!sampleSuccess) {
+        jointLog->error("Encountered error during Gibb sampling .\n"
                         "This should not happen.\n"
                         "Please file a bug report on GitHub.\n");
-                return 1;
-            }
-            jointLog->info("Finished Gibbs Sampler");
-        } else if (sopt.numBootstraps > 0) {
-            // The function we'll use as a callback to write samples
-            std::function<bool(const std::vector<double>&)> bsWriter =
-                [&gzw](const std::vector<double>& alphas) -> bool {
-                    return gzw.writeBootstrap(alphas);
-                };
-
-            jointLog->info("Staring Bootstrapping");
-            bool bootstrapSuccess = optimizer.gatherBootstraps(
-                    experiment, sopt,
-                    bsWriter, 0.01, 10000);
-            jointLog->info("Finished Bootstrapping");
-            if (!bootstrapSuccess) {
-                jointLog->error("Encountered error during bootstrapping.\n"
+        return 1;
+      }
+      jointLog->info("Finished Gibbs Sampler");
+    } else if (sopt.numBootstraps > 0) {
+      // The function we'll use as a callback to write samples
+      std::function<bool(const std::vector<double>&)> bsWriter =
+          [&gzw](const std::vector<double>& alphas) -> bool {
+        return gzw.writeBootstrap(alphas);
+      };
+
+      jointLog->info("Staring Bootstrapping");
+      bool bootstrapSuccess =
+          optimizer.gatherBootstraps(experiment, sopt, bsWriter, 0.01, 10000);
+      jointLog->info("Finished Bootstrapping");
+      if (!bootstrapSuccess) {
+        jointLog->error("Encountered error during bootstrapping.\n"
                         "This should not happen.\n"
                         "Please file a bug report on GitHub.\n");
-                return 1;
-            }
-        }
-
+        return 1;
+      }
+    }
 
-        // Now create a subdirectory for any parameters of interest
-        bfs::path paramsDir = outputDirectory / "libParams";
-        if (!boost::filesystem::exists(paramsDir)) {
-            if (!boost::filesystem::create_directories(paramsDir)) {
-                fmt::print(stderr, "{}ERROR{}: Could not create "
+    // Now create a subdirectory for any parameters of interest
+    bfs::path paramsDir = outputDirectory / "libParams";
+    if (!boost::filesystem::exists(paramsDir)) {
+      if (!boost::filesystem::create_directories(paramsDir)) {
+        fmt::print(stderr, "{}ERROR{}: Could not create "
                            "output directory for experimental parameter "
-                           "estimates [{}]. exiting.", ioutils::SET_RED,
-                           ioutils::RESET_COLOR, paramsDir);
-                std::exit(-1);
-            }
-        }
-
-        bfs::path libCountFilePath = outputDirectory / "libFormatCounts.txt";
-        experiment.summarizeLibraryTypeCounts(libCountFilePath);
-
-        // Test writing out the fragment length distribution
-        if (!sopt.noFragLengthDist) {
-            bfs::path distFileName = paramsDir / "flenDist.txt";
-            {
-                std::unique_ptr<std::FILE, int (*)(std::FILE *)> distOut(std::fopen(distFileName.c_str(), "w"), std::fclose);
-                fmt::print(distOut.get(), "{}\n", experiment.fragmentLengthDistribution()->toString());
-            }
-        }
+                           "estimates [{}]. exiting.",
+                   ioutils::SET_RED, ioutils::RESET_COLOR, paramsDir);
+        std::exit(-1);
+      }
+    }
 
-        /** If the user requested gene-level abundances, then compute those now **/
-        if (vm.count("geneMap")) {
-            try {
-                salmon::utils::generateGeneLevelEstimates(geneMapPath,
-                                                          outputDirectory);
-            } catch (std::invalid_argument& e) {
-                fmt::print(stderr, "Error: [{}] when trying to compute gene-level "\
-                                   "estimates. The gene-level file(s) may not exist",
-                                   e.what());
-            }
-        }
+    bfs::path libCountFilePath = outputDirectory / "lib_format_counts.json";
+    experiment.summarizeLibraryTypeCounts(libCountFilePath);
+
+    // Test writing out the fragment length distribution
+    if (!sopt.noFragLengthDist) {
+      bfs::path distFileName = paramsDir / "flenDist.txt";
+      {
+        std::unique_ptr<std::FILE, int (*)(std::FILE*)> distOut(
+            std::fopen(distFileName.c_str(), "w"), std::fclose);
+        fmt::print(distOut.get(), "{}\n",
+                   experiment.fragmentLengthDistribution()->toString());
+      }
+    }
 
-    } catch (po::error &e) {
-        std::cerr << "Exception : [" << e.what() << "]. Exiting.\n";
-        std::exit(1);
-    } catch (const spdlog::spdlog_ex& ex) {
-        std::cerr << "logger failed with : [" << ex.what() << "]. Exiting.\n";
-        std::exit(1);
-    } catch (std::exception& e) {
-        std::cerr << "Exception : [" << e.what() << "]\n";
-        std::cerr << argv[0] << " quant was invoked improperly.\n";
-        std::cerr << "For usage information, try " << argv[0] << " quant --help\nExiting.\n";
-        std::exit(1);
+    /** If the user requested gene-level abundances, then compute those now **/
+    if (vm.count("geneMap")) {
+      try {
+        salmon::utils::generateGeneLevelEstimates(sopt.geneMapPath,
+                                                  outputDirectory);
+      } catch (std::invalid_argument& e) {
+        fmt::print(stderr, "Error: [{}] when trying to compute gene-level "
+                           "estimates. The gene-level file(s) may not exist",
+                   e.what());
+      }
     }
 
+    if (sopt.writeUnmappedNames) {
+      auto l = spdlog::get("unmappedLog");
+      // If the logger was created, then flush it and
+      // close the associated file.
+      if (l) {
+	l->flush();
+	if (sopt.unmappedFile) { sopt.unmappedFile->close(); }
+      }
+    }
+  } catch (po::error& e) {
+    std::cerr << "Exception : [" << e.what() << "]. Exiting.\n";
+    std::exit(1);
+  } catch (const spdlog::spdlog_ex& ex) {
+    std::cerr << "logger failed with : [" << ex.what() << "]. Exiting.\n";
+    std::exit(1);
+  } catch (std::exception& e) {
+    std::cerr << "Exception : [" << e.what() << "]\n";
+    std::cerr << argv[0] << " quant was invoked improperly.\n";
+    std::cerr << "For usage information, try " << argv[0]
+              << " quant --help\nExiting.\n";
+    std::exit(1);
+  }
 
-    return 0;
+  return 0;
 }
diff --git a/src/SalmonQuantifyAlignments.cpp b/src/SalmonQuantifyAlignments.cpp
index dc5a5f4..6226974 100644
--- a/src/SalmonQuantifyAlignments.cpp
+++ b/src/SalmonQuantifyAlignments.cpp
@@ -5,7 +5,7 @@ extern "C" {
 }
 
 // for cpp-format
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/fmt.h"
 
 // are these used?
 #include <boost/dynamic_bitset.hpp>
@@ -63,6 +63,7 @@ extern "C" {
 #include "CollapsedGibbsSampler.hpp"
 #include "GZipWriter.hpp"
 #include "TextBootstrapWriter.hpp"
+#include "BiasParams.hpp"
 
 namespace bfs = boost::filesystem;
 using salmon::math::LOG_0;
@@ -126,6 +127,7 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                       volatile bool& doneParsing,
                       std::atomic<size_t>& activeBatches,
                       SalmonOpts& salmonOpts,
+		      BiasParams& observedBiasParams,
                       std::atomic<bool>& burnedIn,
                       bool initialRound,
                       std::atomic<size_t>& processedReads) {
@@ -136,20 +138,31 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
 
     // Whether or not we are using "banking"
     bool useMassBanking = (!initialRound and salmonOpts.useMassBanking);
-    bool useReadCompat = salmonOpts.incompatPrior != salmon::math::LOG_0;
-
+    bool useReadCompat = salmonOpts.incompatPrior != salmon::math::LOG_1;
+    
     // Create a random uniform distribution
     std::default_random_engine eng(rd());
     std::uniform_real_distribution<> uni(0.0, 1.0 + std::numeric_limits<double>::min());
 
     //EQClass
     EquivalenceClassBuilder& eqBuilder = alnLib.equivalenceClassBuilder();
-    auto& readBias = alnLib.readBias();
+    auto& readBiasFW = observedBiasParams.seqBiasModelFW;
+    auto& readBiasRC = observedBiasParams.seqBiasModelRC;
+    auto& observedGCMass = observedBiasParams.observedGCMass;
+    auto& obsFwd = observedBiasParams.massFwd;
+    auto& obsRC = observedBiasParams.massRC;
+
+    bool gcBiasCorrect = salmonOpts.gcBiasCorrect;
 
     using salmon::math::LOG_0;
     using salmon::math::logAdd;
     using salmon::math::logSub;
 
+    // k-mers for sequence bias
+    Mer leftMer;
+    Mer rightMer;
+    Mer context;
+    
     auto& refs = alnLib.transcripts();
     auto& clusterForest = alnLib.clusterForest();
     auto& fragmentQueue = alnLib.fragmentQueue();
@@ -176,6 +189,8 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
     bool updateCounts = initialRound;
     size_t numTranscripts = refs.size();
 
+    double maxZeroFrac{0.0};
+
     while (!doneParsing or !workQueue.empty()) {
         uint32_t zeroProbFrags{0};
 
@@ -190,13 +205,17 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
             std::unique_lock<std::mutex> l(cvmutex);
             workAvailable.wait(l, [&miniBatch, &workQueue, &doneParsing]() { return workQueue.try_pop(miniBatch) or doneParsing; });
         }
+                 
+
+        uint64_t batchReads{0};
 
 	    // If we actually got some work
         if (miniBatch != nullptr) {
 
             useAuxParams = (processedReads > salmonOpts.numPreBurninFrags);
             ++activeBatches;
-            size_t batchReads{0};
+            batchReads = 0;
+            zeroProbFrags = 0;
 
             // double logForgettingMass = fmCalc();
             double logForgettingMass{0.0};
@@ -253,8 +272,9 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                             } else {
                             }
                             */
-                            if(aln->isPaired() and aln->fragLen() > 0) {
-                                logFragProb = fragLengthDist.pmf(static_cast<size_t>(aln->fragLen()));
+                            auto fragLen = aln->fragLengthPedantic(transcript.RefLength);
+                            if(aln->isPaired() and fragLen > 0) {
+                                logFragProb = fragLengthDist.pmf(static_cast<size_t>(fragLen));
                             }
                         }
 
@@ -278,6 +298,18 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
 
                         // The probability that the fragments align to the given strands in the
                         // given orientations.
+                        bool isCompat = 
+                            salmon::utils::isCompatible(
+                                  aln->libFormat(),
+                                  expectedLibraryFormat,
+                                  aln->pos(),
+                                  aln->fwd(), aln->mateStatus());
+                        double logAlignCompatProb = isCompat ? LOG_1 : salmonOpts.incompatPrior;
+                        if (!isCompat and salmonOpts.ignoreIncompat) {
+                            aln->logProb = salmon::math::LOG_0;
+                            continue;
+                        }
+                              /*
                         double logAlignCompatProb =
                             (useReadCompat) ?
                             (salmon::utils::logAlignFormatProb(
@@ -287,6 +319,13 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                                   aln->fwd(), aln->mateStatus(), salmonOpts.incompatPrior)
                             ) : LOG_1;
 
+                        if (logAlignCompatProb != salmon::math::LOG_1) {
+                            aln->logProb = salmon::math::LOG_0;
+                            std::cerr <<"here\n";
+                            continue;
+                        }
+                              */
+
                         // Adjustment to the likelihood due to the
                         // error model
                         double errLike = salmon::math::LOG_1;
@@ -353,9 +392,8 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
 
                     // If we have a 0-probability fragment
                     if (sumOfAlignProbs == LOG_0) {
-                        auto aln = alnGroup->alignments().front();
-                        log->warn("0 probability fragment [{}] "
-                                  "encountered \n", aln->getName());
+                        ++zeroProbFrags;
+                        ++batchReads;
                         continue;
                     }
 
@@ -387,27 +425,179 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                         transcript.addMass(newMass);
                         transcript.setLastTimestepUpdated(currentMinibatchTimestep);
 
-                        /**
-                         * Update the auxiliary models.
-                         **/
-                        double r = uni(eng);
-                        if (!burnedIn and r < std::exp(aln->logProb)) {
-                            /**
-                             * Update the bias sequence-specific bias model
-                             **/
-                            if (needBiasSample and salmonOpts.numBiasSamples > 0) {
-                                // the "start" position is the leftmost position if
-                                // we hit the forward strand, and the leftmost
-                                // position + the read length if we hit the reverse complement
-                                bam_seq_t* r = aln->get5PrimeRead();
-                                if (r) {
-                                    bool fwd{bam_strand(r) == 0};
-                                    int32_t pos{bam_pos(r)};
-                                    int32_t startPos = fwd ? pos : pos + bam_seq_len(r);
-                                    auto dir = salmon::utils::boolToDirection(fwd);
+                        // ---- Collect seq-specific bias samples ------ //
+                        auto getCIGARLength = [](bam_seq_t* s) -> uint32_t {
+                            auto cl = bam_cigar_len(s);
+                            uint32_t k, end;
+                            end = 0;//bam_pos(s);
+                            uint32_t* cigar = bam_cigar(s);
+                            for (k = 0; k < cl; ++k) {
+                                int op = cigar[k] & BAM_CIGAR_MASK;
+                                if (BAM_CONSUME_SEQ(op)) {
+                                    end += cigar[k] >> BAM_CIGAR_SHIFT;
+                                }
+                            }
+                            return end;
+                        };
+       
+                        bool success = false;
+                        if(needBiasSample and salmonOpts.numBiasSamples > 0) {
+			  const char* txpStart = transcript.Sequence();
+			  const char* txpEnd = txpStart + transcript.RefLength;
+			    if (aln->isPaired()){
+				ReadPair* alnp = reinterpret_cast<ReadPair*>(aln);
+                                bam_seq_t* r1 = alnp->read1; 
+                                bam_seq_t* r2 = alnp->read2; 
+                                if (r1 != nullptr and r2 != nullptr) {
+                                    int32_t pos1 = bam_pos(r1);
+                                    bool fwd1{bam_strand(r1) == 0};
+                                    int32_t startPos1 = fwd1 ? pos1 : pos1 + getCIGARLength(r1) - 1;
+
+                                    int32_t pos2 = bam_pos(r2);
+                                    bool fwd2{bam_strand(r2) == 0};
+                                    int32_t startPos2 = fwd2 ? pos2 : pos2 + getCIGARLength(r2) - 1;
+
+				    // Shouldn't be from the same strand and they should be in the right order
+				    if ((fwd1 != fwd2) and // Shouldn't be from the same strand
+					(startPos1 > 0 and startPos1 < transcript.RefLength) and 
+					(startPos2 > 0 and startPos2 < transcript.RefLength)) { 
+
+				      const char* readStart1 = txpStart + startPos1;
+				      auto& readBias1 = (fwd1) ? readBiasFW : readBiasRC;
+
+				      const char* readStart2 = txpStart + startPos2;
+				      auto& readBias2 = (fwd2) ? readBiasFW : readBiasRC;
+		
+				      int32_t fwPre = readBias1.contextBefore(!fwd1);
+				      int32_t fwPost = readBias1.contextAfter(!fwd1);
+
+				      int32_t rcPre = readBias2.contextBefore(!fwd2);
+				      int32_t rcPost = readBias2.contextAfter(!fwd2); 
+
+				      bool read1RC = !fwd1;
+				      bool read2RC = !fwd2;
+
+				      if ( (startPos1 >= readBias1.contextBefore(read1RC) and 
+					    startPos1 + readBias1.contextAfter(read1RC ) < transcript.RefLength) 
+					   and
+					   (startPos2 >= readBias2.contextBefore(read2RC) and
+					    startPos2 + readBias2.contextAfter(read2RC) < transcript.RefLength) ) {
+		    
+					int32_t fwPos = (fwd1) ? startPos1 : startPos2; 
+					int32_t rcPos = (fwd1) ? startPos2 : startPos1;
+					if (fwPos < rcPos) {
+					  leftMer.from_chars(txpStart + startPos1 - readBias1.contextBefore(read1RC));
+					  rightMer.from_chars(txpStart + startPos2 - readBias2.contextBefore(read2RC));
+					  if (read1RC) { leftMer.reverse_complement(); } else { rightMer.reverse_complement(); }
+
+					  success = readBias1.addSequence(leftMer, 1.0);
+					  success = readBias2.addSequence(rightMer, 1.0);
+					}
+				      }
+                                    }
+                                }
+                            } else {  // unpaired read
+                                UnpairedRead* alnp = reinterpret_cast<UnpairedRead*>(aln);
+                                bam_seq_t* r1 = alnp->read; 
+                                if (r1 != nullptr) { 
+                                    int32_t pos1 = bam_pos(r1);
+                                    bool fwd1{bam_strand(r1) == 0};
+                                    int32_t startPos1 = fwd1 ? pos1 : pos1 + getCIGARLength(r1) - 1;
+
+                                    if (startPos1 > 0 and startPos1 < transcript.RefLength) {
+
+                                        const char* txpStart = transcript.Sequence();
+                                        const char* txpEnd = txpStart + transcript.RefLength;
+
+                                        const char* readStart1 = txpStart + startPos1;
+                                        auto& readBias1 = (fwd1) ? readBiasFW : readBiasRC;
+
+                                        if (startPos1 >= readBias1.contextBefore(!fwd1) and 
+                                            startPos1 + readBias1.contextAfter(!fwd1) < transcript.RefLength) {
+                                            context.from_chars(txpStart + startPos1 - readBias1.contextBefore(!fwd1));
+                                            if (!fwd1) { context.reverse_complement(); } 
+                                            success = readBias1.addSequence(context, 1.0);
+                                        }
+                                    }
+
+                                }
+                            } // end unpaired read
+                            if (success) {
+                                salmonOpts.numBiasSamples -= 1;
+                                needBiasSample = false;
+                            }
+                        }
+                        // ---- Collect seq-specific bias samples ------ //
+
+
+			/**
+			 * Update the auxiliary models.
+			 **/
+			// Paired-end
+			if (aln->isPaired()) {
+			  // TODO: Is this right for *all* library types?
+			  if (aln->fwd()) {
+			    obsFwd = salmon::math::logAdd(obsFwd, aln->logProb);
+			  } else {
+			    obsRC = salmon::math::logAdd(obsRC, aln->logProb);
+			  }
+			} else if (aln->libFormat().type == ReadType::SINGLE_END) {
+			  // Single-end or orphan
+			  if (aln->libFormat().strandedness == ReadStrandedness::S) {
+			    obsFwd = salmon::math::logAdd(obsFwd, aln->logProb);
+			  } else {
+			    obsRC = salmon::math::logAdd(obsRC, aln->logProb);
+			  }
+			}
+
+			// Collect the GC-fragment bias samples
+			if (gcBiasCorrect and aln->isPaired()) {
+			  ReadPair* alnp = reinterpret_cast<ReadPair*>(aln);
+			  bam_seq_t* r1 = alnp->read1; 
+			  bam_seq_t* r2 = alnp->read2; 
+			  if (r1 != nullptr and r2 != nullptr) {
+                  bool fwd1{bam_strand(r1) == 0};
+                  bool fwd2{bam_strand(r2) == 0};
+                  int32_t start = alnp->left(); 
+                  int32_t stop = alnp->right(); 
+
+                  if (start >= 0 and stop < transcript.RefLength) {
+		      auto desc = transcript.gcDesc(start, stop);
+                      observedGCMass.inc(desc, aln->logProb);
+                   }
+
+          /*
+			    if (start >= 0 and stop < transcript.RefLength) {
+			      int32_t gcFrac = transcript.gcFrac(start, stop);
+			      // Add this fragment's contribution
+			      observedGCMass[gcFrac] = salmon::math::logAdd(observedGCMass[gcFrac], newMass); 
+			    }
+          */
+			  }
+			}
+			// END: GC-fragment bias
+			
+			double r = uni(eng);
+			if (!burnedIn and r < std::exp(aln->logProb)) {
+			    /**
+			     * Update the sequence-specific bias model
+			     **/
+
+			    /*
+			    if (needBiasSample and salmonOpts.numBiasSamples > 0 and isPaired) {
+				// the "start" position is the leftmost position if
+				// we hit the forward strand, and the leftmost
+				// position + the read length if we hit the reverse complement
+				bam_seq_t* r = aln->get5PrimeRead();
+				if (r) {
+				    bool fwd{bam_strand(r) == 0};
+				    int32_t pos{bam_pos(r)};
+				    int32_t startPos = fwd ? pos : pos + bam_seq_len(r);
+				    auto dir = salmon::utils::boolToDirection(fwd);
 
                                     if (startPos > 0 and startPos < transcript.RefLength) {
-                                        const char* txpStart = transcript.Sequence;
+                                        auto& readBias = (fwd) ? readBiasFW : readBiasRC;
+                                        const char* txpStart = transcript.Sequence();
                                         const char* readStart = txpStart + startPos;
                                         const char* txpEnd = txpStart + transcript.RefLength;
                                         bool success = readBias.update(txpStart, readStart, txpEnd, dir);
@@ -418,6 +608,8 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                                     }
                                 }
                             }
+                            */
+			  
 
                             // Update the error model
                             if (salmonOpts.useErrorModel) {
@@ -425,8 +617,10 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                             }
                             // Update the fragment length distribution
                             if (aln->isPaired() and !salmonOpts.noFragLengthDist) {
-                                double fragLength = aln->fragLen();
-                                fragLengthDist.addVal(fragLength, logForgettingMass);
+                                double fragLength = aln->fragLengthPedantic(transcript.RefLength);
+                                if (fragLength > 0) {
+                                    fragLengthDist.addVal(fragLength, logForgettingMass);
+                                }
                             }
                             // Update the fragment start position distribution
                             if (useFSPD) {
@@ -527,9 +721,20 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                 // thread will set burnedIn to true
                 alnLib.updateTranscriptLengthsAtomic(burnedIn);
             }
+
+            if (zeroProbFrags > 0) {
+                maxZeroFrac = std::max(maxZeroFrac, static_cast<double>(100.0 * zeroProbFrags) / batchReads);
+            }
         }
+
         miniBatch = nullptr;
     } // nothing left to process
+
+    if (maxZeroFrac > 0.0) {
+        log->info("Thread saw mini-batch with a maximum of {0:.2f}\% zero probability fragments", 
+                  maxZeroFrac);
+    }
+
 }
 
 /**
@@ -571,6 +776,7 @@ bool quantifyLibrary(
     bool initialRound{true};
     bool haveCache{false};
     bool doReset{true};
+    bool gcBiasCorrect{salmonOpts.gcBiasCorrect};
     size_t maxCacheSize{salmonOpts.mappingCacheMemoryLimit};
 
     NullFragmentFilter<FragT>* nff = nullptr;
@@ -630,7 +836,12 @@ bool quantifyLibrary(
             firstTimestepOfRound -= 1;
         }
 
-        for (uint32_t i = 0; i < currentQuantThreads; ++i) {
+	/** sequence-specific and GC-fragment bias vectors --- each thread gets its own **/
+	std::vector<BiasParams> observedBiasParams(currentQuantThreads,
+						   BiasParams(salmonOpts.numConditionalGCBins, salmonOpts.numFragGCBins, false));
+
+
+	for (uint32_t i = 0; i < currentQuantThreads; ++i) {
             workers.emplace_back(processMiniBatch<FragT>,
                     std::ref(alnLib),
                     std::ref(fmCalc),
@@ -640,6 +851,7 @@ bool quantifyLibrary(
                     std::ref(workAvailable), std::ref(cvmutex),
                     std::ref(doneParsing), std::ref(activeBatches),
                     std::ref(salmonOpts),
+		    std::ref(observedBiasParams[i]),
                     std::ref(burnedIn),
                     initialRound,
                     std::ref(totalProcessedReads));
@@ -724,6 +936,48 @@ bool quantifyLibrary(
 
         numObservedFragments += alnLib.numMappedFragments();
 
+	/**
+	 *
+	 * Aggregate thread-local bias parameters 
+	 *
+	 **/
+            // Set the global distribution based on the sum of local
+            // distributions.
+            double gcFracFwd{0.0};
+            double globalMass{salmon::math::LOG_0};
+            double globalFwdMass{salmon::math::LOG_0};
+            auto& globalGCMass = alnLib.observedGC();
+            for (auto& gcp : observedBiasParams) {
+                auto& gcm = gcp.observedGCMass;
+                globalGCMass.combineCounts(gcm);
+                
+                auto& fw = alnLib.readBiasModelObserved(salmon::utils::Direction::FORWARD);
+                auto& rc = alnLib.readBiasModelObserved(salmon::utils::Direction::REVERSE_COMPLEMENT);
+                
+                auto& fwloc = gcp.seqBiasModelFW;
+                auto& rcloc = gcp.seqBiasModelRC;
+		fw.combineCounts(fwloc);
+		rc.combineCounts(rcloc);
+
+                globalMass = salmon::math::logAdd(globalMass, gcp.massFwd);
+                globalMass = salmon::math::logAdd(globalMass, gcp.massRC);
+                globalFwdMass = salmon::math::logAdd(globalFwdMass, gcp.massFwd);
+
+	    }
+            globalGCMass.normalize();
+
+	    if (globalMass != salmon::math::LOG_0) {
+		if (globalFwdMass != salmon::math::LOG_0) {
+                  gcFracFwd = std::exp(globalFwdMass - globalMass);
+                }
+                alnLib.setGCFracForward(gcFracFwd);
+            }
+
+           /** END: aggregate thread-local bias parameters **/
+
+
+
+	
         fmt::print(stderr, "# observed = {} / # required = {}\033[A\033[A\033[A\033[A\033[A",
                    numObservedFragments, numRequiredFragments);
 
@@ -775,7 +1029,6 @@ bool quantifyLibrary(
 	}
     }
 
-
     // In this case, we have to give the structures held
     // in the cache back to the appropriate queues
     if (haveCache) {
@@ -902,6 +1155,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
 
     uint32_t numThreads{4};
     size_t requiredObservations{50000000};
+    int32_t numBiasSamples{0};
 
     po::options_description basic("\nbasic options");
     basic.add_options()
@@ -915,7 +1169,8 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                                             "so until there is a faster multi-threaded SAM/BAM parser to feed the "
                                             "quantification threads, one should not expect much of a speed-up beyond "
                                             "~6 threads.")
-    ("biasCorrect", po::value(&(sopt.biasCorrect))->zero_tokens(), "Perform sequence-specific bias correction.")
+    ("seqBias", po::value(&(sopt.biasCorrect))->zero_tokens(), "Perform sequence-specific bias correction.")
+    ("gcBias", po::value(&(sopt.gcBiasCorrect))->zero_tokens(), "[experimental] Perform fragment GC bias correction")
     ("incompatPrior", po::value<double>(&(sopt.incompatPrior))->default_value(1e-20), "This option "
                         "sets the prior probability that an alignment that disagrees with the specified "
                         "library type (--libType) results from the true fragment origin.  Setting this to 0 "
@@ -927,10 +1182,6 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                         "the observed frequency of different types of mismatches when computing the likelihood of "
                         "a given alignment.")
     ("output,o", po::value<std::string>()->required(), "Output quantification directory.")
-    ("numRequiredObs,n", po::value(&requiredObservations)->default_value(50000000),
-                                        "[Deprecated]: The minimum number of observations (mapped reads) that must be observed before "
-                                        "the inference procedure will terminate.  If fewer mapped reads exist in the "
-                                        "input file, then it will be read through multiple times.")
     ("geneMap,g", po::value<std::string>(), "File containing a mapping of transcripts to genes.  If this file is provided "
                                         "Salmon will output both quant.sf and quant.genes.sf files, where the latter "
                                         "contains aggregated gene-level abundance estimates.  The transcript to gene mapping "
@@ -947,6 +1198,12 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
 
     po::options_description advanced("\nadvanced options");
     advanced.add_options()
+    ("auxDir", po::value<std::string>(&(sopt.auxDir))->default_value("aux"), "The sub-directory of the quantification directory where auxiliary information "
+     			"e.g. bootstraps, bias parameters, etc. will be written.")
+    ("noBiasLengthThreshold", po::bool_switch(&(sopt.noBiasLengthThreshold))->default_value(false), "[experimental] : "
+          "If this option is enabled, then no (lower) threshold will be set on "
+          "how short bias correction can make effective lengths. This can increase the precision "
+          "of bias correction, but harm robustness.  The default correction applies a threshold")
     ("fldMax" , po::value<size_t>(&(sopt.fragLenDistMax))->default_value(800), "The maximum fragment length to consider when building the empirical distribution")
     ("fldMean", po::value<size_t>(&(sopt.fragLenDistPriorMean))->default_value(200), "The mean used in the fragment length distribution prior")
     ("fldSD" , po::value<size_t>(&(sopt.fragLenDistPriorSD))->default_value(80), "The standard deviation used in the fragment length distribution prior")
@@ -954,6 +1211,14 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                         "in the online learning schedule.  A smaller value results in quicker learning, but higher variance "
                         "and may be unstable.  A larger value results in slower learning but may be more stable.  Value should "
                         "be in the interval (0.5, 1.0].")
+    ("gcSizeSamp", po::value<std::uint32_t>(&(sopt.gcSampFactor))->default_value(1), "The value by which to down-sample transcripts when representing the "
+         "GC content.  Larger values will reduce memory usage, but may decrease the fidelity of bias modeling results.")
+   ("biasSpeedSamp",
+          po::value<std::uint32_t>(&(sopt.pdfSampFactor))->default_value(1),
+          "The value at which the fragment length PMF is down-sampled "
+          "when evaluating sequence-specific & GC fragment bias.  Larger values speed up effective "
+          "length correction, but may decrease the fidelity of bias modeling "
+          "results.")
     ("mappingCacheMemoryLimit", po::value<uint32_t>(&(sopt.mappingCacheMemoryLimit))->default_value(2000000), "If the file contained fewer than this "
                                         "many mapped reads, then just keep the data in memory for subsequent rounds of inference. Obviously, this value should "
                                         "not be too large if you wish to keep a low memory usage, but setting it large enough to accommodate all of the mapped "
@@ -969,9 +1234,16 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                          "unlikely lengths will be assigned a smaller relative probability than those with more likely "
                         "lengths.  When this flag is passed in, the observed fragment length has no effect on that fragment's "
                         "a priori probability.")
-    ("useFSPD", po::bool_switch(&(sopt.useFSPD))->default_value(false), "[experimental] : "
-                        "Consider / model non-uniformity in the fragment start positions "
-                        "across the transcript.")
+    ("useVBOpt,v", po::bool_switch(&(sopt.useVBOpt))->default_value(false), "Use the Variational Bayesian EM rather than the "
+                           "traditional EM algorithm for optimization in the batch passes.")
+    ("perTranscriptPrior", po::bool_switch(&(sopt.perTranscriptPrior)), "The "
+    "prior (either the default or the argument provided via --vbPrior) will "
+    "be interpreted as a transcript-level prior (i.e. each transcript will "
+    "be given a prior read count of this value)")
+    ("vbPrior", po::value<double>(&(sopt.vbPrior))->default_value(1e-3),
+    "The prior that will be used in the VBEM algorithm.  This is interpreted "
+    "as a per-nucleotide prior, unless the --perTranscriptPrior flag "
+    "is also given, in which case this is used as a transcript-level prior")
     /*
     // Don't expose this yet
     ("noRichEqClasses", po::bool_switch(&(sopt.noRichEqClasses))->default_value(false),
@@ -980,11 +1252,16 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                         "label of an equivalence class will be ignored, and only the relative "
                         "abundance and effective length of each transcript will be considered.")
                         */
+    ("noBiasLengthThreshold", po::bool_switch(&(sopt.noBiasLengthThreshold))->default_value(false), "[experimental] : "
+                        "If this option is enabled, then bias correction will be allowed to estimate effective lengths "
+                        "shorter than the approximate mean fragment length")
     ("numErrorBins", po::value<uint32_t>(&(sopt.numErrorBins))->default_value(6), "The number of bins into which to divide "
                         "each read when learning and applying the error model.  For example, a value of 10 would mean that "
                         "effectively, a separate error model is leared and applied to each 10th of the read, while a value of "
                         "3 would mean that a separate error model is applied to the read beginning (first third), middle (second third) "
                         "and end (final third).")
+    ("numBiasSamples", po::value<int32_t>(&numBiasSamples)->default_value(2000000),
+            "Number of fragment mappings to use when learning the sequence-specific bias model.")
     ("numPreAuxModelSamples", po::value<uint32_t>(&(sopt.numPreBurninFrags))->default_value(1000000), "The first <numPreAuxModelSamples> will have their "
      			"assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models.  The purpose "
 			"of ignoring the auxiliary models for the first <numPreAuxModelSamples> observations is to avoid applying these models before thier "
@@ -1016,9 +1293,28 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                         "goodness-of-fit of an alignment with the empirical fragment length "
                         "distribution");
 
+    po::options_description hidden("\nhidden options");
+    hidden.add_options()
+      (
+       "numGCBins", po::value<size_t>(&(sopt.numFragGCBins))->default_value(100),
+       "Number of bins to use when modeling fragment GC bias")
+      (
+       "conditionalGCBins", po::value<size_t>(&(sopt.numConditionalGCBins))->default_value(3),
+       "Number of different fragment GC models to learn based on read start/end context")
+      (
+       "numRequiredObs,n", po::value(&requiredObservations)->default_value(50000000),
+       "[Deprecated]: The minimum number of observations (mapped reads) that must be observed before "
+       "the inference procedure will terminate.  If fewer mapped reads exist in the "
+       "input file, then it will be read through multiple times.");
+    
+    po::options_description deprecated("\ndeprecated options about which to inform the user");
+    deprecated.add_options()
+    ("useFSPD", po::bool_switch(&(sopt.useFSPD))->default_value(false), "[experimental] : "
+                        "Consider / model non-uniformity in the fragment start positions "
+     "across the transcript.");
 
     po::options_description all("salmon quant options");
-    all.add(basic).add(advanced).add(testing);
+    all.add(basic).add(advanced).add(testing).add(hidden).add(deprecated);
 
     po::options_description visible("salmon quant options");
     visible.add(basic).add(advanced);
@@ -1070,6 +1366,10 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         // TODO: Fix fragment start pos dist
         // sopt.useFSPD = false;
 
+        // Set the atomic variable numBiasSamples from the local version
+        sopt.numBiasSamples.store(numBiasSamples);
+	sopt.noBiasLengthThreshold = !sopt.useBiasLengthThreshold;
+	
         // Get the time at the start of the run
         std::time_t result = std::time(NULL);
         std::string runStartTime(std::asctime(std::localtime(&result)));
@@ -1132,6 +1432,9 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                 throw std::invalid_argument(errstr.str());
             }
         }
+        // set the output directory
+        sopt.outputDirectory = outputDirectory;
+
 
         bfs::path logDirectory = outputDirectory / "logs";
 
@@ -1142,7 +1445,9 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
             std::cerr << "exiting\n";
             std::exit(1);
         }
-        std::cerr << "Logs will be written to " << logDirectory.string() << "\n";
+	if (!sopt.quiet) {
+	  std::cout << "Logs will be written to " << logDirectory.string() << "\n";
+	}
 
         bfs::path logPath = logDirectory / "salmon.log";
         size_t max_q_size = 2097152;
@@ -1157,7 +1462,15 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         sopt.jointLog = jointLog;
         sopt.fileLog = fileLog;
 
-        // Verify that no inconsistent options were provided
+	// If the user is enabling *just* GC bias correction
+	// i.e. without seq-specific bias correction, then disable
+	// the conditional model.
+	if (sopt.gcBiasCorrect and !sopt.biasCorrect) {
+	  sopt.numConditionalGCBins = 1;
+	}
+
+	
+	// Verify that no inconsistent options were provided
         if (sopt.numGibbsSamples > 0 and sopt.numBootstraps > 0) {
             jointLog->error("You cannot perform both Gibbs sampling and bootstrapping. "
                             "Please choose one.");
@@ -1169,15 +1482,18 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
             fmt::MemoryWriter wstr;
             wstr << "WARNING: you passed in the (-u/--sampleUnaligned) flag, but did not request a sampled "
                  << "output file (-s/--sampleOut).  This flag will be ignored!\n";
-            jointLog->warn() << wstr.str();
+            jointLog->warn(wstr.str());
         }
 
         // maybe arbitrary, but if it's smaller than this, consider it
         // equal to LOG_0
-        if (sopt.incompatPrior < 1e-320) {
+        if (sopt.incompatPrior < 1e-320 or sopt.incompatPrior == 0.0) {
+            jointLog->info("Fragment incompatibility prior below threshold.  Incompatible fragments will be ignored.");
             sopt.incompatPrior = salmon::math::LOG_0;
+            sopt.ignoreIncompat = true;
         } else {
             sopt.incompatPrior = std::log(sopt.incompatPrior);
+            sopt.ignoreIncompat = false;
         }
 
         // Now create a subdirectory for any parameters of interest
@@ -1206,7 +1522,9 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                     oa(cereal::make_nvp(opt.string_key, opt.value));
                 }
             }
-        }
+	    // explicitly output the aux directory as well
+	    oa(cereal::make_nvp("auxDir", sopt.auxDir));
+	}
 
         // The transcript file contains the target sequences
         bfs::path transcriptFile(vm["targets"].as<std::string>());
@@ -1229,10 +1547,18 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         switch (libFmt.type) {
             case ReadType::SINGLE_END:
                 {
-                    AlignmentLibrary<UnpairedRead> alnLib(alignmentFiles,
-                                                          transcriptFile,
-                                                          libFmt,
-                                                          sopt);
+		  // We can only do fragment GC bias correction, for the time being, with paired-end reads
+		  if (sopt.gcBiasCorrect) {
+		    jointLog->warn("Fragment GC bias correction is currently only "
+				   "implemented for paired-end libraries.  Disabling "
+				   "fragment GC bias correction for this run");
+		    sopt.gcBiasCorrect = false;
+		  } 
+
+		    AlignmentLibrary<UnpairedRead> alnLib(alignmentFiles,
+							  transcriptFile,
+							  libFmt,
+							  sopt);
 
                     success = processSample<UnpairedRead>(alnLib, runStartTime,
                                                           requiredObservations, sopt,
diff --git a/src/SalmonUtils.cpp b/src/SalmonUtils.cpp
index 2fe3f76..d81cba2 100644
--- a/src/SalmonUtils.cpp
+++ b/src/SalmonUtils.cpp
@@ -1,561 +1,624 @@
-#include <boost/thread/thread.hpp>
-#include <boost/filesystem.hpp>
 #include <algorithm>
+#include <boost/filesystem.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/range/join.hpp>
+#include <boost/thread/thread.hpp>
+#include <fstream>
 #include <iostream>
+#include <random>
 #include <tuple>
-#include <unordered_set>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
-#include <random>
-#include <boost/filesystem.hpp>
-#include <boost/range/join.hpp>
 
-#include "SalmonUtils.hpp"
+#include "tbb/combinable.h"
+#include "tbb/parallel_for.h"
+
 #include "AlignmentLibrary.hpp"
-#include "ReadPair.hpp"
-#include "UnpairedRead.hpp"
-#include "SalmonMath.hpp"
+#include "DistributionUtils.hpp"
+#include "GCFragModel.hpp"
+#include "KmerContext.hpp"
 #include "LibraryFormat.hpp"
 #include "ReadExperiment.hpp"
+#include "ReadPair.hpp"
+#include "SBModel.hpp"
+#include "SalmonMath.hpp"
+#include "SalmonUtils.hpp"
+#include "UnpairedRead.hpp"
+#include "TryableSpinLock.hpp"
 
+#include "spdlog/fmt/ostr.h"
+#include "spdlog/fmt/fmt.h"
+#include "spdlog/sinks/ostream_sink.h"
 #include "spdlog/spdlog.h"
 
 #include "gff.h"
 
+#include "jellyfish/mer_dna.hpp"
 #include "jellyfish/stream_manager.hpp"
 #include "jellyfish/whole_sequence_parser.hpp"
-#include "jellyfish/mer_dna.hpp"
 
-#include "TranscriptGeneMap.hpp"
 #include "GenomicFeature.hpp"
+#include "SGSmooth.hpp"
+#include "TranscriptGeneMap.hpp"
 
 namespace salmon {
 namespace utils {
 
-    bool headersAreConsistent(SAM_hdr* h1, SAM_hdr* h2) {
-
-        bool consistent{true};
-        // Both files must contain the same number of targets
-        if (h1->nref != h2->nref) { consistent = false; }
-
-        // Check each target to ensure that the name and length are the same.
-        size_t i = 0;
-        size_t n = h1->nref;
-        while (consistent and i < n) {
-            size_t l1 = h1->ref[i].len;
-            size_t l2 = h2->ref[i].len;
-            consistent = (l1 == l2) and
-                         (strcmp(h1->ref[i].name, h2->ref[i].name) == 0);
-            ++i;
-        }
-
-        return consistent;
-    }
-
-    bool headersAreConsistent(std::vector<SAM_hdr*>&& headers) {
-        if (headers.size() == 1) { return true; }
-
-        // Ensure that all of the headers are consistent (i.e. the same), by
-        // comparing each with the first.
-        bool consistent{true};
-        auto itFirst = headers.begin();
-        auto it = itFirst;
-        while (++it != headers.end()) {
-            if (!headersAreConsistent(*itFirst, *it)) {
-                consistent = false;
-                break;
-            }
-        }
-        return consistent;
-    }
-
-    std::ostream& operator<<(std::ostream& os, OrphanStatus s) {
-        switch (s) {
-            case OrphanStatus::LeftOrphan:
-                os << "left orphan";
-                break;
-            case OrphanStatus::RightOrphan:
-                os << "right orphan";
-                break;
-            case OrphanStatus::Paired:
-                os << "paired";
-                break;
-        }
-        return os;
-    }
-
-    double logAlignFormatProb(const LibraryFormat observed,
-                              const LibraryFormat expected,
-                              int32_t start, bool isForward,
-                              rapmap::utils::MateStatus ms,
-                              double incompatPrior) {
-        // If we're dealing with a single end read.
-        bool compat {false};
-        if (ms != rapmap::utils::MateStatus::PAIRED_END_PAIRED) {
-            compat = compatibleHit(expected, start, isForward, ms);
-        } else {
-            compat = compatibleHit(expected, observed);
-        }
-        return (compat) ? salmon::math::LOG_1 : incompatPrior;
-        /** Old compat code
-        if (expected.type == ReadType::PAIRED_END and
-            observed.type == ReadType::SINGLE_END) {
-            double logOrphanProb = salmon::math::LOG_ORPHAN_PROB;
-            if (expected.strandedness == ReadStrandedness::U or
-                expected.strandedness == ReadStrandedness::AS or
-                expected.strandedness == ReadStrandedness::SA) {
-                return salmon::math::LOG_1;
-            } else {
-                return (expected.strandedness == observed.strandedness) ? logOrphanProb : incompatPrior;
-            }
-        } else if (observed.type != expected.type or
-            observed.orientation != expected.orientation ) {
-            return incompatPrior;
-        } else {
-            if (expected.strandedness == ReadStrandedness::U) {
-                return salmon::math::LOG_ONEHALF;
-            } else {
-                if (expected.strandedness == observed.strandedness) {
-                    return salmon::math::LOG_1;
-                } else {
-                    return incompatPrior;
-                }
-            }
-        }
+std::string str(const MappingType& mt) {
+  switch (mt) {
+  case MappingType::UNMAPPED:
+    return "u";
+  case MappingType::LEFT_ORPHAN:
+    return "m1";
+  case MappingType::RIGHT_ORPHAN:
+    return "m2";
+  case MappingType::BOTH_ORPHAN:
+    return "m12";
+  case MappingType::PAIRED_MAPPED:
+    return "mp";
+  case MappingType::SINGLE_MAPPED:
+    return "ms";
+  }
+  // should never get here!
+  return "E";
+}
 
-        fmt::print(stderr, "WARNING: logAlignFormatProb --- should not get here");
-        return salmon::math::LOG_0;
-        */
-    }
-
-    // for single end reads or orphans
-    bool compatibleHit(const LibraryFormat expected,
-            int32_t start, bool isForward, MateStatus ms) {
-        auto expectedStrand = expected.strandedness;
-        switch (ms) {
-            case MateStatus::SINGLE_END:
-                if (isForward) { // U, SF
-                    return (expectedStrand == ReadStrandedness::U or
-                            expectedStrand == ReadStrandedness::S);
-                } else { // U, SR
-                    return (expectedStrand == ReadStrandedness::U or
-                            expectedStrand == ReadStrandedness::A);
-                }
-                break;
-            case MateStatus::PAIRED_END_LEFT:
-                // "M"atching or same orientation is a special case
-                if (expected.orientation == ReadOrientation::SAME) {
-                    return (expectedStrand == ReadStrandedness::U
-                            or
-                            (expectedStrand == ReadStrandedness::S and isForward)
-                            or
-                            (expectedStrand == ReadStrandedness::A and !isForward));
-                } else if (isForward) { // IU, ISF, OU, OSF, MU, MSF
-                    return (expectedStrand == ReadStrandedness::U or
-                            expectedStrand == ReadStrandedness::S);
-                } else { // IU, ISR, OU, OSR, MU, MSR
-                    return (expectedStrand == ReadStrandedness::U or
-                            expectedStrand == ReadStrandedness::A);
-                }
-                break;
-            case MateStatus::PAIRED_END_RIGHT:
-                // "M"atching or same orientation is a special case
-                if (expected.orientation == ReadOrientation::SAME) {
-                    return (expectedStrand == ReadStrandedness::U
-                            or
-                            (expectedStrand == ReadStrandedness::S and isForward)
-                            or
-                            (expectedStrand == ReadStrandedness::A and !isForward));
-                } else if (isForward) { // IU, ISR, OU, OSR, MU, MSR
-                    return (expectedStrand == ReadStrandedness::U or
-                            expectedStrand == ReadStrandedness::A);
-                } else { // IU, ISF, OU, OSF, MU, MSF
-                    return (expectedStrand == ReadStrandedness::U or
-                            expectedStrand == ReadStrandedness::S);
-                }
-                break;
-            default:
-                // SHOULD NOT GET HERE
-                fmt::print(stderr, "WARNING: Could not associate known library type with read!\n");
-                return false;
-                break;
-        }
-        // SHOULD NOT GET HERE
-        fmt::print(stderr, "WARNING: Could not associate known library type with read!\n");
-        return false;
-    }
+bool headersAreConsistent(SAM_hdr* h1, SAM_hdr* h2) {
 
+  bool consistent{true};
+  // Both files must contain the same number of targets
+  if (h1->nref != h2->nref) {
+    consistent = false;
+  }
 
-    // for paired-end reads
-    bool compatibleHit(const LibraryFormat expected, const LibraryFormat observed) {
-        if (observed.type != ReadType::PAIRED_END) {
-            // SHOULD NOT GET HERE
-            fmt::print(stderr, "WARNING: PE compatibility function called with SE read!\n");
-            return false;
-        }
+  // Check each target to ensure that the name and length are the same.
+  size_t i = 0;
+  size_t n = h1->nref;
+  while (consistent and i < n) {
+    size_t l1 = h1->ref[i].len;
+    size_t l2 = h2->ref[i].len;
+    consistent = (l1 == l2) and (strcmp(h1->ref[i].name, h2->ref[i].name) == 0);
+    ++i;
+  }
 
-        auto es = expected.strandedness;
-        auto eo = expected.orientation;
+  return consistent;
+}
 
-        auto os = observed.strandedness;
-        auto oo = observed.orientation;
+bool headersAreConsistent(std::vector<SAM_hdr*>&& headers) {
+  if (headers.size() == 1) {
+    return true;
+  }
 
-        // If the orientations are different, they are incompatible
-        if (eo != oo) {
-            return false;
-        } else { // In this branch, the orientations are always compatible
-            return (es == ReadStrandedness::U or
-                    es == os);
-        }
-        // SHOULD NOT GET HERE
-        fmt::print(stderr, "WARNING: Could not determine strand compatibility!");
-        fmt::print(stderr, "please report this.\n");
-        return false;
+  // Ensure that all of the headers are consistent (i.e. the same), by
+  // comparing each with the first.
+  bool consistent{true};
+  auto itFirst = headers.begin();
+  auto it = itFirst;
+  while (++it != headers.end()) {
+    if (!headersAreConsistent(*itFirst, *it)) {
+      consistent = false;
+      break;
     }
+  }
+  return consistent;
+}
 
-    template <typename ExpLib>
-    void writeAbundancesFromCollapsed(
-                         const SalmonOpts& sopt,
-                         ExpLib& alnLib,
-                         boost::filesystem::path& fname,
-                         std::string headerComments) {
-        using salmon::math::LOG_0;
-        using salmon::math::LOG_1;
-
-        // If we're using lightweight-alignment (FMD)
-        // and not allowing orphans.
-        bool useScaledCounts = (!sopt.useQuasi and sopt.allowOrphans == false);
-
-        std::unique_ptr<std::FILE, int (*)(std::FILE *)> output(std::fopen(fname.c_str(), "w"), std::fclose);
-
-        fmt::print(output.get(), "{}", headerComments);
-	fmt::print(output.get(), "Name\tLength\tEffectiveLength\tTPM\tNumReads\n");
-
-        double numMappedFrags = alnLib.upperBoundHits();
+std::ostream& operator<<(std::ostream& os, OrphanStatus s) {
+  switch (s) {
+  case OrphanStatus::LeftOrphan:
+    os << "left orphan";
+    break;
+  case OrphanStatus::RightOrphan:
+    os << "right orphan";
+    break;
+  case OrphanStatus::Paired:
+    os << "paired";
+    break;
+  }
+  return os;
+}
 
-        std::vector<Transcript>& transcripts_ = alnLib.transcripts();
-        for (auto& transcript : transcripts_) {
-            transcript.projectedCounts = useScaledCounts ?
-                (transcript.mass(false) * numMappedFrags) : transcript.sharedCount();
-        }
+bool isCompatible(const LibraryFormat observed,
+                  const LibraryFormat expected,
+                  int32_t start,
+                  bool isForward,
+                  rapmap::utils::MateStatus ms) {
+  // Single-end reads and orphans (anything but a proper pair) take the first branch.
+  bool compat{false};
+  if (ms != rapmap::utils::MateStatus::PAIRED_END_PAIRED) {
+    compat = compatibleHit(expected, start, isForward, ms);
+  } else {
+    compat = compatibleHit(expected, observed);
+  }
+  return compat;
+}
 
-        double tfracDenom{0.0};
-        for (auto& transcript : transcripts_) {
-            double refLength = sopt.noEffectiveLengthCorrection ?
-                               transcript.RefLength :
-                               std::exp(transcript.getCachedLogEffectiveLength());
-            tfracDenom += (transcript.projectedCounts / numMappedFrags) / refLength;
-        }
+double logAlignFormatProb(const LibraryFormat observed,
+                          const LibraryFormat expected, int32_t start,
+                          bool isForward, rapmap::utils::MateStatus ms,
+                          double incompatPrior) {
+  // Single-end reads and orphans (anything but a proper pair) take the first branch.
+  bool compat{false};
+  if (ms != rapmap::utils::MateStatus::PAIRED_END_PAIRED) {
+    compat = compatibleHit(expected, start, isForward, ms);
+  } else {
+    compat = compatibleHit(expected, observed);
+  }
+  return (compat) ? salmon::math::LOG_1 : incompatPrior;
+  /** Old compat code
+  if (expected.type == ReadType::PAIRED_END and
+      observed.type == ReadType::SINGLE_END) {
+      double logOrphanProb = salmon::math::LOG_ORPHAN_PROB;
+      if (expected.strandedness == ReadStrandedness::U or
+          expected.strandedness == ReadStrandedness::AS or
+          expected.strandedness == ReadStrandedness::SA) {
+          return salmon::math::LOG_1;
+      } else {
+          return (expected.strandedness == observed.strandedness) ?
+  logOrphanProb : incompatPrior;
+      }
+  } else if (observed.type != expected.type or
+      observed.orientation != expected.orientation ) {
+      return incompatPrior;
+  } else {
+      if (expected.strandedness == ReadStrandedness::U) {
+          return salmon::math::LOG_ONEHALF;
+      } else {
+          if (expected.strandedness == observed.strandedness) {
+              return salmon::math::LOG_1;
+          } else {
+              return incompatPrior;
+          }
+      }
+  }
 
-        double million = 1000000.0;
-        // Now posterior has the transcript fraction
-        for (auto& transcript : transcripts_) {
-            double logLength = sopt.noEffectiveLengthCorrection ?
-                               std::log(transcript.RefLength) :
-                               transcript.getCachedLogEffectiveLength();
-            double count = transcript.projectedCounts;
-            double npm = (transcript.projectedCounts / numMappedFrags);
-            double effLength = std::exp(logLength);
-            double tfrac = (npm / effLength) / tfracDenom;
-            double tpm = tfrac * million;
-            fmt::print(output.get(), "{}\t{}\t{}\t{}\t{}\n",
-                    transcript.RefName, transcript.RefLength, effLength,
-                    tpm, count);
-        }
+  fmt::print(stderr, "WARNING: logAlignFormatProb --- should not get here");
+  return salmon::math::LOG_0;
+  */
+}
 
+// for single end reads or orphans
+bool compatibleHit(const LibraryFormat expected, int32_t start, bool isForward,
+                   MateStatus ms) {
+  auto expectedStrand = expected.strandedness;
+  switch (ms) {
+  case MateStatus::SINGLE_END:
+    if (isForward) { // U, SF
+      return (expectedStrand == ReadStrandedness::U or
+              expectedStrand == ReadStrandedness::S);
+    } else { // U, SR
+      return (expectedStrand == ReadStrandedness::U or
+              expectedStrand == ReadStrandedness::A);
     }
+    break;
+  case MateStatus::PAIRED_END_LEFT:
+    // "M"atching or same orientation is a special case
+    if (expected.orientation == ReadOrientation::SAME) {
+      return (expectedStrand == ReadStrandedness::U or
+              (expectedStrand == ReadStrandedness::S and isForward) or
+              (expectedStrand == ReadStrandedness::A and !isForward));
+    } else if (isForward) { // IU, ISF, OU, OSF, MU, MSF
+      return (expectedStrand == ReadStrandedness::U or
+              expectedStrand == ReadStrandedness::S);
+    } else { // IU, ISR, OU, OSR, MU, MSR
+      return (expectedStrand == ReadStrandedness::U or
+              expectedStrand == ReadStrandedness::A);
+    }
+    break;
+  case MateStatus::PAIRED_END_RIGHT:
+    // "M"atching or same orientation is a special case
+    if (expected.orientation == ReadOrientation::SAME) {
+      return (expectedStrand == ReadStrandedness::U or
+              (expectedStrand == ReadStrandedness::S and isForward) or
+              (expectedStrand == ReadStrandedness::A and !isForward));
+    } else if (isForward) { // IU, ISR, OU, OSR, MU, MSR
+      return (expectedStrand == ReadStrandedness::U or
+              expectedStrand == ReadStrandedness::A);
+    } else { // IU, ISF, OU, OSF, MU, MSF
+      return (expectedStrand == ReadStrandedness::U or
+              expectedStrand == ReadStrandedness::S);
+    }
+    break;
+  default:
+    // SHOULD NOT GET HERE
+    fmt::print(stderr,
+               "WARNING: Could not associate known library type with read!\n");
+    return false;
+    break;
+  }
+  // SHOULD NOT GET HERE
+  fmt::print(stderr,
+             "WARNING: Could not associate known library type with read!\n");
+  return false;
+}
 
-    template <typename ExpLib>
-    void writeAbundances(const SalmonOpts& sopt,
-                         ExpLib& alnLib,
-                         boost::filesystem::path& fname,
-                         std::string headerComments) {
-        using salmon::math::LOG_0;
-        using salmon::math::LOG_1;
-
-        std::unique_ptr<std::FILE, int (*)(std::FILE *)> output(std::fopen(fname.c_str(), "w"), std::fclose);
-
-        fmt::print(output.get(), "{}", headerComments);
-        fmt::print(output.get(), "# Name\tLength\tTPM\tFPKM\tNumReads\n");
-
+// for paired-end reads
+bool compatibleHit(const LibraryFormat expected, const LibraryFormat observed) {
+  if (observed.type != ReadType::PAIRED_END) {
+    // SHOULD NOT GET HERE
+    fmt::print(stderr,
+               "WARNING: PE compatibility function called with SE read!\n");
+    fmt::print(stderr, "expected: {}, observed: {}\n", expected, observed);
+    return false;
+  }
 
-        auto& refs = alnLib.transcripts();
-        auto numMappedFragments = alnLib.numMappedFragments();
-        const double logBillion = std::log(1000000000.0);
-        const double million = 1000000.0;
-        const double logNumFragments = std::log(static_cast<double>(numMappedFragments));
-        const double upperBoundFactor = static_cast<double>(alnLib.upperBoundHits()) /
-                                        numMappedFragments;
+  auto es = expected.strandedness;
+  auto eo = expected.orientation;
 
-        auto clusters = alnLib.clusterForest().getClusters();
-        size_t clusterID = 0;
-        for(auto cptr : clusters) {
+  auto os = observed.strandedness;
+  auto oo = observed.orientation;
 
-            //double logClusterMass = cptr->logMass();
-            // EDIT
-            double logClusterMass = salmon::math::LOG_0;
-            double logClusterCount = std::log(upperBoundFactor * static_cast<double>(cptr->numHits()));
+  // If the orientations are different, they are incompatible
+  if (eo != oo) {
+    return false;
+  } else { // In this branch, the orientations are always compatible
+    return (es == ReadStrandedness::U or es == os);
+  }
+  // SHOULD NOT GET HERE
+  fmt::print(stderr, "WARNING: Could not determine strand compatibility!");
+  fmt::print(stderr, "please report this.\n");
+  return false;
+}
 
-            bool requiresProjection{false};
+template <typename ExpLib>
+void writeAbundancesFromCollapsed(const SalmonOpts& sopt, ExpLib& alnLib,
+                                  boost::filesystem::path& fname,
+                                  std::string headerComments) {
+  using salmon::math::LOG_0;
+  using salmon::math::LOG_1;
 
-            auto& members = cptr->members();
-            size_t clusterSize{0};
-            for (auto transcriptID : members) {
-                Transcript& t = refs[transcriptID];
-                t.uniqueCounts = t.uniqueCount();
-                t.totalCounts = t.totalCount();
-                logClusterMass = salmon::math::logAdd(logClusterMass,
-                                    t.mass(false));
-                ++clusterSize;
-            }
+  // If we're using lightweight-alignment (FMD)
+  // and not allowing orphans.
+  bool useScaledCounts = (!sopt.useQuasi and sopt.allowOrphans == false);
 
-            if (logClusterMass == LOG_0) {
-                // std::cerr << "Warning: cluster " << clusterID << " has 0 mass!\n";
-            }
+  std::unique_ptr<std::FILE, int (*)(std::FILE*)> output(
+      std::fopen(fname.c_str(), "w"), std::fclose);
 
-            for (auto transcriptID : members) {
-                Transcript& t = refs[transcriptID];
-                double logTranscriptMass = t.mass(false);
-                // Try bias
-                /*
-                double logBias = t.bias();
-                logTranscriptMass += t.bias();
-                */
+  fmt::print(output.get(), "{}", headerComments);
+  fmt::print(output.get(), "Name\tLength\tEffectiveLength\tTPM\tNumReads\n");
 
-                if (logTranscriptMass == LOG_0) {
-                    t.projectedCounts = 0;
-                } else {
-                    double logClusterFraction = logTranscriptMass - logClusterMass;
-                    t.projectedCounts = std::exp(logClusterFraction + logClusterCount);
-                    requiresProjection |= t.projectedCounts > static_cast<double>(t.totalCounts) or
-                        t.projectedCounts < static_cast<double>(t.uniqueCounts);
-                }
-            }
+  double numMappedFrags = alnLib.upperBoundHits();
 
-            if (clusterSize > 1 and requiresProjection) {
-                cptr->projectToPolytope(refs);
-            }
-            ++clusterID;
-        }
+  std::vector<Transcript>& transcripts_ = alnLib.transcripts();
+  for (auto& transcript : transcripts_) {
+    transcript.projectedCounts = useScaledCounts
+                                     ? (transcript.mass(false) * numMappedFrags)
+                                     : transcript.sharedCount();
+  }
 
-        auto& transcripts_ = refs;
-        double tfracDenom{0.0};
-        for (auto& transcript : transcripts_) {
-            double refLength = sopt.noEffectiveLengthCorrection ?
-                               transcript.RefLength :
-                               std::exp(transcript.getCachedLogEffectiveLength());
-            //refLength = transcript.RefLength;
-            tfracDenom += (transcript.projectedCounts / numMappedFragments) / refLength;
-        }
+  double tfracDenom{0.0};
+  for (auto& transcript : transcripts_) {
+    double refLength = sopt.noEffectiveLengthCorrection
+                           ? transcript.RefLength
+                           : std::exp(transcript.getCachedLogEffectiveLength());
+    tfracDenom += (transcript.projectedCounts / numMappedFrags) / refLength;
+  }
 
-        // Now posterior has the transcript fraction
-        for (auto& transcript : transcripts_) {
-            double logLength = sopt.noEffectiveLengthCorrection ?
-                               std::log(transcript.RefLength) :
-                               transcript.getCachedLogEffectiveLength();
-            /*
-            if (!sopt.noSeqBiasModel) {
-                double avgLogBias = transcript.getAverageSequenceBias(
-                                    alnLib.sequenceBiasModel());
-                logLength += avgLogBias;
-            }
-            */
-            //logLength = std::log(transcript.RefLength);
-            double fpkmFactor = std::exp(logBillion - logLength - logNumFragments);
-            double count = transcript.projectedCounts;
-            //double countTotal = transcripts_[transcriptID].totalCounts;
-            //double countUnique = transcripts_[transcriptID].uniqueCounts;
-            double fpkm = count > 0 ? fpkmFactor * count : 0.0;
-            double npm = (transcript.projectedCounts / numMappedFragments);
-            double refLength = std::exp(logLength);
-            double tfrac = (npm / refLength) / tfracDenom;
-            double tpm = tfrac * million;
-
-            fmt::print(output.get(), "{}\t{}\t{}\t{}\t{}\n",
-                    transcript.RefName, transcript.RefLength,
-                    tpm, fpkm, count);
-        }
+  double million = 1000000.0;
+  // Now posterior has the transcript fraction
+  for (auto& transcript : transcripts_) {
+    double logLength = sopt.noEffectiveLengthCorrection
+                           ? std::log(transcript.RefLength)
+                           : transcript.getCachedLogEffectiveLength();
+    double count = transcript.projectedCounts;
+    double npm = (transcript.projectedCounts / numMappedFrags);
+    double effLength = std::exp(logLength);
+    double tfrac = (npm / effLength) / tfracDenom;
+    double tpm = tfrac * million;
+    fmt::print(output.get(), "{}\t{}\t{}\t{}\t{}\n", transcript.RefName,
+               transcript.RefLength, effLength, tpm, count);
+  }
+}
 
+template <typename ExpLib>
+void writeAbundances(const SalmonOpts& sopt, ExpLib& alnLib,
+                     boost::filesystem::path& fname,
+                     std::string headerComments) {
+  using salmon::math::LOG_0;
+  using salmon::math::LOG_1;
+
+  std::unique_ptr<std::FILE, int (*)(std::FILE*)> output(
+      std::fopen(fname.c_str(), "w"), std::fclose);
+
+  fmt::print(output.get(), "{}", headerComments);
+  fmt::print(output.get(), "# Name\tLength\tTPM\tFPKM\tNumReads\n");
+
+  auto& refs = alnLib.transcripts();
+  auto numMappedFragments = alnLib.numMappedFragments();
+  const double logBillion = std::log(1000000000.0);
+  const double million = 1000000.0;
+  const double logNumFragments =
+      std::log(static_cast<double>(numMappedFragments));
+  const double upperBoundFactor =
+      static_cast<double>(alnLib.upperBoundHits()) / numMappedFragments;
+
+  auto clusters = alnLib.clusterForest().getClusters();
+  size_t clusterID = 0;
+  for (auto cptr : clusters) {
+
+    // double logClusterMass = cptr->logMass();
+    // EDIT
+    double logClusterMass = salmon::math::LOG_0;
+    double logClusterCount =
+        std::log(upperBoundFactor * static_cast<double>(cptr->numHits()));
+
+    bool requiresProjection{false};
+
+    auto& members = cptr->members();
+    size_t clusterSize{0};
+    for (auto transcriptID : members) {
+      Transcript& t = refs[transcriptID];
+      t.uniqueCounts = t.uniqueCount();
+      t.totalCounts = t.totalCount();
+      logClusterMass = salmon::math::logAdd(logClusterMass, t.mass(false));
+      ++clusterSize;
     }
 
+    if (logClusterMass == LOG_0) {
+      // std::cerr << "Warning: cluster " << clusterID << " has 0 mass!\n";
+    }
 
+    for (auto transcriptID : members) {
+      Transcript& t = refs[transcriptID];
+      double logTranscriptMass = t.mass(false);
+      // Try bias
+      /*
+      double logBias = t.bias();
+      logTranscriptMass += t.bias();
+      */
+
+      if (logTranscriptMass == LOG_0) {
+        t.projectedCounts = 0;
+      } else {
+        double logClusterFraction = logTranscriptMass - logClusterMass;
+        t.projectedCounts = std::exp(logClusterFraction + logClusterCount);
+        requiresProjection |=
+            t.projectedCounts > static_cast<double>(t.totalCounts) or
+            t.projectedCounts < static_cast<double>(t.uniqueCounts);
+      }
+    }
 
-    template <typename AlnLibT>
-    void normalizeAlphas(const SalmonOpts& sopt,
-                         AlnLibT& alnLib) {
-
-        using salmon::math::LOG_0;
-        using salmon::math::LOG_1;
-
-        auto& refs = alnLib.transcripts();
-        auto numMappedFragments = alnLib.numMappedFragments();
-        const double logNumFragments = std::log(static_cast<double>(numMappedFragments));
-        auto clusters = alnLib.clusterForest().getClusters();
-        size_t clusterID = 0;
-        for(auto cptr : clusters) {
-
-            //double logClusterMass = cptr->logMass();
-            // EDIT
-            double logClusterMass = salmon::math::LOG_0;
-            double logClusterCount = std::log(static_cast<double>(cptr->numHits()));
-
-            bool requiresProjection{false};
-
-            auto& members = cptr->members();
-            size_t clusterSize{0};
-            for (auto transcriptID : members) {
-                Transcript& t = refs[transcriptID];
-                t.uniqueCounts = t.uniqueCount();
-                t.totalCounts = t.totalCount();
-                logClusterMass = salmon::math::logAdd(logClusterMass,
-                                    t.mass(false));// + t.bias());
-                ++clusterSize;
-            }
+    if (clusterSize > 1 and requiresProjection) {
+      cptr->projectToPolytope(refs);
+    }
+    ++clusterID;
+  }
 
-            if (logClusterMass == LOG_0) {
-                // std::cerr << "Warning: cluster " << clusterID << " has 0 mass!\n";
-            }
+  auto& transcripts_ = refs;
+  double tfracDenom{0.0};
+  for (auto& transcript : transcripts_) {
+    double refLength = sopt.noEffectiveLengthCorrection
+                           ? transcript.RefLength
+                           : std::exp(transcript.getCachedLogEffectiveLength());
+    // refLength = transcript.RefLength;
+    tfracDenom += (transcript.projectedCounts / numMappedFragments) / refLength;
+  }
 
-            for (auto transcriptID : members) {
-                Transcript& t = refs[transcriptID];
-                double logTranscriptMass = t.mass(false);
-                // Try bias
-                // double logBias = t.bias();
-                // logTranscriptMass += t.bias();
+  // Now posterior has the transcript fraction
+  for (auto& transcript : transcripts_) {
+    double logLength = sopt.noEffectiveLengthCorrection
+                           ? std::log(transcript.RefLength)
+                           : transcript.getCachedLogEffectiveLength();
+    /*
+    if (!sopt.noSeqBiasModel) {
+        double avgLogBias = transcript.getAverageSequenceBias(
+                            alnLib.sequenceBiasModel());
+        logLength += avgLogBias;
+    }
+    */
+    // logLength = std::log(transcript.RefLength);
+    double fpkmFactor = std::exp(logBillion - logLength - logNumFragments);
+    double count = transcript.projectedCounts;
+    // double countTotal = transcripts_[transcriptID].totalCounts;
+    // double countUnique = transcripts_[transcriptID].uniqueCounts;
+    double fpkm = count > 0 ? fpkmFactor * count : 0.0;
+    double npm = (transcript.projectedCounts / numMappedFragments);
+    double refLength = std::exp(logLength);
+    double tfrac = (npm / refLength) / tfracDenom;
+    double tpm = tfrac * million;
+
+    fmt::print(output.get(), "{}\t{}\t{}\t{}\t{}\n", transcript.RefName,
+               transcript.RefLength, tpm, fpkm, count);
+  }
+}
 
-                if (logTranscriptMass == LOG_0) {
-                    t.projectedCounts = 0;
-                } else {
-                    double logClusterFraction = logTranscriptMass - logClusterMass;
-                    t.projectedCounts = std::exp(logClusterFraction + logClusterCount);
-                    requiresProjection |= t.projectedCounts > static_cast<double>(t.totalCounts) or
-                        t.projectedCounts < static_cast<double>(t.uniqueCounts);
-                }
-            }
+template <typename AlnLibT>
+void normalizeAlphas(const SalmonOpts& sopt, AlnLibT& alnLib) {
+
+  using salmon::math::LOG_0;
+  using salmon::math::LOG_1;
+
+  auto& refs = alnLib.transcripts();
+  auto numMappedFragments = alnLib.numMappedFragments();
+  const double logNumFragments =
+      std::log(static_cast<double>(numMappedFragments));
+  auto clusters = alnLib.clusterForest().getClusters();
+  size_t clusterID = 0;
+  for (auto cptr : clusters) {
+
+    // double logClusterMass = cptr->logMass();
+    // EDIT
+    double logClusterMass = salmon::math::LOG_0;
+    double logClusterCount = std::log(static_cast<double>(cptr->numHits()));
+
+    bool requiresProjection{false};
+
+    auto& members = cptr->members();
+    size_t clusterSize{0};
+    for (auto transcriptID : members) {
+      Transcript& t = refs[transcriptID];
+      t.uniqueCounts = t.uniqueCount();
+      t.totalCounts = t.totalCount();
+      logClusterMass = salmon::math::logAdd(logClusterMass,
+                                            t.mass(false)); // + t.bias());
+      ++clusterSize;
+    }
 
-            if (clusterSize > 1 and requiresProjection) {
-                cptr->projectToPolytope(refs);
-            }
-            ++clusterID;
-        }
+    if (logClusterMass == LOG_0) {
+      // std::cerr << "Warning: cluster " << clusterID << " has 0 mass!\n";
+    }
 
-        auto& transcripts_ = refs;
-        double nFracDenom{0.0};
-        for (auto& transcript : transcripts_) {
-            nFracDenom += (transcript.projectedCounts / numMappedFragments);
-        }
+    for (auto transcriptID : members) {
+      Transcript& t = refs[transcriptID];
+      double logTranscriptMass = t.mass(false);
+      // Try bias
+      // double logBias = t.bias();
+      // logTranscriptMass += t.bias();
 
-	    double invNFracTotal = 1.0 / nFracDenom;
-        for (auto& transcript : transcripts_) {
-		double v = transcript.projectedCounts / numMappedFragments;
-		//transcript.setMass(v * invNFracTotal);
-		transcript.setMass(transcript.projectedCounts);
-        }
+      if (logTranscriptMass == LOG_0) {
+        t.projectedCounts = 0;
+      } else {
+        double logClusterFraction = logTranscriptMass - logClusterMass;
+        t.projectedCounts = std::exp(logClusterFraction + logClusterCount);
+        requiresProjection |=
+            t.projectedCounts > static_cast<double>(t.totalCounts) or
+            t.projectedCounts < static_cast<double>(t.uniqueCounts);
+      }
+    }
 
+    if (clusterSize > 1 and requiresProjection) {
+      cptr->projectToPolytope(refs);
     }
+    ++clusterID;
+  }
 
+  auto& transcripts_ = refs;
+  double nFracDenom{0.0};
+  for (auto& transcript : transcripts_) {
+    nFracDenom += (transcript.projectedCounts / numMappedFragments);
+  }
 
-    LibraryFormat hitType(int32_t end1Start, bool end1Fwd,
-                          int32_t end2Start, bool end2Fwd) {
+  double invNFracTotal = 1.0 / nFracDenom;
+  for (auto& transcript : transcripts_) {
+    double v = transcript.projectedCounts / numMappedFragments;
+    // transcript.setMass(v * invNFracTotal);
+    transcript.setMass(transcript.projectedCounts);
+  }
+}
 
-        // If the reads come from opposite strands
-        if (end1Fwd != end2Fwd) {
-            // and if read 1 comes from the forward strand
-            if (end1Fwd) {
-                // then if read 1 start < read 2 start ==> ISF
-                if (end1Start <= end2Start) {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::SA);
-                } // otherwise read 2 start < read 1 start ==> OSF
-                else {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::SA);
-                }
-            }
-            // and if read 2 comes from the forward strand
-            if (end2Fwd) {
-                // then if read 2 start <= read 1 start ==> ISR
-                if (end2Start <= end1Start) {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::AS);
-                } // otherwise, read 2 start > read 1 start ==> OSR
-                else {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::AS);
-                }
-            }
-        } else { // Otherwise, the reads come from the same strand
-            if (end1Fwd) { // if it's the forward strand ==> MSF
-                return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::S);
-            } else { // if it's the reverse strand ==> MSR
-                return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::A);
-            }
-        }
-        // SHOULD NOT GET HERE
-        spdlog::get("jointLog")->error("ERROR: Could not associate any known library type with read! "
-                                       "Please report this bug!\n");
-        std::exit(-1);
-        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::NONE, ReadStrandedness::U);
-    }
-
-
-
-    LibraryFormat hitType(int32_t end1Start, bool end1Fwd, uint32_t len1,
-                          int32_t end2Start, bool end2Fwd, uint32_t len2, bool canDovetail) {
-
-        // If the reads come from opposite strands
-        if (end1Fwd != end2Fwd) {
-            // and if read 1 comes from the forward strand
-            if (end1Fwd) {
-                // then if read 1 start < read 2 start ==> ISF
-                // NOTE: We can't really delineate between inward facing reads that stretch
-                // past each other and outward facing reads --- the purpose of stretch is to help
-                // make this determinateion.
-                int32_t stretch = canDovetail ? len2 : 0;
-                if (end1Start <= end2Start + stretch) {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::SA);
-                } // otherwise read 2 start < read 1 start ==> OSF
-                else {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::SA);
-                }
-            }
-            // and if read 2 comes from the forward strand
-            if (end2Fwd) {
-                // then if read 2 start <= read 1 start ==> ISR
-                // NOTE: We can't really delineate between inward facing reads that stretch
-                // past each other and outward facing reads --- the purpose of stretch is to help
-                // make this determinateion.
-                int32_t stretch = canDovetail ? len1 : 0;
-                if (end2Start <= end1Start + stretch) {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::AS);
-                } // otherwise, read 2 start > read 1 start ==> OSR
-                else {
-                    return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::AS);
-                }
-            }
-        } else { // Otherwise, the reads come from the same strand
-            if (end1Fwd) { // if it's the forward strand ==> MSF
-                return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::S);
-            } else { // if it's the reverse strand ==> MSR
-                return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::A);
-            }
-        }
-        // SHOULD NOT GET HERE
-        spdlog::get("jointLog")->error("ERROR: Could not associate any known library type with read! "
-                                       "Please report this bug!\n");
-        std::exit(-1);
-        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::NONE, ReadStrandedness::U);
+LibraryFormat hitType(int32_t end1Start, bool end1Fwd, int32_t end2Start,
+                      bool end2Fwd) {
+
+  // If the reads come from opposite strands
+  if (end1Fwd != end2Fwd) {
+    // and if read 1 comes from the forward strand
+    if (end1Fwd) {
+      // then if read 1 start <= read 2 start ==> ISF
+      if (end1Start <= end2Start) {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                             ReadStrandedness::SA);
+      } // otherwise read 2 start < read 1 start ==> OSF
+      else {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY,
+                             ReadStrandedness::SA);
+      }
     }
+    // and if read 2 comes from the forward strand
+    if (end2Fwd) {
+      // then if read 2 start <= read 1 start ==> ISR
+      if (end2Start <= end1Start) {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                             ReadStrandedness::AS);
+      } // otherwise, read 2 start > read 1 start ==> OSR
+      else {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY,
+                             ReadStrandedness::AS);
+      }
+    }
+  } else {         // Otherwise, the reads come from the same strand
+    if (end1Fwd) { // if it's the forward strand ==> MSF
+      return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME,
+                           ReadStrandedness::S);
+    } else { // if it's the reverse strand ==> MSR
+      return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME,
+                           ReadStrandedness::A);
+    }
+  }
+  // SHOULD NOT GET HERE
+  spdlog::get("jointLog")
+      ->error("ERROR: Could not associate any known library type with read! "
+              "Please report this bug!\n");
+  std::exit(-1);
+  return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::NONE,
+                       ReadStrandedness::U);
+}
 
-
-    LibraryFormat hitType(int32_t start, bool isForward) {
-        // If the read comes from the forward strand
-        if (isForward) {
-            return LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::S);
-        } else {
-            return LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::A);
-        }
-        // SHOULD NOT GET HERE
-        fmt::print(stderr, "WARNING: Could not associate known library type with read!\n");
-        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::NONE, ReadStrandedness::U);
-
+LibraryFormat hitType(int32_t end1Start, bool end1Fwd, uint32_t len1,
+                      int32_t end2Start, bool end2Fwd, uint32_t len2,
+                      bool canDovetail) {
+
+  // If the reads come from opposite strands
+  if (end1Fwd != end2Fwd) {
+    // and if read 1 comes from the forward strand
+    if (end1Fwd) {
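+      // then if read 1 start <= read 2 start (+ stretch) ==> ISF
+      // NOTE: We can't really distinguish inward-facing reads that stretch
+      // past each other from outward-facing reads. The purpose of `stretch`
+      // is to help make this determination: when canDovetail is set, the
+      // length of read 2 is added as slack to the start comparison, so such
+      // pairs are still reported as inward-facing (TOWARD).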
+      int32_t stretch = canDovetail ? len2 : 0;
+      if (end1Start <= end2Start + stretch) {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                             ReadStrandedness::SA);
+      } // otherwise read 2 start < read 1 start ==> OSF
+      else {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY,
+                             ReadStrandedness::SA);
+      }
     }
+    // and if read 2 comes from the forward strand
+    if (end2Fwd) {
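+      // then if read 2 start <= read 1 start (+ stretch) ==> ISR
+      // NOTE: We can't really distinguish inward-facing reads that stretch
+      // past each other from outward-facing reads. The purpose of `stretch`
+      // is to help make this determination: when canDovetail is set, the
+      // length of read 1 is added as slack to the start comparison, so such
+      // pairs are still reported as inward-facing (TOWARD).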
+      int32_t stretch = canDovetail ? len1 : 0;
+      if (end2Start <= end1Start + stretch) {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                             ReadStrandedness::AS);
+      } // otherwise, read 2 start > read 1 start ==> OSR
+      else {
+        return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY,
+                             ReadStrandedness::AS);
+      }
+    }
+  } else {         // Otherwise, the reads come from the same strand
+    if (end1Fwd) { // if it's the forward strand ==> MSF
+      return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME,
+                           ReadStrandedness::S);
+    } else { // if it's the reverse strand ==> MSR
+      return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME,
+                           ReadStrandedness::A);
+    }
+  }
+  // SHOULD NOT GET HERE
+  spdlog::get("jointLog")
+      ->error("ERROR: Could not associate any known library type with read! "
+              "Please report this bug!\n");
+  std::exit(-1);
+  return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::NONE,
+                       ReadStrandedness::U);
+}
+
+LibraryFormat hitType(int32_t start, bool isForward) {
+  // If the read comes from the forward strand
+  if (isForward) {
+    return LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE,
+                         ReadStrandedness::S);
+  } else {
+    return LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE,
+                         ReadStrandedness::A);
+  }
+  // Unreachable: both branches of the if/else above return.
+  fmt::print(stderr,
+             "WARNING: Could not associate known library type with read!\n");
+  return LibraryFormat(ReadType::PAIRED_END, ReadOrientation::NONE,
+                       ReadStrandedness::U);
+}
 
 using std::string;
 using NameVector = std::vector<string>;
@@ -563,590 +626,844 @@ using IndexVector = std::vector<size_t>;
 using KmerVector = std::vector<uint64_t>;
 
 /**
- * This function parses the library format string that specifies the format in which
+ * This function parses the library format string that specifies the format in
+ * which
  * the reads are to be expected.
  */
 LibraryFormat parseLibraryFormatStringNew(std::string& fmt) {
-	using std::vector;
-	using std::string;
-	using std::map;
-	using std::stringstream;
-
-    map<string, LibraryFormat> formatMap = {
-        {"IU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U)},
-        {"ISF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::SA)},
-        {"ISR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::AS)},
-        {"OU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::U)},
-        {"OSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::SA)},
-        {"OSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::AS)},
-        {"MU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::U)},
-        {"MSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::S)},
-        {"MSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::A)},
-        {"U", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U)},
-        {"SF", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::S)},
-        {"SR", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::A)}};
-
-	// inspired by http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c
-	// first convert the string to upper-case
-	for (auto& c : fmt) { c = std::toupper(c); }
-
-
-    auto libFmtIt = formatMap.find(fmt);
-
-	if (libFmtIt == formatMap.end()) {
-		stringstream errstr;
-		errstr << "unknown library format string : " << fmt;
-		throw std::invalid_argument(errstr.str());
-	}
+  using std::vector;
+  using std::string;
+  using std::map;
+  using std::stringstream;
+
+  map<string, LibraryFormat> formatMap = {
+      {"IU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                           ReadStrandedness::U)},
+      {"ISF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                            ReadStrandedness::SA)},
+      {"ISR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                            ReadStrandedness::AS)},
+      {"OU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY,
+                           ReadStrandedness::U)},
+      {"OSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY,
+                            ReadStrandedness::SA)},
+      {"OSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY,
+                            ReadStrandedness::AS)},
+      {"MU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME,
+                           ReadStrandedness::U)},
+      {"MSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME,
+                            ReadStrandedness::S)},
+      {"MSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME,
+                            ReadStrandedness::A)},
+      {"U", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE,
+                          ReadStrandedness::U)},
+      {"SF", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE,
+                           ReadStrandedness::S)},
+      {"SR", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE,
+                           ReadStrandedness::A)}};
+
+  // First convert the string to upper-case so that the lookup in formatMap
+  // is case-insensitive. Nothing is split here, despite the link below:
+  // http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c
+  for (auto& c : fmt) {
+    c = std::toupper(c);
+  }
+
+  auto libFmtIt = formatMap.find(fmt);
 
-    return libFmtIt->second;
+  if (libFmtIt == formatMap.end()) {
+    stringstream errstr;
+    errstr << "unknown library format string : " << fmt;
+    throw std::invalid_argument(errstr.str());
+  }
+
+  return libFmtIt->second;
 }
 
 /**
  * Parses a set of __ordered__ command line options and extracts the relevant
  * read libraries from them.
  */
-std::vector<ReadLibrary> extractReadLibraries(boost::program_options::parsed_options& orderedOptions) {
-	// The current (default) format for paired end data
-	LibraryFormat peFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U);
-	// The current (default) format for single end data
-	LibraryFormat seFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U);
-
-	std::vector<ReadLibrary> peLibs{ReadLibrary(peFormat)};
-	std::vector<ReadLibrary> seLibs{ReadLibrary(seFormat)};
-	for (auto& opt : orderedOptions.options) {
-		// Update the library type
-		if (opt.string_key == "libType") {
-			auto libFmt = parseLibraryFormatStringNew(opt.value[0]);
-			if (libFmt.type == ReadType::PAIRED_END) {
-				peFormat = libFmt;
-				peLibs.emplace_back(libFmt);
-			} else {
-				seFormat = libFmt;
-				seLibs.emplace_back(libFmt);
-			}
-		}
-		if (opt.string_key == "mates1") {
-			peLibs.back().addMates1(opt.value);
-		}
-		if (opt.string_key == "mates2") {
-			peLibs.back().addMates2(opt.value);
-		}
-		if (opt.string_key == "unmatedReads") {
-			seLibs.back().addUnmated(opt.value);
-		}
-	}
-
-	std::vector<ReadLibrary> libs;
-	libs.reserve(peLibs.size() + seLibs.size());
-	for (auto& lib : boost::range::join(seLibs, peLibs)) {
-		if (lib.format().type == ReadType::SINGLE_END) {
-			if (lib.unmated().size() == 0) {
-				// Didn't use default single end library type
-				continue;
-			}
-		} else if (lib.format().type == ReadType::PAIRED_END) {
-			if (lib.mates1().size() == 0 or lib.mates2().size() == 0) {
-                // Didn't use default paired-end library type
-				continue;
-			}
-		}
-		libs.push_back(lib);
+std::vector<ReadLibrary>
+extractReadLibraries(boost::program_options::parsed_options& orderedOptions) {
+  // The current (default) format for paired end data
+  LibraryFormat peFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD,
+                         ReadStrandedness::U);
+  // The current (default) format for single end data
+  LibraryFormat seFormat(ReadType::SINGLE_END, ReadOrientation::NONE,
+                         ReadStrandedness::U);
+
+  auto isAutoLibType = [](std::string& fmt) -> bool {
+    return (fmt.length() == 1 and (fmt.front() == 'a' or fmt.front() == 'A'));
+  };
+
+  bool autoLibType{false};
+  std::vector<ReadLibrary> peLibs{ReadLibrary(peFormat)};
+  std::vector<ReadLibrary> seLibs{ReadLibrary(seFormat)};
+  for (auto& opt : orderedOptions.options) {
+    // Update the library type
+    if (opt.string_key == "libType") {
+      if (!isAutoLibType(opt.value[0])) {
+	auto libFmt = parseLibraryFormatStringNew(opt.value[0]);
+	if (libFmt.type == ReadType::PAIRED_END) {
+	  peFormat = libFmt;
+	  peLibs.emplace_back(libFmt);
+	} else {
+	  seFormat = libFmt;
+	  seLibs.emplace_back(libFmt);
 	}
-    size_t numLibs = libs.size();
-	std::cerr << "there " << ((numLibs > 1) ? "are " : "is ") << libs.size() << ((numLibs > 1) ? " libs\n" : " lib\n");
-	return libs;
-}
+      } else {
+	autoLibType = true;
+      }
+    }
 
+    if (opt.string_key == "mates1") {
+      peLibs.back().addMates1(opt.value);
+      if (autoLibType) { peLibs.back().enableAutodetect(); }
+    }
+    if (opt.string_key == "mates2") {
+      peLibs.back().addMates2(opt.value);
+      if (autoLibType) { peLibs.back().enableAutodetect(); }
+    }
+    if (opt.string_key == "unmatedReads") {
+      seLibs.back().addUnmated(opt.value);
+      if (autoLibType) { seLibs.back().enableAutodetect(); }
+    }
+  }
 
+  std::vector<ReadLibrary> libs;
+  libs.reserve(peLibs.size() + seLibs.size());
+  for (auto& lib : boost::range::join(seLibs, peLibs)) {
+    if (lib.format().type == ReadType::SINGLE_END) {
+      if (lib.unmated().size() == 0) {
+        // Didn't use default single end library type
+        continue;
+      }
+    } else if (lib.format().type == ReadType::PAIRED_END) {
+      if (lib.mates1().size() == 0 or lib.mates2().size() == 0) {
+        // Didn't use default paired-end library type
+        continue;
+      }
+    }
+    libs.push_back(lib);
+  }
+  
+  auto log = spdlog::get("jointLog");
+  size_t numLibs = libs.size();
+  if (numLibs == 1) {
+      log->info("There is 1 library.");
+  } else if (numLibs > 1) {
+      log->info("There are {} libraries.", numLibs);
+  } 
+  return libs;
+}
 
 /**
- * This function parses the library format string that specifies the format in which
+ * This function parses the library format string that specifies the format in
+ * which
  * the reads are to be expected.
  */
 LibraryFormat parseLibraryFormatString(std::string& fmt) {
-    using std::vector;
-    using std::string;
-    using std::map;
-    using std::stringstream;
-
-    // inspired by http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c
-
-    // first convert the string to upper-case
-    for (auto& c : fmt) { c = std::toupper(c); }
-    // split on the delimiter ':', and put the key, value (k=v) pairs into a map
-    stringstream ss(fmt);
-    string item;
-    map<string, string> kvmap;
-    while (std::getline(ss, item, ':')) {
-        auto splitPos = item.find('=', 0);
-        string key{item.substr(0, splitPos)};
-        string value{item.substr(splitPos+1)};
-        kvmap[key] = value;
-    }
-
-    map<string, ReadType> readType = {{"SE", ReadType::SINGLE_END}, {"PE", ReadType::PAIRED_END}};
-    map<string, ReadOrientation> orientationType = {{">>", ReadOrientation::SAME},
-                                           {"<>", ReadOrientation::AWAY},
-                                           {"><", ReadOrientation::TOWARD},
-                                           {"*", ReadOrientation::NONE}};
-    map<string, ReadStrandedness> strandType = {{"SA", ReadStrandedness::SA},
-                                    {"AS", ReadStrandedness::AS},
-                                    {"A", ReadStrandedness::A},
-                                    {"S", ReadStrandedness::S},
-                                    {"U", ReadStrandedness::U}};
-    auto it = kvmap.find("T");
-    string typeStr = "";
+  using std::vector;
+  using std::string;
+  using std::map;
+  using std::stringstream;
+
+  // inspired by
+  // http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c
+
+  // first convert the string to upper-case
+  for (auto& c : fmt) {
+    c = std::toupper(c);
+  }
+  // split on the delimiter ':', and put the key, value (k=v) pairs into a map
+  stringstream ss(fmt);
+  string item;
+  map<string, string> kvmap;
+  while (std::getline(ss, item, ':')) {
+    auto splitPos = item.find('=', 0);
+    string key{item.substr(0, splitPos)};
+    string value{item.substr(splitPos + 1)};
+    kvmap[key] = value;
+  }
+
+  map<string, ReadType> readType = {{"SE", ReadType::SINGLE_END},
+                                    {"PE", ReadType::PAIRED_END}};
+  map<string, ReadOrientation> orientationType = {
+      {">>", ReadOrientation::SAME},
+      {"<>", ReadOrientation::AWAY},
+      {"><", ReadOrientation::TOWARD},
+      {"*", ReadOrientation::NONE}};
+  map<string, ReadStrandedness> strandType = {{"SA", ReadStrandedness::SA},
+                                              {"AS", ReadStrandedness::AS},
+                                              {"A", ReadStrandedness::A},
+                                              {"S", ReadStrandedness::S},
+                                              {"U", ReadStrandedness::U}};
+  auto it = kvmap.find("T");
+  string typeStr = "";
+  if (it != kvmap.end()) {
+    typeStr = it->second;
+  } else {
+    it = kvmap.find("TYPE");
     if (it != kvmap.end()) {
-        typeStr = it->second;
-    } else {
-        it = kvmap.find("TYPE");
-        if (it != kvmap.end()) {
-            typeStr = it->second;
-        }
+      typeStr = it->second;
     }
+  }
+
+  if (typeStr != "SE" and typeStr != "PE") {
+    string e = typeStr + " is not a valid read type; must be one of {SE, PE}";
+    throw std::invalid_argument(e);
+  }
 
-    if (typeStr != "SE" and typeStr != "PE") {
-        string e = typeStr + " is not a valid read type; must be one of {SE, PE}";
+  ReadType type =
+      (typeStr == "SE") ? ReadType::SINGLE_END : ReadType::PAIRED_END;
+  ReadOrientation orientation = (type == ReadType::SINGLE_END)
+                                    ? ReadOrientation::NONE
+                                    : ReadOrientation::TOWARD;
+  ReadStrandedness strandedness{ReadStrandedness::U};
+  // Construct the LibraryFormat class from the key, value map
+  for (auto& kv : kvmap) {
+    auto& k = kv.first;
+    auto& v = kv.second;
+    if (k == "O" or k == "ORIENTATION") {
+      auto it = orientationType.find(v);
+      if (it != orientationType.end()) {
+        orientation = orientationType[it->first];
+      } else {
+        string e =
+            v + " is not a valid orientation type; must be one of {>>, <>, ><}";
         throw std::invalid_argument(e);
+      }
     }
-
-    ReadType type = (typeStr == "SE") ? ReadType::SINGLE_END : ReadType::PAIRED_END;
-    ReadOrientation orientation = (type == ReadType::SINGLE_END) ? ReadOrientation::NONE : ReadOrientation::TOWARD;
-    ReadStrandedness strandedness{ReadStrandedness::U};
-    // Construct the LibraryFormat class from the key, value map
-    for (auto& kv : kvmap) {
-        auto& k = kv.first; auto& v = kv.second;
-        if (k == "O" or k == "ORIENTATION") {
-            auto it = orientationType.find(v);
-            if (it != orientationType.end()) { orientation = orientationType[it->first]; } else {
-                string e = v + " is not a valid orientation type; must be one of {>>, <>, ><}";
-                throw std::invalid_argument(e);
-            }
-
-        }
-        if (k == "S" or k == "STRAND") {
-            auto it = strandType.find(v);
-            if (it != strandType.end()) { strandedness = strandType[it->first]; } else {
-                string e = v + " is not a valid strand type; must be one of {SA, AS, S, A, U}";
-                throw std::invalid_argument(e);
-            }
-        }
-
+    if (k == "S" or k == "STRAND") {
+      auto it = strandType.find(v);
+      if (it != strandType.end()) {
+        strandedness = strandType[it->first];
+      } else {
+        string e =
+            v + " is not a valid strand type; must be one of {SA, AS, S, A, U}";
+        throw std::invalid_argument(e);
+      }
     }
-    LibraryFormat lf(type, orientation, strandedness);
-    return lf;
+  }
+  LibraryFormat lf(type, orientation, strandedness);
+  return lf;
 }
 
-
-
 uint64_t encode(uint64_t tid, uint64_t offset) {
-    uint64_t res = (((tid & 0xFFFFFFFF) << 32) | (offset & 0xFFFFFFFF));
-    return res;
+  uint64_t res = (((tid & 0xFFFFFFFF) << 32) | (offset & 0xFFFFFFFF));
+  return res;
 }
 
 uint32_t transcript(uint64_t enc) {
-    uint32_t t = (enc & 0xFFFFFFFF00000000) >> 32;
-    return t;
+  uint32_t t = (enc & 0xFFFFFFFF00000000) >> 32;
+  return t;
 }
 
 uint32_t offset(uint64_t enc) {
-    uint32_t o = enc & 0xFFFFFFFF;
-    return o;
+  uint32_t o = enc & 0xFFFFFFFF;
+  return o;
 }
 
 size_t numberOfReadsInFastaFile(const std::string& fname) {
-    constexpr size_t bufferSize = 16184;
-    char buffer[bufferSize];
-    std::ifstream ifile(fname, std::ifstream::in);
-    ifile.rdbuf()->pubsetbuf(buffer, bufferSize);
-
-    size_t numReads = 0;
-    std::string s;
-    while (ifile >> s) { if (s.front() == '>') { ++numReads; } }
+  constexpr size_t bufferSize = 16184;
+  char buffer[bufferSize];
+  std::ifstream ifile(fname, std::ifstream::in);
+  ifile.rdbuf()->pubsetbuf(buffer, bufferSize);
+
+  size_t numReads = 0;
+  std::string s;
+  while (ifile >> s) {
+    if (s.front() == '>') {
+      ++numReads;
+    }
+  }
 
-    ifile.close();
+  ifile.close();
 
-    return numReads;
+  return numReads;
 }
 
-bool readKmerOrder( const std::string& fname, std::vector<uint64_t>& kmers ) {
+bool readKmerOrder(const std::string& fname, std::vector<uint64_t>& kmers) {
 
   std::ifstream mlist(fname, std::ios::in | std::ios::binary);
   // Get the number of kmers from file
   size_t numKmers{0};
-  mlist.read( reinterpret_cast<char*>( &numKmers ), sizeof( size_t ) );
+  mlist.read(reinterpret_cast<char*>(&numKmers), sizeof(size_t));
 
   // Resize the array that will hold the sorted kmers
   kmers.resize(numKmers, 0);
-  mlist.read( reinterpret_cast<char*>( &kmers[0] ), sizeof( uint64_t) * kmers.size() );
+  mlist.read(reinterpret_cast<char*>(&kmers[0]),
+             sizeof(uint64_t) * kmers.size());
 
   mlist.close();
 
   return true;
 }
 
-template <template<typename> class S, typename T>
-bool overlap( const S<T> &a, const S<T> &b ) {
-    // Query from the smaller set to the larger set
-    if ( a.size() <= b.size() ) {
-        for ( auto & ae : a ) {
-            if (b.find(ae) != b.end()) {
-                return true;
-            }
-        }
-    } else {
-        for ( auto & be : b ) {
-            if (a.find(be) != b.end()) {
-                return true;
-            }
-        }
+template <template <typename> class S, typename T>
+bool overlap(const S<T>& a, const S<T>& b) {
+  // Query from the smaller set to the larger set
+  if (a.size() <= b.size()) {
+    for (auto& ae : a) {
+      if (b.find(ae) != b.end()) {
+        return true;
+      }
     }
-    // If nothing from the smaller set is in the larger set, then they don't overlap
-    return false;
+  } else {
+    for (auto& be : b) {
+      if (a.find(be) != b.end()) {
+        return true;
+      }
+    }
+  }
+  // If nothing from the smaller set is in the larger set, then they don't
+  // overlap
+  return false;
 }
 
+TranscriptGeneMap transcriptGeneMapFromGTF(const std::string& fname,
+                                           std::string key) {
 
-TranscriptGeneMap transcriptGeneMapFromGTF(const std::string& fname, std::string key) {
-
-    using std::unordered_set;
-    using std::unordered_map;
-    using std::vector;
-    using std::tuple;
-    using std::string;
-    using std::get;
-
-    // Use GffReader to read the file
-    GffReader reader(const_cast<char*>(fname.c_str()));
-    // Remember the optional attributes
-    reader.readAll(true);
-
-    struct TranscriptKeyPair {
-        const char* transcript_id;
-        const char* key;
-        TranscriptKeyPair(const char* t, const char* k) :
-            transcript_id(t), key(k) {}
-    };
-
-    // The user can group transcripts by gene_id, gene_name, or
-    // an optinal attribute that they provide as a string.
-    enum class TranscriptKey { GENE_ID, GENE_NAME, DYNAMIC };
-
-    // Select the proper attribute by which to group
-    TranscriptKey tkey = TranscriptKey::GENE_ID;
+  using std::unordered_set;
+  using std::unordered_map;
+  using std::vector;
+  using std::tuple;
+  using std::string;
+  using std::get;
+
+  // Use GffReader to read the file
+  GffReader reader(const_cast<char*>(fname.c_str()));
+  // Remember the optional attributes
+  reader.readAll(true);
+
+  struct TranscriptKeyPair {
+    const char* transcript_id;
+    const char* key;
+    TranscriptKeyPair(const char* t, const char* k)
+        : transcript_id(t), key(k) {}
+  };
+
+  // The user can group transcripts by gene_id, gene_name, or
+  // an optional attribute that they provide as a string.
+  enum class TranscriptKey { GENE_ID, GENE_NAME, DYNAMIC };
+
+  // Select the proper attribute by which to group
+  TranscriptKey tkey = TranscriptKey::GENE_ID;
+
+  if (key == "gene_id") {
+  } else if (key == "gene_name") {
+    tkey = TranscriptKey::GENE_NAME;
+  } else {
+    tkey = TranscriptKey::DYNAMIC;
+  }
 
-    if (key == "gene_id") {
-    } else if (key == "gene_name") {
-        tkey = TranscriptKey::GENE_NAME;
-    } else {
-        tkey = TranscriptKey::DYNAMIC;
-    }
-
-    // Iterate over all transcript features and build the
-    // transcript <-> key vector.
-    auto nfeat = reader.gflst.Count();
-    std::vector<TranscriptKeyPair> feats;
-    for (int i=0; i < nfeat; ++i) {
-        auto f = reader.gflst[i];
-        if (f->isTranscript()) {
-            const char* keyStr;
-            switch (tkey) {
-                case TranscriptKey::GENE_ID:
-                    keyStr = f->getGeneID();
-                    break;
-                case TranscriptKey::GENE_NAME:
-                    keyStr = f->getGeneName();
-                    break;
-                case TranscriptKey::DYNAMIC:
-                    keyStr = f->getAttr(key.c_str());
-                    break;
-            }
-            feats.emplace_back(f->getID(), keyStr);
-        }
+  // Iterate over all transcript features and build the
+  // transcript <-> key vector.
+  auto nfeat = reader.gflst.Count();
+  std::vector<TranscriptKeyPair> feats;
+  for (int i = 0; i < nfeat; ++i) {
+    auto f = reader.gflst[i];
+    if (f->isTranscript()) {
+      const char* keyStr;
+      switch (tkey) {
+      case TranscriptKey::GENE_ID:
+        keyStr = f->getGeneID();
+        break;
+      case TranscriptKey::GENE_NAME:
+        keyStr = f->getGeneName();
+        break;
+      case TranscriptKey::DYNAMIC:
+        keyStr = f->getAttr(key.c_str());
+        break;
+      }
+      feats.emplace_back(f->getID(), keyStr);
     }
+  }
 
-    // Given the transcript <-> key vector, build the
-    // TranscriptGeneMap.
-
-    IndexVector t2g;
-    NameVector transcriptNames;
-    NameVector geneNames;
-
-    // holds the mapping from transcript ID to gene ID
-    IndexVector t2gUnordered;
-    // holds the set of gene IDs
-    unordered_map<string, size_t> geneNameToID;
-
-    // To read the input and assign ids
-    size_t transcriptCounter = 0;
-    size_t geneCounter = 0;
-    string transcript;
-    string gene;
-
-    std::sort( feats.begin(), feats.end(),
-    []( const TranscriptKeyPair & a, const TranscriptKeyPair & b) -> bool {
-        return std::strcmp(a.transcript_id, b.transcript_id) < 0;
-    } );
-
-    std::string currentTranscript = "";
-    for ( auto & feat : feats ) {
-
-        std::string gene(feat.key);
-        std::string transcript(feat.transcript_id);
-
-        if ( transcript != currentTranscript ) {
-            auto geneIt = geneNameToID.find(gene);
-            size_t geneID = 0;
-
-            if ( geneIt == geneNameToID.end() ) {
-                // If we haven't seen this gene yet, give it a new ID
-                geneNameToID[gene] = geneCounter;
-                geneID = geneCounter;
-                geneNames.push_back(gene);
-                ++geneCounter;
-            } else {
-                // Otherwise lookup the ID
-                geneID = geneIt->second;
-            }
-
-            transcriptNames.push_back(transcript);
-            t2g.push_back(geneID);
+  // Given the transcript <-> key vector, build the
+  // TranscriptGeneMap.
+
+  IndexVector t2g;
+  NameVector transcriptNames;
+  NameVector geneNames;
+
+  // holds the mapping from transcript ID to gene ID
+  IndexVector t2gUnordered;
+  // holds the set of gene IDs
+  unordered_map<string, size_t> geneNameToID;
+
+  // To read the input and assign ids
+  size_t transcriptCounter = 0;
+  size_t geneCounter = 0;
+  string transcript;
+  string gene;
+
+  std::sort(feats.begin(), feats.end(),
+            [](const TranscriptKeyPair& a, const TranscriptKeyPair& b) -> bool {
+              return std::strcmp(a.transcript_id, b.transcript_id) < 0;
+            });
+
+  std::string currentTranscript = "";
+  for (auto& feat : feats) {
+
+    std::string gene(feat.key);
+    std::string transcript(feat.transcript_id);
+
+    if (transcript != currentTranscript) {
+      auto geneIt = geneNameToID.find(gene);
+      size_t geneID = 0;
+
+      if (geneIt == geneNameToID.end()) {
+        // If we haven't seen this gene yet, give it a new ID
+        geneNameToID[gene] = geneCounter;
+        geneID = geneCounter;
+        geneNames.push_back(gene);
+        ++geneCounter;
+      } else {
+        // Otherwise lookup the ID
+        geneID = geneIt->second;
+      }
 
-            //++transcriptID;
-            currentTranscript = transcript;
-        }
+      transcriptNames.push_back(transcript);
+      t2g.push_back(geneID);
 
+      //++transcriptID;
+      currentTranscript = transcript;
     }
+  }
 
-    return TranscriptGeneMap(transcriptNames, geneNames, t2g);
-
+  return TranscriptGeneMap(transcriptNames, geneNames, t2g);
 }
 
+TranscriptGeneMap readTranscriptToGeneMap(std::ifstream& ifile) {
 
-TranscriptGeneMap readTranscriptToGeneMap( std::ifstream &ifile ) {
-
-    using std::unordered_set;
-    using std::unordered_map;
-    using std::vector;
-    using std::tuple;
-    using std::string;
-    using std::get;
-
-    using NameID = tuple<string, size_t>;
-
-    IndexVector t2g;
-    NameVector transcriptNames;
-    NameVector geneNames;
-
-    // holds the transcript name ID mapping
-    vector<NameID> transcripts;
-    // holds the mapping from transcript ID to gene ID
-    IndexVector t2gUnordered;
-    // holds the set of gene IDs
-    unordered_map<string, size_t> geneNameToID;
-
-    // To read the input and assign ids
-    size_t transcriptCounter = 0;
-    size_t geneCounter = 0;
-    string transcript;
-    string gene;
-
-    while ( ifile >> transcript >> gene ) {
-        // The transcript and it's ID
-        transcripts.push_back( make_tuple(transcript, transcriptCounter) );
-
-        auto geneIt = geneNameToID.find(gene);
-        size_t geneID = 0;
-
-        if ( geneIt == geneNameToID.end() ) {
-            // If we haven't seen this gene yet, give it a new ID
-            geneNameToID[gene] = geneCounter;
-            geneID = geneCounter;
-            geneNames.push_back(gene);
-            ++geneCounter;
-        } else {
-            // Otherwise lookup the ID
-            geneID = geneIt->second;
-        }
+  using std::unordered_set;
+  using std::unordered_map;
+  using std::vector;
+  using std::tuple;
+  using std::string;
+  using std::get;
+
+  using NameID = tuple<string, size_t>;
+
+  IndexVector t2g;
+  NameVector transcriptNames;
+  NameVector geneNames;
+
+  // holds the transcript name ID mapping
+  vector<NameID> transcripts;
+  // holds the mapping from transcript ID to gene ID
+  IndexVector t2gUnordered;
+  // holds the set of gene IDs
+  unordered_map<string, size_t> geneNameToID;
+
+  // To read the input and assign ids
+  size_t transcriptCounter = 0;
+  size_t geneCounter = 0;
+  string transcript;
+  string gene;
+
+  while (ifile >> transcript >> gene) {
+    // The transcript and its ID
+    transcripts.push_back(make_tuple(transcript, transcriptCounter));
+
+    auto geneIt = geneNameToID.find(gene);
+    size_t geneID = 0;
+
+    if (geneIt == geneNameToID.end()) {
+      // If we haven't seen this gene yet, give it a new ID
+      geneNameToID[gene] = geneCounter;
+      geneID = geneCounter;
+      geneNames.push_back(gene);
+      ++geneCounter;
+    } else {
+      // Otherwise lookup the ID
+      geneID = geneIt->second;
+    }
 
-        // Map the transcript to the gene in terms of their IDs
-        t2gUnordered.push_back(geneID);
+    // Map the transcript to the gene in terms of their IDs
+    t2gUnordered.push_back(geneID);
 
-        ++transcriptCounter;
-    }
+    ++transcriptCounter;
+  }
 
-    std::sort( transcripts.begin(), transcripts.end(),
-               []( const NameID & a, const NameID & b) -> bool { return get<0>(a) < get<0>(b); } );
+  std::sort(transcripts.begin(), transcripts.end(),
+            [](const NameID& a, const NameID& b) -> bool {
+              return get<0>(a) < get<0>(b);
+            });
+
+  // Resize these vectors for fast access
+  transcriptNames.resize(t2gUnordered.size());
+  t2g.resize(t2gUnordered.size());
+
+  for (size_t newID = 0; newID < transcripts.size(); ++newID) {
+    // For each transcript, map it to the appropriate gene
+    string oldName;
+    size_t oldID;
+    std::tie(oldName, oldID) = transcripts[newID];
+    t2g[newID] = t2gUnordered[oldID];
+    transcriptNames[newID] = oldName;
+  }
 
-    // Resize these vectors for fast access
-    transcriptNames.resize(t2gUnordered.size());
-    t2g.resize(t2gUnordered.size());
+  return TranscriptGeneMap(transcriptNames, geneNames, t2g);
+}
 
-    for ( size_t newID = 0; newID < transcripts.size(); ++newID ) {
-        // For each transcript, map it to the appropriate gene
-        string oldName; size_t oldID;
-        std::tie(oldName, oldID) = transcripts[newID];
-        t2g[newID] = t2gUnordered[oldID];
-        transcriptNames[newID] = oldName;
+TranscriptGeneMap
+transcriptToGeneMapFromFasta(const std::string& transcriptsFile) {
+  using std::vector;
+  using stream_manager = jellyfish::stream_manager<char**>;
+  using sequence_parser = jellyfish::whole_sequence_parser<stream_manager>;
+  namespace bfs = boost::filesystem;
+
+  NameVector transcriptNames;
+  NameVector geneNames{"gene"};
+
+  vector<bfs::path> paths{transcriptsFile};
+
+  // Create a jellyfish parser
+  const int concurrentFile{1};
+  char** fnames = new char*[1];
+  fnames[0] = const_cast<char*>(transcriptsFile.c_str());
+  stream_manager streams(fnames, fnames + 1, concurrentFile);
+
+  size_t maxReadGroupSize{100};
+  sequence_parser parser(4, maxReadGroupSize, concurrentFile, streams);
+
+  // while there are transcripts left to process
+  while (true) {
+    sequence_parser::job j(parser);
+    // If this job is empty, then we're done
+    if (j.is_empty()) {
+      break;
     }
 
-    return TranscriptGeneMap(transcriptNames, geneNames, t2g);
-}
+    for (size_t i = 0; i < j->nb_filled; ++i) {
+      // The transcript name
+      std::string fullHeader(j->data[i].header);
+      std::string header = fullHeader.substr(0, fullHeader.find(' '));
+      transcriptNames.emplace_back(header);
+    }
+  }
 
+  // Sort the transcript names
+  std::sort(transcriptNames.begin(), transcriptNames.end());
 
-TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFile ) {
-    using std::vector;
-    using stream_manager = jellyfish::stream_manager<char**>;
-    using sequence_parser = jellyfish::whole_sequence_parser<stream_manager>;
-    namespace bfs = boost::filesystem;
+  // Since we have no real gene groupings, the t2g vector is trivial,
+  // everything maps to gene 0.
+  IndexVector t2g(transcriptNames.size(), 0);
 
-    NameVector transcriptNames;
-    NameVector geneNames {"gene"};
+  return TranscriptGeneMap(transcriptNames, geneNames, t2g);
+}
 
-    vector<bfs::path> paths{transcriptsFile};
+class ExpressionRecord {
+public:
+  ExpressionRecord(const std::string& targetIn, uint32_t lengthIn,
+                   double effLengthIn, std::vector<double>& expValsIn)
+      : target(targetIn), length(lengthIn), effLength(effLengthIn),
+        expVals(expValsIn) {}
+
+  ExpressionRecord(ExpressionRecord&& other) {
+    std::swap(target, other.target);
+    length = other.length;
+    effLength = other.effLength;
+    std::swap(expVals, other.expVals);
+  }
 
-    // Create a jellyfish parser
-    const int concurrentFile{1};
-    char** fnames = new char*[1];
-    fnames[0] = const_cast<char*>(transcriptsFile.c_str());
-    stream_manager streams(fnames, fnames + 1, concurrentFile);
+  ExpressionRecord(std::vector<std::string>& inputLine) {
+    if (inputLine.size() < 3) {
+      std::string err("Any expression line must contain at least 3 tokens");
+      throw std::invalid_argument(err);
+    } else {
+      auto it = inputLine.begin();
+      target = *it;
+      ++it;
+      length = std::stoi(*it);
+      ++it;
+      effLength = std::stod(*it);
+      ++it;
+      for (; it != inputLine.end(); ++it) {
+        expVals.push_back(std::stod(*it));
+      }
+    }
+  }
 
-    size_t maxReadGroupSize{100};
-    sequence_parser parser(4, maxReadGroupSize, concurrentFile, streams);
+  std::string target;
+  uint32_t length;
+  double effLength;
+  std::vector<double> expVals;
+};
 
-    // while there are transcripts left to process
-    while (true) {
-        sequence_parser::job j(parser);
-        // If this job is empty, then we're done
-        if (j.is_empty()) { break; }
+// From : http://stackoverflow.com/questions/9435385/split-a-string-using-c11
+std::vector<std::string> split(const std::string& str,
+                               int delimiter(int) = ::isspace) {
+  using namespace std;
+  vector<string> result;
+  auto e = str.end();
+  auto i = str.begin();
+  while (i != e) {
+    i = find_if_not(i, e, delimiter);
+    if (i == e)
+      break;
+    auto j = find_if(i, e, delimiter);
+    result.push_back(string(i, j));
+    i = j;
+  }
+  return result;
+}
 
-        for (size_t i=0; i < j->nb_filled; ++i) {
-            // The transcript name
-            std::string fullHeader(j->data[i].header);
-            std::string header = fullHeader.substr(0, fullHeader.find(' '));
-            transcriptNames.emplace_back(header);
-        }
+/**
+ * Validate the options for quasi-mapping-based salmon, and create the necessary
+ * output directories and
+ * logging infrastructure.
+ **/
+bool processQuantOptions(SalmonOpts& sopt,
+                         boost::program_options::variables_map& vm,
+                         int32_t numBiasSamples) {
+  using std::cerr;
+  using std::vector;
+  using std::string;
+  namespace bfs = boost::filesystem;
+  namespace po = boost::program_options;
+  // Set the atomic variable numBiasSamples from the local version
+  sopt.numBiasSamples.store(numBiasSamples);
+
+  // Get the time at the start of the run
+  std::time_t result = std::time(NULL);
+  sopt.runStartTime = std::string(std::asctime(std::localtime(&result)));
+  sopt.runStartTime.pop_back(); // remove the newline
+
+  // Verify the geneMap before we start doing any real work.
+  bfs::path geneMapPath;
+  if (vm.count("geneMap")) {
+    // Make sure the provided file exists
+    geneMapPath = vm["geneMap"].as<std::string>();
+    if (!bfs::exists(geneMapPath)) {
+      std::cerr << "Could not find transcript <=> gene map file " << geneMapPath
+                << "\n";
+      std::cerr << "Exiting now: please either omit the \'geneMap\' option or "
+                   "provide a valid file\n";
+      return false;
     }
+    sopt.geneMapPath = geneMapPath;
+  }
 
-    // Sort the transcript names
-    std::sort(transcriptNames.begin(), transcriptNames.end());
+  bfs::path outputDirectory(vm["output"].as<std::string>());
+  bfs::create_directories(outputDirectory);
+  if (!(bfs::exists(outputDirectory) and bfs::is_directory(outputDirectory))) {
+    std::cerr << "Couldn't create output directory " << outputDirectory << "\n";
+    std::cerr << "exiting\n";
+    return false;
+  }
 
-    // Since we have no real gene groupings, the t2g vector is trivial,
-    // everything maps to gene 0.
-    IndexVector t2g(transcriptNames.size(), 0);
+  bfs::path indexDirectory(vm["index"].as<string>());
+  bfs::path logDirectory = outputDirectory / "logs";
 
-    return TranscriptGeneMap(transcriptNames, geneNames, t2g);
-}
+  sopt.indexDirectory = indexDirectory;
+  sopt.outputDirectory = outputDirectory;
 
+  // Create the logger and the logging directory
+  bfs::create_directories(logDirectory);
+  if (!(bfs::exists(logDirectory) and bfs::is_directory(logDirectory))) {
+    std::cerr << "Couldn't create log directory " << logDirectory << "\n";
+    std::cerr << "exiting\n";
+    return false;
+  }
 
-class ExpressionRecord {
-  public:
-    ExpressionRecord(const std::string& targetIn, uint32_t lengthIn, double effLengthIn,
-	std::vector<double>& expValsIn) :
-      target(targetIn), length(lengthIn), effLength(effLengthIn), expVals(expValsIn) {}
+  if (!sopt.quiet) {
+    std::cout << "Logs will be written to " << logDirectory.string() << "\n";
+  }
 
-    ExpressionRecord( ExpressionRecord&& other ) {
-      std::swap(target, other.target);
-      length = other.length;
-      effLength = other.effLength;
-      std::swap(expVals, other.expVals);
-    }
+  bfs::path logPath = logDirectory / "salmon_quant.log";
+  // must be a power-of-two
 
-    ExpressionRecord(std::vector<std::string>& inputLine) {
-      if (inputLine.size() < 3) {
-	std::string err ("Any expression line must contain at least 3 tokens");
-	throw std::invalid_argument(err);
-      } else {
-	auto it = inputLine.begin();
-	target = *it; ++it;
-	length = std::stoi(*it); ++it;
-	effLength = std::stod(*it); ++it;
-	for (; it != inputLine.end(); ++it) {
-	  expVals.push_back(std::stod(*it));
-	}
-      }
-    }
+  size_t max_q_size = 2097152;
+  spdlog::set_async_mode(max_q_size);
 
-    std::string target;
-    uint32_t length;
-    double effLength;
-    std::vector<double> expVals;
-};
+  auto fileSink = std::make_shared<spdlog::sinks::simple_file_sink_mt>(
+      logPath.string(), true);
+  auto rawConsoleSink = std::make_shared<spdlog::sinks::stdout_sink_mt>();
+  auto consoleSink =
+      std::make_shared<spdlog::sinks::ansicolor_sink>(rawConsoleSink);
+  auto consoleLog = spdlog::create("stderrLog", {consoleSink});
+  auto fileLog = spdlog::create("fileLog", {fileSink});
+  auto jointLog = spdlog::create("jointLog", {fileSink, consoleSink});
 
+  // If we're being quiet, then only emit errors.
+  if (sopt.quiet) {
+    jointLog->set_level(spdlog::level::err);
+  }
 
-// From : http://stackoverflow.com/questions/9435385/split-a-string-using-c11
-std::vector<std::string> split(const std::string& str, int delimiter(int) = ::isspace){
-    using namespace std;
-    vector<string> result;
-    auto e=str.end();
-    auto i=str.begin();
-    while (i != e) {
-        i = find_if_not(i,e, delimiter);
-        if (i == e) break;
-        auto j = find_if(i,e, delimiter);
-        result.push_back(string(i,j));
-        i = j;
-    }
-    return result;
-}
+  sopt.jointLog = jointLog;
+  sopt.fileLog = fileLog;
 
-std::vector<int32_t> samplesFromLogPMF(FragmentLengthDistribution* fld, int32_t numSamples) {
-    std::vector<double> logPMF;
-    size_t minVal;
-    size_t maxVal;
-    double logFLDMean = fld->mean();
-    fld->dumpPMF(logPMF, minVal, maxVal);
-    double sum = salmon::math::LOG_0;
-    for (auto v : logPMF) {
-        sum = salmon::math::logAdd(sum, v);
+  // Create the file (and logger) for outputting unmapped reads, if the user has
+  // asked for it.
+  if (sopt.writeUnmappedNames) {
+    boost::filesystem::path auxDir = sopt.outputDirectory / sopt.auxDir;
+    bool auxSuccess = boost::filesystem::is_directory(auxDir);
+    if (!auxSuccess) {
+      auxSuccess = boost::filesystem::create_directories(auxDir);
     }
-    for (auto& v : logPMF) {
-        v -= sum;
+    if (auxSuccess) {
+      bfs::path unmappedNameFile = auxDir / "unmapped_names.txt";
+      std::ofstream* outFile = new std::ofstream(unmappedNameFile.string());
+
+      // Must be a power of 2
+      size_t queueSize{268435456};
+
+      spdlog::set_async_mode(queueSize);
+      auto outputSink =
+          std::make_shared<spdlog::sinks::ostream_sink_mt>(*outFile);
+
+      std::shared_ptr<spdlog::logger> outLog =
+          std::make_shared<spdlog::logger>("unmappedLog", outputSink);
+      spdlog::register_logger(outLog);
+      outLog->set_pattern("%v");
+      sopt.unmappedFile.reset(outFile);
+    } else {
+      jointLog->error("Couldn't create auxiliary directory in which to place "
+                      "\"unmapped_names.txt\"");
     }
+  }
+
+  // Verify that no inconsistent options were provided
+  if (sopt.numGibbsSamples > 0 and sopt.numBootstraps > 0) {
+    jointLog->error("You cannot perform both Gibbs sampling and bootstrapping. "
+                    "Please choose one.");
+    jointLog->flush();
+    return false;
+  }
 
-    // Create the non-logged pmf
-    std::vector<double> pmf(maxVal + 1, 0.0);
-    for (size_t i = minVal; i < maxVal; ++i) {
-        pmf[i] = std::exp(logPMF[i-minVal]);
+  {
+    if (sopt.noFragLengthDist and !sopt.noEffectiveLengthCorrection) {
+      jointLog->info(
+          "Error: You cannot enable --noFragLengthDist without "
+          "also enabling --noEffectiveLengthCorrection; exiting!\n");
+      jointLog->flush();
+      return false;
     }
+  }
 
-    // generate samples
-    std::random_device rd;
-    std::mt19937 gen(rd());
-    std::discrete_distribution<int32_t> dist(pmf.begin(), pmf.end());
+  /** WARN about any deprecated options! **/
+  //
+  if (sopt.useFSPD) {
+    jointLog->error("The --useFSPD option has been deprecated.  "
+		    "Positional bias modeling will return under the --posBias flag in a future release. "
+		    "For the time being, please remove the --useFSPD flag from your command.");
+    jointLog->flush();
+    return false;
+  }
+  
+  // maybe arbitrary, but if it's smaller than this, consider it
+  // equal to LOG_0.
+  if (sopt.incompatPrior < 1e-320 or sopt.incompatPrior == 0.0) {
+      jointLog->info("Fragment incompatibility prior below threshold.  Incompatible fragments will be ignored.");
+      sopt.incompatPrior = salmon::math::LOG_0;
+      sopt.ignoreIncompat = true;
+  } else {
+      sopt.incompatPrior = std::log(sopt.incompatPrior);
+      sopt.ignoreIncompat = false;
+  }
 
-    std::vector<int32_t> samples(pmf.size());
-    for (int32_t i = 0; i < numSamples; ++i) {
-        ++samples[dist(gen)];
-    }
-    return samples;
+  return true;
 }
 
-
 /**
  * Computes (and returns) new effective lengths for the transcripts
  * based on the current abundance estimates (alphas) and the current
- * effective lengths (effLensIn).  This approach is based on the one
- * taken in Kallisto, and seems to work well given its low computational
- * requirements.
+ * effective lengths (effLensIn).  This approach to sequence-specific bias is
+ * based on the one taken in Roberts et al. (2011) [1].
+ * Here, we also consider fragment-GC bias which uses a novel method extending
+ * the idea of adjusting the effective lengths.
+ *
+ * [1] Roberts, Adam, et al. "Improving RNA-Seq expression estimates by
+ * correcting for fragment bias."
+ *     Genome Biol 12.3 (2011): R22.
  */
 template <typename AbundanceVecT, typename ReadExpT>
-Eigen::VectorXd updateEffectiveLengths(ReadExpT& readExp,
-    Eigen::VectorXd& effLensIn,
-    AbundanceVecT& alphas,
-    std::vector<double>& transcriptKmerDist) {
+Eigen::VectorXd updateEffectiveLengths(SalmonOpts& sopt, ReadExpT& readExp,
+                                       Eigen::VectorXd& effLensIn,
+                                       AbundanceVecT& alphas, bool writeBias) {
+
   using std::vector;
+  using BlockedIndexRange = tbb::blocked_range<size_t>;
+
   double minAlpha = 1e-8;
+  double minCDFMass = 1e-10;
+  uint32_t gcSamp{sopt.pdfSampFactor};
+  bool gcBiasCorrect{sopt.gcBiasCorrect};
+  bool seqBiasCorrect{sopt.biasCorrect};
+  bool posBiasCorrect{sopt.posBiasCorrect};
+
+  double probFwd = readExp.gcFracFwd();
+  double probRC = readExp.gcFracRC();
+
+  if (gcBiasCorrect and probFwd < 0.0) {
+    sopt.jointLog->warn("Had no fragments from which to estimate "
+                        "fwd vs. rev-comp mapping rate.  Skipping "
+                        "sequence-specific / fragment-gc bias correction");
+    return effLensIn;
+  }
 
   // calculate read bias normalization factor -- total count in read
   // distribution.
-  auto& readBias = readExp.readBias();
-  int32_t K = readBias.getK();
-  double readNormFactor = static_cast<double>(readBias.totalCount());
+  auto& obs5 = readExp.readBiasModelObserved(salmon::utils::Direction::FORWARD);
+  auto& obs3 =
+      readExp.readBiasModelObserved(salmon::utils::Direction::REVERSE_COMPLEMENT);
+  obs5.normalize();
+  obs3.normalize();
+
+  auto& pos5Obs = readExp.posBias(salmon::utils::Direction::FORWARD);
+  auto& pos3Obs = readExp.posBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+
+  int32_t K =
+      seqBiasCorrect ? static_cast<int32_t>(obs5.getContextLength()) : 1;
+  int32_t contextUpstream = seqBiasCorrect ? obs5.contextBefore(false) : 0;
+
+  FragmentLengthDistribution& fld = *(readExp.fragmentLengthDistribution());
+
+  // The *expected* biases from GC effects
+  auto& transcriptGCDist = readExp.expectedGCBias();
+  auto& gcCounts = readExp.observedGC();
+  double readGCNormFactor = 0.0;
+  int32_t fldLow{0};
+  int32_t fldHigh{1};
+
+  double quantileCutoffLow = 0.005;
+  double quantileCutoffHigh = 1.0 - quantileCutoffLow;
+
+  // The CDF and PDF of the fragment length distribution
+  std::vector<double> cdf(fld.maxVal() + 1, 0.0);
+  std::vector<double> pdf(fld.maxVal() + 1, 0.0);
+  {
+    transcriptGCDist.reset(distribution_utils::DistributionSpace::LINEAR);
+
+    bool lb{false};
+    bool ub{false};
+    for (size_t i = 0; i <= fld.maxVal(); ++i) {
+      pdf[i] = std::exp(fld.pmf(i));
+      cdf[i] = (i > 0) ? cdf[i - 1] + pdf[i] : pdf[i];
+      auto density = cdf[i];
+
+      if (!lb and density >= quantileCutoffLow) {
+        lb = true;
+        fldLow = i;
+      }
+      if (!ub and density >= quantileCutoffHigh) {
+        ub = true;
+        fldHigh = i;
+      }
+    }
 
-  // Reset the transcript (normalized) counts
-  transcriptKmerDist.clear();
-  transcriptKmerDist.resize(constExprPow(4, K), 1.0);
+    /*
+    if (gcBiasCorrect) {
+      for (auto& c : gcCounts) {
+        readGCNormFactor += c;
+      }
+    }
+    */
+  }
 
   // Make this const so there are no shenanigans
   const auto& transcripts = readExp.transcripts();
@@ -1154,131 +1471,559 @@ Eigen::VectorXd updateEffectiveLengths(ReadExpT& readExp,
   // The effective lengths adjusted for bias
   Eigen::VectorXd effLensOut(effLensIn.size());
 
-  for(size_t it=0; it < transcripts.size(); ++it) {
+  // How much to cut off
+  int32_t trunc = K;
 
-    // First in the forward direction
-    int32_t refLen = static_cast<int32_t>(transcripts[it].RefLength);
-    int32_t elen = static_cast<int32_t>(transcripts[it].EffectiveLength);
+  using GCBiasVecT = std::vector<double>;
+  using SeqBiasVecT = std::vector<double>;
 
-    // How much of this transcript (beginning and end) should
-    // not be considered
-    int32_t unprocessedLen = std::max(0, refLen - elen);
-
-    // Skip transcripts with trivial expression or that are too
-    // short.
-    if (alphas[it] < minAlpha or unprocessedLen <= 0) {
-      continue;
+  /**
+   * These will store "thread local" parameters
+   * for the appropriate bias terms.
+   */
+  class CombineableBiasParams {
+  public:
+    CombineableBiasParams(uint32_t K, size_t numCondBins, size_t numGCBins) :
+      expectGC(numCondBins, numGCBins, distribution_utils::DistributionSpace::LINEAR) {
+      expectPos5 = std::vector<SimplePosBias>(5);
+      expectPos3 = std::vector<SimplePosBias>(5);
     }
 
-    // Otherwise, proceed with the following weight.
-    double contribution = 0.5*(alphas[it]/effLensIn(it));
+    std::vector<SimplePosBias> expectPos5;
+    std::vector<SimplePosBias> expectPos3;
+    SBModel expectSeqFW;
+    SBModel expectSeqRC;
+    GCFragModel expectGC;
+  };
 
-    // From the start of the transcript up until the last valid
-    // kmer.
-    bool firstKmer{true};
-    uint32_t idx{0};
-
-    // This transcript's sequence
-    const char* tseq = transcripts[it].Sequence;
-    if (!tseq) {
-        std::cerr << "Transcript " << transcripts[it].RefName << " had no sequence available.\n";
-        std::cerr << "To enable sequence-specific bias correction, you must provide a "
-                  << "reference file with sequences for all transcripts.\n";
-        return effLensIn;
+  auto revComplement = [](const char* s, int32_t l, std::string& o) -> void {
+    if (l > o.size()) {
+      o.resize(l, 'A');
     }
-
-    // From the start of the transcript through the effective length
-    for (int32_t i = 0; i < elen - K; ++i) {
-      if (firstKmer) {
-	idx = indexForKmer(tseq, K, Direction::FORWARD);
-	firstKmer = false;
-      } else {
-	idx = nextKmerIndex(idx, tseq[i-1+K], K, Direction::FORWARD);
+    int32_t j = 0;
+    for (int32_t i = l - 1; i >= 0; --i, ++j) {
+      switch (s[i]) {
+      case 'A':
+      case 'a':
+        o[j] = 'T';
+        break;
+      case 'C':
+      case 'c':
+        o[j] = 'G';
+        break;
+      case 'T':
+      case 't':
+        o[j] = 'A';
+        break;
+      case 'G':
+      case 'g':
+        o[j] = 'C';
+        break;
+      default:
+        o[j] = 'N';
+        break;
       }
-      transcriptKmerDist[idx] += contribution;
-    }
-
-    // Then in the reverse complement direction
-    firstKmer = true;
-    idx = 0;
-    // Start from the end and go until the fragment length
-    // distribution says we should stop
-    for (int32_t i = refLen - K - 1; i >= unprocessedLen; --i) {
-      if (firstKmer) {
-	idx = indexForKmer(tseq + i, K, Direction::REVERSE_COMPLEMENT);
-	firstKmer = false;
-      } else {
-	idx = nextKmerIndex(idx, tseq[i], K, Direction::REVERSE_COMPLEMENT);
+    }
+  };
+
+  int outsideContext{3};
+  int insideContext{2};
+
+  int contextSize = outsideContext + insideContext;
+  double cscale = 100.0 / (2 * contextSize);
+  auto populateContextCounts = [outsideContext, insideContext, contextSize](
+      const Transcript& txp, const char* tseq, Eigen::VectorXd& contextCountsFP,
+      Eigen::VectorXd& contextCountsTP) {
+    auto refLen = static_cast<int32_t>(txp.RefLength);
+    auto lastPos = refLen - 1;
+    if (refLen > contextSize) {
+      int windowStart = -1;
+      int windowEnd = contextSize - 1;
+      int fp = outsideContext;
+      int tp = insideContext - 1;
+      double count = txp.gcAt(windowEnd);
+      contextCountsFP[fp] = count;
+      contextCountsTP[tp] = count;
+      ++windowStart;
+      ++windowEnd;
+      ++fp;
+      ++tp;
+      for (; tp < refLen; ++windowStart, ++windowEnd, ++fp, ++tp) {
+        switch (tseq[windowStart]) {
+        case 'G':
+        case 'g':
+        case 'C':
+        case 'c':
+          count -= 1;
+        }
+        if (windowEnd < refLen) {
+          switch (tseq[windowEnd]) {
+          case 'G':
+          case 'g':
+          case 'C':
+          case 'c':
+            count += 1;
+          }
+        }
+        if (fp < refLen) {
+          contextCountsFP[fp] = count;
+        }
+        contextCountsTP[tp] = count;
       }
-      transcriptKmerDist[idx] += contribution;
     }
-  }
+  };
+
+  /**
+   * The local bias terms from each thread can be combined
+   * via simple summation.
+   */
+  auto getBiasParams = [K, &sopt]() -> CombineableBiasParams {
+    return CombineableBiasParams(K, sopt.numConditionalGCBins, sopt.numFragGCBins);
+  };
+  tbb::combinable<CombineableBiasParams> expectedDist(getBiasParams);
+  std::atomic<size_t> numBackgroundTranscripts{0};
+  std::atomic<size_t> numExpressedTranscripts{0};
+
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(transcripts.size())),
+      [&](const BlockedIndexRange& range) -> void {
+
+        auto& expectSeqFW = expectedDist.local().expectSeqFW;
+        auto& expectSeqRC = expectedDist.local().expectSeqRC;
+        auto& expectGC = expectedDist.local().expectGC;
+        auto& expectPos5 = expectedDist.local().expectPos5;
+        auto& expectPos3 = expectedDist.local().expectPos3;
+
+        std::string rcSeq;
+        // For each transcript
+        for (auto it : boost::irange(range.begin(), range.end())) {
+
+          // Get the transcript
+          const auto& txp = transcripts[it];
+
+          // Get the reference length and the
+          // "initial" effective length (not considering any biases)
+          int32_t refLen = static_cast<int32_t>(txp.RefLength);
+          int32_t elen = static_cast<int32_t>(txp.EffectiveLength);
+
+          // The difference between the actual and effective length
+          int32_t unprocessedLen = std::max(0, refLen - elen);
+
+          int32_t cdfMaxArg =
+              std::min(static_cast<int32_t>(cdf.size() - 1), refLen);
+          double cdfMaxVal = cdf[cdfMaxArg];
+          // need a reliable CDF
+          if (cdfMaxVal < minCDFMass) {
+            continue;
+          }
+          auto conditionalCDF = [cdfMaxArg, cdfMaxVal,
+                                 &cdf](double x) -> double {
+            return (x > cdfMaxArg) ? 1.0 : (cdf[x] / cdfMaxVal);
+          };
+
+          // Skip transcripts with trivial expression or that are too
+          // short
+          if (alphas[it] < minAlpha or
+              unprocessedLen <= 0) { // or txp.uniqueUpdateFraction() < 0.90) {
+            if (alphas[it] >= minAlpha) {
+              ++numExpressedTranscripts;
+            }
+            continue;
+          }
+          ++numBackgroundTranscripts;
 
-  // The total mass of the transcript distribution
-  double txomeNormFactor = 0.0;
-  for(auto m : transcriptKmerDist) { txomeNormFactor += m; }
+          // Otherwise, proceed giving this transcript the following weight
+          double weight = (alphas[it] / effLensIn(it));
 
-  // Now, compute the effective length of each transcript using
-  // the k-mer biases
-  for(size_t it = 0; it < transcripts.size(); ++it) {
-    // Starts out as 0
-    double effLength = 0.0;
+          Eigen::VectorXd contextCountsFP(refLen);
+          Eigen::VectorXd contextCountsTP(refLen);
+          contextCountsFP.setOnes();
+          contextCountsTP.setOnes();
 
-    // First in the forward direction, from the start of the
-    // transcript up until the last valid kmer.
-    int32_t refLen = static_cast<int32_t>(transcripts[it].RefLength);
-    int32_t elen = static_cast<int32_t>(transcripts[it].EffectiveLength);
+          // This transcript's sequence
+          const char* tseq = txp.Sequence();
+          revComplement(tseq, refLen, rcSeq);
+          const char* rseq = rcSeq.c_str();
 
-    // How much of this transcript (beginning and end) should
-    // not be considered
-    int32_t unprocessedLen = std::max(0, refLen - elen);
+          Mer fwmer;
+          fwmer.from_chars(tseq);
+          Mer rcmer;
+          rcmer.from_chars(rseq);
+          int32_t contextLength{expectSeqFW.getContextLength()};
 
-    if (alphas[it] >= minAlpha and unprocessedLen > 0) {
-      bool firstKmer{true};
-      uint32_t idx{0};
-      // This transcript's sequence
-      const char* tseq = transcripts[it].Sequence;
+          if (gcBiasCorrect and seqBiasCorrect) {
+            populateContextCounts(txp, tseq, contextCountsFP, contextCountsTP);
+          }
 
-      for (int32_t i = 0; i < elen - K; ++i) {
-	if (firstKmer) {
-	  idx = indexForKmer(tseq, K, Direction::FORWARD);
-	  firstKmer = false;
-	} else {
-	  idx = nextKmerIndex(idx, tseq[i-1+K], K, Direction::FORWARD);
-	}
-	effLength += (readBias.counts[idx]/transcriptKmerDist[idx]);
-      }
+          // The smallest and largest values of fragment
+          // lengths we'll consider for this transcript.
+          int32_t locFLDLow = (refLen < cdfMaxArg) ? 1 : fldLow;
+          int32_t locFLDHigh = (refLen < cdfMaxArg) ? cdfMaxArg : fldHigh;
+
+          // For each position along the transcript
+          // Starting from the 5' end and moving toward the 3' end
+          for (int32_t fragStartPos = 0; fragStartPos < refLen - K;
+               ++fragStartPos) {
+            // Seq-specific bias
+            if (seqBiasCorrect) {
+              int32_t contextEndPos =
+                  fragStartPos + K - 1; // -1 because pos is *inclusive*
+
+              if (contextEndPos >= 0 and contextEndPos < refLen) {
+                int32_t maxFragLen =
+                    refLen - (fragStartPos + expectSeqFW.contextBefore(false));
+                if (maxFragLen >= 0 and maxFragLen < refLen) {
+                  auto cdensity = conditionalCDF(maxFragLen);
+                  expectSeqFW.addSequence(fwmer, weight * cdensity);
+                  expectSeqRC.addSequence(rcmer, weight * cdensity);
+                }
+              }
 
-      // Then in the reverse complement direction
-      firstKmer = true;
-      idx = 0;
-      // Start from the end and go until the fragment length
-      // distribution says we should stop
-      for (int32_t i = refLen - K - 1; i >= unprocessedLen; --i) {
-	if (firstKmer) {
-	  idx = indexForKmer(tseq + i, K, Direction::REVERSE_COMPLEMENT);
-	  firstKmer = false;
-	} else {
-	  idx = nextKmerIndex(idx, tseq[i], K, Direction::REVERSE_COMPLEMENT);
-	}
-	effLength += (readBias.counts[idx]/transcriptKmerDist[idx]);
-      }
+              // shift the context one nucleotide to the right
+              fwmer.shift_left(tseq[fragStartPos + contextLength]);
+              rcmer.shift_left(rseq[fragStartPos + contextLength]);
+            } // end: Seq-specific bias
+
+            // fragment-GC bias
+            if (gcBiasCorrect) {
+              size_t sp =
+                  static_cast<size_t>((locFLDLow > 0) ? locFLDLow - 1 : 0);
+              double prevFLMass = conditionalCDF(sp);
+              int32_t fragStart = fragStartPos;
+              for (int32_t fl = locFLDLow; fl <= locFLDHigh; fl += gcSamp) {
+                int32_t fragEnd = fragStart + fl - 1;
+                if (fragEnd < refLen) {
+                  // The GC fraction for this putative fragment
+                  auto gcFrac = txp.gcFrac(fragStart, fragEnd);
+                  int32_t contextFrac = std::lrint(
+                      (contextCountsFP[fragStart] + contextCountsTP[fragEnd]) *
+                      cscale);
+                  GCDesc desc{gcFrac, contextFrac};
+                  expectGC.inc(desc,
+                               weight * (conditionalCDF(fl) - prevFLMass));
+                  prevFLMass = conditionalCDF(fl);
+                } else {
+                  break;
+                } // no more valid positions
+              }   // end: for each fragment length
+            }     // end: fragment GC bias
+
+            // positional bias
+            if (posBiasCorrect) {
+              int32_t maxFragLenFW = refLen - fragStartPos + 1;
+              int32_t maxFragLenRC = fragStartPos;
+              auto densityFW = conditionalCDF(maxFragLenFW);
+              auto densityRC = conditionalCDF(maxFragLenRC);
+              if (weight * densityFW > 1e-8) {
+                expectPos5[txp.lengthClassIndex()].addMass(
+                    fragStartPos, txp.RefLength, std::log(weight * densityFW));
+              }
+              if (weight * densityRC > 1e-8) {
+                expectPos3[txp.lengthClassIndex()].addMass(
+                    fragStartPos, txp.RefLength, std::log(weight * densityRC));
+              }
+            }
+          } // end: for every fragment start position
+        }   // end for each transcript
+
+      } // end tbb for function
+      );
+
+  size_t bgCutoff =
+      std::min(static_cast<size_t>(150),
+               static_cast<size_t>(numBackgroundTranscripts * 0.1));
+  if (numBackgroundTranscripts < bgCutoff) {
+    sopt.jointLog->warn("I found only {} transcripts meeting the necessary "
+                        "conditions to contribute to "
+                        "the bias background distribution.  This is likely too "
+                        "small to safely do bias correction. "
+                        "I'm skipping bias correction",
+                        numBackgroundTranscripts.load());
+    sopt.biasCorrect = false;
+    sopt.gcBiasCorrect = false;
+    sopt.posBiasCorrect = false;
+    return effLensIn;
+  }
 
-      effLength *= 0.5 * (txomeNormFactor / readNormFactor);
+  /**
+   * The local bias terms from each thread can be combined
+   * via simple summation.  Here, we combine the locally-computed
+   * bias terms.
+   */
+  SBModel exp5;
+  SBModel exp3;
+  std::vector<SimplePosBias> pos5Exp(5);
+  std::vector<SimplePosBias> pos3Exp(5);
+  auto combineBiasParams =
+      [seqBiasCorrect, gcBiasCorrect, posBiasCorrect, &pos5Exp, &pos3Exp, &exp5,
+       &exp3, &transcriptGCDist](const CombineableBiasParams& p) -> void {
+    if (seqBiasCorrect) {
+      exp5.combineCounts(p.expectSeqFW);
+      exp3.combineCounts(p.expectSeqRC);
     }
-
-    if(unprocessedLen > 0.0 and effLength > unprocessedLen) {
-      effLensOut(it) = effLength;
-    } else {
-      effLensOut(it) = effLensIn(it);
+    if (gcBiasCorrect) {
+      transcriptGCDist.combineCounts(p.expectGC);
+    }
+    if (posBiasCorrect) {
+      for (size_t i = 0; i < p.expectPos5.size(); ++i) {
+        pos5Exp[i].combine(p.expectPos5[i]);
+        pos3Exp[i].combine(p.expectPos3[i]);
+      }
+    }
+  };
+  expectedDist.combine_each(combineBiasParams);
+
+  // finalize expected positional biases
+  if (posBiasCorrect) {
+    for (size_t i = 0; i < pos5Exp.size(); ++i) {
+      pos5Exp[i].finalize();
+      pos3Exp[i].finalize();
     }
   }
+  if (gcBiasCorrect) {
+    transcriptGCDist.normalize();
+  }
+
+  sopt.jointLog->info("Computed expected counts (for bias correction)");
+
+  auto gcBias = gcCounts.ratio(transcriptGCDist, 1000.0);
+
+  exp5.normalize();
+  exp3.normalize();
+
+  bool noThreshold = sopt.noBiasLengthThreshold;
+  std::atomic<size_t> numCorrected{0};
+  std::atomic<size_t> numUncorrected{0};
+
+  std::atomic<uint32_t> numProcessed{0};
+  size_t numTranscripts = transcripts.size();
+  size_t stepSize = static_cast<size_t>(transcripts.size() * 0.1);
+  size_t nextUpdate{0};
+
+  //std::mutex updateMutex;
+  TryableSpinLock tsl;
+  /**
+   * Compute the effective lengths of each transcript (in parallel)
+   */
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(transcripts.size())),
+      [&](const BlockedIndexRange& range) -> void {
+
+        std::string rcSeq;
+        // For each transcript
+        for (auto it : boost::irange(range.begin(), range.end())) {
+
+          auto& txp = transcripts[it];
+
+          // eff. length starts out as 0
+          double effLength = 0.0;
+
+          // Reference length
+          int32_t refLen = static_cast<int32_t>(txp.RefLength);
+          // Effective length before any bias correction
+          int32_t elen = static_cast<int32_t>(txp.EffectiveLength);
+
+          // How much of this transcript (beginning and end) should
+          // not be considered
+          int32_t unprocessedLen = std::max(0, refLen - elen);
+          int32_t cdfMaxArg =
+              std::min(static_cast<int32_t>(cdf.size() - 1), refLen);
+          double cdfMaxVal = cdf[cdfMaxArg];
+          auto conditionalCDF = [cdfMaxArg, cdfMaxVal,
+                                 &cdf](double x) -> double {
+            return (x > cdfMaxArg) ? 1.0 : (cdf[x] / cdfMaxVal);
+          };
+          // The smallest and largest values of fragment
+          // lengths we'll consider for this transcript.
+          int32_t locFLDLow = (refLen < cdfMaxArg) ? 1 : fldLow;
+          int32_t locFLDHigh = (refLen < cdfMaxArg) ? cdfMaxArg : fldHigh;
+
+          if (alphas[it] >= minAlpha and unprocessedLen > 0 and
+              cdfMaxVal > minCDFMass) {
+
+            Eigen::VectorXd seqFactorsFW(refLen);
+            Eigen::VectorXd seqFactorsRC(refLen);
+            seqFactorsFW.setOnes();
+            seqFactorsRC.setOnes();
+
+            Eigen::VectorXd contextCountsFP(refLen);
+            Eigen::VectorXd contextCountsTP(refLen);
+            contextCountsFP.setOnes();
+            contextCountsTP.setOnes();
+
+            std::vector<double> posFactorsFW(refLen, 1.0);
+            std::vector<double> posFactorsRC(refLen, 1.0);
+
+            // This transcript's sequence
+            const char* tseq = txp.Sequence();
+            revComplement(tseq, refLen, rcSeq);
+            const char* rseq = rcSeq.c_str();
+
+            int32_t fl = locFLDLow;
+            auto maxLen = std::min(refLen, locFLDHigh + 1);
+            bool done{fl >= maxLen};
+
+            if (gcBiasCorrect and seqBiasCorrect) {
+              populateContextCounts(txp, tseq, contextCountsFP,
+                                    contextCountsTP);
+            }
+
+            if (posBiasCorrect) {
+              std::vector<double> posFactorsObs5(refLen, 1.0);
+              std::vector<double> posFactorsObs3(refLen, 1.0);
+              std::vector<double> posFactorsExp5(refLen, 1.0);
+              std::vector<double> posFactorsExp3(refLen, 1.0);
+              auto li = txp.lengthClassIndex();
+              auto& p5O = pos5Obs[li];
+              auto& p3O = pos3Obs[li];
+              auto& p5E = pos5Exp[li];
+              auto& p3E = pos3Exp[li];
+              p5O.projectWeights(posFactorsObs5);
+              p3O.projectWeights(posFactorsObs3);
+              p5E.projectWeights(posFactorsExp5);
+              p3E.projectWeights(posFactorsExp3);
+              for (int32_t fragStart = 0; fragStart < refLen - K; ++fragStart) {
+                posFactorsFW[fragStart] =
+                    posFactorsObs5[fragStart] / posFactorsExp5[fragStart];
+                posFactorsRC[fragStart] =
+                    posFactorsObs3[fragStart] / posFactorsExp3[fragStart];
+              }
+            }
 
+            // Evaluate the sequence specific bias (5' and 3') over the length
+            // of the transcript.  After this loop,
+            // seqFactorsFW will contain the sequence-specific bias for each
+            // position on the 5' strand
+            // and seqFactorsRC will contain the sequence-specific bias for each
+            // position on the 3' strand.
+            if (seqBiasCorrect) {
+              Mer mer;
+              Mer rcmer;
+              mer.from_chars(tseq);
+              rcmer.from_chars(rseq);
+              int32_t contextLength{exp5.getContextLength()};
+
+              for (int32_t fragStart = 0; fragStart < refLen - K; ++fragStart) {
+                int32_t readStart = fragStart + obs5.contextBefore(false);
+                int32_t kmerEndPos =
+                    fragStart + K - 1; // -1 because pos is *inclusive*
+
+                if (kmerEndPos >= 0 and kmerEndPos < refLen and
+                    readStart < refLen) {
+                  seqFactorsFW[readStart] =
+                      std::exp(obs5.evaluateLog(mer) - exp5.evaluateLog(mer));
+                  seqFactorsRC[readStart] = std::exp(obs3.evaluateLog(rcmer) -
+                                                     exp3.evaluateLog(rcmer));
+                }
+                // shift the context one nucleotide to the right
+                mer.shift_left(tseq[fragStart + contextLength]);
+                rcmer.shift_left(rseq[fragStart + contextLength]);
+              }
+              // We need these in 5' -> 3' order, so reverse them
+              seqFactorsRC.reverseInPlace();
+            } // end sequence-specific factor calculation
+
+            if (numProcessed > nextUpdate) {
+                if (tsl.try_lock()) {
+                    if (numProcessed > nextUpdate) {
+                        sopt.jointLog->info(
+                                            "processed bias for {:3.1f}% of the transcripts",
+                                            100.0 *
+                                            (numProcessed / static_cast<double>(numTranscripts)));
+                        nextUpdate += stepSize;
+                        if (nextUpdate > numTranscripts) {
+                            nextUpdate = numTranscripts - 1;
+                        }
+                    }
+                    tsl.unlock();
+                }
+            }
+            ++numProcessed;
+
+            size_t sp = static_cast<size_t>((fl > 0) ? fl - 1 : 0);
+            double prevFLMass = conditionalCDF(sp);
+            double unbiasedMass{0.0};
+
+            // For every possible fragment length
+            while (!done) {
+              if (fl >= maxLen) {
+                done = true;
+                fl = maxLen - 1;
+              }
+              double flWeight = conditionalCDF(fl) - prevFLMass;
+              prevFLMass = conditionalCDF(fl);
+
+              double flMassTotal{0.0};
+              // For every position a fragment of length fl could start
+              for (int32_t kmerStartPos = 0; kmerStartPos < refLen - fl;
+                   ++kmerStartPos) {
+                int32_t fragStart = kmerStartPos;
+                int32_t fragEnd = fragStart + fl - 1;
+
+                // If the 3' end is within the transcript
+                if (fragStart < refLen and fragEnd < refLen) {
+                  double fragFactor =
+                      seqFactorsFW[fragStart] * seqFactorsRC[fragEnd];
+                  if (gcBiasCorrect) {
+                    auto gcFrac = txp.gcFrac(fragStart, fragEnd);
+                    int32_t contextFrac =
+                        std::lrint((contextCountsFP[fragStart] +
+                                    contextCountsTP[fragEnd]) *
+                                   cscale);
+                    GCDesc desc{gcFrac, contextFrac};
+                    fragFactor *= gcBias.get(desc);
+                    /*
+                    fragFactor *= gcBias[gcFrac];
+                    */
+                  }
+                  if (posBiasCorrect) {
+                    fragFactor *=
+                        posFactorsFW[fragStart] * posFactorsRC[fragEnd];
+                  }
+                  flMassTotal += fragFactor;
+                } else {
+                  break;
+                }
+              }
+
+              effLength += (flWeight * flMassTotal);
+              fl += gcSamp;
+            }
+          } // for the processed transcript
+
+          // throw caution to the wind
+          double thresh = noThreshold ? 1.0 : unprocessedLen;
+
+          if (noThreshold) {
+            if (unprocessedLen > 0.0 and effLength > thresh) {
+              effLensOut(it) = effLength;
+            } else {
+              effLensOut(it) = effLensIn(it);
+            }
+          } else {
+            double offset = std::max(1.0, thresh);
+            double effLengthNoBias = static_cast<double>(elen);
+            auto barrierLength = [effLengthNoBias, offset](double x) -> double {
+              return std::max(x, std::min(effLengthNoBias, offset));
+            };
+            effLensOut(it) = barrierLength(effLength);
+          }
+        }
+      } // end parallel_for lambda
+      );
+
+  // Copy over the expected sequence bias models
+  if (seqBiasCorrect) {
+    readExp.setReadBiasModelExpected(std::move(exp5), salmon::utils::Direction::FORWARD); 
+    readExp.setReadBiasModelExpected(std::move(exp3), salmon::utils::Direction::REVERSE_COMPLEMENT); 
+  }
+  
+  sopt.jointLog->info("processed bias for 100.0% of the transcripts");
   return effLensOut;
 }
 
-
-void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath) {
+void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm,
+                                   boost::filesystem::path& inputPath) {
   using std::vector;
   using std::string;
   using std::ofstream;
@@ -1300,30 +2045,29 @@ void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::pa
   string l;
   size_t ln{0};
 
-
   bool headerLine{true};
   while (getline(expFile, l)) {
-      if (++ln % 1000 == 0) {
-          cerr << "\r\rParsed " << ln << " expression lines";
-      }
-      auto it = find_if(l.begin(), l.end(),
-              [](char c) -> bool {return !isspace(c);});
-      if (it != l.end()) {
-          if (*it == '#') {
-              comments.push_back(l);
-          } else {
-              // If this isn't the first non-comment line
-              if (!headerLine) {
-                  vector<string> toks = split(l);
-                  ExpressionRecord er(toks);
-                  auto gn = tgm.geneName(er.target);
-                  geneExps[gn].push_back(move(er));
-              } else { // treat the header line as a comment
-                  comments.push_back(l);
-                  headerLine = false;
-              }
-          }
+    if (++ln % 1000 == 0) {
+      cerr << "\r\rParsed " << ln << " expression lines";
+    }
+    auto it =
+        find_if(l.begin(), l.end(), [](char c) -> bool { return !isspace(c); });
+    if (it != l.end()) {
+      if (*it == '#') {
+        comments.push_back(l);
+      } else {
+        // If this isn't the first non-comment line
+        if (!headerLine) {
+          vector<string> toks = split(l);
+          ExpressionRecord er(toks);
+          auto gn = tgm.geneName(er.target);
+          geneExps[gn].push_back(move(er));
+        } else { // treat the header line as a comment
+          comments.push_back(l);
+          headerLine = false;
+        }
       }
+    }
   }
   cerr << "\ndone\n";
   expFile.close();
@@ -1351,27 +2095,29 @@ void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::pa
     for (auto& tranExp : kv.second) {
       // expVals[0] = TPM
       // expVals[1] = count
-      for (size_t i = 0; i < NE; ++i) { expVals[i] += tranExp.expVals[i]; }
+      for (size_t i = 0; i < NE; ++i) {
+        expVals[i] += tranExp.expVals[i];
+      }
       totalTPM += expVals[tpmIdx];
     }
 
     // If this gene was expressed
     if (totalTPM > minTPM) {
-        geneLength = 0.0;
-        geneEffLength = 0.0;
-        for (auto& tranExp : kv.second) {
-            double frac = tranExp.expVals[tpmIdx] / totalTPM;
-            geneLength += tranExp.length * frac;
-            geneEffLength += tranExp.effLength * frac;
-        }
+      geneLength = 0.0;
+      geneEffLength = 0.0;
+      for (auto& tranExp : kv.second) {
+        double frac = tranExp.expVals[tpmIdx] / totalTPM;
+        geneLength += tranExp.length * frac;
+        geneEffLength += tranExp.effLength * frac;
+      }
     } else {
-        geneLength = 0.0;
-        geneEffLength = 0.0;
-        double frac = 1.0 / kv.second.size();
-        for (auto& tranExp : kv.second) {
-            geneLength += tranExp.length * frac;
-            geneEffLength += tranExp.effLength * frac;
-        }
+      geneLength = 0.0;
+      geneEffLength = 0.0;
+      double frac = 1.0 / kv.second.size();
+      for (auto& tranExp : kv.second) {
+        geneLength += tranExp.length * frac;
+        geneEffLength += tranExp.effLength * frac;
+      }
     }
 
     // Otherwise, if the gene wasn't expressed, the length
@@ -1391,216 +2137,843 @@ void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::pa
 
 void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
                                 boost::filesystem::path& estDir) {
-    namespace bfs = boost::filesystem;
-    std::cerr << "Computing gene-level abundance estimates\n";
-    bfs::path gtfExtension(".gtf");
-    auto extension = geneMapPath.extension();
-
-    TranscriptGeneMap tranGeneMap;
-    // parse the map as a GTF file
-    if (extension == gtfExtension) {
-        // Using libgff
-        tranGeneMap = salmon::utils::transcriptGeneMapFromGTF(geneMapPath.string(), "gene_id");
-    } else { // parse the map as a simple format files
-        std::ifstream tgfile(geneMapPath.string());
-        tranGeneMap = salmon::utils::readTranscriptToGeneMap(tgfile);
-        tgfile.close();
-    }
-
-    std::cerr << "There were " << tranGeneMap.numTranscripts() << " transcripts mapping to "
-        << tranGeneMap.numGenes() << " genes\n";
-
-    bfs::path estFilePath = estDir / "quant.sf";
-    if (!bfs::exists(estFilePath)) {
-        std::stringstream errstr;
-        errstr << "Attempting to compute gene-level esimtates, but could not \n"
-            << "find isoform-level file " << estFilePath;
-        throw std::invalid_argument(errstr.str());
-    } else {
-        salmon::utils::aggregateEstimatesToGeneLevel(tranGeneMap, estFilePath);
-    }
+  namespace bfs = boost::filesystem;
+  std::cerr << "Computing gene-level abundance estimates\n";
+  bfs::path gtfExtension(".gtf");
+  auto extension = geneMapPath.extension();
+
+  TranscriptGeneMap tranGeneMap;
+  // parse the map as a GTF file
+  if (extension == gtfExtension) {
+    // Using libgff
+    tranGeneMap = salmon::utils::transcriptGeneMapFromGTF(geneMapPath.string(),
+                                                          "gene_id");
+  } else { // parse the map as a simple format files
+    std::ifstream tgfile(geneMapPath.string());
+    tranGeneMap = salmon::utils::readTranscriptToGeneMap(tgfile);
+    tgfile.close();
+  }
 
-    /** Create a gene-level summary of the bias-corrected estimates as well if these exist **/
-    /*
-    if (haveBiasCorrectedFile) {
-        bfs::path biasCorrectEstFilePath = estDir / "quant_bias_corrected.sf";
-        if (!bfs::exists(biasCorrectEstFilePath)) {
-            std::stringstream errstr;
-            errstr << "Attempting to compute gene-level esimtates, but could not \n"
-                << "find bias-corrected isoform-level file " << biasCorrectEstFilePath;
-            throw std::invalid_argument(errstr.str());
-        } else {
-            salmon::utils::aggregateEstimatesToGeneLevel(tranGeneMap, biasCorrectEstFilePath);
-        }
-    }
-    */
-}
+  std::cerr << "There were " << tranGeneMap.numTranscripts()
+            << " transcripts mapping to " << tranGeneMap.numGenes()
+            << " genes\n";
+
+  bfs::path estFilePath = estDir / "quant.sf";
+  if (!bfs::exists(estFilePath)) {
+    std::stringstream errstr;
+    errstr << "Attempting to compute gene-level esimtates, but could not \n"
+           << "find isoform-level file " << estFilePath;
+    throw std::invalid_argument(errstr.str());
+  } else {
+    salmon::utils::aggregateEstimatesToGeneLevel(tranGeneMap, estFilePath);
+  }
 
+  /** Create a gene-level summary of the bias-corrected estimates as well if
+   * these exist **/
+  /*
+  if (haveBiasCorrectedFile) {
+      bfs::path biasCorrectEstFilePath = estDir / "quant_bias_corrected.sf";
+      if (!bfs::exists(biasCorrectEstFilePath)) {
+          std::stringstream errstr;
+          errstr << "Attempting to compute gene-level esimtates, but could not
+  \n"
+              << "find bias-corrected isoform-level file " <<
+  biasCorrectEstFilePath;
+          throw std::invalid_argument(errstr.str());
+      } else {
+          salmon::utils::aggregateEstimatesToGeneLevel(tranGeneMap,
+  biasCorrectEstFilePath);
+      }
+  }
+  */
+}
 }
 }
-
 
 // === Explicit instantiations
 
-template
-void salmon::utils::writeAbundances<AlignmentLibrary<ReadPair>>(
-                                              const SalmonOpts& opts,
-                                              AlignmentLibrary<ReadPair>& alnLib,
-                                              boost::filesystem::path& fname,
-                                              std::string headerComments);
-
-template
-void salmon::utils::writeAbundances<AlignmentLibrary<UnpairedRead>>(
-                                                  const SalmonOpts& opts,
-                                                  AlignmentLibrary<UnpairedRead>& alnLib,
-                                                  boost::filesystem::path& fname,
-                                                  std::string headerComments);
-template
-void salmon::utils::writeAbundances<ReadExperiment>(
-                                                  const SalmonOpts& opts,
-                                                  ReadExperiment& alnLib,
-                                                  boost::filesystem::path& fname,
-                                                  std::string headerComments);
-template
-void salmon::utils::writeAbundancesFromCollapsed<AlignmentLibrary<ReadPair>>(
-                                              const SalmonOpts& opts,
-                                              AlignmentLibrary<ReadPair>& alnLib,
-                                              boost::filesystem::path& fname,
-                                              std::string headerComments);
-
-template
-void salmon::utils::writeAbundancesFromCollapsed<AlignmentLibrary<UnpairedRead>>(
-                                                  const SalmonOpts& opts,
-                                                  AlignmentLibrary<UnpairedRead>& alnLib,
-                                                  boost::filesystem::path& fname,
-                                                  std::string headerComments);
-template
-void salmon::utils::writeAbundancesFromCollapsed<ReadExperiment>(
-                                                  const SalmonOpts& opts,
-                                                  ReadExperiment& alnLib,
-                                                  boost::filesystem::path& fname,
-                                                  std::string headerComments);
-
-template
-void salmon::utils::normalizeAlphas<ReadExperiment>(const SalmonOpts& sopt,
-                         	     ReadExperiment& alnLib);
-
-template
-void salmon::utils::normalizeAlphas<AlignmentLibrary<UnpairedRead>>(const SalmonOpts& sopt,
-                         	     AlignmentLibrary<UnpairedRead>& alnLib);
-template
-void salmon::utils::normalizeAlphas<AlignmentLibrary<ReadPair>>(const SalmonOpts& sopt,
-                         	     AlignmentLibrary<ReadPair>& alnLib);
-
-
-template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>, ReadExperiment>(
-                ReadExperiment& readExp,
-                Eigen::VectorXd& effLensIn,
-                std::vector<tbb::atomic<double>>& alphas,
-                std::vector<double>& expectedBias
-                );
-
-template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<double>, ReadExperiment>(
-                ReadExperiment& readExp,
-                Eigen::VectorXd& effLensIn,
-                std::vector<double>& alphas,
-                std::vector<double>& expectedBias
-                );
-
-template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>, AlignmentLibrary<ReadPair>>(
-                AlignmentLibrary<ReadPair>& readExp,
-                Eigen::VectorXd& effLensIn,
-                std::vector<tbb::atomic<double>>& alphas,
-                std::vector<double>& expectedBias
-                );
-
-template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<double>, AlignmentLibrary<ReadPair>>(
-                AlignmentLibrary<ReadPair>& readExp,
-                Eigen::VectorXd& effLensIn,
-                std::vector<double>& alphas,
-                std::vector<double>& expectedBias
-                );
-
-template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>, AlignmentLibrary<UnpairedRead>>(
-                AlignmentLibrary<UnpairedRead>& readExp,
-                Eigen::VectorXd& effLensIn,
-                std::vector<tbb::atomic<double>>& alphas,
-                std::vector<double>& expectedBias
-                );
-
-template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<double>, AlignmentLibrary<UnpairedRead>>(
-                AlignmentLibrary<UnpairedRead>& readExp,
-                Eigen::VectorXd& effLensIn,
-                std::vector<double>& alphas,
-                std::vector<double>& expectedBias
-                );
-
-// Old / unused code
+// explicit instantiations for writing abundances ---
+template void salmon::utils::writeAbundances<AlignmentLibrary<ReadPair>>(
+    const SalmonOpts& opts, AlignmentLibrary<ReadPair>& alnLib,
+    boost::filesystem::path& fname, std::string headerComments);
+
+template void salmon::utils::writeAbundances<AlignmentLibrary<UnpairedRead>>(
+    const SalmonOpts& opts, AlignmentLibrary<UnpairedRead>& alnLib,
+    boost::filesystem::path& fname, std::string headerComments);
+template void salmon::utils::writeAbundances<ReadExperiment>(
+    const SalmonOpts& opts, ReadExperiment& alnLib,
+    boost::filesystem::path& fname, std::string headerComments);
+template void
+salmon::utils::writeAbundancesFromCollapsed<AlignmentLibrary<ReadPair>>(
+    const SalmonOpts& opts, AlignmentLibrary<ReadPair>& alnLib,
+    boost::filesystem::path& fname, std::string headerComments);
+
+template void
+salmon::utils::writeAbundancesFromCollapsed<AlignmentLibrary<UnpairedRead>>(
+    const SalmonOpts& opts, AlignmentLibrary<UnpairedRead>& alnLib,
+    boost::filesystem::path& fname, std::string headerComments);
+template void salmon::utils::writeAbundancesFromCollapsed<ReadExperiment>(
+    const SalmonOpts& opts, ReadExperiment& alnLib,
+    boost::filesystem::path& fname, std::string headerComments);
+
+// explicit instantiations for normalizing alpha vectors ---
+template void
+salmon::utils::normalizeAlphas<ReadExperiment>(const SalmonOpts& sopt,
+                                               ReadExperiment& alnLib);
+
+template void salmon::utils::normalizeAlphas<AlignmentLibrary<UnpairedRead>>(
+    const SalmonOpts& sopt, AlignmentLibrary<UnpairedRead>& alnLib);
+template void salmon::utils::normalizeAlphas<AlignmentLibrary<ReadPair>>(
+    const SalmonOpts& sopt, AlignmentLibrary<ReadPair>& alnLib);
+
+// explicit instantiations for effective length updates ---
+/*
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>,
+                                      ReadExperiment>(
+    SalmonOpts& sopt, ReadExperiment& readExp, Eigen::VectorXd& effLensIn,
+    std::vector<tbb::atomic<double>>& alphas, bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<double>, ReadExperiment>(
+    SalmonOpts& sopt, ReadExperiment& readExp, Eigen::VectorXd& effLensIn,
+    std::vector<double>& alphas, bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>,
+                                      AlignmentLibrary<ReadPair>>(
+    SalmonOpts& sopt, AlignmentLibrary<ReadPair>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<tbb::atomic<double>>& alphas,
+    bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<double>,
+                                      AlignmentLibrary<ReadPair>>(
+    SalmonOpts& sopt, AlignmentLibrary<ReadPair>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<double>& alphas, bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>,
+                                      AlignmentLibrary<UnpairedRead>>(
+    SalmonOpts& sopt, AlignmentLibrary<UnpairedRead>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<tbb::atomic<double>>& alphas,
+    bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<double>,
+                                      AlignmentLibrary<UnpairedRead>>(
+    SalmonOpts& sopt, AlignmentLibrary<UnpairedRead>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<double>& alphas, bool finalRound);
+*/
+
+// explicit instantiations for effective length updates ---
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>,
+                                      ReadExperiment>(
+    SalmonOpts& sopt, ReadExperiment& readExp, Eigen::VectorXd& effLensIn,
+    std::vector<tbb::atomic<double>>& alphas, bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<double>, ReadExperiment>(
+    SalmonOpts& sopt, ReadExperiment& readExp, Eigen::VectorXd& effLensIn,
+    std::vector<double>& alphas, bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>,
+                                      AlignmentLibrary<ReadPair>>(
+    SalmonOpts& sopt, AlignmentLibrary<ReadPair>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<tbb::atomic<double>>& alphas,
+    bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<double>,
+                                      AlignmentLibrary<ReadPair>>(
+    SalmonOpts& sopt, AlignmentLibrary<ReadPair>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<double>& alphas, bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>,
+                                      AlignmentLibrary<UnpairedRead>>(
+    SalmonOpts& sopt, AlignmentLibrary<UnpairedRead>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<tbb::atomic<double>>& alphas,
+    bool finalRound);
+
+template Eigen::VectorXd
+salmon::utils::updateEffectiveLengths<std::vector<double>,
+                                      AlignmentLibrary<UnpairedRead>>(
+    SalmonOpts& sopt, AlignmentLibrary<UnpairedRead>& readExp,
+    Eigen::VectorXd& effLensIn, std::vector<double>& alphas, bool finalRound);
+
+//// 0th order model --- code for computing bias factors.
 
 /*
-template< typename T >
-TranscriptGeneMap transcriptToGeneMapFromFeatures(std::vector<GenomicFeature<T>> &feats ) {
-    using std::unordered_set;
-    using std::unordered_map;
-    using std::vector;
-    using std::tuple;
-    using std::string;
-    using std::get;
-
-    using NameID = tuple<string, size_t>;
-
-    IndexVector t2g;
-    NameVector transcriptNames;
-    NameVector geneNames;
-
-    // holds the mapping from transcript ID to gene ID
-    IndexVector t2gUnordered;
-    // holds the set of gene IDs
-    unordered_map<string, size_t> geneNameToID;
-
-    // To read the input and assign ids
-    size_t geneCounter = 0;
-    string transcript;
-    string gene;
-
-    std::sort( feats.begin(), feats.end(),
-	    []( const GenomicFeature<T> & a, const GenomicFeature<T> & b) -> bool {
-	    return a.sattr.transcript_id < b.sattr.transcript_id;
-	    } );
-
-    std::string currentTranscript = "";
-    for ( auto & feat : feats ) {
-
-	auto &gene = feat.sattr.gene_id;
-	auto &transcript = feat.sattr.transcript_id;
-
-	if ( transcript != currentTranscript ) {
-	    auto geneIt = geneNameToID.find(gene);
-	    size_t geneID = 0;
-
-	    if ( geneIt == geneNameToID.end() ) {
-		// If we haven't seen this gene yet, give it a new ID
-		geneNameToID[gene] = geneCounter;
-		geneID = geneCounter;
-		geneNames.push_back(gene);
-		++geneCounter;
-	    } else {
-		// Otherwise lookup the ID
-		geneID = geneIt->second;
-	    }
-
-	    transcriptNames.push_back(transcript);
-	    t2g.push_back(geneID);
-
-	    //++transcriptID;
-	    currentTranscript = transcript;
-	}
+double scoreExpected5 = 1.0;
+double scoreExpected3 = 1.0;
+double scoreObserved5 = 1.0;
+double scoreObserved3 = 1.0;
+for (size_t i = 0; i < 6; ++i) {
+                    scoreExpected5 *= zeroOrderModel5e(idx(tseq[fragStart + i]),
+i);
+                    scoreObserved5 *= zeroOrderModel5o(idx(tseq[fragStart + i]),
+i);
+                    scoreExpected3 *= zeroOrderModel3e(idx(rseq[fragStart + i]),
+i);
+                    scoreObserved3 *= zeroOrderModel3o(idx(rseq[fragStart + i]),
+i);
+                }
+                seqFactorsFW[fragStart + 2] = scoreObserved5 / scoreExpected5;
+                seqFactorsRC[fragStart + 3] = scoreObserved3 / scoreExpected3;
+                */
+
+// seqFactorsRC[fragStart + 1] = seqBias3p[dis(gen)];
+/*
+              seqFactorsFW[fragStart] +=
+                  (readBiasFW.counts[idxFW]/ (transcriptKmerDistFW[idxFW] +
+   seqPriorFW));
+              seqFactorsRC[fragStart] +=
+                  (readBiasRC.counts[idxRC]/ (transcriptKmerDistRC[idxRC] +
+   seqPriorRC));
+*/
+
+/**
+ * Computes (and returns) new effective lengths for the transcripts
+ * based on the current abundance estimates (alphas) and the current
+ * effective lengths (effLensIn).  This approach to sequence-specifc bias is
+ * based on the one taken in Roberts et al. (2011) [1].
+ * Here, we also consider fragment-GC bias which uses a novel method extending
+ * the idea of adjusting the effective lengths.
+ *
+ * [1] Roberts, Adam, et al. "Improving RNA-Seq expression estimates by
+ * correcting for fragment bias."
+ *     Genome Biol 12.3 (2011): R22.
+ */
+/*
+template <typename AbundanceVecT, typename ReadExpT>
+Eigen::VectorXd updateEffectiveLengths(SalmonOpts& sopt, ReadExpT& readExp,
+                                       Eigen::VectorXd& effLensIn,
+                                       AbundanceVecT& alphas, bool writeBias) {
+
+  using std::vector;
+  using BlockedIndexRange = tbb::blocked_range<size_t>;
 
+  double minAlpha = 1e-8;
+  uint32_t gcSamp{sopt.pdfSampFactor};
+  bool gcBiasCorrect{sopt.gcBiasCorrect};
+  bool seqBiasCorrect{sopt.biasCorrect};
+  bool posBiasCorrect{sopt.posBiasCorrect};
+
+  double probFwd = readExp.gcFracFwd();
+  double probRC = readExp.gcFracRC();
+
+  if (gcBiasCorrect and probFwd < 0.0) {
+    sopt.jointLog->warn("Had no fragments from which to estimate "
+                        "fwd vs. rev-comp mapping rate.  Skipping "
+                        "sequence-specific / fragment-gc bias correction");
+    return effLensIn;
+  }
+
+  // calculate read bias normalization factor -- total count in read
+  // distribution.
+  auto& obs5 = readExp.readBiasModel(salmon::utils::Direction::FORWARD);
+  auto& obs3 =
+      readExp.readBiasModel(salmon::utils::Direction::REVERSE_COMPLEMENT);
+  obs5.normalize();
+  obs3.normalize();
+
+  auto& pos5Obs = readExp.posBias(salmon::utils::Direction::FORWARD);
+  auto& pos3Obs = readExp.posBias(salmon::utils::Direction::REVERSE_COMPLEMENT);
+
+  int32_t K = static_cast<int32_t>(obs5.getContextLength());
+
+  FragmentLengthDistribution& fld = *(readExp.fragmentLengthDistribution());
+
+  // The *expected* biases from GC effects
+  auto& transcriptGCDist = readExp.expectedGCBias();
+  auto& gcCounts = readExp.observedGC();
+  double readGCNormFactor = 0.0;
+  int32_t fldLow{0};
+  int32_t fldHigh{1};
+
+  // The CDF and PDF of the fragment length distribution
+  std::vector<double> cdf(fld.maxVal() + 1, 0.0);
+  std::vector<double> pdf(fld.maxVal() + 1, 0.0);
+  {
+    transcriptGCDist.clear();
+    transcriptGCDist.resize(101, 0.0);
+
+    bool lb{false};
+    bool ub{false};
+    for (size_t i = 0; i <= fld.maxVal(); ++i) {
+      pdf[i] = std::exp(fld.pmf(i));
+      cdf[i] = (i > 0) ? cdf[i - 1] + pdf[i] : pdf[i];
+      auto density = cdf[i];
+
+      if (!lb and density >= 0.005) {
+        lb = true;
+        fldLow = i;
+      }
+      if (!ub and density >= 0.995) {
+        ub = true;
+        fldHigh = i;
+      }
     }
 
-    return TranscriptGeneMap(transcriptNames, geneNames, t2g);
-}
-*/
+    if (gcBiasCorrect) {
+      for (auto& c : gcCounts) {
+        readGCNormFactor += c;
+      }
+    }
+  }
+
+  // Make this const so there are no shenanigans
+  const auto& transcripts = readExp.transcripts();
+
+  double minObservedLength = effLensIn.minCoeff();
+
+  // The effective lengths adjusted for bias
+  Eigen::VectorXd effLensOut(effLensIn.size());
+
+  // How much to cut off
+  int32_t trunc = K;
+
+  using GCBiasVecT = std::vector<double>;
+  using SeqBiasVecT = std::vector<double>;
+
+  //
+  // These will store "thread local" parameters
+  // for the appropriate bias terms.
+  //
+  class CombineableBiasParams {
+  public:
+    CombineableBiasParams(uint32_t K) {
+      expectGC = std::vector<double>(101, 0.0);
+      expectPos5 = std::vector<SimplePosBias>(5);
+      expectPos3 = std::vector<SimplePosBias>(5);
+    }
+
+    std::vector<SimplePosBias> expectPos5;
+    std::vector<SimplePosBias> expectPos3;
+    SBModel expectSeqFW;
+    SBModel expectSeqRC;
+    std::vector<double> expectGC;
+  };
+
+  auto revComplement = [](const char* s, int32_t l, std::string& o) -> void {
+    if (l > o.size()) {
+      o.resize(l, 'A');
+    }
+    int32_t j = 0;
+    for (int32_t i = l - 1; i >= 0; --i, ++j) {
+      switch (s[i]) {
+      case 'A':
+      case 'a':
+        o[j] = 'T';
+        break;
+      case 'C':
+      case 'c':
+        o[j] = 'G';
+        break;
+      case 'T':
+      case 't':
+        o[j] = 'A';
+        break;
+      case 'G':
+      case 'g':
+        o[j] = 'C';
+        break;
+      default:
+        o[j] = 'N';
+        break;
+      }
+    }
+  };
+
+  //
+  // The local bias terms from each thread can be combined
+  // via simple summation.
+  //
+  auto getBiasParams = [K]() -> CombineableBiasParams {
+    return CombineableBiasParams(K);
+  };
+  tbb::combinable<CombineableBiasParams> expectedDist(getBiasParams);
+  std::atomic<size_t> numBackgroundTranscripts{0};
+  std::atomic<size_t> numExpressedTranscripts{0};
+
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(transcripts.size())),
+      [&](const BlockedIndexRange& range) -> void {
+
+        auto& expectSeqFW = expectedDist.local().expectSeqFW;
+        auto& expectSeqRC = expectedDist.local().expectSeqRC;
+        auto& expectGC = expectedDist.local().expectGC;
+        auto& expectPos5 = expectedDist.local().expectPos5;
+        auto& expectPos3 = expectedDist.local().expectPos3;
+
+        std::string rcSeq;
+        // For each transcript
+        for (auto it : boost::irange(range.begin(), range.end())) {
+
+          // Get the transcript
+          const auto& txp = transcripts[it];
+
+          // Get the reference length and the
+          // "initial" effective length (not considering any biases)
+          int32_t refLen = static_cast<int32_t>(txp.RefLength);
+          int32_t elen = static_cast<int32_t>(txp.EffectiveLength);
+
+          // The difference between the actual and effective length
+          int32_t unprocessedLen = std::max(0, refLen - elen);
+
+          // Skip transcripts with trivial expression or that are too
+          // short
+          if (alphas[it] < minAlpha or unprocessedLen <= 0) {  // or
+txp.uniqueUpdateFraction() < 0.90) {
+            if (alphas[it] >= minAlpha) {
+              ++numExpressedTranscripts;
+            }
+            continue;
+          }
+          ++numBackgroundTranscripts;
+
+          // Otherwise, proceed giving this transcript the following weight
+          double weight = (alphas[it] / effLensIn(it));
+
+          // This transcript's sequence
+          const char* tseq = txp.Sequence();
+          revComplement(tseq, refLen, rcSeq);
+          const char* rseq = rcSeq.c_str();
+
+          Mer fwmer;
+          fwmer.from_chars(tseq);
+          Mer rcmer;
+          rcmer.from_chars(rseq);
+          int32_t contextLength{expectSeqFW.getContextLength()};
+
+          // For each position along the transcript
+          // Starting from the 5' end and moving toward the 3' end
+          for (int32_t fragStartPos = 0; fragStartPos < refLen - K;
+               ++fragStartPos) {
+            // Seq-specific bias
+            if (seqBiasCorrect) {
+              int32_t contextEndPos =
+                  fragStartPos + K - 1; // -1 because pos is *inclusive*
+
+              if (contextEndPos >= 0 and contextEndPos < refLen) {
+                int32_t maxFragLen =
+                    refLen - (fragStartPos + expectSeqFW.contextBefore(false));
+                if (maxFragLen >= 0 and maxFragLen < refLen) {
+                  auto cdensity =
+                      (maxFragLen >= cdf.size()) ? 1.0 : cdf[maxFragLen];
+                  expectSeqFW.addSequence(fwmer, weight * cdensity);
+                  expectSeqRC.addSequence(rcmer, weight * cdensity);
+                }
+              }
+
+              // shift the context one nucleotide to the right
+              fwmer.shift_left(tseq[fragStartPos + contextLength]);
+              rcmer.shift_left(rseq[fragStartPos + contextLength]);
+            } // end: Seq-specific bias
+
+            // fragment-GC bias
+            if (gcBiasCorrect) {
+              size_t sp = static_cast<size_t>((fldLow > 0) ? fldLow - 1 : 0);
+              double prevFLMass = cdf[sp];
+              int32_t fragStart = fragStartPos;
+              for (int32_t fl = fldLow; fl <= fldHigh; fl += gcSamp) {
+                int32_t fragEnd = fragStart + fl - 1;
+                if (fragEnd < refLen) {
+                  // The GC fraction for this putative fragment
+                  auto gcFrac = txp.gcFrac(fragStart, fragEnd);
+                  expectGC[gcFrac] += weight * (cdf[fl] - prevFLMass);
+                  prevFLMass = cdf[fl];
+                } else {
+                  break;
+                } // no more valid positions
+              }   // end: for each fragment length
+            }     // end: fragment GC bias
+
+            // positional bias
+            if (posBiasCorrect) {
+              int32_t maxFragLenFW = refLen - fragStartPos + 1;
+              int32_t maxFragLenRC = fragStartPos;
+              auto densityFW =
+                  (maxFragLenFW >= cdf.size()) ? 1.0 : cdf[maxFragLenFW];
+              auto densityRC =
+                  (maxFragLenRC >= cdf.size()) ? 1.0 : cdf[maxFragLenRC];
+              if (weight * densityFW > 1e-8) {
+                expectPos5[txp.lengthClassIndex()].addMass(
+                    fragStartPos, txp.RefLength, std::log(weight * densityFW));
+              }
+              if (weight * densityRC > 1e-8) {
+                expectPos3[txp.lengthClassIndex()].addMass(
+                    fragStartPos, txp.RefLength, std::log(weight * densityRC));
+              }
+            }
+          } // end: for every fragment start position
+        }   // end for each transcript
+
+      } // end tbb for function
+      );
+
+  size_t bgCutoff =
+      std::min(static_cast<size_t>(150),
+               static_cast<size_t>(numBackgroundTranscripts *  0.1));
+  if (numBackgroundTranscripts < bgCutoff) {
+    sopt.jointLog->warn("I found only {} transcripts meeting the necessary "
+                        "conditions to contribute to "
+                        "the bias background distribution.  This is likely too "
+                        "small to safely do bias correction. "
+                        "I'm skipping bias correction",
+                        numBackgroundTranscripts.load());
+    sopt.biasCorrect = false;
+    sopt.gcBiasCorrect = false;
+    sopt.posBiasCorrect = false;
+    return effLensIn;
+  }
+
+   //
+   // The local bias terms from each thread can be combined
+   // via simple summation.  Here, we combine the locally-computed
+   // bias terms.
+   //
+  SBModel exp5;
+  SBModel exp3;
+  std::vector<SimplePosBias> pos5Exp(5);
+  std::vector<SimplePosBias> pos3Exp(5);
+  auto combineBiasParams =
+      [seqBiasCorrect, gcBiasCorrect, posBiasCorrect, &pos5Exp, &pos3Exp, &exp5,
+       &exp3, &transcriptGCDist](const CombineableBiasParams& p) -> void {
+    if (seqBiasCorrect) {
+      exp5.combineCounts(p.expectSeqFW);
+      exp3.combineCounts(p.expectSeqRC);
+    }
+    if (gcBiasCorrect) {
+      for (size_t i = 0; i < p.expectGC.size(); ++i) {
+        transcriptGCDist[i] += p.expectGC[i];
+      }
+    }
+    if (posBiasCorrect) {
+      for (size_t i = 0; i < p.expectPos5.size(); ++i) {
+        pos5Exp[i].combine(p.expectPos5[i]);
+        pos3Exp[i].combine(p.expectPos3[i]);
+      }
+    }
+  };
+  expectedDist.combine_each(combineBiasParams);
+
+  // finalize expected positional biases
+  if (posBiasCorrect) {
+    for (size_t i = 0; i < pos5Exp.size(); ++i) {
+      pos5Exp[i].finalize();
+      pos3Exp[i].finalize();
+    }
+  }
+
+  auto smoothDist = [](std::vector<double>& v, int w, int d) -> void {
+    v = sg_smooth(v, w, d);
+    double gcSum = 0.0;
+    for (size_t i = 0; i < v.size(); ++i) {
+      if (v[i] < 1e-5) { v[i] = 1e-5; }
+      gcSum += v[i];
+    }
+    for (size_t i = 0; i < v.size(); ++i) {
+      v[i] /= gcSum;
+    }
+  };
+
+  sopt.jointLog->info("Computed expected counts (for bias correction)");
+
+  // Compute appropriate priors and normalization factors
+  double txomeGCNormFactor = 0.0;
+  double gcPrior = 0.0;
+  if (gcBiasCorrect) {
+    for (auto m : transcriptGCDist) {
+      txomeGCNormFactor += m;
+    }
+    auto pmass = 1e-5 * 101.0;
+    gcPrior =
+        ((pmass / (readGCNormFactor - pmass)) * txomeGCNormFactor) / 101.0;
+    txomeGCNormFactor += gcPrior * 101.0;
+  }
+  double scaleGCBias = (txomeGCNormFactor / readGCNormFactor);
+
+  // Compute the bias weights for each fragment-GC bin
+  Eigen::VectorXd gcBias(101);
+  double gcBiasMax = 100.0;
+  double gcBiasMin = 1.0 / gcBiasMax;
+  if (gcBiasCorrect) {
+    for (size_t i = 0; i < 101; ++i) {
+      gcBias[i] = scaleGCBias * (gcCounts[i] / (gcPrior + transcriptGCDist[i]));
+      gcBias[i] = (gcBias[i] > gcBiasMax) ? gcBiasMax :
+          ((gcBias[i] < gcBiasMin) ? gcBiasMin : gcBias[i]);
+    }
+    //gcBias *= scaleGCBias;
+  }
+
+
+  exp5.normalize();
+  exp3.normalize();
+
+  bool noThreshold = sopt.noBiasLengthThreshold;
+  std::atomic<size_t> numCorrected{0};
+  std::atomic<size_t> numUncorrected{0};
+
+  // Write out the bias model parameters we learned
+  if (writeBias) {
+    boost::filesystem::path auxDir = sopt.outputDirectory / sopt.auxDir;
+    bool auxSuccess = boost::filesystem::is_directory(auxDir);
+    if (!auxSuccess) {
+      auxSuccess = boost::filesystem::create_directories(auxDir);
+    }
+    if (auxSuccess) {
+      auto exp5fn = auxDir / "exp5_marginals.txt";
+      auto& exp5m = exp5.marginals();
+      std::ofstream exp5f(exp5fn.string());
+      exp5f << exp5m.rows() << '\t' << exp5m.cols() << '\n';
+      exp5f << exp5m;
+      exp5f.close();
+
+      auto exp3fn = auxDir / "exp3_marginals.txt";
+      auto& exp3m = exp3.marginals();
+      std::ofstream exp3f(exp3fn.string());
+      exp3f << exp3m.rows() << '\t' << exp3m.cols() << '\n';
+      exp3f << exp3m;
+      exp3f.close();
+
+      auto obs5fnc = auxDir / "obs5_conditionals.txt";
+      std::ofstream obs5fc(obs5fnc.string());
+      obs5.dumpConditionalProbabilities(obs5fc);
+      obs5fc.close();
+
+      auto obs3fnc = auxDir / "obs3_conditionals.txt";
+      std::ofstream obs3fc(obs3fnc.string());
+      obs3.dumpConditionalProbabilities(obs3fc);
+      obs3fc.close();
+
+      auto exp5fnc = auxDir / "exp5_conditionals.txt";
+      std::ofstream exp5fc(exp5fnc.string());
+      exp5.dumpConditionalProbabilities(exp5fc);
+      exp5fc.close();
+
+      auto exp3fnc = auxDir / "exp3_conditionals.txt";
+      std::ofstream exp3fc(exp3fnc.string());
+      exp3.dumpConditionalProbabilities(exp3fc);
+      exp3fc.close();
+    } else {
+      sopt.jointLog->warn(
+          "Couldn't create auxiliary directory {} to write bias parameters",
+          auxDir);
+    }
+  }
+
+  std::atomic<uint32_t> numProcessed{0};
+  size_t numTranscripts = transcripts.size();
+  size_t stepSize = static_cast<size_t>(transcripts.size() * 0.1);
+  size_t nextUpdate{0};
+
+  std::mutex updateMutex;
+  // Compute the effective lengths of each transcript (in parallel)
+  tbb::parallel_for(
+      BlockedIndexRange(size_t(0), size_t(transcripts.size())),
+      [&](const BlockedIndexRange& range) -> void {
+
+        std::string rcSeq;
+        // For each transcript
+        for (auto it : boost::irange(range.begin(), range.end())) {
+
+          auto& txp = transcripts[it];
+
+          // eff. length starts out as 0
+          double effLength = 0.0;
+
+          // Reference length
+          int32_t refLen = static_cast<int32_t>(txp.RefLength);
+          // Effective length before any bias correction
+          int32_t elen = static_cast<int32_t>(txp.EffectiveLength);
+
+          // How much of this transcript (beginning and end) should
+          // not be considered
+          int32_t unprocessedLen = std::max(0, refLen - elen);
+
+          if (alphas[it] >= minAlpha and unprocessedLen > 0) {
+
+            Eigen::VectorXd seqFactorsFW(refLen);
+            Eigen::VectorXd seqFactorsRC(refLen);
+            seqFactorsFW.setOnes();
+            seqFactorsRC.setOnes();
+
+            std::vector<double> posFactorsFW(refLen, 1.0);
+            std::vector<double> posFactorsRC(refLen, 1.0);
+
+            // This transcript's sequence
+            const char* tseq = txp.Sequence();
+            revComplement(tseq, refLen, rcSeq);
+            const char* rseq = rcSeq.c_str();
+
+            int32_t fl = fldLow;
+            auto maxLen = std::min(refLen, fldHigh + 1);
+            bool done{fl >= maxLen};
+
+            if (posBiasCorrect) {
+              std::vector<double> posFactorsObs5(refLen, 1.0);
+              std::vector<double> posFactorsObs3(refLen, 1.0);
+              std::vector<double> posFactorsExp5(refLen, 1.0);
+              std::vector<double> posFactorsExp3(refLen, 1.0);
+              auto li = txp.lengthClassIndex();
+              auto& p5O = pos5Obs[li];
+              auto& p3O = pos3Obs[li];
+              auto& p5E = pos5Exp[li];
+              auto& p3E = pos3Exp[li];
+              p5O.projectWeights(posFactorsObs5);
+              p3O.projectWeights(posFactorsObs3);
+              p5E.projectWeights(posFactorsExp5);
+              p3E.projectWeights(posFactorsExp3);
+              for (int32_t fragStart = 0; fragStart < refLen - K; ++fragStart) {
+                posFactorsFW[fragStart] =
+                    posFactorsObs5[fragStart] / posFactorsExp5[fragStart];
+                posFactorsRC[fragStart] =
+                    posFactorsObs3[fragStart] / posFactorsExp3[fragStart];
+              }
+            }
+
+            // Evaluate the sequence specific bias (5' and 3') over the length
+            // of the transcript.  After this loop,
+            // seqFactorsFW will contain the sequence-specific bias for each
+            // position on the 5' strand
+            // and seqFactorsRC will contain the sequence-specific bias for each
+            // position on the 3' strand.
+            if (seqBiasCorrect) {
+              Mer mer;
+              Mer rcmer;
+              mer.from_chars(tseq);
+              rcmer.from_chars(rseq);
+              int32_t contextLength{exp5.getContextLength()};
+
+              for (int32_t fragStart = 0; fragStart < refLen - K; ++fragStart) {
+                int32_t readStart = fragStart + obs5.contextBefore(false);
+                int32_t kmerEndPos =
+                    fragStart + K - 1; // -1 because pos is *inclusive*
+
+                if (kmerEndPos >= 0 and kmerEndPos < refLen and
+                    readStart < refLen) {
+                  seqFactorsFW[readStart] =
+                      std::exp(obs5.evaluateLog(mer) - exp5.evaluateLog(mer));
+                  seqFactorsRC[readStart] = std::exp(obs3.evaluateLog(rcmer) -
+                                                     exp3.evaluateLog(rcmer));
+                }
+                // shift the context one nucleotide to the right
+                mer.shift_left(tseq[fragStart + contextLength]);
+                rcmer.shift_left(rseq[fragStart + contextLength]);
+              }
+              // We need these in 5' -> 3' order, so reverse them
+              seqFactorsRC.reverseInPlace();
+            } // end sequence-specific factor calculation
+
+            if (numProcessed > nextUpdate) {
+              updateMutex.try_lock();
+          if (numProcessed > nextUpdate) {
+        sopt.jointLog->info(
+                    "processed bias for {:3.1f}% of the transcripts",
+                    100.0 * (numProcessed /
+static_cast<double>(numTranscripts)));
+        nextUpdate += stepSize;
+        if (nextUpdate > numTranscripts) {
+          nextUpdate = numTranscripts - 1;
+        }
+          }
+          updateMutex.unlock();
+        }
+        ++numProcessed;
+
+        size_t sp = static_cast<size_t>((fl > 0) ? fl - 1 : 0);
+            double prevFLMass = cdf[sp];
+            double unbiasedMass{0.0};
+
+            // For every possible fragment length
+            while (!done) {
+              if (fl >= maxLen) {
+                done = true;
+                fl = maxLen - 1;
+              }
+              double flWeight = cdf[fl] - prevFLMass;
+              prevFLMass = cdf[fl];
+
+              double flMassTotal{0.0};
+              // For every position a fragment of length fl could start
+              for (int32_t kmerStartPos = 0; kmerStartPos < refLen - fl;
+                   ++kmerStartPos) {
+                int32_t fragStart = kmerStartPos;
+                int32_t fragEnd = fragStart + fl - 1;
+
+                // If the 3' end is within the transcript
+                if (fragStart < refLen and fragEnd < refLen) {
+                  double fragFactor =
+                      seqFactorsFW[fragStart] * seqFactorsRC[fragEnd];
+                  if (gcBiasCorrect) {
+                    auto gcFrac = txp.gcFrac(fragStart, fragEnd);
+                    fragFactor *= gcBias[gcFrac];
+                  }
+                  if (posBiasCorrect) {
+                    fragFactor *=
+                        posFactorsFW[fragStart] * posFactorsRC[fragEnd];
+                  }
+                  flMassTotal += fragFactor;
+                } else {
+                  break;
+                }
+              }
+
+              effLength += (flWeight * flMassTotal);
+              fl += gcSamp;
+            }
+            // effLength = flMassTotal;//flMasses.sum();
+
+          } // for the processed transcript
+
+          // throw caution to the wind
+          double thresh = noThreshold ? 1.0 : unprocessedLen;
+
+          // JUNE 17
+          if (noThreshold) {
+              if (unprocessedLen > 0.0 and effLength > thresh) {
+                  effLensOut(it) = effLength;
+              } else {
+                  effLensOut(it) = effLensIn(it);
+              }
+          } else {
+              double offset = std::max(1.0, thresh);
+              double effLengthNoBias = static_cast<double>(elen);
+              auto barrierLength = [effLengthNoBias, offset](double x) -> double
+{
+                  return std::max(x, std::min(effLengthNoBias, offset));
+                  //return x + ((unprocessedLen * unprocessedLen) /
+                  //            (x + unprocessedLen));
+              };
+              effLength = barrierLength(effLength);
+              effLensOut(it) = effLength;
+          }
+
+          // END: JUNE 17
+//
+//          // commented out JUNE 20
+//          // To correct the transcript length, we require it to be
+//          // "sufficiently" long to begin with.
+//          // WORKING MODEL: if (unprocessedLen > 0.0 and elen > thresh and
+effLength > thresh) {
+//	  if (unprocessedLen > 0.0 and effLength > thresh) {
+//            ++numCorrected;
+//            effLensOut(it) = effLength;
+//          } else {
+//            ++numUncorrected;
+//            effLensOut(it) = effLensIn(it);
+//          }
 
 
 
+        }
+      } // end parallel_for lambda
+      );
+  sopt.jointLog->info("processed bias for 100.0% of the transcripts");
+  return effLensOut;
+}
+*/
diff --git a/src/SequenceBiasModel.cpp b/src/SequenceBiasModel.cpp
index 89b9583..351a553 100644
--- a/src/SequenceBiasModel.cpp
+++ b/src/SequenceBiasModel.cpp
@@ -3,7 +3,8 @@
 
 #include <boost/config.hpp> // for BOOST_LIKELY/BOOST_UNLIKELY
 
-#include "spdlog/details/format.h"
+#include "spdlog/fmt/ostr.h"
+#include "spdlog/fmt/fmt.h"
 #include "SequenceBiasModel.hpp"
 #include "LibraryFormat.hpp"
 #include "Transcript.hpp"
diff --git a/src/SimplePosBias.cpp b/src/SimplePosBias.cpp
new file mode 100644
index 0000000..77d822d
--- /dev/null
+++ b/src/SimplePosBias.cpp
@@ -0,0 +1,78 @@
+#include "SimplePosBias.hpp"
+#include "SalmonMath.hpp"
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+
+SimplePosBias::SimplePosBias(int32_t numBins, bool logSpace)
+    : numBins_(numBins), // bin count; addMass(pos, length, mass) divides by it
+      masses_(numBins, (logSpace ? salmon::math::LOG_1 : 1.0)), // every bin starts at LOG_1 or 1.0
+      isLogged_(logSpace) {} // records which of the two start values was chosen
+
+// Add @mass to bin @bin via salmon::math::logAdd; @bin is not range-checked.
+void SimplePosBias::addMass(int32_t bin, double mass) {
+  masses_[bin] = salmon::math::logAdd(masses_[bin], mass);
+}
+
+// Compute the bin for @pos on a transcript of length @length and add @mass to
+// it. An out-of-range bin is reported, then clamped (needs at least one bin).
+void SimplePosBias::addMass(int32_t pos, int32_t length, double mass) {
+  double step = static_cast<double>(length) / numBins_;
+  int32_t bin = static_cast<int32_t>(std::floor(pos / step));
+  int32_t lastBin = static_cast<int32_t>(masses_.size()) - 1;
+  if (bin < 0 or bin > lastBin) { std::cerr << "bin = " << bin << '\n'; }
+  // Clamp so the write below can never land outside masses_.
+  addMass(std::max<int32_t>(0, std::min(bin, lastBin)), mass);
+}
+
+// Fill @out with the spline s_ sampled at the relative positions
+// p / out.size() (p = 0 .. out.size() - 1), flooring each value at 0.001.
+void SimplePosBias::projectWeights(std::vector<double>& out) {
+  const auto numPositions = out.size();
+  size_t p = 0;
+  for (auto& weight : out) {
+    const double relPos = static_cast<double>(p++) / numPositions;
+    weight = std::max(0.001, s_(relPos));
+  }
+}
+
+// Merge @other into this one: per-bin logAdd of the two equally sized vectors.
+void SimplePosBias::combine(const SimplePosBias& other) {
+  assert(other.masses_.size() == masses_.size());
+  auto theirs = other.masses_.begin();
+  for (auto& mine : masses_) {
+    mine = salmon::math::logAdd(mine, *theirs++);
+  }
+}
+
+// Finish accumulation: exponentiate the per-bin masses, normalize them by
+// their sum, and hand the resulting knots to the spline s_ via set_points().
+void SimplePosBias::finalize() {
+  // convert from log space, totalling the exponentiated masses as we go
+  double sum{0.0};
+  for (size_t i = 0; i < masses_.size(); ++i) {
+    masses_[i] = std::exp(masses_[i]);
+    sum += masses_[i];
+  }
+  // Spline y-values: one per bin plus one extra knot at each end
+  std::vector<double> splineMass(masses_.size() + 2);
+  // The end knots duplicate the first and last bins (each divided by sum)
+  double startKnot = masses_.front() / sum;
+  double stopKnot = masses_.back() / sum;
+  double splineSum = sum + startKnot + stopKnot; // NOTE(review): raw sum plus normalized knots -- confirm this mix is intended
+  splineMass[0] = startKnot;
+  for (size_t i = 0; i < masses_.size(); ++i) {
+    splineMass[i + 1] = (masses_[i] / splineSum);
+    masses_[i] /= sum; // normalize the stored bin in place
+  }
+  splineMass.back() = stopKnot;
+  // Spline x-values: 0.0, then positionBins_[i] - 0.01 per bin, then 1.0
+  std::vector<double> splineBins(splineMass.size());
+  splineBins[0] = 0.0;
+  for (size_t i = 0; i < masses_.size(); ++i) {
+    splineBins[i+1] = positionBins_[i] - 0.01; // positionBins_ is defined outside this file
+  }
+  splineBins.back() = 1.0;
+  // Fit s_, the spline that projectWeights() evaluates
+  s_.set_points(splineBins, splineMass);
+}
diff --git a/src/merge_files.cc b/src/merge_files.cc
deleted file mode 100644
index 6203c60..0000000
--- a/src/merge_files.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-/*  This file is part of Jellyfish.
-
-    Jellyfish is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Jellyfish is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Jellyfish.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "merge_files.hpp"
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <memory>
-#include <string>
-
-#include <jellyfish/err.hpp>
-#include <jellyfish/misc.hpp>
-#include <jellyfish/mer_heap.hpp>
-#include <jellyfish/jellyfish.hpp>
-#include <jellyfish/rectangular_binary_matrix.hpp>
-#include <jellyfish/cpp_array.hpp>
-
-using jellyfish::file_header;
-using jellyfish::RectangularBinaryMatrix;
-using jellyfish::mer_dna;
-using jellyfish::cpp_array;
-typedef std::auto_ptr<binary_reader> binary_reader_ptr;
-typedef std::auto_ptr<text_reader> text_reader_ptr;
-
-struct file_info {
-  std::ifstream is;
-  file_header   header;
-
-  file_info(const char* path) :
-  is(path),
-  header(is)
-  { }
-};
-typedef std::auto_ptr<RectangularBinaryMatrix> matrix_ptr;
-
-template<typename reader_type, typename writer_type>
-void do_merge(cpp_array<file_info>& files, std::ostream& out, writer_type& writer,
-              uint64_t min, uint64_t max) {
-  cpp_array<reader_type> readers(files.size());
-  typedef jellyfish::mer_heap::heap<mer_dna, reader_type> heap_type;
-  typedef typename heap_type::const_item_t heap_item;
-  heap_type heap(files.size());
-
-  for(size_t i = 0; i < files.size(); ++i) {
-    readers.init(i, files[i].is, &files[i].header);
-    if(readers[i].next())
-      heap.push(readers[i]);
-  }
-
-  heap_item head = heap.head();
-  mer_dna   key;
-  while(heap.is_not_empty()) {
-    key = head->key_;
-    uint64_t sum = 0;
-    do {
-      sum += head->val_;
-      heap.pop();
-      if(head->it_->next())
-        heap.push(*head->it_);
-      head = heap.head();
-    } while(head->key_ == key && heap.is_not_empty());
-    if(sum >= min && sum <= max)
-      writer.write(out, key, sum);
-  }
-}
-
-// Merge files. Throws an error if unsuccessful.
-void merge_files(std::vector<const char*> input_files,
-                 const char* out_file,
-                 file_header& out_header,
-                 uint64_t min, uint64_t max) {
-  unsigned int key_len            = 0;
-  size_t       max_reprobe_offset = 0;
-  size_t       size               = 0;
-  unsigned int out_counter_len    = std::numeric_limits<unsigned int>::max();
-  std::string  format;
-  matrix_ptr   matrix;
-
-  cpp_array<file_info> files(input_files.size());
-
-  // create an iterator for each hash file
-  for(size_t i = 0; i < files.size(); i++) {
-    files.init(i, input_files[i]);
-    if(!files[i].is.good())
-      eraise(MergeError) << "Failed to open input file '" << input_files[i] << "'";
-
-    file_header& h = files[i].header;
-    if(i == 0) {
-      key_len            = h.key_len();
-      max_reprobe_offset = h.max_reprobe_offset();
-      size               = h.size();
-      matrix.reset(new RectangularBinaryMatrix(h.matrix()));
-      out_header.size(size);
-      out_header.key_len(key_len);
-      format = h.format();
-      out_header.matrix(*matrix);
-      out_header.max_reprobe(h.max_reprobe());
-      size_t reprobes[h.max_reprobe() + 1];
-      h.get_reprobes(reprobes);
-      out_header.set_reprobes(reprobes);
-      out_counter_len = std::min(out_counter_len, h.counter_len());
-    } else {
-      if(format != h.format())
-        eraise(MergeError) << "Can't merge files with different formats (" << format << ", " << h.format() << ")";
-      if(h.key_len() != key_len)
-        eraise(MergeError) << "Can't merge hashes of different key lengths (" << key_len << ", " << h.key_len() << ")";
-      if(h.max_reprobe_offset() != max_reprobe_offset)
-        eraise(MergeError) << "Can't merge hashes with different reprobing strategies";
-      if(h.size() != size)
-        eraise(MergeError) << "Can't merge hash with different size (" << size << ", " << h.size() << ")";
-      if(h.matrix() != *matrix)
-        eraise(MergeError) << "Can't merge hash with different hash function";
-    }
-  }
-  mer_dna::k(key_len / 2);
-
-  std::ofstream out(out_file);
-  if(!out.good())
-    eraise(MergeError) << "Can't open out file '" << out_file << "'";
-  out_header.format(format);
-
-  if(!format.compare(binary_dumper::format)) {
-    out_header.counter_len(out_counter_len);
-    out_header.write(out);
-    binary_writer writer(out_counter_len, key_len);
-    do_merge<binary_reader, binary_writer>(files, out, writer, min, max);
-  } else if(!format.compare(text_dumper::format)) {
-    out_header.write(out);
-    text_writer writer;
-    do_merge<text_reader, text_writer>(files, out, writer, min, max);
-  } else {
-    eraise(MergeError) << "Unknown format '" << format << "'";
-  }
-  out.close();
-}
diff --git a/tests/GCSampleTests.cpp b/tests/GCSampleTests.cpp
new file mode 100644
index 0000000..00fb58b
--- /dev/null
+++ b/tests/GCSampleTests.cpp
@@ -0,0 +1,61 @@
+#include <random>
+
+std::string generateRandomSequence(size_t length, std::uniform_int_distribution<>& dis, std::mt19937& gen) {
+  const char alphabet[] = {'A', 'C', 'G', 'T'}; // dis must only yield 0..3
+  std::string seq(length, 'N');
+  for (char& base : seq) {
+    base = alphabet[dis(gen)]; // one draw per position, front to back
+  }
+  return seq;
+}
+
+SCENARIO("GC sampling works properly") {
+    // gcAt() must agree for transcripts set up with third argument 5 vs. 1
+    GIVEN("A collection of random transcript sequences") {
+      std::random_device rd;
+      std::mt19937 gen(rd());
+      std::mt19937 gen2(rd());
+      std::uniform_int_distribution<> dis(0, 3); // index into the 4 nucleotides
+      std::uniform_int_distribution<> dis2(500, 1500); // transcript lengths
+      // Raw buffers handed to the Transcript objects below; freed at the end.
+      char** txpSeqs = new char*[1000];
+      char** names = new char*[1000];
+      std::vector<size_t> lengths;
+      for (size_t tn = 0; tn < 1000; ++tn) {
+        auto l = dis2(gen2);
+        lengths.emplace_back(l);
+        auto s = generateRandomSequence(l, dis, gen);
+        txpSeqs[tn] = new char[s.length() + 1];
+        names[tn] = new char[3];
+        std::strcpy(names[tn], "HI"); // "HI" + NUL exactly fills the 3 bytes
+        std::strcpy(txpSeqs[tn], s.c_str());
+      }
+      std::vector<Transcript> txpsSampled;
+      std::vector<Transcript> txpsUnSampled;
+      for (size_t tn = 0; tn < 1000; ++tn) {
+        auto len = lengths[tn];
+        txpsSampled.emplace_back(tn, names[tn], len);
+        txpsUnSampled.emplace_back(tn, names[tn], len);
+        txpsSampled[tn].setSequenceBorrowed(txpSeqs[tn], true, 5);
+        txpsUnSampled[tn].setSequenceBorrowed(txpSeqs[tn], true, 1);
+      }
+      // Every position of every transcript must report the same gcAt() value.
+      for (size_t tn = 0; tn < 1000; ++tn) {
+        WHEN("Computing GC content") {
+          auto l = txpsSampled[tn].RefLength;
+          for (size_t i = 0; i < l; ++i) {
+            THEN("Sampled is the same as unsampled") {
+              REQUIRE(txpsSampled[tn].gcAt(i) == txpsUnSampled[tn].gcAt(i));
+            }
+          }
+        }
+      }
+      // Everything here came from new[], so it must be released with delete[].
+      for (size_t tn = 0; tn < 1000; ++tn) {
+        delete[] txpSeqs[tn];
+        delete[] names[tn];
+      }
+      delete[] txpSeqs;
+      delete[] names;
+    } // end GIVEN
+}
diff --git a/tests/KmerHistTests.cpp b/tests/KmerHistTests.cpp
index 02de5f9..de6fc7a 100644
--- a/tests/KmerHistTests.cpp
+++ b/tests/KmerHistTests.cpp
@@ -46,6 +46,37 @@ SCENARIO("Kmers encode and decode correctly") {
     }
 }
 
+std::string rc(const std::string& s) {
+    // Walk s back to front, appending the complement of each A/C/G/T;
+    // any other character contributes nothing to the result.
+    std::string revComp;
+    for (auto it = s.rbegin(); it != s.rend(); ++it) {
+        if (*it == 'A') { revComp += 'T'; }
+        else if (*it == 'C') { revComp += 'G'; }
+        else if (*it == 'G') { revComp += 'C'; }
+        else if (*it == 'T') { revComp += 'A'; }
+    }
+    return revComp;
+}
+
+SCENARIO("Kmers encode and decode correctly (reverse complement)") {
+    using salmon::utils::Direction;
+    GIVEN("All 6-mers") {
+        std::vector<std::string> kmers = getAllWords(6);
+        // Encoding k as REVERSE_COMPLEMENT, then decoding, must yield rc(k)
+        for (auto& k : kmers) {
+            auto i = indexForKmer(k.c_str(), 6, Direction::REVERSE_COMPLEMENT);
+            auto kp = kmerForIndex(i, 6);
+            auto krc = rc(k);
+            WHEN("kmer is [" + k + "]") {
+                THEN("decodes as [" + kp + "]") {
+                    REQUIRE(krc == kp);
+                }
+            }
+        }
+    }
+}
+
 
 SCENARIO("The next k-mer index function works correctly") {
     using salmon::utils::Direction;
@@ -73,41 +104,32 @@ SCENARIO("The next k-mer index function works correctly") {
         }
     }
 
-    auto rc = [](std::string s) -> std::string {
-        std::string rc;
-        for (int32_t i = s.size() - 1; i >= 0; --i) {
-            switch(s[i]) {
-                case 'A': rc += 'T'; break;
-                case 'C': rc += 'G'; break;
-                case 'G': rc += 'C'; break;
-                case 'T': rc += 'A'; break;
-            }
-        }
-        return rc;
-    };
-
-    auto rcs = rc(s);
+    //auto rcs = rc(s);
 
     GIVEN("The string " + s + " in the reverse complement direction") {
-        auto idx = indexForKmer(s.c_str() + s.size() - K - 1, 6,
-                                Direction::REVERSE_COMPLEMENT);
-        std::string k = rc(s.substr(s.size() - K - 1, 6));
+        auto idx = indexForKmer(s.c_str(), 6, Direction::REVERSE_COMPLEMENT);
+        std::string k = rc(s.substr(0, 6));
         WHEN("kmer is [" + k + "]") {
             auto kp = kmerForIndex(idx, 6);
             THEN("decodes as [" + kp + "]") {
                 REQUIRE(k == kp);
             }
         }
-        for (int32_t i = s.size() - K - 2; i >= 0; --i) {
-            idx = nextKmerIndex(idx, s[i], 6, Direction::REVERSE_COMPLEMENT);
-            k = rc(s.substr(i, 6));
-            WHEN("kmer is [" + k + "]") {
-                auto kp = kmerForIndex(idx, 6);
-                THEN("decodes as [" + kp + "]") {
-                    REQUIRE(k == kp);
-                }
-            }
+        const char* seq = s.c_str();
+	for (size_t i = 0; i < s.size() - K; ++i) {
+	  idx = nextKmerIndex(idx, s[i+K], 6, Direction::REVERSE_COMPLEMENT);
+	  auto idx2 = indexForKmer(seq+i+1, 6, Direction::REVERSE_COMPLEMENT);
+	  k = rc(s.substr(i+1, 6));
+	  WHEN("kmer is [" + k + "]") {
+	    auto kp = kmerForIndex(idx, 6);
+	    THEN("decodes as [" + kp + "]") {
+	      REQUIRE(k == kp);
+	    }
+        THEN("incremental decoding works") {
+            REQUIRE(idx == idx2);
         }
+	  }
+	}
     }
 
 }
diff --git a/tests/UnitTests.cpp b/tests/UnitTests.cpp
index 9676f98..1ca74e1 100644
--- a/tests/UnitTests.cpp
+++ b/tests/UnitTests.cpp
@@ -4,8 +4,10 @@
 #include "catch.hpp"
 #include "LibraryFormat.hpp"
 #include "SalmonUtils.hpp"
+#include "Transcript.hpp"
 
 bool verbose=false; // Apparently, we *need* this (OSX)
 
+#include "GCSampleTests.cpp"
 #include "LibraryTypeTests.cpp"
-#include "KmerHistTests.cpp"
+//#include "KmerHistTests.cpp"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/salmon.git