[med-svn] [blasr] 01/13: Imported Upstream version 5.2

Afif Elghraoui afif at moszumanska.debian.org
Mon Oct 24 01:24:34 UTC 2016


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository blasr.

commit 9690aa9e00b8415695071d16d75ea902b8f47000
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Sun Jul 24 16:25:28 2016 -0700

    Imported Upstream version 5.2
---
 .gitignore                                         |    9 +
 Blasr.cpp                                          |  260 ++---
 LICENSE                                            |   34 +
 README.INSTALL.md                                  |   24 +-
 configure.py                                       |   10 +-
 cram.mk                                            |   34 +
 ctest/.gitignore                                   |    1 +
 ctest/bamConcordant.t                              |   33 +
 ctest/cigarAdjecentIndels.t                        |   22 +
 ctest/concordant.t                                 |    7 +-
 ctest/dataset.t                                    |   64 ++
 ctest/deterministic.t                              |   47 +
 ctest/filtercriteria.t                             |    2 +-
 ctest/open_fail.t                                  |    2 +-
 ctest/setup.sh                                     |    7 +-
 ctest/unaligned.t                                  |   25 +
 ctest/useccsallBestN1.t                            |    4 +-
 extrautils/BasH5Simulator.cpp                      |  524 ----------
 extrautils/CCSH5ToBam.cpp                          |  135 ---
 extrautils/SimpleShredder.cpp                      |    2 +-
 extrautils/ctest/alchemy.t                         |   20 -
 extrautils/ctest/ccsh5tobam.t                      |   10 -
 extrautils/ctest/printTupleCountTable.t            |    2 +-
 extrautils/makefile                                |   12 +-
 {include => iblasr}/BlasrAlign.hpp                 |   39 +-
 {include => iblasr}/BlasrAlignImpl.hpp             |  157 ++-
 iblasr/BlasrHeaders.h                              |  113 +++
 iblasr/BlasrMiscs.hpp                              |   62 ++
 {include => iblasr}/BlasrMiscsImpl.hpp             |  100 +-
 {include => iblasr}/BlasrUtils.hpp                 |   50 +-
 {include => iblasr}/BlasrUtilsImpl.hpp             |  100 +-
 {include => iblasr}/MappingBuffers.hpp             |   52 +-
 {include => iblasr}/MappingIPC.h                   |   30 +-
 {include => iblasr}/MappingParameters.h            |   89 +-
 {include => iblasr}/MappingSemaphores.h            |   15 +-
 {include => iblasr}/ReadAlignments.hpp             |   45 +-
 {include => iblasr}/RegisterBlasrOptions.h         |  402 ++++----
 {include => iblasr}/RegisterFilterOptions.h        |   41 +-
 include/BlasrHeaders.h                             |  149 ---
 include/BlasrMiscs.hpp                             |  100 --
 makefile                                           |   36 +-
 rules.mk                                           |    9 +-
 sub.mk                                             |    9 +
 utils/.gitignore                                   |    2 +
 utils/LoadPulses.cpp                               |  145 ++-
 utils/PulseToFasta.cpp                             |   37 +-
 utils/SAWriter.cpp                                 |  358 +++----
 utils/SDPMatcher.cpp                               |   39 +-
 utils/SamFilter.cpp                                |   54 +-
 utils/SamToCmpH5.cpp                               |   29 +-
 utils/SamToM4.cpp                                  |   26 +-
 utils/ToAfg.cpp                                    |   42 +-
 utils/bam2bax/BUILD.txt                            |   48 +
 utils/bam2bax/CMakeLists.txt                       |  133 +++
 utils/bam2bax/README.md                            |   11 +
 utils/bam2bax/makefile                             |   29 +
 utils/bam2bax/src/Bam2Bax.cpp                      |   29 +
 utils/bam2bax/src/Bam2Bax.h                        |   13 +
 utils/bam2bax/src/Bam2BaxConverter.h               |   38 +
 utils/bam2bax/src/Bam2BaxConverterImpl.hpp         |   71 ++
 utils/bam2bax/src/Bam2BaxInternal.h                |   78 ++
 utils/bam2bax/src/Bam2BaxMain.cpp                  |   75 ++
 utils/bam2bax/src/Bam2PlxMain.cpp                  |   65 ++
 utils/bam2bax/src/CMakeLists.txt                   |   79 ++
 utils/bam2bax/src/Converter.cpp                    |  146 +++
 utils/bam2bax/src/Converter.h                      |   77 ++
 utils/bam2bax/src/MetadataWriter.cpp               |   42 +
 utils/bam2bax/src/MetadataWriter.h                 |   38 +
 utils/bam2bax/src/OptionParser.cpp                 |  562 +++++++++++
 utils/bam2bax/src/OptionParser.h                   |  306 ++++++
 utils/bam2bax/src/RegionTypeAdapter.h              |  129 +++
 utils/bam2bax/src/RegionsAdapter.h                 |  118 +++
 utils/bam2bax/src/Settings.cpp                     |  232 +++++
 utils/bam2bax/src/Settings.h                       |   80 ++
 utils/bam2bax/tests/CMakeLists.txt                 |   93 ++
 utils/bam2bax/tests/cram/bam2bax.t                 |   86 ++
 utils/bam2bax/tests/cram/bam2plx.t                 |  230 +++++
 ...4852550000001823085912221377_s1_X0.1.scraps.bam |  Bin 0 -> 644642 bytes
 ...52550000001823085912221377_s1_X0.1.subreads.bam |  Bin 0 -> 220556 bytes
 utils/bam2bax/tests/data/tiny.scraps.sam           |   18 +
 utils/bam2bax/tests/data/tiny.subreads.sam         |   27 +
 .../bam2bax/tests/data/tiny_bam2plx.polymerase.sam |    4 +
 utils/bam2bax/tests/example/end-to-end.sh          |   66 ++
 utils/bam2bax/tests/files.cmake                    |   64 ++
 .../tests/resequencing/bam2bax_resequencing.py     |  179 ++++
 utils/bam2bax/tests/resequencing/one_bax_test.sh   |    5 +
 utils/bam2bax/tests/resequencing/settings.xml      |  201 ++++
 utils/bam2bax/tests/resequencing/tiny_test.sh      |    5 +
 utils/bam2bax/tests/src/TestConstants.h            |  195 ++++
 utils/bam2bax/tests/src/TestData.h.in              |   18 +
 utils/bam2bax/tests/src/TestUtils.cpp              |   42 +
 utils/bam2bax/tests/src/TestUtils.h                |   13 +
 utils/bam2bax/tests/src/test.cpp                   |   23 +
 utils/bam2bax/tests/src/test_Bam2BaxConverter.cpp  |   94 ++
 .../bam2bax/tests/src/test_HDFBaseCallsWriter.cpp  |   56 ++
 utils/bam2bax/tests/src/test_HDFBaxWriter.cpp      |  100 ++
 utils/bam2bax/tests/src/test_HDFScanDataWriter.cpp |   76 ++
 .../bam2bax/tests/src/test_HDFZMWMetricsWriter.cpp |   51 +
 utils/bam2bax/tests/src/test_HDFZMWWriter.cpp      |   40 +
 utils/bax2bam/BUILD.txt                            |   10 +
 utils/bax2bam/CMakeLists.txt                       |  128 +++
 utils/bax2bam/README.md                            |   62 ++
 utils/bax2bam/makefile                             |   29 +
 utils/bax2bam/src/Bax2Bam.cpp                      |  191 ++++
 utils/bax2bam/src/Bax2Bam.h                        |   13 +
 utils/bax2bam/src/CMakeLists.txt                   |   57 ++
 utils/bax2bam/src/CcsConverter.cpp                 |  117 +++
 utils/bax2bam/src/CcsConverter.h                   |   47 +
 utils/bax2bam/src/ConverterBase.h                  | 1019 ++++++++++++++++++++
 utils/bax2bam/src/HqRegionConverter.cpp            |  192 ++++
 utils/bax2bam/src/HqRegionConverter.h              |   26 +
 utils/bax2bam/src/IConverter.cpp                   |   85 ++
 utils/bax2bam/src/IConverter.h                     |   53 +
 utils/bax2bam/src/OptionParser.cpp                 |  562 +++++++++++
 utils/bax2bam/src/OptionParser.h                   |  306 ++++++
 utils/bax2bam/src/PolymeraseReadConverter.cpp      |   60 ++
 utils/bax2bam/src/PolymeraseReadConverter.h        |   26 +
 utils/bax2bam/src/Settings.cpp                     |  261 +++++
 utils/bax2bam/src/Settings.h                       |   87 ++
 utils/bax2bam/src/SubreadConverter.cpp             |  395 ++++++++
 utils/bax2bam/src/SubreadConverter.h               |   26 +
 utils/bax2bam/src/main.cpp                         |  118 +++
 utils/bax2bam/tests/CMakeLists.txt                 |   88 ++
 utils/bax2bam/tests/bax2bam.t                      |    7 +
 utils/bax2bam/tests/files.cmake                    |   52 +
 utils/bax2bam/tests/src/TestData.h.in              |   17 +
 utils/bax2bam/tests/src/TestUtils.cpp              |   43 +
 utils/bax2bam/tests/src/TestUtils.h                |   13 +
 utils/bax2bam/tests/src/test_ccs.cpp               |  219 +++++
 utils/bax2bam/tests/src/test_common.cpp            |   16 +
 utils/bax2bam/tests/src/test_hqregions.cpp         |  421 ++++++++
 utils/bax2bam/tests/src/test_polymerase.cpp        |  236 +++++
 utils/bax2bam/tests/src/test_subreads.cpp          |  410 ++++++++
 .../gtest/.deps/test_fused_gtest_test-gtest-all.Po |    1 +
 .../.deps/test_fused_gtest_test-gtest_main.Po      |    1 +
 .../bax2bam/third-party/gtest-1.7.0/lib/.dirstamp  |    0
 .../gtest-1.7.0/samples/.deps/.dirstamp            |    0
 .../gtest-1.7.0/samples/.deps/sample1.Plo          |    3 +
 .../gtest-1.7.0/samples/.deps/sample10_unittest.Po |    1 +
 .../gtest-1.7.0/samples/.deps/sample1_unittest.Po  |    1 +
 .../gtest-1.7.0/samples/.deps/sample2.Plo          |   29 +
 .../gtest-1.7.0/samples/.deps/sample4.Plo          |   48 +
 .../samples/.deps/test_fused_gtest_test-sample1.Po |    1 +
 .../test_fused_gtest_test-sample1_unittest.Po      |    1 +
 .../third-party/gtest-1.7.0/samples/.dirstamp      |    0
 .../third-party/gtest-1.7.0/src/.deps/.dirstamp    |    0
 .../gtest-1.7.0/src/.deps/gtest-all.Plo            |  617 ++++++++++++
 .../gtest-1.7.0/src/.deps/gtest_main.Plo           |  460 +++++++++
 .../bax2bam/third-party/gtest-1.7.0/src/.dirstamp  |    0
 .../gtest-1.7.0/test/.deps/gtest_all_test.Po       |    1 +
 utils/ctest/.gitignore                             |    1 +
 utils/makefile                                     |   22 +-
 152 files changed, 12135 insertions(+), 2149 deletions(-)

diff --git a/.gitignore b/.gitignore
index bfdb47f..da26b3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,13 @@
 defines.mk
 *.d
 *.o
+*.err
 blasr
+/utils/bam2bax/build/
+/utils/bam2bax/bin/
+/utils/bam2bax/tests/bin/
+/utils/bam2bax/tests/src/TestData.h
+/utils/bax2bam/build/
+/utils/bax2bam/bin/
+/utils/bax2bam/tests/bin/
+/utils/bax2bam/tests/src/TestData.h
diff --git a/Blasr.cpp b/Blasr.cpp
index 9ae4082..85a928a 100644
--- a/Blasr.cpp
+++ b/Blasr.cpp
@@ -1,44 +1,9 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
 
-#include "BlasrMiscs.hpp"
-#include "BlasrUtils.hpp"
-#include "BlasrAlign.hpp"
-#include "RegisterBlasrOptions.h"
+#include "iblasr/BlasrMiscs.hpp"
+#include "iblasr/BlasrUtils.hpp"
+#include "iblasr/BlasrAlign.hpp"
+#include "iblasr/RegisterBlasrOptions.h"
 
 //#define USE_GOOGLE_PROFILER
 #ifdef USE_GOOGLE_PROFILER
@@ -58,15 +23,16 @@ HDFRegionTableReader *regionTableReader = NULL;
 ReaderAgglomerate *reader = NULL;
 
 const string GetMajorVersion() {
-  return "2.0.0";
+  return "5.2";
 }
 
+// version format is 3 numbers separated by dots : Version.Subversion.SHA1
 const string GetVersion(void) {
-  string perforceVersionString("$Change$");
+  string gitVersionString(SHA1_7);  // gitVersionString is first 7 characters of SHA1
   string version = GetMajorVersion();
-  if (perforceVersionString.size() > 12) {
-    version.insert(version.size(), ".");
-    version.insert(version.size(), perforceVersionString, 9, perforceVersionString.size() - 11);
+  if (gitVersionString.size() == 7) {
+    version.append(".");
+    version.append(gitVersionString);
   }
   return version;
 }
@@ -99,7 +65,7 @@ bool IsGoodRead(const SMRTSequence & smrtRead,
     //
     if (smrtRead.highQualityRegionScore < params.minRawSubreadScore or
         (params.maxReadLength != 0 and smrtRead.length > UInt(params.maxReadLength)) or
-        (smrtRead.length < params.minReadLength)) {
+        (int(smrtRead.length) < params.minReadLength)) {
         return false;
     }
 
@@ -196,16 +162,15 @@ void MakePrimaryIntervals(RegionTable * regionTablePtr,
 void MakePrimaryIntervals(vector<SMRTSequence> & subreads,
                           vector<ReadInterval> & subreadIntervals,
                           vector<int> & subreadDirections,
-                          int & bestSubreadIndex,
-                          MappingParameters & params)
+                          int & bestSubreadIndex)
 {
     MakeSubreadIntervals(subreads, subreadIntervals);
     CreateDirections(subreadDirections, subreadIntervals.size());
-    bestSubreadIndex = GetIndexOfMedian(subreadIntervals);
+    bestSubreadIndex = GetIndexOfConcordantTemplate(subreadIntervals);
 }
 
 
-/// Scan the next read from input.  This may either be a CCS read,
+/// Scan the next read from input.  This may either be a CCS read, unrolled (Polymerase) read,
 /// or regular read (though this may be aligned in whole, or by
 /// subread).
 /// \params[in] reader: FASTA/FASTQ/BAX.H5/CCS.H5/BAM file reader
@@ -231,9 +196,9 @@ bool FetchReads(ReaderAgglomerate * reader,
                 int & associatedRandInt,
                 bool & stop)
 {
-    if (reader->GetFileType() != BAM or not params.concordant) {
-        if (reader->GetFileType() == HDFCCS ||
-            reader->GetFileType() == HDFCCSONLY) {
+    if ((reader->GetFileType() != FileType::PBBAM and reader->GetFileType() != FileType::PBDATASET) or not params.concordant) {
+        if (reader->GetFileType() == FileType::HDFCCS ||
+            reader->GetFileType() == FileType::HDFCCSONLY) {
             if (GetNextReadThroughSemaphore(*reader, params, ccsRead, readGroupId, associatedRandInt, semaphores) == false) {
                 stop = true;
                 return false;
@@ -299,7 +264,7 @@ bool FetchReads(ReaderAgglomerate * reader,
             }
         }
         if (subreads.size() != 0) {
-            MakeVirtualRead(smrtRead, subreads);
+            smrtRead.MadeFromSubreadsAsPolymerase(subreads);
             return true;
         }
         else {
@@ -312,7 +277,6 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
                     MappingBuffers & mappingBuffers,
                     SMRTSequence & smrtRead,
                     SMRTSequence & smrtReadRC,
-                    CCSSequence & ccsRead,
                     vector<SMRTSequence> & subreads,
                     MappingParameters & params,
                     const int & associatedRandInt,
@@ -333,19 +297,18 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
     bwtPtr = mapData->bwtPtr;
     SeqBoundaryFtr<FASTQSequence> seqBoundary(&seqdb);
 
-
     vector<ReadInterval> subreadIntervals;
     vector<int>          subreadDirections;
     int bestSubreadIndex;
 
-    if (mapData->reader->GetFileType() != BAM or not params.concordant) {
+    if ((mapData->reader->GetFileType() != FileType::PBBAM and mapData->reader->GetFileType() != FileType::PBDATASET) or not params.concordant) {
         MakePrimaryIntervals(mapData->regionTablePtr, smrtRead,
                              subreadIntervals, subreadDirections,
                              bestSubreadIndex, params);
     } else {
         MakePrimaryIntervals(subreads,
                              subreadIntervals, subreadDirections,
-                             bestSubreadIndex, params);
+                             bestSubreadIndex);
     }
 
     // Flop all directions if direction of the longest subread is 1.
@@ -360,8 +323,14 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
 
     if (params.concordant) {
         // Only the longest subread will be aligned in the first round.
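+        // TODO(VR): update the comment above; for concordant BAM/dataset input the template subread is picked by GetIndexOfConcordantTemplate.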
         startIndex = max(startIndex, bestSubreadIndex);
         endIndex   = min(endIndex, bestSubreadIndex + 1);
+
+        if (params.verbosity >= 1) {
+            cout << "Concordant template subread index: " << bestSubreadIndex << ", "
+                 << smrtRead.HoleNumber() << "/" << subreadIntervals[bestSubreadIndex] << endl;
+        }
     }
 
     //
@@ -370,8 +339,7 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
     allReadAlignments.Resize(subreadIntervals.size());
     allReadAlignments.alignMode = Subread;
 
-    DNALength intvIndex;
-    for (intvIndex = startIndex; intvIndex < endIndex; intvIndex++) {
+    for (int intvIndex = startIndex; intvIndex < endIndex; intvIndex++) {
         SMRTSequence subreadSequence, subreadSequenceRC;
         MakeSubreadOfInterval(subreadSequence, smrtRead,
                 subreadIntervals[intvIndex], params);
@@ -473,10 +441,10 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
         // for printing
         // delete all AC which are in complement of SelectedAlignmemntPtrs vector
         // namely (SelectedAlignmentPtrs/alignmentPtrs)
-        for (int ii = 0; ii < alignmentPtrs.size(); ii++)
+        for (size_t ii = 0; ii < alignmentPtrs.size(); ii++)
         {
             int found =0;
-            for (int jj = 0; jj < selectedAlignmentPtrs.size(); jj++)
+            for (size_t jj = 0; jj < selectedAlignmentPtrs.size(); jj++)
             {
                 if (alignmentPtrs[ii] == selectedAlignmentPtrs[jj] )
                 {
@@ -489,10 +457,13 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
         subreadSequence.Free();
         subreadSequenceRC.Free();
     } // End of looping over subread intervals within [startIndex, endIndex).
+ 
 
     if (params.verbosity >= 3)
         allReadAlignments.Print(threadOut);
 
+    // If not concordant , all done
+
     if (params.concordant) {
         allReadAlignments.read = smrtRead;
         allReadAlignments.alignMode = ZmwSubreads;
@@ -507,7 +478,7 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
                                  seqdb, genome, params.flankSize);
             }
 
-            for (intvIndex = 0; intvIndex < subreadIntervals.size(); intvIndex++) {
+            for (int intvIndex = 0; intvIndex < int(subreadIntervals.size()); intvIndex++) {
                 if (intvIndex == startIndex) continue;
                 int passDirection = subreadDirections[intvIndex];
                 int passStartBase = subreadIntervals[intvIndex].start;
@@ -529,7 +500,7 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
                     allReadAlignments.SetSequence(intvIndex, smrtRead);
                 }
 
-                for (int alnIndex = 0; alnIndex < selectedAlignmentPtrs.size(); alnIndex++) {
+                for (size_t alnIndex = 0; alnIndex < selectedAlignmentPtrs.size(); alnIndex++) {
                     T_AlignmentCandidate * alignment = selectedAlignmentPtrs[alnIndex];
                     if (alignment->score > params.maxScore) break;
                     AlignSubreadToAlignmentTarget(allReadAlignments,
@@ -553,7 +524,7 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
                 } // End of aligning this subread to each selected alignment.
                 subread.Free();
             } // End of aligning each subread to where the template subread aligned to.
-            for(int alignmentIndex = 0; alignmentIndex < selectedAlignmentPtrs.size();
+            for(size_t alignmentIndex = 0; alignmentIndex < selectedAlignmentPtrs.size();
                     alignmentIndex++) {
                 if (selectedAlignmentPtrs[alignmentIndex])
                     delete selectedAlignmentPtrs[alignmentIndex];
@@ -562,6 +533,11 @@ void MapReadsNonCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapDa
     } // End of if params.concordant
 }
 
+//
+// invoked for mapping entire ZMW as a single entity
+// either for CCS reads : all subreads of a ZMW collapsed/merged into a single read 
+// or Polymerase reads  : all subreads of a ZMW stitched into a single read  
+//
 void MapReadsCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData,
                  MappingBuffers & mappingBuffers,
                  SMRTSequence & smrtRead,
@@ -619,6 +595,9 @@ void MapReadsCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData,
     // all other are secondary.
     //
 
+    //
+    // Here unrolled reads are aligned
+    //
     if (readIsCCS == false or params.useCcsOnly) {
         // if -noSplitSubreads or -useccsdenovo.
         //
@@ -634,9 +613,12 @@ void MapReadsCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData,
         }
         allReadAlignments.SetSequence(0, smrtRead);
     }
+    //
+    // Here CCS reads are aligned
+    //
     else if (readIsCCS) { // if -useccsall or -useccs
         // Flank alignment candidates to both ends.
-        for(int alignmentIndex = 0; alignmentIndex < selectedAlignmentPtrs.size();
+        for(size_t alignmentIndex = 0; alignmentIndex < selectedAlignmentPtrs.size();
                 alignmentIndex++) {
             FlankTAlignedSeq(selectedAlignmentPtrs[alignmentIndex],
                     seqdb, genome, params.flankSize);
@@ -698,12 +680,11 @@ void MapReadsCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData,
             // The unrolled alignment should be relative to the entire read.
             allReadAlignments.SetSequence(subreadIndex, ccsRead.unrolledRead);
 
-            int alignmentIndex;
             //
             // Align this subread to all the positions that the de novo
             // sequence has aligned to.
             //
-            for (alignmentIndex = 0; alignmentIndex < selectedAlignmentPtrs.size(); alignmentIndex++) {
+            for (size_t alignmentIndex = 0; alignmentIndex < selectedAlignmentPtrs.size(); alignmentIndex++) {
                 T_AlignmentCandidate *alignment = selectedAlignmentPtrs[alignmentIndex];
                 if (alignment->score > params.maxScore) break;
                 AlignSubreadToAlignmentTarget(allReadAlignments,
@@ -722,10 +703,10 @@ void MapReadsCCS(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData,
     // for printing
     // delete all AC which are in complement of SelectedAlignmemntPtrs vector
     // namely (SelectedAlignmentPtrs/alignmentPtrs)
-    for (int ii = 0; ii < alignmentPtrs.size(); ii++)
+    for (size_t ii = 0; ii < alignmentPtrs.size(); ii++)
     {
         int found =0;
-        for (int jj = 0; jj < selectedAlignmentPtrs.size(); jj++)
+        for (size_t jj = 0; jj < selectedAlignmentPtrs.size(); jj++)
         {
             if (alignmentPtrs[ii] == selectedAlignmentPtrs[jj] )
             {
@@ -750,14 +731,12 @@ void MapReads(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData)
     TupleCountTable<T_GenomeSequence, DNATuple> ct;
     SequenceIndexDatabase<FASTQSequence> seqdb;
     T_GenomeSequence    genome;
-    BWT *bwtPtr;
 
     mapData->ShallowCopySuffixArray(sarray);
     mapData->ShallowCopyReferenceSequence(genome);
     mapData->ShallowCopySequenceIndexDatabase(seqdb);
     mapData->ShallowCopyTupleCountTable(ct);
 
-    bwtPtr = mapData->bwtPtr;
     SeqBoundaryFtr<FASTQSequence> seqBoundary(&seqdb);
 
     int numAligned = 0;
@@ -804,6 +783,10 @@ void MapReads(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData)
 
         smrtRead.MakeRC(smrtReadRC);
 
+        // important 
+        // 1. CCS and unrolled mode are mutually exclusive
+        // 2. The reverse complement of the unrolled read is generated for CCS only
+        //
         if (readIsCCS) {
             ccsRead.unrolledRead.MakeRC(unrolledReadRC);
         }
@@ -815,10 +798,18 @@ void MapReads(MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> *mapData)
         ReadAlignments allReadAlignments;
         allReadAlignments.read = smrtRead;
 
+        // currently 3 ways of mapping
+        // regular, CCS , and Polymerase (unrolled)
+        //
+        // for regular subreads MapReadsNonCCS
+        // for mapping ZMW as a whole (CCS or Polymerase) MapReadsCCS
+        // In the future, rename these functions to be more descriptive
+        // noSplitSubreads is in essence unrolled - Polymerase read mode
+        //
         if (readIsCCS == false and params.mapSubreadsSeparately) {
             // (not readIsCCS and not -noSplitSubreads)
             MapReadsNonCCS(mapData, mappingBuffers,
-                           smrtRead, smrtReadRC, ccsRead, subreads,
+                           smrtRead, smrtReadRC, subreads,
                            params, associatedRandInt,
                            allReadAlignments, threadOut);
        } // End of if (readIsCCS == false and params.mapSubreadsSeparately).
@@ -877,10 +868,7 @@ int main(int argc, char* argv[]) {
   // Configure parameters for refining alignments.
   //
   MappingParameters params;
-  ReverseCompressIndex index;
-  pid_t parentPID;
-  pid_t *pids;
-  
+
   CommandLineParser clp;
   clp.SetHelp(BlasrHelp(params));
   clp.SetConciseHelp(BlasrConciseHelp());
@@ -909,7 +897,7 @@ int main(int argc, char* argv[]) {
     cout << BlasrConciseHelp();
     exit(1); // A failure.
   }
-  
+
   int a, b;
   for (a = 0; a < 5; a++ ) {
     for (b = 0; b < 5; b++ ){
@@ -921,7 +909,7 @@ int main(int argc, char* argv[]) {
       }
     }
   }
-  
+
   if (params.scoreMatrixString != "") {
     if (StringToScoreMatrix(params.scoreMatrixString, SMRTDistanceMatrix) == false) {
       cout << "ERROR. The string " << endl
@@ -938,7 +926,7 @@ int main(int argc, char* argv[]) {
       exit(1);
     }
   }
-  
+
   cerr << "[INFO] " << GetTimestamp() << " [blasr] started." << endl;
   params.MakeSane();
 
@@ -953,7 +941,7 @@ int main(int argc, char* argv[]) {
   else {
     InitializeRandomGeneratorWithTime();
   }
-  
+
   //
   // Various aspects of timing are stored here.  However this isn't
   // quite finished.
@@ -979,13 +967,13 @@ int main(int argc, char* argv[]) {
     }
   }
 
-  if (params.regionTableFileNames.size() != 0 and 
+  if (params.regionTableFileNames.size() != 0 and
       params.regionTableFileNames.size() != params.queryFileNames.size()) {
     cout << "Error, there are not the same number of region table files as input files." << endl;
     exit(1);
   }
 
-  // If reading a separate ccs fofn, there is a 1-1 corresponence 
+  // If reading a separate ccs fofn, there is a 1-1 correspondence
   // between ccs fofn and base file.
   if (params.readSeparateCcsFofn) {
     if (FileOfFileNames::IsFOFN(params.ccsFofnFileName)) {
@@ -995,13 +983,11 @@ int main(int argc, char* argv[]) {
       params.ccsFofnFileNames.push_back(params.ccsFofnFileName);
     }
   }
-  if (params.ccsFofnFileNames.size() != 0 and 
+  if (params.ccsFofnFileNames.size() != 0 and
       params.ccsFofnFileNames.size() != params.queryFileNames.size()) {
     cout << "Error, there are not the same number of ccs files as input files." << endl;
     exit(1);
   }
-  
-  parentPID = getpid();
 
   SequenceIndexDatabase<FASTASequence> seqdb;
   SeqBoundaryFtr<FASTASequence> seqBoundary(&seqdb);
@@ -1021,14 +1007,14 @@ int main(int argc, char* argv[]) {
   // Make sure the reads file exists and can be opened before
   // trying to read any of the larger data structures.
   //
-  
+
 
   FASTASequence   fastaGenome;
   T_Sequence      genome;
   FASTAReader     genomeReader;
 
-  // 
-  // The genome is in normal FASTA, or condensed (lossy homopolymer->unipolymer) 
+  //
+  // The genome is in normal FASTA, or condensed (lossy homopolymer->unipolymer)
   // format.  Both may be read in using a FASTA reader.
   //
   if (!genomeReader.Init(params.genomeFileName)) {
@@ -1054,8 +1040,7 @@ int main(int argc, char* argv[]) {
   //
   // The genome may have extra spaces in the fasta name. Get rid of those.
   //
-  VectorIndex t;
-  for (t = 0; t < fastaGenome.titleLength; t++ ){
+  for (int t = 0; t < fastaGenome.titleLength; t++ ){
     if (fastaGenome.title[t] == ' ') {
       fastaGenome.titleLength = t;
       fastaGenome.title[t] = '\0';
@@ -1073,8 +1058,6 @@ int main(int argc, char* argv[]) {
   DNASuffixArray sarray;
   TupleCountTable<T_GenomeSequence, DNATuple> ct;
 
-  int listTupleSize;
-  
   ofstream outFile;
   outFile.exceptions(ostream::failbit);
   ofstream unalignedOutFile;
@@ -1115,7 +1098,7 @@ int main(int argc, char* argv[]) {
         else {
           params.listTupleSize = sarray.lookupPrefixLength;
         }
-        if (params.minMatchLength < sarray.lookupPrefixLength) {
+        if (params.minMatchLength < int(sarray.lookupPrefixLength)) {
           cerr << "WARNING. The value of -minMatch " << params.minMatchLength << " is less than the smallest searched length of " << sarray.lookupPrefixLength << ".  Setting -minMatch to " << sarray.lookupPrefixLength << "." << endl;
           params.minMatchLength = sarray.lookupPrefixLength;
         }
@@ -1128,7 +1111,7 @@ int main(int argc, char* argv[]) {
     }
   }
 
-  if (params.minMatchLength < sarray.lookupPrefixLength) {
+  if (params.minMatchLength < int(sarray.lookupPrefixLength)) {
     cerr << "WARNING. The value of -minMatch " << params.minMatchLength << " is less than the smallest searched length of " << sarray.lookupPrefixLength << ".  Setting -minMatch to " << sarray.lookupPrefixLength << "." << endl;
     params.minMatchLength = sarray.lookupPrefixLength;
   }
@@ -1136,12 +1119,11 @@ int main(int argc, char* argv[]) {
   //
   // It is required to have a tuple count table
   // for estimating the background frequencies
-  // for word matching. 
+  // for word matching.
   // If one is specified on the command line, simply read
-  // it in.  If not, this is operating under the mode 
+  // it in.  If not, this is operating under the mode
   // that everything is computed from scratch.
   //
-  long l;
   TupleMetrics saLookupTupleMetrics;
   if (params.useCountTable) {
     ifstream ctIn;
@@ -1160,8 +1142,8 @@ int main(int argc, char* argv[]) {
     ofstream titleTableOut;
     CrucialOpen(params.titleTableName, titleTableOut);
     //
-    // When using a sequence index database, the title table is simply copied 
-    // from the sequencedb. 
+    // When using a sequence index database, the title table is simply copied
+    // from the sequencedb.
     //
     if (params.useSeqDB) {
       titleTable.Copy(seqdb.names, seqdb.nSeqPos-1);
@@ -1170,7 +1152,7 @@ int main(int argc, char* argv[]) {
     else {
       //
       // No seqdb, so there is just one sequence. Still the user specified a title
-      // table, so just the first sequence in the fasta file should be used. 
+      // table, so just the first sequence in the fasta file should be used.
       //
       titleTable.Copy(&fastaGenome.title, 1);
       titleTable.ResetTableToIntegers(&genome.title, &genome.titleLength, 1);
@@ -1198,7 +1180,7 @@ int main(int argc, char* argv[]) {
   ofstream metricsOut, lcpBoundsOut;
   ofstream anchorFileStrm;
   ofstream clusterOut, *clusterOutPtr;
- 
+
   if (params.anchorFileName != "") {
     CrucialOpen(params.anchorFileName, anchorFileStrm, std::ios::out);
   }
@@ -1217,7 +1199,7 @@ int main(int argc, char* argv[]) {
         CrucialOpen(params.outFileName, outFileStrm, std::ios::out);
         outFilePtr = &outFileStrm;
       } // otherwise, use bamWriter and initialize it later
-  } 
+  }
 
   if (params.printHeader) {
       switch(params.printFormat) {
@@ -1237,7 +1219,7 @@ int main(int argc, char* argv[]) {
     CrucialOpen(params.unalignedFileName, unalignedFile, std::ios::out);
     unalignedFilePtr = &unalignedFile;
   }
-  
+
   if (params.metricsFileName != "") {
     CrucialOpen(params.metricsFileName, metricsOut);
   }
@@ -1246,7 +1228,7 @@ int main(int argc, char* argv[]) {
     CrucialOpen(params.lcpBoundsFileName, lcpBoundsOut);
     //    lcpBoundsOut << "pos depth width lnwidth" << endl;
   }
-  
+
   //
   // Configure the mapping database.
   //
@@ -1271,7 +1253,6 @@ int main(int argc, char* argv[]) {
   //
   // Start the mapping jobs.
   //
-  int readsFileIndex = 0;
   if (params.subsample < 1) {
     InitializeRandomGeneratorWithTime();
     reader = new ReaderAgglomerate(params.subsample);
@@ -1282,7 +1263,7 @@ int main(int argc, char* argv[]) {
   //  In case the input is fasta, make all bases in upper case.
   reader->SetToUpper();
 
-  
+
   regionTableReader = new HDFRegionTableReader;
   RegionTable regionTable;
   //
@@ -1295,14 +1276,14 @@ int main(int argc, char* argv[]) {
 
   string commandLineString; // Restore command.
   clp.CommandLineToString(argc, argv, commandLineString);
-  
+
   if (params.printSAM or params.printBAM) {
       string so = "UNKNOWN"; // sorting order;
       string version = GetVersion(); //blasr version;
-      SAMHeaderPrinter shp(so, seqdb, 
-              params.queryFileNames, params.queryReadType, 
-              params.samQVList, "BLASR", version, 
-              commandLineString); 
+      SAMHeaderPrinter shp(so, seqdb,
+              params.queryFileNames, params.queryReadType,
+              params.samQVList, "BLASR", version,
+              commandLineString);
       string headerString = shp.ToString();// SAM/BAM header
       if (params.printSAM) {
           *outFilePtr << headerString;
@@ -1314,28 +1295,47 @@ int main(int argc, char* argv[]) {
 #else
       REQUIRE_PBBAM_ERROR();
 #endif
-      } 
+      }
   }
 
-  for (readsFileIndex = 0; readsFileIndex < params.queryFileNames.size(); readsFileIndex++ ){ 
+  for (size_t readsFileIndex = 0; readsFileIndex < params.queryFileNames.size(); readsFileIndex++ ){
     params.readsFileIndex = readsFileIndex;
     //
     // Configure the reader to use the correct read and region
     // file names.
-    // 
+    //
     reader->SetReadFileName(params.queryFileNames[params.readsFileIndex]);
 
+    // if PBBAM , need to construct scrap file name and check if exist  
     //
     // Initialize using already set file names.
     //
-    int initReturnValue = reader->Initialize();    
+
+
+
+    // unrolled Need to pass unrolled option
+    // unrolled If not PBDATASET also need to construct scrap file name and
+    // test if it exists in the same directory, if not exit with error message 
+    //
+    int initReturnValue;
+    
+    if ( ( (reader->GetFileType() == FileType::PBDATASET) || (reader->GetFileType() == FileType::PBBAM)) and not params.mapSubreadsSeparately) {
+        
+        if ( reader->GetFileType() == FileType::PBBAM ) {
+            reader->SetScrapsFileName(params.scrapsFileNames[params.readsFileIndex]);
+        }
+        initReturnValue = reader->Initialize(true);
+    }
+    else {
+        initReturnValue = reader->Initialize();
+    }
     if (initReturnValue <= 0) {
         cerr << "WARNING! Could not open file " << params.queryFileNames[params.readsFileIndex] << endl;
         continue;
     }
 
     // Check whether use ccs only.
-    if (reader->GetFileType() == HDFCCSONLY) {
+    if (reader->GetFileType() == FileType::HDFCCSONLY) {
        params.useAllSubreadsInCcs = false;
        params.useCcs = params.useCcsOnly = true;
     }
@@ -1373,7 +1373,7 @@ int main(int argc, char* argv[]) {
     //  Check to see if there is a region table. If there is a separate
     //  region table, use that (over the region table in the bas
     // file).  If there is a region table in the bas file, use that,
-    // without having to specify a region table on the command line. 
+    // without having to specify a region table on the command line.
     //
     if (params.useRegionTable) {
       regionTable.Reset();
@@ -1383,23 +1383,25 @@ int main(int argc, char* argv[]) {
 
     //
     // Check to see if there is a separate ccs fofn. If there is a separate
-    // ccs fofn, use that over the one in the bas file. 
+    // ccs fofn, use that over the one in the bas file.
     //
     //if (params.readSeparateCcsFofn and params.useCcs) {
     //  if (reader->SetCCS(params.ccsFofnFileNames[params.readsFileIndex]) == 0) {
-    //    cout << "ERROR! Could not read the ccs file " 
+    //    cout << "ERROR! Could not read the ccs file "
     //         << params.ccsFofnFileNames[params.readsFileIndex] << endl;
     //    exit(1);
     //  }
     // }
 
-    if (reader->GetFileType() != HDFCCS and 
-        reader->GetFileType() != HDFBase and
-        reader->GetFileType() != HDFPulse and
-        reader->GetFileType() != BAM and
+    if (reader->GetFileType() != FileType::HDFCCS and
+        reader->GetFileType() != FileType::HDFBase and
+        reader->GetFileType() != FileType::HDFPulse and
+        reader->GetFileType() != FileType::PBBAM and
+        reader->GetFileType() != FileType::PBDATASET and
         params.concordant) {
         cerr << "WARNING! Option concordant is only enabled when "
-             << "input reads are in PacBio bax/pls.h5 or bam format." << endl;
+             << "input reads are in PacBio bax/pls.h5, bam or "
+             << "dataset xml format." << endl;
         params.concordant = false;
     }
 
@@ -1415,7 +1417,7 @@ int main(int argc, char* argv[]) {
 
       assert (initReturnValue > 0);
       if (params.nProc == 1) {
-        mapdb[0].Initialize(&sarray, &genome, &seqdb, &ct, &index, params, reader, &regionTable, 
+        mapdb[0].Initialize(&sarray, &genome, &seqdb, &ct, params, reader, &regionTable,
                             outFilePtr, unalignedFilePtr, &anchorFileStrm, clusterOutPtr);
         mapdb[0].bwtPtr = &bwt;
         if (params.fullMetricsFileName != "") {
@@ -1433,12 +1435,12 @@ int main(int argc, char* argv[]) {
       }
       else {
         pthread_t *threads = new pthread_t[params.nProc];
-        for (procIndex = 0; procIndex < params.nProc; procIndex++ ){ 
+        for (procIndex = 0; procIndex < params.nProc; procIndex++ ){
           //
           // Initialize thread-specific parameters.
           //
- 
-          mapdb[procIndex].Initialize(&sarray, &genome, &seqdb, &ct, &index, params, reader, &regionTable, 
+
+          mapdb[procIndex].Initialize(&sarray, &genome, &seqdb, &ct,  params, reader, &regionTable,
                                       outFilePtr, unalignedFilePtr, &anchorFileStrm, clusterOutPtr);
           mapdb[procIndex].bwtPtr      = &bwt;
           if (params.fullMetricsFileName != "") {
@@ -1477,7 +1479,7 @@ int main(int argc, char* argv[]) {
       }
     reader->Close();
   }
-  
+
   if (!reader) {delete reader; reader = NULL;}
 
   fastaGenome.Free();
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..04d8670
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,34 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
diff --git a/README.INSTALL.md b/README.INSTALL.md
index 5d9e996..f8131e1 100644
--- a/README.INSTALL.md
+++ b/README.INSTALL.md
@@ -1,5 +1,9 @@
 ## Installation
 
+### See a step by step Blasr installation example on Blasr wiki page
+
+        https://github.com/PacificBiosciences/blasr/wiki/Step-by-step-blasr-installation-example
+
 ### Download source code
 
 * To pull this project from git hub to your local system:
@@ -8,7 +12,11 @@
 
 * To sync your code with the latest git code base:
 
-        git pull -u origin master && git submodule update --init
+        git pull --rebase origin master && git submodule update --init
+
+* To update the submodule:
+
+        make update-submodule
 
 ### Requirements
 
@@ -37,6 +45,10 @@ To build BLASR, you must have hdf 1.8.12 or above installed and
   
         export HDF5_INC=path_to_your_hdf5_include && export HDF5_LIB=path_to_your_hdf5_lib
 
+* To configure submodule:
+
+        make configure-submodule
+
 ### Build
 
 * To make the 'libcpp' libraries:
@@ -71,6 +83,16 @@ To build BLASR, you must have hdf 1.8.12 or above installed and
         make blasr
         ./blasr
 
+### CXXFLAGS
+
+* For optimized builds:
+
+        ./configure.py CXXFLAGS=-O3 ...
+
+* For debug builds:
+
+        ./configure.py CXXFLAGS=-g ...
+
 ## Other issues
 ### Static binaries
 If you want static binaries, drop `--shared` when you run configure.py. In that case, you
diff --git a/configure.py b/configure.py
index 57997b3..1fa7e9b 100755
--- a/configure.py
+++ b/configure.py
@@ -78,7 +78,7 @@ def update_env_if(envout, envin, keys):
         if key in envin:
             envout[key] = envin[key]
 def compose_defs_env(env):
-    # We disallow env overrides for anything with a default from GNU make.
+    # We disallow env overrides for some things with defaults from GNU make.
     nons = ['CXX', 'CC', 'AR'] # 'SHELL'?
     ovr    = ['%-20s ?= %s' %(k, v) for k,v in env.items() if k not in nons]
     nonovr = ['%-20s := %s' %(k, v) for k,v in env.items() if k in nons]
@@ -93,7 +93,8 @@ def compose_defines_pacbio(envin):
     #setifenvf(env, envin, 'PREBUILT', get_PREBUILT)
     nondefaults = set([
             'CXX',
-            'BLASR_INC',
+            'CXXFLAGS',
+            'NOPBBAM',
             'LIBPBDATA_INC', 'LIBPBDATA_LIB', 'LIBPBDATA_LIBFLAGS',
             'LIBPBIHDF_INC', 'LIBPBIHDF_LIB', 'LIBPBIHDF_LIBFLAGS',
             'LIBBLASR_INC', 'LIBBLASR_LIB', 'LIBBLASR_LIBFLAGS',
@@ -140,7 +141,6 @@ def update_defaults_for_os(env):
 
 def set_defs_defaults(env, nopbbam, with_szlib):
     defaults = {
-        'BLASR_INC': os.path.join(ROOT, 'include'),
         'LIBBLASR_INC':  os.path.join(ROOT, 'libcpp', 'alignment'),
         'LIBPBDATA_INC':  os.path.join(ROOT, 'libcpp', 'pbdata'),
         'LIBPBIHDF_INC':  os.path.join(ROOT, 'libcpp', 'hdf'),
@@ -172,6 +172,8 @@ def set_defs_defaults(env, nopbbam, with_szlib):
     }
     if not nopbbam:
         defaults.update(pbbam_defaults)
+    else:
+        defaults['NOPBBAM'] = 1
     szlib_defaults = {
         'SZLIB_LIBFLAGS': '-lsz',
         #'ZLIB_LIBFLAGS': '-lz', # probably needed, but provided elsewhere
@@ -196,7 +198,7 @@ def parse_args(args):
     parser.add_option('--no-pbbam', action='store_true',
             help='Avoid compiling anything which would need pbbam.')
     parser.add_option('--with-szlib', action='store_true',
-            help='If HDF5 was built with --with-szlib, then -lz is needed for static binaries.')
+            help='If HDF5 was built with --with-szlib, then -lsz is needed for static binaries.')
     parser.add_option('--submodules', action='store_true',
             help='Set variables to use our git-submodules, which must be pulled and built first. (Implies --no-pbbam.)')
     parser.add_option('--shared', action='store_true',
diff --git a/cram.mk b/cram.mk
new file mode 100644
index 0000000..e928689
--- /dev/null
+++ b/cram.mk
@@ -0,0 +1,41 @@
+# Cram regression-test driver: groups the ctest/*.t cases and runs them with cram.
+# ctest/setup.sh takes the blasr binary from ${BLASR_PATH}/blasr.
+
+FAST_CTESTS := \
+ctest/affineAlign.t            ctest/bamOut.t           ctest/ccsH5.t           ctest/filtercriteria.t  ctest/m0-5.t             ctest/samNM.t \
+ctest/aggressiveIntervalCut.t  ctest/fofn.t             ctest/multipart.t \
+ctest/alignScore.t             ctest/bug25741.t         ctest/ecoli.t           ctest/hitpolicy.t       ctest/noSplitSubreads.t \
+ctest/bamIn.t                  ctest/fastMaxInterval.t  ctest/open_fail.t       ctest/verbose.t         ctest/deterministic.t
+
+MILD_CTESTS := \
+	ctest/useccsallBestN1.t ctest/concordant.t ctest/bug25766.t ctest/holeNumbers.t
+
+SLOW_CTESTS := ctest/bug25328.t ctest/useccsallLargeGenome.t
+
+#BLASR_PATH=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/private/otherbins/internalall/bin/
+#export BLASR_PATH
+
+# Every target below is a command name, not a file to build.
+.PHONY: cramfast crammild cramslow cramtests cramqu clean
+
+cramfast:
+	cram -v --shell=/bin/bash ${FAST_CTESTS}
+
+crammild:
+	cram -v --shell=/bin/bash ${MILD_CTESTS}
+
+cramslow:
+	cram -v --shell=/bin/bash ${SLOW_CTESTS}
+
+cramtests:
+	cram -v --shell=/bin/bash ${FAST_CTESTS} ${MILD_CTESTS} ${SLOW_CTESTS}
+
+# Submit each fast test as a separate qsub job; cram is invoked exactly as in the targets above.
+cramqu:
+	for test in ${FAST_CTESTS}; do \
+		qsub -pe smp 15 -V -cwd -b y -N cramqu cram -v --shell=/bin/bash $$test;\
+	done
+
+# Remove cramqu.* files (presumably the qsub job output) and the ctest/*.err files.
+clean:
+	rm -f cramqu.* ctest/*.err
diff --git a/ctest/.gitignore b/ctest/.gitignore
new file mode 100644
index 0000000..6a3417b
--- /dev/null
+++ b/ctest/.gitignore
@@ -0,0 +1 @@
+/out/
diff --git a/ctest/bamConcordant.t b/ctest/bamConcordant.t
new file mode 100644
index 0000000..93d5691
--- /dev/null
+++ b/ctest/bamConcordant.t
@@ -0,0 +1,33 @@
+Set up
+  $ . $TESTDIR/setup.sh
+
+Test using bam as input, use -concordant
+  $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/bamConcordantRef.fasta -bam -concordant -refineConcordantAlignments -bestn 1 -out $OUTDIR/bamConcordant.bam
+  [INFO]* (glob)
+  [INFO]* (glob)
+
+Check whether sam out and bam out have identical alignments, not checking qvs
+  $ $SAMTOOLS view $OUTDIR/bamConcordant.bam |cut -f 4 
+  1
+  1
+  8??? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+  86?? (glob)
+
+  $ $EXEC /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam $DATDIR/lambda_ref.fasta -m 4 -concordant -bestn 1 -holeNumbers 17417 -out $OUTDIR/tmp.m4 -V 2 > $OUTDIR/bamConcordant.log
+  [INFO]* (glob)
+  [INFO]* (glob)
+
+  $ grep "Concordant template" $OUTDIR/bamConcordant.log
+  Concordant template subread index: 8, 17417/14708_16595
diff --git a/ctest/cigarAdjecentIndels.t b/ctest/cigarAdjecentIndels.t
new file mode 100644
index 0000000..698f3a3
--- /dev/null
+++ b/ctest/cigarAdjecentIndels.t
@@ -0,0 +1,22 @@
+Set up
+  $ . $TESTDIR/setup.sh
+
+Without -allowAdjacentIndels, adjacent indels should not exist in SAM/BAM CIGAR strings
+  $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/noAdjacentIndels.bam -concordant -refineConcordantAlignments -bestn 1 && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+
+  $ $SAMTOOLS view $OUTDIR/noAdjacentIndels.bam |cut -f 6 > $TMP1
+
+  $ grep 'ID' $TMP1 |wc -l
+  0
+
+  $ grep 'DI' $TMP1 |wc -l
+  0
+
+With -allowAdjacentIndels
+  $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/allowAdjacentIndels.bam -concordant -bestn 1 -allowAdjacentIndels && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
diff --git a/ctest/concordant.t b/ctest/concordant.t
index 129c9fa..5b3b6aa 100644
--- a/ctest/concordant.t
+++ b/ctest/concordant.t
@@ -3,7 +3,7 @@ Set up
 
 Test -concordant
   $ rm -rf $OUTDIR/concordant_subset.sam
-  $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta -concordant -sam -out $OUTDIR/concordant_subset.sam -nproc 12 -holeNumbers 1-10000 -sa $DATDIR/ecoli_reference.sa
+  $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta -concordant -refineConcordantAlignments -sam -out $OUTDIR/concordant_subset.sam -nproc 12 -holeNumbers 1-10000 -sa $DATDIR/ecoli_reference.sa
   [INFO]* (glob)
   [INFO]* (glob)
   $ sed -n 6,110864p $OUTDIR/concordant_subset.sam > $OUTDIR/tmp1 
@@ -22,11 +22,12 @@ Test -concordant
 #2015_03_28  --> changelist 148101, 148080 updated read group id, 148100 updated TLEN
 #2015_04_09  --> changelist 148796, updated read group id
 #2015_04_25  --> changelist 149721, update CIGAR string, replace M with X=.
+#2015_04_25  --> changelist ?, force refine all concordant alignments
 
 Test -concordant FMR1 case (the 'typical subread' is selected as template for concordant mapping)
   $ FOFN=$DATDIR/FMR1_concordant.fofn
   $ REF=$DATDIR/FMR1_130CGG.fasta
-  $ $EXEC $FOFN $REF -concordant -out $OUTDIR/FMR1_zmw_37927.m4 -m 4 -holeNumbers 37927
+  $ $EXEC $FOFN $REF -concordant -refineConcordantAlignments -out $OUTDIR/FMR1_zmw_37927.m4 -m 4 -holeNumbers 37927
   [INFO]* (glob)
   [INFO]* (glob)
-  $ diff $OUTDIR/FMR1_zmw_37927.m4 $STDDIR/FMR1_zmw_37927.m4
+  $ diff $OUTDIR/FMR1_zmw_37927.m4 $STDDIR/$UPDATEDATE/FMR1_zmw_37927.m4
diff --git a/ctest/dataset.t b/ctest/dataset.t
new file mode 100644
index 0000000..9a14ec8
--- /dev/null
+++ b/ctest/dataset.t
@@ -0,0 +1,64 @@
+Set up
+  $ . $TESTDIR/setup.sh
+
+Test dataset.xml as input
+  $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -m 4 -out $OUTDIR/chunking.m4 -bestn 1 && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+Test that filters in dataset.xml are respected.
+  $ cat $OUTDIR/chunking.m4 | wc -l
+  9
+
+Test dataset.xml -bam output
+  $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/chunking.bam  && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+
+Test dataset.xml -concordant
+  $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/chunking.concordant.bam -concordant && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+
+Test dataset with no filters (to make sure that an empty filter does not discard all bam records.)
+  $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/nofilter.bam -concordant -bestn 1 && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+
+  $ $SAMTOOLS view $OUTDIR/nofilter.bam|wc -l
+  131
+
+
+Test dataset when -concordant is on
+  $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/bamConcordantRef.fasta -bam -concordant -refineConcordantAlignments -bestn 1 -out $OUTDIR/datasetConcordant.bam -holeNumbers 1898 && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+
+  $ $SAMTOOLS view $OUTDIR/datasetConcordant.bam | cut -f 4
+  ??? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
+  3?? (glob)
diff --git a/ctest/deterministic.t b/ctest/deterministic.t
new file mode 100644
index 0000000..8a1214c
--- /dev/null
+++ b/ctest/deterministic.t
@@ -0,0 +1,47 @@
+Set up
+  $ . $TESTDIR/setup.sh
+
+Test blasr with input bam which has:
+(1) insertionQV, deletionQV, deletionTag, substitutionQV, substitutionTag
+(2) insertionQV, deletionQV, deletionTag
+(3) no QV
+and then check that the output is deterministic.
+
+(1)
+  $ name=iq-dq-sub
+  $ infile=$DATDIR/test_bam/$name.subreads.bam
+  $ outfile=$OUTDIR/$name.m4
+  $ stdfile=$STDDIR/$name.m4
+  $ rm -f $outfile
+  $ $EXEC $infile  $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+  $ sort $outfile > $outfile.tmp && mv $outfile.tmp $outfile
+  $ diff $outfile $stdfile
+
+(2)
+  $ name=iq-dq
+  $ infile=$DATDIR/test_bam/$name.subreads.bam
+  $ outfile=$OUTDIR/$name.m4
+  $ stdfile=$STDDIR/$name.m4
+  $ rm -f $outfile
+  $ $EXEC $infile  $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+  $ sort $outfile > $outfile.tmp && mv $outfile.tmp $outfile
+  $ diff $outfile $stdfile
+
+(3)
+  $ name=no-iq-dq
+  $ infile=$DATDIR/test_bam/$name.subreads.bam
+  $ outfile=$OUTDIR/$name.m4
+  $ stdfile=$STDDIR/$name.m4
+  $ rm -f $outfile
+  $ $EXEC $infile  $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+  $ sort $outfile > $outfile.tmp && mv $outfile.tmp $outfile
+  $ diff $outfile $stdfile
diff --git a/ctest/filtercriteria.t b/ctest/filtercriteria.t
index 9634e9e..9cec9dc 100644
--- a/ctest/filtercriteria.t
+++ b/ctest/filtercriteria.t
@@ -27,4 +27,4 @@ Test -minPctSimilarity
   $ echo $?
   0
   $ wc -l $O |cut -f 1 -d ' ' 
-  12
+  14
diff --git a/ctest/open_fail.t b/ctest/open_fail.t
index ac7369f..6df1b82 100644
--- a/ctest/open_fail.t
+++ b/ctest/open_fail.t
@@ -4,5 +4,5 @@ Set up
 If fail to open an bax/bas.h5 file because of unable to initialize required dataset, give an warning.
   $ $EXEC $DATDIR/open_fail_no_dyset.fofn $DATDIR/lambda_ref.fasta -m 4
   [INFO]* (glob)
-  Could not open /mnt/secondary-siv/testdata/BlasrTestData/ctest/data/open_fail_no_dyset.fofn
+  Could not open /pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/open_fail_no_dyset.fofn
   [1]
diff --git a/ctest/setup.sh b/ctest/setup.sh
index 8d44dc2..f75e4fb 100755
--- a/ctest/setup.sh
+++ b/ctest/setup.sh
@@ -1,12 +1,12 @@
 # Set up directories
 CURDIR=$TESTDIR
-REMOTEDIR=/mnt/secondary-siv/testdata/BlasrTestData/ctest
+REMOTEDIR=/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest
 DATDIR=$REMOTEDIR/data
 OUTDIR=$CURDIR/out
 STDDIR=$REMOTEDIR/stdout
 
 # Set up the executable: blasr.
-EXEC=$TESTDIR/../blasr
+EXEC=${BLASR_PATH}/blasr
 
 # Define tmporary files
 TMP1=$OUTDIR/$$.tmp.out
@@ -19,7 +19,7 @@ mkdir -p $OUTDIR
 SAMTOOLS=/mnt/secondary/Smrtpipe/builds/Internal_Mainline_Nightly_LastSuccessfulBuild/analysis/bin/samtools
 
 #Update date
-UPDATEDATE=2015_04_27
+UPDATEDATE=2015_11_09
 
 # 2014_08_21 --> change 138516: added YS, YE, ZM tags
 # 2014_08_28 --> change 139176: Update SAM MD5 
@@ -27,3 +27,4 @@ UPDATEDATE=2015_04_27
 # 2015_04_09 --> change 148796: update read group id
 # 2015_04_25 --> change 149721, update CIGAR string, replace M with X=
 # 2015_04_26 --> change 149749, add opiton -cigarUseSeqMatch (default: false). If -cigarUseSeqMatch is turned on, CIGAR strings use '=' and 'X' to represent sequence match and mismatch instead of 'M'.
+# 2015_11_05 --> change 166177, update CIGAR string, DO NOT allow adjacent indels unless -allowAdjacentIndels is ON.
diff --git a/ctest/unaligned.t b/ctest/unaligned.t
new file mode 100644
index 0000000..3340b0c
--- /dev/null
+++ b/ctest/unaligned.t
@@ -0,0 +1,25 @@
+Set up
+  $ . $TESTDIR/setup.sh
+
+Test dataset.xml as input
+  $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -unaligned $OUTDIR/unaligned.txt -noPrintUnalignedSeqs -concordant 1>/dev/null && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+  $ head -5 $OUTDIR/unaligned.txt
+  m150404_101626_42267_c100807920800000001823174110291514_s1_p0/55/0_1380
+  m150404_101626_42267_c100807920800000001823174110291514_s1_p0/55/1432_3136
+  m150404_101626_42267_c100807920800000001823174110291514_s1_p0/480/11699_11988
+  m150404_101626_42267_c100807920800000001823174110291514_s1_p0/480/12033_13456
+  m150404_101626_42267_c100807920800000001823174110291514_s1_p0/480/13519_14067
+
+  $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta -unaligned $OUTDIR/unaligned.txt -noPrintUnalignedSeqs 1>/dev/null && echo $?
+  [INFO]* (glob)
+  [INFO]* (glob)
+  0
+  $ head -5 $OUTDIR/unaligned.txt
+  m121004_000921_42130_c100440700060000001523060402151341_s1_p0/8/2724_3021
+  m121004_000921_42130_c100440700060000001523060402151341_s1_p0/13/0_278
+  m121004_000921_42130_c100440700060000001523060402151341_s1_p0/13/327_954
+  m121004_000921_42130_c100440700060000001523060402151341_s1_p0/13/1004_1580
+  m121004_000921_42130_c100440700060000001523060402151341_s1_p0/13/1625_2202
diff --git a/ctest/useccsallBestN1.t b/ctest/useccsallBestN1.t
index 37b1305..32fafc7 100644
--- a/ctest/useccsallBestN1.t
+++ b/ctest/useccsallBestN1.t
@@ -5,7 +5,7 @@ Test -useccsall with bestn = 1
   $ $EXEC $DATDIR/ccstest.fofn $DATDIR/ccstest_ref.fasta -bestn 1 -useccsall -sam -out $OUTDIR/useccsall.sam -holeNumbers 76772
   [INFO]* (glob)
   [INFO]* (glob)
-  $ sed -n '9,$ p' $OUTDIR/useccsall.sam > $TMP1
-  $ sed -n '9,$ p' $STDDIR/$UPDATEDATE/useccsall.sam > $TMP2
+  $ sed -n '9,$ p' $OUTDIR/useccsall.sam |cut -f 1-4 > $TMP1
+  $ sed -n '9,$ p' $STDDIR/$UPDATEDATE/useccsall.sam | cut -f 1-4 > $TMP2
   $ diff $TMP1 $TMP2
   $ rm $TMP1 $TMP2
diff --git a/extrautils/BasH5Simulator.cpp b/extrautils/BasH5Simulator.cpp
deleted file mode 100644
index 969f933..0000000
--- a/extrautils/BasH5Simulator.cpp
+++ /dev/null
@@ -1,524 +0,0 @@
-#include <string>
-#include <sstream>
-#include <iostream>
-#include "utils.hpp"
-#include "Enumerations.h"
-#include "DNASequence.hpp"
-#include "FASTAReader.hpp"
-#include "CommandLineParser.hpp"
-#include "metagenome/TitleTable.hpp"
-#include "metagenome/FindRandomSequence.hpp"
-#include "statistics/StatUtils.hpp"
-#include "HDFBasWriter.hpp"
-#include "HDFRegionTableWriter.hpp"
-#include "simulator/LengthHistogram.hpp"
-#include "simulator/OutputSampleListSet.hpp"
-#include "simulator/ContextOutputList.hpp"
-
-using namespace std;
-
-void SetHelp(string & str) {
-    stringstream helpStream;
-    helpStream 
-        << "usage: alchemy outputModel [ options ]" << endl
-        << " options: " << endl
-        << "  -genome genome.fasta" << endl
-        << "            Simulate reads from the reference genome 'genome.fasta'." << endl << endl
-        << "  -numBasesPerFile numBasesPerFile" << endl
-        << "            Limit the number of bases per output file to this." << endl << endl
-        << "  -sourceReads filename " << endl
-        << "            When set, simulate reads by reading from 'filename', " << endl
-        << "            rather than simulating from a genome." << endl 
-        << "            The format of the fasta titles should be >read_index|chr|start_pos|end_pos" << endl << endl
-        << "  -lengthModel" << endl
-        << "            Use lengths from the alchemy model, rather than the read length.  This " << endl
-        << "            is used in conjunction with the sourceReadsFile, to modulate the lenghts" << endl
-        << "            of the reads." << endl << endl
-        << "  -fixedLength length " << endl
-        << "            Set simulated read length to a fixed value of 'length', rather than " << endl
-        << "            sampling from a length mode." << endl 
-        << "  -movieName name (\"simulated_movie\")" << endl 
-        << "            Use 'name' for movies rather than m000_000..." << endl << endl
-        << "  -titleTable name" <<endl 
-        << "            Read in the titleTable to assign chromosome indices from " << endl
-        << "            simulated reads." << endl << endl
-        << "  -baseFileName name (\"simulated\")" << endl 
-        << "            Use an alternative name for the output file, rather than 'simulated'" << endl << endl 
-        << "  -nFiles N (1)" << endl
-        << "            The number of files to simulate. " << endl << endl 
-        << "  -meanLength L(0)" << endl
-        << "            When set, scales the length of the average read to L." << endl  << endl 
-        << "  -posMap   filename  " << endl
-        << "            Use this when running alignment through compareSequences.py " << endl
-        << "            and writing to cmp.h5. Specify a map between movie names and " << endl
-        << "            chromosome/positions. " << endl << endl 
-        << "            When set, the simulated positions are not sored in " << endl
-        << "            the bas.h5 files and instead printed to 'filename'" << endl << endl 
-        << "  -printPercentRepeat" << endl
-        << "            Add to the title table a field that has the percent " << endl
-        << "            repeat content of the read shown by lower case in " << endl
-        << "            the reference." << endl << endl;
-    str = helpStream.str();
-}
-
-int main(int argc, char* argv[]) {
-    string refGenomeFileName = "";
-    string lengthModelFileName = "";
-    string outputModelFileName = "";
-    DNALength numBasesPerFile = 0;
-    string sourceReadsFileName = "";
-    string titleTableFileName = "";
-    int numBasH5Files = 1;
-    string basH5BaseFileName = "simulated";
-    string movieName = "m101211_092754_00114_cSIM_s1_p0";
-    bool   doRandGenInit = true;
-    bool   usePosMap     = false;
-    bool   printPercentRepeat = false;
-    string posMapFileName = "";
-    vector<string> movieNames;
-    bool useLengthModel = false;
-    bool useFixedLength = false;
-    ofstream posMapFile;
-    int scaledLength = 0;
-    int fixedLength = 0;
-    int nBasFiles = 1;
-    bool useLengthsModel = true;
-    bool printHelp = false;
-
-    
-  //  Look to see if the refAsReads flag is specified anywhere before
-  //  parsing the command line.
-
-    CommandLineParser clp;
-    string commandLine;
-    string helpString;
-    SetHelp(helpString);
-    vector<string> fns;
-
-    clp.RegisterStringOption("genome", &refGenomeFileName, "");
-    clp.RegisterIntOption("numBasesPerFile", (int*)&numBasesPerFile, "",
-            CommandLineParser::PositiveInteger);
-    clp.RegisterStringOption("sourceReads", &sourceReadsFileName, "");
-    clp.RegisterStringOption("lengthModel", &lengthModelFileName, "");
-    clp.RegisterIntOption("fixedLength", &fixedLength, "",
-            CommandLineParser::PositiveInteger);
-    clp.RegisterFlagOption("lengthModel", &useLengthModel, "");
-    clp.RegisterStringOption("movieName", &movieName, "");
-    clp.RegisterStringOption("titleTable", &titleTableFileName, "");
-    clp.RegisterStringOption("baseFileName", &basH5BaseFileName, "");
-    clp.RegisterIntOption("nFiles", &nBasFiles, "",
-            CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("meanLength", &scaledLength, "",
-            CommandLineParser::PositiveInteger);
-    clp.RegisterStringOption("posMap", &posMapFileName, "");
-    clp.RegisterFlagOption("printPercentRepeat", &printPercentRepeat, "");
-    clp.RegisterFlagOption("h", &printHelp, "");
-
-    clp.SetHelp(helpString);
-    clp.ParseCommandLine(argc, argv, fns);
-    clp.CommandLineToString(argc, argv, commandLine);
-
-    clp.SetProgramName("alchemy");
-
-    outputModelFileName = fns[0];
-    if (argc <= 1 or printHelp or outputModelFileName == "") {
-        cout << helpString << endl;
-        exit(0);
-    }
-
-    if (usePosMap) {
-        CrucialOpen(posMapFileName, posMapFile, std::ios::out);
-    }
-
-    if (sourceReadsFileName == "" and fixedLength == 0) {
-        useLengthModel = true;
-    }
-
-    if (useLengthModel and fixedLength != 0) {
-        cout << "ERROR! You must either use a length model or a fixed length." << endl;
-        exit(1);
-    }
-
-    if (sourceReadsFileName == "" and numBasesPerFile == 0) {
-        cout << "ERROR! You must specify either a set of read to use as " << endl
-             << "original reads for simulation or the total number of bases " << endl
-             << "to simulate in each bas.h5 file." << endl;
-        exit(1);
-    }
- 
-    if (sourceReadsFileName == "" and refGenomeFileName == "") {
-        cout << "ERROR! You must specify a genome to sample reads from or a set of read "<<endl
-            << "to use as original reads for simulation." << endl;
-        exit(1);
-    }
-
-    if (fixedLength != 0 and refGenomeFileName == "") {
-        cout << "ERROR! You must specify a genome file if using a fixed length." << endl;
-        exit(1);
-    }
-
-    if ((fixedLength != 0 or scaledLength != 0) and sourceReadsFileName != "") {
-        cout << "ERROR! You cannot specify a fixed length nor mean length with a source " << endl
-            << "reads file.  The read lengths are taken from the source reads or the length model." << endl;
-        exit(1);
-    }
-
-    LengthHistogram   lengthHistogram;
-    OutputSampleListSet   outputModel(0);
-    TitleTable titleTable;
-
-    if (doRandGenInit) {
-        InitializeRandomGeneratorWithTime();
-    }
-
-    //
-    // Read models.
-    //
-    if (titleTableFileName != "") {
-        titleTable.Read(titleTableFileName);
-    }
-
-
-    outputModel.Read(outputModelFileName);
-
-    if (useLengthModel) {
-        lengthHistogram.BuildFromAlignmentLengths(outputModel.lengths);
-    }
-
-
-    vector<int> alignmentLengths;
-    int meanAlignmentLength;
-
-
-    if (scaledLength != 0 and useLengthModel) {
-        //
-        // Scale the histogram so that the average length is 'scaledLength'.
-        //
-
-        // 1. Integrate histogram
-        long totalLength = 0;
-        long totalSamples = 0;
-        int hi;
-        for (hi = 0; hi < lengthHistogram.lengthHistogram.cdf.size()-1; hi++) {
-            int ni;
-            ni = lengthHistogram.lengthHistogram.cdf[hi+1] - lengthHistogram.lengthHistogram.cdf[hi];
-            totalLength += ni * lengthHistogram.lengthHistogram.data[hi];
-        }
-        totalSamples = lengthHistogram.lengthHistogram.cdf[lengthHistogram.lengthHistogram.cdf.size()-1];
-
-        float meanSampleLength = totalLength / (1.0*totalSamples);
-        float fractionIncrease = scaledLength / meanSampleLength;
-
-        for (hi = 0; hi < lengthHistogram.lengthHistogram.cdf.size(); hi++) {
-            lengthHistogram.lengthHistogram.data[hi] *= fractionIncrease;
-        }
-    }
-
-    FASTAReader inReader, seqReader;
-    vector<FASTASequence> reference;
-    DNALength refLength = 0;
-    int i;
-    if (refGenomeFileName != "") {
-        inReader.Init(refGenomeFileName);
-        inReader.ReadAllSequences(reference);
-
-        for (i = 0; i < reference.size(); i++) {
-            refLength += reference[i].length;
-        }
-    }
-
-    if (sourceReadsFileName !=  "") {
-        seqReader.Init(sourceReadsFileName);
-    }
-
-    ofstream readsFile;
-
-    //
-    // Create and simulate bas.h5 files.
-    //
-    int baseFileIndex;
-    bool readsRemain = true;
-    for (baseFileIndex = 0; ((sourceReadsFileName == "" and baseFileIndex < nBasFiles)  // case 1 is reads are generated by file
-                or (sourceReadsFileName != "" and readsRemain)); // case 2 is reads are generated by an input file.
-            baseFileIndex++) {
-        //
-        // Prep the base file for writing.
-        //
-        stringstream fileNameStrm, movieNameStrm;
-        //string movieName = "m000000_000000_00000_cSIMULATED_s";
-        movieNameStrm << movieName << baseFileIndex << "_p0";
-        string fullMovieName = movieNameStrm.str();		
-        fileNameStrm  << fullMovieName <<  ".bas.h5";
-
-
-        HDFBasWriter basWriter;
-        HDFRegionTableWriter regionWriter;
-        //
-        // This is mainly used to create the atributes.
-        //
-        RegionTable regionTable;
-        regionTable.CreateDefaultAttributes();
-
-        basWriter.SetPlatform(Springfield);
-        //
-        // Use a fixed set of fields for now.
-        //
-
-        // These are all pulled from the outputModel.
-        basWriter.IncludeField("Basecall");
-        basWriter.IncludeField("QualityValue");
-        basWriter.IncludeField("SubstitutionQV");
-        basWriter.IncludeField("SubstitutionTag");
-        basWriter.IncludeField("InsertionQV");
-        basWriter.IncludeField("DeletionQV");
-        basWriter.IncludeField("DeletionTag");
-        basWriter.IncludeField("WidthInFrames");
-        basWriter.IncludeField("PreBaseFrames");
-        basWriter.IncludeField("PulseIndex");
-
-        vector<unsigned char> qualityValue, substitutionQV, substitutionTag, insertionQV, deletionQV, deletionTag;
-        vector<HalfWord> widthInFrames, preBaseFrames, pulseIndex;
-
-        // Just go from 0 .. hole Number
-        basWriter.IncludeField("HoleNumber");
-        // Fixed to 0.
-        basWriter.IncludeField("HoleXY");
-        if (usePosMap == false) {
-            basWriter.IncludeField("SimulatedSequenceIndex");
-            basWriter.IncludeField("SimulatedCoordinate");
-        }
-        basWriter.SetChangeListID("1.3.0.50.104380");
-
-
-        DNALength numSimulatedBases  = 0;
-        FASTASequence sampleSeq;
-        //sampleSeq.length = readLength;
-        int maxRetry = 10000000;
-        int retryNumber = 0;
-        int numReads = 0;
-        int readLength = 0;
-
-        while (numBasesPerFile == 0 or numSimulatedBases < numBasesPerFile) {
-            DNALength seqIndex, seqPos;
-            if (useLengthModel or fixedLength) {
-                if (useLengthModel) {
-                    lengthHistogram.GetRandomLength(readLength);
-                }
-                else {
-                    readLength = fixedLength;
-                }
-            }
-            if (refGenomeFileName != "") {
-                FindRandomPos(reference, seqIndex, seqPos, readLength + (outputModel.keyLength - 1));
-                sampleSeq.seq    = &reference[seqIndex].seq[seqPos];
-                sampleSeq.length = readLength + (outputModel.keyLength - 1);
-                assert(reference[seqIndex].length >= sampleSeq.length);
-            }
-            else if (sourceReadsFileName != "") {
-                if (seqReader.GetNext(sampleSeq) == false) {
-                    readsRemain = false;
-                    break;
-                }
-                if (sampleSeq.length < outputModel.keyLength) {
-                    continue;
-                }
-                //
-                // Now attempt to parse the position from the fasta title.
-                //
-
-                if (useLengthModel) {
-                    int tryNumber = 0;
-                    readLength = 0;
-                    int maxNTries = 1000;
-                    int tryBuffer[5] = {-1,-1,-1,-1,-1};
-                    while (tryNumber < maxNTries and readLength < outputModel.keyLength) {
-                        lengthHistogram.GetRandomLength(readLength);
-                        readLength = sampleSeq.length = min(sampleSeq.length, (unsigned int) readLength);
-                        tryBuffer[tryNumber%5] = readLength;
-                        tryNumber++;
-                    }
-                    if (tryNumber >= maxNTries) {
-                        cout << "ERROR. Could not generate a read length greater than the " << outputModel.keyLength << " requried " <<endl
-                            << "minimum number of bases using the length model specified in the alchemy." <<endl
-                            << "model.  Something is either wrong with the model or the context length is too large." <<endl;
-                        cout << "The last few tries were: " << tryBuffer[0] << " " << tryBuffer[1] << " " << tryBuffer[2] << " " << tryBuffer[3] << " " << tryBuffer[4] << endl;
-                        exit(1);
-                    }
-                }
-
-                readLength = sampleSeq.length;
-                vector<string> tokens;
-                Splice(sampleSeq.title, "|", tokens);
-                if (tokens.size() == 4) {
-                    seqPos = atoi(tokens[2].c_str());
-                    if (titleTableFileName == "") {
-                        seqIndex = 0;
-                    }
-                    else {
-                        int index;
-                        titleTable.Lookup(tokens[1], index);
-                        seqIndex = index;
-                    }
-                }
-                else {
-                    seqPos   = 0;
-                }
-            }
-
-            //
-            // If this is the first read printed to the base file, initialize it.
-            //
-            if (numSimulatedBases == 0) {
-                basWriter.Initialize(fileNameStrm.str(), movieNameStrm.str(), Springfield);
-                regionWriter.Initialize(basWriter.pulseDataGroup);
-            }
-
-            numSimulatedBases += readLength;
-
-            int p;
-            // create the sample sequence
-            int contextLength = outputModel.keyLength;
-            int contextMiddle = contextLength / 2;
-            string outputString;
-
-            int nDel = 0;
-            int nIns = 0;
-
-            //
-            // Simulate to beyond the sample length.
-            //
-            qualityValue.clear(); 
-            substitutionQV.clear(); 
-            substitutionTag.clear(); 
-            insertionQV.clear(); 
-            deletionQV.clear(); 
-            deletionTag.clear();
-            pulseIndex.clear();
-            widthInFrames.clear();
-            preBaseFrames.clear();
-            assert(sampleSeq.length > contextMiddle + 1);
-            for (p = contextMiddle;
-                    p < sampleSeq.length - contextMiddle - 1; p++) {
-                string refContext;
-                refContext.assign((const char*) &sampleSeq.seq[p-contextMiddle], contextLength);
-
-                string outputContext;
-                int    contextWasFound;
-                OutputSample sample;
-                int i;
-                for (i = 0; i < refContext.size(); i++) { refContext[i] = toupper(refContext[i]);}
-                outputModel.SampleRandomSample(refContext, sample);
-
-                if (sample.type == OutputSample::Deletion ) {
-                    //
-                    // There was a deletion.  Advance in reference, then output
-                    // the base after the deletion.
-                    //
-                    p++;
-                    ++nDel;
-                }
-
-                int cp;
-                //
-                // Add the sampled context, possibly multiple characters because of an insertion.
-                //
-                for (i = 0; i < sample.nucleotides.size(); i++) {
-                    outputString.push_back(sample.nucleotides[i]);
-                    qualityValue.push_back(sample.qualities[i].qv[0]);
-                    deletionQV.push_back(sample.qualities[i].qv[1]);
-                    insertionQV.push_back(sample.qualities[i].qv[2]);
-                    substitutionQV.push_back(sample.qualities[i].qv[3]);
-                    deletionTag.push_back(sample.qualities[i].tags[0]);
-                    substitutionTag.push_back(sample.qualities[i].tags[1]);
-                    pulseIndex.push_back(sample.qualities[i].frameValues[0]);
-                    preBaseFrames.push_back(sample.qualities[i].frameValues[1]);
-                    widthInFrames.push_back(sample.qualities[i].frameValues[2]);
-                }
-                nIns += sample.qualities.size() - 1;
-            }
-            if (outputString.find('N') != outputString.npos or
-                    outputString.find('n') != outputString.npos) {
-                cout << "WARNING!  The sampled string " << endl << outputString << endl
-                    << "should not contain N's, but it seems to.  This is being ignored "<<endl
-                    << "for now so that simulation may continue, but this shouldn't happen"<<endl
-                    << "and is really a bug." << endl;
-                numSimulatedBases -= readLength;
-                continue;
-            }
-            //
-            // Ok, done creating the read, now time to create some quality values!!!!!
-            //
-            SMRTSequence read;
-            read.length = outputString.size();
-            read.Allocate(read.length);
-            memcpy(read.seq, outputString.c_str(), read.length * sizeof(unsigned char));
-            assert(qualityValue.size() == read.length * sizeof(unsigned char));
-            memcpy(read.qual.data, &qualityValue[0], read.length * sizeof(unsigned char));
-            memcpy(read.deletionQV.data, &deletionQV[0], read.length * sizeof(unsigned char));
-            memcpy(read.insertionQV.data, &insertionQV[0], read.length * sizeof(unsigned char));
-            memcpy(read.substitutionQV.data, &substitutionQV[0], read.length * sizeof(unsigned char));
-            memcpy(read.deletionTag, &deletionTag[0], read.length * sizeof(unsigned char));
-            memcpy(read.substitutionTag, &substitutionTag[0], read.length * sizeof(unsigned char));
-            memcpy(read.pulseIndex, &pulseIndex[0], read.length * sizeof(int));
-            memcpy(read.preBaseFrames, &preBaseFrames[0], read.length * sizeof(HalfWord));
-            memcpy(read.widthInFrames, &widthInFrames[0], read.length * sizeof(HalfWord));
-
-            //
-            // The pulse index for now is just fake data.
-            //
-            int i;
-            for (i = 0; i < read.length; i++) {
-                read.pulseIndex[i] = 1;
-            }
-            read.xy[0] = seqIndex;
-            read.xy[1] = seqPos;
-            read.zmwData.holeNumber = numReads;
-
-            basWriter.Write(read);
-            // Record where this was simulated from.
-            if (usePosMap == false) {
-                basWriter.WriteSimulatedCoordinate(seqPos);
-                basWriter.WriteSimulatedSequenceIndex(seqIndex);
-            }
-            else {
-                posMapFile << fullMovieName << "/" << numReads << "/0_" << read.length << " " << seqIndex << " "<< seqPos;
-                if (printPercentRepeat) {
-                    DNALength nRepeat = sampleSeq.GetRepeatContent();
-                    posMapFile << " " << nRepeat*1.0/sampleSeq.length;
-                }
-                posMapFile << endl;
-            }
-            RegionAnnotation region;
-            region.row[0] = read.zmwData.holeNumber;
-            region.row[1] = 1;
-            region.row[2] = 0;
-            region.row[3] = read.length;
-            region.row[4] = 1000; // Should be enough.
-            regionWriter.Write(region);
-            region.row[1] = 2; // Rewrite for hq region encompassing everything.
-            regionWriter.Write(region);      
-            if (sourceReadsFileName != "") {
-                sampleSeq.Free();
-            }
-            read.Free();
-            ++numReads;
-        }
-        regionWriter.Finalize(regionTable.columnNames, 
-                regionTable.regionTypes,
-                regionTable.regionDescriptions,
-                regionTable.regionSources);
-        basWriter.Close();
-        numReads = 0;
-        //
-        // The bas writer should automatically flush on closing.
-        //
-    }
-    if (usePosMap) {
-        posMapFile.close();
-    }
-
-    for (i = 0; i < reference.size(); i++) {
-        reference[i].Free();
-    }
-}
-
diff --git a/extrautils/CCSH5ToBam.cpp b/extrautils/CCSH5ToBam.cpp
deleted file mode 100644
index 7593cc9..0000000
--- a/extrautils/CCSH5ToBam.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-#include "utils/FileOfFileNames.hpp"
-#include "datastructures/alignmentset/SAMSupplementalQVList.hpp"
-#include "format/SAMHeaderPrinter.hpp"
-#include "format/BAMPrinter.hpp"
-#include "pbbam/BamWriter.h"
-#include "CommandLineParser.hpp"
-
-using namespace PacBio::BAM;
-using namespace std;
-
-string DISCLAIM = 
-"THIS TOOL IS CREATED FOR DEVELOPERS USE ONLY AND IT MAY OR MAY NOT "
-"BREAK AT ANY TIME. USE AT YOUR OWN RISK.";
-
-string GetVersion(void) {
-    return "1.0";
-}
-
-void CCSReadToBamRecord(CCSSequence & ccsRead, BamRecord & bamRecord, SupplementalQVList & samQVList) {
-//m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/1650/1920_2155   4   *   0   255 *   *   0   0
-    bamRecord.Impl().Name(ccsRead.GetTitle());
-    bamRecord.Impl().Flag(static_cast<uint32_t>(4));
-    string seqString;
-    seqString.assign((char*)ccsRead.seq, ccsRead.length);
-
-    bamRecord.Impl().SetSequenceAndQualities(seqString, ccsRead.qual.ToString());
-    // bamRecord.Impl().CigarData(Cigar::FromStdString("*"));
-    bamRecord.Impl().Bin(0);
-    bamRecord.Impl().InsertSize(0);
-    bamRecord.Impl().MatePosition(static_cast<PacBio::BAM::Position>(-1));
-    bamRecord.Impl().MateReferenceId(static_cast<int32_t>(-1));
-    bamRecord.Impl().Position(static_cast<PacBio::BAM::Position>(-1));
-    bamRecord.Impl().ReferenceId(static_cast<int32_t>(-1));
-    TagCollection tags;
-    tags["RG"] = ccsRead.GetReadGroupId();
-    tags["np"] = ccsRead.numPasses;
-    tags["zm"] = ccsRead.zmwData.holeNumber;
-    tags["qs"] = 0;
-    tags["qe"] = ccsRead.length;
-
-    samQVList.FormatQVOptionalFields(ccsRead);
-    // Add QVs to BamRecordImpl.
-    string insertionQVs, deletionQVs, substitutionQVs, mergeQVs, substitutionTags, deletionTags;
-    if (ccsRead.GetQVs("InsertionQV", insertionQVs)) {
-        tags["iq"] = insertionQVs;
-    }
-    if (ccsRead.GetQVs("DeletionQV", deletionQVs)) {
-        tags["dq"] = deletionQVs;
-    }
-    if (ccsRead.GetQVs("SubstitutionQV", substitutionQVs)) {
-        tags["sq"] = substitutionQVs;
-    }
-    if (ccsRead.GetQVs("MergeQV", mergeQVs)) {
-        tags["mq"] = mergeQVs;
-    }
-    // substitutionTag is not included by default
-    if (ccsRead.GetQVs("DeletionTag", deletionTags)) {
-        tags["dt"] = deletionTags;
-    }
-    bamRecord.Impl().Tags(tags);
-}
-
-int main(int argc, char* argv[]) {
-    string progName = "ccsh5tobam";
-    CommandLineParser clp;
-    clp.SetHelp("Convert ccs.h5 to bam.\n" + DISCLAIM);
-    clp.SetConciseHelp("ccsh5tobam ccs.h5|fofn out.bam\n" + DISCLAIM);
-    clp.SetProgramName(progName);
-    clp.SetVersion(GetVersion());
-    string fofn, bamOutName;
-    clp.RegisterStringOption("in.ccs.h5", &fofn, "Input ccs.h5|fofn file.", true);
-    clp.RegisterStringOption("out.bam", &bamOutName, "Output bam file.", true);
-    clp.RegisterPreviousFlagsAsHidden();
-    clp.ParseCommandLine(argc, argv);
-
-    //cerr << "[INFO] " << GetTimestamp() << " [" << progName << "] started."  << endl;
-
-    vector<string> ccsFileNames;
-    FileOfFileNames::StoreFileOrFileList(fofn, ccsFileNames);
-
-    string so = "UNKNOWN"; // sorting order;
-    string version = GetVersion(); 
-    string commandLineString;
-    clp.CommandLineToString(argc, argv, commandLineString);
-
-    SupplementalQVList samQVList;
-    samQVList.SetDefaultQV();
-    SequenceIndexDatabase<FASTASequence> seqdb;
-    SAMHeaderPrinter shp(so, seqdb,
-            ccsFileNames, ReadType::ReadTypeEnum::CCS,
-            samQVList, "ccsh52bam", version,
-            commandLineString);
-    string headerString = shp.ToString();// SAM/BAM header
-
-    BamHeader header = BamHeader(headerString);
-    // Both file name and SAMHeader are required in order to create a BamWriter.
-    BamWriter * bamWriterPtr = new BamWriter(bamOutName, header);
-
-    for (string ccsFileName: ccsFileNames) {
-        ReaderAgglomerate reader;
-        reader.SetReadFileName(ccsFileName);
-        reader.SetReadType(ReadType::ReadTypeEnum::CCS);
-
-        // Initialize using already set file names.
-        int initReturnValue = reader.Initialize();
-        if (initReturnValue <= 0) {
-            cerr << "WARNING! Could not open file " << ccsFileName << endl;
-            continue;
-        }
-
-        // Check whether use ccs only.
-        assert (reader.GetFileType() == HDFCCSONLY);
-        int randint = 0;
-        CCSSequence ccsRead;
-        while(reader.GetNext(ccsRead, randint) != 0) {
-            if (ccsRead.length > 0) {
-                BamRecord bamRecord;
-                CCSReadToBamRecord(ccsRead, bamRecord, samQVList);
-                bamWriterPtr->Write(bamRecord);
-            }
-        }
-    }
-
-    try {
-        bamWriterPtr->TryFlush();
-        delete bamWriterPtr;
-        bamWriterPtr = NULL;
-    } catch (std::exception e) {
-        cout << "Error, could not flush bam records to bam file." << endl;
-        exit(1);
-    }
-
-    //cerr << "[INFO] " << GetTimestamp() << " [" << progName << "] ended."  << endl;
-    return 0;
-}
diff --git a/extrautils/SimpleShredder.cpp b/extrautils/SimpleShredder.cpp
index 321e053..8ccfd77 100644
--- a/extrautils/SimpleShredder.cpp
+++ b/extrautils/SimpleShredder.cpp
@@ -25,7 +25,7 @@ int main(int argc, char* argv[]) {
     clp.RegisterStringOption("inFile", &inFileName, "Reference sequence", 0);
     clp.RegisterPreviousFlagsAsHidden();
     clp.RegisterIntOption("readLength", (int*) &readLength, "The length of reads to simulate.  The length is fixed.",
-            CommandLineParser::PositiveInteger, "Length of every read.", 0);
+            CommandLineParser::PositiveInteger, 0);
     clp.RegisterFloatOption("coverage", &coverage, "Total coverage (from which the number of reads is calculated",
             CommandLineParser::PositiveFloat, 0);
     clp.RegisterFlagOption("nonRandInit", &noRandInit, "Skip initializing the random number generator with time.");
diff --git a/extrautils/ctest/alchemy.t b/extrautils/ctest/alchemy.t
deleted file mode 100644
index d792e7e..0000000
--- a/extrautils/ctest/alchemy.t
+++ /dev/null
@@ -1,20 +0,0 @@
-Set up 
-  $ . $TESTDIR/setup.sh
-
-Set up the executable: alchemy.
-  $ EXEC=$TESTDIR/../alchemy
-
-test_alchemy.cmp.h5 was generated by 
-pbalign.py $DATDIR/test_alchemy_read.fa $DATDIR/test_alchemy_ref.fa test_alchemy.cmp.h5
-
-$ ./cmpH5StoreQualityByContext $DATDIR/test_alchemy.cmp.h5 $OUTDIR/test_alchemy.qbc -contextLength 3
-
-  $ $EXEC $DATDIR/ecoli_out.qbc -genome $DATDIR/ecoli_reference.fasta  -numBasesPerFile 100000 -baseFileName 'this_bas_file' -movieName $OUTDIR/alchemy_
-  $ echo $?
-  0
-
-pls2fasta can be successfully applied to the simulated bas.h5 file.
-$ pls2fasta *.bas.h5 $OUTDIR/test_alchemy_pls2fasta.fa
-[INFO] * [pls2fasta] started. (glob)
-[INFO] * [pls2fasta] ended. (glob)
-
diff --git a/extrautils/ctest/ccsh5tobam.t b/extrautils/ctest/ccsh5tobam.t
deleted file mode 100644
index 5770cca..0000000
--- a/extrautils/ctest/ccsh5tobam.t
+++ /dev/null
@@ -1,10 +0,0 @@
-Set up 
-  $ . $TESTDIR/setup.sh
-
-Set up the executable: ccsh5tobam
-  $ EXEC=$TESTDIR/../ccsh5tobam
-  $ SMRTWRAP=/mnt/secondary/Smrtpipe/builds/Internal_Mainline_Nightly_LastSuccessfulBuild/smrtcmds/bin/smrtwrap
-
-  $ $SMRTWRAP python $SCRIPTDIR/test_ccsh5tobam.py $EXEC $DATDIR/test_ccsh5tobam/input.fofn $OUTDIR/test_ccsh5tobam.bam
-  $ echo $?
-  0
diff --git a/extrautils/ctest/printTupleCountTable.t b/extrautils/ctest/printTupleCountTable.t
index 2229e1b..bc4b1e2 100644
--- a/extrautils/ctest/printTupleCountTable.t
+++ b/extrautils/ctest/printTupleCountTable.t
@@ -11,7 +11,7 @@ Define tmporary files
 Make OUTDIR
   $ mkdir -p $OUTDIR
 
-  $ $EXEC $OUTDIR/ecoli_tuple.table $DATDIR/ecoli_reference.fasta 
+  $ $EXEC $OUTDIR/ecoli_tuple.table 8 $DATDIR/ecoli_reference.fasta 
   $ echo $?
   0
 
diff --git a/extrautils/makefile b/extrautils/makefile
index c897e37..45fd24e 100644
--- a/extrautils/makefile
+++ b/extrautils/makefile
@@ -5,12 +5,12 @@ SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
 include ${SRCDIR}/../rules.mk
 
 CXXOPTS := -std=c++0x -pedantic \
-           -Wall -Wuninitialized -Wno-div-by-zero \
-           -MMD -MP -w -fpermissive
+           -Wall -Wextra -Wno-overloaded-virtual \
+           -MMD -MP
 CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS}
 
-EXE = sa2bwt bwt2sa alchemy excrep evolve bsdb simpleShredder swMatcher \
-  samodify sals printTupleCountTable cmpH5StoreQualityByContext ccsh5tobam
+EXE = sa2bwt bwt2sa excrep evolve bsdb simpleShredder swMatcher \
+  samodify sals printTupleCountTable cmpH5StoreQualityByContext
 
 LD_LIBRARY_PATH=${HDF5_LIB}:${LIBBLASR_LIB}:${LIBPBIHDF_LIB}:${LIBPBDATA_LIB}
 export LD_LIBRARY_PATH
@@ -24,7 +24,6 @@ ${EXE}:
 
 sa2bwt: SuffixArrayToBWT.o
 bwt2sa: BwtToSuffixArray.o
-alchemy: BasH5Simulator.o
 excrep: ExciseRepeats.o
 evolve: Evolve.o
 bsdb: BuildSequenceDB.o
@@ -34,10 +33,9 @@ samodify: SAModify.o
 sals: SALS.o
 printTupleCountTable: PrintTupleCountTable.o
 cmpH5StoreQualityByContext: StoreQualityByContextFromCmpH5.o
-ccsh5tobam: CCSH5ToBam.o
 
 CTESTS := \
-ctest/alchemy.t  ctest/ccsh5tobam.t                  ctest/printTupleCountTable.t  ctest/sals.t      ctest/swmatcher.t \
+ctest/printTupleCountTable.t  ctest/sals.t      ctest/swmatcher.t \
 ctest/bwt2sa.t   ctest/cmpH5StoreQualityByContext.t  ctest/sa2bwt.t                ctest/samodify.t
 
 
diff --git a/include/BlasrAlign.hpp b/iblasr/BlasrAlign.hpp
similarity index 67%
rename from include/BlasrAlign.hpp
rename to iblasr/BlasrAlign.hpp
index 1a00e9c..1782b47 100644
--- a/include/BlasrAlign.hpp
+++ b/iblasr/BlasrAlign.hpp
@@ -1,41 +1,5 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
-#ifndef __BLASR_ALIGN_HPP_
-#define __BLASR_ALIGN_HPP_
+#pragma once
 
 #include "BlasrHeaders.h"
 #include "BlasrMiscs.hpp"
@@ -119,4 +83,3 @@ void AlignSubreadToAlignmentTarget(ReadAlignments & allReadAlignments,
         ostream & threadOut);
 
 #include "BlasrAlignImpl.hpp"
-#endif
diff --git a/include/BlasrAlignImpl.hpp b/iblasr/BlasrAlignImpl.hpp
similarity index 93%
rename from include/BlasrAlignImpl.hpp
rename to iblasr/BlasrAlignImpl.hpp
index 2ec9908..8fab221 100644
--- a/include/BlasrAlignImpl.hpp
+++ b/iblasr/BlasrAlignImpl.hpp
@@ -1,42 +1,5 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
-
-#ifndef _BLASR_ALIGN_IMPL_HPP_
-#define _BLASR_ALIGN_IMPL_HPP_
+#pragma once
 
 template<typename T_Sequence, typename T_RefSequence, typename T_SuffixArray, typename T_TupleCountTable>
 void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
@@ -52,13 +15,11 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
              MappingIPC *mapData,
              MappingSemaphores & semaphores)
 {
-
     bool matchFound;
     WeightedIntervalSet topIntervals(params.nCandidates);
-    int numKeysMatched=0, rcNumKeysMatched=0;
+    int numKeysMatched=0, rcNumKeysMatched=0; (void)(numKeysMatched); (void)(rcNumKeysMatched);
     int expand = params.minExpand;
     metrics.clocks.total.Tick();
-    int nTotalCells = 0;
     int forwardNumBasesMatched = 0, reverseNumBasesMatched = 0;
     do {
         matchFound = false;
@@ -99,7 +60,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
         //
         // Look to see if only the anchors are printed.
         if (params.anchorFileName != "") {
-            int i;
+            size_t i;
             if (params.nProc > 1) {
 #ifdef __APPLE__
                 sem_wait(semaphores.writer);
@@ -153,7 +114,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
         //
         DNALength squareRefLength = read.length * 1.25 + params.limsAlign;
         if (params.limsAlign != 0) {
-            int fi;
+            size_t fi;
             for (fi = 0; fi < mappingBuffers.matchPosList.size(); fi++) {
                 if (mappingBuffers.matchPosList[fi].t >= squareRefLength) { break; }
             }
@@ -187,15 +148,11 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
         RemoveOverlappingAnchors(mappingBuffers.rcMatchPosList);
 
         if (params.pValueType == 0) {
-            int original = mappingBuffers.matchPosList.size();
-
-            int numMerged = 0;
             if (params.printDotPlots) {
                 ofstream dotPlotOut;
                 string dotPlotName = string(read.title) + ".anchors";
                 CrucialOpen(dotPlotName, dotPlotOut, std::ios::out);
-                int mp;
-                for (mp = 0; mp < mappingBuffers.matchPosList.size(); mp++ ){
+                for (size_t mp = 0; mp < mappingBuffers.matchPosList.size(); mp++ ){
                     dotPlotOut << mappingBuffers.matchPosList[mp].q << " " << mappingBuffers.matchPosList[mp].t << " " << mappingBuffers.matchPosList[mp].l << " " << endl;
                 }
                 dotPlotOut.close();
@@ -220,7 +177,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                     topIntervals, genome, read, intervalSearchParameters,
                     &mappingBuffers.globalChainEndpointBuffer,
                     mappingBuffers.clusterList,
-                    accumPValue, accumWeight, accumNBases, read.title);
+                    accumPValue, accumWeight, accumNBases);
             // Uncomment when the version of the weight functor needs the sequence.
 
             mappingBuffers.clusterList.ResetCoordinates();
@@ -233,7 +190,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                     topIntervals, genome, readRC, intervalSearchParameters,
                     &mappingBuffers.globalChainEndpointBuffer,
                     mappingBuffers.revStrandClusterList,
-                    accumPValue, accumWeight, accumNBases, read.title);
+                    accumPValue, accumWeight, accumNBases);
         }
         else if (params.pValueType == 1) {
             FindMaxIncreasingInterval(Forward,
@@ -246,8 +203,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                     topIntervals, genome, read, intervalSearchParameters,
                     &mappingBuffers.globalChainEndpointBuffer,
                     mappingBuffers.clusterList,
-                    accumPValue, accumWeight, accumNBases,
-                    read.title);
+                    accumPValue, accumWeight, accumNBases);
 
 
             mappingBuffers.clusterList.ResetCoordinates();
@@ -259,8 +215,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                     topIntervals, genome, readRC, intervalSearchParameters,
                     &mappingBuffers.globalChainEndpointBuffer,
                     mappingBuffers.revStrandClusterList,
-                    accumPValue, accumWeight, accumNBases,
-                    read.title);
+                    accumPValue, accumWeight, accumNBases);
         }
         else if (params.pValueType == 2) {
             FindMaxIncreasingInterval(Forward,
@@ -273,8 +228,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                     topIntervals, genome, read, intervalSearchParameters,
                     &mappingBuffers.globalChainEndpointBuffer,
                     mappingBuffers.clusterList,
-                    accumPValue, accumWeight, accumNBases,
-                    read.title);
+                    accumPValue, accumWeight, accumNBases);
 
             mappingBuffers.clusterList.ResetCoordinates();
             FindMaxIncreasingInterval(Reverse, mappingBuffers.rcMatchPosList,
@@ -285,8 +239,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                     topIntervals, genome, readRC, intervalSearchParameters,
                     &mappingBuffers.globalChainEndpointBuffer,
                     mappingBuffers.revStrandClusterList,
-                    accumPValue, accumWeight, accumNBases,
-                    read.title);
+                    accumPValue, accumWeight, accumNBases);
         }
 
         mappingBuffers.clusterList.numBases.insert(mappingBuffers.clusterList.numBases.end(),
@@ -314,8 +267,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                     << seqBoundary((*topIntIt).start) << " " << seqBoundary((*topIntIt).end) << " "
                     << (*topIntIt).pValue << endl;
                 if (params.verbosity > 2) {
-                    int m;
-                    for (m = 0; m < (*topIntIt).matches.size(); m++) {
+                    for (size_t m = 0; m < (*topIntIt).matches.size(); m++) {
                         cout << " (" << (*topIntIt).matches[m].q << ", " << (*topIntIt).matches[m].t << ", " << (*topIntIt).matches[m].l << ") ";
                     }
                     cout << endl;
@@ -430,13 +382,6 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
         RemoveOverlappingAlignments(alignmentPtrs, params);
     }
 
-    if (params.forPicard) {
-        int a;
-        for (a = 0; a < alignmentPtrs.size(); a++ ) {
-            alignmentPtrs[a]->OrderGapsByType();
-        }
-    }
-
     //
     // Look to see if the number of anchors found for this read match
     // what is expected given the expected distribution of number of
@@ -444,7 +389,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
     //
 
     if (alignmentPtrs.size() > 0) {
-        int clusterIndex;
+        size_t clusterIndex;
         //
         // Compute some stats on the read.  For now this is fixed but will
         // be updated on the fly soon.
@@ -459,13 +404,13 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
         // Handle this by bounding the min match by the smallest and
         // largest values for which there are precomputed statistics.
 
-        int boundedMinWordMatchLength = min(max(params.minMatchLength, anchorMinKValues[0]), anchorMinKValues[1]);
+        int boundedMinWordMatchLength = min(max(params.minMatchLength, PacBio::AnchorDistributionTable::anchorMinKValues[0]), PacBio::AnchorDistributionTable::anchorMinKValues[1]);
 
         //
         // Do a similar bounding for match length and accuracy.
         //
-        int boundedMatchLength  = min(max((int) alignmentPtrs[0]->qAlignedSeq.length, anchorReadLengths[0]), anchorReadLengths[1]);
-        int boundedPctSimilarity = min(max((int)alignmentPtrs[0]->pctSimilarity, anchorReadAccuracies[0]), anchorReadAccuracies[1]);
+        int boundedMatchLength  = min(max((int) alignmentPtrs[0]->qAlignedSeq.length, PacBio::AnchorDistributionTable::anchorReadLengths[0]), PacBio::AnchorDistributionTable::anchorReadLengths[1]);
+        int boundedPctSimilarity = min(max((int)alignmentPtrs[0]->pctSimilarity, PacBio::AnchorDistributionTable::anchorReadAccuracies[0]), PacBio::AnchorDistributionTable::anchorReadAccuracies[1]);
 
         lookupValue = LookupAnchorDistribution(boundedMatchLength, boundedMinWordMatchLength, boundedPctSimilarity,
                 meanAnchorsPerRead, sdAnchorsPerRead, meanAnchorBasesPerRead, sdAnchorBasesPerRead);
@@ -480,8 +425,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
         int numSignificantClusters = 0;
         int totalSignificantClusterSize = 0;
         int maxClusterSize = 0;
-        int maxClusterIndex = 0;
-        int numAlnAnchorBases, numAlnAnchors, scaledMaxClusterSize;
+        int numAlnAnchorBases, numAlnAnchors;
         alignmentPtrs[0]->ComputeNumAnchors(boundedMinWordMatchLength, numAlnAnchors, numAlnAnchorBases);
         int totalAnchorBases = 0;
         if (numAlnAnchorBases > meanAnchorBasesPerRead + sdAnchorBasesPerRead) {
@@ -492,17 +436,14 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
                 for (clusterIndex = 0; clusterIndex < mappingBuffers.clusterList.numBases.size(); clusterIndex++) {
                     if (mappingBuffers.clusterList.numBases[clusterIndex] > maxClusterSize) {
                         maxClusterSize = mappingBuffers.clusterList.numBases[clusterIndex];
-                        maxClusterIndex = clusterIndex;
                     }
                 }
                 int scaledExpectedClusterSize = maxClusterSize / ((float)numAlnAnchorBases) * minExpAnchorBases;
                 for (clusterIndex = 0; clusterIndex < mappingBuffers.clusterList.numBases.size(); clusterIndex++) {
-                    bool isSignificant = false;
                     if (mappingBuffers.clusterList.numBases[clusterIndex] >= scaledExpectedClusterSize) {
                         //          cout << mappingBuffers.clusterList.numBases[clusterIndex] << " " << scaledExpectedClusterSize << " " << meanAnchorBasesPerRead << " " << sdAnchorBasesPerRead << endl;
                         ++numSignificantClusters;
                         totalSignificantClusterSize += meanAnchorBasesPerRead;
-                        isSignificant = true;
                     }
                     //
                     // The following output block is useful in debugging mapqv
@@ -514,9 +455,7 @@ void MapRead(T_Sequence &read, T_Sequence &readRC, T_RefSequence &genome,
             }
 
             if (lookupValue == 0) {
-                int scaledMaxClusterSize;
                 alignmentPtrs[0]->ComputeNumAnchors(params.minMatchLength, numAlnAnchors, numAlnAnchorBases);
-                scaledMaxClusterSize = (  ((float)numAlnAnchorBases )/ meanAnchorBasesPerRead) * maxClusterSize;
             }
         }
 
@@ -621,6 +560,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
         MappingParameters &params,
         MappingBuffers &mappingBuffers,
         int procId) {
+    (void)(mutationCostMatrix); (void)(ins); (void)(del); (void)(procId);
 
     vector<T_QuerySequence*> forrev;
     forrev.resize(2);
@@ -665,11 +605,12 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
         DNALength matchIntervalStart, matchIntervalEnd;
         matchIntervalStart = (*intvIt).start;
         matchIntervalEnd   = (*intvIt).end;
-
         bool readOverlapsContigStart    = false;
         bool readOverlapsContigEnd      = false;
         int  startOverlappedContigIndex = 0;
         int  endOverlappedContigIndex   = 0;
+        (void)(readOverlapsContigStart); (void)(readOverlapsContigEnd); (void)(startOverlappedContigIndex); (void)(endOverlappedContigIndex);
+
         if (params.verbosity > 0) {
             cout << "aligning interval : " << read.length << " " << (*intvIt).start << " "
                 << (*intvIt).end  << " " << (*intvIt).qStart << " " << (*intvIt).qEnd
@@ -686,10 +627,8 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
         // If the end is past the end boundary of the read, similarly move
         // the window boundary to the end of the read boundary.
 
-        DNALength tAlignedContigStart = 0;
         int seqDBIndex = 0;
 
-
         //
         // Stretch the alignment interval so that it is close to where
         // the read actually starts.
@@ -828,7 +767,6 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
         //
 
         int intervalSize = 0;
-        int m;
         //
         // Check to see if the matches to the genome are sufficiently
         // dense to allow them to be used instead of having to redo
@@ -836,7 +774,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
         //
 
         // First count how much of the read matches the genome exactly.
-        for (m = 0; m < intvIt->matches.size(); m++) { intervalSize += intvIt->matches[m].l;}
+        for (size_t m = 0; m < intvIt->matches.size(); m++) { intervalSize += intvIt->matches[m].l;}
 
         int subreadLength = forrev[(*intvIt).GetStrandIndex()]->SubreadEnd() - forrev[(*intvIt).GetStrandIndex()]->SubreadStart();
         if ((1.0*intervalSize) / subreadLength < params.sdpBypassThreshold and !params.emulateNucmer) {
@@ -850,7 +788,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
                 // Run SDP alignment only between the genomic anchors,
                 // including the genomic anchors as part of the alignment.
                 //
-                int m;
+                size_t m;
 
                 vector<ChainedMatchPos> *matches;
                 vector<ChainedMatchPos> rcMatches;
@@ -935,7 +873,6 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
                     int tGap, qGap;
                     tGap = (*matches)[m+1].t - ((*matches)[m].t + (*matches)[m].l);
                     qGap = (*matches)[m+1].q - ((*matches)[m].q + (*matches)[m].l);
-                    float gapRatio = (1.0*tGap)/qGap;
 
                     if (tGap > 0 and qGap > 0) {
                         DNALength tPos, qPos;
@@ -944,7 +881,6 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
                         tSubSeq.ReferenceSubstring(tAlignedSeq, tPos, tGap);
                         qSubSeq.ReferenceSubstring(alignment->qAlignedSeq, qPos, qGap);
                         Alignment alignmentInGap;
-                        int alignScore;
 
                         /*
                            The following code is experimental code for trying to do
@@ -956,14 +892,14 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
                         if (params.separateGaps == true and
                                 qSubSeq.length > 0 and tSubSeq.length > 0 and
                                 ( (1.0*qSubSeq.length)/tSubSeq.length  < 0.25 )) {
-                            alignScore = OneGapAlign(qSubSeq, tSubSeq, distScoreFn, mappingBuffers, alignmentInGap);
+                            OneGapAlign(qSubSeq, tSubSeq, distScoreFn, mappingBuffers, alignmentInGap);
                         }
                         else {
                             /*
                                This is the 'normal/default' way to align between
                                gaps.  It is more well tested than OneGapAlign.
                                */
-                            alignScore = SDPAlign(qSubSeq, tSubSeq, distScoreFn, params.sdpTupleSize,
+                            SDPAlign(qSubSeq, tSubSeq, distScoreFn, params.sdpTupleSize,
                                     params.sdpIns, params.sdpDel, params.indelRate*2,
                                     alignmentInGap, mappingBuffers, Global,
                                     params.detailedSDPAlignment,
@@ -977,7 +913,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
                         // alignment.
                         //
                         if (alignmentInGap.blocks.size() > 0) {
-                            int b;
+                            size_t b;
                             //
                             // Configure this block to be relative to the beginning
                             // of the aligned substring.
@@ -1018,8 +954,8 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
                 // Modify the block positions so that they are offset by 0.
                 alignment->tPos = alignment->blocks[0].tPos;
                 alignment->qPos = alignment->blocks[0].qPos;
-                int b;
-                int blocksSize = alignment->blocks.size();
+                size_t b;
+                size_t blocksSize = alignment->blocks.size();
                 for (b = 0; b < blocksSize ; b++) {
                     assert(alignment->tPos <= alignment->blocks[b].tPos);
                     assert(alignment->qPos <= alignment->blocks[b].qPos);
@@ -1057,7 +993,7 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
             //
             // The anchors used to anchor the sequence are sufficient to extend the alignment.
             //
-            int m;
+            size_t m;
             for (m = 0; m < (*intvIt).matches.size(); m++ ){
                 Block block;
                 block.qPos = (*intvIt).matches[m].q - alignment->qAlignedSeqPos;
@@ -1250,7 +1186,6 @@ void AlignIntervals(T_TargetSequence &genome, T_QuerySequence &read, T_QuerySequ
                     // length are in sync.  This needs to go.
                     //
                     if (alignment->blocks.size() > 0) {
-                        int lastBlock = alignment->blocks.size() - 1;
                         alignment->qAlignedSeqLength = alignment->qAlignedSeq.length;
                         alignment->tAlignedSeqLength = alignment->tAlignedSeq.length;
                     }
@@ -1491,6 +1426,7 @@ void AlignSubreadToAlignmentTarget(ReadAlignments & allReadAlignments,
             << ", passDirection " << passDirection
             << ", subreadInterval [" << subreadInterval.start
             << ", " << subreadInterval.end << ")" << endl
+            << "Exploded score " << explodedScore << endl
             << "StickPrintAlignment subread-reference alignment which has"
             << " the " << (sameAlignmentPassDirection?"same":"different")
             << " direction as the ccs-reference (or the "
@@ -1512,7 +1448,6 @@ void AlignSubreadToAlignmentTarget(ReadAlignments & allReadAlignments,
         ComputeAlignmentStats(exploded, subread.seq,
                 alignedRefSequence.seq,
                 distScoreFn2);
-        //SMRTDistanceMatrix, params.indel, params.indel);
         if (exploded.score <= params.maxScore) {
             //
             // The coordinates of the alignment should be
@@ -1549,8 +1484,38 @@ void AlignSubreadToAlignmentTarget(ReadAlignments & allReadAlignments,
             // Save this alignment for printing later.
             //
             T_AlignmentCandidate *alignmentPtr = new T_AlignmentCandidate;
+            // Refine concordant alignments
+            if (params.refineConcordantAlignments) {
+                vector<SMRTSequence*> vquery;
+                vquery.push_back(&unrolledRead);
+                RefineAlignment(vquery, alignedRefSequence, exploded, params, mappingBuffers);
+            }
+
             *alignmentPtr = exploded;
-            allReadAlignments.AddAlignmentForSeq(subreadIndex, alignmentPtr);
+            //
+            // Check whether the alignment needs to be filtered.
+            // For now, filtering is applied only in concordant mode;
+            // filtering for the other modes is to be added later.
+            //
+            if (allReadAlignments.alignMode == ZmwSubreads) {
+                if (params.filterCriteria.Satisfy(alignmentPtr)) {
+                    if (params.verbosity > 3) {
+                        std::cerr << " Filters passed. Adding slave alignment in concordant mode" << std::endl;
+                    }
+                    allReadAlignments.AddAlignmentForSeq(subreadIndex, alignmentPtr);
+                }
+                else {
+                    // delete alignment immediately
+                    if (params.verbosity > 3) {
+                        std::cerr << " Filters failed. Delete alignment immediately" << std::endl;
+                    }
+                    delete alignmentPtr;
+                }
+            }
+            // No filtering is applied yet for any mode other than ZmwSubreads.
+            else {
+                allReadAlignments.AddAlignmentForSeq(subreadIndex, alignmentPtr);
+            }
         } // End of exploded score <= maxScore.
         if (params.verbosity >= 3) {
             threadOut << "exploded score: " << exploded.score << endl
@@ -1560,5 +1525,3 @@ void AlignSubreadToAlignmentTarget(ReadAlignments & allReadAlignments,
         }
     } // End of exploded.blocks.size() > 0.
 }
-
-#endif
diff --git a/iblasr/BlasrHeaders.h b/iblasr/BlasrHeaders.h
new file mode 100644
index 0000000..8a08c27
--- /dev/null
+++ b/iblasr/BlasrHeaders.h
@@ -0,0 +1,113 @@
+#pragma once
+
+#ifdef __GLIBC__
+#  include <mcheck.h>
+#endif
+#include <string>
+#include <iostream>
+#include <vector>
+#include <set>
+#include <sstream>
+#include <pthread.h>
+#include <stdlib.h>
+#include <time.h>
+#include <signal.h>
+#if defined(__GLIBC__) || defined(__APPLE__)
+#  include <execinfo.h>
+#endif
+
+#define MAX_PHRED_SCORE 254
+#define MAPQV_END_ALIGN_WIGGLE 5
+
+using namespace std;
+
+#include <libconfig.h>
+#ifdef USE_PBBAM
+#include <pbbam/BamWriter.h>
+#endif
+
+#include <CCSSequence.hpp>
+#include <SMRTSequence.hpp>
+#include <FASTASequence.hpp>
+#include <FASTAReader.hpp>
+#include <SeqUtils.hpp>
+#include <defs.h>
+#include <utils.hpp>
+
+
+#include <tuples/DNATuple.hpp>
+#include <tuples/HashedTupleList.hpp>
+#include <algorithms/compare/CompareStrings.hpp>
+#include <algorithms/alignment/AffineKBandAlign.hpp>
+#include <algorithms/alignment/GuidedAlign.hpp>
+#include <algorithms/alignment/AffineGuidedAlign.hpp>
+#include <algorithms/alignment/FullQVAlign.hpp>
+#include <algorithms/alignment/ExtendAlign.hpp>
+#include <algorithms/alignment/OneGapAlignment.hpp>
+#include <algorithms/alignment/AlignmentUtils.hpp>
+#include <algorithms/alignment/QualityValueScoreFunction.hpp>
+#include <algorithms/alignment/IDSScoreFunction.hpp>
+#include <algorithms/alignment/DistanceMatrixScoreFunction.hpp>
+#include <algorithms/alignment/StringToScoreMatrix.hpp>
+#include <algorithms/alignment/AlignmentFormats.hpp>
+#include <algorithms/anchoring/LISPValue.hpp>
+#include <algorithms/anchoring/LISPValueWeightor.hpp>
+#include <algorithms/anchoring/LISSizeWeightor.hpp>
+#include <algorithms/anchoring/LISQValueWeightor.hpp>
+#include <algorithms/anchoring/FindMaxInterval.hpp>
+#include <algorithms/anchoring/MapBySuffixArray.hpp>
+#include <datastructures/anchoring/ClusterList.hpp>
+#include <algorithms/anchoring/ClusterProbability.hpp>
+#include <algorithms/anchoring/BWTSearch.hpp>
+#include <metagenome/SequenceIndexDatabase.hpp>
+#include <metagenome/TitleTable.hpp>
+#include <suffixarray/SharedSuffixArray.hpp>
+#include <suffixarray/SuffixArrayTypes.hpp>
+#include <tuples/TupleCountTable.hpp>
+#include <datastructures/anchoring/WeightedInterval.hpp>
+#include <datastructures/anchoring/AnchorParameters.hpp>
+#include <datastructures/alignment/AlignmentCandidate.hpp>
+#include <datastructures/alignment/AlignmentContext.hpp>
+#include <MappingMetrics.hpp>
+#include <reads/ReadInterval.hpp>
+#include <utils/FileOfFileNames.hpp>
+#include <utils/RegionUtils.hpp>
+#include <utils/TimeUtils.hpp>
+#include <utils/SMRTTitle.hpp>
+#include <qvs/QualityTransform.hpp>
+#include <files/ReaderAgglomerate.hpp>
+#include <files/CCSIterator.hpp>
+#include <files/FragmentCCSIterator.hpp>
+#include <HDFRegionTableReader.hpp>
+#include <bwt/BWT.hpp>
+#include <PackedDNASequence.hpp>
+#include <CommandLineParser.hpp>
+#include <qvs/QualityValue.hpp>
+#include <statistics/VarianceAccumulator.hpp>
+#include <statistics/pdfs.hpp>
+#include <statistics/cdfs.hpp>
+#include <statistics/StatUtils.hpp>
+#include <statistics/LookupAnchorDistribution.hpp>
+#include <format/StickAlignmentPrinter.hpp>
+#include <format/SAMPrinter.hpp>
+#include <format/XMLPrinter.hpp>
+#include <format/CompareSequencesPrinter.hpp>
+#include <format/VulgarPrinter.hpp>
+#include <format/IntervalPrinter.hpp>
+#include <format/SummaryPrinter.hpp>
+#include <format/SAMHeaderPrinter.hpp>
+#include <format/BAMPrinter.hpp>
+
+#include "MappingIPC.h"
+#include "MappingSemaphores.h"
+#include "MappingBuffers.hpp"
+#include "ReadAlignments.hpp"
+
+
+typedef SMRTSequence T_Sequence;
+typedef FASTASequence T_GenomeSequence;
+typedef DNASuffixArray T_SuffixArray;
+typedef DNATuple T_Tuple;
+typedef LISPValueWeightor<T_GenomeSequence, DNATuple, vector<ChainedMatchPos> >  PValueWeightor;
+typedef LISSMatchFrequencyPValueWeightor<T_GenomeSequence, DNATuple, vector<ChainedMatchPos> >  MultiplicityPValueWeightor;
+typedef MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> MappingIPC;
diff --git a/iblasr/BlasrMiscs.hpp b/iblasr/BlasrMiscs.hpp
new file mode 100644
index 0000000..2210e0c
--- /dev/null
+++ b/iblasr/BlasrMiscs.hpp
@@ -0,0 +1,62 @@
+// Author: Mark Chaisson
+#pragma once
+
+#include "BlasrHeaders.h"
+
+//-------------------------Fetch Reads----------------------------//
+template<typename T_Sequence>
+bool GetNextReadThroughSemaphore(ReaderAgglomerate &reader,
+                                 MappingParameters &params,
+                                 T_Sequence &read,
+                                 string & readGroupId,
+                                 int & associatedRandInt,
+                                 MappingSemaphores & semaphores);
+
+//---------------------MAKE & CHECK READS-------------------------//
+//FIXME: move to SMRTSequence
+bool ReadHasMeaningfulQualityValues(FASTQSequence &sequence);
+
+//FIXME: Move to SMRTSequence
+// Given a SMRT sequence and a subread interval, make the subread.
+// Input:
+//   smrtRead         - a SMRT sequence
+//   subreadInterval  - a subread interval
+//   params           - mapping parameters
+// Output:
+//   subreadSequence - the constructed subread
+void MakeSubreadOfInterval(SMRTSequence & subreadSequence,
+                           SMRTSequence & smrtRead,
+                           ReadInterval & subreadInterval,
+                           MappingParameters & params);
+
+//FIXME: Move to SMRTSequence
+// Given a SMRT sequence and one of its subreads, make the
+// reverse complement of the subread in the coordinate of the
+// reverse complement sequence of the SMRT sequence.
+// Input:
+//   smrtRead          - a SMRT read
+//   subreadSequence   - a subread of smrtRead
+// Output:
+//   subreadSequenceRC - the reverse complement of the subread
+//                       in the coordinate of the reverse
+//                       complement of the SMRT read.
+void MakeSubreadRC(SMRTSequence & subreadSequenceRC,
+                   SMRTSequence & subreadSequence,
+                   SMRTSequence & smrtRead);
+
+// Construct subread intervals from subreads
+void MakeSubreadIntervals(vector<SMRTSequence> & subreads,
+                          vector<ReadInterval> & subreadIntervals);
+
+// Return index of subread which will be used as concordant template.
+// If Zmw has exactly one subread, return index of the subread (i.e., 0).
+// If Zmw has exactly two subreads, return index of the longer subread.
+// If Zmw has three or more subreads, return index of the median-length
+// subread in range subreadIntervals[1:-1]. Avoid using the first and last 
+// subreads (which are less likely to be full-pass) if possible.
+int GetIndexOfConcordantTemplate(const vector<ReadInterval> & subreadIntervals);
+
+//-------------------------MISC-----------------------------------//
+int CountZero(unsigned char *ptr, int length);
+
+#include "BlasrMiscsImpl.hpp"
diff --git a/include/BlasrMiscsImpl.hpp b/iblasr/BlasrMiscsImpl.hpp
similarity index 58%
rename from include/BlasrMiscsImpl.hpp
rename to iblasr/BlasrMiscsImpl.hpp
index 5821e13..93b5487 100644
--- a/include/BlasrMiscsImpl.hpp
+++ b/iblasr/BlasrMiscsImpl.hpp
@@ -1,43 +1,7 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
+#pragma once
 
-#ifndef _BLASR_MISCS_IMPL_HPP_
-#define _BLASR_MISCS_IMPL_HPP_
-#include "utils/SMRTTitle.hpp"
+#include <utils/SMRTTitle.hpp>
 
 template<typename T_Sequence>
 bool GetNextReadThroughSemaphore(ReaderAgglomerate &reader,
@@ -175,29 +139,6 @@ int CountZero(unsigned char *ptr, int length)
     return nZero;
 }
 
-void MakeVirtualRead(SMRTSequence & smrtRead,
-                     const vector<SMRTSequence> & subreads)
-{
-    assert(subreads.size() > 0);
-    DNALength hqStart = 0, hqEnd = 0;
-    for(auto subread: subreads) {
-        hqStart = min(DNALength(subread.SubreadStart()), hqStart);
-        hqEnd   = max(DNALength(subread.SubreadEnd()),   hqEnd);
-    }
-    smrtRead.Free();
-    smrtRead.Allocate(hqEnd);
-    smrtRead.lowQualityPrefix = hqStart;
-    smrtRead.lowQualitySuffix = smrtRead.length - hqEnd;
-    smrtRead.highQualityRegionScore = subreads[0].highQualityRegionScore;
-    stringstream ss;
-    ss << SMRTTitle(subreads[0].GetTitle()).MovieName() << "/" << subreads[0].HoleNumber();
-    smrtRead.CopyTitle(ss.str());
-    for (auto subread: subreads) {
-        memcpy(&smrtRead.seq[subread.SubreadStart()],
-               &subread.seq[0], sizeof(char) * subread.length);
-    }
-}
-
 void MakeSubreadIntervals(vector<SMRTSequence> & subreads,
                           vector<ReadInterval> & subreadIntervals)
 {
@@ -208,16 +149,31 @@ void MakeSubreadIntervals(vector<SMRTSequence> & subreads,
     }
 }
 
-int GetIndexOfMedian(const vector<ReadInterval> & subreadIntervals)
+int GetIndexOfConcordantTemplate(const vector<ReadInterval> & subreadIntervals)
 {
-    vector<ReadInterval> intervals = subreadIntervals;
-    size_t n = intervals.size() / 2;
-    nth_element(intervals.begin(), intervals.begin() + n, intervals.end(),
-                [](const ReadInterval & a, const ReadInterval & b) -> bool
-                {a.end - a.start < b.end - b.start;});
-    auto it = std::find(subreadIntervals.begin(), subreadIntervals.end(), intervals[n]);
-    int pos = int(std::distance(subreadIntervals.begin(), it));
-    return pos;
+    assert(subreadIntervals.size() != 0); // Precondition: at least one subread (not enforced when NDEBUG disables assert).
+    if (subreadIntervals.size() == 1) return 0; // Zmw has exactly one subread.
+    else if (subreadIntervals.size() == 2) {
+        // Zmw has two subreads, return index of the longer one (index 0 on a tie).
+        const ReadInterval & first = subreadIntervals[0];
+        const ReadInterval & second = subreadIntervals[1];
+        if (first.Length() < second.Length()) return 1;
+        else return 0;
+    } else { 
+        // Zmw has more than two subreads, look for the median-length subread
+        // in subreadIntervals[1:-1]. The first and last subreads are not
+        // considered because they are usually non-full-pass.
+        vector<ReadInterval> intervals; // copy of the interior subreads, sorted by length below
+        intervals.insert(intervals.begin(), subreadIntervals.begin() + 1, subreadIntervals.end() - 1);
+        std::sort(intervals.begin(), intervals.end(), 
+                  [](const ReadInterval& a, const ReadInterval& b)->bool
+                  {return a.Length() < b.Length();});
+        const ReadInterval & template_interval = intervals[int(intervals.size()/2)]; // upper median when the count is even
+        for (int pos = 1; pos < int(subreadIntervals.size()) -1; pos ++) { // map back to an index in the unsorted input
+            if (subreadIntervals[pos] == template_interval) {
+                return pos; // first interior interval comparing equal wins
+            }
+        }
+    }
+    return 0; // fallback: no interior interval compared equal to the chosen one
 }
-
-#endif
diff --git a/include/BlasrUtils.hpp b/iblasr/BlasrUtils.hpp
similarity index 78%
rename from include/BlasrUtils.hpp
rename to iblasr/BlasrUtils.hpp
index ce5a581..ea7e4de 100644
--- a/include/BlasrUtils.hpp
+++ b/iblasr/BlasrUtils.hpp
@@ -1,43 +1,5 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
-
-
-#ifndef _BLASR_INC_UTILS_HPP_
-#define _BLASR_INC_UTILS_HPP_
+#pragma once
 
 #include "BlasrHeaders.h"
 
@@ -157,6 +119,7 @@ void PrintAlignment(T_AlignmentCandidate &alignment,
                     AlignmentContext &alignmentContext,
                     ostream &outFile
 #ifdef USE_PBBAM
+                    , SMRTSequence &subread
                     , PacBio::BAM::BamWriter * bamWriterPtr
 #endif
                     );
@@ -167,6 +130,7 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs,
                      MappingParameters &params, ostream &outFile,
                      AlignmentContext alignmentContext,
 #ifdef USE_PBBAM
+                     SMRTSequence &subread,
                      PacBio::BAM::BamWriter * bamWriterPtr,
 #endif
                      MappingSemaphores & semaphores);
@@ -174,6 +138,13 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs,
 void PrintAlignmentPtrs(vector <T_AlignmentCandidate*> & alignmentPtrs,
                         ostream & out = cout);
 
+
+// Print an unaligned read: if noPrintUnalignedSeqs is true, print its title only;
+// otherwise, print the title and sequence of the read.
+void PrintUnaligned(const SMRTSequence & unalignedRead,
+                    ostream & unalignedFilePtr,
+                    const bool noPrintUnalignedSeqs);
+
 // Print all alignments for subreads in allReadAlignments.
 // Input:
 //   allReadAlignments - contains a set of subreads, each of which
@@ -196,4 +167,3 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments,
                             MappingSemaphores & semaphores);
 
 #include "BlasrUtilsImpl.hpp"
-#endif
diff --git a/include/BlasrUtilsImpl.hpp b/iblasr/BlasrUtilsImpl.hpp
similarity index 93%
rename from include/BlasrUtilsImpl.hpp
rename to iblasr/BlasrUtilsImpl.hpp
index 3cb560a..6839590 100644
--- a/include/BlasrUtilsImpl.hpp
+++ b/iblasr/BlasrUtilsImpl.hpp
@@ -1,41 +1,6 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
-#ifndef _BLASR_INC_UTILS_IMPL_HPP_
-#define _BLASR_INC_UTILS_IMPL_HPP_
+#pragma once
+
 #include "BlasrAlign.hpp"
 
 
@@ -349,6 +314,7 @@ bool CheckForSufficientMatch(T_Sequence &read,
                              vector<T_AlignmentCandidate*> &alignmentPtrs,
                              MappingParameters &params)
 {
+    (void)(read);
     if (alignmentPtrs.size() > 0 and alignmentPtrs[0]->score < params.maxScore) {
         return true;
     }
@@ -511,11 +477,11 @@ int RemoveLowQualityAlignments(T_Sequence &read,
                                vector<T_AlignmentCandidate*> &alignmentPtrs,
                                MappingParameters &params)
 {
+    PB_UNUSED(read);
     if (params.verbosity > 0) {
         cout << "checking at least " << alignmentPtrs.size() << " alignments to see if they are accurate." << endl;
     }
-    UInt i;
-    for (i = 0; i < MIN(params.nCandidates, alignmentPtrs.size()); i++) {
+    for (size_t i = 0; i < MIN(static_cast<size_t>(params.nCandidates), alignmentPtrs.size()); i++) {
         if (params.verbosity > 0) {
             cout << "Quality check  " << i << " " << alignmentPtrs[i]->score << endl;
         }
@@ -533,8 +499,7 @@ int RemoveLowQualityAlignments(T_Sequence &read,
             if (params.verbosity  > 0) {
                 cout << alignmentPtrs[i]->qName << " alignment " << i << " is too low of a score." << alignmentPtrs[i]->score << endl;
             }
-            int deletedIndex = i;
-            for (; deletedIndex < alignmentPtrs.size(); deletedIndex++) {
+            for (size_t deletedIndex = i; deletedIndex < alignmentPtrs.size(); deletedIndex++) {
                 delete alignmentPtrs[deletedIndex];
                 alignmentPtrs[deletedIndex] = NULL;
             }
@@ -658,6 +623,7 @@ void RefineAlignment(vector<T_Sequence*> &bothQueryStrands,
                      MappingParameters &params,
                      MappingBuffers &mappingBuffers)
 {
+    (void)(genome);
     FASTQSequence qSeq;
     DNASequence   tSeq;
     DistanceMatrixScoreFunction<DNASequence, FASTQSequence> distScoreFn(
@@ -898,7 +864,6 @@ void RefineAlignment(vector<T_Sequence*> &bothQueryStrands,
         VectorIndex lastSDPBlock = alignmentCandidate.blocks.size() - 1;
 
         if (alignmentCandidate.blocks.size() > 0) {
-            DNALength prevLength =  alignmentCandidate.tAlignedSeqLength -= alignmentCandidate.tPos;
             alignmentCandidate.tAlignedSeqLength = (alignmentCandidate.blocks[lastSDPBlock].tPos
                     + alignmentCandidate.blocks[lastSDPBlock].length
                     - alignmentCandidate.blocks[0].tPos);
@@ -911,7 +876,6 @@ void RefineAlignment(vector<T_Sequence*> &bothQueryStrands,
         alignmentCandidate.qAlignedSeqPos    += alignmentCandidate.qPos;
 
         if (alignmentCandidate.blocks.size() > 0) {
-            DNALength prevLength =  alignmentCandidate.qAlignedSeqLength -= alignmentCandidate.qPos;
             alignmentCandidate.qAlignedSeqLength = (alignmentCandidate.blocks[lastSDPBlock].qPos
                     + alignmentCandidate.blocks[lastSDPBlock].length
                     - alignmentCandidate.blocks[0].qPos);
@@ -974,7 +938,7 @@ SelectAlignmentsToPrint(vector<T_AlignmentCandidate*> alignmentPtrs,
   for (auto ptr: alignmentPtrs) {
       if (params.filterCriteria.Satisfy(ptr)) {
           filtered.push_back(ptr);
-          if (filtered.size() == params.nBest) break;
+          if (int(filtered.size()) == params.nBest) break;
       }
   }
 
@@ -988,11 +952,11 @@ void PrintAlignment(T_AlignmentCandidate &alignment,
                     AlignmentContext &alignmentContext,
                     ostream &outFile
 #ifdef USE_PBBAM
+                    , SMRTSequence & subread
                     , PacBio::BAM::BamWriter * bamWriterPtr
 #endif
                     ) {
    try {
-    int lastBlock = alignment.blocks.size() - 1;
     if (params.printFormat == StickPrint) {
       PrintAlignmentStats(alignment, outFile);
       StickPrintAlignment(alignment,
@@ -1002,11 +966,11 @@ void PrintAlignment(T_AlignmentCandidate &alignment,
                           alignment.qAlignedSeqPos, alignment.tAlignedSeqPos);
     }
     else if (params.printFormat == SAM) {
-      SAMOutput::PrintAlignment(alignment, fullRead, outFile, alignmentContext, params.samQVList, params.clipping, params.cigarUseSeqMatch);
+      SAMOutput::PrintAlignment(alignment, fullRead, outFile, alignmentContext, params.samQVList, params.clipping, params.cigarUseSeqMatch, params.allowAdjacentIndels);
     }
     else if (params.printFormat == BAM) {
 #ifdef USE_PBBAM
-      BAMOutput::PrintAlignment(alignment, fullRead, *bamWriterPtr, alignmentContext, params.samQVList, params.clipping, params.cigarUseSeqMatch);
+      BAMOutput::PrintAlignment(alignment, fullRead, subread, *bamWriterPtr, alignmentContext, params.samQVList, params.clipping, params.cigarUseSeqMatch, params.allowAdjacentIndels);
 #else
       REQUIRE_PBBAM_ERROR();
 #endif
@@ -1048,6 +1012,7 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs,
                      MappingParameters &params, ostream &outFile,
                      AlignmentContext alignmentContext,
 #ifdef USE_PBBAM
+                     SMRTSequence &subread,
                      PacBio::BAM::BamWriter * bamWriterPtr,
 #endif
                      MappingSemaphores & semaphores) {
@@ -1099,6 +1064,7 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs,
     PrintAlignment(*alignmentPtrs[i], read,
                    params, alignmentContext, outFile
 #ifdef USE_PBBAM
+                   , subread
                    , bamWriterPtr
 #endif
                    );
@@ -1127,6 +1093,26 @@ void PrintAlignmentPtrs(vector <T_AlignmentCandidate*> & alignmentPtrs,
     out << endl;
 }
 
+
+// Write one unaligned read to unalignedFilePtr: only its title when
+// noPrintUnalignedSeqs is true, otherwise whatever PrintSeq() emits.
+void PrintUnaligned(const SMRTSequence & unalignedRead,
+                    ostream & unalignedFilePtr,
+                    const bool noPrintUnalignedSeqs) {
+    if (noPrintUnalignedSeqs) {
+        string s = unalignedRead.GetTitle();
+        SMRTTitle st(s);
+        if (st.isSMRTTitle)
+            // Title parsed as a SMRT title: write SMRTTitle's string form.
+            unalignedFilePtr << st.ToString() << endl;
+        else
+            // Not a SMRT title: write the raw title unchanged.
+                unalignedFilePtr << s << std::endl;
+    } else
+        unalignedRead.PrintSeq(unalignedFilePtr);
+}
+
+
 // Print all alignments for subreads in allReadAlignments.
 // Input:
 //   allReadAlignments - contains a set of subreads, each of which
@@ -1179,17 +1165,18 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments,
       alignmentContext.rNext = "";
       alignmentContext.hasNextSubreadPos = false;
     }
-    SMRTSequence & sourceSubread = allReadAlignments.subreads[subreadIndex];
+    SMRTSequence * sourceSubread = &(allReadAlignments.subreads[subreadIndex]);
     if (subreads.size() == allReadAlignments.subreads.size()) {
-        sourceSubread = subreads[subreadIndex];
+        sourceSubread = &subreads[subreadIndex];
     }
     if (allReadAlignments.subreadAlignments[subreadIndex].size() > 0) {
         PrintAlignments(allReadAlignments.subreadAlignments[subreadIndex],
-                        sourceSubread,
+                        allReadAlignments.subreads[subreadIndex],
                         // for these alignments
                         params, outFilePtr,//*mapData->outFilePtr,
                         alignmentContext,
 #ifdef USE_PBBAM
+                        *sourceSubread,
                         bamWriterPtr,
 #endif
                         semaphores);
@@ -1199,8 +1186,9 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments,
       //
       if (params.printUnaligned == true) {
         if (params.nProc == 1) {
-          //allReadAlignments.subreads[subreadIndex].PrintSeq(*mapData->unalignedFilePtr);
-          allReadAlignments.subreads[subreadIndex].PrintSeq(unalignedFilePtr);
+            PrintUnaligned(*sourceSubread,
+                           unalignedFilePtr,
+                           params.noPrintUnalignedSeqs);
         }
         else {
 #ifdef __APPLE__
@@ -1208,8 +1196,9 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments,
 #else
           sem_wait(&semaphores.unaligned);
 #endif
-          //allReadAlignments.subreads[subreadIndex].PrintSeq(*mapData->unalignedFilePtr);
-          allReadAlignments.subreads[subreadIndex].PrintSeq(unalignedFilePtr);
+          PrintUnaligned(*sourceSubread,//subreads[subreadIndex],
+                         unalignedFilePtr,
+                         params.noPrintUnalignedSeqs);
 #ifdef __APPLE__
           sem_post(semaphores.unaligned);
 #else
@@ -1220,6 +1209,3 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments,
     } // End of finding no alignments for the subread with subreadIndex.
   } // End of printing and processing alignmentContext for each subread.
 }
-
-
-#endif
diff --git a/include/MappingBuffers.hpp b/iblasr/MappingBuffers.hpp
similarity index 54%
rename from include/MappingBuffers.hpp
rename to iblasr/MappingBuffers.hpp
index df15786..ce439f7 100644
--- a/include/MappingBuffers.hpp
+++ b/iblasr/MappingBuffers.hpp
@@ -1,49 +1,13 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
-#ifndef __BLASR_MAPPING_BUFFERS__
-#define __BLASR_MAPPING_BUFFERS__
+#pragma once
 
 #include <vector>
-#include "tuples/DNATuple.hpp"
-#include "tuples/TupleList.hpp"
-#include "algorithms/alignment/sdp/SDPFragment.hpp"
-#include "algorithms/anchoring/BasicEndpoint.hpp"
-#include "datastructures/anchoring/ClusterList.hpp"
-#include "datastructures/anchoring/MatchPos.hpp"
+#include <tuples/DNATuple.hpp>
+#include <tuples/TupleList.hpp>
+#include <algorithms/alignment/sdp/SDPFragment.hpp>
+#include <algorithms/anchoring/BasicEndpoint.hpp>
+#include <datastructures/anchoring/ClusterList.hpp>
+#include <datastructures/anchoring/MatchPos.hpp>
 
 using namespace std;
 
@@ -112,5 +76,3 @@ inline void MappingBuffers::Reset(void) {
     vector<float>().swap(lnMatchPValueMat);
     vector<int>().swap(clusterNumBases);
 }
-
-#endif
diff --git a/include/MappingIPC.h b/iblasr/MappingIPC.h
similarity index 86%
rename from include/MappingIPC.h
rename to iblasr/MappingIPC.h
index e991595..f99ccf5 100644
--- a/include/MappingIPC.h
+++ b/iblasr/MappingIPC.h
@@ -1,22 +1,21 @@
-#ifndef MAPPING_IPC_H_
-#define MAPPING_IPC_H_
+#pragma once
 
 #include <pthread.h>
 
 #include "MappingParameters.h"
 
-#include "FASTASequence.hpp"
-#include "FASTQSequence.hpp"
-#include "tuples/TupleList.hpp"
-#include "tuples/DNATuple.hpp"
-#include "tuples/CompressedDNATuple.hpp"
-#include "tuples/TupleCountTable.hpp"
-#include "files/ReaderAgglomerate.hpp"
-#include "MappingMetrics.hpp"
-#include "suffixarray/SuffixArrayTypes.hpp"
-#include "metagenome/SequenceIndexDatabase.hpp"
-#include "reads/RegionTable.hpp"
-#include "bwt/BWT.hpp"
+#include <FASTASequence.hpp>
+#include <FASTQSequence.hpp>
+#include <tuples/TupleList.hpp>
+#include <tuples/DNATuple.hpp>
+#include <tuples/CompressedDNATuple.hpp>
+#include <tuples/TupleCountTable.hpp>
+#include <files/ReaderAgglomerate.hpp>
+#include <MappingMetrics.hpp>
+#include <suffixarray/SuffixArrayTypes.hpp>
+#include <metagenome/SequenceIndexDatabase.hpp>
+#include <reads/RegionTable.hpp>
+#include <bwt/BWT.hpp>
 /*
  * This structure contains pointers to all required data structures
  * for mapping reads to a suffix array and evaluating the significance
@@ -80,7 +79,6 @@ public:
     void Initialize(T_SuffixArray *saP, T_GenomeSequence *refP,
             SequenceIndexDatabase<FASTASequence> *seqDBP,
             TupleCountTable<T_GenomeSequence, T_Tuple> *ctabP,
-            ReverseCompressIndex *rciP,
             MappingParameters &paramsP,
             ReaderAgglomerate *readerP,
             RegionTable *regionTableP,
@@ -101,5 +99,3 @@ public:
         clusterFilePtr= clusterFilePtrP;
     }
 };
-
-#endif
diff --git a/include/MappingParameters.h b/iblasr/MappingParameters.h
similarity index 86%
rename from include/MappingParameters.h
rename to iblasr/MappingParameters.h
index 9682389..ed64e80 100644
--- a/include/MappingParameters.h
+++ b/iblasr/MappingParameters.h
@@ -1,21 +1,20 @@
-#ifndef MAPPING_PARAMETERS_H_
-#define MAPPING_PARAMETERS_H_
+#pragma once
 
 #define REQUIRE_PBBAM_ERROR() \
 assert("blasr must be compiled with lib pbbam to perform IO on bam." == 0);
 
 #include <vector>
 
-#include "reads/ReadType.hpp"
-#include "utils/FileOfFileNames.hpp"
-#include "utils/RangeUtils.hpp"
-#include "tuples/TupleMetrics.hpp"
-#include "datastructures/anchoring/AnchorParameters.hpp"
-#include "qvs/QualityValue.hpp"
-#include "format/SAMPrinter.hpp"
-#include "algorithms/alignment/AlignmentFormats.hpp"
-#include "files/BaseSequenceIO.hpp"
-#include "datastructures/alignment/FilterCriteria.hpp"
+#include <reads/ReadType.hpp>
+#include <utils/FileOfFileNames.hpp>
+#include <utils/RangeUtils.hpp>
+#include <tuples/TupleMetrics.hpp>
+#include <datastructures/anchoring/AnchorParameters.hpp>
+#include <qvs/QualityValue.hpp>
+#include <format/SAMPrinter.hpp>
+#include <algorithms/alignment/AlignmentFormats.hpp>
+#include <files/BaseSequenceIO.hpp>
+#include <datastructures/alignment/FilterCriteria.hpp>
 
 class MappingParameters {
 public:
@@ -35,7 +34,6 @@ public:
     int sdpTupleSize;
     int match;
     int showAlign;
-    int refineAlign;
     bool useScoreCutoff;
     int maxScore;
     int argi;
@@ -46,6 +44,7 @@ public:
     QVScale qvScaleType;
     vector<string> readsFileNames; // = queryFileNames, genomeFileName
     vector<string> queryFileNames;
+    vector<string> scrapsFileNames; // needed for noSplitSubread flag in PBBAM, derived from queryFileNames
     string genomeFileName;
     // Query file type: FASTA/FASTQ/HDF*/PBBAM,
     // Note that mixed query file types is not allowed.
@@ -63,7 +62,7 @@ public:
     string indexFileName;
     string anchorFileName;
     string clusterFileName;
-    VectorIndex nBest;
+    int nBest;
     int printWindow;
     int doCondense;
     int do4BitComp;
@@ -139,10 +138,12 @@ public:
     //float averageMismatchScore;
     bool mapSubreadsSeparately;
     bool concordant;
+    bool refineConcordantAlignments;
     int  flankSize;
     bool useRegionTable;
     bool useHQRegionTable;
     bool printUnaligned;
+    bool noPrintUnalignedSeqs; // print unaligned reads names only.
     string unalignedFileName;
     string metricsFileName;
     string lcpBoundsFileName;
@@ -174,7 +175,7 @@ public:
     int   globalDeletionPrior;
     bool  outputByThread;
     int   recurseOver;
-    bool  forPicard;
+    bool  allowAdjacentIndels;
     bool  separateGaps;
     string scoreMatrixString;
     bool  printDotPlots;
@@ -221,7 +222,6 @@ public:
         match = 0;
         mismatch = 0;
         showAlign = 1;
-        refineAlign = 1;
         useScoreCutoff = false;
         maxScore = -200;
         argi = 1;
@@ -306,11 +306,13 @@ public:
         ccsFofnFileName = "";
         mapSubreadsSeparately=true;
         concordant=false;
+        refineConcordantAlignments=false;
         flankSize=40;
         useRegionTable = true;
         useHQRegionTable=true;
         printUnaligned = false;
         unalignedFileName = "";
+        noPrintUnalignedSeqs = false;
         globalChainType = 0;
         metricsFileName = "";
         fullMetricsFileName = "";
@@ -347,7 +349,7 @@ public:
         globalDeletionPrior = 13;
         outputByThread = false;
         recurseOver = 10000;
-        forPicard = false;
+        allowAdjacentIndels = false;
         separateGaps = false;
         scoreMatrixString = "";
         printDotPlots = false;
@@ -410,14 +412,38 @@ public:
             }
         }
 
-#ifdef __APPLE__
-        nProc = 1;
-        cerr << "WARNING, multi-threading is not yet supported on Apple iOS." << endl;
-#endif
+        // In unrolled (polymerase) read mode with .bam input, the scraps file name must be derived.
+        // Rules:
+        // 1. string.subreads.bam -> string.scraps.bam   substitute "subreads" with "scraps"
+        // 2. string.bam -> string.scraps.bam            insert ".scraps" before ".bam"
+        // TODO loop over query check for each
+        // Not needed for xml since scraps are specified explicitly there.
+        //
+        if (not mapSubreadsSeparately && (queryFileType == FileType::PBBAM) ) {
+            const string dsubdb = ".subreads.bam";
+            const string dbam = ".bam";
+            // Derive one scraps file name per query file, in the same order.
+            for (size_t i = 0; i < queryFileNames.size(); i++) {
+                scrapsFileNames.push_back(queryFileNames[i]);
+                string & scraps = scrapsFileNames.back(); // entry just added, valid even if the vector was not empty
+                size_t dsubdb_pos = scraps.find(dsubdb); // TODO check that .subreads.bam is the LAST occurrence
+                if (dsubdb_pos != std::string::npos) {
+                    scraps.replace(dsubdb_pos,dsubdb.length(),".scraps.bam");
+                }
+                else {
+                    size_t dbam_pos = scraps.rfind(dbam); // trailing ".bam" of THIS file (was: first ".bam" of file 0)
+                    if (dbam_pos == std::string::npos) { // replace() at npos would throw std::out_of_range
+                        cout << "ERROR, could not derive scraps file name from " << queryFileNames[i] << ", expected a .bam file." << endl;
+                        exit(1);
+                    }
+                    scraps.replace(dbam_pos,dbam.length(),".scraps.bam");
+                }
+            }
+        }
 
         // -useQuality can not be used in combination with a fasta input
         if (!ignoreQualities) {
-            if (queryFileType == Fasta) {
+            if (queryFileType == FileType::Fasta) {
                 cout<<"ERROR, you can not use -useQuality option when any of the input reads files are in multi-fasta format."<<endl;
                 exit(1);
             }
@@ -455,6 +481,7 @@ public:
                 cout << "ERROR, unsupported concordantTemplate: " << concordantTemplate << endl;
                 exit(1);
             }
+            if (refineConcordantAlignments) {refineAlignments = true;}
         }
 
         if (sdpFilterType > 1) {
@@ -514,7 +541,7 @@ public:
         if (nouseDetailedSDPAlignment == false) {
             detailedSDPAlignment = true;
         }
-        if (anchorParameters.maxLCPLength != 0 and anchorParameters.maxLCPLength < anchorParameters.minMatchLength) {
+        if (anchorParameters.maxLCPLength != 0 and int(anchorParameters.maxLCPLength) < int(anchorParameters.minMatchLength)) {
             cerr << "ERROR: maxLCPLength is less than minLCPLength, which will result in no hits." << endl;
         }
         if (subsample < 1 and stride > 1) {
@@ -531,8 +558,8 @@ public:
             useRandomSeed = true;
         }
         if (printSAM) {
-            printFormat = SAM;
-            forPicard = true;
+            cerr << "ERROR: --sam is no longer supported, use --bam, then translate from .bam to .sam" << endl;
+            exit(1);
         }
         //
         // Parse the clipping.
@@ -560,7 +587,6 @@ public:
 #else
             cigarUseSeqMatch = true; // ALWAYS true for BAM
             printFormat = BAM;
-            forPicard = true;
             printSAM = false;
             samQVList.SetDefaultQV();
             printSAMQV = true;
@@ -568,9 +594,9 @@ public:
                 // Only support two clipping methods: soft or subread.
                 clipping = SAMOutput::subread;
             }
-            if (queryFileType != PBBAM and not enableHiddenPaths) {
+            if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and not enableHiddenPaths) {
                 // bax|fasta|fastq -> bam paths are turned off by default
-                cout << "ERROR, could not output alignments in BAM unless input reads are in PacBio BAM files." << endl;
+                cout << "ERROR, could not output alignments in BAM unless input reads are in PacBio BAM or DATASET files." << endl;
                 exit(1);
             }
             if (outFileName == "") {
@@ -630,10 +656,10 @@ public:
     }
 
     ReadType::ReadTypeEnum DetermineQueryReadType() {
-        if (useCcsOnly or queryFileType == HDFCCSONLY) {
+        if (useCcsOnly or queryFileType == FileType::HDFCCSONLY) {
             return ReadType::CCS;
         }
-        if (queryFileType == PBBAM) {
+        if (queryFileType == FileType::PBBAM) {
             // Read type in BAM may be CCS, SUBREAD, HQREGION or POLYMERASE.
             // Determine it later.
             return ReadType::UNKNOWN;
@@ -666,6 +692,3 @@ public:
         anchorParameters.maxAnchorsPerPosition = 10000;
     }
 };
-
-
-#endif
diff --git a/include/MappingSemaphores.h b/iblasr/MappingSemaphores.h
similarity index 75%
rename from include/MappingSemaphores.h
rename to iblasr/MappingSemaphores.h
index e47a657..b1e47ee 100644
--- a/include/MappingSemaphores.h
+++ b/iblasr/MappingSemaphores.h
@@ -1,5 +1,5 @@
-#ifndef ALIGNMENT_MAPPING_SEMAPHORE_H_
-#define ALIGNMENT_MAPPING_SEMAPHORE_H_
+#pragma once
+
 #include <vector>
 #include <pthread.h>
 #include <semaphore.h>
@@ -11,10 +11,7 @@ class MappingSemaphores {
         sem_t writer;
         sem_t unaligned;
         sem_t hitCluster;
-        MappingSemaphores& operator=(MappingSemaphores &rhs) {
-                return *this;
-        }
-
+        
         void InitializeAll() {
                 sem_init(&reader, 0, 1);
                 sem_init(&writer, 0, 1);
@@ -29,10 +26,6 @@ class MappingSemaphores {
         sem_t *writer;
         sem_t *unaligned;
         sem_t *hitCluster;
-        MappingSemaphores& operator=(MappingSemaphores &rhs) {
-                return *this;
-        }
-
         void InitializeAll() {
                 reader     = sem_open("/reader",     O_CREAT, 0644, 1);
                 writer     = sem_open("/writer",     O_CREAT, 0644, 1);
@@ -41,5 +34,3 @@ class MappingSemaphores {
         }
 };
 #endif
-
-#endif
diff --git a/include/ReadAlignments.hpp b/iblasr/ReadAlignments.hpp
similarity index 71%
rename from include/ReadAlignments.hpp
rename to iblasr/ReadAlignments.hpp
index a2d8291..d65287e 100644
--- a/include/ReadAlignments.hpp
+++ b/iblasr/ReadAlignments.hpp
@@ -1,48 +1,11 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
 // Author: Mark Chaisson
-
-#ifndef __BLASR_READ_ALIGNMENTS__
-#define __BLASR_READ_ALIGNMENTS__
+#pragma once
 
 #include <string>
 #include <iostream>
 #include <vector>
-#include "SMRTSequence.hpp"
-#include "datastructures/alignment/AlignmentCandidate.hpp"
+#include <SMRTSequence.hpp>
+#include <datastructures/alignment/AlignmentCandidate.hpp>
 
 using namespace std;
 
@@ -195,5 +158,3 @@ inline void ReadAlignments::Print(ostream &out) {
 inline ReadAlignments::~ReadAlignments() {
     read.Free();
 }
-
-#endif
diff --git a/include/RegisterBlasrOptions.h b/iblasr/RegisterBlasrOptions.h
similarity index 62%
rename from include/RegisterBlasrOptions.h
rename to iblasr/RegisterBlasrOptions.h
index 12ec7db..0425db5 100644
--- a/include/RegisterBlasrOptions.h
+++ b/iblasr/RegisterBlasrOptions.h
@@ -1,3 +1,4 @@
+#pragma once
 /*
  * ============================================================================
  *
@@ -16,113 +17,113 @@
  * ============================================================================
  */
 
-#include "libconfig.h"
-#include "CommandLineParser.hpp"
+#include <sstream>
+#include <libconfig.h>
+#include <CommandLineParser.hpp>
+
 #include "MappingParameters.h"
 #include "RegisterFilterOptions.h"
-#include <sstream>
 using namespace std;
 
 void RegisterBlasrOptions(CommandLineParser & clp, MappingParameters & params) {
     int  trashbinInt;
     float trashbinFloat;
     bool trashbinBool;
-    clp.RegisterStringOption("sa", &params.suffixArrayFileName, "");
-    clp.RegisterStringOption("ctab", &params.countTableName, "" );
-    clp.RegisterStringOption("regionTable", &params.regionTableFileName, "");
-    clp.RegisterStringOption("ccsFofn", &params.ccsFofnFileName, "");
-    clp.RegisterIntOption("bestn", (int*) &params.nBest, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("limsAlign", &params.limsAlign, "", CommandLineParser::PositiveInteger);
-    clp.RegisterFlagOption("printOnlyBest", &params.printOnlyBest, "");
-    clp.RegisterFlagOption("outputByThread", &params.outputByThread, "");
-    clp.RegisterFlagOption("rbao", &params.refineBetweenAnchorsOnly, "");
-    clp.RegisterFlagOption("allowAdjacentIndels", &params.forPicard, "");
-    clp.RegisterFlagOption("onegap", &params.separateGaps, "");
-    clp.RegisterFlagOption("allowAdjacentIndels", &params.forPicard, "");
-    clp.RegisterFlagOption("placeRepeatsRandomly", &params.placeRandomly, "");
-    clp.RegisterIntOption("randomSeed", &params.randomSeed, "", CommandLineParser::Integer);
-    clp.RegisterFlagOption("extend", &params.extendAlignments, "");
-    clp.RegisterIntOption("branchExpand", &params.anchorParameters.branchExpand, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("maxExtendDropoff", &params.maxExtendDropoff, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("nucmer", &params.emulateNucmer, "");
-    clp.RegisterIntOption("maxExpand", &params.maxExpand, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("minExpand", &params.minExpand, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterStringOption("seqdb",  &params.seqDBName, "");
-    clp.RegisterStringOption("anchors",  &params.anchorFileName, "");
-    clp.RegisterStringOption("clusters", &params.clusterFileName, "");
-    clp.RegisterFlagOption("samplePaths", (bool*) &params.samplePaths, "");
-    clp.RegisterFlagOption("noStoreMapQV", &params.storeMapQV, "");
-    clp.RegisterFlagOption("nowarp", (bool*) &params.nowarp, "");
-    clp.RegisterFlagOption("noRefineAlign", (bool*) &params.refineAlign, "");
-    clp.RegisterFlagOption("guidedAlign", (bool*)&params.useGuidedAlign, "");
-    clp.RegisterFlagOption("useGuidedAlign", (bool*)&trashbinBool, "");
-    clp.RegisterFlagOption("noUseGuidedAlign", (bool*)&params.useGuidedAlign, "");
-    clp.RegisterFlagOption("header", (bool*)&params.printHeader, "");
-    clp.RegisterIntOption("bandSize", &params.bandSize, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("extendBandSize", &params.extendBandSize, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("guidedAlignBandSize", &params.guidedAlignBandSize, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("maxAnchorsPerPosition", &params.anchorParameters.maxAnchorsPerPosition, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("stopMappingOnceUnique", (int*) &params.anchorParameters.stopMappingOnceUnique, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterStringOption("out", &params.outFileName, "");
-    clp.RegisterIntOption("match", &params.match, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("mismatch", &params.mismatch, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("minMatch", &params.minMatchLength, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("maxMatch", &params.anchorParameters.maxLCPLength, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("maxLCPLength", &params.anchorParameters.maxLCPLength, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("indel", &params.indel, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("insertion", &params.insertion, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("deletion", &params.deletion, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("idsIndel", &params.idsIndel, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("sdpindel", &params.sdpIndel, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("sdpIns", &params.sdpIns, "", CommandLineParser::Integer);
-    clp.RegisterIntOption("sdpDel", &params.sdpDel, "", CommandLineParser::Integer);
-    clp.RegisterFloatOption("indelRate", &params.indelRate, "", CommandLineParser::NonNegativeFloat);
-    clp.RegisterFloatOption("minRatio", &params.minRatio, "", CommandLineParser::NonNegativeFloat);
-    clp.RegisterFloatOption("sdpbypass", &params.sdpBypassThreshold, "", CommandLineParser::NonNegativeFloat);
-    clp.RegisterFloatOption("minFrac", &trashbinFloat, "", CommandLineParser::NonNegativeFloat);
-    clp.RegisterIntOption("maxScore", &params.maxScore, "", CommandLineParser::Integer);
-    clp.RegisterStringOption("bwt", &params.bwtFileName, "");
+    clp.RegisterStringOption("-sa", &params.suffixArrayFileName, "");
+    clp.RegisterStringOption("-ctab", &params.countTableName, "" );
+    clp.RegisterStringOption("-regionTable", &params.regionTableFileName, "");
+    clp.RegisterStringOption("-ccsFofn", &params.ccsFofnFileName, "");
+    clp.RegisterIntOption("-bestn", (int*) &params.nBest, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-limsAlign", &params.limsAlign, "", CommandLineParser::PositiveInteger);
+    clp.RegisterFlagOption("-printOnlyBest", &params.printOnlyBest, "");
+    clp.RegisterFlagOption("-outputByThread", &params.outputByThread, "");
+    clp.RegisterFlagOption("-rbao", &params.refineBetweenAnchorsOnly, "");
+    clp.RegisterFlagOption("-onegap", &params.separateGaps, "");
+    clp.RegisterFlagOption("-allowAdjacentIndels", &params.allowAdjacentIndels, "", false);
+    clp.RegisterFlagOption("-placeRepeatsRandomly", &params.placeRandomly, "");
+    clp.RegisterIntOption("-randomSeed", &params.randomSeed, "", CommandLineParser::Integer);
+    clp.RegisterFlagOption("-extend", &params.extendAlignments, "");
+    clp.RegisterIntOption("-branchExpand", &params.anchorParameters.branchExpand, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-maxExtendDropoff", &params.maxExtendDropoff, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFlagOption("-nucmer", &params.emulateNucmer, "");
+    clp.RegisterIntOption("-maxExpand", &params.maxExpand, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-minExpand", &params.minExpand, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterStringOption("-seqdb",  &params.seqDBName, "");
+    clp.RegisterStringOption("-anchors",  &params.anchorFileName, "");
+    clp.RegisterStringOption("-clusters", &params.clusterFileName, "");
+    clp.RegisterFlagOption("-samplePaths", (bool*) &params.samplePaths, "");
+    clp.RegisterFlagOption("-noStoreMapQV", &params.storeMapQV, "");
+    clp.RegisterFlagOption("-nowarp", (bool*) &params.nowarp, "");
+    clp.RegisterFlagOption("-guidedAlign", (bool*)&params.useGuidedAlign, "");
+    clp.RegisterFlagOption("-useGuidedAlign", (bool*)&trashbinBool, "");
+    clp.RegisterFlagOption("-noUseGuidedAlign", (bool*)&params.useGuidedAlign, "");
+    clp.RegisterFlagOption("-header", (bool*)&params.printHeader, "");
+    clp.RegisterIntOption("-bandSize", &params.bandSize, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-extendBandSize", &params.extendBandSize, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-guidedAlignBandSize", &params.guidedAlignBandSize, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-maxAnchorsPerPosition", (int*) &params.anchorParameters.maxAnchorsPerPosition, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-stopMappingOnceUnique", (int*) &params.anchorParameters.stopMappingOnceUnique, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterStringOption("-out", &params.outFileName, "");
+    clp.RegisterIntOption("-match", &params.match, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-mismatch", &params.mismatch, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-minMatch", &params.minMatchLength, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-maxMatch", &params.anchorParameters.maxLCPLength, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-maxLCPLength", &params.anchorParameters.maxLCPLength, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-indel", &params.indel, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-insertion", &params.insertion, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-deletion", &params.deletion, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-idsIndel", &params.idsIndel, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-sdpindel", &params.sdpIndel, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-sdpIns", &params.sdpIns, "", CommandLineParser::Integer);
+    clp.RegisterIntOption("-sdpDel", &params.sdpDel, "", CommandLineParser::Integer);
+    clp.RegisterFloatOption("-indelRate", &params.indelRate, "", CommandLineParser::NonNegativeFloat);
+    clp.RegisterFloatOption("-minRatio", &params.minRatio, "", CommandLineParser::NonNegativeFloat);
+    clp.RegisterFloatOption("-sdpbypass", &params.sdpBypassThreshold, "", CommandLineParser::NonNegativeFloat);
+    clp.RegisterFloatOption("-minFrac", &trashbinFloat, "", CommandLineParser::NonNegativeFloat);
+    clp.RegisterIntOption("-maxScore", &params.maxScore, "", CommandLineParser::Integer);
+    clp.RegisterStringOption("-bwt", &params.bwtFileName, "");
     clp.RegisterIntOption("m", &params.printFormat, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("sam", &params.printSAM, "");
+    clp.RegisterFlagOption("-sam", &params.printSAM, "");
 #ifdef USE_PBBAM
-    clp.RegisterFlagOption("bam", &params.printBAM, "");
+    clp.RegisterFlagOption("-bam", &params.printBAM, "");
 #endif
-    clp.RegisterStringOption("clipping", &params.clippingString, "");
-    clp.RegisterIntOption("sdpTupleSize", &params.sdpTupleSize, "", CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("pvaltype", &params.pValueType, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("start", &params.startRead, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("stride", &params.stride, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFloatOption("subsample", &params.subsample, "", CommandLineParser::PositiveFloat);
-    clp.RegisterIntOption("nproc", &params.nProc, "", CommandLineParser::PositiveInteger);
-    clp.RegisterFlagOption("sortRefinedAlignments",(bool*) &params.sortRefinedAlignments, "");
-    clp.RegisterIntOption("quallc", &params.qualityLowerCaseThreshold, "", CommandLineParser::Integer);
+    clp.RegisterStringOption("-clipping", &params.clippingString, "");
+    clp.RegisterIntOption("-sdpTupleSize", &params.sdpTupleSize, "", CommandLineParser::PositiveInteger);
+    clp.RegisterIntOption("-pvaltype", &params.pValueType, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-start", &params.startRead, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-stride", &params.stride, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFloatOption("-subsample", &params.subsample, "", CommandLineParser::PositiveFloat);
+    clp.RegisterIntOption("-nproc", &params.nProc, "", CommandLineParser::PositiveInteger);
+    clp.RegisterFlagOption("-sortRefinedAlignments",(bool*) &params.sortRefinedAlignments, "");
+    clp.RegisterIntOption("-quallc", &params.qualityLowerCaseThreshold, "", CommandLineParser::Integer);
     clp.RegisterFlagOption("v", (bool*) &params.verbosity, "");
     clp.RegisterIntOption("V", &params.verbosity, "Specify a level of verbosity.", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("contextAlignLength", &params.anchorParameters.contextAlignLength, "", CommandLineParser::PositiveInteger);
-    clp.RegisterFlagOption("skipLookupTable", &params.anchorParameters.useLookupTable, "");
-    clp.RegisterStringOption("metrics", &params.metricsFileName, "");
-    clp.RegisterStringOption("lcpBounds", &params.lcpBoundsFileName, "");
-    clp.RegisterStringOption("fullMetrics", &params.fullMetricsFileName, "");
-    clp.RegisterIntOption("nbranch", &params.anchorParameters.numBranches, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("divideByAdapter", &params.byAdapter, "");
-    clp.RegisterFlagOption("useQuality", &params.ignoreQualities, "");
-    clp.RegisterFlagOption("noFrontAlign", &params.extendFrontAlignment, "");
-    clp.RegisterIntOption("minReadLength", &params.minReadLength, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("maxReadLength", &params.maxReadLength, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("minSubreadLength", &params.minSubreadLength, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("minRawSubreadScore", &params.minRawSubreadScore, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("minAvgQual", &params.minAvgQual, "", CommandLineParser::Integer);
-    clp.RegisterFlagOption("advanceHalf", &params.advanceHalf, "");
-    clp.RegisterIntOption("advanceExactMatches", &params.anchorParameters.advanceExactMatches, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("useccs", &params.useCcs, "");
-    clp.RegisterFlagOption("useccsdenovo", &params.useCcsOnly, "");
-    clp.RegisterFlagOption("useccsall", &params.useAllSubreadsInCcs, "");
-    clp.RegisterFlagOption("extendDenovoCCSSubreads", &params.extendDenovoCCSSubreads, "");
-    clp.RegisterFlagOption("noRefineAlignments", &params.refineAlignments, "");
-    clp.RegisterIntOption("nCandidates", &params.nCandidates, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("useTemp", (bool*) &params.tempDirectory, "");
-    clp.RegisterFlagOption("noSplitSubreads", &params.mapSubreadsSeparately, "");
-    clp.RegisterFlagOption("concordant", &params.concordant, "");
+    clp.RegisterIntOption("-contextAlignLength", &params.anchorParameters.contextAlignLength, "", CommandLineParser::PositiveInteger);
+    clp.RegisterFlagOption("-skipLookupTable", &params.anchorParameters.useLookupTable, "");
+    clp.RegisterStringOption("-metrics", &params.metricsFileName, "");
+    clp.RegisterStringOption("-lcpBounds", &params.lcpBoundsFileName, "");
+    clp.RegisterStringOption("-fullMetrics", &params.fullMetricsFileName, "");
+    clp.RegisterIntOption("-nbranch", &params.anchorParameters.numBranches, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFlagOption("-divideByAdapter", &params.byAdapter, "");
+    clp.RegisterFlagOption("-useQuality", &params.ignoreQualities, "");
+    clp.RegisterFlagOption("-noFrontAlign", &params.extendFrontAlignment, "");
+    clp.RegisterIntOption("-minReadLength", &params.minReadLength, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-maxReadLength", &params.maxReadLength, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-minSubreadLength", &params.minSubreadLength, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-minRawSubreadScore", &params.minRawSubreadScore, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-minAvgQual", &params.minAvgQual, "", CommandLineParser::Integer);
+    clp.RegisterFlagOption("-advanceHalf", &params.advanceHalf, "");
+    clp.RegisterIntOption("-advanceExactMatches", &params.anchorParameters.advanceExactMatches, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFlagOption("-useccs", &params.useCcs, "");
+    clp.RegisterFlagOption("-useccsdenovo", &params.useCcsOnly, "");
+    clp.RegisterFlagOption("-useccsall", &params.useAllSubreadsInCcs, "");
+    clp.RegisterFlagOption("-extendDenovoCCSSubreads", &params.extendDenovoCCSSubreads, "");
+    clp.RegisterFlagOption("-noRefineAlignments", &params.refineAlignments, "");
+    clp.RegisterFlagOption("-refineConcordantAlignments", &params.refineConcordantAlignments, "");
+    clp.RegisterIntOption("-nCandidates", &params.nCandidates, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFlagOption("-useTemp", (bool*) &params.tempDirectory, "");
+    clp.RegisterFlagOption("-noSplitSubreads", &params.mapSubreadsSeparately, "");
+    clp.RegisterFlagOption("-concordant", &params.concordant, "");
     // When -concordant is turned on, blasr first selects a subread (e.g., the median length full-pass subread)
     // of a zmw as template, maps the template subread to a reference, then infers directions of all other subreads
     // of the same zmw based on direction of the template, and finally maps all other subreads to the same
@@ -130,46 +131,48 @@ void RegisterBlasrOptions(CommandLineParser & clp, MappingParameters & params) {
     // all other subreads both forwardly and backwardly, without infering their directions. This is a hidden
     // diagnostic option only useful for analyzing movies which have lots of un-identified or missed adapters such
     // that directions of subreads can not be inferred accurately.
-    clp.RegisterFlagOption("concordantAlignBothDirections", &params.concordantAlignBothDirections, "");
-    clp.RegisterIntOption("flankSize", &params.flankSize, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterStringOption("titleTable", &params.titleTableName, "");
-    clp.RegisterFlagOption("useSensitiveSearch", &params.doSensitiveSearch, "");
-    clp.RegisterFlagOption("ignoreRegions", &params.useRegionTable, "");
-    clp.RegisterFlagOption("ignoreHQRegions", &params.useHQRegionTable, "");
-    clp.RegisterFlagOption("computeAlignProbability", &params.computeAlignProbability, "");
-    clp.RegisterStringOption("unaligned", &params.unalignedFileName, "");
-    clp.RegisterFlagOption("global", &params.doGlobalAlignment, "");
-    clp.RegisterIntOption("globalChainType", &params.globalChainType, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("noPrintSubreadTitle", (bool*) &params.printSubreadTitle, "");
-    clp.RegisterIntOption("saLookupTableLength", &params.lookupTableLength, "", CommandLineParser::PositiveInteger);
-    clp.RegisterFlagOption("useDetailedSDP", &params.detailedSDPAlignment, "");
-    clp.RegisterFlagOption("nouseDetailedSDP", &trashbinBool, "");
-    clp.RegisterIntOption("sdpFilterType", &params.sdpFilterType, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("scoreType", &params.scoreType, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFlagOption("-concordantAlignBothDirections", &params.concordantAlignBothDirections, "");
+    clp.RegisterIntOption("-flankSize", &params.flankSize, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterStringOption("-titleTable", &params.titleTableName, "");
+    clp.RegisterFlagOption("-useSensitiveSearch", &params.doSensitiveSearch, "");
+    clp.RegisterFlagOption("-ignoreRegions", &params.useRegionTable, "");
+    clp.RegisterFlagOption("-ignoreHQRegions", &params.useHQRegionTable, "");
+    clp.RegisterFlagOption("-computeAlignProbability", &params.computeAlignProbability, "");
+    clp.RegisterStringOption("-unaligned", &params.unalignedFileName, "");
+    // Print only the names of unaligned reads.
+    clp.RegisterFlagOption("-noPrintUnalignedSeqs", &params.noPrintUnalignedSeqs, "");
+    clp.RegisterFlagOption("-global", &params.doGlobalAlignment, "");
+    clp.RegisterIntOption("-globalChainType", &params.globalChainType, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFlagOption("-noPrintSubreadTitle", (bool*) &params.printSubreadTitle, "");
+    clp.RegisterIntOption("-saLookupTableLength", &params.lookupTableLength, "", CommandLineParser::PositiveInteger);
+    clp.RegisterFlagOption("-useDetailedSDP", &params.detailedSDPAlignment, "");
+    clp.RegisterFlagOption("-nouseDetailedSDP", &trashbinBool, "");
+    clp.RegisterIntOption("-sdpFilterType", &params.sdpFilterType, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-scoreType", &params.scoreType, "", CommandLineParser::NonNegativeInteger);
     clp.RegisterFlagOption("h", &params.printVerboseHelp, "");
-    clp.RegisterFlagOption("help", &params.printDiscussion, "");
-    clp.RegisterFloatOption("accuracyPrior",    &params.readAccuracyPrior, "", CommandLineParser::NonNegativeFloat);
+    clp.RegisterFlagOption("-help", &params.printDiscussion, "");
+    clp.RegisterFloatOption("-accuracyPrior",    &params.readAccuracyPrior, "", CommandLineParser::NonNegativeFloat);
     // holeNumberRangesStr is a string of comma-delimited hole number ranges, such as '1,2,3,10-15'.
     // Blasr only analyzes reads whose hole numbers are in the specified hole number ranges.
-    clp.RegisterStringOption("holeNumbers", &params.holeNumberRangesStr, "");
-    clp.RegisterIntOption("substitutionPrior",  &params.substitutionPrior, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("deletionPrior",  &params.globalDeletionPrior, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("recurseOver", &params.recurseOver, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterStringOption("scoreMatrix", &params.scoreMatrixString, "");
-    clp.RegisterFlagOption("printDotPlots", &params.printDotPlots, "");
-    clp.RegisterFlagOption("preserveReadTitle", &params.preserveReadTitle,"");
-    clp.RegisterFlagOption("forwardOnly", &params.forwardOnly,"");
-    clp.RegisterFlagOption("affineAlign", &params.affineAlign, "");
-    clp.RegisterIntOption("affineOpen", &params.affineOpen, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterIntOption("affineExtend", &params.affineExtend, "", CommandLineParser::NonNegativeInteger);
-    clp.RegisterFlagOption("scaleMapQVByNClusters", &params.scaleMapQVByNumSignificantClusters, "", false);
-    clp.RegisterFlagOption("printSAMQV", &params.printSAMQV, "", false);
-    clp.RegisterFlagOption("cigarUseSeqMatch", &params.cigarUseSeqMatch, "");
-    clp.RegisterStringListOption("samQV", &params.samQV, "");
-    clp.RegisterFlagOption("fastMaxInterval", &params.fastMaxInterval, "", false);
-    clp.RegisterFlagOption("aggressiveIntervalCut", &params.aggressiveIntervalCut, "", false);
-    clp.RegisterFlagOption("fastSDP", &params.fastSDP, "", false);
-    clp.RegisterStringOption("concordantTemplate", &params.concordantTemplate, "typicalsubread");
+    clp.RegisterStringOption("-holeNumbers", &params.holeNumberRangesStr, "");
+    clp.RegisterIntOption("-substitutionPrior",  &params.substitutionPrior, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-deletionPrior",  &params.globalDeletionPrior, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-recurseOver", &params.recurseOver, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterStringOption("-scoreMatrix", &params.scoreMatrixString, "");
+    clp.RegisterFlagOption("-printDotPlots", &params.printDotPlots, "");
+    clp.RegisterFlagOption("-preserveReadTitle", &params.preserveReadTitle,"");
+    clp.RegisterFlagOption("-forwardOnly", &params.forwardOnly,"");
+    clp.RegisterFlagOption("-affineAlign", &params.affineAlign, "");
+    clp.RegisterIntOption("-affineOpen", &params.affineOpen, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterIntOption("-affineExtend", &params.affineExtend, "", CommandLineParser::NonNegativeInteger);
+    clp.RegisterFlagOption("-scaleMapQVByNClusters", &params.scaleMapQVByNumSignificantClusters, "", false);
+    clp.RegisterFlagOption("-printSAMQV", &params.printSAMQV, "", false);
+    clp.RegisterFlagOption("-cigarUseSeqMatch", &params.cigarUseSeqMatch, "");
+    clp.RegisterStringListOption("-samQV", &params.samQV, "");
+    clp.RegisterFlagOption("-fastMaxInterval", &params.fastMaxInterval, "", false);
+    clp.RegisterFlagOption("-aggressiveIntervalCut", &params.aggressiveIntervalCut, "", false);
+    clp.RegisterFlagOption("-fastSDP", &params.fastSDP, "", false);
+    clp.RegisterStringOption("-concordantTemplate", &params.concordantTemplate, "typicalsubread");
 
     RegisterFilterOptions(clp, params.minAlnLength, params.minPctSimilarity, params.minPctAccuracy,
                           params.hitPolicyStr, trashbinBool=true, trashbinInt, params.maxScore);
@@ -189,16 +192,16 @@ const string BlasrHelp(MappingParameters & params) {
              << "   reads.fasta is a multi-fasta file of reads.  While any fasta file is valid input, " << endl
              << "   reads.bax.h5|reads.plx.h5 is the old DEPRECATED output format of SMRT reads." << endl
              << "   input.fofn  File of file names accepted." << endl << endl
-             << "   -sa suffixArrayFile"<< endl
+             << "   --sa suffixArrayFile"<< endl
              << "               Use the suffix array 'sa' for detecting matches" << endl
              << "               between the reads and the reference.  The suffix" << endl
              << "               array has been prepared by the sawriter program." << endl << endl
-             << "   -ctab tab "<<endl
+             << "   --ctab tab "<<endl
              << "               A table of tuple counts used to estimate match significance.  This is " << endl
              << "               by the program 'printTupleCountTable'.  While it is quick to generate on " << endl
              << "               the fly, if there are many invocations of blasr, it is useful to"<<endl
              << "               precompute the ctab." <<endl << endl
-             << "   -regionTable table (DEPRECATED)" << endl
+             << "   --regionTable table (DEPRECATED)" << endl
              << "               Read in a read-region table in HDF format for masking portions of reads." << endl
              << "               This may be a single table if there is just one input file, " << endl
              << "               or a fofn.  When a region table is specified, any region table inside " << endl
@@ -215,53 +218,54 @@ const string BlasrHelp(MappingParameters & params) {
              << "               contain information for the location of the high and low quality regions of"<<endl
              << "               reads.  Reads produced by spurrious reads from empty ZMWs have a high"<<endl
              << "               quality start coordinate equal to high quality end, making no usable read." <<endl
-             << "   -useccs   " << endl
+             << "   --useccs   " << endl
              << "               Align the circular consensus sequence (ccs), then report alignments" << endl
              << "               of the ccs subreads to the window that the ccs was mapped to.  Only " << endl
              << "               alignments of the subreads are reported." << endl
-             << "   -useccsall"<<endl
+             << "   --useccsall"<<endl
              << "               Similar to -useccs, except all subreads are aligned, rather than just" << endl
              << "               the subreads used to call the ccs.  This will include reads that only"<<endl
              << "               cover part of the template." << endl
-             << "   -useccsdenovo" << endl
+             << "   --useccsdenovo" << endl
              << "               Align the circular consensus, and report only the alignment of the ccs"<<endl
              << "               sequence." << endl
-             << "   -noSplitSubreads (false)" <<endl
+             << "   --noSplitSubreads (false)" <<endl
              << "               Do not split subreads at adapters.  This is typically only " << endl
              << "               useful when the genome in an unrolled version of a known template, and " << endl
              << "               contains template-adapter-reverse_template sequence." << endl
-             << "   -ignoreRegions(false)" << endl
+             << "   --ignoreRegions(false)" << endl
              << "               Ignore any information in the region table." << endl
-             << "   -ignoreHQRegions (false)Ignore any hq regions in the region table." << endl
+             << "   --ignoreHQRegions (false)Ignore any hq regions in the region table." << endl
              << endl
              << " Alignments To Report." << endl
-             << "   -bestn n (10)" <<endl
+             << "   --bestn n (10)" <<endl
              << "               Report the top 'n' alignments." << endl
-             << "   -hitPolicy" << endl
+             << "   --hitPolicy" << endl
              << "               " << params.hitPolicy.Help(string(15, ' ')) << endl
-             << "   -placeRepeatsRandomly (false)" << endl
-             << "               DEPRECATED! If true, equivalent to -hitPolicy randombest." << endl
-             << "   -randomSeed (0)" << endl
+             << "   --placeRepeatsRandomly (false)" << endl
+             << "               DEPRECATED! If true, equivalent to --hitPolicy randombest." << endl
+             << "   --randomSeed (0)" << endl
              << "               Seed for random number generator. By default (0), use current time as seed. " << endl
-             << "   -noSortRefinedAlignments (false) " << endl
+             << "   --noSortRefinedAlignments (false) " << endl
              << "               Once candidate alignments are generated and scored via sparse dynamic "<< endl
              << "               programming, they are rescored using local alignment that accounts " << endl
              << "               for different error profiles." <<endl
              << "               Resorting based on the local alignment may change the order the hits are returned." << endl
-             << "   -allowAdjacentIndels " << endl
+             << "   --allowAdjacentIndels " << endl
              << "               When specified, adjacent insertion or deletions are allowed. Otherwise, adjacent " << endl
              << "               insertion and deletions are merged into one operation.  Using quality values " << endl
              << "               to guide pairwise alignments may dictate that the higher probability alignment "<<endl
              << "               contains adjacent insertions or deletions.  Current tools such as GATK do not permit" << endl
              << "               this and so they are not reported by default." << endl << endl
              << " Output Formats and Files" << endl
-             << "   -out out (terminal)  " << endl
+             << "   --out out (terminal)  " << endl
              << "               Write output to 'out'." << endl
 #ifdef USE_PBBAM
-             << "   -bam        Write output in PacBio BAM format. This is the preferred output format." << endl
+             << "   --bam       Write output in PacBio BAM format. This is the preferred output format." << endl
              << "               Input query reads must be in PacBio BAM format." << endl
 #endif
-             << "   -sam        Write output in SAM format." << endl
+             << "   --sam       Write output in SAM format. Starting from version 5.2 is no longer supported" << endl
+             << "               Use --bam, then translate from .bam to .sam" << endl
              << "   -m t           " << endl
              << "               If not printing SAM, modify the output of the alignment." << endl
              << "                t=" << StickPrint <<   " Print blast like output with |'s connecting matched nucleotides." << endl
@@ -270,82 +274,80 @@ const string BlasrHelp(MappingParameters & params) {
              << "                  " << Vulgar <<       " Print in vulgar format (DEPRECATED)." << endl
              << "                  " << Interval <<     " Print a longer tabular version of the alignment." << endl
              << "                  " << CompareSequencesParsable  << " Print in a machine-parsable format that is read by compareSequences.py." << endl
-             << "   -header" <<endl
+             << "   --header" <<endl
              << "               Print a header as the first line of the output file describing the contents of each column."<<endl
-             << "   -titleTable tab (NULL) " << endl
+             << "   --titleTable tab (NULL) " << endl
              << "               Construct a table of reference sequence titles.  The reference sequences are " << endl
              << "               enumerated by row, 0,1,...  The reference index is printed in alignment results" << endl
              << "               rather than the full reference name.  This makes output concise, particularly when" << endl
              << "               very verbose titles exist in reference names."<< endl
-             << "   -unaligned file" << endl
+             << "   --unaligned file" << endl
              << "               Output reads that are not aligned to 'file'" << endl
-             << "   -clipping [none|hard|subread|soft] (none)" << endl
+             << "   --noPrintUnalignedSeqs" << endl
+             << "               Must be used together with --unaligned, print unaligned read names only." << endl
+             << "   --clipping [none|hard|subread|soft] (none)" << endl
              << "               Use no/hard/subread/soft clipping, ONLY for SAM/BAM output."<< endl
-             << "   -printSAMQV (false)" << endl
+             << "   --printSAMQV (false)" << endl
              << "               Print quality values to SAM output." << endl
-             << "   -cigarUseSeqMatch (false)" << endl
+             << "   --cigarUseSeqMatch (false)" << endl
              << "               CIGAR strings in SAM/BAM output use '=' and 'X' to represent sequence match and mismatch instead of 'M'." << endl << endl
              << " Options for anchoring alignment regions. This will have the greatest effect on speed and sensitivity." << endl
-             << "   -minMatch m (12) " << endl
+             << "   --minMatch m (12) " << endl
              << "               Minimum seed length.  Higher minMatch will speed up alignment, " << endl
              << "               but decrease sensitivity." << endl
-//             << "   -maxExpand M (1)" << endl
+//             << "   --maxExpand M (1)" << endl
 //             << "               Perform no more than M iterations of searches through the suffix " << endl
 //             << "               array for matches. At each iteration, all matches of length LCPi-M" << endl
 //             << "               are found, where LCPi is the length of the longest common prefix " << endl
 //             << "               between the string at i and anywhere in the genome."<<endl
 //             << "               The number of matches grows as M increases, and can become very large with M > 3." << endl
-             << "   -maxMatch l (inf)" << endl
+             << "   --maxMatch l (inf)" << endl
              << "               Stop mapping a read to the genome when the lcp length reaches l.  " << endl
              << "               This is useful when the query is part of the reference, for example when " <<endl
              << "               constructing pairwise alignments for de novo assembly."<<endl
-             << "   -maxLCPLength l (inf)" << endl
+             << "   --maxLCPLength l (inf)" << endl
              << "               The same as -maxMatch." << endl
-             << "   -maxAnchorsPerPosition m (10000) " << endl
+             << "   --maxAnchorsPerPosition m (10000) " << endl
              << "               Do not add anchors from a position if it matches to more than 'm' locations in the target." << endl
-//             << "   -advanceHalf (false) " << endl
+//             << "   --advanceHalf (false) " << endl
 //             << "               A trick for speeding up alignments at the cost of sensitivity.  If " << endl
 //             << "               a cluster of anchors of size n, (a1,...,an) is found, normally anchors " << endl
 //             << "               (a2,...an) of size n-1 is also clustered to make sure a1 did not decrease the " << endl
 //             << "               cluster score.  When advanceHalf is specified, clustering begins at a_(n/2)."<<endl<< endl
-             << "   -advanceExactMatches E (0)" << endl
+             << "   --advanceExactMatches E (0)" << endl
              << "               Another trick for speeding up alignments with match - E fewer anchors.  Rather than" << endl
              << "               finding anchors between the read and the genome at every position in the read, " <<endl
              << "               when an anchor is found at position i in a read of length L, the next position " << endl
              << "               in a read to find an anchor is at i+L-E." << endl
              << "               Use this when alignining already assembled contigs." << endl
-             << "   -nCandidates n (10)" << endl
+             << "   --nCandidates n (10)" << endl
              << "               Keep up to 'n' candidates for the best alignment.  A large value of n will slow mapping" << endl
              << "               because the slower dynamic programming steps are applied to more clusters of anchors" <<endl
              << "               which can be a rate limiting step when reads are very long."<<endl
-             << "   -concordant(false)" << endl
+             << "   --concordant(false)" << endl
              << "               Map all subreads of a zmw (hole) to where the longest full pass subread of the zmw " << endl
              << "               aligned to. This requires to use the region table and hq regions." << endl
              << "               This option only works when reads are in base or pulse h5 format." << endl
-             << "   -concordantTemplate(mediansubread)" << endl
-             << "               Select a full pass subread of a zmw as template for concordant mapping." << endl
-             << "               longestsubread - use the longest full pass subread" << endl
-             << "               mediansubread  - use the median length full pass subread" << endl
-             << "               typicalsubread - use the second longest full pass subread if length of" << endl
-             << "                                the longest full pass subread is an outlier" << endl
-             << "   -fastMaxInterval(false)" << endl
+             << "   --fastMaxInterval(false)" << endl
              << "               Fast search maximum increasing intervals as alignment candidates. The search " << endl
              << "               is not as exhaustive as the default, but is much faster." << endl
-             << "   -aggressiveIntervalCut(false)" << endl
+             << "   --aggressiveIntervalCut(false)" << endl
              << "               Agreesively filter out non-promising alignment candidates, if there " << endl
              << "               exists at least one promising candidate. If this option is turned on, " << endl
              << "               Blasr is likely to ignore short alignments of ALU elements." << endl
-             << "   -fastSDP(false)" << endl
+             << "   --fastSDP(false)" << endl
              << "               Use a fast heuristic algorithm to speed up sparse dynamic programming." << endl
              << endl
              << "  Options for Refining Hits." << endl
-//             << "   -indelRate i (0.30)" << endl
+//             << "   --indelRate i (0.30)" << endl
 //             << "               The approximate maximum rate to allow drifting from the diagonal." <<endl << endl
-             << "   -sdpTupleSize K (11)" << endl
+             << "   --refineConcordantAlignments(false)" << endl
+             << "               Refine concordant alignments. It slightly increases alignment accuracy at cost of time." << endl
+             << "   --sdpTupleSize K (11)" << endl
              << "               Use matches of length K to speed dynamic programming alignments.  This controls" <<endl
              << "               accuracy of assigning gaps in pairwise alignments once a mapping has been found,"<<endl
              << "               rather than mapping sensitivity itself."<<endl
-             << "   -scoreMatrix \"score matrix string\" " << endl
+             << "   --scoreMatrix \"score matrix string\" " << endl
              << "               Specify an alternative score matrix for scoring fasta reads.  The matrix is " << endl
              << "               in the format " << endl
              << "                  ACGTN" << endl
@@ -356,12 +358,12 @@ const string BlasrHelp(MappingParameters & params) {
              << "                N uvwxy" << " . The values a...y should be input as a quoted space separated " << endl
              << "               string: \"a b c ... y\". Lower scores are better, so matches should be less " << endl
              << "               than mismatches e.g. a,g,m,s = -5 (match), mismatch = 6. " << endl
-             << "   -affineOpen value (10) " << endl
+             << "   --affineOpen value (10) " << endl
              << "               Set the penalty for opening an affine alignment." << endl
-             << "   -affineExtend a (0)" << endl
+             << "   --affineExtend a (0)" << endl
              << "               Change affine (extension) gap penalty. Lower value allows more gaps." << endl << endl
              << " Options for overlap/dynamic programming alignments and pairwise overlap for de novo assembly. " << endl
-             << "   -useQuality (false)" << endl
+             << "   --useQuality (false)" << endl
              << "               Use substitution/insertion/deletion/merge quality values to score gap and " << endl
              << "               mismatch penalties in pairwise alignments.  Because the insertion and deletion" << endl
              << "               rates are much higher than substitution, this will make many alignments " <<endl
@@ -370,47 +372,52 @@ const string BlasrHelp(MappingParameters & params) {
              << "               used when calling consensus using the Quiver method.  Furthermore, when " << endl
              << "               not using quality values to score alignments, there will be a lower consensus " << endl
              << "               accuracy in homolymer regions." << endl
-             << "   -affineAlign (false)" << endl
+             << "   --affineAlign (false)" << endl
              << "               Refine alignment using affine guided align." << endl << endl
              << " Options for filtering reads and alignments" << endl
-             << "   -minReadLength l(50)" << endl
+             << "   --minReadLength l(50)" << endl
              << "               Skip reads that have a full length less than l. Subreads may be shorter." << endl
-             << "   -minSubreadLength l(0)" << endl
+             << "   --minSubreadLength l(0)" << endl
              << "               Do not align subreads of length less than l." << endl
-             << "   -minRawSubreadScore m(0)" << endl
+             << "   --minRawSubreadScore m(0)" << endl
              << "               Do not align subreads whose quality score in region table is less than m (quality scores should be in range [0, 1000])." << endl
-             << "   -maxScore m(-200)" << endl //params.filterCriteria.scoreCutoff
+             << "   --maxScore m(-200)" << endl //params.filterCriteria.scoreCutoff
              << "               Maximum score to output (high is bad, negative good)." << endl
-             << "   -minAlnLength" << endl
+             << "   --minAlnLength" << endl
              << "               " << params.filterCriteria.MinAlnLengthHelp() << endl
-             << "   -minPctSimilarity" << endl
+             << "   --minPctSimilarity" << endl
              << "               " << params.filterCriteria.MinPctSimilarityHelp() << endl
-             << "   -minPctAccuracy" << endl
+             << "   --minPctAccuracy" << endl
              << "               " << params.filterCriteria.MinPctAccuracyHelp() << endl << endl
              << " Options for parallel alignment." << endl
-             << "   -nproc N (1)" << endl
+             << "   --nproc N (1)" << endl
              << "               Align using N processes.  All large data structures such as the suffix array and " << endl
              << "               tuple count table are shared."<<endl
-             << "   -start S (0)" << endl
+             << "   --start S (0)" << endl
              << "               Index of the first read to begin aligning. This is useful when multiple instances " << endl
              << "               are running on the same data, for example when on a multi-rack cluster."<<endl
-             << "   -stride S (1)" << endl
+             << "   --stride S (1)" << endl
              << "               Align one read every 'S' reads." << endl << endl
              << " Options for subsampling reads." << endl
-             << "   -subsample (0)" << endl
+             << "   --subsample (0)" << endl
              << "               Proportion of reads to randomly subsample (expressed as a decimal) and align." << endl
-             << "   -holeNumbers LIST " << endl
+             << "   --holeNumbers LIST " << endl
              << "               When specified, only align reads whose ZMW hole numbers are in LIST." << endl
              << "               LIST is a comma-delimited string of ranges, such as '1,2,3,10-13'." << endl
              << "               This option only works when reads are in bam, bax.h5 or plx.h5 format." << endl
              << endl
 //             << " Options for dynamic programming alignments. " << endl << endl
-//             << "   -ignoreQuality" << endl
+//             << "   --ignoreQuality" << endl
 //             << "                 Ignore quality values when computing alignments (they still may be used." << endl
 //             << "                 when mapping)." << endl << endl
 //             << " -v            Print some verbose information." << endl
 //             << " -V 2          Make verbosity more verbose.  Probably only useful for development." << endl
              << " -h            Print this help file." << endl << endl
+             << "In release v5.1 of BLASR, command-line options will use the " << endl
+             << "single dash/double dash convention: " << endl
+             << "Character options are preceded by a single dash. (Example: -v) " << endl
+             << "Word options are preceded by a double dash. (Example: --verbose) " << endl
+             << "Please modify your scripts accordingly when BLASR v5.1 is released. " << endl << endl
              << "To cite BLASR, please use: Chaisson M.J., and Tesler G., Mapping " << endl
              << "single molecule sequencing reads using Basic Local Alignment with " << endl
              << "Successive Refinement (BLASR): Theory and Application, BMC " << endl
@@ -425,7 +432,12 @@ const string BlasrConciseHelp(void) {
     ss << "blasr - a program to map reads to a genome" << endl
        << " usage: blasr reads genome " << endl
        << " Run with -h for a list of commands " << endl
-       << "          -help for verbose discussion of how to run blasr." << endl;
+       << "          -help for verbose discussion of how to run blasr." << endl << endl
+       << "In release v5.1 of BLASR, command-line options will use the " << endl
+       << "single dash/double dash convention: " << endl
+       << "Character options are preceded by a single dash. (Example: -v) " << endl
+       << "Word options are preceded by a double dash. (Example: --verbose) " << endl
+       << "Please modify your scripts accordingly when BLASR v5.1 is released. " << endl << endl;
     return ss.str();
 }
 
diff --git a/include/RegisterFilterOptions.h b/iblasr/RegisterFilterOptions.h
similarity index 56%
rename from include/RegisterFilterOptions.h
rename to iblasr/RegisterFilterOptions.h
index f12d302..ddd8d0f 100644
--- a/include/RegisterFilterOptions.h
+++ b/iblasr/RegisterFilterOptions.h
@@ -1,13 +1,14 @@
-#include "libconfig.h"
-#include "CommandLineParser.hpp"
-#include "datastructures/alignment/FilterCriteria.hpp"
-#include <sstream>
-using namespace std;
+#pragma once
+
+#include <libconfig.h>
+#include <CommandLineParser.hpp>
+#include <datastructures/alignment/FilterCriteria.hpp>
+#include <string>
 
 /// Register options for filtering alignments.
 void RegisterFilterOptions(CommandLineParser & clp, int & minAlnLength,
                            float & minPctSimilarity, float & minPctAccuracy,
-                           string & hitPolicyStr, bool & useScoreCutoff,
+                           std::string & hitPolicyStr, bool & useScoreCutoff,
                            int & scoreSignInt, int & scoreCutoff) {
     ScoreSign ss = static_cast<ScoreSign>(scoreSignInt);
     Score sc(static_cast<float>(scoreCutoff),  ss);
@@ -17,37 +18,37 @@ void RegisterFilterOptions(CommandLineParser & clp, int & minAlnLength,
 
     HitPolicy hp("randombest", ScoreSign::NEGATIVE);
 
-    clp.RegisterIntOption("minAlnLength", &minAlnLength,
+    clp.RegisterIntOption("-minAlnLength", &minAlnLength,
                           fc.MinAlnLengthHelp(),
                           CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("minAlignLength", &minAlnLength,
-                          "Alias of -minAlnLength",
+    clp.RegisterIntOption("-minAlignLength", &minAlnLength,
+                          "Alias of --minAlnLength",
                           CommandLineParser::PositiveInteger);
-    clp.RegisterIntOption("minLength", &minAlnLength,
-                          "Alias of -minAlnLength",
+    clp.RegisterIntOption("-minLength", &minAlnLength,
+                          "Alias of --minAlnLength",
                           CommandLineParser::PositiveInteger);
 
-    clp.RegisterFloatOption("minPctSimilarity", &minPctSimilarity,
+    clp.RegisterFloatOption("-minPctSimilarity", &minPctSimilarity,
                             fc.MinPctSimilarityHelp(),
                             CommandLineParser::PositiveFloat);
-    clp.RegisterFloatOption("minPctIdentity", &minPctSimilarity,
-                            "Alias of -minPctSimilarity",
+    clp.RegisterFloatOption("-minPctIdentity", &minPctSimilarity,
+                            "Alias of --minPctSimilarity",
                             CommandLineParser::PositiveFloat);
 
-    clp.RegisterFloatOption("minPctAccuracy", &minPctAccuracy,
+    clp.RegisterFloatOption("-minPctAccuracy", &minPctAccuracy,
                             fc.MinPctAccuracyHelp(),
                             CommandLineParser::PositiveFloat);
-    clp.RegisterFloatOption("minAccuracy", &minPctAccuracy,
-                            "Alias of -minPctAccuracy",
+    clp.RegisterFloatOption("-minAccuracy", &minPctAccuracy,
+                            "Alias of --minPctAccuracy",
                             CommandLineParser::PositiveFloat);
 
-    clp.RegisterStringOption("hitPolicy", &hitPolicyStr, hp.Help());
+    clp.RegisterStringOption("-hitPolicy", &hitPolicyStr, hp.Help());
 
-    clp.RegisterIntOption("scoreSign", &scoreSignInt,
+    clp.RegisterIntOption("-scoreSign", &scoreSignInt,
                           fc.ScoreSignHelp(),
                           CommandLineParser::Integer);
 
-    clp.RegisterIntOption("scoreCutoff", &scoreCutoff,
+    clp.RegisterIntOption("-scoreCutoff", &scoreCutoff,
                           fc.ScoreCutoffHelp(),
                           CommandLineParser::Integer);
 }
diff --git a/include/BlasrHeaders.h b/include/BlasrHeaders.h
deleted file mode 100644
index 4da847e..0000000
--- a/include/BlasrHeaders.h
+++ /dev/null
@@ -1,149 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-#ifndef _BLASR_HEADERS_H_
-#define _BLASR_HEADERS_H_
-
-#ifdef __linux__
-#  include <mcheck.h>
-#endif
-#include <string>
-#include <iostream>
-#include <vector>
-#include <set>
-#include <sstream>
-#include <pthread.h>
-#include <stdlib.h>
-#include <time.h>
-#include <signal.h>
-#include <execinfo.h>
-
-#define MAX_PHRED_SCORE 254
-#define MAPQV_END_ALIGN_WIGGLE 5
-
-using namespace std;
-
-#include "libconfig.h"
-#ifdef USE_PBBAM
-#include <pbbam/BamWriter.h>
-#endif
-
-#include "CCSSequence.hpp"
-#include "SMRTSequence.hpp"
-#include "FASTASequence.hpp"
-#include "FASTAReader.hpp"
-#include "SeqUtils.hpp"
-#include "defs.h"
-#include "utils.hpp"
-
-
-#include "tuples/DNATuple.hpp"
-#include "tuples/HashedTupleList.hpp"
-#include "algorithms/compare/CompareStrings.hpp"
-#include "algorithms/alignment/AffineKBandAlign.hpp"
-#include "algorithms/alignment/GuidedAlign.hpp"
-#include "algorithms/alignment/AffineGuidedAlign.hpp"
-#include "algorithms/alignment/FullQVAlign.hpp"
-#include "algorithms/alignment/ExtendAlign.hpp"
-#include "algorithms/alignment/OneGapAlignment.hpp"
-#include "algorithms/alignment/AlignmentUtils.hpp"
-#include "algorithms/alignment/QualityValueScoreFunction.hpp"
-#include "algorithms/alignment/IDSScoreFunction.hpp"
-#include "algorithms/alignment/DistanceMatrixScoreFunction.hpp"
-#include "algorithms/alignment/StringToScoreMatrix.hpp"
-#include "algorithms/alignment/AlignmentFormats.hpp"
-#include "algorithms/anchoring/LISPValue.hpp"
-#include "algorithms/anchoring/LISPValueWeightor.hpp"
-#include "algorithms/anchoring/LISSizeWeightor.hpp"
-#include "algorithms/anchoring/LISQValueWeightor.hpp"
-#include "algorithms/anchoring/FindMaxInterval.hpp"
-#include "algorithms/anchoring/MapBySuffixArray.hpp"
-#include "datastructures/anchoring/ClusterList.hpp"
-#include "algorithms/anchoring/ClusterProbability.hpp"
-#include "algorithms/anchoring/BWTSearch.hpp"
-#include "metagenome/SequenceIndexDatabase.hpp"
-#include "metagenome/TitleTable.hpp"
-#include "suffixarray/SharedSuffixArray.hpp"
-#include "suffixarray/SuffixArrayTypes.hpp"
-#include "tuples/TupleCountTable.hpp"
-#include "datastructures/anchoring/WeightedInterval.hpp"
-#include "datastructures/anchoring/AnchorParameters.hpp"
-#include "datastructures/alignment/AlignmentCandidate.hpp"
-#include "datastructures/alignment/AlignmentContext.hpp"
-#include "MappingMetrics.hpp"
-#include "reads/ReadInterval.hpp"
-#include "utils/FileOfFileNames.hpp"
-#include "utils/RegionUtils.hpp"
-#include "utils/TimeUtils.hpp"
-#include "utils/SMRTTitle.hpp"
-#include "qvs/QualityTransform.hpp"
-#include "files/ReaderAgglomerate.hpp"
-#include "files/CCSIterator.hpp"
-#include "files/FragmentCCSIterator.hpp"
-#include "HDFRegionTableReader.hpp"
-#include "bwt/BWT.hpp"
-#include "PackedDNASequence.hpp"
-#include "CommandLineParser.hpp"
-#include "qvs/QualityValue.hpp"
-#include "statistics/VarianceAccumulator.hpp"
-#include "statistics/pdfs.hpp"
-#include "statistics/cdfs.hpp"
-#include "statistics/StatUtils.hpp"
-#include "statistics/LookupAnchorDistribution.hpp"
-#include "format/StickAlignmentPrinter.hpp"
-#include "format/SAMPrinter.hpp"
-#include "format/XMLPrinter.hpp"
-#include "format/CompareSequencesPrinter.hpp"
-#include "format/VulgarPrinter.hpp"
-#include "format/IntervalPrinter.hpp"
-#include "format/SummaryPrinter.hpp"
-#include "format/SAMHeaderPrinter.hpp"
-#include "format/BAMPrinter.hpp"
-
-#include "MappingIPC.h"
-#include "MappingSemaphores.h"
-#include "MappingBuffers.hpp"
-#include "ReadAlignments.hpp"
-
-
-typedef SMRTSequence T_Sequence;
-typedef FASTASequence T_GenomeSequence;
-typedef DNASuffixArray T_SuffixArray;
-typedef DNATuple T_Tuple;
-typedef LISPValueWeightor<T_GenomeSequence, DNATuple, vector<ChainedMatchPos> >  PValueWeightor;
-typedef LISSMatchFrequencyPValueWeightor<T_GenomeSequence, DNATuple, vector<ChainedMatchPos> >  MultiplicityPValueWeightor;
-typedef MappingData<T_SuffixArray, T_GenomeSequence, T_Tuple> MappingIPC;
-
-#endif
diff --git a/include/BlasrMiscs.hpp b/include/BlasrMiscs.hpp
deleted file mode 100644
index 2d0a09c..0000000
--- a/include/BlasrMiscs.hpp
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Mark Chaisson
-#ifndef _BLASR_MISCS_HPP_
-#define _BLASR_MISCS_HPP_
-
-#include "BlasrHeaders.h"
-
-//-------------------------Fetch Reads----------------------------//
-template<typename T_Sequence>
-bool GetNextReadThroughSemaphore(ReaderAgglomerate &reader,
-                                 MappingParameters &params,
-                                 T_Sequence &read,
-                                 string & readGroupId,
-                                 int & associatedRandInt,
-                                 MappingSemaphores & semaphores);
-
-//---------------------MAKE & CHECK READS-------------------------//
-//FIXME: move to SMRTSequence
-bool ReadHasMeaningfulQualityValues(FASTQSequence &sequence);
-
-//FIXME: Move to SMRTSequence
-// Given a SMRT sequence and a subread interval, make the subread.
-// Input:
-//   smrtRead         - a SMRT sequence
-//   subreadInterval  - a subread interval
-//   params           - mapping parameters
-// Output:
-//   subreadSequence - the constructed subread
-void MakeSubreadOfInterval(SMRTSequence & subreadSequence,
-                           SMRTSequence & smrtRead,
-                           ReadInterval & subreadInterval,
-                           MappingParameters & params);
-
-//FIXME: Move to SMRTSequence
-// Given a SMRT sequence and one of its subreads, make the
-// reverse complement of the subread in the coordinate of the
-// reverse complement sequence of the SMRT sequence.
-// Input:
-//   smrtRead          - a SMRT read
-//   subreadSequence   - a subread of smrtRead
-// Output:
-//   subreadSequenceRC - the reverse complement of the subread
-//                       in the coordinate of the reverse
-//                       complement of the SMRT read.
-void MakeSubreadRC(SMRTSequence & subreadSequenceRC,
-                   SMRTSequence & subreadSequence,
-                   SMRTSequence & smrtRead);
-
-// Make a virtual SMRTSequence (polymerase reads) given all subreads.
-// NO QVs will be copied at this point.
-void MakeVirtualRead(SMRTSequence & smrtRead,
-                     const vector<SMRTSequence> & subreads);
-
-// Construct subreads invervals from subreads
-void MakeSubreadIntervals(vector<SMRTSequence> & subreads,
-                          vector<ReadInterval> & subreadIntervals);
-
-// Get index of median length interval
-int GetIndexOfMedian(const vector<ReadInterval> & subreadIntervals);
-
-//-------------------------MISC-----------------------------------//
-int CountZero(unsigned char *ptr, int length);
-
-#include "BlasrMiscsImpl.hpp"
-
-#endif
diff --git a/makefile b/makefile
index f0742aa..9182630 100644
--- a/makefile
+++ b/makefile
@@ -9,13 +9,13 @@ foo:
 	echo $(MAKEFILE_LIST)
 	echo ${SRCDIR}
 
-CXXFLAGS += -O3 -g
+CXXFLAGS += -O3 -g -DSHA1_7=\"${GET_SHA1}\"
 CXXOPTS += \
 		   -std=c++0x -pedantic \
-           -Wall -Wuninitialized -Wno-div-by-zero \
-           -MMD -MP -w -fpermissive
+           -Wall -Wextra -Wno-div-by-zero -Wno-overloaded-virtual \
+           -MMD -MP
 GCXXFLAGS := -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -fno-omit-frame-pointer 
-CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS}
+override CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS}
 #INC_DIRS:=${LIBBLASR_INC} ${LIBPBIHDF_INC} ${LIBPBDATA_INC} ${PBBAM_INC} ${HTSLIB_INC} ${HDF5_INC} ${ZLIB_INC}
 #LIB_DIRS:=${LIBBLASR_LIB} ${LIBPBIHDF_LIB} ${LIBPBDATA_LIB} ${PBBAM_LIB} ${HTSLIB_LIB} ${HDF5_LIB} ${ZLIB_LIB}
 #LDLIBS := \
@@ -25,13 +25,15 @@ CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS}
 
 # HDF5 needs -ldl, but mobs does not pass it in.
 
-CPPFLAGS:=-I${SRCDIR}/include ${CPPFLAGS}
-
 SRCS := Blasr.cpp
 OBJS := ${SRCS:.cpp=.o}
 DEPS := ${SRCS:.cpp=.d}
+GET_SHA1 := $(shell git rev-parse --short HEAD 2>/dev/null)
+
+override BLASR_PATH=${SRCDIR}/
+export BLASR_PATH
 
-LD_LIBRARY_PATH:=${HDF5_LIB}:${LIBBLASR_LIB}:${LIBPBIHDF_LIB}:${LIBPBDATA_LIB}:${LD_LIBRARY_PATH}
+override LD_LIBRARY_PATH:=${LIBBLASR_LIB}:${LIBPBIHDF_LIB}:${LIBPBDATA_LIB}:${HDF5_LIB}:${HTSLIB_LIB}:${PBBAM_LIB}:${ZLIB_LIB}:${LD_LIBRARY_PATH}
 export LD_LIBRARY_PATH
 # Note: On macosx, this would be DYLD_LIBRARY_PATH.
 
@@ -39,17 +41,19 @@ vpath %.cpp ${SRCDIR}
 
 init-submodule:
 	${MAKE} update-submodule
+	${MAKE} configure-submodule
 	${MAKE} build-submodule
 
 update-submodule:
 	git submodule update --init
 
+configure-submodule:
+	${MAKE} -f ${SRCDIR}/sub.mk configure-submodule
+
 build-submodule:
-	# DON'T use pbbam which is not on github.
-	cd libcpp && NOPBBAM=true HDF5_LIB=${HDF5_LIB} HDF5_INC=${HDF5_INC} ./configure.py
 	${MAKE} -C libcpp
 
-submodule-clean:
+distclean-submodule:
 	${RM} -r libcpp
 
 # The rules above must be run separately.
@@ -64,20 +68,12 @@ makeutils:
 makeextrautils:
 	${MAKE} -C extrautils
 
-CTESTS := \
-ctest/affineAlign.t            ctest/bamOut.t    ctest/ccsH5.t            ctest/filtercriteria.t  ctest/m0-5.t             ctest/samNM.t \
-ctest/aggressiveIntervalCut.t  ctest/bug25328.t  ctest/concordant.t       ctest/fofn.t            ctest/multipart.t        ctest/useccsallBestN1.t \
-ctest/alignScore.t             ctest/bug25741.t  ctest/ecoli.t            ctest/hitpolicy.t       ctest/noSplitSubreads.t  ctest/useccsallLargeGenome.t\
-ctest/bamIn.t                  ctest/bug25766.t  ctest/fastMaxInterval.t  ctest/holeNumbers.t     ctest/open_fail.t        ctest/verbose.t
-
-SLOW_CTESTS := ctest/bug25328.t ctest/useccsallLargeGenome.t
-
 cramtests: blasr utils
-	cram -v --shell=/bin/bash ${CTESTS}
+	${MAKE} -f cram.mk cramtests
 	${MAKE} -C utils cramtests
 
 cramfast: blasr utils
-	cram -v --shell=/bin/bash $(filter-out ${SLOW_CTESTS},${CTESTS})
+	${MAKE} -f cram.mk cramfast
 	${MAKE} -C utils cramfast
 
 gtest: blasr
diff --git a/rules.mk b/rules.mk
index bdae8f5..c705565 100644
--- a/rules.mk
+++ b/rules.mk
@@ -1,16 +1,16 @@
 INCDIRS := \
-        ${BLASR_INC} \
 	${LIBBLASR_INC} \
-	${LIBPBDATA_INC} \
 	${LIBPBIHDF_INC} \
+	${LIBPBDATA_INC}
+SYSINCDIRS := \
 	${PBBAM_INC} \
 	${HDF5_INC} \
 	${HTSLIB_INC} \
 	${BOOST_INC}
 LIBDIRS := \
 	${LIBBLASR_LIB} \
-	${LIBPBDATA_LIB} \
 	${LIBPBIHDF_LIB} \
+	${LIBPBDATA_LIB} \
 	${PBBAM_LIB} \
 	${HDF5_LIB} \
 	${HTSLIB_LIB} \
@@ -20,8 +20,8 @@ LIBDIRS := \
 LDLIBS+= \
 	${LIBPBIHDF_LIBFLAGS} \
 	${LIBBLASR_LIBFLAGS} \
-	${LIBPBDATA_LIBFLAGS} \
 	${LIBPBIHDF_LIBFLAGS} \
+	${LIBPBDATA_LIBFLAGS} \
 	${PBBAM_LIBFLAGS} \
 	${HDF5_LIBFLAGS} \
 	${HTSLIB_LIBFLAGS} \
@@ -33,4 +33,5 @@ LDLIBS+= \
 # We repeat LIBPBIHDF_LIBFLAGS because of a circular dependency. See #77.
 
 CPPFLAGS+=$(patsubst %,-I%,${INCDIRS})
+CPPFLAGS+=$(patsubst %,-isystem%,${SYSINCDIRS})
 LDFLAGS+=$(patsubst %,-L%,${LIBDIRS})
diff --git a/sub.mk b/sub.mk
new file mode 100644
index 0000000..297037f
--- /dev/null
+++ b/sub.mk
@@ -0,0 +1,9 @@
+
+SRCDIR:=$(dir $(realpath $(firstword ${MAKEFILE_LIST})))
+-include ${CURDIR}/defines.mk
+-include ${SRCDIR}/rules.mk
+
+export
+
+configure-submodule:
+	cd libcpp && ./configure.py
diff --git a/utils/.gitignore b/utils/.gitignore
index e088241..404d582 100644
--- a/utils/.gitignore
+++ b/utils/.gitignore
@@ -6,3 +6,5 @@
 /sawriter
 /sdpMatcher
 /toAfg
+*.swp
+tags
diff --git a/utils/LoadPulses.cpp b/utils/LoadPulses.cpp
index 34fc0ab..51763c4 100644
--- a/utils/LoadPulses.cpp
+++ b/utils/LoadPulses.cpp
@@ -1,22 +1,22 @@
 #define __FAST_MATH__
 
-#include "HDFCmpFile.hpp"
-#include "HDFBasReader.hpp"
-#include "HDFPlsReader.hpp"
-#include "HDFCCSReader.hpp"
-#include "datastructures/alignment/CmpFile.hpp"
-#include "alignment/CmpAlignment.hpp"
-#include "datastructures/alignment/ByteAlignment.h"
-#include "datastructures/alignment/AlignmentMap.hpp"
-#include "reads/BaseFile.hpp"
-#include "reads/PulseFile.hpp"
-#include "reads/ReadType.hpp"
-#include "loadpulses/MetricField.hpp"
-#include "loadpulses/MovieAlnIndexLookupTable.hpp"
-#include "utils/FileOfFileNames.hpp"
-#include "utils/TimeUtils.hpp"
-#include "files/BaseSequenceIO.hpp"
-#include "CommandLineParser.hpp"
+#include <HDFCmpFile.hpp>
+#include <HDFBasReader.hpp>
+#include <HDFPlsReader.hpp>
+#include <HDFCCSReader.hpp>
+#include <datastructures/alignment/CmpFile.hpp>
+#include <alignment/CmpAlignment.hpp>
+#include <datastructures/alignment/ByteAlignment.h>
+#include <datastructures/alignment/AlignmentMap.hpp>
+#include <reads/BaseFile.hpp>
+#include <reads/PulseFile.hpp>
+#include <reads/ReadType.hpp>
+#include <loadpulses/MetricField.hpp>
+#include <loadpulses/MovieAlnIndexLookupTable.hpp>
+#include <utils/FileOfFileNames.hpp>
+#include <utils/TimeUtils.hpp>
+#include <files/BaseSequenceIO.hpp>
+#include <CommandLineParser.hpp>
 #include <map>
 #include <set>
 #include <string>
@@ -103,8 +103,7 @@ void ExclusivelyAdd(const char *value, vector<string> &vect) {
 }
 
 bool AnyFieldRequiresFrameRate(vector<string> &fields) {
-    int i;
-    for (i = 0; i < fields.size(); i++ ) {
+    for (size_t i = 0; i < fields.size(); i++ ) {
         if (fields[i] == "PulseWidth" or
                 fields[i] == "IPD" or
                 fields[i] == "Light" or
@@ -207,7 +206,7 @@ vector<string> GetPulseMetrics() {
 // Return true if this metric can be computed from PulseCalls.
 bool IsPulseMetric(const string & metric) {
     vector<string> pulseMetrics = GetPulseMetrics();
-    for (int i = 0; i < pulseMetrics.size(); i++) {
+    for (size_t i = 0; i < pulseMetrics.size(); i++) {
         if (pulseMetrics[i] == metric) 
             return true;
     }
@@ -224,7 +223,7 @@ vector<string> GetMetricsToLoad(map<string, bool> & metricOptions) {
     // Get all supported metrics. 
     vector<string> supportedMetrics = GetAllSupportedMetrics();
     map<string, bool>::iterator metricIt;
-    for (int i = 0; i < supportedMetrics.size(); i++) {
+    for (size_t i = 0; i < supportedMetrics.size(); i++) {
         string metric = supportedMetrics[i];
         metricIt = metricOptions.find(metric);
         if (metricIt!=metricOptions.end() and metricIt->second) {
@@ -238,8 +237,7 @@ vector<string> GetMetricsToLoad(map<string, bool> & metricOptions) {
 void StoreDatasetFieldsFromPulseFields(MetricOptionsMap &fieldSet,
         RequirementMap &fieldRequirements, 
         vector<string> &datasetFields) {
-    int f;
-    int d;
+    size_t d;
     MetricOptionsMap::iterator optionsIt;
     for (optionsIt = fieldSet.begin(); optionsIt != fieldSet.end(); ++optionsIt) {
         if (optionsIt->second == true) {
@@ -258,8 +256,7 @@ void StoreDatasetFieldsFromPulseFields(MetricOptionsMap &fieldSet,
 void ParseMetricsList(string metricListString, MetricOptionsMap &metricOptions) {
     vector<string> metrics;
     Splice(metricListString, ",", metrics);
-    int m;
-    for  (m = 0; m < metrics.size(); m++) {
+    for  (size_t m = 0; m < metrics.size(); m++) {
         if (metricOptions.find(metrics[m]) != metricOptions.end()) {
             metricOptions[metrics[m]] = true;
         }
@@ -273,7 +270,7 @@ void ParseMetricsList(string metricListString, MetricOptionsMap &metricOptions)
 // Set default metric options to true
 void SetDefaultMetricOptions(map<string, bool> & metricOptions) {
     vector<string> defaultMetrics = GetDefaultMetrics(); 
-    for (int i = 0; i < defaultMetrics.size(); i++) {
+    for (size_t i = 0; i < defaultMetrics.size(); i++) {
         metricOptions[defaultMetrics[i]] = true;
     }
 }
@@ -282,7 +279,7 @@ void SetDefaultMetricOptions(map<string, bool> & metricOptions) {
 // Initialize all supported metric options and set all to false
 void CreateMetricOptions(map<string, bool> &metricOptions) {
     vector<string> supportedMetrics = GetAllSupportedMetrics();
-    for (int i = 0; i < supportedMetrics.size(); i++) {
+    for (size_t i = 0; i < supportedMetrics.size(); i++) {
         metricOptions[supportedMetrics[i]] = false;
     }
 }
@@ -296,7 +293,7 @@ bool AreAllFieldsAvailable(
         const bool       & usePulseFile) {
     bool allAvailable = true;
 
-    for (int i = 0; i < requiredFields.size(); i++) {
+    for (size_t i = 0; i < requiredFields.size(); i++) {
         Field field = requiredFields[i];
         if (field.type == BasField) {
             if (!useBaseFile or !hdfBasReader.FieldIsIncluded(field.name)
@@ -393,7 +390,7 @@ void CanMetricsBeComputed(
     for (metricIt = metricOptions.begin(); metricIt != metricOptions.end(); ++metricIt) {
         string metricName = metricIt->first;
         if (metricName == "") {
-            metricIt->second == false;
+            metricIt->second = false;
         }
 
         if (metricIt->second == false) {
@@ -429,7 +426,6 @@ UInt ComputeRequiredMemoryForThisField(
         HDFPlsReader   & hdfPlsReader,
         const bool     & useBaseFile,
         const bool     & usePulseFile) {
-    UInt memory = 0;
     if (thisField.type == BasField) {
         assert(useBaseFile);
         return hdfBasReader.GetFieldSize(thisField.name);
@@ -453,14 +449,15 @@ UInt ComputeRequiredMemory(
         HDFCmpFile<CmpAlignment> & cmpReader,
         UInt           & totalAlnLength) {
     UInt maxMemory = 0;
-    for (int i = 0; i < metricsToLoad.size(); i++) {
+    for (size_t i = 0; i < metricsToLoad.size(); i++) {
         UInt memoryForThisMetric = 0;
         vector<Field> fieldsToBeUsed;
         bool canBeComputed = CanThisMetricBeComputed(
                 metricsToLoad[i], hdfBasReader, hdfPlsReader,
                 useBaseFile, usePulseFile, fieldsToBeUsed);
+        (void)(canBeComputed);
 
-        for (int j = 0; j < fieldsToBeUsed.size(); j++) {
+        for (size_t j = 0; j < fieldsToBeUsed.size(); j++) {
             UInt memoryForThisField = ComputeRequiredMemoryForThisField(
                     fieldsToBeUsed[j], hdfBasReader, hdfPlsReader,
                     useBaseFile, usePulseFile);
@@ -562,6 +559,7 @@ void BuildLookupTable(
     //
     int refGroupId  = cmpFile.alnInfo.alignments[alignmentIndex].GetRefGroupId();
     int movieId     = cmpFile.alnInfo.alignments[alignmentIndex].GetMovieId();
+    (void)(movieId);
     UInt holeNumber = cmpFile.alnInfo.alignments[alignmentIndex].GetHoleNumber();
     int alnGroupId  = cmpFile.alnInfo.alignments[alignmentIndex].GetAlnGroupId();
 
@@ -689,6 +687,7 @@ void GetSourceRead(CmpFile      & cmpFile,
                    const string & alignedSequence,
                    SMRTSequence & sourceRead,   
                    unsigned int & numPasses) {
+    (void)(baseFile); (void)(pulseFile); (void)(alignedSequence);
 
     assert(!table.skip);
     //
@@ -752,9 +751,10 @@ void BuildLookupTablesAndMakeSane(
         const vector< pair<int,int> >    & toFrom,
         const set<uint32_t>              & moviePartHoleNumbers,
         vector<MovieAlnIndexLookupTable> & lookupTables) {
+    (void)(hdfPlsReader); (void)(hdfCcsReader); (void)(useCcsOnly); (void)(useBaseFile);
 
     lookupTables.resize(movieAlnIndex.size());
-    int movieAlignmentIndex = 0;
+    size_t movieAlignmentIndex = 0;
     for (movieAlignmentIndex = 0; movieAlignmentIndex < movieAlnIndex.size(); movieAlignmentIndex++) {
         BuildLookupTable(movieAlignmentIndex,
             cmpFile, 
@@ -795,16 +795,16 @@ void BuildLookupTablesAndMakeSane(
         //
         // Get sequence for this alignment from baseFile
         //
-        Nucleotide * seq = new Nucleotide[table.readLength]; 
+        Nucleotide * seq = new Nucleotide[table.readLength];
         baseFile.CopyArray(baseFile.baseCalls, table.readStart, table.readLength, seq);
         
         string readSequence;
         readSequence.resize(table.queryEnd - table.queryStart);
-        copy((char*) (seq + table.queryStart), 
+        copy((char*) (seq + table.queryStart),
              (char*) (seq + table.queryEnd), 
              readSequence.begin());
-        delete seq;
-        
+        delete []seq;
+
         //
         // Do a sanity check to make sure the pulses and the alignment
         // make sense.  The main check is to see if the query sequence
@@ -858,8 +858,8 @@ void GroupLookupTables(
 
     vector<pair<UInt, UInt> > refGroupIndexReadGroupIndexPairs;
     UInt movieAlignmentIndex = 0;
-    UInt preRefGroupIndex    = 0;
-    UInt preReadGroupIndex   = 0;
+    size_t preRefGroupIndex     = 0;
+    size_t preReadGroupIndex    = 0;
     UInt pairFirst           = 0;
     bool isVeryFirstGroup    = true;
 
@@ -906,8 +906,8 @@ void GroupLookupTables(
 
 
     // Double check all assumptions are met
-    for (int i = 0; i < refGroupIndexReadGroupIndexPairs.size(); i++) {
-        for (int j = i+1; j < refGroupIndexReadGroupIndexPairs.size(); j++) {
+    for (size_t i = 0; i < refGroupIndexReadGroupIndexPairs.size(); i++) {
+        for (size_t j = i+1; j < refGroupIndexReadGroupIndexPairs.size(); j++) {
             // Assure that assumption (1) is met. If this assertion fails, 
             // then alignments in the input cmp.h5 are not grouped by
             // reference. Check /AlnInfo/AlnIndex dataset column 3.
@@ -915,8 +915,7 @@ void GroupLookupTables(
         }
     }
     assert(groupedLookupTablesIndexPairs.size() == refGroupIndexReadGroupIndexPairs.size());
-    int i ;
-    for (i = 0; i < groupedLookupTablesIndexPairs.size(); i++) {
+    for (size_t i = 0; i < groupedLookupTablesIndexPairs.size(); i++) {
         UInt firstIndex     = groupedLookupTablesIndexPairs[i].first;
         UInt lastIndex      = groupedLookupTablesIndexPairs[i].second;
         UInt refGroupIndex  = refGroupIndexReadGroupIndexPairs[i].first;
@@ -944,6 +943,7 @@ void CacheRequiredFieldsForMetric(
         const bool                  & useCcsOnly,
         vector<Field>               & cachedFields,
         const string                & curMetric) {
+    (void)(hdfCcsReader); (void)(useCcsOnly);
 
     vector<Field> fieldsToBeUsed;
     bool canBeComputed = CanThisMetricBeComputed( 
@@ -952,9 +952,9 @@ void CacheRequiredFieldsForMetric(
     assert(canBeComputed);
 
     // Cache all required fields 
-    for (int i = 0; i < fieldsToBeUsed.size(); i++) {
+    for (size_t i = 0; i < fieldsToBeUsed.size(); i++) {
         bool isFieldCached = false;
-        for (int j = 0; j < cachedFields.size(); j++) {
+        for (size_t j = 0; j < cachedFields.size(); j++) {
             if (fieldsToBeUsed[i] == cachedFields[j]) {
                 isFieldCached = true;
                 break;
@@ -997,7 +997,7 @@ void ClearCachedFields(
         vector<Field>              & cachedFields,
         const string               & curMetric,
         const string               & nextMetric) {
-
+    (void)(hdfCcsReader); (void)(useCcsOnly); (void)(curMetric);
  
     vector<Field> nextRequiredFields;
     if (nextMetric != "") {
@@ -1006,9 +1006,9 @@ void ClearCachedFields(
             useBaseFile, usePulseFile, nextRequiredFields); 
         assert(canBeComputed);
     }
-    for (int i = 0; i < cachedFields.size(); i++) {
+    for (size_t i = 0; i < cachedFields.size(); i++) {
         bool isRequiredForNextMetric = false;
-        for (int j = 0; j < nextRequiredFields.size(); j++) {
+        for (size_t j = 0; j < nextRequiredFields.size(); j++) {
             if (cachedFields[i] == nextRequiredFields[j]) {
                 isRequiredForNextMetric = true;
                 break;
@@ -1137,9 +1137,9 @@ void WriteMetric(
         vector<MovieAlnIndexLookupTable> & lookupTables,
         vector<pair<UInt, UInt> >        & groupedLookupTablesIndexPairs,
         const string                     & curMetric ) {
+    (void)(cmpFile); (void)(hdfCcsReader); (void)(useCcsOnly);
 
-    int movieAlignmentIndex = 0;
-    for (int index = 0; index < groupedLookupTablesIndexPairs.size(); index++) {
+    for (size_t index = 0; index < groupedLookupTablesIndexPairs.size(); index++) {
         // Group[index] contains all items in lookupTables[firstIndex...lastIndex)
         UInt firstIndex = groupedLookupTablesIndexPairs[index].first;
         UInt lastIndex  = groupedLookupTablesIndexPairs[index].second;
@@ -1263,13 +1263,12 @@ void WriteMetric(
             exit(1);
         }
 
-        for (movieAlignmentIndex = firstIndex; movieAlignmentIndex < lastIndex; movieAlignmentIndex++) {
+        for (size_t movieAlignmentIndex = firstIndex; movieAlignmentIndex < lastIndex; movieAlignmentIndex++) {
             MovieAlnIndexLookupTable & lookupTable   = lookupTables[movieAlignmentIndex];
             if (lookupTable.skip) continue;
 
             const UInt alignedSequenceLength         = lookupTable.offsetEnd - lookupTable.offsetBegin; 
             const UInt ungappedAlignedSequenceLength = lookupTable.queryEnd  - lookupTable.queryStart;
-            const UInt   & readIndex                 = lookupTable.readIndex;
             const UInt   & plsReadIndex              = lookupTable.plsReadIndex;
             const UInt   & readStart                 = lookupTable.readStart;
             const UInt   & readLength                = lookupTable.readLength;
@@ -1623,8 +1622,7 @@ void WriteMetricWhenStarted(
 //
 string MetricsToString(const vector<string> & metrics) {
     string ret = ""; 
-    int j = 0; 
-    for (int i = 0; i < metrics.size(); i++) {
+    for (size_t i = 0; i < metrics.size(); i++) {
         ret += metrics[i]; 
         if (i != metrics.size()-1) ret += ","; 
         if (i % 4 == 3) ret += "\n";
@@ -1676,8 +1674,6 @@ int main(int argc, char* argv[]) {
     AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionStr);
 
     string cmpFileName, movieFileName;
-    int argi = 3;
-    int numMetrics = 8;
     map<string,bool> metricOptions;
     int maxElements = 0;
     //Maximum Memory allowed for bymetric is 6 GB
@@ -1725,7 +1721,7 @@ int main(int argc, char* argv[]) {
             "(default value: 4 GB). Use -byread if the limit is exceeded.",
             CommandLineParser::PositiveInteger);
     int metaNElements, rawChunkSize, rawNElements;
-    metaNElements = rawChunkSize = metaNElements = 0;
+    metaNElements = 0; rawChunkSize = 0; metaNElements = 0;
     clp.RegisterIntOption("metaNElements", & metaNElements,
             "Set number of elements in meta data cache for reading bas/bax/pls.h5 file.",
             CommandLineParser::PositiveInteger);
@@ -1788,7 +1784,7 @@ int main(int argc, char* argv[]) {
     HDFCCSReader<SMRTSequence> hdfCcsReader;
 
     vector<string> baseFileFields, pulseFileFields;
-    int fieldIndex;
+    size_t fieldIndex;
     bool useBaseFile = false, usePulseFile = false;
     for (fieldIndex = 0; fieldIndex < datasetFields.size(); fieldIndex++) {
         if (hdfBasReader.ContainsField(datasetFields[fieldIndex])) {
@@ -1864,7 +1860,7 @@ int main(int argc, char* argv[]) {
     for (movieIndex = 0; movieIndex < nMovies; movieIndex++) {
         FileType fileType;
         BaseSequenceIO::DetermineFileTypeByExtension(movieFileNames[movieIndex], fileType, true);
-        if (fileType == HDFCCSONLY) {
+        if (fileType == FileType::HDFCCSONLY) {
             useCcsOnly = true;
         }
     }
@@ -1955,8 +1951,7 @@ int main(int argc, char* argv[]) {
     // Load pulses from movies in order they appear in the input fofn.
     //
     int m;
-    int fofnMovieIndex;
-    for (fofnMovieIndex = 0; fofnMovieIndex < fofnMovieNames.size(); fofnMovieIndex++) {
+    for (size_t fofnMovieIndex = 0; fofnMovieIndex < fofnMovieNames.size(); fofnMovieIndex++) {
         bool byMetricForThisMovie = byMetric;
 
         if (cmpFile.readType == ReadType::CCS or useCcsOnly) {
@@ -1994,7 +1989,7 @@ int main(int argc, char* argv[]) {
 
         string cmpFileMovieName;
 
-        for (m = 0; m < cmpFile.movieInfo.name.size(); m++) {
+        for (m = 0; m < static_cast<int>(cmpFile.movieInfo.name.size()); m++) {
             //
             // First find the file name for the movie 'm'
             //
@@ -2012,7 +2007,7 @@ int main(int argc, char* argv[]) {
         // alignments were found between the input bas.h5 and the
         // reference.  That shouldn't happen.
         // 
-        if (m == cmpFile.movieInfo.name.size()) {
+        if (m == static_cast<int>(cmpFile.movieInfo.name.size())) {
             cout << "WARNING: Could not find any alignments for file " << movieFileNames[fofnMovieIndex] << endl;
             continue;
         }
@@ -2070,7 +2065,7 @@ int main(int argc, char* argv[]) {
         {
             UInt requiredMem = ComputeRequiredMemory(metricsToLoad, hdfBasReader, 
                     hdfPlsReader, useBaseFile, usePulseFile, cmpReader, totalAlnLength);
-            if (hdfBasReader.baseArray.arrayLength > hdfBasReader.maxAllocNElements or 
+            if (hdfBasReader.baseArray.arrayLength > static_cast<DSLength>(hdfBasReader.maxAllocNElements) or 
                 (usePulseFile and 
                  hdfPlsReader.GetStartFrameSize() > hdfPlsReader.maxAllocNElements) or
                 ((float)requiredMem / 1024 / 1024) > maxMemory) {
@@ -2114,7 +2109,6 @@ int main(int argc, char* argv[]) {
         //
         cout << "loading " <<  movieIndexSets[movieIndex].size() << " alignments for movie " << movieIndex << endl;
 
-        UInt i;
         if (byMetricForThisMovie) {
             //
             // Build lookup tables for all alignments which 
@@ -2174,7 +2168,7 @@ int main(int argc, char* argv[]) {
                 cachedFields.push_back(Field("NumEvent", PlsField));
             } 
 
-            for (int metricsToLoadIndex = 0; metricsToLoadIndex < metricsToLoad.size(); metricsToLoadIndex++) {
+            for (size_t metricsToLoadIndex = 0; metricsToLoadIndex < metricsToLoad.size(); metricsToLoadIndex++) {
                 string curMetric = metricsToLoad[metricsToLoadIndex];
                 // Metric "WhenStarted" should have been loaded before getting here.
                 if (curMetric == "WhenStarted") {
@@ -2252,14 +2246,12 @@ int main(int argc, char* argv[]) {
                 }
 
                 UInt & alignmentIndex = lookupTable.alignmentIndex;
-                int  & refGroupIndex  = lookupTable.refGroupIndex;
-                int  & readGroupIndex = lookupTable.readGroupIndex;
+                size_t  & refGroupIndex  = lookupTable.refGroupIndex;
+                size_t  & readGroupIndex = lookupTable.readGroupIndex;
                 UInt & holeNumber     = lookupTable.holeNumber;
-                int  & readIndex      = lookupTable.readIndex;
-                int  & queryStart     = lookupTable.queryStart;
-                int  & queryEnd       = lookupTable.queryEnd;
-                int  & readStart      = lookupTable.readStart;
-                int  & readLength     = lookupTable.readLength;
+                size_t  & readIndex   = lookupTable.readIndex;
+                UInt & queryStart     = lookupTable.queryStart;
+                UInt & queryEnd       = lookupTable.queryEnd;
                 UInt & offsetBegin    = lookupTable.offsetBegin;
                 UInt & offsetEnd      = lookupTable.offsetEnd;
 
@@ -2319,17 +2311,16 @@ int main(int argc, char* argv[]) {
                 vector<UChar> qvMetric;
                 vector<HalfWord> frameRateMetric;
                 vector<uint32_t> timeMetric;
-                int ungappedAlignedSequenceLength = alignedSequence.size();
+                UInt ungappedAlignedSequenceLength = alignedSequence.size();
                 assert(ungappedAlignedSequenceLength == queryEnd - queryStart);
 
-                int alignedSequenceLength = offsetEnd - offsetBegin;
+                UInt alignedSequenceLength = offsetEnd - offsetBegin;
                 readPulseMetric.resize(alignedSequenceLength+1);
                 qvMetric.resize(alignedSequenceLength+1);
                 frameRateMetric.resize(alignedSequenceLength+1);
                 timeMetric.resize(alignedSequenceLength+1);
 
                 UInt i;
-                UInt pi;
 
                 HDFCmpExperimentGroup* expGroup = cmpReader.refAlignGroups[refGroupIndex]->readGroups[readGroupIndex];
                 UInt alnArrayLength = expGroup->alignmentArray.size();
@@ -2413,7 +2404,7 @@ int main(int argc, char* argv[]) {
                     }
                     readDeletionTagMetric[i] = '\0';
                     for (i = 0; i < ungappedAlignedSequenceLength; i++ ) {
-                        assert(baseToAlignmentMap[i] < readDeletionTagMetric.size());
+                        assert(baseToAlignmentMap[i] < static_cast<int>(readDeletionTagMetric.size()));
                         readDeletionTagMetric[baseToAlignmentMap[i]] = sourceRead.deletionTag[queryStart+i];
                     }
                     readDeletionTagMetric[readDeletionTagMetric.size()-1] = 0;
diff --git a/utils/PulseToFasta.cpp b/utils/PulseToFasta.cpp
index 9effafe..971b603 100644
--- a/utils/PulseToFasta.cpp
+++ b/utils/PulseToFasta.cpp
@@ -2,18 +2,18 @@
 #include <iostream>
 #include <vector>
 
-#include "HDFPlsReader.hpp"
-#include "HDFUtils.hpp"
-#include "HDFRegionTableReader.hpp"
-#include "reads/RegionTable.hpp"
-#include "reads/ReadInterval.hpp"
-#include "files/ReaderAgglomerate.hpp"
-#include "utils/FileOfFileNames.hpp"
-#include "utils/RegionUtils.hpp"
-#include "utils/TimeUtils.hpp"
-#include "SMRTSequence.hpp"
-#include "utils.hpp"
-#include "CommandLineParser.hpp"
+#include <HDFPlsReader.hpp>
+#include <HDFUtils.hpp>
+#include <HDFRegionTableReader.hpp>
+#include <reads/RegionTable.hpp>
+#include <reads/ReadInterval.hpp>
+#include <files/ReaderAgglomerate.hpp>
+#include <utils/FileOfFileNames.hpp>
+#include <utils/RegionUtils.hpp>
+#include <utils/TimeUtils.hpp>
+#include <SMRTSequence.hpp>
+#include <utils.hpp>
+#include <CommandLineParser.hpp>
 
 
 using namespace std;
@@ -30,15 +30,12 @@ int main(int argc, char* argv[]) {
 	bool trimByRegion, maskByRegion;
 	trimByRegion = false;
 	maskByRegion = false;
-	int argi = 3;
 	RegionTable regionTable;
 	string regionsFOFNName = "";
 	vector<string> regionFileNames;
 	bool splitSubreads = true;
 	int minSubreadLength = 0;
 	bool addSimulatedData = false;
-	bool printSimulatedCoordinate = false;
-	bool printSimulatedSequenceIndex = false;
   bool printFastq = false;
   bool printCcs   = false;
   int  lineLength = 50;
@@ -97,13 +94,12 @@ int main(int argc, char* argv[]) {
     
 	ofstream fastaOut;
 	CrucialOpen(fastaOutName, fastaOut);
-	int plsFileIndex;
 	HDFRegionTableReader hdfRegionReader;
     sort(holeNumbers.begin(), holeNumbers.end());
 
     vector<int> pls2rgn = MapPls2Rgn(plsFileNames, regionFileNames);
    
-    for (plsFileIndex = 0; plsFileIndex < plsFileNames.size(); plsFileIndex++) {
+    for (size_t plsFileIndex = 0; plsFileIndex < plsFileNames.size(); plsFileIndex++) {
         if (trimByRegion or maskByRegion or splitSubreads) {
             hdfRegionReader.Initialize(regionFileNames[pls2rgn[plsFileIndex]]);
             hdfRegionReader.ReadTable(regionTable);
@@ -222,13 +218,11 @@ int main(int argc, char* argv[]) {
       //
       // Output all subreads as separate sequences.
       //
-      int intvIndex;
       SMRTSequence bestSubreadSequence;
       int bestSubreadScore = -1;
       int bestSubreadIndex = 0;
-      int bestSubreadStart = 0, bestSubreadEnd = 0;
       SMRTSequence bestSubread;
-      for (intvIndex = 0; intvIndex < subreadIntervals.size(); intvIndex++) {
+      for (size_t intvIndex = 0; intvIndex < subreadIntervals.size(); intvIndex++) {
         SMRTSequence subreadSequence, subreadSequenceRC;
 					
         subreadSequence.SubreadStart(subreadIntervals[intvIndex].start);
@@ -244,7 +238,7 @@ int main(int argc, char* argv[]) {
         }
 
         if (subreadSequence.SubreadStart() >= subreadSequence.SubreadEnd() or 
-            subreadSequence.SubreadEnd() - subreadSequence.SubreadStart() <= minSubreadLength) {
+            subreadSequence.SubreadEnd() - subreadSequence.SubreadStart() <= DNALength(minSubreadLength)) {
           //
           // There is no high qualty portion of this subread. Skip it.
           //
@@ -299,6 +293,7 @@ int main(int argc, char* argv[]) {
           int subreadWeightedScore = subreadSequence.length * hqRegionScore;
           if (subreadWeightedScore > bestSubreadScore) {
             bestSubreadIndex = intvIndex;
+            (void)(bestSubreadIndex);
             bestSubread = subreadSequence;
             bestSubreadScore = subreadWeightedScore;
           }
diff --git a/utils/SAWriter.cpp b/utils/SAWriter.cpp
index 883723c..9ece42e 100644
--- a/utils/SAWriter.cpp
+++ b/utils/SAWriter.cpp
@@ -1,64 +1,70 @@
 #include <vector>
 #include <string>
-#include "suffixarray/SuffixArray.hpp"
-#include "FASTASequence.hpp"
-#include "FASTAReader.hpp"
-#include "NucConversion.hpp"
-#include "Types.h"
-#include "suffixarray/ssort.hpp"
-#include "algorithms/sorting/qsufsort.hpp"
-#include "algorithms/sorting/Karkkainen.hpp"
-#include "CompressedSequence.hpp"
+#include <suffixarray/SuffixArray.hpp>
+#include <FASTASequence.hpp>
+#include <FASTAReader.hpp>
+#include <NucConversion.hpp>
+#include <Types.h>
+#include <suffixarray/ssort.hpp>
+#include <algorithms/sorting/qsufsort.hpp>
+#include <algorithms/sorting/Karkkainen.hpp>
+#include <CompressedSequence.hpp>
 
 
 void PrintUsage() {
-	cout << "usage: sawriter saOut fastaIn [fastaIn2 fastaIn3 ...] [-blt p] [-larsson] [-4bit] [-manmy] [-kar]" << endl;
+  cout << "usage: sawriter saOut fastaIn [fastaIn2 fastaIn3 ...] [-blt p] [-larsson] [-4bit] [-mamy] [-kark]" << endl;
   cout << "   or  sawriter fastaIn  (writes to fastIn.sa)." << endl;
-	cout << "       -blt p      Build a lookup table on prefixes of length 'p'. This speeds " << endl
-			 << "                   up lookups considerably (more than the LCP table), but misses matches " << endl
-			 << "                   less than p when searching." << endl;
-	cout << "       -4bit       Read in (one) fasta file as a compressed sequence file." << endl;
-	cout << "       -larsson  (default)  Uses the method of Larsson and Sadakane to build the array." << endl;
-	cout << "       -mamy      Uses the method of MAnber and MYers to build the array (slower than larsson, " << endl
-			 << "                   and produces the same result. This is mainly for double checking"<<endl
-			 << "                   the correctness of larsson)." << endl
-			 << "       -kark       Use Karkkainen DS3 method for building the suffix array.  This will probably be more "<<endl
-			 << "                   slow than larsson, but takes only an extra N/(sqrt 3) extra space." << endl
-			 << "       -mafe       (disabled for now!) Use the lightweight construction algorithm from Manzini and Ferragina" << endl
-			 << "       -welter     Use lightweight (sort of light) suffix array construction.  This is a bit more slow than" << endl
-			 << "                   normal larsson." << endl
-			 << "       -welterweight N use a difference cover of size N for building the suffix array.  Valid values are 7,32,64,111, and 2281." << endl;
+  cout << "       -blt p      Build a lookup table on prefixes of length 'p'. This speeds " << endl
+       << "                   up lookups considerably (more than the LCP table), but misses matches " << endl
+       << "                   less than p when searching." << endl;
+  cout << "       -4bit       Read in (one) fasta file as a compressed sequence file." << endl;
+  cout << "       -larsson  (default)  Uses the method of Larsson and Sadakane to build the array." << endl;
+  cout << "       -mamy      Uses the method of MAnber and MYers to build the array (slower than larsson, " << endl
+       << "                   and produces the same result. This is mainly for double checking"<<endl
+       << "                   the correctness of larsson)." << endl
+       << "       -kark       Use Karkkainen DS3 method for building the suffix array.  This will probably be more "<<endl
+       << "                   slow than larsson, but takes only an extra N/(sqrt 3) extra space." << endl
+       << "       -mafe       (disabled for now!) Use the lightweight construction algorithm from Manzini and Ferragina" << endl
+       << "       -welter     Use lightweight (sort of light) suffix array construction.  This is a bit more slow than" << endl
+       << "                   normal larsson." << endl
+       << "       -welterweight N use a difference cover of size N for building the suffix array.  Valid values are 7,32,64,111, and 2281." << endl;
 
 
 }
 
 int main(int argc, char* argv[]) {
 
-	if (argc < 2) {
-		PrintUsage();
-		exit(1);
-	}
-	int argi = 1;
-	string saFile = argv[argi++];
-	vector<string> inFiles;
-	
-	int doBLT = 1;
-	int bltPrefixLength = 8;
-	int parsingOptions = 0;
-	SAType saBuildType = larsson;
-	int read4BitCompressed  = 0;
-	int diffCoverSize = 0;
-	while (argi < argc) {
-		if (strlen(argv[argi]) > 0 and
-				argv[argi][0] == '-'){ 
-			parsingOptions = 1;
-		}
-		if (!parsingOptions) {
-			inFiles.push_back(argv[argi]);
-		}
-		else {
-			if (strcmp(argv[argi], "-blt") == 0) {
-				doBLT = 1;
+  if (argc < 2) {
+    PrintUsage();
+    exit(1);
+  }
+  else if (strcmp(argv[1], "-h") == 0 or
+           strcmp(argv[1], "-help") == 0 or
+           strcmp(argv[1], "--help") == 0) {
+    PrintUsage();
+    exit(0);
+  }
+  int argi = 1;
+  string saFile = argv[argi++];
+  vector<string> inFiles;
+
+  int doBLT = 1;
+  int bltPrefixLength = 8;
+  int parsingOptions = 0;
+  SAType saBuildType = larsson;
+  int read4BitCompressed  = 0;
+  int diffCoverSize = 0;
+  while (argi < argc) {
+    if (strlen(argv[argi]) > 0 and
+        argv[argi][0] == '-'){
+      parsingOptions = 1;
+    }
+    if (!parsingOptions) {
+      inFiles.push_back(argv[argi]);
+    }
+    else {
+      if (strcmp(argv[argi], "-blt") == 0) {
+        doBLT = 1;
         if (argi < argc - 1) {
           bltPrefixLength = atoi(argv[++argi]);
           if (bltPrefixLength == 0) {
@@ -70,29 +76,29 @@ int main(int argc, char* argv[]) {
           cout << "Please specify a lookup table length." << endl;
           exit(1);
         }
-			}
-			else if (strcmp(argv[argi], "-mamy") == 0) {
-				saBuildType = manmy;
-			}
-			else if (strcmp(argv[argi], "-larsson") == 0) {
-				saBuildType = larsson;
-			}
-			else if (strcmp(argv[argi], "-mcilroy") == 0) {
-				saBuildType = mcilroy;
-			}
-			else if (strcmp(argv[argi], "-slow") == 0) {
-				saBuildType = slow;
-			}
-			else if (strcmp(argv[argi], "-kark") == 0) {
-				saBuildType = kark;
-			}
-			else if (strcmp(argv[argi], "-mafe") == 0) {
-				saBuildType = mafe;
-			}
-			else if (strcmp(argv[argi], "-welter") == 0) {
-				saBuildType = welter;
-			}
-			else if (strcmp(argv[argi], "-welterweight") == 0) {
+      }
+      else if (strcmp(argv[argi], "-mamy") == 0) {
+        saBuildType = manmy;
+      }
+      else if (strcmp(argv[argi], "-larsson") == 0) {
+        saBuildType = larsson;
+      }
+      else if (strcmp(argv[argi], "-mcilroy") == 0) {
+        saBuildType = mcilroy;
+      }
+      else if (strcmp(argv[argi], "-slow") == 0) {
+        saBuildType = slow;
+      }
+      else if (strcmp(argv[argi], "-kark") == 0) {
+        saBuildType = kark;
+      }
+      else if (strcmp(argv[argi], "-mafe") == 0) {
+        saBuildType = mafe;
+      }
+      else if (strcmp(argv[argi], "-welter") == 0) {
+        saBuildType = welter;
+      }
+      else if (strcmp(argv[argi], "-welterweight") == 0) {
         if (argi < argc-1) {
           diffCoverSize = atoi(argv[++argi]);
         }
@@ -100,28 +106,34 @@ int main(int argc, char* argv[]) {
           cout << "Please specify a difference cover size.  Valid values are 7,32,64,111, and 2281.  Larger values use less memory but may be slower." << endl;
           exit(1);
         }
-        if ( ! (diffCoverSize == 7 or 
+        if ( ! (diffCoverSize == 7 or
                 diffCoverSize == 32 or
-                diffCoverSize == 64 or 
+                diffCoverSize == 64 or
                 diffCoverSize == 111 or
                 diffCoverSize == 2281) ) {
           cout << "The difference cover size must be one of 7,32,64,111, or 2281." << endl;
           cout << "Larger numbers use less space but are more slow." << endl;
           exit(1);
         }
-			}
-			else if (strcmp(argv[argi], "-4bit") == 0) {
-				read4BitCompressed = 1;
-			}
-			else {
-				PrintUsage();
-				cout << "ERROR, bad option: " << argv[argi] << endl;
-				exit(1);
-			}
-		}
-		++argi;
-	}
-  
+      }
+      else if (strcmp(argv[argi], "-4bit") == 0) {
+        read4BitCompressed = 1;
+      }
+      else if (strcmp(argv[argi], "-h") == 0 or
+               strcmp(argv[argi], "-help") == 0 or
+               strcmp(argv[argi], "--help") == 0) {
+        PrintUsage();
+        exit(0);
+      }
+      else {
+        PrintUsage();
+        cout << "ERROR, bad option: " << argv[argi] << endl;
+        exit(1);
+      }
+    }
+    ++argi;
+  }
+
   if (inFiles.size() == 0) {
     //
     // Special use case: the input file is a fasta file.  Write to that file + .sa
@@ -129,46 +141,46 @@ int main(int argc, char* argv[]) {
     inFiles.push_back(saFile);
     saFile = saFile + ".sa";
   }
-  
-	VectorIndex inFileIndex;
-	FASTASequence seq;
-	CompressedSequence<FASTASequence> compSeq;
-
-	if (read4BitCompressed == 0) {
-		for (inFileIndex = 0; inFileIndex < inFiles.size(); ++inFileIndex) {
-			FASTAReader reader;
-			reader.Init(inFiles[inFileIndex]);
-			reader.SetSpacePadding(111);
-			if (saBuildType == kark) {
-				//
-				// The Karkkainen sa building method requires a little extra
-				// space at the end of the dna sequence so that counting may
-				// be done mod 3 without adding extra logic for boundaries.
-				//
-			}
-  
-			if (inFileIndex == 0) {
-				reader.ReadAllSequencesIntoOne(seq);
-				reader.Close();
-			}
-			else {
-				while(reader.ConcatenateNext(seq)) {
-					cout << "added " << seq.title << endl;
-				}
-			}
-		}
-		seq.ToThreeBit();
-		//seq.ToUpper();
-	}
-	else {
-		assert(inFiles.size() == 1);
-		cout << "reading compressed sequence." << endl;
-		compSeq.Read(inFiles[0]);
-		seq.seq = compSeq.seq;
-		seq.length = compSeq.length;
-		compSeq.RemoveCompressionCounts();
-		cout << "done." << endl;
-	}
+
+  VectorIndex inFileIndex;
+  FASTASequence seq;
+  CompressedSequence<FASTASequence> compSeq;
+
+  if (read4BitCompressed == 0) {
+    for (inFileIndex = 0; inFileIndex < inFiles.size(); ++inFileIndex) {
+      FASTAReader reader;
+      reader.Init(inFiles[inFileIndex]);
+      reader.SetSpacePadding(111);
+      if (saBuildType == kark) {
+        //
+        // The Karkkainen sa building method requires a little extra
+        // space at the end of the dna sequence so that counting may
+        // be done mod 3 without adding extra logic for boundaries.
+        //
+      }
+
+      if (inFileIndex == 0) {
+        reader.ReadAllSequencesIntoOne(seq);
+        reader.Close();
+      }
+      else {
+        while(reader.ConcatenateNext(seq)) {
+          cout << "added " << seq.title << endl;
+        }
+      }
+    }
+    seq.ToThreeBit();
+    //seq.ToUpper();
+  }
+  else {
+    assert(inFiles.size() == 1);
+    cout << "reading compressed sequence." << endl;
+    compSeq.Read(inFiles[0]);
+    seq.seq = compSeq.seq;
+    seq.length = compSeq.length;
+    compSeq.RemoveCompressionCounts();
+    cout << "done." << endl;
+  }
 
   //
   // For now, do not allow creation of suffix arrays on sequences > 4G.
@@ -179,53 +191,53 @@ int main(int argc, char* argv[]) {
     cout << "against each file, and merging the result." << endl;
     exit(1);
   }
-	vector<int> alphabet;
-	
-	SuffixArray<Nucleotide, vector<int> >  sa;
-	//	sa.InitTwoBitDNAAlphabet(alphabet);
-	//	sa.InitAsciiCharDNAAlphabet(alphabet);
+  vector<int> alphabet;
+
+  SuffixArray<Nucleotide, vector<int> >  sa;
+  //  sa.InitTwoBitDNAAlphabet(alphabet);
+  //  sa.InitAsciiCharDNAAlphabet(alphabet);
   sa.InitThreeBitDNAAlphabet(alphabet);
 
-	if (saBuildType == manmy) {
-		sa.MMBuildSuffixArray(seq.seq, seq.length, alphabet);
-	}
-	else if (saBuildType == mcilroy) {
-		sa.index = new SAIndex[seq.length+1];
-		DNALength i;
-		for (i = 0; i < seq.length; i++) { sa.index[i] = seq.seq[i] + 1;}
-		sa.index[seq.length] = 0;
-		ssort(sa.index, NULL);
-		for (i = 1; i < seq.length+1; i++ ){ sa.index[i-1] = sa.index[i];};
-		sa.length = seq.length;
-	}
-	else if (saBuildType == larsson) {
-		sa.LarssonBuildSuffixArray(seq.seq, seq.length, alphabet);
-	}
-	else if (saBuildType == kark) {
-		sa.index = new SAIndex[seq.length];
-		seq.ToThreeBit();
-		DNALength p;
-		for (p = 0; p < seq.length; p++ ){ seq.seq[p]++; }
-		KarkkainenBuildSuffixArray<Nucleotide>(seq.seq, sa.index, seq.length, 5);
-		sa.length = seq.length;
-	}
-	else if (saBuildType == mafe) {
-		//		sa.MaFeBuildSuffixArray(seq.seq, seq.length);
-		
-	}
-	else if (saBuildType == welter) {
-		if (diffCoverSize == 0) {
-			sa.LightweightBuildSuffixArray(seq.seq, seq.length);
-		}
-		else {
-			sa.LightweightBuildSuffixArray(seq.seq, seq.length, diffCoverSize);
-		}
-	}
-	if (doBLT) {
-		sa.BuildLookupTable(seq.seq, seq.length, bltPrefixLength);
-	}
-	sa.Write(saFile);
-
-	return 0;
+  if (saBuildType == manmy) {
+    sa.MMBuildSuffixArray(seq.seq, seq.length, alphabet);
+  }
+  else if (saBuildType == mcilroy) {
+    sa.index = new SAIndex[seq.length+1];
+    DNALength i;
+    for (i = 0; i < seq.length; i++) { sa.index[i] = seq.seq[i] + 1;}
+    sa.index[seq.length] = 0;
+    ssort(sa.index, NULL);
+    for (i = 1; i < seq.length+1; i++ ){ sa.index[i-1] = sa.index[i];};
+    sa.length = seq.length;
+  }
+  else if (saBuildType == larsson) {
+    sa.LarssonBuildSuffixArray(seq.seq, seq.length, alphabet);
+  }
+  else if (saBuildType == kark) {
+    sa.index = new SAIndex[seq.length];
+    seq.ToThreeBit();
+    DNALength p;
+    for (p = 0; p < seq.length; p++ ){ seq.seq[p]++; }
+    KarkkainenBuildSuffixArray<Nucleotide>(seq.seq, sa.index, seq.length, 5);
+    sa.length = seq.length;
+  }
+  else if (saBuildType == mafe) {
+    //    sa.MaFeBuildSuffixArray(seq.seq, seq.length);
+
+  }
+  else if (saBuildType == welter) {
+    if (diffCoverSize == 0) {
+      sa.LightweightBuildSuffixArray(seq.seq, seq.length);
+    }
+    else {
+      sa.LightweightBuildSuffixArray(seq.seq, seq.length, diffCoverSize);
+    }
+  }
+  if (doBLT) {
+    sa.BuildLookupTable(seq.seq, seq.length, bltPrefixLength);
+  }
+  sa.Write(saFile);
+
+  return 0;
 
 }
diff --git a/utils/SDPMatcher.cpp b/utils/SDPMatcher.cpp
index d35a7d6..83195e1 100644
--- a/utils/SDPMatcher.cpp
+++ b/utils/SDPMatcher.cpp
@@ -3,17 +3,17 @@
 #include <assert.h>
 #include <math.h>
 
-#include "defs.h"
-#include "FASTAReader.hpp"
-#include "FASTASequence.hpp"
-#include "tuples/DNATuple.hpp"
-#include "tuples/TupleMetrics.hpp"
-#include "datastructures/alignment/Path.h"
-#include "datastructures/alignment/Alignment.hpp"
-#include "algorithms/alignment/AlignmentUtils.hpp"
-#include "format/StickAlignmentPrinter.hpp"
-#include "algorithms/alignment/SWAlign.hpp"
-#include "algorithms/alignment/SDPAlign.hpp"
+#include <defs.h>
+#include <FASTAReader.hpp>
+#include <FASTASequence.hpp>
+#include <tuples/DNATuple.hpp>
+#include <tuples/TupleMetrics.hpp>
+#include <datastructures/alignment/Path.h>
+#include <datastructures/alignment/Alignment.hpp>
+#include <algorithms/alignment/AlignmentUtils.hpp>
+#include <format/StickAlignmentPrinter.hpp>
+#include <algorithms/alignment/SWAlign.hpp>
+#include <algorithms/alignment/SDPAlign.hpp>
 
 /* 
  * Performs sparse dynamic programming (SDP) between pairs of sequences as they 
@@ -43,12 +43,14 @@ int main(int argc, char* argv[]) {
     int indel = 3;
     int match = 0;
     int printSW = 0;
+    int printSimilarity = 0;
     int refineAlignments = 1;
     int showalign = 0;
     int fixedTarget = 0;
     int sdpIndel = indel;
     int sdpIns = 5;
     int sdpDel = 5;
+    (void)(sdpIndel); (void)(sdpIns); (void)(sdpDel); // not yet used.
     AlignmentType alignType = Global;
     while (argi < argc) {
         if (strcmp(argv[argi], "-indelRate") == 0) {
@@ -85,6 +87,9 @@ int main(int argc, char* argv[]) {
         else if (strcmp(argv[argi], "-fixedtarget") == 0) {
             fixedTarget = 1;
         }
+        else if (strcmp(argv[argi], "-printSimilarity") == 0) {
+            printSimilarity = 1;
+        }
         else {
             PrintUsage();
             cout << "Bad option: " << argv[argi] << endl;
@@ -119,7 +124,10 @@ int main(int argc, char* argv[]) {
         targetReader.GetNext(target);
     }
 
-    cout << "qid,tid,qstart,qend,qlen,tstart,tend,tlen,score" << endl;
+    cout << "qid,tid,qstart,qend,qlen,tstart,tend,tlen,score";
+    if (printSimilarity) cout << ",pctSimilarity";
+    cout << endl;
+    
     while (queryReader.GetNext(query) and 
            (fixedTarget or targetReader.GetNext(target))) {
         
@@ -137,6 +145,8 @@ int main(int argc, char* argv[]) {
                               false,
                               0);
 
+        ComputeAlignmentStats(alignment, query.seq, target.seq, distScoreFn);
+
         if (alignScore > 0){ // in rare cases the SDP returns positive. 
             alignScore = 0;  // this makes it more like a true local alignment
         }                   
@@ -157,7 +167,10 @@ int main(int argc, char* argv[]) {
              << alignment.qPos << "," << alignment.QEnd()   << "," 
              << query.length  << "," << alignment.tPos << "," 
              << alignment.TEnd()   << "," << target.length << "," 
-             << alignScore << endl;
+             << alignScore;
+        if (printSimilarity) 
+             cout << "," << alignment.pctSimilarity;
+        cout << endl;
 
         ++seqIndex;
     }
diff --git a/utils/SamFilter.cpp b/utils/SamFilter.cpp
index a6a747e..c484618 100644
--- a/utils/SamFilter.cpp
+++ b/utils/SamFilter.cpp
@@ -28,29 +28,29 @@
 
 #include <iostream>
 
-#include "FASTASequence.hpp"
-#include "FASTAReader.hpp"
-#include "CommandLineParser.hpp"
-#include "ChangeListID.hpp"
-#include "utils/TimeUtils.hpp"
-#include "utils/RangeUtils.hpp"
-#include "utils/SMRTReadUtils.hpp"
-#include "algorithms/alignment/DistanceMatrixScoreFunction.hpp"
-#include "algorithms/alignment/AlignmentUtils.hpp"
-#include "algorithms/alignment/StringToScoreMatrix.hpp"
-#include "sam/SAMReader.hpp"
-#include "format/SAMPrinter.hpp"
-#include "datastructures/alignment/AlignmentCandidate.hpp"
-#include "datastructures/alignment/FilterCriteria.hpp"
-#include "metagenome/TitleTable.hpp"
-#include "datastructures/alignment/SAMToAlignmentCandidateAdapter.hpp"
-#include "GFFFile.hpp"
-#include "defs.h"
-#include "RegisterFilterOptions.h"
+#include <FASTASequence.hpp>
+#include <FASTAReader.hpp>
+#include <CommandLineParser.hpp>
+#include <ChangeListID.hpp>
+#include <utils/TimeUtils.hpp>
+#include <utils/RangeUtils.hpp>
+#include <utils/SMRTReadUtils.hpp>
+#include <algorithms/alignment/DistanceMatrixScoreFunction.hpp>
+#include <algorithms/alignment/AlignmentUtils.hpp>
+#include <algorithms/alignment/StringToScoreMatrix.hpp>
+#include <sam/SAMReader.hpp>
+#include <format/SAMPrinter.hpp>
+#include <datastructures/alignment/AlignmentCandidate.hpp>
+#include <datastructures/alignment/FilterCriteria.hpp>
+#include <metagenome/TitleTable.hpp>
+#include <datastructures/alignment/SAMToAlignmentCandidateAdapter.hpp>
+#include <GFFFile.hpp>
+#include <defs.h>
+#include "../iblasr/RegisterFilterOptions.h"
 
 //#define USE_GOOGLE_PROFILER
 #ifdef USE_GOOGLE_PROFILER
-#include "gperftools/profiler.h"
+#include <gperftools/profiler.h>
 #endif
 
 char VERSION[] = "v0.1.0";
@@ -159,7 +159,7 @@ void ConvertTitlesToTitleTableIndices(vector<FASTASequence> & references,
         string & titleTableName) {
     TitleTable tt;
     tt.Read(titleTableName);
-    for(int i = 0; i < references.size(); i++) {
+    for(size_t i = 0; i < references.size(); i++) {
         string title = references[i].GetTitle();
         int idx = -1;
         if (tt.Lookup(title, idx)) {
@@ -201,7 +201,7 @@ bool CheckAdapterOnly(GFFFile & adapterGffFile, //Adapter gff file
     // Reconstruct ref id in the format "ref00000?".
     string refNameId(buf);
     int FUZZY_OVERLAP = 20;
-    for(int eindex = 0; eindex < adapterGffFile.entries.size();
+    for(size_t eindex = 0; eindex < adapterGffFile.entries.size();
             eindex++) { 
         GFFEntry & entry = adapterGffFile.entries[eindex];
         // Convert each GFF record from 1-based inclusive to 
@@ -218,7 +218,7 @@ bool CheckAdapterOnly(GFFFile & adapterGffFile, //Adapter gff file
             }
             if (not (eend < alignment.GenomicTBegin() or
                  estart > alignment.GenomicTEnd())) {
-                int lengthUnion = max(eend, alignment.GenomicTEnd()) -
+                UInt lengthUnion = max(eend, alignment.GenomicTEnd()) -
                                   min(estart, alignment.GenomicTBegin());
                 if (lengthUnion < eend - estart + FUZZY_OVERLAP) {
                     return true;
@@ -350,11 +350,9 @@ int main(int argc, char* argv[]) {
     }
 
     // Open output file.
-    ostream * outFilePtr = &cout;
 	ofstream outFileStrm;
 	if (outFileName != "") {
 		CrucialOpen(outFileName, outFileStrm, std::ios::out);
-		outFilePtr = &outFileStrm;
 	}
     
     GFFFile adapterGffFile;
@@ -405,7 +403,7 @@ int main(int argc, char* argv[]) {
     clp.CommandLineToString(argc, argv, commandLineString);
     allHeaders.push_back("@PG\tID:SAMFILTER\tVN:" + versionString + \
                          "\tCL:" + program + " " + commandLineString);
-    for (int i = 0; i < allHeaders.size(); i++) {
+    for (size_t i = 0; i < allHeaders.size(); i++) {
         outFileStrm << allHeaders[i] << endl;
     }
 
@@ -419,7 +417,7 @@ int main(int argc, char* argv[]) {
 
     // Map reference name obtained from SAM file to indices
     map<string, int> refNameToIndex;
-    for (int i = 0; i < references.size(); i++) {
+    for (size_t i = 0; i < references.size(); i++) {
         string refName = alignmentSet.references[i].GetSequenceName();
         refNameToIndex[refName] = i;
     }
@@ -428,7 +426,7 @@ int main(int argc, char* argv[]) {
     // Store the alignments.
     //
     SAMAlignment samAlignment;
-    int alignIndex = 0; 
+    size_t alignIndex = 0; 
 
     //
     // For 150K, each chip produces about 300M sequences 
diff --git a/utils/SamToCmpH5.cpp b/utils/SamToCmpH5.cpp
index e6219e0..48cca7d 100644
--- a/utils/SamToCmpH5.cpp
+++ b/utils/SamToCmpH5.cpp
@@ -1,16 +1,16 @@
 #include <iostream>
 
-#include "datastructures/alignment/AlignmentCandidate.hpp"
-#include "sam/SAMReader.hpp"
-#include "format/StickAlignmentPrinter.hpp"
-#include "HDFCmpFile.hpp"
-#include "FASTASequence.hpp"
-#include "FASTAReader.hpp"
-#include "CommandLineParser.hpp"
-#include "datastructures/alignmentset/AlignmentSetToCmpH5Adapter.hpp"
-#include "datastructures/alignment/SAMToAlignmentCandidateAdapter.hpp"
-#include "ChangeListID.hpp"
-#include "utils/TimeUtils.hpp"
+#include <datastructures/alignment/AlignmentCandidate.hpp>
+#include <sam/SAMReader.hpp>
+#include <format/StickAlignmentPrinter.hpp>
+#include <HDFCmpFile.hpp>
+#include <FASTASequence.hpp>
+#include <FASTAReader.hpp>
+#include <CommandLineParser.hpp>
+#include <datastructures/alignmentset/AlignmentSetToCmpH5Adapter.hpp>
+#include <datastructures/alignment/SAMToAlignmentCandidateAdapter.hpp>
+#include <ChangeListID.hpp>
+#include <utils/TimeUtils.hpp>
 
 char VERSION[] = "v1.0.0";
 char PERFORCE_VERSION_STRING[] = "$Change: 141782 $";
@@ -135,7 +135,7 @@ int main(int argc, char* argv[]) {
   // The SAM convention uppercases and normalizes before computing the MD5. 
   // For cmp.h5, we compute the MD5 on the sequence 'as is'.
   // 
-  for(int i = 0; i < alignmentSet.references.size(); i++) {
+  for(size_t i = 0; i < alignmentSet.references.size(); i++) {
       MakeMD5((const char*)&references[i].seq[0], 
               (unsigned int)references[i].length, alignmentSet.references[i].md5);
   }
@@ -147,7 +147,7 @@ int main(int argc, char* argv[]) {
   map<string, string>::iterator it;
   assert(references.size() == alignmentSet.references.size());
   if (!useShortRefName) {
-      for (int i = 0; i < references.size(); i++) {
+      for (size_t i = 0; i < references.size(); i++) {
           string shortRefName = alignmentSet.references[i].GetSequenceName();
           string fullRefName(references[i].title); 
           if (shortRefNameToFull.find(shortRefName) != shortRefNameToFull.end()) {
@@ -198,8 +198,7 @@ int main(int argc, char* argv[]) {
     // o.w., the value will be assigned as moleculeID.
     alignmentSetAdapter.StoreAlignmentCandidateList(convertedAlignments, cmpFile, -1, copyQVs);
 
-    int a;
-    for (a = 0; a < convertedAlignments.size(); a++) {
+    for (size_t a = 0; a < convertedAlignments.size(); a++) {
       convertedAlignments[a].FreeSubsequences();
     }
   }
diff --git a/utils/SamToM4.cpp b/utils/SamToM4.cpp
index 6933688..375b19f 100644
--- a/utils/SamToM4.cpp
+++ b/utils/SamToM4.cpp
@@ -16,16 +16,16 @@
  * =====================================================================================
  */
 #include <iostream>
-#include "FASTASequence.hpp"
-#include "FASTAReader.hpp"
-#include "CommandLineParser.hpp"
-#include "ChangeListID.hpp"
-#include "algorithms/alignment/DistanceMatrixScoreFunction.hpp"
-#include "algorithms/alignment/AlignmentUtils.hpp"
-#include "sam/SAMReader.hpp"
-#include "format/IntervalPrinter.hpp"
-#include "datastructures/alignment/AlignmentCandidate.hpp"
-#include "datastructures/alignment/SAMToAlignmentCandidateAdapter.hpp"
+#include <FASTASequence.hpp>
+#include <FASTAReader.hpp>
+#include <CommandLineParser.hpp>
+#include <ChangeListID.hpp>
+#include <algorithms/alignment/DistanceMatrixScoreFunction.hpp>
+#include <algorithms/alignment/AlignmentUtils.hpp>
+#include <sam/SAMReader.hpp>
+#include <format/IntervalPrinter.hpp>
+#include <datastructures/alignment/AlignmentCandidate.hpp>
+#include <datastructures/alignment/SAMToAlignmentCandidateAdapter.hpp>
 
 char VERSION[] = "v0.1.0";
 char PERFORCE_VERSION_STRING[] = "$Change: 126414 $";
@@ -108,7 +108,7 @@ int main(int argc, char* argv[]) {
     map<string, string>::iterator it;
     assert(references.size() == alignmentSet.references.size());
     if (!useShortRefName) {
-        for (int i = 0; i < references.size(); i++) {
+        for (size_t i = 0; i < references.size(); i++) {
             string shortRefName = alignmentSet.references[i].GetSequenceName();
             string fullRefName(references[i].title); 
             if (shortRefNameToFull.find(shortRefName) != shortRefNameToFull.end()) {
@@ -122,7 +122,7 @@ int main(int argc, char* argv[]) {
 
     // Map reference name obtained from SAM file to indices
     map<string, int> refNameToIndex;
-    for (int i = 0; i < references.size(); i++) {
+    for (size_t i = 0; i < references.size(); i++) {
         string refName = alignmentSet.references[i].GetSequenceName();
         refNameToIndex[refName] = i;
     }
@@ -131,7 +131,7 @@ int main(int argc, char* argv[]) {
     // Store the alignments.
     //
     SAMAlignment samAlignment;
-    int alignIndex = 0; 
+    size_t alignIndex = 0; 
 
     //
     // For 150K, each chip produces about 300M sequences 
diff --git a/utils/ToAfg.cpp b/utils/ToAfg.cpp
index fd63879..5fa334f 100644
--- a/utils/ToAfg.cpp
+++ b/utils/ToAfg.cpp
@@ -3,16 +3,16 @@
 #include <string>
 #include <vector>
 
-#include "HDFPlsReader.hpp"
-#include "amos/AfgBasWriter.hpp"
-#include "HDFRegionTableReader.hpp"
-#include "reads/RegionTable.hpp"
-#include "reads/ReadInterval.hpp"
-#include "files/ReaderAgglomerate.hpp"
-#include "utils/FileOfFileNames.hpp"
-#include "utils/RegionUtils.hpp"
-#include "SMRTSequence.hpp"
-#include "utils.hpp"
+#include <HDFPlsReader.hpp>
+#include <amos/AfgBasWriter.hpp>
+#include <HDFRegionTableReader.hpp>
+#include <reads/RegionTable.hpp>
+#include <reads/ReadInterval.hpp>
+#include <files/ReaderAgglomerate.hpp>
+#include <utils/FileOfFileNames.hpp>
+#include <utils/RegionUtils.hpp>
+#include <SMRTSequence.hpp>
+#include <utils.hpp>
 
 using namespace std;
 void PrintUsage() {
@@ -88,7 +88,6 @@ int main(int argc, char* argv[]) {
 
     ofstream fastaOut;
     CrucialOpen(outputFileName, fastaOut);
-    int plsFileIndex;
     HDFRegionTableReader hdfRegionReader;
     AfgBasWriter afgWriter;
     if (useUniformQV){
@@ -97,7 +96,7 @@ int main(int argc, char* argv[]) {
 
     afgWriter.Initialize(outputFileName);
 
-    for (plsFileIndex = 0; plsFileIndex < inputFileNames.size(); plsFileIndex++) {
+    for (size_t plsFileIndex = 0; plsFileIndex < inputFileNames.size(); plsFileIndex++) {
         if (splitSubreads) {
             hdfRegionReader.Initialize(regionFileNames[plsFileIndex]);
             hdfRegionReader.ReadTable(regionTable);
@@ -113,19 +112,18 @@ int main(int argc, char* argv[]) {
         reader.Initialize(inputFileNames[plsFileIndex]);
         CCSSequence seq; 
         int seqIndex = 0;
-        int numRecords = 0;
         vector<ReadInterval> subreadIntervals;
         while (reader.GetNext(seq)){ 
             ++seqIndex;
 
             if (useUniformQV && seq.qual.data != NULL){
-                for (int qvIndex = 0; qvIndex < seq.length; qvIndex++){
+                for (DNALength qvIndex = 0; qvIndex < seq.length; qvIndex++){
                     seq.qual[qvIndex] = uniformQV;
                 }
             }
 
             if (splitSubreads == false) {
-                if (seq.length >= minSubreadLength) {
+                if (seq.length >= static_cast<DNALength>(minSubreadLength)) {
                     afgWriter.Write(seq);
                 }
                 seq.Free();
@@ -151,16 +149,16 @@ int main(int argc, char* argv[]) {
             }
 
 
-            for (int intvIndex = 0; intvIndex < subreadIntervals.size(); intvIndex++) {
+            for (size_t intvIndex = 0; intvIndex < subreadIntervals.size(); intvIndex++) {
                 SMRTSequence subreadSequence;
                 
-                int subreadStart = subreadIntervals[intvIndex].start > hqReadStart ? 
-                                   subreadIntervals[intvIndex].start : hqReadStart;
-                int subreadEnd   = subreadIntervals[intvIndex].end < hqReadEnd ?
-                                   subreadIntervals[intvIndex].end : hqReadEnd;
-                int subreadLength = subreadEnd - subreadStart;
+                DNALength subreadStart = static_cast<DNALength>(subreadIntervals[intvIndex].start) > hqReadStart ? 
+                                   static_cast<DNALength>(subreadIntervals[intvIndex].start) : hqReadStart;
+                DNALength subreadEnd   = static_cast<DNALength>(subreadIntervals[intvIndex].end) < hqReadEnd ?
+                                   static_cast<DNALength>(subreadIntervals[intvIndex].end) : hqReadEnd;
+                DNALength subreadLength = subreadEnd - subreadStart;
 
-                if (subreadLength < minSubreadLength) continue;
+                if (subreadLength < DNALength(minSubreadLength)) continue;
 
                 subreadSequence.SubreadStart(subreadStart);
                 subreadSequence.SubreadEnd  (subreadEnd);
diff --git a/utils/bam2bax/BUILD.txt b/utils/bam2bax/BUILD.txt
new file mode 100644
index 0000000..09539dc
--- /dev/null
+++ b/utils/bam2bax/BUILD.txt
@@ -0,0 +1,48 @@
+Build instructions for developers:
+Assuming that blasr and blasr_libcpp are placed under //depot/software/smrtanalysis/bioinformatics/ext/pi
+
+  $ cd <bam2bax>
+  $ module load boost
+  $ mkdir build; cd build; cmake ..
+  $ make
+  $ ../tests/bin/test_bam2bax # to test bam2bax exe
+
+
+Build instructions for users:
+  $ cd <bam2bax>
+  $ mkdir build; cd build; 
+  $ cmake -DPacBioBAM_INCLUDE_DIRS=<path_to_include_dir> \
+      -DHTSLIB_INCLUDE_DIRS=<path_to_include_dir> \
+      -DPacBioBAM_LIBRARIES=<path_to_lib_so_or_a> \
+      -DHTSLIB_LIBRARIES=<path_to_lib_so_or_a> \
+      -DPBDATA_INCLUDE_DIRS=<path_to_include_dir> \
+      -DPBDATA_LIBRARIES=<path_to_lib_so_or_a> \
+      -DPBIHDF_INCLUDE_DIRS=<path_to_include_dir> \
+      -DPBIHDF_LIBRARIES=<path_to_lib_so_or_a> \
+      -DBLASR_INCLUDE_DIRS=<path_to_include_dir> \
+      -DBLASR_LIBRARIES=<path_to_lib_so_or_a> \
+      -DHDF5_INCLUDE_DIRS=<path_to_include_dir> \
+      -DHDF5_CPP_LIBRARIES=<path_to_lib_so_or_a> \
+      -DHDF5_LIBRARIES=<path_to_lib_so_or_a> \
+      -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+      ../
+
+  $ make
+  $ ../tests/bin/test_bam2bax # to test bam2bax exe
+
+# e.g.,
+#cmake -DPacBioBAM_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/pbbam/include \
+#      -DHTSLIB_INCLUDE_DIRS=$smrtanalysis/bioinformatics/lib/cpp/htslib \
+#      -DPacBioBAM_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/pbbam/lib/libpbbam.a \
+#      -DHTSLIB_LIBRARIES=$smrtanalysis/bioinformatics/lib/cpp/htslib/libhts.a \
+#      -DPBDATA_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata \
+#      -DPBDATA_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/pbdata/libpbdata.a \
+#      -DPBIHDF_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf \
+#      -DPBIHDF_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/hdf/libpbihdf.a \
+#      -DBLASR_INCLUDE_DIRS=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/ \
+#      -DBLASR_LIBRARIES=$smrtanalysis/bioinformatics/ext/pi/blasr/libcpp/alignment/libblasr.a \
+#      -DHDF5_INCLUDE_DIRS=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/include \
+#      -DHDF5_CPP_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5_cpp.a \
+#      -DHDF5_LIBRARIES=$smrtanalysis/prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404/lib/libhdf5.a \
+#      -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+#      ../
diff --git a/utils/bam2bax/CMakeLists.txt b/utils/bam2bax/CMakeLists.txt
new file mode 100644
index 0000000..09e5990
--- /dev/null
+++ b/utils/bam2bax/CMakeLists.txt
@@ -0,0 +1,133 @@
+########################################################################
+# CMake build script for Bam2Bax executable.
+########################################################################
+
+project(Bam2Bax CXX C)
+cmake_minimum_required(VERSION 2.8)
+
+# project version
+set(Bam2Bax_MAJOR_VERSION 0)
+set(Bam2Bax_MINOR_VERSION 0)
+set(Bam2Bax_PATCH_VERSION 1)
+set(Bam2Bax_VERSION
+  "${Bam2Bax_MAJOR_VERSION}.${Bam2Bax_MINOR_VERSION}.${Bam2Bax_PATCH_VERSION}"
+)
+
+# build-time options
+option(Bam2Bax_build_tests "Build Bam2Bax's unit tests." ON)
+
+# main project paths
+set(Bam2Bax_RootDir       ${Bam2Bax_SOURCE_DIR})
+set(Bam2Bax_DocsDir       ${Bam2Bax_RootDir}/docs)
+set(Bam2Bax_SourceDir     ${Bam2Bax_RootDir}/src)
+set(Bam2Bax_TestsDir      ${Bam2Bax_RootDir}/tests)
+set(Bam2Bax_ThirdPartyDir ${Bam2Bax_RootDir}/third-party)
+
+if (NOT Bam2Bax_OutputDir)
+    set(Bam2Bax_OutputDir ${Bam2Bax_RootDir})
+endif()
+
+set(Bam2Bax_BinDir        ${Bam2Bax_OutputDir}/bin)
+file(MAKE_DIRECTORY       ${Bam2Bax_BinDir})
+
+# shared & third-party paths
+
+if (NOT PBDATA_ROOT_DIR)
+    set(PBDATA_ROOT_DIR ${Bam2Bax_RootDir}/../../../blasr_libcpp)
+endif()
+
+# find (existing) libraries needed by executable and tests
+if (NOT BLASR_INCLUDE_DIRS OR NOT BLASR_LIBRARIES)
+    find_library(BLASR_LIBRARIES    blasr    ${PBDATA_ROOT_DIR}/alignment)
+    set(BLASR_INCLUDE_DIRS ${PBDATA_ROOT_DIR}/alignment)
+endif()
+
+if (NOT PBIHDF_INCLUDE_DIRS OR NOT PBIHDF_LIBRARIES)
+    find_library(PBIHDF_LIBRARIES   pbihdf   ${PBDATA_ROOT_DIR}/hdf)
+    set(PBIHDF_INCLUDE_DIRS ${PBDATA_ROOT_DIR}/hdf)
+endif()
+
+if (NOT PBDATA_INCLUDE_DIRS OR NOT PBDATA_LIBRARIES)
+    find_library(PBDATA_LIBRARIES   pbdata   ${PBDATA_ROOT_DIR}/pbdata)
+    set(PBDATA_INCLUDE_DIRS ${PBDATA_ROOT_DIR}/pbdata)
+endif()
+
+if (NOT HDF5_INCLUDE_DIRS OR NOT HDF5_LIBRARIES)
+    if (NOT HDF5_RootDir)
+        set(HDF5_RootDir ${Bam2Bax_RootDir}/../../../../../../prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404)
+    endif()
+
+    set(HDF5_INCLUDE_DIRS ${HDF5_RootDir}/include)
+    set(HDF5_LibDir       ${HDF5_RootDir}/lib)
+
+    find_library(HDF5_LIBRARIES     hdf5     ${HDF5_LibDir} NO_CMAKE_SYSTEM_PATH)
+    find_library(HDF5_CPP_LIBRARIES hdf5_cpp ${HDF5_LibDir} NO_CMAKE_SYSTEM_PATH)
+endif()
+
+if (NOT PacBioBAM_INCLUDE_DIRS OR NOT PacBioBAM_LIBRARIES
+    OR NOT HTSLIB_INCLUDE_DIRS OR NOT HTSLIB_LIBRARIES)
+    set(PacBioBAM_RootDir ${Bam2Bax_RootDir}/../../../../../lib/cpp/pbbam)
+    add_subdirectory(${PacBioBAM_RootDir} external/build/pbbam)
+endif()
+
+if (NOT Boost_INCLUDE_DIRS)
+    find_package(Boost REQUIRED)
+endif()
+
+if (NOT ZLIB_LIBRARIES OR NOT ZLIB_INCLUDE_DIRS)
+    find_package(ZLIB REQUIRED)
+endif()
+
+# shared CXX flags for src & tests
+include(CheckCXXCompilerFlag)
+set(Bam2Bax_CXX_FLAGS "-g -std=c++11 -Wall")
+
+# quash warnings from pbdata
+check_cxx_compiler_flag("-Wno-overloaded-virtual" HAS_NO_OVERLOADED_VIRTUAL)
+if(HAS_NO_OVERLOADED_VIRTUAL)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-overloaded-virtual")
+endif()
+#check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+#if(HAS_NO_UNUSED_PRIVATE_FIELD)
+#    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-unused-private-field")
+#endif()
+check_cxx_compiler_flag("-Wno-unused-variable" HAS_NO_UNUSED_VARIABLE)
+if(HAS_NO_UNUSED_VARIABLE)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-unused-variable")
+endif()
+check_cxx_compiler_flag("-Wno-uninitialized" HAS_NO_UNINITIALIZED)
+if(HAS_NO_UNINITIALIZED)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-uninitialized")
+endif()
+check_cxx_compiler_flag("-Wunused-but-set-variable" HAS_UNUSED_BUT_SET_VARIABLE)
+if(HAS_UNUSED_BUT_SET_VARIABLE)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wunused-but-set-variable")
+endif()
+check_cxx_compiler_flag("-Wno-deprecated-declarations" HAS_NO_DEPRECATED_DECLARATIONS)
+if(HAS_NO_DEPRECATED_DECLARATIONS)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-deprecated-declarations")
+endif()
+# NOTE: -Wno-unused-local-typedef used to quash clang warnings w/ Boost
+check_cxx_compiler_flag("-Wno-unused-local-typedef" HAS_NO_UNUSED_LOCAL_TYPEDEF)
+if(HAS_NO_UNUSED_LOCAL_TYPEDEF)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-unused-local-typedef")
+endif()
+
+SET(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${Bam2Bax_LINKER_FLAGS}" )
+
+# main exe src
+add_subdirectory(src)
+
+# testing
+if(Bam2Bax_build_tests)
+
+    enable_testing()
+    
+    if (NOT GTEST_SRC_DIR)
+        set(GTEST_SRC_DIR ../gtest)
+    endif()
+
+    add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
+    add_subdirectory(tests)
+endif()
+
diff --git a/utils/bam2bax/README.md b/utils/bam2bax/README.md
new file mode 100644
index 0000000..62bbe17
--- /dev/null
+++ b/utils/bam2bax/README.md
@@ -0,0 +1,11 @@
+#bam2bax#
+
+##Usage##
+
+    bam2bax movie.subreads.bam movie.scraps.bam -o movie
+   
+    # movie.bax.h5 will be generated.
+
+##Example##
+
+    tests/example/end-to-end.sh
diff --git a/utils/bam2bax/makefile b/utils/bam2bax/makefile
new file mode 100644
index 0000000..f4b4245
--- /dev/null
+++ b/utils/bam2bax/makefile
@@ -0,0 +1,29 @@
+.PHONY: all clean
+# SRCDIR is the directory holding this makefile; defines.mk is optional, rules.mk is required.
+SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+-include ${CURDIR}/../../defines.mk
+include ${SRCDIR}/../../rules.mk
+# all: configure bam2bax with CMake in ${CURDIR}/build, then build there. The *_INC, *_LIB and SH_LIB_EXT values are presumably set by the included .mk files.
+all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h  ${CURDIR}/tests/src/*.cpp ${CURDIR}/tests/src/*.h
+	@mkdir -p ${CURDIR}/build && \
+	 cd ${CURDIR}/build && \
+		cmake -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \
+          -DHTSLIB_INCLUDE_DIRS=${HTSLIB_INC} \
+          -DPacBioBAM_LIBRARIES=${PBBAM_LIB}/libpbbam${SH_LIB_EXT} \
+          -DHTSLIB_LIBRARIES=${HTSLIB_LIB}/libhts${SH_LIB_EXT} \
+          -DPBDATA_INCLUDE_DIRS=${LIBPBDATA_INC} \
+          -DPBDATA_LIBRARIES=${LIBPBDATA_LIB}/libpbdata${SH_LIB_EXT} \
+          -DPBIHDF_INCLUDE_DIRS=${LIBPBIHDF_INC} \
+          -DPBIHDF_LIBRARIES=${LIBPBIHDF_LIB}/libpbihdf${SH_LIB_EXT} \
+          -DBLASR_INCLUDE_DIRS=${LIBBLASR_INC}/ \
+          -DBLASR_LIBRARIES=${LIBBLASR_LIB}/libblasr${SH_LIB_EXT} \
+          -DHDF5_INCLUDE_DIRS=${HDF5_INC} \
+          -DHDF5_CPP_LIBRARIES=${HDF5_LIB}/libhdf5_cpp${SH_LIB_EXT} \
+          -DHDF5_LIBRARIES=${HDF5_LIB}/libhdf5${SH_LIB_EXT} \
+          -DBam2Bax_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+          ../
+	$(MAKE) -C ${CURDIR}/build
+
+clean:
+	@rm -rf ${CURDIR}/bin/
+	@rm -rf ${CURDIR}/build
diff --git a/utils/bam2bax/src/Bam2Bax.cpp b/utils/bam2bax/src/Bam2Bax.cpp
new file mode 100644
index 0000000..323747d
--- /dev/null
+++ b/utils/bam2bax/src/Bam2Bax.cpp
@@ -0,0 +1,29 @@
+// Author: Yuan Li
+
+#include <unistd.h> // getcwd
+#include <iostream>
+#include <memory>
+
+#include "Bam2Bax.h"
+#include "Converter.h"
+
+using namespace std;
+
+int Bam2Bax::Run(Settings& settings) {
+
+    // Build the converter from the parsed settings and perform the
+    // conversion; Run() reports whether it finished without errors.
+    Converter converter(settings);
+    const bool converted = converter.Run();
+
+    // Failure path: print every error the converter collected to stderr,
+    // one per line, before signalling failure to the caller.
+    if (not converted) {
+        for (const string& message : converter.Errors())
+            cerr << "ERROR: " << message << endl;
+        return EXIT_FAILURE;
+    }
+
+    // Success path.
+    return EXIT_SUCCESS;
+}
diff --git a/utils/bam2bax/src/Bam2Bax.h b/utils/bam2bax/src/Bam2Bax.h
new file mode 100644
index 0000000..f363723
--- /dev/null
+++ b/utils/bam2bax/src/Bam2Bax.h
@@ -0,0 +1,13 @@
+// Author: Yuan Li
+#ifndef BAM2BAX_H
+#define BAM2BAX_H
+
+class Settings;
+
+class Bam2Bax
+{
+public:
+    static int Run(Settings& settings);
+};
+
+#endif // BAM2BAX_H
diff --git a/utils/bam2bax/src/Bam2BaxConverter.h b/utils/bam2bax/src/Bam2BaxConverter.h
new file mode 100644
index 0000000..6e7160c
--- /dev/null
+++ b/utils/bam2bax/src/Bam2BaxConverter.h
@@ -0,0 +1,38 @@
+// Author: Yuan Li
+#ifndef _BAM2BAXCONVERTER_H_
+#define _BAM2BAXCONVERTER_H_
+
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamHeader.h>
+#include <pbbam/ReadGroupInfo.h>
+#include <pbbam/virtual/VirtualPolymeraseReader.h>
+#include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegionTypeMap.h>
+#include "HDFFile.hpp"
+#include "RegionsAdapter.h"
+#include "IConverter.h"
+
+
+
+template <class T_HDFWRITER>
+class Bam2BaxConverter : public IConverter
+{
+public:
+    Bam2BaxConverter(Settings & settings)
+    :IConverter(settings) {}
+
+    ~Bam2BaxConverter(void) {}
+
+    bool Run(void) {return ConvertFile();}
+
+protected:
+    bool ConvertFile(void);
+};
+
+#include "Bam2BaxConverterImpl.hpp"
+#endif
diff --git a/utils/bam2bax/src/Bam2BaxConverterImpl.hpp b/utils/bam2bax/src/Bam2BaxConverterImpl.hpp
new file mode 100644
index 0000000..016b3fc
--- /dev/null
+++ b/utils/bam2bax/src/Bam2BaxConverterImpl.hpp
@@ -0,0 +1,71 @@
+// Author: Yuan Li
+#ifndef BAM2BAX_CONVERTER_IMPL_HPP
+#define BAM2BAX_CONVERTER_IMPL_HPP
+#include <iostream>
+#include "MetadataWriter.h"
+#include "Bam2BaxInternal.h"
+#include <pbbam/BamFile.h>
+#include <pbbam/EntireFileQuery.h>
+
+
+template<class T_HDFWRITER>
+bool Bam2BaxConverter<T_HDFWRITER>::ConvertFile(void) {
+
+
+    // Write metadata.xml to parent directory of Bax.h5.
+    if (not settings_.outputMetadataFilename.empty())
+        MetadataWriter metaWriter_(settings_.outputMetadataFilename, 
+                                   rg, 
+                                   settings_.outputAnalysisDirname);
+
+    T_HDFWRITER writer(outfn, 
+            rg.BasecallerVersion(), 
+            scandata.BaseMap(),
+            qvs,
+            Bam2BaxDefaults::Bax_Regions_RegionTypes);
+
+    if (settings_.traceFilename.empty()) {
+        writer.WriteScanData(scandata);
+    } else {
+        HDFFile traceFile;
+        traceFile.Open(settings_.traceFilename, H5F_ACC_RDONLY);
+        writer.CopyObject(traceFile, "/ScanData"); 
+        traceFile.Close();
+    }
+       
+    if (not settings_.subreadsBamFilename.empty() and 
+        not settings_.scrapsBamFilename.empty()) {
+
+        // Stich subreads and scraps in order to reconstruct polymerase reads.
+        PacBio::BAM::VirtualPolymeraseReader reader(settings_.subreadsBamFilename,
+                                                    settings_.scrapsBamFilename);
+        while(reader.HasNext()) {
+            // FIXME: pbbam should not crash when reading internal pulse features.
+            const PacBio::BAM::VirtualPolymeraseBamRecord & record = reader.Next();
+            SMRTSequence smrt;
+            smrt.Copy(record, true);
+            std::vector<RegionAnnotation> ras = RegionsAdapter::ToRegionAnnotations(record, regionTypes);
+            if (not writer.WriteOneZmw(smrt, ras) or not writer.Errors().empty()) { break; }
+            writer.Flush();
+        }
+        if (not settings_.ignoreQV) writer.WriteFakeDataSets();
+        for (auto error: writer.Errors()) { AddErrorMessage(error); }
+    } else if (not settings_.polymeraseBamFilename.empty()) {
+        // Read polymerase reads from polymerase.bam directly.
+        PacBio::BAM::EntireFileQuery query(bamfile);
+        for (auto record: query) {
+            SMRTSequence smrt;
+            smrt.Copy(record, true);
+            RegionAnnotation ra(record.HoleNumber(), 
+                                RegionTypeAdapter::ToRegionTypeIndex(PacBio::BAM::VirtualRegionType::HQREGION, regionTypes),
+                                0, 0, 0);
+            std::vector<RegionAnnotation> ras({ra});
+            if (not writer.WriteOneZmw(smrt, ras) or not writer.Errors().empty()) { break; }
+        }
+        if (not settings_.ignoreQV) writer.WriteFakeDataSets();
+        for (auto error: writer.Errors()) { AddErrorMessage(error); }
+    }
+            
+    return errors_.empty();
+}
+#endif
diff --git a/utils/bam2bax/src/Bam2BaxInternal.h b/utils/bam2bax/src/Bam2BaxInternal.h
new file mode 100644
index 0000000..777d80d
--- /dev/null
+++ b/utils/bam2bax/src/Bam2BaxInternal.h
@@ -0,0 +1,78 @@
+// Author: Yuan Li
+
+#ifndef _BAM2BAXINTERNAL_H_
+#define _BAM2BAXINTERNAL_H_
+#include <pbbam/EntireFileQuery.h>
+
+//namespace internal
+namespace internal {
+    /// \name \{
+    static const std::vector<PacBio::BAM::BaseFeature> QVEnums = {
+          PacBio::BAM::BaseFeature::DELETION_QV
+        , PacBio::BAM::BaseFeature::DELETION_TAG
+        , PacBio::BAM::BaseFeature::INSERTION_QV
+        , PacBio::BAM::BaseFeature::MERGE_QV
+        , PacBio::BAM::BaseFeature::SUBSTITUTION_QV
+        , PacBio::BAM::BaseFeature::SUBSTITUTION_TAG
+        , PacBio::BAM::BaseFeature::IPD
+        , PacBio::BAM::BaseFeature::PULSE_WIDTH
+        , PacBio::BAM::BaseFeature::PKMID
+        , PacBio::BAM::BaseFeature::PKMEAN
+        , PacBio::BAM::BaseFeature::LABEL
+        , PacBio::BAM::BaseFeature::LABEL_QV
+        , PacBio::BAM::BaseFeature::ALT_LABEL
+        , PacBio::BAM::BaseFeature::ALT_LABEL_QV
+        , PacBio::BAM::BaseFeature::PULSE_MERGE_QV
+        , PacBio::BAM::BaseFeature::PULSE_CALL
+        , PacBio::BAM::BaseFeature::START_FRAME
+        , PacBio::BAM::BaseFeature::PULSE_CALL_WIDTH
+    };
+
+    /// \returns QVs contained by read group rg.
+    /// FIXME: this function should be provided by pbbam.ReadGroupInfo
+    /// FIXME: pbbam, ReadGroupInfo does not recognize internal pulse features such as AltLabelQV.
+    inline std::vector<PacBio::BAM::BaseFeature> 
+    QVEnumsInReadGroup(const PacBio::BAM::ReadGroupInfo & rg) {
+        std::vector<PacBio::BAM::BaseFeature> ret;
+        for (auto it = internal::QVEnums.begin(); it != internal::QVEnums.end(); it++) {
+            if (rg.HasBaseFeature(*it)) {
+                ret.push_back(*it);
+            }
+        }
+        return ret;
+    }
+    /// \}
+ 
+    /// \returns QVs contained by the first record if it exists, otherwise, return {}
+    /// FIXME: this function provides an alternative route to get the QVs contained in the bam file for now,
+    /// because pbbam ReadGroupInfo does not recognize internal pulse features such as AltLabelQV.
+    /// Note: Ignore Label because it is neither base feature nor internal pulse feature.
+    inline std::vector<PacBio::BAM::BaseFeature> 
+    QVEnumsInFirstRecord(const PacBio::BAM::BamFile & bamFile) {
+        std::vector<PacBio::BAM::BaseFeature> ret;
+        PacBio::BAM::EntireFileQuery query(bamFile);
+        for (const PacBio::BAM::BamRecord & record: query) {
+            if (record.HasDeletionQV())      {ret.push_back(PacBio::BAM::BaseFeature::DELETION_QV);}
+            if (record.HasDeletionTag())     {ret.push_back(PacBio::BAM::BaseFeature::DELETION_TAG);}
+            if (record.HasInsertionQV())     {ret.push_back(PacBio::BAM::BaseFeature::INSERTION_QV);}
+            if (record.HasMergeQV())         {ret.push_back(PacBio::BAM::BaseFeature::MERGE_QV);}
+            if (record.HasSubstitutionQV())  {ret.push_back(PacBio::BAM::BaseFeature::SUBSTITUTION_QV);}
+            if (record.HasSubstitutionTag()) {ret.push_back(PacBio::BAM::BaseFeature::SUBSTITUTION_TAG);}
+            if (record.HasIPD())             {ret.push_back(PacBio::BAM::BaseFeature::IPD);}
+            if (record.HasPulseWidth())      {ret.push_back(PacBio::BAM::BaseFeature::PULSE_WIDTH);}
+            if (record.HasPkmid())           {ret.push_back(PacBio::BAM::BaseFeature::PKMID);}
+            if (record.HasPkmean())          {ret.push_back(PacBio::BAM::BaseFeature::PKMEAN);}
+            if (record.HasLabelQV())         {ret.push_back(PacBio::BAM::BaseFeature::LABEL_QV);}
+            if (record.HasAltLabelTag())     {ret.push_back(PacBio::BAM::BaseFeature::ALT_LABEL);}
+            if (record.HasAltLabelQV())      {ret.push_back(PacBio::BAM::BaseFeature::ALT_LABEL_QV);}
+            if (record.HasPulseMergeQV())    {ret.push_back(PacBio::BAM::BaseFeature::PULSE_MERGE_QV);}
+            if (record.HasPulseCall())       {ret.push_back(PacBio::BAM::BaseFeature::PULSE_CALL);}
+            if (record.HasStartFrame())      {ret.push_back(PacBio::BAM::BaseFeature::START_FRAME);}
+            if (record.HasPulseCallWidth())  {ret.push_back(PacBio::BAM::BaseFeature::PULSE_CALL_WIDTH);}
+            break; // only use the first record.
+        }
+        return ret;
+    }
+};
+
+#endif
diff --git a/utils/bam2bax/src/Bam2BaxMain.cpp b/utils/bam2bax/src/Bam2BaxMain.cpp
new file mode 100644
index 0000000..e8e3895
--- /dev/null
+++ b/utils/bam2bax/src/Bam2BaxMain.cpp
@@ -0,0 +1,75 @@
+// Author: Yuan Li
+
+#include "Bam2Bax.h"
+#include "OptionParser.h"
+#include "Settings.h"
+#include <iostream>
+#include <string>
+#include <cstdlib>
+using namespace std;
+
+int main(int argc, char* argv[])
+{
+    // Entry point of bam2bax: declare program metadata and the --version/--help options.
+    optparse::OptionParser parser;
+    parser.description("bam2bax converts the PacBio BAM format into bax.h5 format.");
+    parser.prog("bam2bax");
+    parser.version("1.0.0.170337");
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto ioGroup = optparse::OptionGroup(parser, "Input/output files");
+    ioGroup.add_option("")
+           .dest(Settings::Option::input_)
+           .metavar("movie.subreads.bam movie.scraps.bam | movie.polymerase.bam")
+           .help("Input a movie.polymerase.bam. Or a movie.subreads.bam and a movie.scraps.bam");
+    ioGroup.add_option("--trace")
+           .dest(Settings::Option::trace_)
+           .metavar("movie.trc.h5")
+           .help("(Optional but recommended) Input trace file to copy ScanData from");
+    ioGroup.add_option("-o")
+           .dest(Settings::Option::output_)
+           .metavar("STRING")
+           .help("Prefix of output filenames. Movie name will be used if no prefix provided");
+    ioGroup.add_option("--metadata")
+           .dest(Settings::Option::metadata_)
+           .action("store_true")
+           .help("Write metadata.xml to the upper directory of output file.");
+    parser.add_option_group(ioGroup);
+
+    auto modeGroup = optparse::OptionGroup(parser, "Output file types (mutually exclusive:)");
+    modeGroup.add_option("--base")
+             .dest(Settings::Option::baseMode_)
+             .metavar("")
+             .action("store_true")
+             .help("Output bax.h5 (default)");
+    modeGroup.add_option("--pulse")
+             .dest(Settings::Option::pulseMode_)
+             .metavar("")
+             .action("store_true")
+             .help("Output pls.h5");
+    modeGroup.add_option("--baseMap")
+             .dest(Settings::Option::baseMap_)
+             .metavar(Settings::OptionValue::baseMap_)
+             .help("Set /ScanData/DyeSet/BaseMap, mapping channels to bases.");
+    modeGroup.add_option("--ignoreQV")
+             .dest(Settings::Option::ignoreQV_)
+             .metavar("")
+             .action("store_true")
+             .help("Don't save QVs in output file.");
+    parser.add_option_group(modeGroup);
+
+    // Parse the command line; on any error print the messages plus the help text and exit with failure.
+    Settings settings = Settings::FromCommandLine(parser, argc, argv);
+    if (!settings.errors_.empty()) {
+        cerr << endl;
+        for (const auto& e : settings.errors_)  // by reference: no per-message copy
+            cerr << "ERROR: " << e << endl;
+        cerr << endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // Main conversion; its result (EXIT_SUCCESS / EXIT_FAILURE) is the process exit status.
+    return Bam2Bax::Run(settings);
+}
diff --git a/utils/bam2bax/src/Bam2PlxMain.cpp b/utils/bam2bax/src/Bam2PlxMain.cpp
new file mode 100644
index 0000000..8d0f48f
--- /dev/null
+++ b/utils/bam2bax/src/Bam2PlxMain.cpp
@@ -0,0 +1,65 @@
+// Author: Yuan Li
+
+#include "Bam2Bax.h"
+#include "OptionParser.h"
+#include "Settings.h"
+#include <iostream>
+#include <string>
+#include <cstdlib>
+using namespace std;
+
+int main(int argc, char* argv[])
+{
+    // Entry point of bam2plx: declare program metadata and the --version/--help options.
+    optparse::OptionParser parser;
+    parser.description("bam2plx converts the PacBio Internal BAM format into plx.h5 format.");
+    parser.prog("bam2plx");
+    parser.version("1.0.0.170337");
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto ioGroup = optparse::OptionGroup(parser, "Input/output files");
+    ioGroup.add_option("")
+           .dest(Settings::Option::input_)
+           .metavar("movie.subreads.bam movie.scraps.bam | movie.polymerase.bam")
+           .help("Input a movie.polymerase.bam. Or a movie.subreads.bam and a movie.scraps.bam");
+    ioGroup.add_option("-o")
+           .dest(Settings::Option::output_)
+           .metavar("STRING")
+           .help("Prefix of output filenames. Movie name will be used if no prefix provided");
+    ioGroup.add_option("--metadata")
+           .dest(Settings::Option::metadata_)
+           .action("store_true")
+           .help("Write metadata.xml to the upper directory of output file.");
+    parser.add_option_group(ioGroup);
+
+    auto modeGroup = optparse::OptionGroup(parser, "Output file types (mutually exclusive:)");
+    modeGroup.add_option("--baseMap")
+             .dest(Settings::Option::baseMap_)
+             .metavar(Settings::OptionValue::baseMap_)
+             .help("Set /ScanData/DyeSet/BaseMap, mapping channels to bases.");
+    modeGroup.add_option("--ignoreQV")
+             .dest(Settings::Option::ignoreQV_)
+             .metavar("")
+             .action("store_true")
+             .help("Don't save QVs in output file.");
+    parser.add_option_group(modeGroup);
+
+    // Parse the command line; on any error print the messages plus the help text and exit with failure.
+    Settings settings = Settings::FromCommandLine(parser, argc, argv, true);
+    if (!settings.errors_.empty()) {
+        cerr << endl;
+        for (const auto& e : settings.errors_)  // by reference: no per-message copy
+            cerr << "ERROR: " << e << endl;
+        cerr << endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // bam2plx always writes pulse output: force pulse mode and name the file <prefix>.plx.h5.
+    settings.mode = Settings::PulseMode;
+    settings.outputBaxFilename = settings.outputBaxPrefix + ".plx.h5";
+
+    // Main conversion; its result (EXIT_SUCCESS / EXIT_FAILURE) is the process exit status.
+    return Bam2Bax::Run(settings);
+}
diff --git a/utils/bam2bax/src/CMakeLists.txt b/utils/bam2bax/src/CMakeLists.txt
new file mode 100644
index 0000000..a91dc14
--- /dev/null
+++ b/utils/bam2bax/src/CMakeLists.txt
@@ -0,0 +1,79 @@
+include_directories(
+    .
+    ${BLASR_INCLUDE_DIRS}
+    ${Boost_INCLUDE_DIRS}
+    ${HDF5_INCLUDE_DIRS}
+    ${HTSLIB_INCLUDE_DIRS}
+    ${PacBioBAM_INCLUDE_DIRS}
+    ${PBDATA_INCLUDE_DIRS}
+    ${PBDATA_ROOT_DIR}
+    ${PBIHDF_INCLUDE_DIRS}
+    ${ZLIB_INCLUDE_DIRS}
+)
+
+set(SOURCES
+    Settings.h
+    Settings.cpp
+    Converter.h
+    Converter.cpp
+    Bam2BaxInternal.h
+    RegionTypeAdapter.h
+    RegionsAdapter.h
+    Bam2BaxConverter.h
+    Bam2BaxConverterImpl.hpp
+    Bam2Bax.h
+    Bam2Bax.cpp
+    OptionParser.h
+    OptionParser.cpp
+    MetadataWriter.h
+    MetadataWriter.cpp
+)
+
+set(BAM2BAX_SOURCES
+    Bam2BaxMain.cpp
+    ${SOURCES}
+)
+
+set(BAM2PLX_SOURCES
+    Bam2PlxMain.cpp
+    ${SOURCES}
+)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Bam2Bax_CXX_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${Bam2Bax_EXE_LINKER_FLAGS}")
+
+add_executable(bam2bax ${BAM2BAX_SOURCES})
+set_target_properties(bam2bax PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${Bam2Bax_BinDir}
+)
+if (NOT APPLE)
+    set(MY_LIBRT -lrt)
+else()
+endif()
+target_link_libraries(bam2bax
+    ${BLASR_LIBRARIES}
+    ${PBIHDF_LIBRARIES}
+    ${PBDATA_LIBRARIES}
+    ${HDF5_CPP_LIBRARIES}
+    ${HDF5_LIBRARIES}
+    ${PacBioBAM_LIBRARIES}
+    ${HTSLIB_LIBRARIES}
+    ${ZLIB_LIBRARIES}
+    ${MY_LIBRT}
+)
+
+add_executable(bam2plx ${BAM2PLX_SOURCES} )
+set_target_properties(bam2plx PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${Bam2Bax_BinDir}
+)
+target_link_libraries(bam2plx
+    ${BLASR_LIBRARIES}
+    ${PBIHDF_LIBRARIES}
+    ${PBDATA_LIBRARIES}
+    ${HDF5_CPP_LIBRARIES}
+    ${HDF5_LIBRARIES}
+    ${PacBioBAM_LIBRARIES}
+    ${HTSLIB_LIBRARIES}
+    ${ZLIB_LIBRARIES}
+    ${MY_LIBRT}
+)
diff --git a/utils/bam2bax/src/Converter.cpp b/utils/bam2bax/src/Converter.cpp
new file mode 100644
index 0000000..1926b19
--- /dev/null
+++ b/utils/bam2bax/src/Converter.cpp
@@ -0,0 +1,146 @@
+#include "Converter.h"
+
+Converter::Converter(Settings const& settings)
+:settings_(settings) { 
+    writer_ = NULL;   // allocated by InitializeWriter() at the end of this constructor
+    scanData_ = NULL; // allocated by MockScanData() below
+
+    std::string infn = settings_.subreadsBamFilename;
+
+    if (infn.empty()) infn = settings_.polymeraseBamFilename;
+
+    bamfile_ = new PacBio::BAM::BamFile(infn);
+    PacBio::BAM::BamHeader bamheader = bamfile_->Header();
+
+    if (bamheader.ReadGroups().size() != 1) {
+        AddErrorMessage("Bam file must contain reads from exactly one SMRTCell.");
+        // XXX: Throw initialization exception. NOTE(review): execution continues, so ReadGroups()[0] below is still evaluated even when the header has no read groups.
+    }
+    PacBio::BAM::ReadGroupInfo rg = bamheader.ReadGroups()[0];
+    MockScanData(rg);
+
+    // Write metadata.xml to parent directory of Bax.h5 (metaWriter_ is scoped to this if; presumably its constructor writes the file).
+    if (not settings_.outputMetadataFilename.empty())
+        MetadataWriter metaWriter_(settings_.outputMetadataFilename,
+                                   rg,
+                                   settings_.outputAnalysisDirname);
+
+    // FIXME: pbbam needs to provide an API which returns BaseFeatures in read group; until then use the first record's QVs (none when ignoreQV is set).
+    std::vector<PacBio::BAM::BaseFeature> qvs = settings_.ignoreQV ? std::vector<PacBio::BAM::BaseFeature>({}) : internal::QVEnumsInFirstRecord(*bamfile_);
+
+    InitializeWriter(rg.BasecallerVersion(), qvs);
+}
+
+Converter::~Converter(void) {
+    delete scanData_;  // deleting a null pointer is a no-op, so no guard is needed
+    delete writer_;
+    delete bamfile_;
+}
+
+std::vector<std::string> Converter::Errors(void) const {
+    return errors_;
+}
+
+bool Converter::Run() {
+    if (settings_.traceFilename.empty()) {
+        writer_->WriteScanData(*scanData_);
+    } else {
+        HDFFile traceFile;
+        traceFile.Open(settings_.traceFilename, H5F_ACC_RDONLY);
+        writer_->CopyObject(traceFile, "/ScanData"); 
+        if (settings_.mode == Settings::PulseMode) {
+            SetInverseGain(traceFile);
+        }
+        traceFile.Close();
+    }
+
+    // Regions attribute RegionTypes, which defines supported region types in ORDER.
+    std::vector<RegionType> regionTypes = 
+        RegionTypeAdapter::ToRegionTypes(Bam2BaxDefaults::Bax_Regions_RegionTypes);
+
+    if (not settings_.subreadsBamFilename.empty() and 
+        not settings_.scrapsBamFilename.empty()) {
+
+        // Stich subreads and scraps in order to reconstruct polymerase reads.
+        PacBio::BAM::VirtualPolymeraseReader reader(settings_.subreadsBamFilename,
+                                                    settings_.scrapsBamFilename);
+        while(reader.HasNext()) {
+            // FIXME: pbbam should not crash when reading internal pulse features.
+            const PacBio::BAM::VirtualPolymeraseBamRecord & record = reader.Next();
+            SMRTSequence smrt;
+            smrt.Copy(record, true);
+            std::vector<RegionAnnotation> ras = RegionsAdapter::ToRegionAnnotations(record, regionTypes);
+            if (not writer_->WriteOneZmw(smrt, ras) or not writer_->Errors().empty()) { break; }
+            writer_->Flush();
+        }
+        if (not settings_.ignoreQV) writer_->WriteFakeDataSets();
+        for (auto error: writer_->Errors()) { AddErrorMessage(error); }
+    } else if (not settings_.polymeraseBamFilename.empty()) {
+        // Read polymerase reads from polymerase.bam directly.
+        PacBio::BAM::EntireFileQuery query(*bamfile_);
+        for (auto record: query) {
+            SMRTSequence smrt;
+            smrt.Copy(record, true);
+            RegionAnnotation ra(record.HoleNumber(), 
+                                RegionTypeAdapter::ToRegionTypeIndex(PacBio::BAM::VirtualRegionType::HQREGION, regionTypes),
+                                0, 0, 0);
+            std::vector<RegionAnnotation> ras({ra});
+            if (not writer_->WriteOneZmw(smrt, ras) or not writer_->Errors().empty()) { break; }
+        }
+        if (not settings_.ignoreQV) writer_->WriteFakeDataSets();
+        for (auto error: writer_->Errors()) { AddErrorMessage(error); }
+    }
+
+    return errors_.empty();
+}
+
+void Converter::MockScanData(PacBio::BAM::ReadGroupInfo& rg) {
+    // Construct AcqParams
+    AcqParams acqParams(Bam2BaxDefaults::Bax_ScanData_AduGain,
+                        Bam2BaxDefaults::Bax_ScanData_CameraGain,
+                        Bam2BaxDefaults::Bax_ScanData_CameraType,
+                        Bam2BaxDefaults::Bax_ScanData_HotStartFrame,
+                        Bam2BaxDefaults::Bax_ScanData_LaserOnFrame);
+
+    // Construct scandata.
+    scanData_ = new ScanData(acqParams);
+    scanData_->PlatformID(Sequel) // assume sequel movie 
+             .MovieName(rg.MovieName()) // should be reliable now
+             .WhenStarted(rg.Date())
+             .RunCode(Bam2BaxDefaults::Bax_ScanData_RunCode)  // bam does not contain RunCode
+             .NumFrames(Bam2BaxDefaults::Bax_ScanData_NumFrames) // bam does not contain NumFrames
+             .FrameRate(Bam2BaxDefaults::Bax_ScanData_FrameRate) // Ignore bam header FrameRate.
+             .SequencingKit(rg.SequencingKit())
+             .BindingKit(rg.BindingKit())
+             .BaseMap(settings_.baseMap);
+}
+
+void Converter::InitializeWriter(const std::string& bcvers, 
+        const std::vector<PacBio::BAM::BaseFeature>& qvs) 
+    {
+    std::string outfn = settings_.outputBaxFilename;
+    Settings::Mode mode = settings_.mode;
+    
+    if (mode == Settings::BaseMode) {
+        std::cout << "Converting BAM to bax.h5." << std::endl;
+        writer_ = new HDFBaxWriter(outfn, bcvers,
+            scanData_->BaseMap(), qvs, Bam2BaxDefaults::Bax_Regions_RegionTypes);
+    } else if (mode == Settings::PulseMode) {
+        std::cout << "Converting BAM to plx.h5." << std::endl;
+        writer_ = new HDFPulseWriter(outfn, bcvers,
+            scanData_->BaseMap(), qvs, Bam2BaxDefaults::Bax_Regions_RegionTypes);
+    } else {
+        std::cerr << "UNKNOWN mode." << settings_.mode << std::endl;
+        throw std::exception();
+    }
+}
+
+void Converter::SetInverseGain(HDFFile& traceFile) {
+    // Read /ScanData/AcqParams/AduGain from the trace file as a 32-bit LE float and
+    // pass it to the pulse writer. Only valid when writer_ is an HDFPulseWriter (PulseMode).
+    H5::Group acqGrp = traceFile.hdfFile.openGroup("/ScanData/AcqParams");
+    H5::Attribute aduAttr = acqGrp.openAttribute("AduGain");
+    float igain;
+    // Pass the predefined type directly; the former heap-allocated H5::DataType was never freed.
+    aduAttr.read(H5::PredType::IEEE_F32LE, &igain);
+    static_cast<HDFPulseWriter*>(writer_)->SetInverseGain(igain);
+}
diff --git a/utils/bam2bax/src/Converter.h b/utils/bam2bax/src/Converter.h
new file mode 100644
index 0000000..70e648c
--- /dev/null
+++ b/utils/bam2bax/src/Converter.h
@@ -0,0 +1,77 @@
+// Author: Yuan Li
+#ifndef BAM2BAX_ICONVERTER_H_
+#define BAM2BAX_ICONVERTER_H_
+
+#include <string>
+#include <vector>
+#include <algorithm>
+#include "pbdata/Enumerations.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/ReadGroupInfo.h"
+#include "pbbam/virtual/VirtualPolymeraseReader.h"
+#include "pbbam/virtual/VirtualPolymeraseBamRecord.h"
+#include "pbbam/virtual/VirtualRegion.h"
+#include "pbbam/virtual/VirtualRegionType.h"
+#include "pbbam/virtual/VirtualRegionTypeMap.h"
+#include "HDFWriterBase.hpp"
+#include "HDFBaxWriter.hpp"
+#include "HDFPulseWriter.hpp"
+#include "RegionsAdapter.h"
+#include "Settings.h"
+#include "MetadataWriter.h"
+#include "Bam2BaxInternal.h"
+
+namespace Bam2BaxDefaults {
+    // Default value of attribute /ScanData/AcqParams/NumFrames in Bax.
+    static const unsigned int Bax_ScanData_NumFrames = 0;
+    // Default value of attribute /ScanData/AcqParams/AduGain in Bax.
+    static const float Bax_ScanData_AduGain = 1.0;
+    // Default value of attribute /ScanData/AcqParams/CameraGain in Bax.
+    static const float Bax_ScanData_CameraGain = 1.0;
+    // Default value of attribute /ScanData/AcqParams/CameraType in Bax.
+    static const int Bax_ScanData_CameraType = 0;
+    // Default value of attribute /ScanData/AcqParams/HotStartFrame in Bax.
+    static const UInt Bax_ScanData_HotStartFrame = 0;
+    // Default value of attribute /ScanData/AcqParams/LaserOnFrame in Bax.
+    static const UInt Bax_ScanData_LaserOnFrame = 0;
+    // Default value of attribute /ScanData/AcqParams/FrameRate in Bax.
+    static const float Bax_ScanData_FrameRate = 80.047035;
+
+    // Default value of attribute /ScanData/RunInfo/RunCode in Bax.
+    static const std::string Bax_ScanData_RunCode = "Bam2Bax_Run_Code";
+    // Default value of attribute /ScanData/DyeSet/BaseMap in Bax.
+    static const std::string Bax_ScanData_BaseMap = PacBio::AttributeValues::ScanData::DyeSet::basemap;
+    // Default value of attribute /Regions/RegionTypes in Bax.
+    static const std::vector<std::string> Bax_Regions_RegionTypes = PacBio::AttributeValues::Regions::regiontypes;
+}
+
+// Converts a BAM file to bax.h5 / plx.h5 as configured by a Settings object.
+// Error messages are accumulated as strings in errors_.
+class Converter {
+public:
+    // NOTE(review): the constructor body is not shown here; settings_ is a
+    // reference member, presumably bound to this argument, in which case the
+    // Settings object must outlive the Converter -- confirm.
+    Converter(Settings const& settings);
+    ~Converter(void);
+
+public:
+    // Returns error messages by value (presumably the contents of errors_;
+    // defined elsewhere -- confirm).
+    std::vector<std::string> Errors(void) const;
+    // Entry point of the conversion; defined elsewhere.
+    // NOTE(review): the bool presumably signals success -- confirm.
+    bool Run();
+
+protected:
+    // Appends errmsg to the list of recorded errors.
+    void AddErrorMessage(const std::string & errmsg) {
+        errors_.push_back(errmsg);
+    }
+
+protected:
+    // protected variables
+    Settings const& settings_;          // conversion settings (reference, not owned)
+    ScanData* scanData_;                // scan data; its BaseMap() is handed to the writer
+    HDFWriterBase* writer_;             // allocated with new in InitializeWriter()
+    PacBio::BAM::BamFile* bamfile_;     // NOTE(review): ownership not visible here -- confirm
+    std::vector<std::string> errors_;   // messages added through AddErrorMessage()
+
+private:
+    // Fills scanData_ from the read group plus Bam2BaxDefaults values.
+    void MockScanData(PacBio::BAM::ReadGroupInfo& rg);
+    // Creates writer_ (HDFBaxWriter or HDFPulseWriter) according to settings_.mode.
+    void InitializeWriter(const std::string& bcvers, 
+                          const std::vector<PacBio::BAM::BaseFeature>& qvs);
+    // Copies /ScanData/AcqParams AduGain from traceFile into the pulse writer.
+    void SetInverseGain(HDFFile& traceFile);
+};
+#endif
diff --git a/utils/bam2bax/src/MetadataWriter.cpp b/utils/bam2bax/src/MetadataWriter.cpp
new file mode 100644
index 0000000..08d1a27
--- /dev/null
+++ b/utils/bam2bax/src/MetadataWriter.cpp
@@ -0,0 +1,42 @@
+#include "MetadataWriter.h"
+
+// Returns a copy of in_str in which the first occurrence of to_find, if there
+// is one, has been substituted by to_replace. Later occurrences are kept.
+std::string internal::Replace(const std::string & in_str,
+                              const std::string & to_find,
+                              const std::string & to_replace) {
+    const std::size_t hit = in_str.find(to_find);
+    if (hit == std::string::npos) {
+        // Nothing to substitute: hand back an unchanged copy.
+        return in_str;
+    }
+    std::string result(in_str);
+    result.replace(hit, to_find.size(), to_replace);
+    return result;
+}
+
+// Writes the metadata file `filename` using the basecaller version,
+// sequencing kit and binding kit stored in read group `rg`.
+// Delegates to the five-argument constructor, which does the actual writing,
+// rather than building and discarding a second MetadataWriter object.
+MetadataWriter::MetadataWriter(const std::string & filename, 
+                               const PacBio::BAM::ReadGroupInfo & rg,
+                               const std::string & analysisDir)
+    : MetadataWriter(filename,
+                     rg.BasecallerVersion(),
+                     rg.SequencingKit(),
+                     rg.BindingKit(),
+                     analysisDir) {
+}
+
+// Writes the metadata file `filename`: takes internal::META_CONTENT, fills in
+// the four placeholders (first occurrence of each) and writes the result
+// followed by a newline.
+// analysisDir must not contain '/' (checked by assert).
+MetadataWriter::MetadataWriter(const std::string & filename, 
+                               const std::string & basecallerVersion,
+                               const std::string & sequencingKit,
+                               const std::string & bindingKit,
+                               const std::string & analysisDir) {
+    assert(analysisDir.find('/') == std::string::npos);
+
+    // Substitute the placeholders one after the other.
+    const std::string withVersion =
+        internal::Replace(internal::META_CONTENT, "__BASECALLERVERSION__", basecallerVersion);
+    const std::string withSeqKit =
+        internal::Replace(withVersion, "__SEQUENCINGKIT__", sequencingKit);
+    const std::string withBindKit =
+        internal::Replace(withSeqKit, "__BINDINGKIT__", bindingKit);
+    const std::string content =
+        internal::Replace(withBindKit, "__ANALYSISDIR__", analysisDir);
+
+    std::ofstream ofile(filename, std::ofstream::out);
+    ofile << content << std::endl;
+    ofile.close();
+}
diff --git a/utils/bam2bax/src/MetadataWriter.h b/utils/bam2bax/src/MetadataWriter.h
new file mode 100644
index 0000000..35f312d
--- /dev/null
+++ b/utils/bam2bax/src/MetadataWriter.h
@@ -0,0 +1,38 @@
+// Author: Yuan Li
+
+#ifndef _BAM2BAX_METADATA_WRITER_H_
+#define _BAM2BAX_METADATA_WRITER_H_
+
+#include <iostream>
+#include <fstream>
+#include <cassert>
+#include <pbbam/ReadGroupInfo.h>
+
+namespace internal{
+
+const std::string DEFAULT_ANALYSIS_DIR = "Analysis_Results";
+
+const std::string META_CONTENT = 
+"<?xml version=\"1.0\" encoding=\"utf-8\"?><Metadata xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns=\"http://pacificbiosciences.com/PAP/Metadata.xsd\"><InstCtrlVer>__BASECALLERVERSION__</InstCtrlVer><CellIndex>3</CellIndex><SetNumber>1</SetNumber><BindingKit><PartNumber>__BINDINGKIT__</PartNumber></BindingKit><SequencingKit><PartNumber>__SEQUENCINGKIT__</PartNumber></SequencingKit><Primary><Protocol>BasecallerV1</Protocol><Res [...]
+
+std::string Replace(const std::string & in_str,
+                    const std::string & to_find,
+                    const std::string & to_replace);
+} //namespace internal
+
+// Writes a metadata XML file built from internal::META_CONTENT.
+// All work is done by the constructors; the class has no data members.
+class MetadataWriter {
+public: 
+    // Writes `filename` using the basecaller version, sequencing kit and
+    // binding kit of read group `rg`. analysisDir defaults to
+    // internal::DEFAULT_ANALYSIS_DIR.
+    MetadataWriter(const std::string & filename, 
+                   const PacBio::BAM::ReadGroupInfo & rg,
+                   const std::string & analysisDir=internal::DEFAULT_ANALYSIS_DIR);
+
+    // Writes `filename` from explicitly supplied values. The implementation
+    // asserts that analysisDir contains no '/'.
+    MetadataWriter(const std::string & filename, 
+                   const std::string & basecallerVersion,
+                   const std::string & sequencingKit,
+                   const std::string & bindingKit,
+                   const std::string & analysisDir);
+
+    ~MetadataWriter(void) {}
+};
+
+#endif
diff --git a/utils/bam2bax/src/OptionParser.cpp b/utils/bam2bax/src/OptionParser.cpp
new file mode 100644
index 0000000..fc73176
--- /dev/null
+++ b/utils/bam2bax/src/OptionParser.cpp
@@ -0,0 +1,562 @@
+/**
+ * Copyright (C) 2010 Johannes Weißl <jargon at molb.org>
+ * License: your favourite BSD-style license
+ *
+ * See OptionParser.h for help.
+ */
+
+#include "OptionParser.h"
+
+#include <cstdlib>
+#include <algorithm>
+#include <complex>
+#include <ciso646>
+
+#if defined(ENABLE_NLS) && ENABLE_NLS
+# include <libintl.h>
+# define _(s) gettext(s)
+#else
+# define _(s) ((const char *) (s))
+#endif
+
+using namespace std;
+
+namespace optparse {
+
+////////// auxiliary (string) functions { //////////
+class str_wrap {
+public:
+  str_wrap(const string& l, const string& r) : lwrap(l), rwrap(r) {}
+  str_wrap(const string& w) : lwrap(w), rwrap(w) {}
+  string operator() (const string& s) { return lwrap + s + rwrap; }
+  const string lwrap, rwrap;
+};
+// Applies op to every element of [begin, end) and concatenates the results,
+// placing sep between consecutive results. An empty range gives "".
+template<typename InputIterator, typename UnaryOperator>
+static string str_join_trans(const string& sep, InputIterator begin, InputIterator end, UnaryOperator op) {
+  string joined;
+  InputIterator cur = begin;
+  if (cur != end) {
+    // First element carries no leading separator.
+    joined += op(*cur);
+    for (++cur; cur != end; ++cur) {
+      joined += sep;
+      joined += op(*cur);
+    }
+  }
+  return joined;
+}
+template<class InputIterator>
+static string str_join(const string& sep, InputIterator begin, InputIterator end) {
+  return str_join_trans(sep, begin, end, str_wrap(""));
+}
+// In-place variant: replaces every non-overlapping occurrence of patt in s
+// with repl, scanning left to right, and returns s. Text inserted by a
+// replacement is never rescanned. An empty patt leaves s untouched.
+static string& str_replace(string& s, const string& patt, const string& repl) {
+  const size_t n = patt.length();
+  // An empty pattern matches at every position, so the scan below would
+  // never reach npos; treat it as "nothing to replace".
+  if (n == 0)
+    return s;
+  size_t pos = 0;
+  while ((pos = s.find(patt, pos)) != string::npos) {
+    s.replace(pos, n, repl);
+    pos += repl.size();   // continue after the inserted text
+  }
+  return s;
+}
+// Copying variant: same substitution, applied to a copy of s which is returned.
+static string str_replace(const string& s, const string& patt, const string& repl) {
+  string tmp = s;
+  str_replace(tmp, patt, repl);
+  return tmp;
+}
+// Word-wraps s into lines and returns them, each terminated by endl.
+//   pre:          number of spaces used as the indentation prefix.
+//   len:          width limit used in the wrap test below.
+//   indent_first: when false, the first emitted line gets no prefix.
+// Break candidates are spaces, tabs and newlines; a newline always forces a
+// break.
+static string str_format(const string& s, size_t pre, size_t len, bool indent_first = true) {
+  stringstream ss;
+  string p;
+  if (indent_first)
+    p = string(pre, ' ');
+
+  // pos: index just past the previous whitespace; linestart: index where the
+  // line currently being assembled begins; line: number of lines emitted.
+  size_t pos = 0, linestart = 0;
+  size_t line = 0;
+  while (true) {
+    bool wrap = false;
+
+    size_t new_pos = s.find_first_of(" \n\t", pos);
+    if (new_pos == string::npos)
+      break;
+    if (s[new_pos] == '\n') {
+      pos = new_pos + 1;
+      wrap = true;
+    }
+    // Once the first line is out, every further line is indented.
+    if (line == 1)
+      p = string(pre, ' ');
+    // Break when forced by a newline or when the text up to the next
+    // whitespace, plus the indentation, would pass the width limit.
+    if (wrap || new_pos + pre > linestart + len) {
+      // Emits [linestart, pos-1), i.e. without the whitespace before pos.
+      // NOTE(review): if this triggers while pos == linestart (for instance
+      // when the very first word is already too long) the unsigned count
+      // pos - linestart - 1 wraps around and substr returns the whole
+      // remainder of s -- confirm whether that is intended.
+      ss << p << s.substr(linestart, pos - linestart - 1) << endl;
+      linestart = pos;
+      line++;
+    }
+    pos = new_pos + 1;
+  }
+  // Whatever is left after the last break forms the final line.
+  ss << p << s.substr(linestart) << endl;
+  return ss.str();
+}
+// Interprets s as a decimal integer (an empty string counts as "0") and
+// returns the decimal text of that value plus one.
+static string str_inc(const string& s) {
+  long i;
+  istringstream(s.empty() ? string("0") : s) >> i;
+  ostringstream out;
+  out << i + 1;
+  return out.str();
+}
+// Returns the terminal width used for help formatting: the value of the
+// COLUMNS environment variable when it holds a positive integer, otherwise 80.
+// COLUMNS is not consulted on Windows.
+static unsigned int cols() {
+  unsigned int n = 80;
+#ifndef _WIN32
+  const char *s = getenv("COLUMNS");
+  if (s) {
+    // Parse into a scratch variable: a failed extraction would otherwise
+    // overwrite the default, and a width of 0 breaks the unsigned layout
+    // arithmetic done by the help formatter.
+    long parsed = 0;
+    if ((istringstream(s) >> parsed) && parsed > 0)
+      n = static_cast<unsigned int>(parsed);
+  }
+#endif
+  return n;
+}
+// Returns the last path component of s, ignoring trailing '/' characters.
+// A string made only of '/' yields "/"; an empty string yields "".
+static string basename(const string& s) {
+  const size_t last = s.find_last_not_of('/');
+  if (last == string::npos) {
+    // s is empty or consists solely of slashes.
+    return s.empty() ? string() : string("/");
+  }
+  // Drop trailing slashes, then keep what follows the last remaining slash.
+  const string trimmed = s.substr(0, last + 1);
+  const size_t slash = trimmed.find_last_of("/");
+  if (slash == string::npos)
+    return trimmed;
+  return trimmed.substr(slash + 1);
+}
+////////// } auxiliary (string) functions //////////
+
+
+////////// class OptionParser { //////////
+OptionParser::OptionParser() :
+  _usage(_("%prog [options]")),
+  _add_help_option(true),
+  _add_version_option(true),
+  _interspersed_args(true) {}
+
+Option& OptionParser::add_option(const string& opt) {
+  const string tmp[1] = { opt };
+  return add_option(vector<string>(&tmp[0], &tmp[1]));
+}
+Option& OptionParser::add_option(const string& opt1, const string& opt2) {
+  const string tmp[2] = { opt1, opt2 };
+  return add_option(vector<string>(&tmp[0], &tmp[2]));
+}
+Option& OptionParser::add_option(const string& opt1, const string& opt2, const string& opt3) {
+  const string tmp[3] = { opt1, opt2, opt3 };
+  return add_option(vector<string>(&tmp[0], &tmp[3]));
+}
+// Registers one new option whose spellings are listed in v and returns a
+// reference to it so that further attributes can be chained.
+// Strings starting with "--" are long names, empty strings are ignored, and
+// for anything else the second character is taken as the short name.
+// dest defaults to the first long name with '-' replaced by '_', or, when
+// there is no long name, to the first short name.
+Option& OptionParser::add_option(const vector<string>& v) {
+  // Append a default-constructed Option and work on it in place; the lookup
+  // maps below store its address.
+  _opts.resize(_opts.size()+1);
+  Option& option = _opts.back();
+  string dest_fallback;
+  for (vector<string>::const_iterator it = v.begin(); it != v.end(); ++it) {
+    if (it->substr(0,2) == "--") {
+      const string s = it->substr(2);
+      // Only the first long name determines dest.
+      if (option.dest() == "")
+        option.dest(str_replace(s, "-", "_"));
+      option._long_opts.insert(s);
+      _optmap_l[s] = &option;
+    } else if ( it->empty() ) {
+       continue;
+    } else {
+      // Character at index 1, i.e. the one after the leading character.
+      // NOTE(review): the leading character is not checked to be '-'.
+      const string s = it->substr(1,1);
+      if (dest_fallback == "")
+        dest_fallback = s;
+      option._short_opts.insert(s);
+      _optmap_s[s] = &option;
+    }
+  }
+  if (option.dest() == "")
+    option.dest(dest_fallback);
+  return option;
+}
+
+OptionParser& OptionParser::add_option_group(const OptionGroup& group) {
+  for (list<Option>::const_iterator oit = group._opts.begin(); oit != group._opts.end(); ++oit) {
+    const Option& option = *oit;
+    for (set<string>::const_iterator it = option._short_opts.begin(); it != option._short_opts.end(); ++it)
+      _optmap_s[*it] = &option;
+    for (set<string>::const_iterator it = option._long_opts.begin(); it != option._long_opts.end(); ++it)
+      _optmap_l[*it] = &option;
+  }
+  _groups.push_back(&group);
+  return *this;
+}
+
+// Returns the option registered under short name opt. For an unknown name
+// error() is called with a "no such option" message.
+const Option& OptionParser::lookup_short_opt(const string& opt) const {
+  const optMap::const_iterator found = _optmap_s.find(opt);
+  const bool missing = (found == _optmap_s.end());
+  if (missing)
+    error(_("no such option") + string(": -") + opt);
+  return *(found->second);
+}
+
+void OptionParser::handle_short_opt(const string& opt, const string& arg) {
+
+  _remaining.pop_front();
+  string value;
+
+  const Option& option = lookup_short_opt(opt);
+  if (option._nargs == 1) {
+    value = arg.substr(2);
+    if (value == "") {
+      if (_remaining.empty())
+        error("-" + opt + " " + _("option requires an argument"));
+      value = _remaining.front();
+      _remaining.pop_front();
+    }
+  } else {
+    if (arg.length() > 2)
+      _remaining.push_front(string("-") + arg.substr(2));
+  }
+
+  process_opt(option, string("-") + opt, value);
+}
+
+// Returns the option registered under long name opt. opt may be an
+// abbreviation: any unique prefix of a registered long name is accepted.
+// Calls error() when nothing matches or when an abbreviation matches more
+// than one name.
+const Option& OptionParser::lookup_long_opt(const string& opt) const {
+
+  // An exact name always wins, even when it also happens to be a prefix of a
+  // longer option name; otherwise it could never be selected.
+  const optMap::const_iterator exact = _optmap_l.find(opt);
+  if (exact != _optmap_l.end())
+    return *exact->second;
+
+  // Otherwise collect every registered name that starts with opt.
+  list<string> matching;
+  for (optMap::const_iterator it = _optmap_l.begin(); it != _optmap_l.end(); ++it) {
+    if (it->first.compare(0, opt.length(), opt) == 0)
+      matching.push_back(it->first);
+  }
+  if (matching.size() > 1) {
+    string x = str_join(", ", matching.begin(), matching.end());
+    error(_("ambiguous option") + string(": --") + opt + " (" + x + "?)");
+  }
+  if (matching.size() == 0)
+    error(_("no such option") + string(": --") + opt);
+
+  return *_optmap_l.find(matching.front())->second;
+}
+
+// Processes one long option. optstr is the argument without its leading
+// "--", either "name" or "name=value". The argument itself, still at the
+// front of _remaining, is consumed here.
+void OptionParser::handle_long_opt(const string& optstr) {
+
+  _remaining.pop_front();
+  string opt, value;
+
+  // Split at the first '=' if there is one.
+  size_t delim = optstr.find("=");
+  if (delim != string::npos) {
+    opt = optstr.substr(0, delim);
+    value = optstr.substr(delim+1);
+  } else
+    opt = optstr;
+
+  const Option& option = lookup_long_opt(opt);
+  // Without "=value" the value, if required, is the next argument.
+  if (option._nargs == 1 and delim == string::npos) {
+    if (not _remaining.empty()) {
+      value = _remaining.front();
+      _remaining.pop_front();
+    }
+  }
+
+  // An empty value is treated as missing, so "--name=" is rejected as well.
+  if (option._nargs == 1 and value == "")
+    error("--" + opt + " " + _("option requires an argument"));
+
+  // NOTE(review): for options with _nargs != 1 a supplied "=value" is passed
+  // on without complaint -- confirm whether it should be an error.
+  process_opt(option, string("--") + opt, value);
+}
+
+// Parses a main()-style argument vector. argv[0] supplies the program name
+// (its basename) unless prog() was set before; argv[1..argc-1] are parsed.
+// An empty or null argument vector is parsed as "no arguments" instead of
+// reading outside argv.
+Values& OptionParser::parse_args(const int argc, char const* const* const argv) {
+  if (argc < 1 || argv == 0)
+    return parse_args(vector<string>());
+  if (prog() == "" && argv[0] != 0)
+    prog(basename(argv[0]));
+  return parse_args(&argv[1], &argv[argc]);
+}
+Values& OptionParser::parse_args(const vector<string>& v) {
+
+  _remaining.assign(v.begin(), v.end());
+
+  if (add_version_option() and version() != "") {
+    add_option("--version") .action("version") .help(_("show program's version number and exit"));
+    _opts.splice(_opts.begin(), _opts, --(_opts.end()));
+  }
+  if (add_help_option()) {
+    add_option("-h", "--help") .action("help") .help(_("show this help message and exit"));
+    _opts.splice(_opts.begin(), _opts, --(_opts.end()));
+  }
+
+  while (not _remaining.empty()) {
+    const string arg = _remaining.front();
+
+    if (arg == "--") {
+      _remaining.pop_front();
+      break;
+    }
+
+    if (arg.substr(0,2) == "--") {
+      handle_long_opt(arg.substr(2));
+    } else if (arg.substr(0,1) == "-" and arg.length() > 1) {
+      handle_short_opt(arg.substr(1,1), arg);
+    } else {
+      _remaining.pop_front();
+      _leftover.push_back(arg);
+      if (not interspersed_args())
+        break;
+    }
+  }
+  while (not _remaining.empty()) {
+    const string arg = _remaining.front();
+    _remaining.pop_front();
+    _leftover.push_back(arg);
+  }
+
+  for (strMap::const_iterator it = _defaults.begin(); it != _defaults.end(); ++it) {
+    if (not _values.is_set(it->first))
+      _values[it->first] = it->second;
+  }
+
+  for (list<Option>::const_iterator it = _opts.begin(); it != _opts.end(); ++it) {
+    if (it->get_default() != "" and not _values.is_set(it->dest()))
+        _values[it->dest()] = it->get_default();
+  }
+
+  return _values;
+}
+
+// Carries out the action of option o, which was given on the command line as
+// opt (including its dashes) with argument value (empty when none).
+// "help" and "version" print and terminate the process with status 0;
+// "callback" invokes the registered callback, if any; the remaining known
+// actions update _values and mark the destination as set by the user.
+// An unknown action is ignored.
+void OptionParser::process_opt(const Option& o, const string& opt, const string& value) {
+  const string& action = o.action();
+  const string& dest = o.dest();
+
+  // Actions that store nothing.
+  if (action == "help") {
+    print_help();
+    std::exit(0);
+  }
+  if (action == "version") {
+    print_version();
+    std::exit(0);
+  }
+  if (action == "callback") {
+    if (o.callback())
+      (*o.callback())(o, opt, value, *this);
+    return;
+  }
+
+  // Actions that store a value.
+  if (action == "store" || action == "append") {
+    const string err = o.check_type(opt, value);
+    if (err != "")
+      error(err);
+    _values[dest] = value;
+    if (action == "append")
+      _values.all(dest).push_back(value);
+  }
+  else if (action == "store_const" || action == "append_const") {
+    _values[dest] = o.get_const();
+    if (action == "append_const")
+      _values.all(dest).push_back(o.get_const());
+  }
+  else if (action == "store_true") {
+    _values[dest] = "1";
+  }
+  else if (action == "store_false") {
+    _values[dest] = "0";
+  }
+  else if (action == "count") {
+    _values[dest] = str_inc(_values[dest]);
+  }
+  else {
+    return;
+  }
+
+  _values.is_set_by_user(dest, true);
+}
+
+string OptionParser::format_option_help(unsigned int indent /* = 2 */) const {
+  stringstream ss;
+
+  if (_opts.empty())
+    return ss.str();
+
+  for (list<Option>::const_iterator it = _opts.begin(); it != _opts.end(); ++it) {
+    if (it->help() != SUPPRESS_HELP)
+      ss << it->format_help(indent);
+  }
+
+  return ss.str();
+}
+
+string OptionParser::format_help() const {
+  stringstream ss;
+
+  if (usage() != SUPPRESS_USAGE)
+    ss << get_usage() << endl;
+
+  if (description() != "")
+    ss << str_format(description(), 0, cols()) << endl;
+
+  ss << _("Options") << ":" << endl;
+  ss << format_option_help();
+
+  for (list<OptionGroup const*>::const_iterator it = _groups.begin(); it != _groups.end(); ++it) {
+    const OptionGroup& group = **it;
+    ss << endl << "  " << group.title() << ":" << endl;
+    if (group.group_description() != "")
+      ss << str_format(group.group_description(), 4, cols()) << endl;
+    ss << group.format_option_help(4);
+  }
+
+  if (epilog() != "")
+    ss << endl << str_format(epilog(), 0, cols());
+
+  return ss.str();
+}
+void OptionParser::print_help() const {
+  cout << format_help();
+}
+
+void OptionParser::set_usage(const string& u) {
+  string lower = u;
+  transform(lower.begin(), lower.end(), lower.begin(), ::tolower);
+  if (lower.compare(0, 7, "usage: ") == 0)
+    _usage = u.substr(7);
+  else
+    _usage = u;
+}
+string OptionParser::format_usage(const string& u) const {
+  stringstream ss;
+  ss << _("Usage") << ": " << u << endl;
+  return ss.str();
+}
+string OptionParser::get_usage() const {
+  if (usage() == SUPPRESS_USAGE)
+    return string("");
+  return format_usage(str_replace(usage(), "%prog", prog()));
+}
+void OptionParser::print_usage(ostream& out) const {
+  string u = get_usage();
+  if (u != "")
+    out << u << endl;
+}
+void OptionParser::print_usage() const {
+  print_usage(cout);
+}
+
+string OptionParser::get_version() const {
+  return str_replace(_version, "%prog", prog());
+}
+void OptionParser::print_version(ostream& out) const {
+  out << get_version() << endl;
+}
+void OptionParser::print_version() const {
+  print_version(cout);
+}
+
+void OptionParser::exit() const {
+  std::exit(2);
+}
+void OptionParser::error(const string& msg) const {
+  print_usage(cerr);
+  cerr << prog() << ": " << _("error") << ": " << msg << endl;
+  exit();
+}
+////////// } class OptionParser //////////
+
+////////// class Values { //////////
+const string& Values::operator[] (const string& d) const {
+  strMap::const_iterator it = _map.find(d);
+  static const string empty = "";
+  return (it != _map.end()) ? it->second : empty;
+}
+void Values::is_set_by_user(const string& d, bool yes) {
+  if (yes)
+    _userSet.insert(d);
+  else
+    _userSet.erase(d);
+}
+////////// } class Values //////////
+
+////////// class Option { //////////
+// Checks that val is acceptable for this option's type().
+//   opt: spelling of the option as typed, used only in the message.
+//   val: argument text to check.
+// Returns "" when val is acceptable, otherwise a complete error message.
+// Types other than those handled below are accepted unchecked.
+string Option::check_type(const string& opt, const string& val) const {
+  istringstream ss(val);
+  stringstream err;
+
+  // Numeric types: the only test is whether stream extraction succeeds;
+  // there is no check that the whole of val was consumed.
+  if (type() == "int" || type() == "long") {
+    long t;
+    if (not (ss >> t))
+      err << _("option") << " " << opt << ": " << _("invalid integer value") << ": '" << val << "'";
+  }
+  else if (type() == "float" || type() == "double") {
+    double t;
+    if (not (ss >> t))
+      err << _("option") << " " << opt << ": " << _("invalid floating-point value") << ": '" << val << "'";
+  }
+  else if (type() == "choice") {
+    // val must be equal to one of the registered choices.
+    if (find(choices().begin(), choices().end(), val) == choices().end()) {
+      // Quote each choice for the message.
+      list<string> tmp = choices();
+      transform(tmp.begin(), tmp.end(), tmp.begin(), str_wrap("'"));
+      err << _("option") << " " << opt << ": " << _("invalid choice") << ": '" << val << "'"
+        << " (" << _("choose from") << " " << str_join(", ", tmp.begin(), tmp.end()) << ")";
+    }
+  }
+  else if (type() == "complex") {
+    complex<double> t;
+    if (not (ss >> t))
+      err << _("option") << " " << opt << ": " << _("invalid complex value") << ": '" << val << "'";
+  }
+
+  return err.str();
+}
+
+string Option::format_option_help(unsigned int indent /* = 2 */) const {
+
+  string mvar_short, mvar_long;
+  if (nargs() == 1) {
+    string mvar = metavar();
+    if (mvar == "") {
+      mvar = type();
+      transform(mvar.begin(), mvar.end(), mvar.begin(), ::toupper);
+     }
+    mvar_short = " " + mvar;
+    mvar_long = "=" + mvar;
+  }
+
+  stringstream ss;
+  ss << string(indent, ' ');
+
+  if (not _short_opts.empty()) {
+    ss << str_join_trans(", ", _short_opts.begin(), _short_opts.end(), str_wrap("-", mvar_short));
+    if (not _long_opts.empty())
+      ss << ", ";
+  }
+  if (not _long_opts.empty())
+    ss << str_join_trans(", ", _long_opts.begin(), _long_opts.end(), str_wrap("--", mvar_long));
+
+  if ( _short_opts.empty() && _long_opts.empty() )
+      ss << metavar();
+
+
+  return ss.str();
+}
+
+// Renders the help entry of this option: the option spellings (indented by
+// `indent`) followed by the wrapped help text. The spellings occupy a column
+// of min(30% of the terminal width, 36) characters; when they do not fit,
+// the help text starts on a new line. "%default" in the help text is replaced
+// by the default value when one is set.
+string Option::format_help(unsigned int indent /* = 2 */) const {
+  stringstream ss;
+  string h = format_option_help(indent);
+  unsigned int width = cols();
+  unsigned int opt_width = min(width*3/10, 36u);
+  bool indent_first = false;
+  ss << h;
+  // if the option list is too long, start a new paragraph
+  // (compared as length + 1 >= opt_width so that opt_width == 0, which
+  // results from a width below four columns, cannot underflow the unsigned
+  // arithmetic here or in the padding below)
+  if (h.length() + 1 >= opt_width) {
+    ss << endl;
+    indent_first = true;
+  } else {
+    // Pad the spellings up to the help column.
+    ss << string(opt_width - h.length(), ' ');
+    if (help() == "")
+      ss << endl;
+  }
+  if (help() != "") {
+    string help_str = (get_default() != "") ? str_replace(help(), "%default", get_default()) : help();
+    ss << str_format(help_str, opt_width, width, indent_first);
+  }
+  return ss.str();
+}
+
+// Sets the action of this option and returns *this for chaining. Actions
+// that take no command-line argument also reset nargs to 0.
+Option& Option::action(const string& a) {
+  // Actions that never consume an argument.
+  static const char* const no_arg_actions[] = {
+    "store_const", "store_true", "store_false",
+    "append_const", "count", "help", "version"
+  };
+  const size_t count = sizeof(no_arg_actions) / sizeof(no_arg_actions[0]);
+
+  _action = a;
+  for (size_t i = 0; i < count; ++i) {
+    if (a == no_arg_actions[i]) {
+      nargs(0);
+      break;
+    }
+  }
+  return *this;
+}
+////////// } class Option //////////
+
+}
diff --git a/utils/bam2bax/src/OptionParser.h b/utils/bam2bax/src/OptionParser.h
new file mode 100644
index 0000000..8ec6538
--- /dev/null
+++ b/utils/bam2bax/src/OptionParser.h
@@ -0,0 +1,306 @@
+/**
+ * Copyright (C) 2010 Johannes Weißl <jargon at molb.org>
+ * License: your favourite BSD-style license
+ *
+ * git clone http://github.com/weisslj/cpp-optparse.git
+ *
+ * This is yet another option parser for C++. It is modelled after the
+ * excellent Python optparse API. Although incomplete, anyone familiar with
+ * optparse should feel at home:
+ * http://docs.python.org/library/optparse.html
+ *
+ * Design decisions:
+ * - elegant and easy usage more important than speed / flexibility
+ * - shortness more important than feature completeness
+ *   * no unicode
+ *   * no checking for user programming errors
+ *
+ * Why not use getopt/getopt_long?
+ * - not C++ / not completely POSIX
+ * - too cumbersome to use, would need lot of additional code
+ *
+ * Why not use Boost.Program_options?
+ * - boost not installed on all target platforms (esp. cluster, HPC, ...)
+ * - too big to include just for option handling:
+ *   322 *.h (44750 lines) + 7 *.cpp (2078 lines)
+ *
+ * Why not use tclap/Opag/Options/CmdLine/Anyoption/Argument_helper/...?
+ * - no reason, writing one is faster than code inspection :-)
+ * - similarity to Python desired for faster learning curve
+ *
+ * Future work:
+ * - nargs > 1?
+ * - comments?
+ *
+ * Python only features:
+ * - conflict handlers
+ * - adding new actions
+ *
+ *
+ * Example:
+ *
+ * using optparse::OptionParser;
+ *
+ * OptionParser parser = OptionParser() .description("just an example");
+ *
+ * parser.add_option("-f", "--file") .dest("filename")
+ *                   .help("write report to FILE") .metavar("FILE");
+ * parser.add_option("-q", "--quiet")
+ *                   .action("store_false") .dest("verbose") .set_default("1")
+ *                   .help("don't print status messages to stdout");
+ * 
+ * optparse::Values options = parser.parse_args(argc, argv);
+ * vector<string> args = parser.args();
+ *
+ * if (options.get("verbose"))
+ *     cout << options["filename"] << endl;
+ *
+ */
+
+#ifndef OPTIONPARSER_H_
+#define OPTIONPARSER_H_
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace optparse {
+
+class OptionParser;
+class OptionGroup;
+class Option;
+class Values;
+class Value;
+class Callback;
+
+typedef std::map<std::string,std::string> strMap;
+typedef std::map<std::string,std::list<std::string> > lstMap;
+typedef std::map<std::string,Option const*> optMap;
+
+const char* const SUPPRESS_HELP = "SUPPRESS" "HELP";
+const char* const SUPPRESS_USAGE = "SUPPRESS" "USAGE";
+
+//! Class for automatic conversion from string -> anytype
+class Value {
+  public:
+    Value() : str(), valid(false) {}
+    Value(const std::string& v) : str(v), valid(true) {}
+    operator const char*() { return str.c_str(); }
+    operator bool() { bool t; return (valid && (std::istringstream(str) >> t)) ? t : false; }
+    operator short() { short t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator unsigned short() { unsigned short t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator int() { int t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator unsigned int() { unsigned int t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator long() { long t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator unsigned long() { unsigned long t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator float() { float t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator double() { double t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator long double() { long double t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+ private:
+    const std::string str;
+    bool valid;
+};
+
+//! Result of a parse: maps option destinations to their string values.
+//! Besides the last value per destination it keeps the full list of values
+//! for the append actions and the set of destinations given by the user.
+class Values {
+  public:
+    Values() : _map() {}
+    //! Value stored for d; an empty string when d has no value (defined in the .cpp).
+    const std::string& operator[] (const std::string& d) const;
+    //! Mutable access; creates an empty entry for d when none exists.
+    std::string& operator[] (const std::string& d) { return _map[d]; }
+    //! True when d has an entry, whether from the command line or a default.
+    bool is_set(const std::string& d) const { return _map.find(d) != _map.end(); }
+    //! True when d was marked as given by the user.
+    bool is_set_by_user(const std::string& d) const { return _userSet.find(d) != _userSet.end(); }
+    //! Marks (yes == true) or unmarks d as given by the user.
+    void is_set_by_user(const std::string& d, bool yes);
+    //! Value of d wrapped for conversion; an invalid Value when d is not set.
+    Value get(const std::string& d) const { return (is_set(d)) ? Value((*this)[d]) : Value(); }
+
+    typedef std::list<std::string>::iterator iterator;
+    typedef std::list<std::string>::const_iterator const_iterator;
+    //! All values appended to d; creates an empty list when none exists.
+    std::list<std::string>& all(const std::string& d) { return _appendMap[d]; }
+    //! All values appended to d; an empty list when d was never appended to
+    //! (looking up an absent key must not dereference the end iterator).
+    const std::list<std::string>& all(const std::string& d) const {
+      static const std::list<std::string> empty;
+      lstMap::const_iterator it = _appendMap.find(d);
+      return (it != _appendMap.end()) ? it->second : empty;
+    }
+
+  private:
+    strMap _map;                     // destination -> last value
+    lstMap _appendMap;               // destination -> every appended value
+    std::set<std::string> _userSet;  // destinations given by the user
+};
+
+class OptionParser {
+  public:
+    OptionParser();
+    virtual ~OptionParser() {}
+
+    OptionParser& usage(const std::string& u) { set_usage(u); return *this; }
+    OptionParser& version(const std::string& v) { _version = v; return *this; }
+    OptionParser& description(const std::string& d) { _description = d; return *this; }
+    OptionParser& add_help_option(bool h) { _add_help_option = h; return *this; }
+    OptionParser& add_version_option(bool v) { _add_version_option = v; return *this; }
+    OptionParser& prog(const std::string& p) { _prog = p; return *this; }
+    OptionParser& epilog(const std::string& e) { _epilog = e; return *this; }
+    OptionParser& set_defaults(const std::string& dest, const std::string& val) {
+      _defaults[dest] = val; return *this;
+    }
+    OptionParser& enable_interspersed_args() { _interspersed_args = true; return *this; }
+    OptionParser& disable_interspersed_args() { _interspersed_args = false; return *this; }
+    OptionParser& add_option_group(const OptionGroup& group);
+
+    const std::string& usage() const { return _usage; }
+    const std::string& version() const { return _version; }
+    const std::string& description() const { return _description; }
+    bool add_help_option() const { return _add_help_option; }
+    bool add_version_option() const { return _add_version_option; }
+    const std::string& prog() const { return _prog; }
+    const std::string& epilog() const { return _epilog; }
+    bool interspersed_args() const { return _interspersed_args; }
+
+    Option& add_option(const std::string& opt);
+    Option& add_option(const std::string& opt1, const std::string& opt2);
+    Option& add_option(const std::string& opt1, const std::string& opt2, const std::string& opt3);
+    Option& add_option(const std::vector<std::string>& opt);
+
+    Values& parse_args(int argc, char const* const* argv);
+    Values& parse_args(const std::vector<std::string>& args);
+    template<typename InputIterator>
+    Values& parse_args(InputIterator begin, InputIterator end) {
+      return parse_args(std::vector<std::string>(begin, end));
+    }
+
+    const std::list<std::string>& args() const { return _leftover; }
+    std::vector<std::string> args() {
+      return std::vector<std::string>(_leftover.begin(), _leftover.end());
+    }
+
+    std::string format_help() const;
+    std::string format_option_help(unsigned int indent = 2) const;
+    void print_help() const;
+
+    void set_usage(const std::string& u);
+    std::string get_usage() const;
+    void print_usage(std::ostream& out) const;
+    void print_usage() const;
+
+    std::string get_version() const;
+    void print_version(std::ostream& out) const;
+    void print_version() const;
+
+    void error(const std::string& msg) const;
+    void exit() const;
+
+  private:
+    const Option& lookup_short_opt(const std::string& opt) const;
+    const Option& lookup_long_opt(const std::string& opt) const;
+
+    void handle_short_opt(const std::string& opt, const std::string& arg);
+    void handle_long_opt(const std::string& optstr);
+
+    void process_opt(const Option& option, const std::string& opt, const std::string& value);
+
+    std::string format_usage(const std::string& u) const;
+
+    std::string _usage;
+    std::string _version;
+    std::string _description;
+    bool _add_help_option;
+    bool _add_version_option;
+    std::string _prog;
+    std::string _epilog;
+    bool _interspersed_args;
+
+    Values _values;
+
+    std::list<Option> _opts;
+    optMap _optmap_s;
+    optMap _optmap_l;
+    strMap _defaults;
+    std::list<OptionGroup const*> _groups;
+
+    std::list<std::string> _remaining;
+    std::list<std::string> _leftover;
+};
+
+class OptionGroup : public OptionParser {
+  public:
+    OptionGroup(const OptionParser& /*p*/, const std::string& t, const std::string& d = "") :
+      //_parser(p),
+      _title(t), _group_description(d) {}
+    virtual ~OptionGroup() {}
+
+    OptionGroup& title(const std::string& t) { _title = t; return *this; }
+    OptionGroup& group_description(const std::string& d) { _group_description = d; return *this; }
+    const std::string& title() const { return _title; }
+    const std::string& group_description() const { return _group_description; }
+
+  private:
+    //const OptionParser& _parser;
+    std::string _title;
+    std::string _group_description;
+};
+
+class Option {
+  public:
+    Option() : _action("store"), _type("string"), _nargs(1), _callback(0) {}
+    virtual ~Option() {}
+
+    Option& action(const std::string& a);
+    Option& type(const std::string& t) { _type = t; return *this; }
+    Option& dest(const std::string& d) { _dest = d; return *this; }
+    Option& set_default(const std::string& d) { _default = d; return *this; }
+    template<typename T>
+    Option& set_default(T t) { std::ostringstream ss; ss << t; _default = ss.str(); return *this; }
+    Option& nargs(size_t n) { _nargs = n; return *this; }
+    Option& set_const(const std::string& c) { _const = c; return *this; }
+    template<typename InputIterator>
+    Option& choices(InputIterator begin, InputIterator end) {
+      _choices.assign(begin, end); type("choice"); return *this;
+    }
+    template<typename InputEnumerable>
+    Option& choices(InputEnumerable enumerable) {
+      _choices.assign(enumerable.begin(), enumerable.end()); type("choice"); return *this;
+    }
+    Option& help(const std::string& h) { _help = h; return *this; }
+    Option& metavar(const std::string& m) { _metavar = m; return *this; }
+    Option& callback(Callback& c) { _callback = &c; return *this; }
+
+    const std::string& action() const { return _action; }
+    const std::string& type() const { return _type; }
+    const std::string& dest() const { return _dest; }
+    const std::string& get_default() const { return _default; }
+    size_t nargs() const { return _nargs; }
+    const std::string& get_const() const { return _const; }
+    const std::list<std::string>& choices() const { return _choices; }
+    const std::string& help() const { return _help; }
+    const std::string& metavar() const { return _metavar; }
+    Callback* callback() const { return _callback; }
+
+  private:
+    std::string check_type(const std::string& opt, const std::string& val) const;
+    std::string format_option_help(unsigned int indent = 2) const;
+    std::string format_help(unsigned int indent = 2) const;
+
+    std::set<std::string> _short_opts;
+    std::set<std::string> _long_opts;
+
+    std::string _action;
+    std::string _type;
+    std::string _dest;
+    std::string _default;
+    size_t _nargs;
+    std::string _const;
+    std::list<std::string> _choices;
+    std::string _help;
+    std::string _metavar;
+    Callback* _callback;
+
+    friend class OptionParser;
+};
+
+class Callback {
+public:
+  virtual void operator() (const Option& option, const std::string& opt, const std::string& val, const OptionParser& parser) = 0;
+  virtual ~Callback() {}
+};
+
+}
+
+#endif
diff --git a/utils/bam2bax/src/RegionTypeAdapter.h b/utils/bam2bax/src/RegionTypeAdapter.h
new file mode 100644
index 0000000..b83ce65
--- /dev/null
+++ b/utils/bam2bax/src/RegionTypeAdapter.h
@@ -0,0 +1,129 @@
+// Author: Yuan Li
+
+#ifndef _REGIONTYPE_ADAPTER_H_
+#define _REGIONTYPE_ADAPTER_H_
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <iterator>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+/// Static helpers that convert between pbdata's RegionType,
+/// PacBio::BAM::VirtualRegionType and region type names given as strings.
+class RegionTypeAdapter {
+public:
+    /// \name Conversions between RegionType and VirtualRegionType
+    /// \{
+
+    /// \returns true if input PacBio::BAM::VirtualRegionType object vrt can be converted to any RegionType in regionTypes.
+    /// \param[in]  vrt VirtualRegionType to be converted.
+    /// \param[in]  regionTypes valid RegionTypes which can be converted to.
+    static inline 
+    bool IsConvertible(PacBio::BAM::VirtualRegionType vrt, std::vector<RegionType> & regionTypes); 
+
+    /// Converts PacBio::BAM::VirtualRegionType vrt to enum RegionType defined in pbdata/Enumeration.h
+    /// \returns UnknownRegionType for any value other than SUBREAD, ADAPTER,
+    ///          HQREGION or BARCODE.
+    static inline
+    RegionType ToRegionType(PacBio::BAM::VirtualRegionType vrt);
+
+    /// Converts a region type name to RegionType, ignoring case.
+    /// Recognised names are INSERT, SUBREAD, ADAPTER, HQREGION and BARCODE;
+    /// anything else yields UnknownRegionType.
+    static inline
+    RegionType ToRegionType(const std::string & type);
+
+    /// Converts every name in typeStrs with ToRegionType(const std::string &),
+    /// preserving order.
+    static inline
+    std::vector<RegionType> ToRegionTypes(const std::vector<std::string> & typeStrs);
+
+    /// NOTE(review): this overload is declared here but no definition for it
+    /// appears in this header; confirm whether it is defined elsewhere or can
+    /// be dropped.
+    static inline
+    RegionType ToRegionTypes(const std::string & str);
+
+    /// Converts a region type name to PacBio::BAM::VirtualRegionType by way of
+    /// ToRegionType(const std::string &).
+    static inline
+    PacBio::BAM::VirtualRegionType ToVirtualRegionType(const std::string & str);
+
+    /// Converts RegionType to PacBio::BAM::VirtualRegionType
+    /// Handles Insert, Adapter, HQRegion and BarCode; asserts on any other value.
+    static inline
+    PacBio::BAM::VirtualRegionType ToVirtualRegionType(RegionType rt);
+    
+    /// Converts VirtualRegionType to RegionTypeIndex in regionTypes.
+    /// \returns index of this region type in given regionTypes.
+    static inline 
+    int ToRegionTypeIndex(PacBio::BAM::VirtualRegionType vrt, std::vector<RegionType> & regionTypes);
+
+    /// \}
+}; // RegionTypeAdapter
+
+
+/// Convert a BAM::VirtualRegionType to pbdata RegionAnnotation.TypeIndex.
+///
+/// \param[in] vrt          virtual region type to look up.
+/// \param[in] regionTypes  table of region types; the result is a position in it.
+/// \returns position of the converted RegionType within regionTypes.
+/// \throws std::out_of_range if the converted type is not in regionTypes
+///         (builds with assertions enabled stop at the assert first).
+int RegionTypeAdapter::ToRegionTypeIndex(PacBio::BAM::VirtualRegionType vrt,
+                                         std::vector<RegionType> & regionTypes) {
+    const RegionType rt = ToRegionType(vrt);
+    const std::vector<RegionType>::iterator it =
+        std::find(regionTypes.begin(), regionTypes.end(), rt);
+    assert(it != regionTypes.end());
+    // With NDEBUG the assert disappears; without this check the function
+    // would return regionTypes.size(), i.e. an index one past the table.
+    if (it == regionTypes.end()) {
+        throw std::out_of_range(
+            "RegionTypeAdapter::ToRegionTypeIndex: region type is not in regionTypes.");
+    }
+    return static_cast<int>(std::distance(regionTypes.begin(), it));
+}
+
+/// True when vrt maps to a known RegionType and that RegionType is listed
+/// in regionTypes.
+bool RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType vrt, 
+                                      std::vector<RegionType> & regionTypes) 
+{
+    const RegionType converted = ToRegionType(vrt);
+    // Types without a pbdata counterpart are never convertible.
+    return converted != UnknownRegionType &&
+           std::find(regionTypes.begin(), regionTypes.end(), converted) != regionTypes.end();
+}
+
+/// Maps a PacBio::BAM::VirtualRegionType onto the pbdata RegionType enum.
+/// Values without a counterpart map to UnknownRegionType.
+RegionType RegionTypeAdapter::ToRegionType(PacBio::BAM::VirtualRegionType vrt)
+{ 
+    switch (vrt) {
+        case PacBio::BAM::VirtualRegionType::SUBREAD:
+            return Insert;
+        case PacBio::BAM::VirtualRegionType::ADAPTER:
+            return Adapter;
+        case PacBio::BAM::VirtualRegionType::HQREGION:
+            return HQRegion;
+        case PacBio::BAM::VirtualRegionType::BARCODE:
+            return BarCode;
+        default:
+            //e.g., No LQRegion defined in pbdata/Enumeration.h
+            return UnknownRegionType;
+    }
+}
+
+/// Converts a region type name to RegionType, ignoring case.
+///
+/// \param[in] str  one of INSERT, SUBREAD, ADAPTER, HQREGION or BARCODE.
+/// \returns the matching RegionType, or UnknownRegionType for any other name.
+RegionType RegionTypeAdapter::ToRegionType(const std::string & str) {
+    std::string u_str = str;
+    // toupper must be given a value representable as unsigned char; passing a
+    // plain (possibly negative) char is undefined behaviour.
+    std::transform(u_str.begin(), u_str.end(), u_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
+    if (u_str == "INSERT" || u_str == "SUBREAD") {
+        return Insert;
+    } else if (u_str == "ADAPTER") {
+        return Adapter;
+    } else if (u_str == "HQREGION") {
+        return HQRegion;
+    } else if (u_str == "BARCODE") {
+        return BarCode;
+    } else {
+        return UnknownRegionType;
+    }
+}
+
+/// Converts each name in typeStrs with ToRegionType(const std::string &).
+///
+/// \returns one RegionType per input name, in input order; unrecognised
+///          names become UnknownRegionType.
+std::vector<RegionType> RegionTypeAdapter::ToRegionTypes(const std::vector<std::string> & typeStrs) {
+    std::vector<RegionType> ret;
+    ret.reserve(typeStrs.size());
+    // Iterate by const reference: the names are only read, so copying each
+    // string is unnecessary.
+    for (const auto & str : typeStrs)
+        ret.push_back(ToRegionType(str));
+    return ret;
+}
+
+/// Converts a region type name to a VirtualRegionType in two steps:
+/// name -> RegionType -> VirtualRegionType.
+PacBio::BAM::VirtualRegionType RegionTypeAdapter::ToVirtualRegionType(const std::string & str) {
+    const RegionType parsed = ToRegionType(str);
+    return ToVirtualRegionType(parsed);
+}
+
+/// Converts a pbdata RegionType to PacBio::BAM::VirtualRegionType.
+///
+/// \param[in] rt  Insert, Adapter, HQRegion or BarCode.
+/// \returns the matching VirtualRegionType.
+/// \throws std::invalid_argument for any other RegionType (builds with
+///         assertions enabled stop at the assert first).
+PacBio::BAM::VirtualRegionType RegionTypeAdapter::ToVirtualRegionType(RegionType rt) {
+    if (rt == Insert) 
+        return PacBio::BAM::VirtualRegionType::SUBREAD;
+    if (rt == Adapter)
+        return PacBio::BAM::VirtualRegionType::ADAPTER;
+    if (rt == HQRegion)
+        return PacBio::BAM::VirtualRegionType::HQREGION;
+    if (rt == BarCode)
+        return PacBio::BAM::VirtualRegionType::BARCODE;
+
+    assert(false && "Unable to convert RegionType to VirtualRegionType.");
+    // With NDEBUG the assert is compiled out; previously control then ran off
+    // the end of this non-void function, which is undefined behaviour.
+    throw std::invalid_argument("Unable to convert RegionType to VirtualRegionType.");
+}
+
+
+
+#endif
diff --git a/utils/bam2bax/src/RegionsAdapter.h b/utils/bam2bax/src/RegionsAdapter.h
new file mode 100644
index 0000000..da60d96
--- /dev/null
+++ b/utils/bam2bax/src/RegionsAdapter.h
@@ -0,0 +1,118 @@
+// Author: Yuan Li
+
+#ifndef _REGIONS_ADAPTER_H_
+#define _REGIONS_ADAPTER_H_
+
+#include "pbdata/reads/RegionAnnotation.hpp"
+#include "RegionTypeAdapter.h"
+
+/// Static helpers that build pbdata RegionAnnotation objects from
+/// PacBio::BAM virtual regions.
+class RegionsAdapter {
+public:
+    /// \name Conversions from VirtualRegion to RegionAnnotation
+    /// \{
+
+    /// Converts PacBio::BAM::VirtualRegion to RegionAnnotation
+    /// in pbdata.
+    /// VirtualRegion has four fields, including
+    ///      * region type,
+    ///      * region start,
+    ///      * region end,
+    ///      * region score.
+    /// RegionAnnotation has five fields, including
+    ///      * holeNumber,        --> missing in VirtualRegion
+    ///      * region type index, --> not region type.
+    ///      * region start,
+    ///      * region end,
+    ///      * region score
+    /// \note region type has to be converted to region type index.
+    /// \param[in] holeNumber zmw hole number, which is absent in VirtualRegion,
+    ///            but is required by RegionAnnotation
+    /// \param[in] vr virtual region, which contains region type, 
+    ///            region start, region end and region score, out of which
+    ///            region type has to be converted to region type index.
+    /// \param[in] regionTypes a table to look up region types according 
+    ///            to region type index.
+    /// \returns a RegionAnnotation object
+    static inline
+    RegionAnnotation ToRegionAnnotation(const UInt holeNumber, 
+                                        const PacBio::BAM::VirtualRegion & vr, 
+                                        std::vector<RegionType> & regionTypes);
+
+    /// Comparison between two RegionAnnotations to decide their order in H5 RegionTable.
+    /// Orders by type index, then by start; for equal starts the annotation
+    /// with the larger end comes first.
+    static inline 
+    bool CmpRegionAnnotations(const RegionAnnotation & l, 
+                              const RegionAnnotation & r);
+
+    /// Creates a vector of RegionAnnotations from a virtual polymerase bam record.
+    /// \returns a vector of RegionAnnotations created from a virtual polymerase bam record,
+    ///          sorted with CmpRegionAnnotations.
+    /// \param[in] record input Virtual Polymerase Bam Record.
+    /// \param[in] regionTypes a table to look up region types according 
+    ///            to region type index.
+    static inline 
+    std::vector<RegionAnnotation> ToRegionAnnotations(
+            const PacBio::BAM::VirtualPolymeraseBamRecord & record, 
+            std::vector<RegionType> & regionTypes);
+
+    /// \}
+}; // class RegionsAdapter
+
+
+/// Builds a RegionAnnotation for holeNumber from vr, replacing the region
+/// type with its index in regionTypes.
+RegionAnnotation RegionsAdapter::ToRegionAnnotation(
+    const UInt holeNumber, 
+    const PacBio::BAM::VirtualRegion & vr, 
+    std::vector<RegionType> & regionTypes) 
+{
+    const int typeIndex = RegionTypeAdapter::ToRegionTypeIndex(vr.type, regionTypes);
+
+    const bool isEmptyHQRegion =
+        (vr.type == PacBio::BAM::VirtualRegionType::HQREGION) && (vr.beginPos == vr.endPos);
+    if (isEmptyHQRegion) {
+        // bug 29935, by convention, use HQREGION 0, 0, 0 if no HQREGION is found.
+        return RegionAnnotation(holeNumber, typeIndex, 0, 0, 0);
+    }
+
+    return RegionAnnotation(holeNumber, typeIndex, vr.beginPos, vr.endPos, vr.score);
+}
+
+/// Strict ordering for RegionAnnotations of the same hole: ascending type
+/// index, then ascending start, then descending end.
+bool RegionsAdapter::CmpRegionAnnotations(const RegionAnnotation & l, 
+                                          const RegionAnnotation & r) 
+{
+    assert(l.GetHoleNumber() == r.GetHoleNumber());
+
+    if (l.GetTypeIndex() != r.GetTypeIndex())
+        return (l.GetTypeIndex() < r.GetTypeIndex());
+
+    if (l.GetStart() != r.GetStart())
+        return l.GetStart() < r.GetStart();
+
+    // Same type and start: the annotation reaching further sorts first.
+    return l.GetEnd() > r.GetEnd();
+}
+
+/// Creates the RegionAnnotations of a virtual polymerase bam record.
+///
+/// Every virtual region whose type is convertible to one of regionTypes
+/// becomes one annotation; other regions are skipped. For ADAPTER and
+/// HQREGION regions of a record that has a read accuracy, the annotation
+/// score is replaced by that accuracy (scaled by 1000 when it is below 1.0).
+///
+/// \param[in] record       input Virtual Polymerase Bam Record.
+/// \param[in] regionTypes  table used to turn region types into type indices.
+/// \returns the annotations, sorted with CmpRegionAnnotations.
+std::vector<RegionAnnotation> RegionsAdapter::ToRegionAnnotations (
+        const PacBio::BAM::VirtualPolymeraseBamRecord & record,
+        std::vector<RegionType> & regionTypes) {
+
+    // Bind by const reference and iterate by const reference: the map and its
+    // regions are only read, so there is no need to copy them.
+    const auto & virtualRegionMap_ = record.VirtualRegionsMap();
+    std::vector<RegionAnnotation> ret;
+    for (const auto & entry : virtualRegionMap_) {
+        for (const PacBio::BAM::VirtualRegion & vr : entry.second) {
+            if (RegionTypeAdapter::IsConvertible(vr.type, regionTypes)) {
+                RegionAnnotation annotation = RegionsAdapter::ToRegionAnnotation(record.HoleNumber(), vr, regionTypes);
+                if ((vr.type == PacBio::BAM::VirtualRegionType::ADAPTER or
+                     vr.type == PacBio::BAM::VirtualRegionType::HQREGION) and 
+                     record.HasReadAccuracy()) {
+                    float rq = record.ReadAccuracy(); 
+                    annotation.SetScore((rq >= 1.0)?(int(rq)):(int(rq * 1000)));
+                }
+                ret.push_back(annotation);
+            } // Some region types such as LQRegion can not be converted
+        }
+    }
+    std::sort(ret.begin(), ret.end(), RegionsAdapter::CmpRegionAnnotations); 
+
+    return ret;
+}
+
+#endif
diff --git a/utils/bam2bax/src/Settings.cpp b/utils/bam2bax/src/Settings.cpp
new file mode 100644
index 0000000..5d54d56
--- /dev/null
+++ b/utils/bam2bax/src/Settings.cpp
@@ -0,0 +1,232 @@
+// Author: Yuan Li
+
+#include "Settings.h"
+#include "OptionParser.h"
+#include "StringUtils.hpp"
+#include <unistd.h> // getcwd
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <stdio.h>
+#include <algorithm>
+#include "PacBioDefs.h"
+
+#define DEBUG_SETTINGS
+
+using namespace std;
+
+namespace internal {
+
+// Derives the movie name from a file name by calling Splice twice: first on
+// "/" keeping the last token, then on "." keeping the first token.
+// (Presumably Splice splits its first argument on the given delimiter into
+// the output vector; verify against StringUtils.hpp.)
+static
+std::string GetMovienameFromFilename(const std::string & filename) {
+    std::vector<std::string> parts; 
+    Splice(filename, "/", parts);
+    // Copied out because `parts` is reused as the output of the next call.
+    const std::string leaf = parts.back();
+    Splice(leaf, ".", parts);
+    return parts.front();
+}
+
+// True when `file` begins with '/'.
+static 
+bool IsAbsolutePath(const std::string & file) 
+{
+    return !file.empty() && file[0] == '/';
+}
+
+// Returns the current working directory as reported by getcwd, or an empty
+// string when getcwd fails.
+static
+std::string CurrentWorkingDirectory(void) 
+{
+    char cwd[FILENAME_MAX] = { };
+    const bool ok = (getcwd(cwd, FILENAME_MAX) != nullptr);
+    return ok ? std::string(cwd) : std::string();
+}
+
+// Returns the directory part of `file` (everything before its last '/').
+// Absolute inputs are returned as they are cut; relative inputs are prefixed
+// with the current working directory. A relative input without any '/'
+// yields the current working directory itself.
+// Note: for an input such as "/name" the last '/' is at position 0, so the
+// result is the empty string.
+static 
+std::string DirectoryPath(const std::string & file) 
+{
+    const std::size_t lastSlash = file.rfind('/');
+    const bool hasSlash = (lastSlash != std::string::npos);
+
+    if (IsAbsolutePath(file)) {
+        return hasSlash ? file.substr(0, lastSlash) : std::string();
+    }
+
+    if (!hasSlash) {
+        return CurrentWorkingDirectory();
+    }
+    return CurrentWorkingDirectory() + "/" + file.substr(0, lastSlash);
+}
+
+// Runs `cmd` through popen and returns everything it wrote to stdout.
+// Returns the literal string "ERROR" when the pipe cannot be opened.
+// The command's exit status (pclose result) is not reported.
+static 
+std::string EXEC(const std::string & cmd) {
+    FILE* pipe = popen(cmd.c_str(), "r");
+    if (!pipe) return "ERROR";
+    char buffer[128];
+    std::string result;
+    // Loop on fgets itself rather than on feof: after a read error fgets
+    // returns NULL without the EOF flag being set, which made the previous
+    // `while(!feof(pipe))` loop spin forever.
+    while (fgets(buffer, sizeof(buffer), pipe) != NULL) {
+        result += buffer;
+    }
+    pclose(pipe);
+    return result;
+}
+
+// Returns the last path component of the resolved (realpath) form of
+// `folder`, i.e. a directory name without any leading path.
+// The name is obtained by running `realpath` and `basename` in a shell; when
+// the command prints nothing the result is empty.
+static  // return parent directory name, no absolute path
+std::string ParentDirectoryName (const std::string & folder) {
+    // Single-quote the path for the shell so that spaces and metacharacters
+    // in it are passed through literally; an embedded ' becomes '\''.
+    std::string quoted = "'";
+    for (std::string::size_type i = 0; i < folder.size(); ++i) {
+        if (folder[i] == '\'')
+            quoted += "'\\''";
+        else
+            quoted += folder[i];
+    }
+    quoted += "'";
+
+    std::stringstream ss;
+    ss << "basename \"$(realpath " << quoted << ")\"";
+    std::string name = EXEC(ss.str());
+
+    // basename terminates its output with a newline, which is not part of
+    // the directory name.
+    while (!name.empty() &&
+           (name[name.size() - 1] == '\n' || name[name.size() - 1] == '\r')) {
+        name.erase(name.size() - 1);
+    }
+    return name;
+}
+
+} // namespace internal
+
+// Names under which FromCommandLine looks the options up in the parsed values.
+const char* Settings::Option::input_        = "input";
+const char* Settings::Option::output_       = "output";
+const char* Settings::Option::metadata_     = "metadata";
+const char* Settings::Option::baseMode_     = "base";
+const char* Settings::Option::pulseMode_    = "pulse";
+const char* Settings::Option::baseMap_      = "basemap";
+const char* Settings::Option::ignoreQV_     = "ignoreQV";
+const char* Settings::Option::trace_        = "trace";
+// Default base map, used by the Settings constructor.
+// NOTE(review): this stores a pointer obtained from an object declared
+// outside this file; it presumably relies on that object being initialised
+// first and living for the whole program - confirm against PacBioDefs.h.
+const char* Settings::OptionValue::baseMap_ = PacBio::AttributeValues::ScanData::DyeSet::basemap.c_str();
+
+// Defaults: base mode, quality values not ignored, default base map.
+Settings::Settings()
+    : mode(Settings::BaseMode),
+      ignoreQV(false),
+      baseMap(Settings::OptionValue::baseMap_)
+{
+}
+
+// Builds a Settings object from the command line.
+//
+// parser          option parser; supplies program info and parses argv.
+// argc, argv      command line as passed to main.
+// forcePulseMode  when true the mode is PulseMode regardless of the options.
+//
+// Problems found while interpreting the options are not thrown; each one is
+// appended as a message to settings.errors_, which the caller should inspect.
+// When DEBUG_SETTINGS is defined a summary of the settings is written to cerr.
+Settings Settings::FromCommandLine(optparse::OptionParser& parser,
+                                   int argc,
+                                   char *argv[],
+                                   bool forcePulseMode)
+{
+    Settings settings;
+
+    // general program info
+    settings.program = parser.prog();
+    settings.description = parser.description();
+    settings.version = parser.version();
+    for (int i = 1; i < argc; ++i) {
+        settings.args.append(argv[i]);
+        settings.args.append(" ");
+    }
+
+    const optparse::Values options = parser.parse_args(argc, argv);
+
+    // mode
+    settings.ignoreQV = options.is_set(Settings::Option::ignoreQV_);
+
+    const bool isBaseMode = 
+        options.is_set(Settings::Option::baseMode_) ? options.get(Settings::Option::baseMode_) : false;
+
+    const bool isPulseMode = 
+        options.is_set(Settings::Option::pulseMode_) ? options.get(Settings::Option::pulseMode_) : false;
+
+    // Neither flag means base mode; both flags together are an error and
+    // leave the mode at its default.
+    if (isBaseMode and isPulseMode) {
+        settings.errors_.push_back("Unknown modes selected.");
+    } else if (isPulseMode) {
+        settings.mode = Settings::PulseMode;
+    } else {
+        settings.mode = Settings::BaseMode;
+    }
+
+    if (forcePulseMode) settings.mode = Settings::PulseMode;
+
+    // BaseMap
+    if (not options[Settings::Option::baseMap_].empty()) {
+        settings.baseMap = options[Settings::Option::baseMap_];
+        // toupper needs a value representable as unsigned char; a plain,
+        // possibly negative char is undefined behaviour.
+        std::transform(settings.baseMap.begin(), settings.baseMap.end(), settings.baseMap.begin(),
+                       [](unsigned char c) { return static_cast<char>(::toupper(c)); });
+        // A valid base map is a permutation of the four bases.
+        std::string _baseMap = settings.baseMap;
+        std::sort(_baseMap.begin(), _baseMap.end());
+        if (_baseMap != "ACGT") { settings.errors_.push_back("Bad basemap."); }
+    }
+
+    // input
+    settings.inputBamFilenames = parser.args();
+    if (settings.inputBamFilenames.size() == 1) {
+        settings.polymeraseBamFilename = settings.inputBamFilenames[0];
+        if (settings.polymeraseBamFilename.find("polymerase.bam") == std::string::npos)
+            settings.errors_.push_back("missing input *.polymerase.bam.");
+    } else if (settings.inputBamFilenames.size() == 2) {
+        settings.subreadsBamFilename = settings.inputBamFilenames[0];
+        settings.scrapsBamFilename   = settings.inputBamFilenames[1];
+        if (settings.subreadsBamFilename.find("subreads.bam") == std::string::npos)
+            settings.errors_.push_back("missing input *.subreads.bam.");
+        if (settings.scrapsBamFilename.find("scraps.bam") == std::string::npos)
+            settings.errors_.push_back("missing input *.scraps.bam.");
+    } else {
+        settings.errors_.push_back("missing input (polymerase.bam or subreads+scraps.bam).");
+    }
+
+    if (options.is_set(Settings::Option::trace_)) {
+        settings.traceFilename = options[Settings::Option::trace_];
+    } else {
+        settings.traceFilename = "";
+    }
+
+    // output 
+    settings.outputBaxPrefix = options[Settings::Option::output_];
+    if (settings.outputBaxPrefix.empty()) { // if output prefix not set.
+        if (not settings.subreadsBamFilename.empty()) {
+            settings.outputBaxPrefix = internal::GetMovienameFromFilename(settings.subreadsBamFilename);
+        } else if (not settings.polymeraseBamFilename.empty()) {
+            settings.outputBaxPrefix = internal::GetMovienameFromFilename(settings.polymeraseBamFilename);
+        }
+    }
+
+    if (settings.mode == Settings::BaseMode) 
+        settings.outputBaxFilename = settings.outputBaxPrefix + ".bax.h5";
+    else if (settings.mode == Settings::PulseMode)
+        settings.outputBaxFilename = settings.outputBaxPrefix + ".plx.h5";
+    settings.outputRgnFilename = settings.outputBaxPrefix + ".rgn.h5";
+
+    // movie
+    settings.movieName = internal::GetMovienameFromFilename(settings.outputBaxPrefix);
+
+    if (options.is_set(Settings::Option::metadata_)) {
+        // metadata.xml will be placed at upper directory of bax.h5
+        settings.outputMetadataFilename = internal::DirectoryPath(settings.outputBaxPrefix) + "/../" + 
+                                          settings.movieName + ".metadata.xml";
+        settings.outputAnalysisDirname = internal::ParentDirectoryName(internal::DirectoryPath(settings.outputBaxPrefix));
+    }
+
+#ifdef DEBUG_SETTINGS
+    string modeString = "Unknown";
+    if (settings.mode == Settings::BaseMode)
+        modeString = "base";
+    else if (settings.mode == Settings::PulseMode)
+        modeString = "pulse";
+
+    cerr << "CommandLine: " << settings.program << " " << settings.args << endl
+         << "Description: " << settings.description << endl
+         << "Version    : " << settings.version << endl
+         << "Mode       : " << modeString << endl
+         << "BaseMap    : " << settings.baseMap << endl
+         << "Movie name : " << settings.movieName << endl
+         << "Input files: " << endl;
+    if (not settings.subreadsBamFilename.empty())
+         cerr << " subreads  : " << settings.subreadsBamFilename << endl;
+    if (not settings.scrapsBamFilename.empty())
+         cerr << " scraps    : " << settings.scrapsBamFilename << endl;
+    if (not settings.polymeraseBamFilename.empty())
+         cerr << " polymerase: " << settings.polymeraseBamFilename << endl;
+    if (not settings.traceFilename.empty())
+         cerr << " trace     : " << settings.traceFilename << endl;
+    cerr << "Output h5  : " << settings.outputBaxFilename << endl
+         << "Output xml : " << settings.outputMetadataFilename << endl;
+
+#endif
+
+    return settings;
+}
diff --git a/utils/bam2bax/src/Settings.h b/utils/bam2bax/src/Settings.h
new file mode 100644
index 0000000..a2ca17a
--- /dev/null
+++ b/utils/bam2bax/src/Settings.h
@@ -0,0 +1,80 @@
+// Author: Yuan Li
+
+#ifndef SETTINGS_H
+#define SETTINGS_H
+
+#include <string>
+#include <vector>
+
+namespace optparse { class OptionParser; }
+
+/// Command-line settings of the converter: input and output file names,
+/// conversion mode, base map, and the errors found while parsing.
+class Settings
+{
+public:
+    enum Mode { BaseMode  // BAM to BAX.H5
+              , PulseMode // BAM to PLX.H5
+              }; 
+
+public:
+    /// Sets the defaults: BaseMode, ignoreQV false, default base map.
+    Settings(void);
+
+    /// Builds a Settings object from the command line; problems are
+    /// collected in errors_. forcePulseMode selects PulseMode regardless of
+    /// the mode options.
+    static Settings FromCommandLine(optparse::OptionParser& parser,
+                                    int argc,
+                                    char* argv[],
+                                    bool forcePulseMode=false);
+
+    // option names used to look values up in the parsed command line
+    struct Option {
+        static const char* input_;
+        static const char* output_;
+        static const char* metadata_;
+        static const char* baseMode_;
+        static const char* pulseMode_;
+        static const char* ignoreQV_;
+        static const char* baseMap_;
+        static const char* trace_;
+    };
+
+    // default option value
+    struct OptionValue {
+        static const char* baseMap_;
+    };
+
+public:
+    // input: all positional arguments, then the same names by role
+    // (either one polymerase bam, or subreads + scraps)
+    std::vector<std::string> inputBamFilenames;
+
+    std::string subreadsBamFilename;
+    std::string scrapsBamFilename;
+    std::string polymeraseBamFilename;
+    
+    // recommended, but optional input
+    std::string traceFilename;
+
+    //output
+    std::string outputBaxPrefix;
+    std::string outputBaxFilename;   // prefix + ".bax.h5" or ".plx.h5", by mode
+    std::string outputRgnFilename;   // prefix + ".rgn.h5"
+
+    // only filled in when the metadata option is set
+    std::string outputMetadataFilename;
+    std::string outputAnalysisDirname;
+
+    // program info
+    std::string program;
+    std::string args;
+    std::string version;
+    std::string description;
+
+    // generated
+    std::string movieName;
+
+    // mode
+    Mode mode;
+
+    bool ignoreQV;
+
+    // base map
+    std::string baseMap;
+
+    // command line parsing: one message per problem found; empty when the
+    // command line was accepted
+    std::vector<std::string> errors_;
+};
+
+#endif // SETTINGS_H
diff --git a/utils/bam2bax/tests/CMakeLists.txt b/utils/bam2bax/tests/CMakeLists.txt
new file mode 100644
index 0000000..86bbe19
--- /dev/null
+++ b/utils/bam2bax/tests/CMakeLists.txt
@@ -0,0 +1,93 @@
+# Create the bin/ and out/ directories under the tests directory.
+file(MAKE_DIRECTORY
+    ${Bam2Bax_TestsDir}/bin
+    ${Bam2Bax_TestsDir}/out
+)
+
+# Generate TestData.h from its template (path for test data)
+configure_file(
+    ${Bam2Bax_TestsDir}/src/TestData.h.in
+    ${Bam2Bax_TestsDir}/src/TestData.h
+)
+
+# files.cmake is included before the source list below is assembled.
+include(files.cmake)
+
+include_directories(
+    ${Bam2Bax_SourceDir}
+    ${BLASR_INCLUDE_DIRS}
+    ${Boost_INCLUDE_DIRS}
+    ${HDF5_INCLUDE_DIRS}
+    ${HTSLIB_INCLUDE_DIRS}
+    ${PacBioBAM_INCLUDE_DIRS}
+    ${PBDATA_INCLUDE_DIRS}
+    ${PBDATA_ROOT_DIR}
+    ${PBIHDF_INCLUDE_DIRS}
+    ${ZLIB_INCLUDE_DIRS}
+    ${gtest_SOURCE_DIR}/include
+    ${gtest_SOURCE_DIR}
+)
+
+# Headers and sources of the test executable.
+set(SOURCES ${Bam2BaxTest_H} ${Bam2BaxTest_CPP})
+
+# shared CXX flags for src & tests
+include(CheckCXXCompilerFlag)
+set(Bam2Bax_CXX_FLAGS "-g -std=c++11 -Wall")
+
+# quash warnings from pbdata: each flag is appended only when the compiler
+# accepts it
+check_cxx_compiler_flag("-Wno-overloaded-virtual" HAS_NO_OVERLOADED_VIRTUAL)
+if(HAS_NO_OVERLOADED_VIRTUAL)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-overloaded-virtual")
+endif()
+check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+if(HAS_NO_UNUSED_PRIVATE_FIELD)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-unused-private-field")
+endif()
+check_cxx_compiler_flag("-Wno-unused-variable" HAS_NO_UNUSED_VARIABLE)
+if(HAS_NO_UNUSED_VARIABLE)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-unused-variable")
+endif()
+check_cxx_compiler_flag("-Wno-uninitialized" HAS_NO_UNINITIALIZED)
+if(HAS_NO_UNINITIALIZED)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-uninitialized")
+endif()
+# NOTE(review): unlike its neighbours this adds the warning in its positive
+# form (no "-Wno-" prefix), i.e. it enables rather than quashes it - confirm
+# that this is intended.
+check_cxx_compiler_flag("-Wunused-but-set-variable" HAS_UNUSED_BUT_SET_VARIABLE)
+if(HAS_UNUSED_BUT_SET_VARIABLE)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wunused-but-set-variable")
+endif()
+# NOTE: -Wno-unused-local-typedef used to quash clang warnings w/ Boost
+check_cxx_compiler_flag("-Wno-unused-local-typedef" HAS_NO_UNUSED_LOCAL_TYPEDEF)
+if(HAS_NO_UNUSED_LOCAL_TYPEDEF)
+    set(Bam2Bax_CXX_FLAGS "${Bam2Bax_CXX_FLAGS} -Wno-unused-local-typedef")
+endif()
+
+# Append the collected flags to the global compiler and linker flags.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Bam2Bax_CXX_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${Bam2Bax_EXE_LINKER_FLAGS}")
+
+# Unit-test executable, written to the tests' bin directory.
+add_executable(test_bam2bax ${SOURCES})
+set_target_properties(test_bam2bax
+    PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${Bam2Bax_TestsDir}/bin
+)
+
+# -lrt is linked everywhere except on Apple; there MY_LIBRT is left unset.
+if(NOT APPLE)
+    set(MY_LIBRT -lrt)
+endif()
+
+target_link_libraries(test_bam2bax
+    gtest
+    gtest_main
+    ${BLASR_LIBRARIES}
+    ${PBIHDF_LIBRARIES}
+    ${PBDATA_LIBRARIES}
+    ${HDF5_CPP_LIBRARIES}
+    ${HDF5_LIBRARIES}
+    ${PacBioBAM_LIBRARIES}
+    ${HTSLIB_LIBRARIES}
+    ${ZLIB_LIBRARIES}
+    ${CMAKE_THREAD_LIBS_INIT} # quirky pthreads
+    ${MY_LIBRT}
+)
+
+# Register the executable with CTest; it runs from the bin directory.
+add_test(
+    NAME UnitTests
+    WORKING_DIRECTORY ${Bam2Bax_TestsDir}/bin
+    COMMAND test_bam2bax
+)
diff --git a/utils/bam2bax/tests/cram/bam2bax.t b/utils/bam2bax/tests/cram/bam2bax.t
new file mode 100755
index 0000000..735ff3e
--- /dev/null
+++ b/utils/bam2bax/tests/cram/bam2bax.t
@@ -0,0 +1,86 @@
+Create Test Files And Run Tests
+First convert *.bax.h5 to bam using bax2bam
+Next convert *.bam back to *.bax.h5 using bam2bax.
+Then convert generated bax.h5 to bam again using bax2bam
+Finally, compare whether bam files are identical.
+
+  $ BAX2BAM=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/smrtcmds/bin/bax2bam
+  $ BLASR=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/smrtcmds/bin/blasr
+  $ . /mnt/software/Modules/current/init/sh
+  $ module load samtools
+  $ SAMTOOLS=samtools
+
+BAM2BAX=? MUST SET UP PATH TO BAM2BAX
+
+  $ I_PREFIX=m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1
+  $ I_BAX_H5=$I_PREFIX.bax.h5
+  $ I_SR_BAM=$I_PREFIX.subreads.bam
+  $ I_SC_BAM=$I_PREFIX.scraps.bam
+  $ I_SR_SAM=$I_PREFIX.subreads.sam
+  $ I_SC_SAM=$I_PREFIX.scraps.sam
+
+  $ O_DIR=Analysis_Results
+  $ O_PREFIX=$O_DIR/${I_PREFIX}
+  $ O_BAX_H5=$O_PREFIX.bax.h5
+  $ O_SR_BAM=$O_PREFIX.subreads.bam
+  $ O_SC_BAM=$O_PREFIX.scraps.bam
+  $ O_SR_SAM=$O_PREFIX.subreads.sam
+  $ O_SC_SAM=$O_PREFIX.scraps.sam
+  $ O_META_XML=`echo $I_PREFIX | cut -f 1 -d '.'`.metadata.xml
+
+Clean
+  $ rm -f *.bam *.sam *.tmp $O_PREFIX.* $I_PREFIX.* ../${I_PREFIX}.metadata.xml
+
+Copy input bax.h5
+  $ cp /pbi/dept/secondary/siv/testdata/bam2bax/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 .
+
+Make output dir
+  $ mkdir -p $O_DIR
+
+Input *.subreads.bam, *.scraps.bam
+  $ $BAX2BAM $I_BAX_H5 -o $I_PREFIX --subread --losslessframes && echo $?
+  0
+  $ ls $I_SR_BAM $I_SC_BAM > /dev/null || echo failed to convert input bax.h5 to input bam
+
+Output bax.h5: convert input bam to output bax.h5
+  $ $BAM2BAX $I_SR_BAM $I_SC_BAM -o $O_PREFIX --metadata 1>/dev/null 2>/dev/null && echo $?
+  0
+
+Check existence of metadata.xml
+  $ ls $O_META_XML >/dev/null && echo $?
+  0
+
+Output bam
+Convert output bax.h5 to bam
+  $ $BAX2BAM $O_BAX_H5 -o $O_PREFIX --subread --losslessframes && echo $?
+  0
+
+To sam
+  $ $SAMTOOLS view -h $I_SR_BAM -o $I_SR_SAM && cat $I_SR_SAM | grep -v '^@' > $I_SR_SAM.tmp
+  $ $SAMTOOLS view -h $I_SC_BAM -o $I_SC_SAM && cat $I_SC_SAM | grep -v '^@' > $I_SC_SAM.tmp
+  $ $SAMTOOLS view -h $O_SR_BAM -o $O_SR_SAM && cat $O_SR_SAM | grep -v '^@' > $O_SR_SAM.tmp
+  $ $SAMTOOLS view -h $O_SC_BAM -o $O_SC_SAM && cat $O_SC_SAM | grep -v '^@' > $O_SC_SAM.tmp
+
+diff input with output: subreads first, then scraps; either command prints a message only when the files differ
+  $ diff $I_SR_SAM.tmp $O_SR_SAM.tmp || echo I.subreads.bam and O.subreads.bam are not identical
+  $ diff $I_SC_SAM.tmp $O_SC_SAM.tmp || echo I.scraps.bam and O.scraps.bam are not identical
+
+
+polymerase.bam to bax.h5
+  $ $BAM2BAX /pbi/dept/secondary/siv/testdata/bam2bax/bam2plx/small.polymerase.bam -o Analysis_Results/polymerase 1>/dev/null 2>/dev/null && echo $?
+  0
+
+  $ h5ls -r Analysis_Results/polymerase.bax.h5 |grep PulseData/Regions |wc -l
+  1
+
+  $ h5dump -d PulseData/Regions Analysis_Results/polymerase.bax.h5 |grep '133229, 2, 0, 0, 0' |wc -l
+  1
+
+
+ZMW with no HQ region
+  $ $BAM2BAX /pbi/dept/secondary/siv/testdata/bam2bax/all_lq/all_lq.subreads.bam /pbi/dept/secondary/siv/testdata/bam2bax/all_lq/all_lq.scraps.bam -o Analysis_Results/all_lq 1>/dev/null 2>/dev/null && echo $?
+  0
+
+  $ h5dump -d /PulseData/Regions Analysis_Results/all_lq.bax.h5 |grep "(0,0): 47775928, 2, 0, 0, 700"
+     (0,0): 47775928, 2, 0, 0, 700,
+
diff --git a/utils/bam2bax/tests/cram/bam2plx.t b/utils/bam2bax/tests/cram/bam2plx.t
new file mode 100755
index 0000000..07669c4
--- /dev/null
+++ b/utils/bam2bax/tests/cram/bam2plx.t
@@ -0,0 +1,230 @@
+Create Test Files And Run Tests
+Convert *.bam to *.pls.h5 using bam2plx.
+
+Must define BAM2PLX= either as 'bam2bax --pulse' or 'bam2plx'
+#  $ BAM2PLX='PATH_TO_//depot/software/smrtanalysis/bioinformatics/staging/PostPrimary/bam2bax/bin/bam2bax --pulse'
+#  $ BAM2PLX='PATH_TO_//depot/software/smrtanalysis/bioinformatics/staging/PostPrimary/bam2bax/bin/bam2plx'
+
+  $ . /mnt/software/Modules/current/init/sh
+  $ module load samtools
+  $ SAMTOOLS=samtools
+
+  $ I_PREFIX=$TESTDIR/../data/tiny_bam2plx
+  $ I_PL_SAM=$I_PREFIX.polymerase.sam
+  $ I_PL_BAM=$I_PREFIX.polymerase.bam
+
+  $ O_DIR=`realpath Analysis_Results`
+  $ O_PREFIX=$O_DIR/`basename ${I_PREFIX}`
+  $ O_PLX_H5=$O_PREFIX.plx.h5
+  $ O_META_XML=$O_DIR/../`basename $I_PREFIX | cut -f 1 -d '.'`.metadata.xml
+
+Clean
+  $ rm -rf $O_PLX_H5 $O_META_XML
+
+  $ mkdir -p $O_DIR
+
+===================================================================
+Convert polymerase bam to plx.h5
+  $ $SAMTOOLS view -bS $I_PL_SAM -o $I_PL_BAM 1>/dev/null 2>/dev/null && echo $?
+  0
+
+#Old bam input should be rejected
+#  $ $BAM2PLX $I_PL_BAM -o $O_PREFIX 2>&1 |tail -1
+#  ERROR:* (glob)
+
+===================================================================
+Convert subreads.bam + scraps.bam to plx.h5
+  $ I_PREFIX=$TESTDIR/../data/tiny
+  $ I_SR_BAM=$I_PREFIX.subreads.bam
+  $ I_SC_BAM=$I_PREFIX.scraps.bam
+  $ I_SR_SAM=$I_PREFIX.subreads.sam
+  $ I_SC_SAM=$I_PREFIX.scraps.sam
+
+Convert input bam to output plx.h5
+  $ $SAMTOOLS view -bS $I_SR_SAM -o $I_SR_BAM 1>/dev/null 2>/dev/null && echo $?
+  0
+  $ $SAMTOOLS view -bS $I_SC_SAM -o $I_SC_BAM 1>/dev/null 2>/dev/null && echo $?
+  0
+ 
+Check exit status and output
+  $ $BAM2PLX $I_SR_BAM $I_SC_BAM -o $O_PREFIX --metadata 1>/dev/null 2>/dev/null && echo $?
+  0
+
+Check existence of metadata.xml
+  $ ls $O_META_XML >/dev/null && echo $?
+  0
+
+Check h5
+  $ h5ls -r $O_PLX_H5
+  /                        Group
+  /PulseData               Group
+  /PulseData/BaseCalls     Group
+  /PulseData/BaseCalls/Basecall Dataset {15576/Inf}
+  /PulseData/BaseCalls/DeletionQV Dataset {15576/Inf}
+  /PulseData/BaseCalls/DeletionTag Dataset {15576/Inf}
+  /PulseData/BaseCalls/InsertionQV Dataset {15576/Inf}
+  /PulseData/BaseCalls/MergeQV Dataset {15576/Inf}
+  /PulseData/BaseCalls/PreBaseFrames Dataset {15576/Inf}
+  /PulseData/BaseCalls/PulseIndex Dataset {15576/Inf}
+  /PulseData/BaseCalls/QualityValue Dataset {15576/Inf}
+  /PulseData/BaseCalls/SubstitutionQV Dataset {15576/Inf}
+  /PulseData/BaseCalls/SubstitutionTag Dataset {15576/Inf}
+  /PulseData/BaseCalls/WidthInFrames Dataset {15576/Inf}
+  /PulseData/BaseCalls/ZMW Group
+  /PulseData/BaseCalls/ZMW/HoleNumber Dataset {9/Inf}
+  /PulseData/BaseCalls/ZMW/HoleStatus Dataset {9/Inf}
+  /PulseData/BaseCalls/ZMW/HoleXY Dataset {9/Inf, 2}
+  /PulseData/BaseCalls/ZMW/NumEvent Dataset {9/Inf}
+  /PulseData/BaseCalls/ZMWMetrics Group
+  /PulseData/BaseCalls/ZMWMetrics/HQRegionSNR Dataset {9/Inf, 4}
+  /PulseData/BaseCalls/ZMWMetrics/Productivity Dataset {9/Inf}
+  /PulseData/BaseCalls/ZMWMetrics/ReadScore Dataset {9/Inf}
+  /PulseData/PulseCalls    Group
+  /PulseData/PulseCalls/AltLabel Dataset {15581/Inf}
+  /PulseData/PulseCalls/AltLabelQV Dataset {15581/Inf}
+  /PulseData/PulseCalls/Channel Dataset {15581/Inf}
+  /PulseData/PulseCalls/Chi2 Dataset {15581/Inf, 4}
+  /PulseData/PulseCalls/IsPulse Dataset {15581/Inf}
+  /PulseData/PulseCalls/LabelQV Dataset {15581/Inf}
+  /PulseData/PulseCalls/MaxSignal Dataset {15581/Inf}
+  /PulseData/PulseCalls/MeanSignal Dataset {15581/Inf, 4}
+  /PulseData/PulseCalls/MergeQV Dataset {15581/Inf}
+  /PulseData/PulseCalls/MidSignal Dataset {15581/Inf}
+  /PulseData/PulseCalls/MidStdDev Dataset {15581/Inf}
+  /PulseData/PulseCalls/StartFrame Dataset {15581/Inf}
+  /PulseData/PulseCalls/WidthInFrames Dataset {15581/Inf}
+  /PulseData/PulseCalls/ZMW Group
+  /PulseData/PulseCalls/ZMW/BaselineLevel Dataset {9/Inf, 4}
+  /PulseData/PulseCalls/ZMW/BaselineSigma Dataset {9/Inf, 4}
+  /PulseData/PulseCalls/ZMW/HoleNumber Dataset {9/Inf}
+  /PulseData/PulseCalls/ZMW/HoleStatus Dataset {9/Inf}
+  /PulseData/PulseCalls/ZMW/HoleXY Dataset {9/Inf, 2}
+  /PulseData/PulseCalls/ZMW/NumEvent Dataset {9/Inf}
+  /PulseData/PulseCalls/ZMW/SignalLevel Dataset {9/Inf, 4}
+  /PulseData/PulseCalls/ZMW/SignalSigma Dataset {9/Inf, 4}
+  /PulseData/Regions       Dataset {44/Inf, 5}
+  /ScanData                Group
+  /ScanData/AcqParams      Group
+  /ScanData/DyeSet         Group
+  /ScanData/RunInfo        Group
+
+===================================================================
+Check PulseCalls/MeanSignal
+  $ h5dump -d /PulseData/PulseCalls/MeanSignal $O_PLX_H5 |sed -n '2,10p'
+  DATASET "/PulseData/PulseCalls/MeanSignal" {
+     DATATYPE  H5T_STD_U16LE
+     DATASPACE  SIMPLE { ( 15581, 4 ) / ( H5S_UNLIMITED, 4 ) }
+     DATA {
+     (0,0): 0, 0, 0, 112,
+     (1,0): 0, 75, 0, 0,
+     (2,0): 0, 0, 0, 65,
+     (3,0): 0, 60, 0, 0,
+     (4,0): 0, 0, 0, 62,
+
+Check PulseCalls/MidSignal
+  $ h5dump -d /PulseData/PulseCalls/MidSignal $O_PLX_H5 |grep "(0):"
+     (0): 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 0, 82,
+
+Check PulseCalls/WidthInFrames
+  $ h5dump -d /PulseData/PulseCalls/WidthInFrames $O_PLX_H5 | grep "(0):"
+     (0): 1, 2, 1, 3, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 4, 1, 3, 1,
+
+
+===================================================================
+Check BaseCalls attributes: SchemaRevision 
+  $ h5dump -a /PulseData/BaseCalls/SchemaRevision $O_PLX_H5 |grep ":"
+  *"1.1" (glob)
+  $ h5dump -a /PulseData/BaseCalls/SchemaRevision $O_PLX_H5 |grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+Check PulseCalls attributes: ChangeListID, Content, ContentStored, DataCreated, SchemaRevision
+  $ h5dump -a /PulseData/PulseCalls/ChangeListID $O_PLX_H5 | grep ":"
+  *"3.0.14.167651" (glob)
+  $ h5dump -a /PulseData/PulseCalls/ChangeListID $O_PLX_H5 | grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+$ h5dump -a /PulseData/PulseCalls/Content $O_PLX_H5 | grep ":"
+* "AltLabel,AltLabelQV,Channel,Chi2,IsPulse,LabelQV,MaxSignal,MeanSignal,MergeQV,MidSignal,MidStdDev,StartFrame,WidthInFrames,uint8_t,uint8_t,uint8_t,uint16_t,uint8_t,uint8_t,uint16_t,uint16_t,uint8_t,uint16_t,uint16_t,uint32_t,uint16_t" (glob)
+
+  $ h5dump -a /PulseData/PulseCalls/ContentStored $O_PLX_H5 | grep ":"
+     (0): 9
+  $ h5dump -a /PulseData/PulseCalls/ContentStored $O_PLX_H5 | grep "DATATYPE"
+     DATATYPE  H5T_STD_U32LE
+
+  $ h5dump -a /PulseData/PulseCalls/DataCreated $O_PLX_H5 | grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+  $ h5dump -a /PulseData/PulseCalls/SchemaRevision $O_PLX_H5 | grep ":"
+  *"1.1" (glob)
+  $ h5dump -a /PulseData/PulseCalls/SchemaRevision $O_PLX_H5 |grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+Check ScanData/AcqParams
+  $ h5dump -a /ScanData/AcqParams/AduGain $O_PLX_H5 | grep ":"
+  *1 (glob)
+  $ h5dump -a /ScanData/AcqParams/AduGain $O_PLX_H5 | grep "DATATYPE"
+  *H5T_IEEE_F32LE (glob)
+
+  $ h5dump -a /ScanData/AcqParams/CameraGain $O_PLX_H5 | grep ":"
+  *1 (glob)
+  $ h5dump -a /ScanData/AcqParams/CameraGain $O_PLX_H5 | grep "DATATYPE"
+  *H5T_IEEE_F32LE (glob)
+
+  $ h5dump -a /ScanData/AcqParams/CameraType $O_PLX_H5 | grep ":"
+  *0 (glob)
+  $ h5dump -a /ScanData/AcqParams/CameraType $O_PLX_H5 | grep "DATATYPE"
+  *H5T_STD_I32LE (glob)
+
+  $ h5dump -a /ScanData/AcqParams/HotStartFrame $O_PLX_H5 | grep ":"
+  * 0 (glob)
+  $ h5dump -a /ScanData/AcqParams/HotStartFrame $O_PLX_H5 | grep "DATATYPE"
+  *H5T_STD_U32LE (glob)
+
+  $ h5dump -a /ScanData/AcqParams/LaserOnFrame $O_PLX_H5 | grep ":"
+  * 0 (glob)
+  $ h5dump -a /ScanData/AcqParams/LaserOnFrame $O_PLX_H5 | grep "DATATYPE"
+  *H5T_STD_U32LE (glob)
+
+  $ h5dump -a /ScanData/AcqParams/FrameRate $O_PLX_H5 | grep ":"
+  *80.047 (glob)
+  $ h5dump -a /ScanData/AcqParams/FrameRate $O_PLX_H5 | grep "DATATYPE"
+  *H5T_IEEE_F32LE (glob)
+
+Check ScanData/RunInfo
+  $ h5dump -a /ScanData/RunInfo/InstrumentName $O_PLX_H5 | grep ":"
+  *"sequel" (glob)
+  $ h5dump -a /ScanData/RunInfo/InstrumentName $O_PLX_H5 | grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+  $ h5dump -a /ScanData/RunInfo/PlatformId $O_PLX_H5 | grep ":"
+  *4 (glob)
+  $ h5dump -a /ScanData/RunInfo/PlatformId $O_PLX_H5 | grep "DATATYPE"
+     DATATYPE  H5T_STD_U32LE
+
+  $ h5dump -a /ScanData/RunInfo/PlatformName $O_PLX_H5 | grep ":"
+  *"SequelAlpha" (glob)
+  $ h5dump -a /ScanData/RunInfo/PlatformName $O_PLX_H5 | grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+  $ h5dump -a /ScanData/RunInfo/MovieName $O_PLX_H5 | grep ":"
+     (0): "m54006_151021_185942"
+  $ h5dump -a /ScanData/RunInfo/MovieName $O_PLX_H5 | grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+Check ScanData/DyeSet
+  $ h5dump -a /ScanData/DyeSet/BaseMap $O_PLX_H5 |grep ":"
+  *"TGCA" (glob)
+  $ h5dump -a /ScanData/DyeSet/BaseMap $O_PLX_H5 |grep "DATATYPE"
+     DATATYPE  H5T_STRING {
+
+  $ h5dump -a /ScanData/DyeSet/NumAnalog $O_PLX_H5 |grep "DATATYPE"
+  *H5T_STD_U16LE (glob)
+
+===================================================================
+Check exit status and output
+  $ $BAM2PLX $I_SR_BAM $I_SC_BAM -o ${O_PREFIX}.atgc --baseMap ATCG 1>/dev/null 2>/dev/null && echo $?
+  0
+
+Check /ScanData/DyeSet/BaseMap
+  $ h5dump -a /ScanData/DyeSet/BaseMap $O_PREFIX.atgc.plx.h5 |grep "ATCG" |wc -l
+  1
diff --git a/utils/bam2bax/tests/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.scraps.bam b/utils/bam2bax/tests/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.scraps.bam
new file mode 100644
index 0000000..03b1aa4
Binary files /dev/null and b/utils/bam2bax/tests/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.scraps.bam differ
diff --git a/utils/bam2bax/tests/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.subreads.bam b/utils/bam2bax/tests/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.subreads.bam
new file mode 100644
index 0000000..374216d
Binary files /dev/null and b/utils/bam2bax/tests/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.subreads.bam differ
diff --git a/utils/bam2bax/tests/data/tiny.scraps.sam b/utils/bam2bax/tests/data/tiny.scraps.sam
new file mode 100644
index 0000000..f295422
--- /dev/null
+++ b/utils/bam2bax/tests/data/tiny.scraps.sam
@@ -0,0 +1,18 @@
+ at HD	VN:3.0.0	SO:unknown	pb:3.0.1
+ at RG	ID:d1a56a50	PL:PACBIO	DS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;StartFrame=sf;BINDINGKIT=100619300;SEQUENCINGKIT=100620000;BASECALLERVERSION=3.0.14.167651;FRAMERATEHZ=80.000000	PU:m54006_151021_185942
+ at PG	ID:baz2bam	PN:baz2bam	VN:2.0.0.167651
+ at PG	ID:bazFormat	PN:bazformat	VN:1.2.0
+ at PG	ID:bazwriter	PN:bazwriter	VN:2.0.0.167651
+m54006_151021_185942/47775928/1634_1684	4	*	0	255	*	*	0	0	TCTCGCTAACAAGCCTAAGAATGGTGCAGGAGGAGGAAAAGCGAAAGAGT	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,42,18,11,2,4,24,1,66,26,9,12,5,25,4,103,10,6,5,2,69,2,1,10,9,35,13,1,10,11,54,80,18,10,26,26,16,11,31,42,9,2,29,6,3,26,16,5,15,19,8	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,150, [...]
+m54006_151021_185942/47775928/4540_4589	4	*	0	255	*	*	0	0	CGCTCATCAACAGACCAACAACAGAAGACGGAGTAGGGAAAAGAGAGAT	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,473,148,1,8,42,3,114,74,446,7,14,6,159,12,7,9,9,57,4,370,42,61,36,28,1,3,16,13,37,1,23,41,3,111,3,10,16,18,65,88,19,4,1,31,123,30,5,8,13	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,164, [...]
+m54006_151021_185942/47775928/5754_5802	4	*	0	255	*	*	0	0	ACTCAGGCGAGGTACAACAACAACGGAGAGGAGGAAGAAGAGAGGAGA	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,35,46,21,28,21,17,276,41,58,49,74,24,41,76,43,5,6,67,353,13,18,28,42,2,23,17,4,2,56,53,34,59,9,7,13,24,2,31,7,3,6,6,4,13,4,65,13,50	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,75,154,155,160 [...]
+m54006_151021_185942/47775928/6894_6947	4	*	0	255	*	*	0	0	AGCTACCTAGCAAAGACAAGCAATAAATGGGAGGAGGAGGAAAAGGAGAGAGA	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,6,13,4,8,18,1,119,6,8,6,40,37,42,22,3,12,29,2,7,39,18,63,17,12,33,5,59,12,5,3,8,5,10,10,15,24,12,25,34,11,194,10,17,93,23,5,12,8,19,164,19,59,19	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/11128_11167	4	*	0	255	*	*	0	0	CGCTACCACAAACATCACGGAGGTGGGAAAAGAGAGAGA	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,3,3,6,37,6,4,5,7,45,12,20,5,7,3,1,1,2,41,25,2,8,8,19,33,3,3,4,7,13,20,5,7,6,19,16,58,14,92,1	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,144,67,148,153,65,155,103,71,150,73,66,46,134,72,132,126,76,151,82,112,78,116,77,133,81,98,85,77 [...]
+m54006_151021_185942/47775928/12023_12072	4	*	0	255	*	*	0	0	TCTCTCTCACAACAACAAACAGGAGGAGGAGGAAAGAGAAGAGAAGACT	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,26,26,52,26,783,7,104,21,11,207,97,96,132,5,55,75,6,124,266,137,405,70,21,45,56,66,53,38,68,29,10,194,19,6,57,43,42,28,136,375,143,16,2,48,5,108,123,113,10	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/12364_12412	4	*	0	255	*	*	0	0	ATACTCTCAAAAAACAAAACAAAGGGGAAAGTAGGAAAAGAGGAGAGA	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,12,2,2,18,1307,20,37,73,6,39,743,1243,353,2063,237,21,3409,24,21,129,77,128,17,167,84,200,3,114,7,15,2,15,25,34,41,64,15,28,125,41,3,143,268,29,62,49,84,32	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np [...]
+m54006_151021_185942/47775928/12729_12776	4	*	0	255	*	*	0	0	CTCTATCAACAACAACAACTGAAGGGGAGGAAGAAGAGAGAAGAGAT	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,23,215,4,179,32,121,34,14,126,93,91,36,64,12,15,88,72,23,50,319,77,64,18,55,132,193,20,13,21,20,48,134,10,11,34,46,41,1,17,155,40,5,340,23,32,127,30	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S, [...]
+m54006_151021_185942/47775928/13077_13118	4	*	0	255	*	*	0	0	CTCTCTCAACAACAACAAGAAGGAGGAGGAAAAGAGGAGAT	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,201,19,18,182,34,31,44,50,35,111,8,588,148,4,22,14,119,274,263,113,38,23,32,18,77,82,32,80,98,145,4,174,9,42,60,97,213,96,95,10,5	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,156,119,153,150,150,156,161,88,79,155,70,94,165,9 [...]
+m54006_151021_185942/47775928/13434_13481	4	*	0	255	*	*	0	0	TCCTCCTATCAACAACAACAACGGAGGAGTAGGAAAAGATAGAGAGA	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,81,21,388,48,33,86,393,20,134,233,25,103,95,30,42,7,5,105,147,6,83,39,16,37,403,65,45,13,81,88,1,11,275,308,52,26,130,50,18,10,17,67,84,88,45,60,1	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,15 [...]
+m54006_151021_185942/47775928/13784_13826	4	*	0	255	*	*	0	0	TTCTAGCTCAACAAACAACACTAGGAGAGGAAAAGAGAGAGA	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,11,41,37,94,50,630,6,72,9,9,47,647,117,1298,12,80,61,248,48,22,99,962,22,36,22,125,16,187,52,134,88,5,118,32,227,77,145,29,48,21,51,42	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,122,148,157,172,138,86,169,148,154,77,6 [...]
+m54006_151021_185942/47775928/14130_14172	4	*	0	255	*	*	0	0	ATCTCGCAACAACAACAACGGAGGAGGGAGGAAAGAGAGACT	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,10,64,6,683,8,380,18,18,20,140,35,70,23,26,26,191,37,86,49,83,185,120,250,3,4,61,3,82,58,8,72,28,22,109,236,42,8,95,18,2,1,1	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,106,138,151,154,174,97,170,92,90,186,73,74,149,78 [...]
+m54006_151021_185942/47775928/14844_14899	4	*	0	255	*	*	0	0	TCTCTCTATCAAAGACAGAACAGAAAAAAAACGGAGGAAGTATTAAAGAGAAGAT	*	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,86,28,48,13,31,12,1132,3,184,24,3,74,34,34,35,204,14,19,6,122,60,4,476,20,71,10,92,18,54,13,500,21,274,71,146,21,25,2,39,123,141,25,47,30,10,2,7,12,160,54,15,38,11,10,17	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!! [...]
diff --git a/utils/bam2bax/tests/data/tiny.subreads.sam b/utils/bam2bax/tests/data/tiny.subreads.sam
new file mode 100644
index 0000000..9529d27
--- /dev/null
+++ b/utils/bam2bax/tests/data/tiny.subreads.sam
@@ -0,0 +1,27 @@
+ at HD	VN:3.0.0	SO:unknown	pb:3.0.1
+ at RG	ID:a3e2f6e1	PL:PACBIO	DS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;StartFrame=sf;BINDINGKIT=100619300;SEQUENCINGKIT=100620000;BASECALLERVERSION=3.0.14.167651;FRAMERATEHZ=80.000000	PU:m54006_151021_185942
+ at PG	ID:baz2bam	PN:baz2bam	VN:2.0.0.167651
+ at PG	ID:bazFormat	PN:bazformat	VN:1.2.0
+ at PG	ID:bazwriter	PN:bazwriter	VN:2.0.0.167651
+m54006_151021_185942/47775920/0_27	4	*	0	255	*	*	0	0	AAGAAGAGGAAAGGGGGAAGGT	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNN	ip:B:S,34020,9949,2113,32763,11147,20116,16527,34072,54949,10177,1282,8890,18959,65535,37240,5400,65535,39234,18661,55300,24578,45288	iq:Z:!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,112,75,65,60,62,85,61,82,70,91,71,89,80,78,90,98,83,81,78,77,106,68,79,63,90,81,163	pc:Z:agagaAAGAAGAGGAAAGGGGGAAGGT	pd:B:S,65535,8177,31244,6648,2 [...]
+m54006_151021_185942/47775921/0_48	4	*	0	255	*	*	0	0	GTGAGAGAGGAAGGAAAGAGGGGACGGGGGGGGGAAGGAAAAAGAGGG	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,21671,44723,14533,11557,298,12671,38666,5630,35899,6216,46700,5008,43869,1081,9768,9416,23548,18920,1398,11870,28379,1959,2747,6881,6966,17095,13560,32413,3712,3383,31173,16545,48759,8725,44839,21684,8953,13437,6048,23137,5555,972,4899,2919,31405,2110,40579,12289	iq [...]
+m54006_151021_185942/47775922/0_54	4	*	0	255	*	*	0	0	GAAAGAAAAGGAAAGAAAAAAGAAAGAAAGAAGAGAAGGGAAGGGGGGGGAAAG	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,17489,37,667,2,128,76,380,2,29,22301,14637,1918,17011,5,1,2,4,5,1665,12602,65535,13136,26232,3871,3135,28690,3989,7690,4867,2831,31543,2,6280,8767,34264,49774,15161,4254,24515,808,367,65535,38088,17855,42895,11586,65535,2253,15863,24879,2,8444,1185 [...]
+m54006_151021_185942/47775923/0_120	4	*	0	255	*	*	0	0	AAGGTAAAGGAAGAGGAGGAGGGGCGAAGGGGAAAGGAAGAAAGGGGGGGAGGAGAGGGGGAAGGGAAAGGAAAGGGAAAAAAAGGGAAAAAAAAAGGGAGAAGAGAAGGGGAAGTAGGG	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,3330,4589,21770,161,2461,5359,1309,6481,35253,21, [...]
+m54006_151021_185942/47775924/0_34	4	*	0	255	*	*	0	0	GGGAAGAGAGAAGGGAGAGGGGGGAGGAGGGAGA	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,24732,34536,7162,26748,35835,45925,24284,49599,48469,16040,45770,39565,9533,65535,30558,6909,7022,30117,18893,42094,3,2,2,3,10226,5667,28678,3071,14040,13390,4420,65535,1,36541	iq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	mq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	np:i:1	pa:B:S,107,67,72,75,84,68,84,73,98,79,94,91, [...]
+m54006_151021_185942/47775925/0_112	4	*	0	255	*	*	0	0	GGGTATATTGGGGTGAGGGGGATGGGGGTGTGGAAAAAAAAAAAAAAAAAAAAAAAAACAACAAAAAAAAAAAAAAGGATGAAAGGGGGGAGAGAAAGGGTGGGGAGGGAGA	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,148,19115,2,36,9,36785,1,1,78,9310,13880,386,2017,38,15083,23569,1279,678 [...]
+m54006_151021_185942/47775926/0_53	4	*	0	255	*	*	0	0	AAAGAAGGAAAAGAGAAAAGAAAGAAAAAAGGAAAAAAAAGAGGAAAAGAAGA	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,48005,6471,814,10416,19440,65535,4459,1,30653,5073,11808,12256,1061,2119,43019,3181,7679,2275,19547,15513,3338,13545,6854,11939,5640,23130,7701,13467,27353,32149,4775,8243,18266,1742,13163,5534,205,8611,1,14279,32646,403,34569,14538,28454,26350,26199, [...]
+m54006_151021_185942/47775927/0_92	4	*	0	255	*	*	0	0	GGAGAAAGAGAGAAAAAAGAAGGAAAAAGGGAAAGGGAGAGGAGAAAAGGAGAAAAAAGGGAAGAAAGAAGAAAGAGAGAAGAAGAAGAAGG	*	cx:i:0	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN	ip:B:S,9315,1,613,3673,39634,2,22851,7691,15506,2680,10097,22739,1822,19150,10242,37739,2045,3036,19692,22721,19057,35007,5670,708,4910,396,1 [...]
+m54006_151021_185942/47775928/0_1634	4	*	0	255	*	*	0	0	AGAAGATAGGCGCTAGGGGTAAAGAGAAGGGAGGAGGGAGAAAGAGGTAGATTCGGACGTAGGGGGCTGGATCGCTGGGGCGAGGCCGCAGAGTATGAAAAGAGAAATGGGGGGGACAAACGTCGCCTAAGGCGGAGAGAAACAGAATGAACGAGGATGGAGAGATAGGTGGAGATACTTGACGAGAAGCCAGCAGATAGGGGAAGGGATATGGCGGAGAGTTCGAATGTTAGACAGGGGGTAAGGTAAAGGGATCAAAAGAAGCAAGGCGATAATAGCGAAGCTAGAGAAAGTATGATAAGGAAGCCAGAGTAGTGGAAATGAGAAAGGAGAGCGTAAGAGGGGAGAGAAGAAAAAACGCTGGAAAAAGCAAGTGGGGGCGGAGCATGGAATAGAGGAGTGGGGTATCCCCGGTCCTACAAAAACAAGGGAGCAAC [...]
+m54006_151021_185942/47775928/1684_4540	4	*	0	255	*	*	0	0	GGATAGAAATGCGTCCGCCGGAAGTAATCTCCGCGATCTATTAGGGCATGGTAAGCCAGAGGTTGGATTAGGCGGAAGAGGGGCGCAGTGCAAGTACTTTTTTGGTCGCGTGAGATAGACTGTAGCAGCGCAGATAAGGTCCGCGTGATTTAACAGGGAGAGTTACATCGCCCCAACTGACGGTGCGTCGCGAAGTTCGCGTACACGACGGTAGAACTCAAGCCAGCGATGAGGTGAAATCAGACGTAAATCACGGTATCAAGAGACCGCAGCTGTTGATGGTAGGACCAGTAGGGGTACCAAAGAAACGTGGGCGATTAATAGCGCGCTTTAAACGAGGTGAAGGGTCAGCAGGCGGCTAGGAGATGATCGGAAGACAAGACATAGGCGCGGAGCGCATAATGACAGCACTGGGGGACGAGGGCGGGGCGAAC [...]
+m54006_151021_185942/47775928/4589_5754	4	*	0	255	*	*	0	0	CGCAGCGCGAGTAAGAGACCTCGAGCAGAGACTCGGCTCTGTAAGGCAGGCACACAGAGACGGAGCTGGCACAGGCCTACTGCGTAGCAGAGTGCGTGTAGCAGTGGCGTCATATGTGTCGAGGGGATGGGCGGACAGGCCATGTGAACTCTGTCCATGGAGGGTGGAACAGCTAGGGTGCCTCTCCCCATTAAACGGGGCGCGGGTCAGGTGAGAGACGAAATCTGAGCGGGCGGGCGGACTACTCAGAGCTGAAGAGAAGAGAAGGGAGCGCGCTGGAGGGAGGGAATGAAAGAACAGTGAAGGCAGGGACTATGATATGCCGGTGACAACCAGGAGGCACGTGACAGCCGGAGGGGATAGCACGAGGGGCGTGCGGCAACGTCGCGACAGAGACAGAATTCCTCATAATATCGCGATCCAGGAAACGTCAG [...]
+m54006_151021_185942/47775928/5802_6894	4	*	0	255	*	*	0	0	CGAGACTGGAGTCAATCCACCAACAATGCGGGGGGAGACGTGTTCCTGCGGGAAGATGGGAGAAGGGAATACTGGCGGAGGGAGCGAAATTGTGGGGGTGATGCCCTTAGGACAAGGACGTGAGCCAATCTATGGTGTTGGGTGATGGCGCGTGGTAGGAGCCGTGGGCTAGTCGACCACCGGGCGTGTGAAACGCCGCGCCCGGGAAAGGGGGGAGCCCGAGGGGTCGACTAAGCGTGCTAATGGGACCGGATCGAGCCGGTGAGACCCTGAGAGGACGGGAGGCGGATAGCAGCCCGAAGAGGCGGACAACAGAGGGGCGACTACGACGCGCTGAGCGGCGAGCGCGCACTAGCGTATGAATCAACCATAGAGGCGGCGGAGGCGCAATGAGGAGCCCCCAAGTAAGGGGGCGAAGGGGACGAGCAAGGAGG [...]
+m54006_151021_185942/47775928/6947_11128	4	*	0	255	*	*	0	0	AAGGGACTTAGCATCGAGGGAGAAACAAGGAAGCTGAGGCAGTGAGTGAAAGGAATGCACAAGGGAGCATGTGAGGCGTAGACGATGGTGCTCTCAGCATGCAGACTAGTCATATGTCCGTGGTAAGTCGCGTTAGAGAGAAGAGCTTGAGTGCTGGCGGTGGATGTCCAGGCGGAGGTGGCTGACGTGCGGGAAATTGGCCCGGGGGGAGGACAAGAACCGGGCGTAGGCCGGATGTGGGAGCGCGGACGTGATCGGAGCTCAGCTGTGTTCAGAGACGGAGGGGAGGAACCTCGAAGCTCGGGAGGACAAGGGCACTCTGGGGAGAGGGGCCGCCAGGTGGAGCAAGGGGACACGATAAGAGTGGGGAGAGGACGGGAGCAAAGGGAGACGGCCCGGAAGGATCAGCTGAATAATGCTAAAGAAAATACGG [...]
+m54006_151021_185942/47775928/11167_12023	4	*	0	255	*	*	0	0	GAACAATGAAGTAGCTACCTCCTATGATGAAGGGAGCCCGACGAGCTAGCATTGGCTAGGGAAAATGCCTATCTGGAGCGCATGACCGTGCTAAAGAGGGTGGCACGCGAGGCTTGCTACTCGATTCTGCATACAGTGGGCGAAGGGGGGGCGCCTCGCAGCGGGTGCATGTGGCGTTTCACGTATTTGCACACCAGTTGGAAGACCATGGAAGCGTGTCGCCGTCGAGAAGATCGACTAACGACACTACGAGCTATGGCGTCTATGCGGGCTCCTATGTCTAACCAGATTGGTATAGAGGACAACACATTCGCAGAAAACGCCGGGGGATGCCGCTTAAACGGGGAAGATGTACATCACGAAAGTAGGACGAACCTGTCTCGATGTTCCAGTCGCAACAAACGCAACTAGTATCAGGAGGTTATTGTACAG [...]
+m54006_151021_185942/47775928/12072_12364	4	*	0	255	*	*	0	0	AGTCCTCAGCAAGCGTGGGGGAGACATGACTTTGCCACAATCCGATGTGGCATGAGGCGCGATATATTTTGCGAGTAGTGCATGCCTAAAGGGTCCCTGTGGTAACACTCATACCATTAACGTGGCGCCGTGATTCGACGACCTCTATCTTCTGGAGTCTATCGGGCACGAGGGTCAACAGGTGGGGAGAGGACACCAAGATGGCACTTAAGCGGAGAAGTATATAACGAGTGTGACTGAGCAAGATCTTCCGCAGAAAACTACTTGGTTCATTCTAGAGGATAACACGGGT	*	cx:i:3	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/12412_12729	4	*	0	255	*	*	0	0	CAACGGGGGATACGCGAGAAGGCACAGGCGGGGTATCTGCCGGAAGATTGCTAAGGTCAATACTTCGTGAGATGATATGCCGCTTGTGCAGCTTTTTTTAACGCTCCCCACCTGTTGAAAACTCGTCCAGAGGAACGCAAGAGCGTGAGGTTAGGCGAAGACATGGAAGAACCGGAATGGTATGCCGGTGTACAACAGAGGACCCTCCTCATTTACGGACAGCGCACGACTAAGAAAAGCATATCGAGATACATAGAACAGAGGAGGTGCTAACGGAATTGTCTACCCAAGCTTGCTAGAGGAATAGGAGGCTTCCG	*	cx:i:3	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/12776_13077	4	*	0	255	*	*	0	0	AGTCCGCGCCAAGCGTGGTAACAATGATTTGCCACATCCATGGTCATGGACGCTATAGGCTTTTGCTAGGAGTGCGCGGCCGTAAAGGGTCCCGGTGTTACACCGGCATACCAATTACGTGTCTCCAGTGGATGCTACGAACACTCAGATCTGTCGTATACCGTCGGTACCGAGTTTCAACAGGGTGGGGAGAGGACCCCAGAAGGAGTCACTTAAGCGGAATAAGTGAATATCACGAAGTAGTAAACCTAGCAAATCTGCAGCAGAAAAAGTCTTTGGTTGCTTTCTAGAGAGCCCAGGG	*	cx:i:3	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/13118_13434	4	*	0	255	*	*	0	0	ACCACGGTATACGCTATTAATCAAACCAAGGCGTTTTACTGGCGGAAGATTTGCTCAGTTAAATACTGGCTTGATATAAATTATTCAGCTTAAGGTGAATAGTTGGGCCTCTCCCCACCCTGTTGAAACTAGGGACCAGAGGGAGAACGCCAGAGGATTATTTAGTCCGATCAGGGAGAACTTAATGGTAATCAGGGGAAAACAGGGACCCTTTACGGTCCGCACGACTAGCAAAAAGCCATCGGCGATCCATGGACACATAGGATGTGTCAGAAAGTCATTGTATCCCAACGTGGCGGAGGACGAGGTTAGCTGC	*	cx:i:3	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/13481_13784	4	*	0	255	*	*	0	0	AGCTACTAGTCCTGAGAAAACTTGGTTGACAATCCGTTACCACATCCGAGTGGTTCATGGATCGCGATAGGCTTTTGCTAGTCGTGCGCTGCCGTAAAGGGGCTCTGTTGTACAACGCTCAATAACCATGTACGGGTATCCAGTGAGGCGACGAACCTCAGCTCGTGCGTCTAATCGGTCCCGAGTTTAAAACAGGTGTGGAGAGGACACCAAGATTCACTGAAGAGGAATAAAGTTATATCACGAAGTTATTTAACTGATCTCGTCCGCAAAAACGCCGGGTGCATGCGAAGAGGATCCCCG	*	cx:i:3	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/13826_14130	4	*	0	255	*	*	0	0	CCCGGGATACGCTCTAAATGCAACCAAGCGGTTTCTGAGGGAAGGTTTGCTCATTTTCAATAATTCGTGATAGAAATTATTCCGCTTAAGTGCATCTGGGGGCCTCGCCCAACATGTGAGCTCGTGGCCCGAGTATAACGCCAGAGCTAGTTGAGCGAATCACGGGAGAAACGTAATGGTATACCTGGGACACCAGTGACCCGTTACGCAGCAGGCACGACTAGCAAAAGCCCTACTGATCCATGAAAGCATAGATGGGCTAACGTCGTGTCTACCCAAGCTTGCATGATTACTAGTAGCGTCT	*	cx:i:3	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [...]
+m54006_151021_185942/47775928/14172_14844	4	*	0	255	*	*	0	0	AGAAGCCTACTAGTCCGCATCAAGGCGTGGGTAGACAATTAATTTATCCAAATCCATGTTCATTGATCGCGATAGTCTGTTGCGAGTCAGTGCGCTGTCCATTATTTTCCCTTTTTTACACATGCATACCAGGACGGTCTCCAGTGATTCACGACCTCACTCTGGCGTCGTACCCTCGTGCCAGAGTTTAAACAGGGTGGTAGAGGAACTACACAATATGCACTGAAGCGTAATAATGTATATAACTAATGAGGGAACTTAGCAATCTGCCTCAAAAACGCCTGGGTAGATTATATAGGATCCCCGGTTATCATCAACAAACAACAAAACTTATTAGAGAAAGAAGATAGCACGTGATCCCTCTAAGATGCACCAAGGAGTTTCTCTGCGGAATAGGTGATCAGTTCAATACTGTTTCGTGATATAAATAAT [...]
+m54006_151021_185942/47775928/14899_15041	4	*	0	255	*	*	0	0	AGAAGCTATGTTAAGAGTCCTCAGCAGAGCTGGTTTGGAAAAACAATGACTTATCCACAGCCTATGTGTAGTGGATAGCGAGAGTCTTTTGCTAGTCGTGCCGGCAGTAAAGGCACGGGGTACACCGGCATACCAATTAACT	*	cx:i:1	dq:Z:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!	dt:Z:NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN [...]
diff --git a/utils/bam2bax/tests/data/tiny_bam2plx.polymerase.sam b/utils/bam2bax/tests/data/tiny_bam2plx.polymerase.sam
new file mode 100644
index 0000000..507c002
--- /dev/null
+++ b/utils/bam2bax/tests/data/tiny_bam2plx.polymerase.sam
@@ -0,0 +1,4 @@
+ at HD	VN:1.1	SO:unknown	pb:3.0.1
+ at RG	ID:fe715e84	PL:PACBIO	DS:READTYPE=POLYMERASE;Ipd:Frames=ip;PulseWidth:Frames=pw;PulseCall=pc;Pkmid=pm;Pkmean=pa;PrePulseFrames=pd;PulseCallWidth=px;AltLabel=pt;AltLabelQV:CodecV1=pv;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000O	PU:m140909_233743_richard_c100694811280000001823149303241587_s1_p0
+ at PG	ID:plx2bam
+m140909_233743_richard_c100694811280000001823149303241587_s1_p0/229/0_19	4	*	0	255	*	*	0	0	TGTGTGTAGTTGTTATCTT	*	zm:i:229	RG:Z:fe715e84	qs:i:0	qe:i:19	ip:B:S,805,34,2108,5377,12133,2926,162,981,117,1358,586,623,696,1803,7198,1507,2221,567,1470	pw:B:S,3,1,1,3,1,1,1,1,4,1,1,1,1,1,1,1,1,2,1	pc:Z:acaaTGaaaattgaaTaccaggaaaataaaaaaaataaaaGacggagggataaaaaaagggaagagaaaaaacttcaaaaaaacaaaaagaagcaaaTactagaacaaGaTgttaAGtaaataTaaTgaGaaaaTattaaTgttagaaataataaataaaatagagagacaataaaAggactaTagaagCTggggtTa [...]
diff --git a/utils/bam2bax/tests/example/end-to-end.sh b/utils/bam2bax/tests/example/end-to-end.sh
new file mode 100755
index 0000000..b62ed63
--- /dev/null
+++ b/utils/bam2bax/tests/example/end-to-end.sh
@@ -0,0 +1,66 @@
+#How to Create Test Files And Run Tests#
+
+#BAX2BAM=/home/UNIXHOME/yli/git/depot/software/smrtanalysis/bioinformatics/staging/PostPrimary/bax2bam/bin/bax2bam
+#BAM2BAX=/home/UNIXHOME/yli/git/depot/software/smrtanalysis/bioinformatics/staging/PostPrimary/bam2bax/bin/bam2bax
+#BLASR=/home/UNIXHOME/yli/git/depot/software/smrtanalysis/bioinformatics/ext/pi/blasr/blasr
+
+BLASR=blasr
+BAX2BAM=bax2bam
+BAM2BAX=../../bin/bam2bax
+# Input files: every name is derived from the movie prefix.
+I_PREFIX=m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1
+I_BAX_H5=${I_PREFIX}.bax.h5
+I_SR_BAM=${I_PREFIX}.subreads.bam
+I_SR_SAM=${I_PREFIX}.subreads.sam
+I_SC_BAM=${I_PREFIX}.scraps.bam
+I_SC_SAM=${I_PREFIX}.scraps.sam
+# Output files: same base name, placed under the output directory.
+O_DIR=Analysis_Results
+O_PREFIX=${O_DIR}/${I_PREFIX}
+O_BAX_H5=${O_PREFIX}.bax.h5
+O_SR_BAM=${O_PREFIX}.subreads.bam
+O_SR_SAM=${O_PREFIX}.subreads.sam
+O_SC_BAM=${O_PREFIX}.scraps.bam
+O_SC_SAM=${O_PREFIX}.scraps.sam
+O_META_XML=${I_PREFIX%%.*}.metadata.xml
+
+# Clean, including a metadata.xml left by a previous run so the existence check below cannot pass on a stale file
+rm -f *.bam *.sam *.tmp $O_PREFIX.* $I_PREFIX.* $O_META_XML
+
+# Make output dir
+mkdir -p $O_DIR
+
+# Input bax.h5
+#echo  cp input bax.h5 from siv
+cp /pbi/dept/secondary/siv/testdata/bam2bax/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 .
+
+# Input *.subreads.bam, *.scraps.bam
+echo convert input bax.h5 to bam
+$BAX2BAM $I_BAX_H5 -o $I_PREFIX --subread --losslessframes
+ls $I_SR_BAM $I_SC_BAM || echo failed to convert input bax.h5 to input bam
+
+# Output bax.h5
+echo convert input bam to output bax.h5
+$BAM2BAX $I_SR_BAM $I_SC_BAM -o $O_PREFIX --metadata
+
+ls $O_BAX_H5 || echo "ERROR! $O_BAX_H5" does not exist
+ls $O_META_XML || echo "ERROR! $O_META_XML" does not exist
+
+# Output bam
+echo convert output bax.h5 to bam
+$BAX2BAM $O_BAX_H5 -o $O_PREFIX --subread --losslessframes
+
+# Input and output subreads/scraps: dump each bam to sam, then strip '@' header lines into *.tmp so only records are compared
+samtools view -h $I_SR_BAM -o $I_SR_SAM && cat $I_SR_SAM | grep -v '^@' > $I_SR_SAM.tmp
+samtools view -h $I_SC_BAM -o $I_SC_SAM && cat $I_SC_SAM | grep -v '^@' > $I_SC_SAM.tmp
+samtools view -h $O_SR_BAM -o $O_SR_SAM && cat $O_SR_SAM | grep -v '^@' > $O_SR_SAM.tmp
+samtools view -h $O_SC_BAM -o $O_SC_SAM && cat $O_SC_SAM | grep -v '^@' > $O_SC_SAM.tmp
+
+echo diff input with output
+diff $I_SR_SAM.tmp $O_SR_SAM.tmp || echo I.subreads.bam and O.subreads.bam are not identical
+diff $I_SC_SAM.tmp $O_SC_SAM.tmp || echo I.scraps.bam and O.scraps.bam are not identical
+
+#How to use bam2bax#
+#- basic use case
+#    bam2bax smrtcell.subreads.bam smrtcell.scraps.bam -o output_prefix
+
diff --git a/utils/bam2bax/tests/files.cmake b/utils/bam2bax/tests/files.cmake
new file mode 100644
index 0000000..0879b33
--- /dev/null
+++ b/utils/bam2bax/tests/files.cmake
@@ -0,0 +1,64 @@
+# test case headers
+set(Bam2BaxTest_H
+    ${Bam2Bax_TestsDir}/src/TestData.h
+    ${Bam2Bax_TestsDir}/src/TestConstants.h
+    ${Bam2Bax_TestsDir}/src/TestUtils.h
+)
+
+# test case sources (each translation unit is listed exactly once)
+set(Bam2BaxTest_CPP
+    ${Bam2Bax_TestsDir}/src/test.cpp
+    ${Bam2Bax_TestsDir}/src/TestUtils.cpp
+    ${Bam2Bax_TestsDir}/src/test_HDFZMWWriter.cpp
+    ${Bam2Bax_TestsDir}/src/test_HDFZMWMetricsWriter.cpp
+    ${Bam2Bax_TestsDir}/src/test_HDFScanDataWriter.cpp
+    ${Bam2Bax_TestsDir}/src/test_HDFBaseCallsWriter.cpp
+    ${Bam2Bax_TestsDir}/src/test_HDFBaxWriter.cpp
+    ${Bam2Bax_TestsDir}/src/test_Bam2BaxConverter.cpp
+)
+
+#set(X
+#    /home/UNIXHOME/yli/git/depot/software/smrtanalysis/bioinformatics/staging/PostPrimary/bam2bax/build/src/CMakeFiles/bam2bax.dir
+#)
+#
+#set(Bam2BaxTest_O
+#    ${X}/HDFZmwWriter.cpp.o
+#    ${X}/HDFRegionsWriter.cpp.o
+#    ${X}/HDFScanDataWriter.cpp.o
+#    ${X}/HDFBaseCallsWriter.cpp.o
+#    ${X}/HDFBaxWriter.cpp.o
+#)
+
+# GoogleTest public headers, internal headers, and the one private header
+# that lives in the GoogleTest source directory.
+set(GTest_H
+    ${GTest_IncludeDir}/gtest/gtest-death-test.h
+    ${GTest_IncludeDir}/gtest/gtest-message.h
+    ${GTest_IncludeDir}/gtest/gtest-param-test.h
+    ${GTest_IncludeDir}/gtest/gtest-printers.h
+    ${GTest_IncludeDir}/gtest/gtest-spi.h
+    ${GTest_IncludeDir}/gtest/gtest-test-part.h
+    ${GTest_IncludeDir}/gtest/gtest-typed-test.h
+    ${GTest_IncludeDir}/gtest/gtest.h
+    ${GTest_IncludeDir}/gtest/gtest_pred_impl.h
+    ${GTest_IncludeDir}/gtest/gtest_prod.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-death-test-internal.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-filepath.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-internal.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-linked_ptr.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-param-util-generated.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-param-util.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-port.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-string.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-tuple.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-type-util.h
+    ${GTest_SourceDir}/gtest-internal-inl.h
+)
+
+# GoogleTest implementation files: the all-in-one source plus the stock main().
+set(GTest_CPP
+    ${GTest_SourceDir}/gtest-all.cc
+    ${GTest_SourceDir}/gtest_main.cc
+)
+
+
diff --git a/utils/bam2bax/tests/resequencing/bam2bax_resequencing.py b/utils/bam2bax/tests/resequencing/bam2bax_resequencing.py
new file mode 100755
index 0000000..0b6791a
--- /dev/null
+++ b/utils/bam2bax/tests/resequencing/bam2bax_resequencing.py
@@ -0,0 +1,179 @@
+#!/usr/env/python
+import os
+import os.path as op
+from pbcore.util.Process import backticks
+from pbcore.io import readFofn
+
+
+"""
+Convert bam files to bax.h5 and test
+the converted bax.h5 file through the
+2.3 smrtpipe resequencing pipeline.
+"""
+
+smrtwrap = "/mnt/software/s/smrtanalysis/2.3.0.p4/smrtcmds/bin/smrtwrap"
+
+def c_dir():
+    """return path to current directory"""
+    return op.dirname(op.abspath(__file__))
+
+def execute(cmd):
+    """Execute cmd and raise run time error if it failed."""
+    print "CMD: %s" % cmd
+    o, c, m = backticks(cmd)
+    if c != 0:
+        raise RuntimeError("%s failed. %s" % (cmd, m))
+
+def mkdir(path):
+    """Create output dir"""
+    cmd = "rm -rf %s && mkdir %s" % (path, path)
+    execute(cmd)
+
+def bax2bam_path():
+    """Return path to bax2bam"""
+    cmd = "which bax2bam"
+    o, c, m = backticks(cmd)
+    if c != 0:
+        raise RuntimeError ("could not find bax2bam")
+    else:
+        return o[0]
+
+def bam2bax_path():
+    """Return path to bam2bax"""
+    p = os.getenv('BAM2BAX', "%s/../../bin/bam2bax" % c_dir())
+    print p
+    if op.exists(p):
+        return p
+    else:
+        raise IOError ("Unable to find bam2bax %s" % p)
+
+def sr_bam_path(prefix):
+    """return path to subreads.bam given prefix"""
+    return "%s.subreads.bam" % prefix
+
+def sc_bam_path(prefix):
+    """return path to scraps.bam given prefix"""
+    return "%s.scraps.bam" % prefix
+
+def bax_fn(prefix):
+    """return path to bax.h5 given prefix"""
+    return "%s.bax.h5" % prefix
+
+def metadata_fn(bax_fn):
+    """return path to metadata.xml, which should be
+    in upper directory of bax.h5"""
+    return op.join(op.dirname(op.dirname(bax_fn)),
+                   "%s.metadata.xml" % op.basename(bax_fn).split(".")[0])
+
+def settings_xml():
+    """return path to settings.xml"""
+    return op.join(c_dir(), "settings.xml")
+
+def prepare_bam_inputs(i_file, o_dir):
+    """Prepare subreads|scraps.bam if they do not exist."""
+    bam_fn, bax_fn = parse_input_file(i_file)
+    if bax_fn is not None:
+        return convert_bax_to_bam(bax_fn, o_dir)
+    if bam_fn is not None:
+        if "subreads.bam" not in bam_fn:
+            raise ValueError ("%s is not a subreads.bam file" % bam_fn)
+        return bam_fn.split(".subreads.bam")[0]
+
+def parse_input_file(i_file):
+    """Parse input file, get input bam or bax.h5 file."""
+    bam_fn, bax_fn = None, None
+    if (i_file.endswith(".bam")):
+        bam_fn = i_file
+    elif (i_file.endswith(".fofn")):
+        fns = [f for f in readFofn(i_file)]
+        if not all([f.endswith(".bax.h5") for f in fns]) or \
+            len(fns) != 1:
+            raise ValueError ("%s fofn should contain exactly one bax.h5 file.")
+        else:
+            bax_fn = fns[0]
+    elif i_file.endswith(".bax.h5"):
+        bax_fn = i_file
+    else:
+        raise ValueError ("Unsupported file format %s" % i_file)
+    return bam_fn, bax_fn
+
+
+def convert_bax_to_bam(bax_fn, o_dir):
+    """Convert bax.h5 to bam, return prefix of bam file."""
+    movie_name = op.basename(bax_fn).split(".bax.h5")[0]
+    bam_prefix = op.join(o_dir, movie_name)
+    cmd = "%s %s -o %s " % (bax2bam_path(), bax_fn, bam_prefix) + \
+          "--subread --pulsefeatures DeletionQV,DeletionTag,InsertionQV,IPD,PulseWidth,MergeQV,SubstitutionQV,SubstitutionTag"
+    execute(cmd)
+    return bam_prefix
+
+def convert_bam_to_bax(bam_prefix, o_dir, analysis_dir):
+    """Call bam2bax to convert bam files to $o_dir/$analysis_dir/{movie}.bax.h5, return prefix of bax file."""
+    sr_bam = sr_bam_path(bam_prefix)
+    sc_bam = sc_bam_path(bam_prefix)
+    movie_name = op.basename(sr_bam).split(".subreads.bam")[0]
+    bax_prefix = op.join(o_dir, analysis_dir, movie_name)
+
+    # create output directory
+    cmd = "mkdir -p %s/%s" % (o_dir, analysis_dir)
+    execute(cmd)
+
+    # call bam2bax with --metadata
+    cmd = "%s %s %s -o %s --metadata " % (bam2bax_path(), sr_bam, sc_bam, bax_prefix)
+    execute(cmd)
+
+    # call ls to verify that both *.bax.h5 and metadata.xml are generated
+    cmd = "ls %s %s" % (bax_fn(bax_prefix), metadata_fn(bax_fn(bax_prefix)))
+    execute(cmd)
+    return bax_prefix
+
+def prepare_4_resequencing(bax_prefix, o_dir):
+    """Create *.fofn and *.xml from bax.h5, return xml file"""
+    # Create input.fofn
+    fofn = op.join(o_dir, "input.fofn")
+    #with open(fofn, 'w') as f:
+    #    f.write("%s" % bax_fn(bax_prefix))
+    cmd = "echo %s | xargs realpath > %s" % (bax_fn(bax_prefix), fofn)
+    execute(cmd)
+
+    # Create input.xml for smrtpipe
+    xml = op.join(o_dir, "input.xml")
+    cmd = "%s fofnToSmrtpipeInput.py %s > %s" % (smrtwrap, fofn, xml)
+    execute(cmd)
+
+    return xml
+
+def run_resequencing(xml, o_dir):
+    cmd = "%s " % smrtwrap + \
+          "smrtpipe.py --debug -D TMP=/scratch -D SHARED_DIR=/scratch --distribute " + \
+          "--params=%s " % (settings_xml()) + \
+          "--output=%s " % (o_dir) + \
+          "xml:%s " % (xml) + \
+          "2>%s/err 1>%s/out" % (o_dir, o_dir)
+    execute(cmd)
+
+def run(i_file, o_dir):
+    """
+    """
+    mkdir(o_dir)
+
+    bam_prefix = prepare_bam_inputs(i_file, o_dir)
+
+    bax_prefix = convert_bam_to_bax(bam_prefix, o_dir, analysis_dir="Analysis_Results")
+
+    xml        = prepare_4_resequencing(bax_prefix, o_dir)
+
+    run_resequencing(xml, o_dir)
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) != 3:
+        print "Usage: %s input.subreads.bam output_dir" % (op.basename(__file__))
+        print "       Convert bam files to bax.h5 and run smrtpipe resequencing."
+        print "Usage: %s input.fofn output_dir" % (op.basename(__file__))
+        print "       Convert input.fofn's bax.h5 files to bam, then converts bam to bax, and finally run smrtpipe resequencing."
+        exit(1);
+
+    print "current directory = %s " % c_dir()
+
+    run(i_file=sys.argv[1], o_dir=sys.argv[2])
diff --git a/utils/bam2bax/tests/resequencing/one_bax_test.sh b/utils/bam2bax/tests/resequencing/one_bax_test.sh
new file mode 100755
index 0000000..1de9f3e
--- /dev/null
+++ b/utils/bam2bax/tests/resequencing/one_bax_test.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/bash
+PYTHON=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/smrtcmds/bin/python
+
+# Test with one bax.h5 --> subreads|scraps.bam --> converted.bax.h5 --> resequencing
+$PYTHON bam2bax_resequencing.py /pbi/dept/secondary/siv/testdata/bam2bax/one_bax/input.fofn one_bax_output || exit 1
diff --git a/utils/bam2bax/tests/resequencing/settings.xml b/utils/bam2bax/tests/resequencing/settings.xml
new file mode 100755
index 0000000..de0e3bf
--- /dev/null
+++ b/utils/bam2bax/tests/resequencing/settings.xml
@@ -0,0 +1,201 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<smrtpipeSettings>
+    <protocol version="2.3.0" id="RS_Resequencing.1" editable="false">
+        <param name="name" label="Protocol Name">
+            <value>RS_Resequencing</value>
+            <input type="text"/>
+            <rule required="true"/>
+        </param>
+        <param name="description">
+            <value>Map length and quality-filtered reads against a reference sequence; identify consensus and variant sequences using Quiver algorithm.
+      </value>
+            <textarea></textarea>
+        </param>
+        <param name="version" hidden="true">
+            <value>1</value>
+            <input type="text"/>
+            <rule type="digits" required="true" min="1.0"/>
+        </param>
+        <param name="state" hidden="true">
+            <value>active</value>
+            <input value="active" type="radio"/>
+            <input value="inactive" type="radio"/>
+        </param>
+        <param name="reference" hidden="true">
+            <value>/mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/references/lambda</value>
+        </param>
+        <param name="control" hidden="true">
+            <value></value>
+        </param>
+        <param name="fetch" hidden="true">
+            <value>common/protocols/preprocessing/Fetch.1.xml</value>
+        </param>
+        <param name="filtering">
+            <value>common/protocols/filtering/SFilter.1.xml</value>
+            <select multiple="true">
+                <import extension="xml" contentType="text/directory">common/protocols/filtering</import>
+            </select>
+        </param>
+        <param name="spikeinControl" hidden="true">
+            <value>common/protocols/control/SControl.1.xml</value>
+        </param>
+        <param name="mapping">
+            <value>common/protocols/mapping/BLASR_Resequencing.1.xml</value>
+            <select multiple="true">
+                <import extension="xml" contentType="text/directory">common/protocols/mapping</import>
+            </select>
+        </param>
+        <param name="consensus">
+            <value>common/protocols/consensus/Quiver.1.xml</value>
+            <select multiple="true">
+                <import extension="xml" contentType="text/directory">common/protocols/consensus</import>
+            </select>
+        </param>
+    </protocol>
+    <moduleStage name="fetch" editable="true">
+        <module label="Fetch v1" id="P_Fetch" editableInJob="true">
+            <description>Sets up inputs</description>
+        </module>
+    </moduleStage>
+    <moduleStage name="filtering" editable="true">
+        <module label="SFilter v1" id="P_Filter" editableInJob="true">
+            <description>This module filters reads based on a minimum subread length, polymerase read quality and polymerase read length.</description>
+            <param name="minSubReadLength" label="Minimum Subread Length">
+                <value>50</value>
+                <title>Subreads shorter than this value (in base pairs) are filtered out and excluded from analysis.</title>
+                <input type="text" size="3"/>
+                <rule type="number" min="0.0" message="Value must be a positive integer"/>
+            </param>
+            <param name="readScore" label="Minimum Polymerase Read Quality">
+                <value>75</value>
+                <title>Polymerase reads with lower quality than this value are filtered out and excluded from analysis.</title>
+                <input type="text"/>
+                <rule type="number" min="0.0" message="Value must be between 0 and 100" max="100.0"/>
+            </param>
+            <param name="minLength" label="Minimum Polymerase Read Length">
+                <value>50</value>
+                <title>Polymerase reads shorter than this value (in base pairs) are filtered out and excluded from analysis.</title>
+                <input type="text" size="3"/>
+                <rule type="number" min="0.0" message="Value must be a positive integer"/>
+            </param>
+        </module>
+        <module label="SFilter Reports v1" id="P_FilterReports" editableInJob="false"/>
+    </moduleStage>
+    <moduleStage name="spikeinControl" editable="true">
+        <module label="SControl v1" id="P_Control" editableInJob="true">
+            <param name="pbalign_opts" hidden="true">
+                <value>--maxHits=1 --minAccuracy=0.75 --minLength=50 --algorithmOptions="-useQuality -holeNumbers 1-1000"</value>
+            </param>
+        </module>
+        <module label="SControl Reports v1" id="P_ControlReports" editableInJob="false"/>
+    </moduleStage>
+    <moduleStage name="mapping" editable="true">
+        <module label="BLASR v1" id="P_Mapping" editableInJob="true">
+            <description>
+BLASR maps reads to genomes by finding the highest scoring local alignment or set of local alignments between the read and the genome. The first set of alignments is found by querying an index of the reference genome, and then refining until only high scoring alignments are retained.  Additional pulse metrics are loaded into the resulting cmp.h5 file to enable downstream use of the Quiver algorithm.
+    </description>
+            <param name="maxHits" label="Maximum number of hits per read" hidden="true">
+                <value>10</value>
+                <title>
+        The maximum number of matches of each read to the reference
+        sequence that will be evaluated. maxHits should be greater
+        than the expected number of repeats if you want to spread hits
+        out on the genome.
+      </title>
+                <input type="text"/>
+                <rule type="digits" message="Value must be an integer between 0 and 1000"/>
+            </param>
+            <param name="maxDivergence" label="Maximum divergence (%)">
+                <value>30</value>
+                <title>The maximum allowed divergence (in %) of a read from the reference sequence.</title>
+                <input type="text"/>
+                <rule type="digits" message="Value must be an integer between 0 and 100"/>
+            </param>
+            <param name="minAnchorSize" label="Minimum anchor size">
+                <value>12</value>
+                <title>The minimum size of the read (in base pairs) that must match against the reference.</title>
+                <input type="text"/>
+                <rule type="digits" message="Value must be an integer between 8 and 30"/>
+            </param>
+            <param name="samBam" label="Write output as a BAM file">
+                <value>True</value>
+                <title>Specify whether or not to output a BAM representation of the cmp.h5 file.</title>
+                <input type="checkbox"/>
+            </param>
+            <param name="gff2Bed" label="Write BED coverage file">
+                <value>True</value>
+                <title>Specify whether or not to output a BED representation of the depth of coverage summary.</title>
+                <input type="checkbox"/>
+            </param>
+            <param name="placeRepeatsRandomly" label="Place repeats randomly">
+                <value>True</value>
+                <title>Specify that if BLASR maps a read to more than one location with equal probability, then it randomly selects which location it chooses as the best location. If not set, defaults to the first on the list of matches.</title>
+                <input type="checkbox"/>
+            </param>
+            <param name="pbalign_opts" hidden="true">
+                <value>--seed=1 --minAccuracy=0.75 --minLength=50 --concordant --algorithmOptions="-useQuality"</value>
+            </param>
+            <param name="pbalign_advanced_opts" label="Advanced pbalign options">
+                <value> </value>
+                <title>Specify additional Pbalign options. For advanced users only.</title>
+                <input type="text"/>
+            </param>
+            <param name="pulseMetrics" hidden="true">
+                <value>DeletionQV,IPD,QualityValue,InsertionQV,MergeQV,SubstitutionQV,DeletionTag</value>
+            </param>
+            <param name="loadPulsesOpts" hidden="true">
+                <value>bymetric</value>
+                <title>The default option of loadPulses is 'byread'. Option 'bymetric'
+               is designed to sacrifice memory for increased speed, especially
+               for jobs with a large number of reference contigs. </title>
+            </param>
+        </module>
+        <module label="BLASR Reports v1" id="P_MappingReports" editableInJob="false"/>
+    </moduleStage>
+    <moduleStage name="consensus" editable="true">
+        <module label="Quiver v1" id="P_GenomicConsensus" editableInJob="true">
+            <description>Quiver identifies haploid  SNPs and indels by performing a local  realignment of reads using the full range of sequence quality metrics.</description>
+            <param name="algorithm" label="Consensus algorithm">
+                <value>quiver</value>
+                <title>Specify the consensus/variant algorithm to use in the analysis.</title>
+                <input value="plurality" type="radio"/>
+                <input value="quiver" type="radio"/>
+            </param>
+            <param name="outputConsensus" label="Output consensus FASTA and FASTQ files">
+                <value>True</value>
+                <title>Specify whether or not to output FASTA and FASTQ representations of the consensus sequence.</title>
+                <input type="checkbox"/>
+            </param>
+            <param name="makeVcf" label="Write SNPs/Variants as VCF file">
+                <value>True</value>
+                <title>Specify whether or not to output a VCF representation of the variants.</title>
+                <input type="checkbox"/>
+            </param>
+            <param name="makeBed" label="Write SNPs/Variants as BED file">
+                <value>True</value>
+                <title>Specify whether or not to output a BED representation of the variants.</title>
+                <input type="checkbox"/>
+            </param>
+            <param name="enableMapQVFilter" label="Use only unambiguously mapped reads">
+                <value>True</value>
+                <title>Specify whether or not to filter out reads where Map QV is less than 10. Reduces coverage in repeat regions that are shorter than the read length.</title>
+                <input type="checkbox"/>
+            </param>
+            <param name="minConfidence" label="Minimum variant confidence" hidden="true">
+                <value>40</value>
+                <title>Minimum variant confidence</title>
+            </param>
+            <param name="minCoverage" label="Minimum coverage requirement" hidden="true">
+                <value>5</value>
+                <title>Minimum variant coverage</title>
+            </param>
+            <param name="diploidMode" label="Diploid analysis">
+                <value>False</value>
+                <title>Specify whether or not Quiver operates in diploid mode and calls variants with the assumption that there are two copies of the genome in the sample; the mapping specificity should be higher.</title>
+                <input type="checkbox"/>
+            </param>
+        </module>
+        <module label="Genomic Consensus Reports v1" id="P_ConsensusReports" editableInJob="false"/>
+    </moduleStage>
+    <fileName>RS_Resequencing.1.xml</fileName>
+</smrtpipeSettings>
diff --git a/utils/bam2bax/tests/resequencing/tiny_test.sh b/utils/bam2bax/tests/resequencing/tiny_test.sh
new file mode 100755
index 0000000..aea7be7
--- /dev/null
+++ b/utils/bam2bax/tests/resequencing/tiny_test.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/bash
+PYTHON=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/smrtcmds/bin/python
+
+# Test with tiny.subreads|scraps.bam --> converted.bax.h5 --> resequencing 
+$PYTHON bam2bax_resequencing.py  /pbi/dept/secondary/siv/testdata/bam2bax/tiny/m150905_153119_sherri_c100907042550000001823207504291693_s1_p0.subreads.bam tiny_output || exit 1
diff --git a/utils/bam2bax/tests/src/TestConstants.h b/utils/bam2bax/tests/src/TestConstants.h
new file mode 100644
index 0000000..f36851e
--- /dev/null
+++ b/utils/bam2bax/tests/src/TestConstants.h
@@ -0,0 +1,195 @@
+#ifndef _TEST_CONSTANTS_
+#define _TEST_CONSTANTS_
+#include "Enumerations.h"
+#include <cstring>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include "SMRTSequence.hpp"
+#include "reads/ScanData.hpp"
+
+#include "HDFFile.hpp"
+#include "HDFScanDataWriter.hpp"
+#include "HDFBaseCallsWriter.hpp"
+#include "HDFBaxWriter.hpp"
+#include "HDFBasReader.hpp"
+#include "HDFScanDataReader.hpp"
+
+namespace tests {
+    // Setup variables for test HDFScanDataWriter.
+    // make_scandata() copies these values into a ScanData object.
+    const PlatformId platformid = Springfield;
+    const std::string movieName = "thisismymovie";
+    const std::string runCode = "thisismyruncode";
+    const std::string whenStarted = "2015-01-05";
+    const float frameRate = 75.0;
+    const unsigned int numFrames = 539900;
+    // Base -> index map handed to the writers under test.
+    const std::map<char, size_t> baseMap = {{'T', 0}, {'G', 1}, {'A', 2}, {'C', 3}};
+    // A second map that differs from baseMap (the indices of 'A' and 'G' are swapped).
+    const std::map<char, size_t> randomBaseMap = {{'A', 1}, {'C', 3}, {'G', 2}, {'T', 0}};
+    const std::string bindingKit = "thisismybindingkit";
+    const std::string sequencingKit = "thisismysequencingkit";
+
+    // Setup variables for test HDFBaseCallsWriter.
+    // Field names that read_from() passes to HDFBasReader::InitializeFields.
+    const std::vector<std::string> QVNames = 
+        {"DeletionTag", "DeletionQV", 
+         "InsertionQV", "MergeQV", 
+         "SubstitutionQV", "SubstitutionTag", 
+         "PreBaseFrames", "WidthInFrames",
+         "HQRegionSNR", "ReadScore"};
+
+    // Base features that write_to() passes to the HDFBaxWriter /
+    // HDFBaseCallsWriter constructors.
+    const std::vector<PacBio::BAM::BaseFeature> QVEnums =
+        {   PacBio::BAM::BaseFeature::DELETION_TAG
+          , PacBio::BAM::BaseFeature::DELETION_QV
+          , PacBio::BAM::BaseFeature::INSERTION_QV
+          , PacBio::BAM::BaseFeature::MERGE_QV
+          , PacBio::BAM::BaseFeature::SUBSTITUTION_QV
+          , PacBio::BAM::BaseFeature::SUBSTITUTION_TAG
+          , PacBio::BAM::BaseFeature::IPD
+          , PacBio::BAM::BaseFeature::PULSE_WIDTH };
+
+
+    // Length of the test read; myseq and every per-base string below have
+    // exactly this many characters.
+    const unsigned int len = 2;
+    const unsigned int holeNumber = 110;
+    const unsigned char holeStatus = '8';
+    // make_smrtseq() stores readScoreInt / 1000.0 as the read score.
+    const int readScoreInt = 760;
+    // HQ-region SNR values set for bases A, C, G and T respectively.
+    const float snra = 0.11;
+    const float snrc = 0.22;
+    const float snrg = 0.33;
+    const float snrt = 0.44;
+
+    // Bases and per-base values; make_smrtseq() copies them character by
+    // character into the SMRTSequence.
+    const std::string myseq = "AT";
+    const std::string deletionTag = "NG";
+    const std::string deletionQV = "&2";
+    const std::string insertionQV = "+%";
+    const std::string mergeQV = "(.";
+    const std::string substitutionQV = "/<";
+    const std::string substitutionTag = "NC";
+    const std::string preBaseFrames = "C0";
+    const std::string widthInFrames = "8;";
+
+    // Name of the group that write_to() adds under the file's root group.
+    const std::string PULSEDATA = "PulseData";
+
+    // Version string handed to the writers under test.
+    const std::string basecallerVersion = "2.0.1.354";
+
+    // Copy the first len characters of src into dst[0..len-1].
+    // dst only has to support assignment through operator[].
+    template <typename T>
+    inline void SetData(T dst, const std::string & src, const unsigned int len) {
+        const char * chars = src.c_str();
+        unsigned int idx = 0;
+        while (idx < len) {
+            dst[idx] = chars[idx];
+            ++idx;
+        }
+    }
+
+    // Return true when l and r hold equal elements at indices 0..len-1
+    // (trivially true for len == 0).
+    template <typename T>
+    inline bool CmpData(T l, T r, const unsigned int len) {
+        unsigned int pos = 0;
+        // Advance while the elements match; stop at the first mismatch.
+        while (pos < len && !(l[pos] != r[pos])) {
+            ++pos;
+        }
+        return pos == len;
+    }
+
+    // Populate scandata with the fixed test values declared in this
+    // namespace, using baseMap_ as its base map.
+    inline void make_scandata(ScanData & scandata,
+                              const std::map<char, size_t> & baseMap_) {
+        scandata.PlatformID(tests::platformid)
+                .MovieName(tests::movieName)
+                .WhenStarted(tests::whenStarted)
+                .RunCode(tests::runCode)
+                .NumFrames(tests::numFrames)
+                .FrameRate(tests::frameRate)
+                .SequencingKit(tests::sequencingKit)
+                .BindingKit(tests::bindingKit)
+                .BaseMap(baseMap_);
+    }
+
+    // Fill seq with the fixed test read: bases, per-base QV / tag / frame
+    // values, ZMW hole data, read score and the HQ-region SNR of each base.
+    inline void  make_smrtseq(SMRTSequence & seq) {
+        typedef QualityValueVector<unsigned char> QVs;
+        const unsigned int n = tests::len;
+
+        // Allocate room for n bases, then copy the bases in.
+        seq.Allocate(n);
+        memcpy(seq.seq, myseq.c_str(), n * sizeof(char));
+
+        // Per-base values, copied element by element.
+        SetData<QVs>(seq.deletionQV, tests::deletionQV, n);
+        SetData<Nucleotide *>(seq.deletionTag, tests::deletionTag, n);
+        SetData<QVs>(seq.insertionQV, tests::insertionQV, n);
+        SetData<QVs>(seq.mergeQV, tests::mergeQV, n);
+        SetData<QVs>(seq.substitutionQV, tests::substitutionQV, n);
+        SetData<Nucleotide *>(seq.substitutionTag, tests::substitutionTag, n);
+        SetData<HalfWord *>(seq.preBaseFrames, tests::preBaseFrames, n);
+        SetData<HalfWord *>(seq.widthInFrames, tests::widthInFrames, n);
+
+        // ZMW hole data.
+        seq.zmwData.holeNumber = tests::holeNumber;
+        seq.zmwData.holeStatus = tests::holeStatus;
+
+        // The integer score is stored in thousandths.
+        seq.readScore = tests::readScoreInt / 1000.0;
+
+        // One SNR value per base.
+        seq.HQRegionSnr('A', tests::snra);
+        seq.HQRegionSnr('C', tests::snrc);
+        seq.HQRegionSnr('G', tests::snrg);
+        seq.HQRegionSnr('T', tests::snrt);
+    }
+
+    // Write seq (one ZMW) and then the scan data sd to outfn through an
+    // HDFBaxWriter; returns the result of WriteOneZmw.
+    inline bool write_to(const std::string outfn, const ScanData & sd, const SMRTSequence & seq) {
+        std::unique_ptr<HDFBaxWriter> writer(
+            new HDFBaxWriter(outfn, tests::basecallerVersion, sd.BaseMap(), tests::QVEnums));
+        const bool zmwWritten = writer->WriteOneZmw(seq);
+        writer->WriteScanData(sd);
+        // Destroy the writer explicitly before returning.
+        writer.reset();
+        return zmwWritten;
+    }
+
+    // Write seq (one ZMW) to outfn through an HDFBaseCallsWriter attached to
+    // a freshly created PulseData group; returns the result of WriteOneZmw.
+    inline bool write_to(const std::string outfn, const SMRTSequence & seq) {
+        // Create (or truncate) the output file.
+        HDFFile outfile;
+        outfile.Open(outfn, H5F_ACC_TRUNC);
+
+        // Add the PulseData group under the root group and open it.
+        HDFGroup pulseDataGroup;
+        outfile.rootGroup.AddGroup(tests::PULSEDATA);
+        pulseDataGroup.Initialize(outfile.rootGroup, tests::PULSEDATA);
+
+        std::unique_ptr<HDFBaseCallsWriter> writer(
+            new HDFBaseCallsWriter(outfn, pulseDataGroup, tests::baseMap, tests::basecallerVersion, tests::QVEnums));
+        const bool zmwWritten = writer->WriteOneZmw(seq);
+
+        // Destroy the writer before the file is closed.
+        writer.reset();
+        outfile.Close();
+        return zmwWritten;
+    }
+
+    // Read every sequence in fn into seq (each read overwrites the previous
+    // one) and return how many sequences were read.
+    inline int read_from(const std::string fn, SMRTSequence & seq) {
+        HDFBasReader reader;
+        // InitializeFields is given a mutable copy of the field names.
+        std::vector<std::string> fields = tests::QVNames;
+        reader.InitializeFields(fields);
+        reader.Initialize(fn);
+
+        int nRead = 0;
+        for (; reader.GetNext(seq); nRead += 1) {
+        }
+        return nRead;
+    }
+
+    // Write scandata to a newly created (truncated) file outfn using an
+    // HDFScanDataWriter rooted at the file's root group.
+    inline void write_to(const std::string & outfn, ScanData & scandata) {
+        HDFFile h5;
+        h5.Open(outfn, H5F_ACC_TRUNC);
+
+        HDFScanDataWriter scanWriter(h5.rootGroup);
+        scanWriter.Write(scandata);
+
+        // Close the writer first, then the file it writes into.
+        scanWriter.Close();
+        h5.Close();
+    }
+
+    // Read the ScanData stored in infn into scandata.
+    // Returns false when the file has no "ScanData" object under its root
+    // group; otherwise returns the result of HDFScanDataReader::Read.
+    inline bool read_from(const std::string & infn, ScanData & scandata) {
+        //read
+        HDFScanDataReader reader;
+        HDFFile infile;
+        infile.Open(infn, H5F_ACC_RDONLY);
+        HDFGroup * rootGroupPtr = &infile.rootGroup;
+
+        if (not rootGroupPtr->ContainsObject("ScanData")) {
+            // Close the file on this early exit as well, matching the
+            // explicit Close() on the normal path below.
+            infile.Close();
+            return false;
+        }
+
+        reader.Initialize(rootGroupPtr);
+
+        bool OK = reader.Read(scandata);
+        // Close the reader before the file it reads from.
+        reader.Close();
+        infile.Close();
+        return OK;
+    }
+
+};
+
+#endif
diff --git a/utils/bam2bax/tests/src/TestData.h.in b/utils/bam2bax/tests/src/TestData.h.in
new file mode 100644
index 0000000..3384d17
--- /dev/null
+++ b/utils/bam2bax/tests/src/TestData.h.in
@@ -0,0 +1,18 @@
+// Author: Yuan Li
+
+#ifndef TESTDATA_H
+#define TESTDATA_H
+
+#include <string>
+
+namespace tests {
+
+// Paths used by the tests. The @...@ placeholders are presumably replaced
+// when this .in template is configured by the build system.
+const std::string Bam2Bax_Exe  = "@Bam2Bax_BinDir@/bam2bax";
+const std::string Source_Dir   = "@Bam2Bax_TestsDir@";
+const std::string Bin_Dir      = "@CMAKE_CURRENT_BINARY_DIR@";
+const std::string Data_Dir     = "@Bam2Bax_TestsDir@/data";
+const std::string Out_Dir      = "@Bam2Bax_TestsDir@/out";
+
+} // namespace tests
+
+#endif // TESTDATA_H
diff --git a/utils/bam2bax/tests/src/TestUtils.cpp b/utils/bam2bax/tests/src/TestUtils.cpp
new file mode 100644
index 0000000..4d149db
--- /dev/null
+++ b/utils/bam2bax/tests/src/TestUtils.cpp
@@ -0,0 +1,42 @@
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include "TestUtils.h"
+
+#include <gtest/gtest.h>
+#include <cstdio>
+#include <cstdlib>
+
+using namespace std;
+
+// Delete every file named in filenames.
+// Best effort: the result of remove() is ignored, so files that do not
+// exist (or cannot be deleted) are skipped silently.
+void RemoveFiles(const vector<string>& filenames)
+{
+    // Iterate by const reference; iterating by value copied each string.
+    for (const auto& fn : filenames)
+        remove(fn.c_str());
+}
+
+// Delete a single file by delegating to RemoveFiles with a one-element list.
+void RemoveFile(const string& filename)
+{
+    RemoveFiles(vector<string>(1, filename));
+}
+
+// Run the bam2bax executable through system() and return system()'s result.
+//
+// The command line is built as
+//     <Bam2Bax_Exe> <outputType> [<additionalArgs>] <bam file>...
+// with single spaces between the parts. Arguments are not quoted, so the
+// file names must not contain whitespace or shell metacharacters.
+int RunBam2Bax(const vector<string>& bamFilenames,
+               const string& outputType,
+               const string& additionalArgs)
+{
+    string convertArgs;
+    convertArgs += outputType;
+    if (!additionalArgs.empty()) {
+        convertArgs += string(" ");
+        convertArgs += additionalArgs;
+    }
+    // Iterate by const reference; iterating by value copied each file name.
+    for (const auto& fn : bamFilenames) {
+        convertArgs += string(" ");
+        convertArgs += fn;
+    }
+
+    // Hold the command line by value rather than binding a const reference
+    // to the temporary produced by the concatenation.
+    const string convertCommandLine = tests::Bam2Bax_Exe + string(" ") + convertArgs;
+    return system(convertCommandLine.c_str());
+}
diff --git a/utils/bam2bax/tests/src/TestUtils.h b/utils/bam2bax/tests/src/TestUtils.h
new file mode 100644
index 0000000..15c2a33
--- /dev/null
+++ b/utils/bam2bax/tests/src/TestUtils.h
@@ -0,0 +1,13 @@
+// Author: Derek Barnett 
+
+// Include guard: keeps a second inclusion of this header in the same
+// translation unit from re-processing its contents.
+#ifndef TESTUTILS_H
+#define TESTUTILS_H
+
+#include "SMRTSequence.hpp"
+#include <pbbam/BamRecord.h>
+#include <string>
+#include <vector>
+
+// Delete one file / a list of files.
+void RemoveFile(const std::string& filename);
+void RemoveFiles(const std::vector<std::string>& filenames);
+
+// Run bam2bax on bamFilenames with the given leading arguments and optional
+// additional arguments; returns an int status for the run.
+int RunBam2Bax(const std::vector<std::string>& bamFilenames,
+               const std::string& outputType,
+               const std::string& additionalArgs = std::string());
+
+#endif // TESTUTILS_H
diff --git a/utils/bam2bax/tests/src/test.cpp b/utils/bam2bax/tests/src/test.cpp
new file mode 100644
index 0000000..e7018b3
--- /dev/null
+++ b/utils/bam2bax/tests/src/test.cpp
@@ -0,0 +1,23 @@
+// Author: Yuan Li
+
+#include "TestData.h"
+#include "TestUtils.h"
+
+#include "DNASequence.hpp"
+#include <string>
+#include <gtest/gtest.h>
+
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+// End-to-end check: run bam2bax on the subreads/scraps BAM pair found in
+// the test data directory, write to the test output directory, and expect
+// the command to return 0.
+TEST(BAM2BAXTEST, EndToEnd)
+{
+    const std::string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0";
+    const std::string inputPrefix = tests::Data_Dir + "/" + movieName + ".1";
+    const std::string outputPrefix = tests::Out_Dir + "/" + movieName + ".1";
+
+    vector<string> bamFilenames = {inputPrefix + ".subreads.bam",
+                                   inputPrefix + ".scraps.bam"};
+    EXPECT_EQ(0, RunBam2Bax(bamFilenames, "-o " + outputPrefix));
+}
diff --git a/utils/bam2bax/tests/src/test_Bam2BaxConverter.cpp b/utils/bam2bax/tests/src/test_Bam2BaxConverter.cpp
new file mode 100644
index 0000000..0faed2b
--- /dev/null
+++ b/utils/bam2bax/tests/src/test_Bam2BaxConverter.cpp
@@ -0,0 +1,94 @@
+// Author: Yuan Li
+
+#include "TestData.h"
+#include "TestConstants.h"
+
+#include "reads/RegionTable.hpp"
+#include "Converter.h"
+#include <pbbam/virtual/VirtualRegion.h>
+#include <string>
+#include <gtest/gtest.h>
+
+using namespace std;
+
+std::vector<RegionType> defaultRegionTypes = RegionTable::DefaultRegionTypes();
+std::vector<RegionType> definedRegionTypes = {Insert, HQRegion, Adapter, BarCode};
+
+TEST(HDFBaxWriter, IsConvertibleVirtualRegionType)
+{ // Against the default table, ADAPTER/HQREGION/SUBREAD must be convertible; BARCODE and LQREGION must not.
+    EXPECT_TRUE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::ADAPTER, defaultRegionTypes));
+    EXPECT_TRUE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::HQREGION, defaultRegionTypes));
+    EXPECT_TRUE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::SUBREAD, defaultRegionTypes));
+    EXPECT_FALSE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::BARCODE, defaultRegionTypes));
+    EXPECT_FALSE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::LQREGION, defaultRegionTypes));
+    // Against the explicitly defined table (which additionally lists BarCode), only LQREGION stays non-convertible.
+    EXPECT_TRUE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::ADAPTER, definedRegionTypes));
+    EXPECT_TRUE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::HQREGION, definedRegionTypes));
+    EXPECT_TRUE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::SUBREAD, definedRegionTypes));
+    EXPECT_TRUE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::BARCODE, definedRegionTypes));
+    EXPECT_FALSE(RegionTypeAdapter::IsConvertible(PacBio::BAM::VirtualRegionType::LQREGION, definedRegionTypes));
+}
+
+TEST(Bam2BaxConverter, CreateRegionAnnotation_DefaultRegionTypes)
+{
+    int zmw = 12134, beginPos = 0, endPos = 100, score = 770;
+    // Pin the order of the default table first: the checks below index into it via GetTypeIndex().
+    EXPECT_EQ(defaultRegionTypes.size(), 3U);
+    EXPECT_EQ(defaultRegionTypes[0],  Adapter);
+    EXPECT_EQ(defaultRegionTypes[1],  Insert);
+    EXPECT_EQ(defaultRegionTypes[2],  HQRegion);
+    // ADAPTER: hole number, type, start, end and score of the annotation must mirror the inputs.
+    PacBio::BAM::VirtualRegion vr(PacBio::BAM::VirtualRegionType::ADAPTER, beginPos, endPos, score);
+    RegionAnnotation ra = RegionsAdapter::ToRegionAnnotation(zmw, vr, defaultRegionTypes);
+
+    EXPECT_EQ(ra.GetHoleNumber(), zmw);
+    EXPECT_EQ(defaultRegionTypes[ra.GetTypeIndex()], Adapter);
+    EXPECT_EQ(ra.GetTypeString(defaultRegionTypes), "Adapter");
+    EXPECT_EQ(ra.GetStart(),      beginPos);
+    EXPECT_EQ(ra.GetEnd(),        endPos);
+    EXPECT_EQ(ra.GetScore(),      score);
+    // HQREGION is expected to map to the HQRegion entry.
+    PacBio::BAM::VirtualRegion vr2(PacBio::BAM::VirtualRegionType::HQREGION, beginPos, endPos, score);
+    RegionAnnotation ra2 = RegionsAdapter::ToRegionAnnotation(zmw, vr2, defaultRegionTypes);
+    EXPECT_EQ(defaultRegionTypes[ra2.GetTypeIndex()], HQRegion); 
+    EXPECT_EQ(ra2.GetTypeString(defaultRegionTypes), "HQRegion");
+    // SUBREAD is expected to map to the Insert entry.
+    PacBio::BAM::VirtualRegion vr3(PacBio::BAM::VirtualRegionType::SUBREAD, beginPos, endPos, score);
+    RegionAnnotation ra3 = RegionsAdapter::ToRegionAnnotation(zmw, vr3, defaultRegionTypes);
+    EXPECT_EQ(defaultRegionTypes[ra3.GetTypeIndex()], Insert); 
+    EXPECT_EQ(ra3.GetTypeString(defaultRegionTypes), "Insert");
+
+};
+
+
+TEST(Bam2BaxConverter, CreateRegionAnnotation_DefinedRegionTypes)
+{
+    int zmw = 12134, beginPos = 0, endPos = 100, score = 770;
+    // Pin the order of the explicitly defined table first: the checks below index into it via GetTypeIndex().
+    EXPECT_EQ(definedRegionTypes.size(), 4U);
+    EXPECT_EQ(definedRegionTypes[0],  Insert);
+    EXPECT_EQ(definedRegionTypes[1],  HQRegion);
+    EXPECT_EQ(definedRegionTypes[2],  Adapter);
+    EXPECT_EQ(definedRegionTypes[3],  BarCode);
+    // ADAPTER: hole number, type, start, end and score of the annotation must mirror the inputs.
+    PacBio::BAM::VirtualRegion vr(PacBio::BAM::VirtualRegionType::ADAPTER, beginPos, endPos, score);
+    RegionAnnotation ra = RegionsAdapter::ToRegionAnnotation(zmw, vr, definedRegionTypes);
+
+    EXPECT_EQ(ra.GetHoleNumber(), zmw);
+    EXPECT_EQ(definedRegionTypes[ra.GetTypeIndex()], Adapter);
+    EXPECT_EQ(ra.GetTypeString(definedRegionTypes), "Adapter");
+    EXPECT_EQ(ra.GetStart(),      beginPos);
+    EXPECT_EQ(ra.GetEnd(),        endPos);
+    EXPECT_EQ(ra.GetScore(),      score);
+    // HQREGION is expected to map to the HQRegion entry.
+    PacBio::BAM::VirtualRegion vr2(PacBio::BAM::VirtualRegionType::HQREGION, beginPos, endPos, score);
+    RegionAnnotation ra2 = RegionsAdapter::ToRegionAnnotation(zmw, vr2, definedRegionTypes);
+    EXPECT_EQ(definedRegionTypes[ra2.GetTypeIndex()], HQRegion); 
+    EXPECT_EQ(ra2.GetTypeString(definedRegionTypes), "HQRegion");
+    // SUBREAD is expected to map to the Insert entry.
+    PacBio::BAM::VirtualRegion vr3(PacBio::BAM::VirtualRegionType::SUBREAD, beginPos, endPos, score);
+    RegionAnnotation ra3 = RegionsAdapter::ToRegionAnnotation(zmw, vr3, definedRegionTypes);
+    EXPECT_EQ(definedRegionTypes[ra3.GetTypeIndex()], Insert); 
+    EXPECT_EQ(ra3.GetTypeString(definedRegionTypes), "Insert");
+
+};
diff --git a/utils/bam2bax/tests/src/test_HDFBaseCallsWriter.cpp b/utils/bam2bax/tests/src/test_HDFBaseCallsWriter.cpp
new file mode 100644
index 0000000..f4ecbb9
--- /dev/null
+++ b/utils/bam2bax/tests/src/test_HDFBaseCallsWriter.cpp
@@ -0,0 +1,56 @@
+// Author: Yuan Li
+
+#include "TestData.h"
+#include "TestConstants.h"
+
+#include "SMRTSequence.hpp"
+#include "HDFGroup.hpp"
+#include "HDFFile.hpp"
+#include "HDFBaseCallsWriter.hpp"
+#include <string>
+#include <gtest/gtest.h>
+
+using namespace std;
+
+const std::string outfn = tests::Out_Dir  + "/" + "basecalls.h5";
+
+TEST(HDFBaseCallsWriter, WriteOneZmw_EndToEnd)
+{ // Round trip without ScanData: write one sequence to outfn, read it back, compare field by field.
+    unsigned int len = tests::len;
+    // build the fixture sequence and check it has the expected length
+    SMRTSequence seq;
+    tests::make_smrtseq(seq);
+    EXPECT_EQ(seq.length, len);
+
+    // write the sequence to outfn
+    bool ret = tests::write_to(outfn, seq);
+    EXPECT_TRUE(ret);
+
+    // read the seq from outfn; read_from must report 1
+    SMRTSequence seq2;
+    int count = tests::read_from(outfn, seq2);
+    EXPECT_EQ(count, 1);
+
+    // compare length, raw bases and ZMW identity
+    EXPECT_EQ(seq2.length, len);
+    EXPECT_EQ(memcmp(seq.seq, seq2.seq, len * sizeof(char)), 0);
+    EXPECT_EQ(seq2.zmwData.holeNumber, tests::holeNumber);
+    EXPECT_EQ(seq2.zmwData.holeStatus, tests::holeStatus);
+
+    // HQRegionSNR can only be correctly read when ScanData is also available; none is written here, so -1 is expected.
+    EXPECT_EQ(seq2.HQRegionSnr('A'), -1); 
+    EXPECT_EQ(seq2.HQRegionSnr('C'), -1);
+    EXPECT_EQ(seq2.HQRegionSnr('G'), -1);
+    EXPECT_EQ(seq2.HQRegionSnr('T'), -1);
+
+    EXPECT_EQ(seq2.readScore * 1000, tests::readScoreInt); // NOTE(review): exact equality on a scaled score; presumably readScore is floating point, so confirm this cannot fail on rounding
+
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.deletionQV,  seq2.deletionQV,    len));
+    EXPECT_TRUE(tests::CmpData<Nucleotide *>(seq.deletionTag, seq2.deletionTag,   len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.insertionQV, seq2.insertionQV,   len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.mergeQV,     seq2.mergeQV,       len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.substitutionQV,  seq2.substitutionQV,  len));
+    EXPECT_TRUE(tests::CmpData<Nucleotide *>(seq.substitutionTag, seq2.substitutionTag, len));
+    EXPECT_TRUE(tests::CmpData<HalfWord *>(seq.preBaseFrames,    seq2.preBaseFrames, len));
+    EXPECT_TRUE(tests::CmpData<HalfWord *>(seq.widthInFrames,    seq2.widthInFrames, len));
+};
diff --git a/utils/bam2bax/tests/src/test_HDFBaxWriter.cpp b/utils/bam2bax/tests/src/test_HDFBaxWriter.cpp
new file mode 100644
index 0000000..e0c232c
--- /dev/null
+++ b/utils/bam2bax/tests/src/test_HDFBaxWriter.cpp
@@ -0,0 +1,100 @@
+// Author: Yuan Li
+
+#include "TestData.h"
+#include "TestConstants.h"
+
+#include "SMRTSequence.hpp"
+#include "HDFGroup.hpp"
+#include "HDFFile.hpp"
+#include "HDFBasReader.hpp"
+#include "HDFBaxWriter.hpp"
+#include <string>
+#include <gtest/gtest.h>
+
+using namespace std;
+
+const std::string outfn = tests::Out_Dir  + "/" + "test_HDFBaxWriter.bax.h5";
+
+TEST(HDFBaxWriter, WriteOneZmw_EndToEnd)
+{ // Round trip with ScanData built from tests::baseMap: write one sequence to outfn, read it back, compare.
+    // build the fixture sequence and check it has the expected length
+    SMRTSequence seq;
+    tests::make_smrtseq(seq);
+    EXPECT_EQ(seq.length, tests::len);
+
+    // setup a scandata using tests::baseMap
+    ScanData scandata;
+    tests::make_scandata(scandata, tests::baseMap);
+
+    // write the scandata and the sequence to outfn
+    bool ret = tests::write_to(outfn, scandata, seq);
+    EXPECT_TRUE(ret);
+
+    // read the seq from outfn; read_from must report 1
+    SMRTSequence seq2;
+    int count = tests::read_from(outfn, seq2);
+    EXPECT_EQ(count, 1);
+
+    // compare length, raw bases and ZMW identity
+    unsigned int len = tests::len;
+    EXPECT_EQ(seq2.length, len);
+    EXPECT_EQ(memcmp(seq.seq, seq2.seq, len * sizeof(char)), 0);
+    EXPECT_EQ(seq2.zmwData.holeNumber, tests::holeNumber);
+    EXPECT_EQ(seq2.zmwData.holeStatus, tests::holeStatus);
+    // ScanData was written alongside the sequence, so the HQ-region SNRs are expected to come back as tests::snra..snrt.
+    EXPECT_EQ(tests::snra, seq2.HQRegionSnr('A'));
+    EXPECT_EQ(tests::snrc, seq2.HQRegionSnr('C'));
+    EXPECT_EQ(tests::snrg, seq2.HQRegionSnr('G'));
+    EXPECT_EQ(tests::snrt, seq2.HQRegionSnr('T'));
+
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.deletionQV,  seq2.deletionQV,    len));
+    EXPECT_TRUE(tests::CmpData<Nucleotide *>(seq.deletionTag, seq2.deletionTag,   len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.insertionQV, seq2.insertionQV,   len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.mergeQV,     seq2.mergeQV,       len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.substitutionQV,  seq2.substitutionQV,  len));
+    EXPECT_TRUE(tests::CmpData<Nucleotide *>(seq.substitutionTag, seq2.substitutionTag, len));
+    EXPECT_TRUE(tests::CmpData<HalfWord *>(seq.preBaseFrames,    seq2.preBaseFrames, len));
+    EXPECT_TRUE(tests::CmpData<HalfWord *>(seq.widthInFrames,    seq2.widthInFrames, len));
+};
+
+TEST(HDFBaxWriter, WriteOneZmw_EndToEnd_RandomBaseMap)
+{ // Same round trip as WriteOneZmw_EndToEnd, but the ScanData is built from tests::randomBaseMap.
+    // build the fixture sequence and check it has the expected length
+    SMRTSequence seq;
+    tests::make_smrtseq(seq);
+    EXPECT_EQ(seq.length, tests::len);
+
+    // setup a scandata using tests::randomBaseMap
+    ScanData scandata;
+    tests::make_scandata(scandata, tests::randomBaseMap);
+
+    // write the scandata and the sequence to outfn
+    bool ret = tests::write_to(outfn, scandata, seq);
+    EXPECT_TRUE(ret);
+
+    // read the seq from outfn; read_from must report 1
+    SMRTSequence seq2;
+    int count = tests::read_from(outfn, seq2);
+    EXPECT_EQ(count, 1);
+
+    // compare length, raw bases and ZMW identity
+    unsigned int len = tests::len;
+    EXPECT_EQ(seq2.length, len);
+    EXPECT_EQ(memcmp(seq.seq, seq2.seq, len * sizeof(char)), 0);
+    EXPECT_EQ(seq2.zmwData.holeNumber, tests::holeNumber);
+    EXPECT_EQ(seq2.zmwData.holeStatus, tests::holeStatus);
+    // The expected SNRs are the same tests::snra..snrt values regardless of the base map in use.
+    EXPECT_EQ(tests::snra, seq2.HQRegionSnr('A'));
+    EXPECT_EQ(tests::snrc, seq2.HQRegionSnr('C'));
+    EXPECT_EQ(tests::snrg, seq2.HQRegionSnr('G'));
+    EXPECT_EQ(tests::snrt, seq2.HQRegionSnr('T'));
+
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.deletionQV,  seq2.deletionQV,    len));
+    EXPECT_TRUE(tests::CmpData<Nucleotide *>(seq.deletionTag, seq2.deletionTag,   len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.insertionQV, seq2.insertionQV,   len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.mergeQV,     seq2.mergeQV,       len));
+    EXPECT_TRUE(tests::CmpData<QualityValueVector<unsigned char>>(seq.substitutionQV,  seq2.substitutionQV,  len));
+    EXPECT_TRUE(tests::CmpData<Nucleotide *>(seq.substitutionTag, seq2.substitutionTag, len));
+    EXPECT_TRUE(tests::CmpData<HalfWord *>(seq.preBaseFrames,    seq2.preBaseFrames, len));
+    EXPECT_TRUE(tests::CmpData<HalfWord *>(seq.widthInFrames,    seq2.widthInFrames, len));
+};
diff --git a/utils/bam2bax/tests/src/test_HDFScanDataWriter.cpp b/utils/bam2bax/tests/src/test_HDFScanDataWriter.cpp
new file mode 100644
index 0000000..7a3ac66
--- /dev/null
+++ b/utils/bam2bax/tests/src/test_HDFScanDataWriter.cpp
@@ -0,0 +1,76 @@
+// Author: Yuan Li
+
+#include "TestData.h"
+#include "TestConstants.h"
+
+#include "HDFGroup.hpp"
+#include "HDFFile.hpp"
+#include "HDFScanDataWriter.hpp"
+#include "HDFScanDataReader.hpp"
+#include "reads/ScanData.hpp"
+#include <string>
+#include <gtest/gtest.h>
+
+using namespace std;
+
+const std::string outfn = tests::Out_Dir  + "/" + "scandata.h5";
+
+TEST(HDFScanDataWriter, EndToEnd)
+{ // Round trip: write a ScanData built from tests::baseMap to outfn, read it back and compare its fields.
+    std::map<char, size_t> baseMap = tests::baseMap; // local copy; operator[] is applied to it below
+
+    ScanData scandata;
+    tests::make_scandata(scandata, baseMap);
+    
+    // Write (NOTE(review): the outcome of write_to is not checked here)
+    tests::write_to(outfn, scandata);
+
+    // Read back into a fresh object; read_from must report success
+    ScanData scandata_;
+    EXPECT_TRUE(tests::read_from(outfn, scandata_));
+
+    EXPECT_EQ(scandata_.MovieName(), tests::movieName);
+    EXPECT_EQ(scandata_.RunCode(), tests::runCode);
+    EXPECT_EQ(scandata_.WhenStarted(), tests::whenStarted);
+    EXPECT_EQ(scandata_.FrameRate(), tests::frameRate);
+    EXPECT_EQ(scandata_.NumFrames(), tests::numFrames);
+
+    EXPECT_EQ(scandata_.BaseMap()['A'], baseMap['A']); // only 'A' is compared; the C/G/T checks below are commented out (reason not recorded)
+    /*EXPECT_EQ(scandata_.BaseMap()['C'], tests::baseMap['C']);
+    EXPECT_EQ(scandata_.BaseMap()['G'], tests::baseMap['G']);
+    EXPECT_EQ(scandata_.BaseMap()['T'], tests::baseMap['T']);*/
+
+    EXPECT_EQ(scandata_.BindingKit(), tests::bindingKit);
+    EXPECT_EQ(scandata_.SequencingKit(), tests::sequencingKit);
+};
+
+TEST(HDFScanDataWriter, EndToEnd_RandomBaseMap)
+{ // Same round trip as EndToEnd, but the ScanData is built from tests::randomBaseMap.
+    std::map<char, size_t> baseMap = tests::randomBaseMap; // local copy; operator[] is applied to it below
+
+    ScanData scandata;
+    tests::make_scandata(scandata, baseMap);
+    
+    // Write (NOTE(review): the outcome of write_to is not checked here)
+    tests::write_to(outfn, scandata);
+
+    // Read back into a fresh object; read_from must report success
+    ScanData scandata_;
+    EXPECT_TRUE(tests::read_from(outfn, scandata_));
+
+    EXPECT_EQ(scandata_.MovieName(), tests::movieName);
+    EXPECT_EQ(scandata_.RunCode(), tests::runCode);
+    EXPECT_EQ(scandata_.WhenStarted(), tests::whenStarted);
+    EXPECT_EQ(scandata_.FrameRate(), tests::frameRate);
+    EXPECT_EQ(scandata_.NumFrames(), tests::numFrames);
+
+    EXPECT_EQ(scandata_.BaseMap()['A'], baseMap['A']); // only 'A' is compared; the C/G/T checks below are commented out (reason not recorded)
+    /*
+    EXPECT_EQ(scandata_.BaseMap()['C'], tests::randomBaseMap['C']);
+    EXPECT_EQ(scandata_.BaseMap()['G'], tests::randomBaseMap['G']);
+    EXPECT_EQ(scandata_.BaseMap()['T'], tests::randomBaseMap['T']);
+    */
+
+    EXPECT_EQ(scandata_.BindingKit(), tests::bindingKit);
+    EXPECT_EQ(scandata_.SequencingKit(), tests::sequencingKit);
+};
diff --git a/utils/bam2bax/tests/src/test_HDFZMWMetricsWriter.cpp b/utils/bam2bax/tests/src/test_HDFZMWMetricsWriter.cpp
new file mode 100644
index 0000000..793c1c4
--- /dev/null
+++ b/utils/bam2bax/tests/src/test_HDFZMWMetricsWriter.cpp
@@ -0,0 +1,51 @@
+// Author: Yuan Li
+
+#include "TestData.h"
+#include "TestConstants.h"
+
+#include "SMRTSequence.hpp"
+#include "HDFGroup.hpp"
+#include "HDFFile.hpp"
+#include "HDFZMWMetricsWriter.hpp"
+#include <string>
+#include <gtest/gtest.h>
+
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+TEST(HDFZMWMetricsWriter, EndToEnd)
+{
+    // setup: open the output file with H5F_ACC_TRUNC and attach a metrics writer (tests::baseMap) to its root group
+    std::string outfn = tests::Out_Dir  + "/" + "zmwmetrics.h5";
+
+    HDFFile outfile;
+    outfile.Open(outfn, H5F_ACC_TRUNC);
+
+    HDFZMWMetricsWriter writer(outfn, outfile.rootGroup, tests::baseMap);
+
+    SMRTSequence seq;
+    tests::make_smrtseq(seq);
+
+    bool OK = writer.WriteOneZmw(seq); // smoke test: only the writer's return value is checked; nothing is read back
+
+    EXPECT_TRUE(OK);
+};
+
+TEST(HDFZMWMetricsWriter, EndToEnd_RandomBaseMap)
+{
+    // setup: open the output file with H5F_ACC_TRUNC and attach a metrics writer (tests::randomBaseMap) to its root group
+    std::string outfn = tests::Out_Dir  + "/" + "zmwmetrics2.h5";
+
+    HDFFile outfile;
+    outfile.Open(outfn, H5F_ACC_TRUNC);
+
+    HDFZMWMetricsWriter writer(outfn, outfile.rootGroup, tests::randomBaseMap);
+
+    SMRTSequence seq;
+    tests::make_smrtseq(seq);
+
+    bool OK = writer.WriteOneZmw(seq); // smoke test: only the writer's return value is checked; nothing is read back
+
+    EXPECT_TRUE(OK);
+};
diff --git a/utils/bam2bax/tests/src/test_HDFZMWWriter.cpp b/utils/bam2bax/tests/src/test_HDFZMWWriter.cpp
new file mode 100644
index 0000000..0c54ab8
--- /dev/null
+++ b/utils/bam2bax/tests/src/test_HDFZMWWriter.cpp
@@ -0,0 +1,40 @@
+// Author: Yuan Li
+
+#include "TestData.h"
+
+#include "SMRTSequence.hpp"
+#include "HDFGroup.hpp"
+#include "HDFFile.hpp"
+#include "HDFZMWWriter.hpp"
+#include <string>
+#include <gtest/gtest.h>
+
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+TEST(HDFZMWWriter, EndToEnd)
+{
+    // Open the output file with H5F_ACC_TRUNC and attach a ZMW writer to its root group.
+    string outfn = tests::Out_Dir + "/" + "zmw.h5";
+
+    HDFFile outfile;
+    outfile.Open(outfn, H5F_ACC_TRUNC);
+    HDFZMWWriter writer(outfn, outfile.rootGroup);
+
+    // Write 999 synthetic ZMWs; hole number and read length both equal the loop counter.
+    for (int zmw = 1; zmw < 1000; ++zmw) {
+        SMRTSequence seq;
+        seq.zmwData.holeNumber = zmw;
+        seq.zmwData.holeStatus = static_cast<unsigned char>(8);
+        seq.length = zmw;
+        seq.readScore = 0.87;
+        seq.HQRegionSnr('A', 0.1);
+        seq.HQRegionSnr('C', 0.2);
+        seq.HQRegionSnr('G', 0.3);
+        seq.HQRegionSnr('T', 0.4);
+
+        const bool written = writer.WriteOneZmw(seq);
+        EXPECT_TRUE(written);
+    }
+};
diff --git a/utils/bax2bam/BUILD.txt b/utils/bax2bam/BUILD.txt
new file mode 100644
index 0000000..5e11e23
--- /dev/null
+++ b/utils/bax2bam/BUILD.txt
@@ -0,0 +1,10 @@
+Build instructions for developers:
+Assuming that blasr and blasr_libcpp are placed under //depot/software/smrtanalysis/bioinformatics/ext/pi
+
+  $ cd <bax2bam>
+  $ module load boost
+  $ mkdir build; cd build; cmake ..
+  $ make
+  $ ../tests/bin/test_bax2bam # to test bax2bam exe
+
+
diff --git a/utils/bax2bam/CMakeLists.txt b/utils/bax2bam/CMakeLists.txt
new file mode 100644
index 0000000..8ffb153
--- /dev/null
+++ b/utils/bax2bam/CMakeLists.txt
@@ -0,0 +1,128 @@
+########################################################################
+# CMake build script for Bax2Bam executable.
+########################################################################
+
+cmake_minimum_required(VERSION 2.8)  # must precede project() so version/policy settings are in effect when it runs
+project(Bax2Bam)
+
+# project version
+set(Bax2Bam_MAJOR_VERSION 0)
+set(Bax2Bam_MINOR_VERSION 0)
+set(Bax2Bam_PATCH_VERSION 8)
+set(Bax2Bam_VERSION
+  "${Bax2Bam_MAJOR_VERSION}.${Bax2Bam_MINOR_VERSION}.${Bax2Bam_PATCH_VERSION}"
+)
+
+# build-time options
+option(Bax2BAM_build_tests "Build Bax2BAM's unit tests." ON)
+
+# main project paths
+set(Bax2Bam_RootDir       ${Bax2Bam_SOURCE_DIR})
+set(Bax2Bam_DocsDir       ${Bax2Bam_RootDir}/docs)
+set(Bax2Bam_SourceDir     ${Bax2Bam_RootDir}/src)
+set(Bax2Bam_TestsDir      ${Bax2Bam_RootDir}/tests)
+set(Bax2Bam_ThirdPartyDir ${Bax2Bam_RootDir}/third-party)
+
+if (NOT Bax2BAM_OutputDir)
+    set(Bax2BAM_OutputDir ${Bax2Bam_RootDir})
+endif()
+
+set(Bax2Bam_BinDir        ${Bax2BAM_OutputDir}/bin)
+file(MAKE_DIRECTORY       ${Bax2Bam_BinDir})
+
+# shared & third-party paths
+if (NOT PBDATA_ROOT_DIR)
+    set(PBDATA_ROOT_DIR ${Bax2Bam_RootDir}/../../../blasr_libcpp)
+endif()
+
+# find (existing) libraries needed by executable and tests
+if (NOT BLASR_INCLUDE_DIRS OR NOT BLASR_LIBRARIES)
+    find_library(BLASR_LIBRARIES    blasr    ${PBDATA_ROOT_DIR}/alignment)
+    set(BLASR_INCLUDE_DIRS ${PBDATA_ROOT_DIR}/alignment)
+endif()
+
+if (NOT PBIHDF_INCLUDE_DIRS OR NOT PBIHDF_LIBRARIES)
+    find_library(PBIHDF_LIBRARIES   pbihdf   ${PBDATA_ROOT_DIR}/hdf)
+    set(PBIHDF_INCLUDE_DIRS ${PBDATA_ROOT_DIR}/hdf)
+endif()
+
+if (NOT PBDATA_INCLUDE_DIRS OR NOT PBDATA_LIBRARIES)
+    find_library(PBDATA_LIBRARIES   pbdata   ${PBDATA_ROOT_DIR}/pbdata)
+    set(PBDATA_INCLUDE_DIRS ${PBDATA_ROOT_DIR}/pbdata)
+endif()
+
+if (NOT HDF5_INCLUDE_DIRS OR NOT HDF5_LIBRARIES)
+    if (NOT HDF5_RootDir)
+        set(HDF5_RootDir ${Bax2Bam_RootDir}/../../../../../../prebuilt.out/hdf5/hdf5-1.8.12/ubuntu-1404)
+    endif()
+
+    set(HDF5_INCLUDE_DIRS ${HDF5_RootDir}/include)
+    set(HDF5_LibDir       ${HDF5_RootDir}/lib)
+
+    find_library(HDF5_LIBRARIES     hdf5     ${HDF5_LibDir} NO_CMAKE_SYSTEM_PATH)
+    find_library(HDF5_CPP_LIBRARIES hdf5_cpp ${HDF5_LibDir} NO_CMAKE_SYSTEM_PATH)
+endif()
+
+if (NOT PacBioBAM_INCLUDE_DIRS OR NOT PacBioBAM_LIBRARIES
+    OR NOT HTSLIB_INCLUDE_DIRS OR NOT HTSLIB_LIBRARIES)
+    set(PacBioBAM_RootDir  ${Bax2Bam_RootDir}/../../../../../lib/cpp/pbbam)
+    add_subdirectory(${PacBioBAM_RootDir} external/build/pbbam)
+endif()
+
+if (NOT Boost_INCLUDE_DIRS)
+    find_package(Boost REQUIRED)
+endif()
+
+if (NOT ZLIB_LIBRARIES OR NOT ZLIB_INCLUDE_DIRS)
+    find_package(ZLIB REQUIRED)
+endif()
+
+# shared CXX flags for src & tests
+include(CheckCXXCompilerFlag)
+set(Bax2Bam_CXX_FLAGS "-g -std=c++11 -Wall")
+
+# quash warnings from pbdata
+check_cxx_compiler_flag("-Wno-overloaded-virtual" HAS_NO_OVERLOADED_VIRTUAL)
+if(HAS_NO_OVERLOADED_VIRTUAL)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-overloaded-virtual")
+endif()
+#check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+#if(HAS_NO_UNUSED_PRIVATE_FIELD)
+#    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-unused-private-field")
+#endif()
+check_cxx_compiler_flag("-Wno-unused-variable" HAS_NO_UNUSED_VARIABLE)
+if(HAS_NO_UNUSED_VARIABLE)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-unused-variable")
+endif()
+check_cxx_compiler_flag("-Wno-uninitialized" HAS_NO_UNINITIALIZED)
+if(HAS_NO_UNINITIALIZED)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-uninitialized")
+endif()
+check_cxx_compiler_flag("-Wno-deprecated-declarations" HAS_NO_DEPRECATED_DECLARATIONS)
+if(HAS_NO_DEPRECATED_DECLARATIONS)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-deprecated-declarations")
+endif()
+# NOTE: -Wno-unused-local-typedef used to quash clang warnings w/ Boost
+check_cxx_compiler_flag("-Wno-unused-local-typedef" HAS_NO_UNUSED_LOCAL_TYPEDEF)
+if(HAS_NO_UNUSED_LOCAL_TYPEDEF)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-unused-local-typedef")
+endif()
+
+SET(CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${BAX2BAM_LINKER_FLAGS}" )
+# NOTE(review): BAX2BAM_LINKER_FLAGS is not set anywhere in this file; unless a caller defines it, the line above appends nothing.
+# main exe src
+add_subdirectory(src)
+
+# testing (controlled by the Bax2BAM_build_tests option, which defaults to ON)
+if(Bax2BAM_build_tests)
+
+    enable_testing()
+    
+    if (NOT GTEST_SRC_DIR)
+        set(GTEST_SRC_DIR ../gtest)  # fallback gtest location when the caller did not provide one
+    endif()
+
+    add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
+    add_subdirectory(tests)
+endif()
+
diff --git a/utils/bax2bam/README.md b/utils/bax2bam/README.md
new file mode 100644
index 0000000..62bdcbd
--- /dev/null
+++ b/utils/bax2bam/README.md
@@ -0,0 +1,62 @@
+# bax2bam
+
+## Command-line interface
+
+```
+
+Usage: bax2bam [options] <input files...>
+
+bax2bam converts the legacy PacBio basecall format (bax.h5) into the BAM
+basecall format.
+
+Options:
+  -h, --help            show this help message and exit
+  --version             show program's version number and exit
+
+  Input/output files:
+    movie.1.bax.h5 movie.2.bax.h5 ...
+                        Input files which should be from the same movie
+    --xml=STRING        DataSet XML file containing a list of movie names
+    -f STRING, --fofn=STRING
+                        File-of-file-names containing a list of input files
+    -o STRING           Prefix of output filenames. Movie name will be used if
+                        no prefix provided
+    --output-xml=STRING
+                        Explicit output XML name. If none provided via this arg,
+                        bax2bam will use -o prefix (<prefix>.dataset.xml). If
+                        that is not specified either, the output XML filename
+                        will be <moviename>.dataset.xml
+
+  Output read types (mutually exclusive):
+    --subread           Output subreads (default)
+    --hqregion          Output HQ regions
+    --polymeraseread    Output full polymerase read
+    --ccs               Output CCS sequences
+
+  Pulse feature options:
+    Configure pulse features in the output BAM. Supported features include:
+        Pulse Feature:    BAM tag:  Default:
+        DeletionQV        dq        Y
+        DeletionTag       dt        Y
+        InsertionQV       iq        Y
+        IPD               ip        Y
+        PulseWidth        pw        N
+        MergeQV           mq        Y
+        SubstitutionQV    sq        Y
+        SubstitutionTag   st        N
+    If this option is used, then only those features listed will be included,
+    regardless of the default state.
+
+    --pulsefeatures=STRING
+                        Comma-separated list of desired pulse features, using
+                        the names listed above.
+                        
+    --losslessframes    Store full, 16-bit IPD/PulseWidth data, instead of
+                        (default) downsampled, 8-bit encoding.
+
+  Output BAM file type:
+    --internal          Output BAMs in internal mode. Currently this indicates
+                        that non-sequencing ZMWs should be included in the
+                        output scraps BAM file, if applicable.
+
+```
\ No newline at end of file
diff --git a/utils/bax2bam/makefile b/utils/bax2bam/makefile
new file mode 100644
index 0000000..c50e02c
--- /dev/null
+++ b/utils/bax2bam/makefile
@@ -0,0 +1,29 @@
+.PHONY: all clean
+
+SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+-include ${CURDIR}/../../defines.mk
+include ${SRCDIR}/../../rules.mk
+# all: configure with CMake in ${CURDIR}/build using the *_INC/*_LIB locations (presumably set by the includes above), then build there.
+all: ${CURDIR}/src/* ${CURDIR}/tests/src/*
+	@mkdir -p ${CURDIR}/build && \
+	 cd ${CURDIR}/build && \
+		cmake -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \
+          -DHTSLIB_INCLUDE_DIRS=${HTSLIB_INC} \
+          -DPacBioBAM_LIBRARIES=${PBBAM_LIB}/libpbbam${SH_LIB_EXT} \
+          -DHTSLIB_LIBRARIES=${HTSLIB_LIB}/libhts${SH_LIB_EXT} \
+          -DPBDATA_INCLUDE_DIRS=${LIBPBDATA_INC} \
+          -DPBDATA_LIBRARIES=${LIBPBDATA_LIB}/libpbdata${SH_LIB_EXT} \
+          -DPBIHDF_INCLUDE_DIRS=${LIBPBIHDF_INC} \
+          -DPBIHDF_LIBRARIES=${LIBPBIHDF_LIB}/libpbihdf${SH_LIB_EXT} \
+          -DBLASR_INCLUDE_DIRS=${LIBBLASR_INC}/ \
+          -DBLASR_LIBRARIES=${LIBBLASR_LIB}/libblasr${SH_LIB_EXT} \
+          -DHDF5_INCLUDE_DIRS=${HDF5_INC} \
+          -DHDF5_CPP_LIBRARIES=${HDF5_LIB}/libhdf5_cpp${SH_LIB_EXT} \
+          -DHDF5_LIBRARIES=${HDF5_LIB}/libhdf5${SH_LIB_EXT} \
+          -DBax2Bam_EXE_LINKER_FLAGS="-Wl,--no-as-needed -ldl -pthread -lrt " \
+          ../ && \
+		$(MAKE)
+# clean: remove the produced binaries and the CMake build tree. $(MAKE) above (not literal make) passes -j/jobserver and -n down.
+clean:
+	@rm -rf ${CURDIR}/bin/
+	@rm -rf ${CURDIR}/build
diff --git a/utils/bax2bam/src/Bax2Bam.cpp b/utils/bax2bam/src/Bax2Bam.cpp
new file mode 100644
index 0000000..70fc196
--- /dev/null
+++ b/utils/bax2bam/src/Bax2Bam.cpp
@@ -0,0 +1,191 @@
+// Author: Derek Barnett
+
+#include "Bax2Bam.h"
+#include "CcsConverter.h"
+#include "HqRegionConverter.h"
+#include "PolymeraseReadConverter.h"
+#include "SubreadConverter.h"
+#include <pbbam/DataSet.h>
+#include <pbbam/PbiRawData.h>
+#include <boost/algorithm/string.hpp>
+#include <memory>
+#include <fstream>
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <time.h>
+
+#include <unistd.h> // getcwd
+using namespace std;
+
+namespace internal {
+
+static inline
+string CurrentWorkingDir(void)
+{
+    // Returns the current working directory, or an empty string when getcwd() fails.
+    char buffer[FILENAME_MAX] = { };
+    const char* cwd = getcwd(buffer, FILENAME_MAX);
+    return (cwd != nullptr) ? string(cwd) : string();
+}
+
+static
+bool WriteDatasetXmlOutput(const Settings& settings,
+                           vector<string>* errors)
+{ // Loads the input dataset XML, swaps its BAX resources for the BAM output(s), refreshes its metadata and saves it.
+    using namespace PacBio::BAM;
+    assert(errors); // the caller must supply an error sink (checked via assert only)
+    // Returns true on success; if any step throws std::exception, one message is appended to *errors and false is returned.
+    try {
+        DataSet dataset(settings.datasetXmlFilename);
+        assert(dataset.Type() == DataSet::HDF_SUBREAD); // the input is expected to be an HDF subread set (assert only)
+
+        // change type: HDF subread set -> subread set, with the matching metatype
+        dataset.Type(DataSet::SUBREAD);
+        dataset.MetaType("PacBio.DataSet.SubreadSet");
+
+        time_t currentTime = time(NULL);
+        // creation time and time-stamped name are both derived from the single time() reading above
+        dataset.CreatedAt(ToIso8601(currentTime));
+        dataset.TimeStampedName(string{"pacbio_dataset_subreadset-"}+ToDataSetFormat(currentTime));
+
+        // change files: remove BAX (every resource whose MetaType contains "bax", case-insensitively), add BAM
+        std::vector<ExternalResource> toRemove;
+        ExternalResources resources = dataset.ExternalResources();
+        auto iter = resources.cbegin();
+        auto end  = resources.cend();
+        for (; iter != end; ++iter) {
+            ExternalResource e = (*iter);
+            boost::iterator_range<string::iterator> baxFound = boost::algorithm::ifind_first(e.MetaType(), "bax");
+            if (!baxFound.empty()) 
+                toRemove.push_back(e);
+        }
+
+        while(!toRemove.empty()) { // removal happens here, after the scan, so 'resources' is not modified while it is iterated
+            auto e = toRemove.back();
+            resources.Remove(e);
+            toRemove.pop_back();
+        }
+
+        const string scheme = "file://";
+        string mainBamFilepath;
+
+        // If the output filename starts with a slash, treat it as a complete path and use it unchanged
+        if (boost::starts_with(settings.outputBamFilename, "/"))
+        {
+            mainBamFilepath = settings.outputBamFilename;
+        }
+        else // otherwise build the path from the CWD
+        { 
+            mainBamFilepath = CurrentWorkingDir();
+            if (!mainBamFilepath.empty())
+                mainBamFilepath.append(1, '/');
+            mainBamFilepath.append(settings.outputBamFilename);
+        }
+
+        // Combine the scheme and filepath and store in the dataset
+        mainBamFilepath = scheme + mainBamFilepath;
+        ExternalResource mainBam{ "PacBio.SubreadFile.SubreadBamFile", mainBamFilepath };
+        FileIndex mainPbi{ "PacBio.Index.PacBioIndex", mainBamFilepath + ".pbi" };
+        mainBam.FileIndices().Add(mainPbi);
+
+        // maybe add scraps BAM (& PBI); only when a scraps filename is set
+        if (!settings.scrapsBamFilename.empty()) {
+
+            string scrapsBamFilepath;
+
+            // If the scraps filename starts with a slash, treat it as a complete path and use it unchanged
+            if (boost::starts_with(settings.scrapsBamFilename, "/"))
+            {
+                scrapsBamFilepath = settings.scrapsBamFilename;
+            }
+            else // otherwise build the path from the CWD
+            {
+                scrapsBamFilepath = CurrentWorkingDir();
+                if (!scrapsBamFilepath.empty())
+                    scrapsBamFilepath.append(1, '/');
+                scrapsBamFilepath.append(settings.scrapsBamFilename);
+            }
+
+            ExternalResource scrapsBam{ "PacBio.SubreadFile.ScrapsBamFile", scrapsBamFilepath }; // NOTE(review): unlike mainBamFilepath, no "file://" scheme is prepended here; confirm this is intended
+            FileIndex scrapsPbi{ "PacBio.Index.PacBioIndex", scrapsBamFilepath + ".pbi" };
+            scrapsBam.FileIndices().Add(scrapsPbi);
+            mainBam.ExternalResources().Add(scrapsBam); // the scraps BAM is nested under the main BAM resource, not added at top level
+        }
+
+        // add resources to output dataset
+        resources.Add(mainBam);
+        dataset.ExternalResources(resources);
+
+        // update TotalLength & NumRecords from the PBI index of the subreads BAM
+        const BamFile subreadFile{ settings.outputBamFilename };
+        const string subreadPbiFn = subreadFile.PacBioIndexFilename();
+        const PbiRawData subreadsIndex{ subreadPbiFn };
+        const PbiRawBasicData& subreadData = subreadsIndex.BasicData();
+
+        uint64_t totalLength = 0;
+        uint32_t numRecords = subreadsIndex.NumReads();
+        for (uint32_t i = 0; i < numRecords; ++i) {
+            const auto subreadLength = subreadData.qEnd_.at(i) - subreadData.qStart_.at(i);
+            totalLength += subreadLength;
+        }
+
+        DataSetMetadata metadata = dataset.Metadata();
+        metadata.TotalLength(std::to_string(totalLength));
+        metadata.NumRecords(std::to_string(numRecords));
+        dataset.Metadata(metadata);
+
+        // save to file
+        string xmlFn = settings.outputXmlFilename; // try user-provided explicit filename first
+        if (xmlFn.empty())
+            xmlFn = settings.outputBamPrefix + ".dataset.xml"; // prefix set w/ moviename elsewhere if not user-provided
+        dataset.Save(xmlFn);
+        return true;
+
+    } catch (std::exception&) { // the exception's own message is dropped; only the generic text below is reported
+        errors->push_back("could not create output XML");
+        return false;
+    }
+}
+
+} // namespace internal
+
+// Runs one bax-to-bam conversion as described by 'settings'.
+//
+// Returns EXIT_SUCCESS when the conversion succeeded and, if an input dataset
+// XML was given, the output dataset XML could be written as well. Otherwise
+// every collected error is printed to stderr with an "ERROR: " prefix and
+// EXIT_FAILURE is returned. An unrecognized settings.mode is reported on
+// stderr and fails immediately, before any conversion is attempted.
+int Bax2Bam::Run(Settings& settings) {
+
+    // Pick the converter implementation that matches the requested output mode.
+    std::unique_ptr<IConverter> converter;
+    switch (settings.mode) {
+        case Settings::SubreadMode    : converter.reset(new SubreadConverter(settings)); break;
+        case Settings::HQRegionMode   : converter.reset(new HqRegionConverter(settings)); break;
+        case Settings::PolymeraseMode : converter.reset(new PolymeraseReadConverter(settings)); break;
+        case Settings::CCSMode        : converter.reset(new CcsConverter(settings)); break;
+        default :
+            cerr << "ERROR: unknown mode selected" << endl;
+            return EXIT_FAILURE;
+    }
+
+    // Convert first. The dataset XML is only written when the conversion succeeded
+    // and an input dataset XML was supplied; a failed XML write fails the whole run.
+    vector<string> xmlErrors;
+    bool succeeded = converter->Run();
+    if (succeeded && !settings.datasetXmlFilename.empty())
+        succeeded = internal::WriteDatasetXmlOutput(settings, &xmlErrors);
+
+    if (succeeded)
+        return EXIT_SUCCESS;
+
+    // Failure: report the converter's errors first, then any XML-writing errors.
+    for (const string& message : converter->Errors())
+        cerr << "ERROR: " << message << endl;
+    for (const string& message : xmlErrors)
+        cerr << "ERROR: " << message << endl;
+    return EXIT_FAILURE;
+}
diff --git a/utils/bax2bam/src/Bax2Bam.h b/utils/bax2bam/src/Bax2Bam.h
new file mode 100644
index 0000000..defd879
--- /dev/null
+++ b/utils/bax2bam/src/Bax2Bam.h
@@ -0,0 +1,13 @@
+// Author: Derek Barnett
+#ifndef BAX2BAM_H
+#define BAX2BAM_H
+
+class Settings;
+
+class Bax2Bam // stateless holder for the conversion driver; exposes a single static function
+{
+public:
+    static int Run(Settings& settings); // runs the conversion; returns EXIT_SUCCESS or EXIT_FAILURE
+};
+
+#endif // BAX2BAM_H
diff --git a/utils/bax2bam/src/CMakeLists.txt b/utils/bax2bam/src/CMakeLists.txt
new file mode 100644
index 0000000..9fa7243
--- /dev/null
+++ b/utils/bax2bam/src/CMakeLists.txt
@@ -0,0 +1,57 @@
+include_directories(  # header search paths: this directory plus every dependency
+    .
+    ${BLASR_INCLUDE_DIRS}
+    ${Boost_INCLUDE_DIRS}
+    ${HDF5_INCLUDE_DIRS}
+    ${HTSLIB_INCLUDE_DIRS}
+    ${PacBioBAM_INCLUDE_DIRS}
+    ${PBDATA_INCLUDE_DIRS}
+    ${PBDATA_ROOT_DIR}
+    ${PBIHDF_INCLUDE_DIRS}
+    ${ZLIB_INCLUDE_DIRS}
+)
+
+set(SOURCES  # headers are listed next to their implementation files
+    main.cpp
+    Bax2Bam.h
+    Bax2Bam.cpp
+    CcsConverter.h
+    CcsConverter.cpp
+    ConverterBase.h
+    HqRegionConverter.h
+    HqRegionConverter.cpp
+    IConverter.h
+    IConverter.cpp
+    OptionParser.h
+    OptionParser.cpp
+    PolymeraseReadConverter.h
+    PolymeraseReadConverter.cpp
+    Settings.h
+    Settings.cpp
+    SubreadConverter.h
+    SubreadConverter.cpp
+)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Bax2Bam_CXX_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${Bax2Bam_EXE_LINKER_FLAGS}")
+
+# build the bax2bam executable and place it in ${Bax2Bam_BinDir}
+add_executable(bax2bam ${SOURCES})
+set_target_properties(bax2bam PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${Bax2Bam_BinDir}
+)
+if (NOT APPLE)
+    set(MY_LIBRT -lrt)  # librt is linked everywhere except on Apple; MY_LIBRT is not set there
+else()
+endif()
+target_link_libraries(bax2bam 
+    ${BLASR_LIBRARIES}
+    ${PBIHDF_LIBRARIES}
+    ${PBDATA_LIBRARIES} 
+    ${HDF5_CPP_LIBRARIES}
+    ${HDF5_LIBRARIES}
+    ${PacBioBAM_LIBRARIES}
+    ${HTSLIB_LIBRARIES}
+    ${ZLIB_LIBRARIES}
+    ${MY_LIBRT}
+)
diff --git a/utils/bax2bam/src/CcsConverter.cpp b/utils/bax2bam/src/CcsConverter.cpp
new file mode 100644
index 0000000..ffb9c02
--- /dev/null
+++ b/utils/bax2bam/src/CcsConverter.cpp
@@ -0,0 +1,117 @@
+// Author: Derek Barnett
+
+#include <iostream>
+
+#include "CcsConverter.h"
+
+#include "utils/RegionUtils.hpp"
+#include "HDFRegionTableReader.hpp"
+
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+#include <algorithm>
+
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+CcsConverter::CcsConverter(Settings& settings)
+    : ConverterBase(settings)
+{   // CCS mode forces these five per-base features off, whatever the incoming settings requested
+    settings_.usingMergeQV         = false;
+    settings_.usingDeletionTag     = false;
+    settings_.usingSubstitutionTag = false;
+    settings_.usingIPD             = false;
+    settings_.usingPulseWidth      = false;
+}
+
+CcsConverter::~CcsConverter(void) { } // nothing to release here; the readers are closed and deleted by ~ConverterBase
+
+bool CcsConverter::ConvertFile(HdfCcsReader* reader,
+                               BamWriter* writer)
+{
+    assert(reader);
+
+    // load the read scores for this reader
+    InitReadScores(reader);
+
+    // Pull CCS records from the HDF5 reader one at a time. The loop stops
+    // when GetNext reports no further record, or right after the first record
+    // that fails to convert/write.
+    bool allWritten = true;
+    CCSSequence ccsRead;
+    while (allWritten && reader->GetNext(ccsRead)) {
+
+        // empty reads and non-sequencing ZMWs produce no output
+        const bool skip = (ccsRead.length == 0) || !IsSequencingZmw(ccsRead);
+        if (skip)
+            continue;
+
+        // the whole read [0, length) becomes a single BAM record
+        allWritten = WriteRecord(ccsRead, 0, ccsRead.length, ReadGroupId(), writer);
+
+        // release the record's buffers whether or not the write succeeded
+        ccsRead.Free();
+    }
+
+    return allWritten;
+}
+
+bool CcsConverter::ConvertFile(HdfCcsReader* reader,
+                               PacBio::BAM::BamWriter* writer,
+                               PacBio::BAM::BamWriter* scrapsWriter) 
+{ return false; } // scraps-writing overload: converts nothing and always reports failure in CCS mode
+
+void CcsConverter::SetSequenceAndQualities(PacBio::BAM::BamRecordImpl* bamRecord,
+                                           const CCSSequence& smrtRead,
+                                           const int start,
+                                           const int length)
+{
+    recordSequence_.assign((const char*)smrtRead.seq + start, length);
+    if (!smrtRead.qual.Empty())
+    {
+        // qualities are available: copy the matching window and store it as FASTQ text
+        uint8_t* const qvBegin = (uint8_t*)smrtRead.qual.data + start;
+        recordQVs_.assign(qvBegin, qvBegin + length);
+        bamRecord->SetSequenceAndQualities(recordSequence_, recordQVs_.Fastq());
+    } else
+        bamRecord->SetSequenceAndQualities(recordSequence_);
+}
+
+void CcsConverter::AddRecordName(PacBio::BAM::BamRecordImpl* bamRecord,
+                                 const UInt holeNumber,
+                                 const int start,
+                                 const int end)
+{
+    // CCS record names are "<movie>/<hole>/ccs"; start and end are not part of the name
+    const string ccsName = settings_.movieName + "/" + to_string(holeNumber) + "/ccs";
+    bamRecord->Name(ccsName);
+}
+
+void CcsConverter::AddModeTags(PacBio::BAM::TagCollection* tags,
+                               const CCSSequence& smrtRead,
+                               const int start,
+                               const int end)
+{   // the only CCS-specific tag is np, taken from the read's numPasses; start and end are not used
+    (*tags)["np"] = static_cast<int32_t>(smrtRead.numPasses);
+}
+
+CcsConverter::HdfCcsReader* CcsConverter::InitHdfReader()
+{
+    // let the base class create the reader, then put it into CCS mode before returning it
+    HdfCcsReader* const ccsReader = ConverterBase<CCSSequence, HdfCcsReader>::InitHdfReader();
+    ccsReader->SetReadBasesFromCCS();
+    return ccsReader;
+}
+
+string CcsConverter::HeaderReadType(void) const
+{ return "CCS"; } // ConverterBase::ConvertRecord omits the sn tag when this value is "CCS"
+
+string CcsConverter::ScrapsReadType(void) const
+{ return "UNKNOWN"; } // NOTE(review): presumably a placeholder, since the scraps ConvertFile overload always fails
+
+string CcsConverter::OutputFileSuffix(void) const
+{ return ".ccs.bam"; }
+
+string CcsConverter::ScrapsFileSuffix(void) const
+{ return ".empty.bam"; } // NOTE(review): presumably a placeholder as well - confirm against the caller
diff --git a/utils/bax2bam/src/CcsConverter.h b/utils/bax2bam/src/CcsConverter.h
new file mode 100644
index 0000000..aceabb3
--- /dev/null
+++ b/utils/bax2bam/src/CcsConverter.h
@@ -0,0 +1,47 @@
+// Author: Derek Barnett
+
+#ifndef CCSCONVERTER_H
+#define CCSCONVERTER_H
+
+#include "ConverterBase.h"
+#include "CCSSequence.hpp"
+#include "HDFCCSReader.hpp"
+
+class CcsConverter : public ConverterBase<CCSSequence, HDFCCSReader<CCSSequence>>
+{   // converter for CCS mode: one BAM record per CCS read, read through an HDFCCSReader
+private:
+    typedef HDFCCSReader<CCSSequence> HdfCcsReader;
+
+public:
+    CcsConverter(Settings& settings);
+    ~CcsConverter(void);
+
+protected: // ConverterBase hooks; 'override' makes the compiler verify each signature
+    bool ConvertFile(HdfCcsReader* reader,
+                     PacBio::BAM::BamWriter* writer) override;
+    bool ConvertFile(HdfCcsReader* reader,
+                     PacBio::BAM::BamWriter* writer,
+                     PacBio::BAM::BamWriter* scrapsWriter) override; // always returns false
+    void SetSequenceAndQualities(PacBio::BAM::BamRecordImpl* bamRecord,
+                                 const CCSSequence& smrtRecord,
+                                 const int start,
+                                 const int length) override; // also stores QVs when the read has them
+    void AddRecordName(PacBio::BAM::BamRecordImpl* bamRecord,
+                       const UInt holeNumber,
+                       const int start,
+                       const int end) override; // name is "<movie>/<hole>/ccs"
+    void AddModeTags(PacBio::BAM::TagCollection* tags,
+                     const CCSSequence& smrtRecord,
+                     const int start,
+                     const int end) override; // adds the np tag from numPasses
+    HdfCcsReader* InitHdfReader(void) override; // base-class reader, switched to CCS mode
+    std::string HeaderReadType(void) const override;   // "CCS"
+    std::string ScrapsReadType(void) const override;   // "UNKNOWN"
+    std::string OutputFileSuffix(void) const override; // ".ccs.bam"
+    std::string ScrapsFileSuffix(void) const override; // ".empty.bam"
+
+protected:
+    PacBio::BAM::QualityValues recordQVs_; // re-used buffer for the quality window of the current read
+};
+
+#endif // CCSCONVERTER_H
diff --git a/utils/bax2bam/src/ConverterBase.h b/utils/bax2bam/src/ConverterBase.h
new file mode 100644
index 0000000..e2cafd5
--- /dev/null
+++ b/utils/bax2bam/src/ConverterBase.h
@@ -0,0 +1,1019 @@
+// Author: Derek Barnett
+
+#ifndef CONVERTERBASE_H
+#define CONVERTERBASE_H
+
+#include "IConverter.h"
+#include "Settings.h"
+#include "HDFBasReader.hpp"
+#include <pbbam/BamFile.h>
+#include <pbbam/BamHeader.h>
+#include <pbbam/BamWriter.h>
+#include <pbbam/PbiFile.h>
+#include <pbbam/ReadGroupInfo.h>
+#include <pbbam/Tag.h>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
+#include <libgen.h>
+#include <cstdlib>
+#include <climits>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+class BamRecordImpl;
+
+} // namespace BAM
+} // namespace PacBio
+
+template<typename RecordType = SMRTSequence, typename HdfReader = HDFBasReader>
+class ConverterBase : public IConverter
+{   // shared conversion machinery; RecordType / HdfReader select the record and HDF reader types
+public:
+    ~ConverterBase(void); // closes and deletes every reader held in readers_
+
+public:
+    virtual bool Run(void) final; // final: subclasses customise the hooks below instead
+
+protected:
+    ConverterBase(Settings& settings);
+
+    virtual bool ConvertFile(HdfReader* reader,
+                             PacBio::BAM::BamWriter* writer) =0;
+
+    virtual bool ConvertFile(HdfReader* reader,
+                             PacBio::BAM::BamWriter* writer,
+                             PacBio::BAM::BamWriter* scrapsWriter) =0;
+
+    virtual bool ConvertRecord(const RecordType& smrtRecord,
+                               const int start,
+                               const int end,
+                               const std::string& rgId,
+                               PacBio::BAM::BamRecordImpl* bamRecord);
+
+    virtual bool WriteRecord(const RecordType& smrtRecord,
+                             const int recordStart,
+                             const int recordEnd,
+                             const std::string& readGroupId,
+                             PacBio::BAM::BamWriter* writer);
+
+    virtual bool WriteFilteredRecord(const RecordType& smrtRecord,
+                                     const int recordStart,
+                                     const int recordEnd,
+                                     const std::string& readGroupId,
+                                     PacBio::BAM::BamWriter* writer);
+
+    virtual bool WriteFilteredRecord(const RecordType& smrtRecord,
+                                     const int recordStart,
+                                     const int recordEnd,
+                                     const std::string& readGroupId,
+                                     const uint8_t contextFlags,
+                                     PacBio::BAM::BamWriter* writer);
+
+    virtual bool WriteLowQualityRecord(const RecordType& smrtRecord,
+                                       const int recordStart,
+                                       const int recordEnd,
+                                       const std::string& readGroupId,
+                                       PacBio::BAM::BamWriter* writer);
+
+    virtual bool WriteAdapterRecord(const RecordType& smrtRecord,
+                                    const int recordStart,
+                                    const int recordEnd,
+                                    const std::string& readGroupId,
+                                    PacBio::BAM::BamWriter* writer);
+
+    virtual bool WriteSubreadRecord(const RecordType& smrtRecord,
+                                    const int recordStart,
+                                    const int recordEnd,
+                                    const std::string& readGroupId,
+                                    const uint8_t contextFlags,
+                                    PacBio::BAM::BamWriter* writer);
+
+    virtual void SetSequenceAndQualities(PacBio::BAM::BamRecordImpl* bamRecord,
+                                         const RecordType& smrtRecord,
+                                         const int start,
+                                         const int length);
+
+    virtual void AddRecordName(PacBio::BAM::BamRecordImpl* bamRecord,
+                               const UInt holeNumber,
+                               const int start,
+                               const int end);
+
+    virtual void AddModeTags(PacBio::BAM::TagCollection* tags,
+                             const RecordType& smrtRecord,
+                             const int start,
+                             const int end);
+
+    virtual HdfReader* InitHdfReader(void);
+    virtual void InitReadScores(HdfReader* reader) final;
+
+    virtual bool IsSequencingZmw(const RecordType& record) const final;
+
+    virtual bool LoadChemistryFromMetadataXML(const std::string& baxFn,
+                                              const std::string& movieName) final; 
+
+    virtual std::string HeaderReadType(void) const =0;
+    virtual std::string ScrapsReadType(void) const =0;
+    virtual std::string OutputFileSuffix(void) const =0;
+    virtual std::string ScrapsFileSuffix(void) const =0;
+
+    // Settings variable accessors
+    virtual std::string MovieName(void);
+    virtual std::string ReadGroupId(void);
+    virtual std::string ScrapsReadGroupId(void);
+
+protected:
+    std::vector<HdfReader*> readers_; // raw pointers; closed and deleted in the destructor
+    std::map<HdfReader*, std::string> filenameForReader_;
+
+    std::vector<float> readScores_;
+    std::map<UInt, size_t> indexForHoleNumber_; // helper table for read scores (holenumber -> vector index)
+
+    // re-used containers
+    PacBio::BAM::BamRecordImpl bamRecord_;
+    std::string recordSequence_;
+    PacBio::BAM::QualityValues recordDeletionQVs_;
+    PacBio::BAM::QualityValues recordInsertionQVs_;
+    PacBio::BAM::QualityValues recordMergeQVs_;
+    PacBio::BAM::QualityValues recordSubstitutionQVs_;
+    std::string recordDeletionTags_;
+    std::string recordSubstitutionTags_;
+    std::vector<uint16_t> recordRawIPDs_;
+    std::vector<uint8_t> recordEncodedIPDs_;
+    std::vector<uint16_t> recordRawPulseWidths_;
+    std::vector<uint8_t> recordEncodedPulseWidths_;
+
+    // IPD downsampling
+    std::vector<uint16_t> framepoints_;
+    std::vector<uint8_t> frameToCode_;
+    uint16_t maxFramepoint_; // NOTE(review): not initialized by the constructor defined in this header
+
+    // store tags
+    //
+    //     i: signed32, I: unsigned32, C: unsigned8, f: float, A: ASCII, Z: string, B: array
+    //
+    // qs:i - 0-based start of query in the polymerase read
+    // qe:i - 0-based end of query in the polymerase read
+    // zm:i - ZMW hole number
+    // np:i - NumPasses (1 for subreads, variable for CCS)
+    // rq:f - float in [0.0,1.0] encoding expected accuracy
+    // dq:Z - DeletionQV
+    // dt:Z - DeletionTag
+    // iq:Z - InsertionQV
+    // mq:Z - MergeQV
+    // sq:Z - SubstitutionQV
+    // st:Z - SubstitutionTag
+    // ip:B,C *or* B,S - IPD (frames: 8-bit (lossy) or 16-bit (full))
+    // pw:B,C *or* B,S - PulseWidth (frames: 8-bit (lossy) or 16-bit (full))
+    // sc:A - Scrap-type
+    // sz:A - ZMW classification
+    // cx:C - context flags;  sn:B,f - HQ-region SNR for A,C,G,T (not written for CCS)
+    // RG:Z - standard SAM/BAM RG tag, contains the corresponding @RG:ID
+    //
+    static const std::string Tag_zm;
+    static const std::string Tag_rq;
+    static const std::string Tag_cx;
+    static const std::string Tag_sn;
+    static const std::string Tag_dq;
+    static const std::string Tag_dt;
+    static const std::string Tag_iq;
+    static const std::string Tag_mq;
+    static const std::string Tag_sq;
+    static const std::string Tag_st;
+    static const std::string Tag_ip;
+    static const std::string Tag_pw;
+    static const std::string Tag_sc;
+    static const std::string Tag_sz;
+    static const std::string Tag_RG;
+
+    // store re-used tag values
+    //
+    // Adapter Tag    = Tag('A', ASCII_CHAR)
+    // LowQuality Tag = Tag('L', ASCII_CHAR)
+    // Filtered Tag   = Tag('F', ASCII_CHAR)
+    // NormalZMW Tag  = Tag('N', ASCII_CHAR)
+    //
+    static const PacBio::BAM::Tag lowQualityTag_;
+    static const PacBio::BAM::Tag adapterTag_;
+    static const PacBio::BAM::Tag filteredTag_;
+    static const PacBio::BAM::Tag normalZmwTag_;
+};
+
+// Static Tag-name initializers: the two-character BAM tag keys used when records are built
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_zm = "zm";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_rq = "rq";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_cx = "cx";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_sn = "sn";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_dq = "dq";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_dt = "dt";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_iq = "iq";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_mq = "mq";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_sq = "sq";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_st = "st";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_ip = "ip";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_pw = "pw";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_sc = "sc";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_sz = "sz";
+template<typename RecordType, typename HdfReader>
+const std::string ConverterBase<RecordType, HdfReader>::Tag_RG = "RG";
+
+// Static Tag-Value initializers: single-character ASCII values stored under the sc / sz tags
+template<typename RecordType, typename HdfReader>
+const PacBio::BAM::Tag ConverterBase<RecordType, HdfReader>::lowQualityTag_ =
+        PacBio::BAM::Tag('L', PacBio::BAM::TagModifier::ASCII_CHAR);
+
+template<typename RecordType, typename HdfReader>
+const PacBio::BAM::Tag ConverterBase<RecordType, HdfReader>::adapterTag_ =
+        PacBio::BAM::Tag('A', PacBio::BAM::TagModifier::ASCII_CHAR);
+
+template<typename RecordType, typename HdfReader>
+const PacBio::BAM::Tag ConverterBase<RecordType, HdfReader>::filteredTag_ =
+        PacBio::BAM::Tag('F', PacBio::BAM::TagModifier::ASCII_CHAR);
+
+template<typename RecordType, typename HdfReader>
+const PacBio::BAM::Tag ConverterBase<RecordType, HdfReader>::normalZmwTag_ =
+        PacBio::BAM::Tag('N', PacBio::BAM::TagModifier::ASCII_CHAR);
+
+// Constructor: only forwards the settings to IConverter
+template<typename RecordType, typename HdfReader>
+ConverterBase<RecordType, HdfReader>::ConverterBase(Settings& settings)
+    : IConverter(settings)
+{ } // no member is set here; NOTE(review): maxFramepoint_ (a plain uint16_t) is therefore left uninitialized
+
+// Destructor: closes and frees every HDF reader still held in readers_
+template<typename RecordType, typename HdfReader>
+ConverterBase<RecordType, HdfReader>::~ConverterBase(void)
+{
+    // null entries are simply skipped
+    for (HdfReader* reader : readers_) {
+        if (!reader)
+            continue;
+        reader->Close();
+        delete reader;
+    }
+
+    // drop the now-dangling pointers
+    readers_.clear();
+}
+
+template<typename RecordType, typename HdfReader>
+std::string ConverterBase<RecordType, HdfReader>::MovieName(void)
+{   // virtual accessor: hands back settings_.movieName by value
+    return settings_.movieName;
+}
+
+template<typename RecordType, typename HdfReader>
+std::string ConverterBase<RecordType, HdfReader>::ReadGroupId(void)
+{   // virtual accessor: hands back settings_.readGroupId by value
+    return settings_.readGroupId;
+}
+
+template<typename RecordType, typename HdfReader>
+std::string ConverterBase<RecordType, HdfReader>::ScrapsReadGroupId(void)
+{   // virtual accessor: hands back settings_.scrapsReadGroupId by value
+    return settings_.scrapsReadGroupId;
+}
+
+// Fills bamRecord with the [subreadStart, subreadEnd) slice of smrtRead: the
+// record name, the sequence, the RG/zm/rq tags, the sn tag (unless the header
+// read type is "CCS"), mode-specific tags, and every per-base QV/tag/frame
+// field enabled in settings_.
+//
+// Returns false, after recording an error message, when an enabled field has
+// no data in smrtRead; name and sequence were already handed to bamRecord by
+// then, but its tags are left untouched. Returns true once the tags are stored.
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::ConvertRecord(
+        const RecordType& smrtRead,
+        const int subreadStart,
+        const int subreadEnd,
+        const std::string& rgId,
+        PacBio::BAM::BamRecordImpl* bamRecord)
+{
+    using namespace PacBio;
+    using namespace PacBio::BAM;
+    using namespace std;
+
+    // sanity check
+    assert(bamRecord);
+
+    const UInt holeNumber   = smrtRead.zmwData.holeNumber;
+    const DNALength length = subreadEnd - subreadStart;
+
+    AddRecordName(bamRecord, holeNumber, subreadStart, subreadEnd);
+
+    // store sequence
+    // NOTE - the base-class implementation stores no qualities (per PacBio BAM spec)
+    SetSequenceAndQualities(bamRecord, smrtRead, subreadStart, length);
+
+    // check settings/existence of *QV/*Tag data
+    if (settings_.usingDeletionQV && smrtRead.deletionQV.Empty()) {
+        AddErrorMessage("DeletionQV requested but unavailable");
+        return false;
+    }
+
+    if (settings_.usingInsertionQV && smrtRead.insertionQV.Empty()) {
+        AddErrorMessage("InsertionQV requested but unavailable");
+        return false;
+    }
+
+    if (settings_.usingMergeQV && smrtRead.mergeQV.Empty()) {
+        AddErrorMessage("MergeQV requested but unavailable");
+        return false;
+    }
+
+    if (settings_.usingSubstitutionQV && smrtRead.substitutionQV.Empty()) {
+        AddErrorMessage("SubstitutionQV requested but unavailable");
+        return false;
+    }
+
+    if (settings_.usingDeletionTag && smrtRead.deletionTag == nullptr) {
+        AddErrorMessage("DeletionTag requested but unavailable");
+        return false;
+    }
+
+    if (settings_.usingSubstitutionTag && smrtRead.substitutionTag == nullptr) {
+        AddErrorMessage("SubstitutionTag requested but unavailable");
+        return false;
+    }
+
+    if (settings_.usingIPD && smrtRead.preBaseFrames == nullptr) {
+        AddErrorMessage("IPD requested but unavailable");
+        return false;
+    }
+
+    if (settings_.usingPulseWidth && smrtRead.widthInFrames == nullptr) {
+        AddErrorMessage("PulseWidth requested but unavailable");
+        return false;
+    }
+
+    // fetch *QV/*Tag data: copy the [subreadStart, subreadStart + length) window of each enabled field
+    if (settings_.usingDeletionQV) {
+        recordDeletionQVs_.assign((uint8_t*)smrtRead.deletionQV.data + subreadStart,
+                                  (uint8_t*)smrtRead.deletionQV.data + subreadStart + length);
+    }
+    if (settings_.usingInsertionQV) {
+        recordInsertionQVs_.assign((uint8_t*)smrtRead.insertionQV.data + subreadStart,
+                                   (uint8_t*)smrtRead.insertionQV.data + subreadStart + length);
+    }
+    if (settings_.usingMergeQV) {
+        recordMergeQVs_.assign((uint8_t*)smrtRead.mergeQV.data + subreadStart,
+                               (uint8_t*)smrtRead.mergeQV.data + subreadStart + length);
+    }
+    if (settings_.usingSubstitutionQV) {
+        recordSubstitutionQVs_.assign((uint8_t*)smrtRead.substitutionQV.data + subreadStart,
+                                      (uint8_t*)smrtRead.substitutionQV.data + subreadStart + length);
+    }
+    if (settings_.usingDeletionTag) {
+        recordDeletionTags_.assign((char*)smrtRead.deletionTag + subreadStart,
+                                   (char*)smrtRead.deletionTag + subreadStart + length);
+    }
+    if (settings_.usingSubstitutionTag) {
+        recordSubstitutionTags_.assign((char*)smrtRead.substitutionTag + subreadStart,
+                                       (char*)smrtRead.substitutionTag + subreadStart + length);
+    }
+
+    // fetch IPDs, then maybe encode
+    if (settings_.usingIPD) {
+        recordRawIPDs_.assign((uint16_t*)smrtRead.preBaseFrames + subreadStart,
+                              (uint16_t*)smrtRead.preBaseFrames + subreadStart + length);
+
+        // if not using full data, encode (the returned temporary is moved from without std::move)
+        if (!settings_.losslessFrames)
+            recordEncodedIPDs_ = Frames::Encode(recordRawIPDs_);
+    }
+
+    // fetch PulseWidths, then maybe encode
+    if (settings_.usingPulseWidth) {
+        recordRawPulseWidths_.assign((uint16_t*)smrtRead.widthInFrames + subreadStart,
+                                     (uint16_t*)smrtRead.widthInFrames + subreadStart + length);
+
+        // if not using full data, encode (the returned temporary is moved from without std::move)
+        if (!settings_.losslessFrames)
+            recordEncodedPulseWidths_ = Frames::Encode(recordRawPulseWidths_);
+    }
+
+    TagCollection tags;
+    tags[Tag_RG] = rgId;
+    tags[Tag_zm] = static_cast<int32_t>(holeNumber);
+
+    // HQRegionSNR, TODO: should I do this in AddModeTags?
+    if (HeaderReadType() != "CCS")
+    {
+        // Stored as 'ACGT' in BAM, no fixed order in SMRTSequence
+        vector<float> hqSnr = { smrtRead.HQRegionSnr('A'),
+                                smrtRead.HQRegionSnr('C'),
+                                smrtRead.HQRegionSnr('G'),
+                                smrtRead.HQRegionSnr('T')};
+        tags[Tag_sn] = hqSnr;
+    }
+
+    AddModeTags(&tags, smrtRead, subreadStart, subreadEnd);
+
+    // NOTE(review): operator[] yields index 0 for a hole number missing from the map - confirm every ZMW is registered
+    if (!readScores_.empty())
+        tags[Tag_rq] = static_cast<float>(readScores_.at(indexForHoleNumber_[holeNumber]));
+    else
+        tags[Tag_rq] = static_cast<float>(0.0f);
+
+    if (settings_.usingDeletionQV)      tags[Tag_dq] = recordDeletionQVs_.Fastq();
+    if (settings_.usingDeletionTag)     tags[Tag_dt] = recordDeletionTags_;
+    if (settings_.usingInsertionQV)     tags[Tag_iq] = recordInsertionQVs_.Fastq();
+    if (settings_.usingMergeQV)         tags[Tag_mq] = recordMergeQVs_.Fastq();
+    if (settings_.usingSubstitutionQV)  tags[Tag_sq] = recordSubstitutionQVs_.Fastq();
+    if (settings_.usingSubstitutionTag) tags[Tag_st] = recordSubstitutionTags_;
+
+    if (settings_.usingIPD) {
+        if (settings_.losslessFrames)
+            tags[Tag_ip] = recordRawIPDs_;
+        else
+            tags[Tag_ip] = recordEncodedIPDs_;
+    }
+
+    if (settings_.usingPulseWidth) {
+        if (settings_.losslessFrames)
+            tags[Tag_pw] = recordRawPulseWidths_;
+        else
+            tags[Tag_pw] = recordEncodedPulseWidths_;
+    }
+
+    bamRecord->Tags(tags);
+
+    // if we get here, everything should be OK
+    return true;
+}
+
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::WriteRecord(const RecordType& smrtRecord,
+                                                       const int recordStart,
+                                                       const int recordEnd,
+                                                       const std::string& readGroupId,
+                                                       PacBio::BAM::BamWriter* writer)
+{
+    // Build the record in the re-used bamRecord_ member; give up right away
+    // if the conversion fails.
+    const bool converted = ConvertRecord(smrtRecord,
+                                         recordStart,
+                                         recordEnd,
+                                         readGroupId,
+                                         &bamRecord_);
+    if (!converted)
+        return false;
+
+    // Hand the record to the writer; a std::exception thrown by it becomes
+    // an error message and a false return.
+    try {
+        writer->Write(bamRecord_);
+        return true;
+    } catch (std::exception&) {
+        AddErrorMessage("failed to write BAM record");
+        return false;
+    }
+}
+
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::WriteFilteredRecord(const RecordType& smrtRecord,
+                                                               const int recordStart,
+                                                               const int recordEnd,
+                                                               const std::string& readGroupId,
+                                                               PacBio::BAM::BamWriter* writer)
+{
+    // Convert into the re-used bamRecord_ member first.
+    const bool converted = ConvertRecord(smrtRecord,
+                                         recordStart,
+                                         recordEnd,
+                                         readGroupId,
+                                         &bamRecord_);
+    if (!converted)
+        return false;
+
+    // Mark the record as a scrap: sz = normal ZMW, then sc = filtered.
+    // The first tag that cannot be added aborts the write.
+    const bool hasZmwTag = bamRecord_.AddTag(Tag_sz, normalZmwTag_);
+    if (!hasZmwTag) {
+        AddErrorMessage("failed to add scrap's zmw classification tag");
+        return false;
+    }
+    const bool hasScrapTag = bamRecord_.AddTag(Tag_sc, filteredTag_);
+    if (!hasScrapTag) {
+        AddErrorMessage("failed to add scrap's filtered tag");
+        return false;
+    }
+
+    // Emit the record; a std::exception thrown by the writer becomes an
+    // error message and a false return.
+    try {
+        writer->Write(bamRecord_);
+        return true;
+    } catch (std::exception&) {
+        AddErrorMessage("failed to write BAM record");
+        return false;
+    }
+}
+
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::WriteFilteredRecord(const RecordType& smrtRecord,
+                                                               const int recordStart,
+                                                               const int recordEnd,
+                                                               const std::string& readGroupId,
+                                                               const uint8_t contextFlags,
+                                                               PacBio::BAM::BamWriter* writer)
+{
+    // Convert into the re-used bamRecord_ member first.
+    const bool converted = ConvertRecord(smrtRecord,
+                                         recordStart,
+                                         recordEnd,
+                                         readGroupId,
+                                         &bamRecord_);
+    if (!converted)
+        return false;
+
+    // Mark the record as a scrap: sz = normal ZMW, then sc = filtered.
+    // The first tag that cannot be added aborts the write.
+    const bool hasZmwTag = bamRecord_.AddTag(Tag_sz, normalZmwTag_);
+    if (!hasZmwTag) {
+        AddErrorMessage("failed to add scrap's zmw classification tag");
+        return false;
+    }
+    const bool hasScrapTag = bamRecord_.AddTag(Tag_sc, filteredTag_);
+    if (!hasScrapTag) {
+        AddErrorMessage("failed to add scrap's filtered tag");
+        return false;
+    }
+
+    // Then attach the caller-supplied context flags as the cx tag.
+    const bool hasContextTag = bamRecord_.AddTag(Tag_cx, contextFlags);
+    if (!hasContextTag) {
+        AddErrorMessage("failed to add context flag tag");
+        return false;
+    }
+
+    // Emit the record; a std::exception thrown by the writer becomes an
+    // error message and a false return.
+    try {
+        writer->Write(bamRecord_);
+        return true;
+    } catch (std::exception&) {
+        AddErrorMessage("failed to write BAM record");
+        return false;
+    }
+}
+
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::WriteLowQualityRecord(const RecordType& smrtRecord,
+                                                                 const int recordStart,
+                                                                 const int recordEnd,
+                                                                 const std::string& readGroupId,
+                                                                 PacBio::BAM::BamWriter* writer)
+{
+    // Convert into the re-used bamRecord_ member first.
+    const bool converted = ConvertRecord(smrtRecord,
+                                         recordStart,
+                                         recordEnd,
+                                         readGroupId,
+                                         &bamRecord_);
+    if (!converted)
+        return false;
+
+    // Mark the record as a scrap: sz = normal ZMW, then sc = low quality.
+    // The first tag that cannot be added aborts the write.
+    const bool hasZmwTag = bamRecord_.AddTag(Tag_sz, normalZmwTag_);
+    if (!hasZmwTag) {
+        AddErrorMessage("failed to add scrap's zmw classification tag");
+        return false;
+    }
+    const bool hasScrapTag = bamRecord_.AddTag(Tag_sc, lowQualityTag_);
+    if (!hasScrapTag) {
+        AddErrorMessage("failed to add scrap's low-quality region tag");
+        return false;
+    }
+
+    // Emit the record; a std::exception thrown by the writer becomes an
+    // error message and a false return.
+    try {
+        writer->Write(bamRecord_);
+        return true;
+    } catch (std::exception&) {
+        AddErrorMessage("failed to write BAM record");
+        return false;
+    }
+}
+
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::WriteAdapterRecord(const RecordType& smrtRecord,
+                                                              const int recordStart,
+                                                              const int recordEnd,
+                                                              const std::string& readGroupId,
+                                                              PacBio::BAM::BamWriter* writer)
+{
+    // Convert into the re-used bamRecord_ member first.
+    const bool converted = ConvertRecord(smrtRecord,
+                                         recordStart,
+                                         recordEnd,
+                                         readGroupId,
+                                         &bamRecord_);
+    if (!converted)
+        return false;
+
+    // Mark the record as a scrap: sz = normal ZMW, then sc = adapter.
+    // The first tag that cannot be added aborts the write.
+    const bool hasZmwTag = bamRecord_.AddTag(Tag_sz, normalZmwTag_);
+    if (!hasZmwTag) {
+        AddErrorMessage("failed to add scrap's zmw classification tag");
+        return false;
+    }
+    const bool hasScrapTag = bamRecord_.AddTag(Tag_sc, adapterTag_);
+    if (!hasScrapTag) {
+        AddErrorMessage("failed to add scrap's adapter tag");
+        return false;
+    }
+
+    // Emit the record; a std::exception thrown by the writer becomes an
+    // error message and a false return.
+    try {
+        writer->Write(bamRecord_);
+        return true;
+    } catch (std::exception&) {
+        AddErrorMessage("failed to write BAM record");
+        return false;
+    }
+}
+
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::WriteSubreadRecord(const RecordType& smrtRecord,
+                                                              const int recordStart,
+                                                              const int recordEnd,
+                                                              const std::string& readGroupId,
+                                                              const uint8_t contextFlags,
+                                                              PacBio::BAM::BamWriter* writer)
+{
+    // Convert into the re-used bamRecord_ member first.
+    const bool converted = ConvertRecord(smrtRecord,
+                                         recordStart,
+                                         recordEnd,
+                                         readGroupId,
+                                         &bamRecord_);
+    if (!converted)
+        return false;
+
+    // Attach the caller-supplied context flags as the cx tag; the record
+    // is not written if the tag cannot be added.
+    const bool hasContextTag = bamRecord_.AddTag(Tag_cx, contextFlags);
+    if (!hasContextTag) {
+        AddErrorMessage("failed to add context flag tag");
+        return false;
+    }
+
+    // Emit the record; a std::exception thrown by the writer becomes an
+    // error message and a false return.
+    try {
+        writer->Write(bamRecord_);
+        return true;
+    } catch (std::exception&) {
+        AddErrorMessage("failed to write BAM record");
+        return false;
+    }
+}
+
+// Copies `length` bases of smrtRead, beginning at offset `start`, into the
+// recordSequence_ scratch string and stores that string on bamRecord.
+// Only the sequence is passed on; no quality string is supplied here.
+template<typename RecordType, typename HdfReader>
+void ConverterBase<RecordType, HdfReader>::SetSequenceAndQualities(
+        PacBio::BAM::BamRecordImpl* bamRecord,
+        const RecordType& smrtRead,
+        const int start,
+        const int length)
+{
+    // the read's bases are reinterpreted as plain characters
+    const char* const firstBase = (const char*)smrtRead.seq + start;
+    recordSequence_.assign(firstBase, length);
+
+    bamRecord->SetSequenceAndQualities(recordSequence_);
+}
+
+// Names bamRecord as "<movieName>/<holeNumber>/<start>_<end>", taking the
+// movie name from settings_.
+template<typename RecordType, typename HdfReader>
+void ConverterBase<RecordType, HdfReader>::AddRecordName(
+        PacBio::BAM::BamRecordImpl* bamRecord,
+        const UInt holeNumber,
+        const int start,
+        const int end)
+{
+    string recordName(settings_.movieName);
+    recordName += "/";
+    recordName += to_string(holeNumber);
+    recordName += "/";
+    recordName += to_string(start);
+    recordName += "_";
+    recordName += to_string(end);
+
+    bamRecord->Name(recordName);
+}
+
+// Stores the tags of this base implementation in *tags: "qs" (start),
+// "qe" (end) and "np", which is always written as the 32-bit integer 1.
+// smrtRead is not consulted by this implementation.
+template<typename RecordType, typename HdfReader>
+void ConverterBase<RecordType, HdfReader>::AddModeTags(
+        PacBio::BAM::TagCollection* tags,
+        const RecordType& smrtRead,
+        const int start,
+        const int end)
+{
+    PacBio::BAM::TagCollection& modeTags = *tags;
+
+    modeTags["qs"] = start;
+    modeTags["qe"] = end;
+    modeTags["np"] = static_cast<int32_t>(1);
+}
+
+// Allocates a new HdfReader and registers the HDF5 fields it should load.
+//
+// "Basecall" is always requested. "HQRegionSNR" is requested unless the
+// header read type is "CCS". Every other field is requested only when the
+// matching settings_ flag is on. The caller receives ownership of the
+// returned pointer.
+template<typename RecordType, typename HdfReader>
+HdfReader* ConverterBase<RecordType, HdfReader>::InitHdfReader(void)
+{
+    HdfReader* reader = new HdfReader;
+
+    // small helper: request a field only when it is wanted
+    auto includeIf = [reader](const bool wanted, const char* fieldName) {
+        if (wanted)
+            reader->IncludeField(fieldName);
+    };
+
+    reader->IncludeField("Basecall");
+    includeIf(HeaderReadType() != "CCS",      "HQRegionSNR");
+    includeIf(settings_.usingDeletionQV,      "DeletionQV");
+    includeIf(settings_.usingDeletionTag,     "DeletionTag");
+    includeIf(settings_.usingInsertionQV,     "InsertionQV");
+    includeIf(settings_.usingIPD,             "PreBaseFrames");
+    includeIf(settings_.usingMergeQV,         "MergeQV");
+    includeIf(settings_.usingPulseWidth,      "WidthInFrames");
+    includeIf(settings_.usingSubstitutionQV,  "SubstitutionQV");
+    includeIf(settings_.usingSubstitutionTag, "SubstitutionTag");
+
+    return reader;
+}
+
+// Refreshes readScores_ and indexForHoleNumber_ from the given reader.
+//
+// Read scores come from the "ReadScore" dataset inside the "ZMWMetrics" group
+// of the reader's base-calls group. If the group or the dataset is absent (or
+// cannot be opened), readScores_ stays empty. The hole-number lookup then gets
+// one entry per loaded score, mapping hole number -> position in readScores_.
+template<typename RecordType, typename HdfReader>
+void ConverterBase<RecordType, HdfReader>::InitReadScores(HdfReader* reader)
+{
+    assert(reader);
+
+    // load the per-ZMW read scores, if the file provides them
+    readScores_.clear();
+    if (reader->baseCallsGroup.ContainsObject("ZMWMetrics")) {
+        HDFGroup zmwMetricsGroup;
+        const bool scoresPresent =
+            zmwMetricsGroup.Initialize(reader->baseCallsGroup.group, "ZMWMetrics") &&
+            zmwMetricsGroup.ContainsObject("ReadScore");
+        if (scoresPresent) {
+            HDFArray<float> readScoresArray;
+            if (readScoresArray.InitializeForReading(zmwMetricsGroup, "ReadScore"))
+                readScoresArray.ReadDataset(readScores_);
+        }
+    }
+
+    // rebuild the hole number -> score index lookup (stays empty without scores)
+    indexForHoleNumber_.clear();
+    const size_t scoreCount = readScores_.size();
+    for (size_t scoreIndex = 0; scoreIndex != scoreCount; ++scoreIndex) {
+        UInt hole;
+        reader->zmwReader.GetHoleNumberAt(scoreIndex, hole);
+        indexForHoleNumber_[hole] = scoreIndex;
+    }
+}
+
+// A record counts as coming from a sequencing ZMW when its hole status is 0.
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::IsSequencingZmw(const RecordType& record) const
+{
+    return record.zmwData.holeStatus == 0;
+}
+
+// Fallback source for the chemistry triple: reads it from
+// "<dir>/<movieName>.metadata.xml", where <dir> is two directory levels above
+// the resolved location of baxFn.
+//
+// On success bindingKit_, sequencingKit_ and basecallerVersion_ are filled in
+// and true is returned. false is returned when the path cannot be resolved,
+// when the XML cannot be read or lacks a required node, or when the triple is
+// rejected as an invalid chemistry. Only that last case records an error
+// message. The members are assigned one after another, so a failure part-way
+// through can leave some of them already updated.
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::LoadChemistryFromMetadataXML(
+        const std::string& baxFn,
+        const std::string& movieName)
+{
+    using boost::property_tree::ptree;
+
+    // resolve the bax.h5 file to an absolute path ...
+    char resolved[PATH_MAX + 1];
+    char* dir = realpath(baxFn.c_str(), resolved);
+
+    // ... and climb two directory levels, stopping at the first failure
+    for (int level = 0; dir != nullptr && level < 2; ++level)
+        dir = dirname(dir);
+    if (dir == nullptr)
+        return false;
+
+    const std::string metadataFn = std::string(dir) + '/' + movieName + ".metadata.xml";
+    ptree metadata;
+
+    try
+    {
+        read_xml(metadataFn, metadata);
+
+        bindingKit_        = metadata.get<std::string>("Metadata.BindingKit.PartNumber");
+        sequencingKit_     = metadata.get<std::string>("Metadata.SequencingKit.PartNumber");
+        basecallerVersion_ = metadata.get<std::string>("Metadata.InstCtrlVer");
+
+        // called only for its validation: it throws on an invalid chemistry
+        // triple, which lets us exit early with an error message
+        static_cast<void>(
+            PacBio::BAM::ReadGroupInfo::SequencingChemistryFromTriple(bindingKit_,
+                                                                      sequencingKit_,
+                                                                      basecallerVersion_));
+    }
+    catch (PacBio::BAM::InvalidSequencingChemistryException& e)
+    {
+        AddErrorMessage(e.what());
+        return false;
+    }
+    catch (...)
+    {
+        // unreadable or incomplete metadata: report failure without a message
+        return false;
+    }
+
+    return true;
+}
+
+// Runs the complete BAX -> BAM conversion described by settings_.
+//
+// Steps:
+//   1. Open one HdfReader per non-empty input filename and read the mandatory
+//      read-group metadata: FrameRate plus the chemistry triple (BindingKit,
+//      SequencingKit, basecaller ChangeListID). When the triple cannot be read
+//      from the BAX file, LoadChemistryFromMetadataXML() is tried instead.
+//   2. Require that all inputs belong to exactly one movie, then derive the
+//      movie name, read group ID and output filename(s) in settings_.
+//   3. Convert every reader into the main BAM file. For the "SUBREAD" and
+//      "HQREGION" read types a second, scraps BAM file is written as well.
+//   4. Create a PBI index for each BAM file that was written.
+//
+// Returns true on success. On failure an error message is recorded through
+// AddErrorMessage() (ConvertFile() failures rely on the messages ConvertFile
+// recorded itself) and false is returned.
+//
+// Ownership: a reader is handed over to readers_ only after its metadata was
+// read successfully; on every earlier failure path it is deleted here.
+template<typename RecordType, typename HdfReader>
+bool ConverterBase<RecordType, HdfReader>::Run(void)
+{
+    using namespace PacBio;
+    using namespace PacBio::BAM;
+    using namespace std;
+
+    set<string> movieNames;
+
+    // initialize input BAX readers
+    const auto baxEnd = settings_.inputBaxFilenames.cend();
+    for (auto baxIter = settings_.inputBaxFilenames.cbegin(); baxIter != baxEnd; ++baxIter) {
+        const string& baxFn = (*baxIter);
+        if (baxFn.empty())
+            continue;
+
+        HdfReader* reader = InitHdfReader();
+        
+        // read in mandatory ReadGroupInfo from bax file
+        if (reader->Initialize(baxFn) &&
+            reader->scanDataReader.fileHasScanData &&
+            reader->scanDataReader.initializedRunInfoGroup)
+        {
+            // FrameRate
+            {
+                HDFAtom<float> frAtom;
+                if (reader->scanDataReader.acqParamsGroup.ContainsAttribute("FrameRate") &&
+                    frAtom.Initialize(reader->scanDataReader.acqParamsGroup, "FrameRate"))
+                {
+                    float localFrameRate;
+                    frAtom.Read(localFrameRate);
+                    frAtom.dataspace.close();
+                    frameRateHz_ = std::to_string(localFrameRate);
+                } else {
+                    // reader is not yet owned by readers_, so release it here
+                    delete reader;
+                    AddErrorMessage("FrameRate is mandatory but unavailable");
+                    return false;
+                }
+            }
+
+            // chemistry triple success flag
+            bool success = false;
+
+            // BindingKit
+            {
+                HDFAtom<std::string> bkAtom;
+                if (reader->scanDataReader.runInfoGroup.ContainsAttribute("BindingKit") &&
+                    bkAtom.Initialize(reader->scanDataReader.runInfoGroup, "BindingKit"))
+                {
+                    bkAtom.Read(bindingKit_);
+                    bkAtom.dataspace.close();
+                } else {
+                    goto fallback;
+                }
+            }
+
+            // SequencingKit
+            {
+                HDFAtom<std::string> skAtom;
+                if (reader->scanDataReader.runInfoGroup.ContainsAttribute("SequencingKit") &&
+                    skAtom.Initialize(reader->scanDataReader.runInfoGroup, "SequencingKit"))
+                {
+                    skAtom.Read(sequencingKit_);
+                    skAtom.dataspace.close();
+                } else {
+                    goto fallback;
+                }
+            }
+
+            // basecaller ChangeListID
+            {
+                HDFGroup bcGroup;
+                if (reader->pulseDataGroup.ContainsObject("BaseCalls") &&
+                    bcGroup.Initialize(reader->pulseDataGroup.group, "BaseCalls"))
+                {
+                    HDFAtom<std::string> clAtom;
+                    if (bcGroup.ContainsAttribute("ChangeListID") &&
+                        clAtom.Initialize(bcGroup.group, "ChangeListID"))
+                    {
+                        clAtom.Read(basecallerVersion_);
+                        clAtom.dataspace.close();
+                        success = true;
+                    }
+                    bcGroup.Close();
+                }
+            }
+
+fallback:
+            // the triple is complete only if all three reads above succeeded;
+            // otherwise try the movie's metadata.xml
+            if (!success && !LoadChemistryFromMetadataXML(baxFn, reader->GetMovieName()))
+            {
+                // reader is not yet owned by readers_, so release it here
+                delete reader;
+                AddErrorMessage("BindingKit, SequencingKit, and ChangeListID are mandatory but unavailable");
+                return false;
+            }
+
+        } else {
+            delete reader;
+            AddErrorMessage("Failed to properly initialize HDFBasReader");
+            return false;
+        }
+
+        // from here on the reader is tracked by readers_
+        movieNames.insert(reader->GetMovieName());
+        readers_.push_back(reader);
+        filenameForReader_[reader] = baxFn;
+    }
+
+    if (readers_.empty()) {
+        AddErrorMessage("could not open BAX file(s)");
+        return false;
+    }
+
+    // sanity check that BAX files come from same movie
+    if (movieNames.size() != 1) {
+        AddErrorMessage("multiple movies detected:");
+        for (const auto& m : movieNames)
+            AddErrorMessage(string("    ")+m);
+        return false;
+    }
+    settings_.movieName = (*movieNames.cbegin());
+
+    // Use the movie name to initialize the ReadGroupId
+    settings_.readGroupId = MakeReadGroupId(MovieName(), HeaderReadType());
+
+    // initialize output file(s)
+    if (settings_.outputBamPrefix.empty())
+        settings_.outputBamPrefix = settings_.movieName;
+    settings_.outputBamFilename = settings_.outputBamPrefix + OutputFileSuffix();
+
+    // Separate single-output from dual-output jobs
+    if (HeaderReadType() == "SUBREAD" || HeaderReadType() == "HQREGION")
+    {
+        // setup scraps BAM file info
+        settings_.scrapsReadGroupId = MakeReadGroupId(MovieName(), ScrapsReadType());
+        settings_.scrapsBamFilename = settings_.outputBamPrefix + ScrapsFileSuffix();
+
+        // main conversion of BAX -> BAM records for dual-output jobs
+        try {
+            BamWriter writer(settings_.outputBamFilename, CreateHeader(HeaderReadType()));
+            BamWriter scrapsWriter(settings_.scrapsBamFilename, CreateHeader(ScrapsReadType()));
+
+            for (HdfReader* reader : readers_) {
+                assert(reader);
+                if (!ConvertFile(reader, &writer, &scrapsWriter))
+                    return false;
+            }
+        } catch (std::exception&) {
+            // TODO: get more helpful message here
+            AddErrorMessage("failed to convert BAM file");
+            return false;
+        }
+
+        // make PBI files
+        // NOTE(review): these calls sit outside the try block above, so an
+        // exception thrown while indexing would propagate to the caller.
+        PbiFile::CreateFrom(BamFile{ settings_.outputBamFilename });
+        PbiFile::CreateFrom(BamFile{ settings_.scrapsBamFilename });
+
+    } else { 
+
+        assert(settings_.scrapsBamFilename.empty());
+
+        // main conversion of BAX -> BAM records for single-output jobs
+        try {
+            BamWriter writer(settings_.outputBamFilename, CreateHeader(HeaderReadType()));
+          
+            for (HdfReader* reader : readers_) {
+                assert(reader);
+                if (!ConvertFile(reader, &writer))
+                    return false;
+            }
+        } catch (std::exception&) {
+            // TODO: get more helpful message here
+            AddErrorMessage("failed to convert BAM file");
+            return false;
+        }
+
+        // make PBI file
+        // NOTE(review): outside the try block, see the dual-output branch.
+        PbiFile::CreateFrom(BamFile{ settings_.outputBamFilename });
+    }
+
+    // if we get here, return success
+    return true;
+}
+
+#endif
diff --git a/utils/bax2bam/src/HqRegionConverter.cpp b/utils/bax2bam/src/HqRegionConverter.cpp
new file mode 100644
index 0000000..d8e0306
--- /dev/null
+++ b/utils/bax2bam/src/HqRegionConverter.cpp
@@ -0,0 +1,192 @@
+// Author: Derek Barnett
+
+#include "HqRegionConverter.h"
+
+#include "utils/RegionUtils.hpp"
+#include "HDFRegionTableReader.hpp"
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+#include <memory>
+#include <set>
+#include <sstream>
+
+using namespace std;
+using namespace PacBio::BAM;
+
+// Construction only forwards the settings reference to ConverterBase.
+HqRegionConverter::HqRegionConverter(Settings& settings)
+    : ConverterBase(settings)
+{
+}
+
+// Nothing to release at this level.
+HqRegionConverter::~HqRegionConverter(void)
+{
+}
+
+// Single-output overload: same conversion as the three-argument overload, run
+// with a null scraps writer.
+bool HqRegionConverter::ConvertFile(HDFBasReader* reader,
+                                    PacBio::BAM::BamWriter* writer)
+{
+    PacBio::BAM::BamWriter* const noScrapsWriter = nullptr;
+    return ConvertFile(reader, writer, noScrapsWriter);
+}
+
+// Converts every record delivered by reader into HQ-region BAM output.
+//
+// For each record the HQ region [hqStart, hqEnd) is looked up in the file's
+// region table (an hqEnd of length-1 is widened to the full length).
+//
+//   * sequencing ZMW: the HQ region goes to writer. If scrapsWriter is given,
+//     the sequence before and after the HQ region is written to it as
+//     low-quality records.
+//   * non-sequencing ZMW: nothing is written unless settings_.isInternal is
+//     set and scrapsWriter is given. In that case the leading low-quality
+//     part, the HQ region (as a filtered record) and the trailing low-quality
+//     part all go to scrapsWriter.
+//
+// Empty intervals are skipped. Returns false on the first failure (region
+// table unreadable, HQ region missing for a hole, or a record write failing),
+// true once all records were processed. smrtRecord.Free() is called for every
+// fetched record, on the failure paths too.
+bool HqRegionConverter::ConvertFile(HDFBasReader* reader,
+                                    PacBio::BAM::BamWriter* writer,
+                                    PacBio::BAM::BamWriter* scrapsWriter) 
+{
+    assert(reader);
+
+    // load the region table belonging to this reader's file
+    std::unique_ptr<HDFRegionTableReader> const regionTableReader(new HDFRegionTableReader);
+    RegionTable regionTable;
+    std::string fn = filenameForReader_[reader];
+    assert(!fn.empty());
+    if (!regionTableReader->Initialize(fn)) {
+        AddErrorMessage("could not read region table on "+fn);
+        return false;
+    }
+    regionTable.Reset();
+    regionTableReader->ReadTable(regionTable);
+    regionTableReader->Close();
+
+    // read scores and the hole number lookup for this file
+    InitReadScores(reader);
+
+    // walk over all records of the HDF5 file
+    SMRTSequence smrtRecord;
+    int hqStart, hqEnd, score;
+    while (reader->GetNext(smrtRecord)) {
+
+        // every record needs an HQ region entry
+        const bool hqFound = LookupHQRegion(smrtRecord.zmwData.holeNumber,
+                                            regionTable,
+                                            hqStart,
+                                            hqEnd,
+                                            score);
+        if (!hqFound)
+        {
+            stringstream s;
+            s << "could not find HQ region for hole number: " << smrtRecord.zmwData.holeNumber;
+            AddErrorMessage(s.str());
+            smrtRecord.Free();
+            return false;
+        }
+
+        // catch and repair 1-off errors in the HQ region
+        if (hqEnd == static_cast<int>(smrtRecord.length)-1)
+            hqEnd = smrtRecord.length;
+
+        // which of the three intervals are non-empty
+        const bool hasHqRegion   = hqStart < hqEnd;
+        const bool hasLeadingLq  = hqStart > 0;
+        const bool hasTrailingLq = static_cast<size_t>(hqEnd) < smrtRecord.length;
+
+        // stays true while every attempted write succeeds; once false, no
+        // further write is attempted for this record
+        bool ok = true;
+
+        if (IsSequencingZmw(smrtRecord))
+        {
+            // HQ region -> main BAM file
+            if (hasHqRegion)
+                ok = WriteRecord(smrtRecord, hqStart, hqEnd, ReadGroupId(), writer);
+
+            // flanking low-quality sequence -> scraps BAM file, if present
+            if (ok && scrapsWriter)
+            {
+                if (hasLeadingLq)
+                    ok = WriteLowQualityRecord(smrtRecord,
+                                               0,
+                                               hqStart,
+                                               ScrapsReadGroupId(),
+                                               scrapsWriter);
+                if (ok && hasTrailingLq)
+                    ok = WriteLowQualityRecord(smrtRecord,
+                                               hqEnd,
+                                               smrtRecord.length,
+                                               ScrapsReadGroupId(),
+                                               scrapsWriter);
+            }
+        }
+        else if (settings_.isInternal && scrapsWriter)
+        {
+            // non-sequencing ZMW in 'internal mode': everything -> scraps,
+            // in 5' LQ, HQ, 3' LQ order
+            if (hasLeadingLq)
+                ok = WriteLowQualityRecord(smrtRecord,
+                                           0,
+                                           hqStart,
+                                           ScrapsReadGroupId(),
+                                           scrapsWriter);
+            if (ok && hasHqRegion)
+                ok = WriteFilteredRecord(smrtRecord,
+                                         hqStart,
+                                         hqEnd,
+                                         ScrapsReadGroupId(),
+                                         scrapsWriter);
+            if (ok && hasTrailingLq)
+                ok = WriteLowQualityRecord(smrtRecord,
+                                           hqEnd,
+                                           smrtRecord.length,
+                                           ScrapsReadGroupId(),
+                                           scrapsWriter);
+        }
+
+        // the record buffer is released whether or not the writes succeeded
+        smrtRecord.Free();
+        if (!ok)
+            return false;
+    }
+
+    // if we get here, all OK
+    return true;
+}
+
+// Read type reported for the main output.
+string HqRegionConverter::HeaderReadType(void) const
+{
+    return string("HQREGION");
+}
+
+// Read type reported for the scraps output.
+string HqRegionConverter::ScrapsReadType(void) const
+{
+    return string("SCRAP");
+}
+
+// Filename suffix of the main output.
+string HqRegionConverter::OutputFileSuffix(void) const
+{
+    return string(".hqregions.bam");
+}
+
+// Filename suffix of the scraps output.
+string HqRegionConverter::ScrapsFileSuffix(void) const
+{
+    return string(".lqregions.bam");
+}
diff --git a/utils/bax2bam/src/HqRegionConverter.h b/utils/bax2bam/src/HqRegionConverter.h
new file mode 100644
index 0000000..69522a3
--- /dev/null
+++ b/utils/bax2bam/src/HqRegionConverter.h
@@ -0,0 +1,26 @@
+// Author: Derek Barnett
+
+#ifndef HQREGIONCONVERTER_H
+#define HQREGIONCONVERTER_H
+
+#include "ConverterBase.h"
+
+// Converter specialization whose main output is HQ-region records.
+// It uses the default template arguments of ConverterBase and supplies the
+// per-file conversion plus the read-type and filename-suffix strings.
+class HqRegionConverter : public ConverterBase<>
+{
+public:
+    // settings is forwarded to ConverterBase
+    HqRegionConverter(Settings& settings);
+    ~HqRegionConverter(void);
+
+protected:
+    // Converts all records of reader into writer only (no scraps output).
+    bool ConvertFile(HDFBasReader* reader,
+                     PacBio::BAM::BamWriter* writer);
+    // Converts all records of reader into writer; scrapsWriter may be null,
+    // in which case no scraps records are written.
+    bool ConvertFile(HDFBasReader* reader,
+                     PacBio::BAM::BamWriter* writer,
+                     PacBio::BAM::BamWriter* scrapsWriter);
+    // Read-type names for the main and the scraps output
+    std::string HeaderReadType(void) const;
+    std::string ScrapsReadType(void) const;
+    // Filename suffixes for the main and the scraps output
+    std::string OutputFileSuffix(void) const;
+    std::string ScrapsFileSuffix(void) const;
+};
+
+#endif // HQREGIONCONVERTER_H
diff --git a/utils/bax2bam/src/IConverter.cpp b/utils/bax2bam/src/IConverter.cpp
new file mode 100644
index 0000000..8313d5c
--- /dev/null
+++ b/utils/bax2bam/src/IConverter.cpp
@@ -0,0 +1,85 @@
+// Author: Derek Barnett
+
+#include "IConverter.h"
+#include <pbbam/BamRecord.h>
+#include <algorithm>
+#include <iostream>
+#include <set>
+#include <cassert>
+#include <cmath>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Keeps a reference to the caller's settings; the other members start out
+// default-constructed.
+IConverter::IConverter(Settings& settings)
+    : settings_(settings)
+{
+}
+
+// Nothing to release at this level.
+IConverter::~IConverter(void)
+{
+}
+
+// Appends e to the list of messages later returned by Errors().
+void IConverter::AddErrorMessage(const std::string& e)
+{
+    errors_.push_back(e);
+}
+
+// Builds the BAM header for an output file whose read type is modeString.
+//
+// The header consists of
+//   @HD  fixed version strings and sort order "unknown"
+//   @RG  one read group built from the movie name, modeString, the platform
+//        (Sequel or RS, per settings_.isSequelInput), the chemistry triple,
+//        the frame rate and one entry per enabled base feature
+//   @PG  one program entry built from the program name, version, command line
+//        and description stored in settings_
+BamHeader IConverter::CreateHeader(const string& modeString)
+{
+    // @HD VN:<current SAM/BAM spec version>
+    //     SO:unknown
+    //     pb:<current PacBio BAM spec version>
+    BamHeader header;
+    header.Version("1.5");
+    header.SortOrder("unknown");
+    header.PacBioBamVersion("3.0.2");
+
+    // @RG ID: <read group ID>
+    //     DS: READTYPE=<HQREGION|POLYMERASE|SUBREAD>[;<Tag Manifest>;BINDINGKIT=<foo>;SEQUENCINGKIT=<bar>;BASECALLERVERSION=<42>]
+    //     PL: PACBIO
+    //     PU: <movieName>
+    //
+    PlatformModelType platform = PlatformModelType::RS;
+    if (settings_.isSequelInput)
+        platform = PlatformModelType::SEQUEL;
+
+    ReadGroupInfo rg(settings_.movieName, modeString, platform);
+    rg.BindingKit(bindingKit_);
+    rg.SequencingKit(sequencingKit_);
+    rg.BasecallerVersion(basecallerVersion_);
+    rg.FrameRateHz(frameRateHz_);
+
+    // per-base features, each announced with its two-letter tag
+    if (settings_.usingDeletionQV)      rg.BaseFeatureTag(BaseFeature::DELETION_QV,      "dq");
+    if (settings_.usingDeletionTag)     rg.BaseFeatureTag(BaseFeature::DELETION_TAG,     "dt");
+    if (settings_.usingInsertionQV)     rg.BaseFeatureTag(BaseFeature::INSERTION_QV,     "iq");
+    if (settings_.usingMergeQV)         rg.BaseFeatureTag(BaseFeature::MERGE_QV,         "mq");
+    if (settings_.usingSubstitutionQV)  rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV,  "sq");
+    if (settings_.usingSubstitutionTag) rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_TAG, "st");
+
+    // frame data (IPD, pulse width) uses the RAW codec when lossless frames
+    // were requested and the V1 codec otherwise
+    const FrameCodec frameCodec = settings_.losslessFrames ? FrameCodec::RAW
+                                                           : FrameCodec::V1;
+    if (settings_.usingIPD)
+        rg.IpdCodec(frameCodec, "ip");
+    if (settings_.usingPulseWidth)
+        rg.PulseWidthCodec(frameCodec, "pw");
+
+    header.AddReadGroup(rg);
+
+    // @PG ID:bax2bam-<version>
+    //     PN:bax2bam
+    //     CL:bax2bam <args>
+    //     DS:<description>
+    //     VN:<version>
+
+    ProgramInfo program(settings_.program + "-" + settings_.version);
+    program.Name(settings_.program);
+    program.CommandLine(settings_.program + " " + settings_.args);
+    program.Description(settings_.description);
+    program.Version(settings_.version);
+    header.AddProgram(program);
+
+    return header;
+}
+
+// Returns a copy of all messages recorded so far, oldest first.
+std::vector<std::string> IConverter::Errors(void) const
+{
+    return errors_;
+}
diff --git a/utils/bax2bam/src/IConverter.h b/utils/bax2bam/src/IConverter.h
new file mode 100644
index 0000000..dacfd62
--- /dev/null
+++ b/utils/bax2bam/src/IConverter.h
@@ -0,0 +1,53 @@
+// Author: Derek Barnett
+
+#ifndef ICONVERTER_H
+#define ICONVERTER_H
+
+#include "Settings.h"
+#include "SMRTSequence.hpp"
+#include <pbbam/BamHeader.h>
+#include <pbbam/BamWriter.h>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+class BamRecordImpl;
+
+} // namespace BAM
+} // namespace PacBio
+
+// Abstract base of the converters: holds the shared settings, the error
+// list and the run information needed to create a BAM header.
+class IConverter
+{
+public:
+    virtual ~IConverter(void);
+
+public:
+    // Messages recorded via AddErrorMessage(), returned by value.
+    virtual std::vector<std::string> Errors(void) const final;
+    // Performs the conversion; implemented by derived classes.
+    virtual bool Run(void) =0;
+
+protected:
+    // Only derived classes can construct; settings is kept by reference.
+    IConverter(Settings& settings);
+
+    // Appends e to the error list.
+    virtual void AddErrorMessage(const std::string& e) final;
+
+    // Builds a BAM header for an output file with read type modeString.
+    virtual PacBio::BAM::BamHeader CreateHeader(const std::string& modeString) final;
+
+    // Read-type name and filename suffix of the main output; supplied by
+    // derived classes.
+    virtual std::string HeaderReadType(void) const =0;
+    virtual std::string OutputFileSuffix(void) const =0;
+
+protected:
+    // common state
+    Settings& settings_;                 // not owned; refers to the caller's object
+    std::vector<std::string> errors_;    // messages in the order they were added
+
+    // run info for BamHeader creation
+    std::string bindingKit_;
+    std::string sequencingKit_;
+    std::string basecallerVersion_;
+    std::string frameRateHz_;            // frame rate kept in string form
+};
+
+#endif // ICONVERTER_H
diff --git a/utils/bax2bam/src/OptionParser.cpp b/utils/bax2bam/src/OptionParser.cpp
new file mode 100644
index 0000000..fc73176
--- /dev/null
+++ b/utils/bax2bam/src/OptionParser.cpp
@@ -0,0 +1,562 @@
+/**
+ * Copyright (C) 2010 Johannes Weißl <jargon at molb.org>
+ * License: your favourite BSD-style license
+ *
+ * See OptionParser.h for help.
+ */
+
+#include "OptionParser.h"
+
+#include <cstdlib>
+#include <algorithm>
+#include <complex>
+#include <ciso646>
+
+#if defined(ENABLE_NLS) && ENABLE_NLS
+# include <libintl.h>
+# define _(s) gettext(s)
+#else
+# define _(s) ((const char *) (s))
+#endif
+
+using namespace std;
+
+namespace optparse {
+
+////////// auxiliary (string) functions { //////////
+// Function object that surrounds a string with a fixed left and right part.
+class str_wrap {
+public:
+  // distinct left and right decoration
+  str_wrap(const string& l, const string& r) : lwrap(l), rwrap(r) {}
+  // same decoration on both sides
+  str_wrap(const string& w) : lwrap(w), rwrap(w) {}
+  // returns lwrap + s + rwrap
+  string operator() (const string& s) {
+    string wrapped(lwrap);
+    wrapped += s;
+    wrapped += rwrap;
+    return wrapped;
+  }
+  const string lwrap;
+  const string rwrap;
+};
+// Applies op to every element of [begin, end) and concatenates the results,
+// placing sep between consecutive results. An empty range gives "".
+template<typename InputIterator, typename UnaryOperator>
+static string str_join_trans(const string& sep, InputIterator begin, InputIterator end, UnaryOperator op) {
+  string joined;
+  bool first = true;
+  while (begin != end) {
+    if (!first)
+      joined += sep;
+    joined += op(*begin);
+    first = false;
+    ++begin;
+  }
+  return joined;
+}
+// Concatenates the elements of [begin, end), placing sep between them.
+template<class InputIterator>
+static string str_join(const string& sep, InputIterator begin, InputIterator end) {
+  // wrapping with an empty string leaves every element unchanged
+  str_wrap unchanged("");
+  return str_join_trans(sep, begin, end, unchanged);
+}
+// Replaces, in place, every occurrence of patt in s by repl and returns s.
+// The search resumes behind each inserted replacement, so text introduced by
+// repl is never matched again. An empty patt leaves s untouched: it would
+// otherwise match at every position and the loop would never finish.
+static string& str_replace(string& s, const string& patt, const string& repl) {
+  const size_t n = patt.length();
+  if (n == 0)
+    return s;
+
+  size_t pos = 0;
+  while ((pos = s.find(patt, pos)) != string::npos) {
+    s.replace(pos, n, repl);
+    pos += repl.size();
+  }
+  return s;
+}
+// Copying variant: returns s with every occurrence of patt replaced by repl,
+// leaving s itself untouched.
+static string str_replace(const string& s, const string& patt, const string& repl) {
+  string result(s);
+  // result is a non-const lvalue, so this call resolves to the in-place overload
+  str_replace(result, patt, repl);
+  return result;
+}
+// Word-wraps s and returns the wrapped text; every output line ends with a
+// newline.
+//
+//   pre           number of spaces used to indent lines
+//   len           width budget: a line is broken at the previous whitespace
+//                 once the next whitespace lies more than len - pre characters
+//                 behind the line start
+//   indent_first  when false, lines emitted before the indent is switched on
+//                 (see below) carry no indent
+//
+// A '\n' inside s always forces a line break. The text after the last
+// whitespace is appended to the final line without a width check.
+static string str_format(const string& s, size_t pre, size_t len, bool indent_first = true) {
+  stringstream ss;
+  string p;
+  if (indent_first)
+    p = string(pre, ' ');
+
+  size_t pos = 0, linestart = 0;
+  size_t line = 0;
+  while (true) {
+    bool wrap = false;
+
+    size_t new_pos = s.find_first_of(" \n\t", pos);
+    if (new_pos == string::npos)
+      break;
+    if (s[new_pos] == '\n') {
+      pos = new_pos + 1;
+      wrap = true;
+    }
+    // the indent is switched on at the top of an iteration that starts with
+    // exactly one line already emitted
+    if (line == 1)
+      p = string(pre, ' ');
+    // A width overflow can only break the line when something is already on
+    // it (pos > linestart). With pos == linestart the first word itself is too
+    // long; breaking there would make the length below wrap around to npos
+    // and emit the whole remaining text twice.
+    const bool overflow = pos > linestart && new_pos + pre > linestart + len;
+    if (wrap || overflow) {
+      ss << p << s.substr(linestart, pos - linestart - 1) << endl;
+      linestart = pos;
+      line++;
+    }
+    pos = new_pos + 1;
+  }
+  ss << p << s.substr(linestart) << endl;
+  return ss.str();
+}
+// Interprets s as a decimal integer (an empty string counts as "0") and
+// returns that value plus one, formatted as a string. Text that does not
+// parse as a number is treated as 0.
+static string str_inc(const string& s) {
+  stringstream ss;
+  string v = (s != "") ? s : "0";
+  // start from 0 so the result is well-defined even when extraction fails
+  // under pre-C++11 rules, which leave the target untouched
+  long i = 0;
+  istringstream(v) >> i;
+  ss << i+1;
+  return ss.str();
+}
+// Returns the column count to format output for: the value of the COLUMNS
+// environment variable when it is set and parses as a number, otherwise 80.
+// On _WIN32 the environment is not consulted and 80 is always returned.
+static unsigned int cols() {
+  unsigned int n = 80;
+#ifndef _WIN32
+  const char *s = getenv("COLUMNS");
+  if (s) {
+    // Parse into a scratch variable: since C++11 a failed extraction stores 0
+    // in its target, which would silently replace the default.
+    unsigned int parsed = 0;
+    if (istringstream(s) >> parsed)
+      n = parsed;
+  }
+#endif
+  return n;
+}
+// Returns the last component of the path s.
+// Trailing '/' characters are ignored. A path made up only of '/' characters
+// yields "/", and an empty path yields "".
+static string basename(const string& s) {
+  const size_t last = s.find_last_not_of('/');
+  if (last == string::npos) {
+    // nothing but slashes, or nothing at all
+    return s.empty() ? string() : string(1, '/');
+  }
+
+  // the component starts right behind the last '/' that precedes its final
+  // character, or at the very beginning when there is no such '/'
+  const size_t sep = s.find_last_of('/', last);
+  const size_t first = (sep == string::npos) ? 0 : sep + 1;
+  return s.substr(first, last - first + 1);
+}
+////////// } auxiliary (string) functions //////////
+
+
+////////// class OptionParser { //////////
+// Defaults: usage text "%prog [options]", automatic help and version options
+// switched on, and interspersed arguments switched on.
+OptionParser::OptionParser()
+  : _usage(_("%prog [options]"))
+  , _add_help_option(true)
+  , _add_version_option(true)
+  , _interspersed_args(true)
+{
+}
+
+// Registers an option known under a single name.
+Option& OptionParser::add_option(const string& opt) {
+  const vector<string> names(1, opt);
+  return add_option(names);
+}
+// Registers an option known under two names.
+Option& OptionParser::add_option(const string& opt1, const string& opt2) {
+  vector<string> names;
+  names.push_back(opt1);
+  names.push_back(opt2);
+  return add_option(names);
+}
+// Registers an option known under three names.
+Option& OptionParser::add_option(const string& opt1, const string& opt2, const string& opt3) {
+  vector<string> names;
+  names.push_back(opt1);
+  names.push_back(opt2);
+  names.push_back(opt3);
+  return add_option(names);
+}
+// Appends a new option to the parser and registers it under every name in v.
+//
+// Names starting with "--" are long names (stored without the dashes). Empty
+// names are skipped. For any other name the single character at index 1 is
+// taken as the short name. The option's destination defaults to the first long
+// name with '-' replaced by '_', or else to the first short name.
+// Returns a reference to the new option.
+Option& OptionParser::add_option(const vector<string>& v) {
+  _opts.resize(_opts.size()+1);
+  Option& option = _opts.back();
+
+  string first_short_name;
+  for (size_t k = 0; k < v.size(); ++k) {
+    const string& name = v[k];
+    if (name.empty())
+      continue;
+
+    if (name.compare(0, 2, "--") == 0) {
+      const string long_name = name.substr(2);
+      if (option.dest().empty())
+        option.dest(str_replace(long_name, "-", "_"));
+      option._long_opts.insert(long_name);
+      _optmap_l[long_name] = &option;
+    } else {
+      const string short_name = name.substr(1,1);
+      if (first_short_name.empty())
+        first_short_name = short_name;
+      option._short_opts.insert(short_name);
+      _optmap_s[short_name] = &option;
+    }
+  }
+
+  // no long name supplied a destination: fall back to the first short name
+  if (option.dest().empty())
+    option.dest(first_short_name);
+  return option;
+}
+
+// Makes all options of group reachable through this parser and remembers the
+// group. Only pointers to the group and to its options are stored, so the
+// group must stay alive for as long as the parser uses it.
+// Returns *this.
+OptionParser& OptionParser::add_option_group(const OptionGroup& group) {
+  typedef list<Option>::const_iterator option_iter;
+  typedef set<string>::const_iterator  name_iter;
+
+  for (option_iter opt = group._opts.begin(); opt != group._opts.end(); ++opt) {
+    const Option* const entry = &*opt;
+    // short names first, then long names
+    for (name_iter name = opt->_short_opts.begin(); name != opt->_short_opts.end(); ++name)
+      _optmap_s[*name] = entry;
+    for (name_iter name = opt->_long_opts.begin(); name != opt->_long_opts.end(); ++name)
+      _optmap_l[*name] = entry;
+  }
+
+  _groups.push_back(&group);
+  return *this;
+}
+
+// Returns the option registered under the short name opt.
+// An unknown name is reported through error().
+// NOTE(review): presumably error() does not return; if it did, the end
+// iterator would be dereferenced below. Confirm against its definition.
+const Option& OptionParser::lookup_short_opt(const string& opt) const {
+  const optMap::const_iterator found = _optmap_s.find(opt);
+  const bool known = (found != _optmap_s.end());
+  if (!known)
+    error(_("no such option") + string(": -") + opt);
+  return *(found->second);
+}
+
+// Consumes the short option at the front of _remaining.
+//
+//   opt  the option letter
+//   arg  the complete argument as given, i.e. "-" + opt + anything that follows
+//
+// For an option that takes one argument, the value is the text glued to the
+// option letter or, when there is none, the next remaining argument. For all
+// other options any glued text is pushed back as a new "-..." argument, so
+// bundled letters are processed one by one.
+void OptionParser::handle_short_opt(const string& opt, const string& arg) {
+
+  _remaining.pop_front();
+
+  const Option& option = lookup_short_opt(opt);
+  string value;
+
+  if (option._nargs == 1) {
+    // value glued to the letter, e.g. "-ofile"
+    value = arg.substr(2);
+    if (value.empty()) {
+      // otherwise the next argument is the value
+      if (_remaining.empty())
+        error("-" + opt + " " + _("option requires an argument"));
+      value = _remaining.front();
+      _remaining.pop_front();
+    }
+  } else if (arg.length() > 2) {
+    // bundled letters: re-queue everything behind the first one
+    _remaining.push_front(string("-") + arg.substr(2));
+  }
+
+  process_opt(option, string("-") + opt, value);
+}
+
+// Returns the option registered under the long name opt.
+//
+// opt may be the complete name or any unambiguous prefix of one. A complete
+// name always wins, even when it is also a prefix of longer option names
+// (e.g. "--foo" with both "--foo" and "--foobar" registered). A prefix that
+// fits several names, or no name at all, is reported through error().
+const Option& OptionParser::lookup_long_opt(const string& opt) const {
+
+  // exact match takes priority over prefix matching
+  const optMap::const_iterator exact = _optmap_l.find(opt);
+  if (exact != _optmap_l.end())
+    return *exact->second;
+
+  // otherwise collect every registered name that starts with opt
+  list<string> matching;
+  for (optMap::const_iterator it = _optmap_l.begin(); it != _optmap_l.end(); ++it) {
+    if (it->first.compare(0, opt.length(), opt) == 0)
+      matching.push_back(it->first);
+  }
+  if (matching.size() > 1) {
+    string x = str_join(", ", matching.begin(), matching.end());
+    error(_("ambiguous option") + string(": --") + opt + " (" + x + "?)");
+  }
+  if (matching.size() == 0)
+    error(_("no such option") + string(": --") + opt);
+
+  return *_optmap_l.find(matching.front())->second;
+}
+
+// Consumes the long option at the front of _remaining.
+//
+// optstr is the argument without its leading "--". A value may be attached
+// with '=' ("name=value"). An option that takes one argument and has no '='
+// takes the next remaining argument as its value. An option that takes one
+// argument but ends up with an empty value is reported through error().
+void OptionParser::handle_long_opt(const string& optstr) {
+
+  _remaining.pop_front();
+
+  // split "name=value" at the first '='
+  const size_t delim = optstr.find("=");
+  const bool has_inline_value = (delim != string::npos);
+  const string opt = has_inline_value ? optstr.substr(0, delim) : optstr;
+  string value = has_inline_value ? optstr.substr(delim+1) : string();
+
+  const Option& option = lookup_long_opt(opt);
+  if (option._nargs == 1) {
+    // no '=' given: the next argument, if any, is the value
+    if (!has_inline_value && !_remaining.empty()) {
+      value = _remaining.front();
+      _remaining.pop_front();
+    }
+    if (value.empty())
+      error("--" + opt + " " + _("option requires an argument"));
+  }
+
+  process_opt(option, string("--") + opt, value);
+}
+
+// Entry point for main(): argv[0] names the program, the rest are parsed.
+Values& OptionParser::parse_args(const int argc, char const* const* const argv) {
+  if (prog().empty()) prog(basename(argv[0]));  // keep an explicitly set prog()
+  return parse_args(argv + 1, argv + argc);
+}
+Values& OptionParser::parse_args(const vector<string>& v) {
+  // Parse the arguments in v and return the collected option values.
+  _remaining.assign(v.begin(), v.end());
+  // Add the automatic options; each splice moves the last _opts entry to the front.
+  if (add_version_option() and version() != "") {
+    add_option("--version") .action("version") .help(_("show program's version number and exit"));
+    _opts.splice(_opts.begin(), _opts, --(_opts.end()));
+  }
+  if (add_help_option()) {
+    add_option("-h", "--help") .action("help") .help(_("show this help message and exit"));
+    _opts.splice(_opts.begin(), _opts, --(_opts.end()));
+  }
+  // Main loop: classify the front argument until none are left or parsing stops.
+  while (not _remaining.empty()) {
+    const string arg = _remaining.front();
+    // A bare "--" is dropped and ends option parsing.
+    if (arg == "--") {
+      _remaining.pop_front();
+      break;
+    }
+    // "--name" is a long option, "-x" a short one; a lone "-" and everything else is positional.
+    if (arg.substr(0,2) == "--") {
+      handle_long_opt(arg.substr(2));
+    } else if (arg.substr(0,1) == "-" and arg.length() > 1) {
+      handle_short_opt(arg.substr(1,1), arg);
+    } else {
+      _remaining.pop_front();
+      _leftover.push_back(arg);
+      if (not interspersed_args())  // first positional argument ends option parsing
+        break;
+    }
+  }
+  while (not _remaining.empty()) {  // whatever is still queued is positional
+    const string arg = _remaining.front();
+    _remaining.pop_front();
+    _leftover.push_back(arg);
+  }
+  // Parser-level defaults fill destinations that are still unset.
+  for (strMap::const_iterator it = _defaults.begin(); it != _defaults.end(); ++it) {
+    if (not _values.is_set(it->first))
+      _values[it->first] = it->second;
+  }
+  // Then per-option defaults, skipping options whose default is empty.
+  for (list<Option>::const_iterator it = _opts.begin(); it != _opts.end(); ++it) {
+    if (it->get_default() != "" and not _values.is_set(it->dest()))
+        _values[it->dest()] = it->get_default();
+  }
+
+  return _values;
+}
+
+// Apply one parsed option to _values according to the option's action.
+// `opt` is the option as spelled on the command line, e.g. "-x" or "--name".
+void OptionParser::process_opt(const Option& o, const string& opt, const string& value) {
+  const string& action = o.action();
+  const string& dest = o.dest();
+
+  // "help" and "version" print their text and terminate the program.
+  if (action == "help") {
+    print_help();
+    std::exit(0);
+  }
+  if (action == "version") {
+    print_version();
+    std::exit(0);
+  }
+
+  // Callbacks are invoked without recording anything here; a missing callback object makes this a no-op.
+  if (action == "callback") {
+    if (o.callback())
+      (*o.callback())(o, opt, value, *this);
+    return;
+  }
+
+  if (action == "store" || action == "append") {
+    // Both take a value, which must pass the option's type check first.
+    const string err = o.check_type(opt, value);
+    if (err != "")
+      error(err);
+    _values[dest] = value;
+    if (action == "append")
+      _values.all(dest).push_back(value);
+  }
+  else if (action == "store_const" || action == "append_const") {
+    _values[dest] = o.get_const();
+    if (action == "append_const")
+      _values.all(dest).push_back(o.get_const());
+  }
+  else if (action == "store_true")
+    _values[dest] = "1";
+  else if (action == "store_false")
+    _values[dest] = "0";
+  else if (action == "count")
+    _values[dest] = str_inc(_values[dest]);
+  else
+    return;  // unrecognized action: nothing is recorded
+
+  // Every action that reaches this point wrote a value on the user's behalf.
+  _values.is_set_by_user(dest, true);
+}
+
+// Concatenate the formatted help of every option whose help text is not SUPPRESS_HELP.
+string OptionParser::format_option_help(unsigned int indent /* = 2 */) const {
+  stringstream out;
+  // An empty option list simply yields an empty string.
+  list<Option>::const_iterator opt = _opts.begin();
+  while (opt != _opts.end()) {
+    const bool hidden = (opt->help() == SUPPRESS_HELP);
+    if (not hidden)
+      out << opt->format_help(indent);
+    ++opt;
+  }
+  return out.str();
+}
+
+// Assemble the full help text: usage, description, the parser's own options,
+// every registered option group, and finally the epilog.
+string OptionParser::format_help() const {
+  stringstream out;
+  if (usage() != SUPPRESS_USAGE)
+    out << get_usage() << endl;
+  if (not description().empty())
+    out << str_format(description(), 0, cols()) << endl;
+
+  out << _("Options") << ":" << endl << format_option_help();
+
+  // Group options are formatted with indent 4 instead of the default 2.
+  typedef list<OptionGroup const*>::const_iterator GroupIt;
+  for (GroupIt g = _groups.begin(); g != _groups.end(); ++g) {
+    const OptionGroup* const group = *g;
+    out << endl << "  " << group->title() << ":" << endl;
+    if (not group->group_description().empty())
+      out << str_format(group->group_description(), 4, cols()) << endl;
+    out << group->format_option_help(4);
+  }
+
+  if (not epilog().empty())
+    out << endl << str_format(epilog(), 0, cols());
+  return out.str();
+}
+void OptionParser::print_help() const {
+  cout << format_help();  // the complete help text goes to standard output
+}
+
+// Store the usage text. A leading "usage: " (in any letter case) is dropped,
+// since format_usage() puts its own label in front again.
+void OptionParser::set_usage(const string& u) {
+  string head = u.substr(0, 7);
+  transform(head.begin(), head.end(), head.begin(), ::tolower);
+  const bool has_prefix = (head == "usage: ");
+  _usage = has_prefix ? u.substr(7) : u;
+}
+string OptionParser::format_usage(const string& u) const {
+  stringstream ss;
+  ss << _("Usage") << ": " << u << endl;  // translated label, usage text, newline
+  return ss.str();
+}
+string OptionParser::get_usage() const {
+  // Suppressed usage yields "", otherwise "%prog" is expanded to prog().
+  const bool suppressed = (usage() == SUPPRESS_USAGE);
+  return suppressed ? string("") : format_usage(str_replace(usage(), "%prog", prog()));
+}
+void OptionParser::print_usage(ostream& out) const {
+  // Nothing is written when the usage text is empty (e.g. suppressed).
+  const string text = get_usage();
+  if (not text.empty()) out << text << endl;
+}
+void OptionParser::print_usage() const {
+  print_usage(cout);  // convenience overload: usage goes to standard output
+}
+
+string OptionParser::get_version() const {
+  return str_replace(_version, "%prog", prog());  // "%prog" stands for the program name
+}
+void OptionParser::print_version(ostream& out) const {
+  out << get_version() << endl;  // expanded version string plus a newline
+}
+void OptionParser::print_version() const {
+  print_version(cout);  // convenience overload: version goes to standard output
+}
+
+void OptionParser::exit() const {
+  std::exit(2);  // terminates the process with status 2; used by error()
+}
+void OptionParser::error(const string& msg) const {
+  print_usage(cerr);  // usage first, then "<prog>: error: <msg>", both on stderr
+  cerr << prog() << ": " << _("error") << ": " << msg << endl;
+  exit();  // does not return: exit() ends the process
+}
+////////// } class OptionParser //////////
+
+////////// class Values { //////////
+// Read-only lookup: an unknown key yields a shared empty string, nothing is inserted.
+const string& Values::operator[] (const string& d) const {
+  static const string empty = "";
+  const strMap::const_iterator pos = _map.find(d);
+  return (pos == _map.end()) ? empty : pos->second;
+}
+// Maintain the set of destinations that were given on the command line:
+// yes == true adds d to the set, yes == false removes it again.
+void Values::is_set_by_user(const string& d, bool yes) {
+  if (not yes) { _userSet.erase(d); return; }
+  _userSet.insert(d);
+}
+////////// } class Values //////////
+
+////////// class Option { //////////
+// Validate val against the option's declared type. Returns "" when the value
+// is accepted (or the type is not checked), else a translated error message.
+string Option::check_type(const string& opt, const string& val) const {
+  const string& kind = type();
+  istringstream in(val);
+  stringstream hint;  // extra text appended after the quoted value
+  string reason;
+  bool rejected = false;
+  if (kind == "int" || kind == "long") {
+    long parsed;
+    rejected = not (in >> parsed);
+    reason = _("invalid integer value");
+  } else if (kind == "float" || kind == "double") {
+    double parsed;
+    rejected = not (in >> parsed);
+    reason = _("invalid floating-point value");
+  } else if (kind == "complex") {
+    complex<double> parsed;
+    rejected = not (in >> parsed);
+    reason = _("invalid complex value");
+  } else if (kind == "choice" && find(choices().begin(), choices().end(), val) == choices().end()) {
+    list<string> quoted = choices();
+    transform(quoted.begin(), quoted.end(), quoted.begin(), str_wrap("'"));
+    hint << " (" << _("choose from") << " " << str_join(", ", quoted.begin(), quoted.end()) << ")";
+    rejected = true;
+    reason = _("invalid choice");
+  }
+  if (not rejected) return "";
+  return string(_("option")) + " " + opt + ": " + reason + ": '" + val + "'" + hint.str();
+}
+
+// Build the option-string column of a help entry: the indent, then the short
+// options, then the long ones. With no option strings, only the metavar is printed.
+string Option::format_option_help(unsigned int indent /* = 2 */) const {
+  // Placeholder for the value, used only when the option takes an argument;
+  // it defaults to the upper-cased type name when no metavar was set.
+  string short_suffix, long_suffix;
+  if (nargs() == 1) {
+    string placeholder = metavar();
+    if (placeholder.empty()) {
+      placeholder = type();
+      transform(placeholder.begin(), placeholder.end(), placeholder.begin(), ::toupper);
+    }
+    short_suffix = " " + placeholder;
+    long_suffix = "=" + placeholder;
+  }
+  const bool has_short = not _short_opts.empty();
+  const bool has_long = not _long_opts.empty();
+
+  stringstream out;
+  out << string(indent, ' ');
+  if (has_short)
+    out << str_join_trans(", ", _short_opts.begin(), _short_opts.end(), str_wrap("-", short_suffix));
+  if (has_short and has_long)
+    out << ", ";
+  if (has_long)
+    out << str_join_trans(", ", _long_opts.begin(), _long_opts.end(), str_wrap("--", long_suffix));
+  if (not has_short and not has_long)
+    out << metavar();
+  return out.str();
+}
+
+// Format one help entry: option strings padded to a column, then the wrapped help.
+string Option::format_help(unsigned int indent /* = 2 */) const {
+  stringstream ss;
+  string h = format_option_help(indent);
+  const unsigned int width = cols(), opt_width = min(width*3/10, 36u);
+  bool indent_first = false;
+  ss << h;
+  // If the option list is too long, start a new paragraph. Compare with "+ 1", not
+  // "opt_width - 1": opt_width is unsigned and 0 for a tiny cols(), so it would wrap.
+  if (h.length() + 1 >= opt_width) {
+    ss << endl;
+    indent_first = true;
+  } else {
+    ss << string(opt_width - h.length(), ' ');
+    if (help() == "") ss << endl;
+  }
+  if (help() != "") {
+    string help_str = (get_default() != "") ? str_replace(help(), "%default", get_default()) : help();
+    ss << str_format(help_str, opt_width, width, indent_first);
+  }
+  return ss.str();
+}
+
+// Set the action; the value-less actions listed below also force nargs to 0.
+Option& Option::action(const string& a) {
+  static const char* const no_value[] = { "store_const", "store_true", "store_false", "append_const", "count", "help", "version" };
+  _action = a;
+  if (find(no_value, no_value + 7, a) != no_value + 7) nargs(0);
+  return *this;
+}
+////////// } class Option //////////
+
+}
diff --git a/utils/bax2bam/src/OptionParser.h b/utils/bax2bam/src/OptionParser.h
new file mode 100644
index 0000000..8ec6538
--- /dev/null
+++ b/utils/bax2bam/src/OptionParser.h
@@ -0,0 +1,306 @@
+/**
+ * Copyright (C) 2010 Johannes Weißl <jargon at molb.org>
+ * License: your favourite BSD-style license
+ *
+ * git clone http://github.com/weisslj/cpp-optparse.git
+ *
+ * This is yet another option parser for C++. It is modelled after the
+ * excellent Python optparse API. Although incomplete, anyone familiar with
+ * optparse should feel at home:
+ * http://docs.python.org/library/optparse.html
+ *
+ * Design decisions:
+ * - elegant and easy usage more important than speed / flexibility
+ * - shortness more important than feature completeness
+ *   * no unicode
+ *   * no checking for user programming errors
+ *
+ * Why not use getopt/getopt_long?
+ * - not C++ / not completely POSIX
+ * - too cumbersome to use, would need lot of additional code
+ *
+ * Why not use Boost.Program_options?
+ * - boost not installed on all target platforms (esp. cluster, HPC, ...)
+ * - too big to include just for option handling:
+ *   322 *.h (44750 lines) + 7 *.cpp (2078 lines)
+ *
+ * Why not use tclap/Opag/Options/CmdLine/Anyoption/Argument_helper/...?
+ * - no reason, writing one is faster than code inspection :-)
+ * - similarity to Python desired for faster learning curve
+ *
+ * Future work:
+ * - nargs > 1?
+ * - comments?
+ *
+ * Python only features:
+ * - conflict handlers
+ * - adding new actions
+ *
+ *
+ * Example:
+ *
+ * using optparse::OptionParser;
+ *
+ * OptionParser parser = OptionParser() .description("just an example");
+ *
+ * parser.add_option("-f", "--file") .dest("filename")
+ *                   .help("write report to FILE") .metavar("FILE");
+ * parser.add_option("-q", "--quiet")
+ *                   .action("store_false") .dest("verbose") .set_default("1")
+ *                   .help("don't print status messages to stdout");
+ * 
+ * optparse::Values options = parser.parse_args(argc, argv);
+ * vector<string> args = parser.args();
+ *
+ * if (options.get("verbose"))
+ *     cout << options["filename"] << endl;
+ *
+ */
+
+#ifndef OPTIONPARSER_H_
+#define OPTIONPARSER_H_
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace optparse {
+
+class OptionParser;
+class OptionGroup;
+class Option;
+class Values;
+class Value;
+class Callback;
+
+typedef std::map<std::string,std::string> strMap;
+typedef std::map<std::string,std::list<std::string> > lstMap;
+typedef std::map<std::string,Option const*> optMap;
+
+const char* const SUPPRESS_HELP = "SUPPRESS" "HELP";
+const char* const SUPPRESS_USAGE = "SUPPRESS" "USAGE";
+
+//! Class for automatic conversion from string -> anytype; usable on const objects too.
+class Value {
+  public:
+    Value() : str(), valid(false) {}  // invalid value: every numeric conversion yields 0 / false
+    Value(const std::string& v) : str(v), valid(true) {}
+    operator const char*() const { return str.c_str(); }  // pointer is valid only while this Value is alive
+    operator bool() const { bool t; return (valid && (std::istringstream(str) >> t)) ? t : false; }  // numeric parse: "1" is true, anything else false
+    operator short() const { short t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }  // unparsable text converts to 0, here and below
+    operator unsigned short() const { unsigned short t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator int() const { int t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator unsigned int() const { unsigned int t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator long() const { long t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator unsigned long() const { unsigned long t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator float() const { float t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator double() const { double t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+    operator long double() const { long double t; return (valid && (std::istringstream(str) >> t)) ? t : 0; }
+ private:
+    const std::string str;
+    bool valid;
+};
+
+class Values {
+  public:
+    Values() : _map() {}
+    const std::string& operator[] (const std::string& d) const;
+    std::string& operator[] (const std::string& d) { return _map[d]; }
+    bool is_set(const std::string& d) const { return _map.find(d) != _map.end(); }
+    bool is_set_by_user(const std::string& d) const { return _userSet.find(d) != _userSet.end(); }
+    void is_set_by_user(const std::string& d, bool yes);
+    Value get(const std::string& d) const { return (is_set(d)) ? Value((*this)[d]) : Value(); }
+    typedef std::list<std::string>::iterator iterator;
+    typedef std::list<std::string>::const_iterator const_iterator;
+    // all(d): the list kept for d; the const overload returns an empty list for an unknown d instead of dereferencing end().
+    std::list<std::string>& all(const std::string& d) { return _appendMap[d]; }
+    const std::list<std::string>& all(const std::string& d) const { return _appendMap.count(d) ? _appendMap.find(d)->second : no_values(); }
+  private:
+    static const std::list<std::string>& no_values() { static const std::list<std::string> none; return none; }
+    strMap _map;
+    lstMap _appendMap;
+    std::set<std::string> _userSet;
+};
+
+class OptionParser {
+  public:
+    OptionParser();
+    virtual ~OptionParser() {}
+    // Chainable setters: each one stores its argument and returns *this.
+    OptionParser& usage(const std::string& u) { set_usage(u); return *this; }
+    OptionParser& version(const std::string& v) { _version = v; return *this; }
+    OptionParser& description(const std::string& d) { _description = d; return *this; }
+    OptionParser& add_help_option(bool h) { _add_help_option = h; return *this; }
+    OptionParser& add_version_option(bool v) { _add_version_option = v; return *this; }
+    OptionParser& prog(const std::string& p) { _prog = p; return *this; }
+    OptionParser& epilog(const std::string& e) { _epilog = e; return *this; }
+    OptionParser& set_defaults(const std::string& dest, const std::string& val) {
+      _defaults[dest] = val; return *this;
+    }
+    OptionParser& enable_interspersed_args() { _interspersed_args = true; return *this; }
+    OptionParser& disable_interspersed_args() { _interspersed_args = false; return *this; }
+    OptionParser& add_option_group(const OptionGroup& group);
+    // Read accessors for the settings above.
+    const std::string& usage() const { return _usage; }
+    const std::string& version() const { return _version; }
+    const std::string& description() const { return _description; }
+    bool add_help_option() const { return _add_help_option; }
+    bool add_version_option() const { return _add_version_option; }
+    const std::string& prog() const { return _prog; }
+    const std::string& epilog() const { return _epilog; }
+    bool interspersed_args() const { return _interspersed_args; }
+    // Register an option under one or more option strings; the returned Option is configured by chaining.
+    Option& add_option(const std::string& opt);
+    Option& add_option(const std::string& opt1, const std::string& opt2);
+    Option& add_option(const std::string& opt1, const std::string& opt2, const std::string& opt3);
+    Option& add_option(const std::vector<std::string>& opt);
+    // Parsing entry points; the iterator overload copies its range into a vector first.
+    Values& parse_args(int argc, char const* const* argv);
+    Values& parse_args(const std::vector<std::string>& args);
+    template<typename InputIterator>
+    Values& parse_args(InputIterator begin, InputIterator end) {
+      return parse_args(std::vector<std::string>(begin, end));
+    }
+    // Leftover arguments: a list reference on const parsers, a vector copy otherwise.
+    const std::list<std::string>& args() const { return _leftover; }
+    std::vector<std::string> args() {
+      return std::vector<std::string>(_leftover.begin(), _leftover.end());
+    }
+
+    std::string format_help() const;
+    std::string format_option_help(unsigned int indent = 2) const;
+    void print_help() const;
+
+    void set_usage(const std::string& u);
+    std::string get_usage() const;
+    void print_usage(std::ostream& out) const;
+    void print_usage() const;
+
+    std::string get_version() const;
+    void print_version(std::ostream& out) const;
+    void print_version() const;
+
+    void error(const std::string& msg) const;
+    void exit() const;
+
+  private:
+    const Option& lookup_short_opt(const std::string& opt) const;
+    const Option& lookup_long_opt(const std::string& opt) const;
+
+    void handle_short_opt(const std::string& opt, const std::string& arg);
+    void handle_long_opt(const std::string& optstr);
+
+    void process_opt(const Option& option, const std::string& opt, const std::string& value);
+
+    std::string format_usage(const std::string& u) const;
+
+    std::string _usage;
+    std::string _version;
+    std::string _description;
+    bool _add_help_option;
+    bool _add_version_option;
+    std::string _prog;
+    std::string _epilog;
+    bool _interspersed_args;
+
+    Values _values;  // parse results, returned by reference from parse_args()
+
+    std::list<Option> _opts;
+    optMap _optmap_s;  // short option string -> Option
+    optMap _optmap_l;  // long option string -> Option
+    strMap _defaults;  // parser-level defaults set through set_defaults()
+    std::list<OptionGroup const*> _groups;  // stored as pointers to the caller's groups
+
+    std::list<std::string> _remaining;  // arguments still to be processed
+    std::list<std::string> _leftover;  // positional arguments collected while parsing
+};
+
+class OptionGroup : public OptionParser {
+  public:
+    OptionGroup(const OptionParser& /*p*/, const std::string& t, const std::string& d = "") :  // the parser argument is accepted but unused
+      //_parser(p),
+      _title(t), _group_description(d) {}
+    virtual ~OptionGroup() {}
+    // Chainable setters and plain getters for the group's title and description.
+    OptionGroup& title(const std::string& t) { _title = t; return *this; }
+    OptionGroup& group_description(const std::string& d) { _group_description = d; return *this; }
+    const std::string& title() const { return _title; }
+    const std::string& group_description() const { return _group_description; }
+
+  private:
+    //const OptionParser& _parser;
+    std::string _title;
+    std::string _group_description;
+};
+
+class Option {
+  public:
+    Option() : _action("store"), _type("string"), _nargs(1), _callback(0) {}  // default: store one string value
+    virtual ~Option() {}
+    // Chainable setters: each one stores its argument and returns *this.
+    Option& action(const std::string& a);
+    Option& type(const std::string& t) { _type = t; return *this; }
+    Option& dest(const std::string& d) { _dest = d; return *this; }
+    Option& set_default(const std::string& d) { _default = d; return *this; }
+    template<typename T>
+    Option& set_default(T t) { std::ostringstream ss; ss << t; _default = ss.str(); return *this; }  // stored as text via operator<<
+    Option& nargs(size_t n) { _nargs = n; return *this; }
+    Option& set_const(const std::string& c) { _const = c; return *this; }
+    template<typename InputIterator>
+    Option& choices(InputIterator begin, InputIterator end) {
+      _choices.assign(begin, end); type("choice"); return *this;
+    }
+    template<typename InputEnumerable>
+    Option& choices(InputEnumerable enumerable) {
+      _choices.assign(enumerable.begin(), enumerable.end()); type("choice"); return *this;
+    }
+    Option& help(const std::string& h) { _help = h; return *this; }
+    Option& metavar(const std::string& m) { _metavar = m; return *this; }
+    Option& callback(Callback& c) { _callback = &c; return *this; }  // only the address is kept, not a copy
+    // Read accessors for the attributes above.
+    const std::string& action() const { return _action; }
+    const std::string& type() const { return _type; }
+    const std::string& dest() const { return _dest; }
+    const std::string& get_default() const { return _default; }
+    size_t nargs() const { return _nargs; }
+    const std::string& get_const() const { return _const; }
+    const std::list<std::string>& choices() const { return _choices; }
+    const std::string& help() const { return _help; }
+    const std::string& metavar() const { return _metavar; }
+    Callback* callback() const { return _callback; }
+
+  private:
+    std::string check_type(const std::string& opt, const std::string& val) const;
+    std::string format_option_help(unsigned int indent = 2) const;
+    std::string format_help(unsigned int indent = 2) const;
+
+    std::set<std::string> _short_opts;
+    std::set<std::string> _long_opts;
+
+    std::string _action;
+    std::string _type;
+    std::string _dest;
+    std::string _default;
+    size_t _nargs;
+    std::string _const;
+    std::list<std::string> _choices;
+    std::string _help;
+    std::string _metavar;
+    Callback* _callback;
+
+    friend class OptionParser;  // the parser uses the private members above directly
+};
+
+class Callback {
+public:
+  virtual void operator() (const Option& option, const std::string& opt, const std::string& val, const OptionParser& parser) = 0;  // to be implemented by subclasses
+  virtual ~Callback() {}
+};
+
+}
+
+#endif
diff --git a/utils/bax2bam/src/PolymeraseReadConverter.cpp b/utils/bax2bam/src/PolymeraseReadConverter.cpp
new file mode 100644
index 0000000..2197706
--- /dev/null
+++ b/utils/bax2bam/src/PolymeraseReadConverter.cpp
@@ -0,0 +1,60 @@
+// Author: Derek Barnett
+
+#include "PolymeraseReadConverter.h"
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+
+using namespace std;
+
+PolymeraseReadConverter::PolymeraseReadConverter(Settings& settings)
+    : ConverterBase(settings)  // the settings are simply forwarded to the base class
+{ }
+
+PolymeraseReadConverter::~PolymeraseReadConverter(void) { }  // empty body: no cleanup is done here
+
+// Convert each usable record of one bax file into a BAM record spanning the
+// whole read (0 .. length). Returns false as soon as one write fails.
+bool PolymeraseReadConverter::ConvertFile(HDFBasReader* reader,
+                                          PacBio::BAM::BamWriter* writer)
+{
+    assert(reader);
+
+    // initialize read scores
+    InitReadScores(reader);
+
+    // fetch records from HDF5 file
+    SMRTSequence smrtRecord;
+    while (reader->GetNext(smrtRecord)) {
+
+        // Only non-empty records from sequencing ZMWs are converted.
+        // NOTE(review): skipped records are not Free()d here - confirm GetNext() copes.
+        const bool usable = (smrtRecord.length != 0) && IsSequencingZmw(smrtRecord);
+        if (!usable)
+            continue;
+
+        // Free() the record whether or not the BAX -> BAM write succeeded.
+        const bool written =
+            WriteRecord(smrtRecord, 0, smrtRecord.length, ReadGroupId(), writer);
+        smrtRecord.Free();
+        if (!written)
+            return false;
+    }
+    return true;  // if we get here, all OK
+}
+
+bool PolymeraseReadConverter::ConvertFile(HDFBasReader* reader,
+                                          PacBio::BAM::BamWriter* writer,
+                                          PacBio::BAM::BamWriter* scrapsWriter) 
+{ return false; }  // scraps variant: ignores all arguments and always reports failure
+
+string PolymeraseReadConverter::HeaderReadType(void) const
+{ return "POLYMERASE"; }  // fixed label; NOTE(review): presumably written to the BAM header - confirm in ConverterBase
+
+string PolymeraseReadConverter::ScrapsReadType(void) const
+{ return "UNKNOWN"; }  // fixed label for the scraps read type of this converter
+
+string PolymeraseReadConverter::OutputFileSuffix(void) const
+{ return ".polymerase.bam"; }  // fixed suffix; NOTE(review): presumably appended to the output prefix - confirm
+
+string PolymeraseReadConverter::ScrapsFileSuffix(void) const
+{ return ".empty.bam"; }  // fixed suffix for the scraps file of this converter
diff --git a/utils/bax2bam/src/PolymeraseReadConverter.h b/utils/bax2bam/src/PolymeraseReadConverter.h
new file mode 100644
index 0000000..0fda123
--- /dev/null
+++ b/utils/bax2bam/src/PolymeraseReadConverter.h
@@ -0,0 +1,26 @@
+// Author: Derek Barnett
+
+#ifndef POLYMERASEREADCONVERTER_H
+#define POLYMERASEREADCONVERTER_H
+
+#include "ConverterBase.h"
+
+class PolymeraseReadConverter : public ConverterBase<>
+{
+public:
+    PolymeraseReadConverter(Settings& settings);
+    ~PolymeraseReadConverter(void);
+
+protected:
+    bool ConvertFile(HDFBasReader* reader,
+                     PacBio::BAM::BamWriter* writer);  // one BAM record per converted read
+    bool ConvertFile(HDFBasReader* reader,
+                     PacBio::BAM::BamWriter* writer,
+                     PacBio::BAM::BamWriter* scrapsWriter);  // the .cpp implementation always returns false
+    std::string HeaderReadType(void) const;  // "POLYMERASE"
+    std::string ScrapsReadType(void) const;  // "UNKNOWN"
+    std::string OutputFileSuffix(void) const;  // ".polymerase.bam"
+    std::string ScrapsFileSuffix(void) const;  // ".empty.bam"
+};
+
+#endif // POLYMERASEREADCONVERTER_H
diff --git a/utils/bax2bam/src/Settings.cpp b/utils/bax2bam/src/Settings.cpp
new file mode 100644
index 0000000..a6daa01
--- /dev/null
+++ b/utils/bax2bam/src/Settings.cpp
@@ -0,0 +1,261 @@
+// Author: Derek Barnett
+
+#include "Settings.h"
+#include "OptionParser.h"
+#include <boost/algorithm/string.hpp>
+#include <HDFNewBasReader.hpp>
+#include <pbbam/DataSet.h>
+#include <sstream>
+using namespace std;
+
+namespace internal {
+
+// Collect the resolved resource paths of a dataset XML that contain ".bax.h5"
+// (case-insensitive). Any exception is reported and yields an empty list.
+static
+vector<string> BaxFilenamesFromXml(const string& xmlFilename)
+{
+    using namespace PacBio::BAM;
+    try {
+        vector<string> filenames;
+        DataSet dataset(xmlFilename);
+        const vector<string> resources = dataset.ResolvedResourceIds();
+        for (const string& resource : resources) {
+            cerr << resource << endl;  // NOTE(review): looks like leftover debug output
+            const boost::iterator_range<string::const_iterator> baxFound = boost::algorithm::ifind_first(resource, ".bax.h5");
+            if (!baxFound.empty())
+                filenames.push_back(resource);
+        }
+        return filenames;
+    } catch (const std::exception& e) {
+        // Best effort: say what went wrong, then let the caller deal with the empty list.
+        cerr << "ERROR: could not read dataset XML '" << xmlFilename << "': " << e.what() << endl;
+        return vector<string>();
+    }
+}
+
+// Read whitespace-separated file names from a FOFN (file of file names).
+// A file that cannot be opened or read simply yields an empty list.
+static
+vector<string> FilenamesFromFofn(const string& fileName)
+{
+    vector<string> retval;
+    ifstream in_stream;
+    string line;
+
+    in_stream.open(fileName);
+
+    // Let the extraction itself drive the loop: testing eof() instead would spin
+    // forever on a stream that failed to open, because it never reaches EOF.
+    // operator>> skips whitespace, so blank lines produce no entries.
+    while (in_stream >> line)
+        retval.push_back(line);
+
+    return retval;
+}
+
+// True when fileName ends in ".bas.h5", compared case-insensitively.
+static bool isBasH5(const string& fileName)
+{
+    return boost::iends_with(fileName, ".bas.h5");
+}
+
+// Append the bax names listed by a bas.h5 to *output, or the name itself if the reader cannot initialize.
+static void H5FilenamesFromBasH5(const string& basFileName, vector<string>* const output)
+{
+    HDFNewBasReader reader;
+    if (!reader.Initialize(basFileName)) {
+        output->push_back(basFileName);
+        return;
+    }
+    for (const auto& baxName : reader.GetBaxFileNames())
+        output->push_back(baxName);
+}
+
+} // namespace internal
+
+// option names: the keys FromCommandLine() uses to look values up in optparse::Values
+const char* Settings::Option::datasetXml_     = "datasetXml";
+const char* Settings::Option::hqRegionMode_   = "hqRegionMode";
+const char* Settings::Option::input_          = "input";
+const char* Settings::Option::fofn_           = "fofn";
+const char* Settings::Option::losslessFrames_ = "losslessFrames";
+const char* Settings::Option::output_         = "output";
+const char* Settings::Option::polymeraseMode_ = "polymeraseMode";
+const char* Settings::Option::pulseFeatures_  = "pulseFeatures";
+const char* Settings::Option::subreadMode_    = "subreadMode";
+const char* Settings::Option::ccsMode_        = "ccsMode";
+const char* Settings::Option::internalMode_   = "internalMode";
+const char* Settings::Option::outputXml_      = "outputXml";
+const char* Settings::Option::sequelPlatform_ = "sequelPlatform";
+
+Settings::Settings(void)  // defaults before any command-line option is applied
+    : mode(Settings::SubreadMode)
+    , isInternal(false)
+    , isSequelInput(false)
+    , usingDeletionQV(true)
+    , usingDeletionTag(true)
+    , usingInsertionQV(true)
+    , usingIPD(true)
+    , usingMergeQV(true)
+    , usingPulseWidth(false)
+    , usingSubstitutionQV(true)
+    , usingSubstitutionTag(false)
+    , losslessFrames(false)
+{ }
+
+// Build a Settings object from the command line. Problems found here are
+// collected in settings.errors rather than thrown.
+Settings Settings::FromCommandLine(optparse::OptionParser& parser, int argc, char *argv[])
+{
+    Settings settings;
+
+    // general program info
+    settings.program = parser.prog();
+    settings.description = parser.description();
+    settings.version = parser.version();
+    for (int i = 1; i < argc; ++i) {
+        settings.args.append(argv[i]);
+        settings.args.append(" ");
+    }
+
+    const optparse::Values options = parser.parse_args(argc, argv);
+
+    // output prefix
+    // TODO: output dir ??
+    settings.outputBamPrefix = options[Settings::Option::output_];
+    settings.outputXmlFilename = options[Settings::Option::outputXml_];
+
+    // input files from dataset XML ?
+    if ( options.is_set(Settings::Option::datasetXml_) ) {
+        settings.datasetXmlFilename = options[Settings::Option::datasetXml_];
+        settings.inputBaxFilenames = internal::BaxFilenamesFromXml(settings.datasetXmlFilename);
+    }
+
+    // input files from fofn ?
+    else if ( options.is_set(Settings::Option::fofn_))
+    {
+        settings.fofnFilename = options[Settings::Option::fofn_];
+        settings.inputFilenames = internal::FilenamesFromFofn(settings.fofnFilename);
+    }
+
+    // else input files command-line args
+    else
+        settings.inputFilenames = parser.args();
+
+    // Process input files to convert Bas.H5 --> Bax.h5 as needed
+    for (const std::string& fn : settings.inputFilenames)
+    {
+        if (internal::isBasH5(fn))
+            internal::H5FilenamesFromBasH5(fn, &settings.inputBaxFilenames);
+        else
+            settings.inputBaxFilenames.push_back(fn);
+    }
+
+    if (settings.inputBaxFilenames.empty())
+        settings.errors.push_back("missing input BAX files.");
+
+    // mode
+    const bool isSubreadMode =
+            options.is_set(Settings::Option::subreadMode_) ? options.get(Settings::Option::subreadMode_)
+                                                           : false;
+    const bool isHQRegionMode =
+            options.is_set(Settings::Option::hqRegionMode_) ? options.get(Settings::Option::hqRegionMode_)
+                                                            : false;
+    const bool isPolymeraseMode =
+            options.is_set(Settings::Option::polymeraseMode_) ? options.get(Settings::Option::polymeraseMode_)
+                                                              : false;
+    const bool isCCS =
+            options.is_set(Settings::Option::ccsMode_) ? options.get(Settings::Option::ccsMode_)
+                                                       : false;
+
+    int modeCount = 0;
+    if (isSubreadMode)    ++modeCount;
+    if (isHQRegionMode)   ++modeCount;
+    if (isPolymeraseMode) ++modeCount;
+    if (isCCS)            ++modeCount;
+
+    if (modeCount == 0)
+        settings.mode = Settings::SubreadMode;
+    else if (modeCount == 1) {
+        if (isSubreadMode)    settings.mode = Settings::SubreadMode;
+        if (isHQRegionMode)   settings.mode = Settings::HQRegionMode;
+        if (isPolymeraseMode) settings.mode = Settings::PolymeraseMode;
+        if (isCCS)            settings.mode = Settings::CCSMode;
+    }
+    else
+        settings.errors.push_back("multiple modes selected");
+
+    // internal file mode
+    settings.isInternal = options.is_set(Settings::Option::internalMode_) ? options.get(Settings::Option::internalMode_)
+                                                                          : false;
+
+    // platform
+    settings.isSequelInput = options.is_set(Settings::Option::sequelPlatform_) ? options.get(Settings::Option::sequelPlatform_)
+                                                                                : false;
+
+    // frame data encoding
+    settings.losslessFrames = options.is_set(Settings::Option::losslessFrames_) ? options.get(Settings::Option::losslessFrames_)
+                                                                                : false;
+
+    // pulse features list
+    if (options.is_set(Settings::Option::pulseFeatures_)) {
+
+        // ignore defaults
+        settings.usingDeletionQV = false;
+        settings.usingDeletionTag = false;
+        settings.usingInsertionQV = false;
+        settings.usingIPD = false;
+        settings.usingMergeQV = false;
+        settings.usingPulseWidth = false;
+        settings.usingSubstitutionQV = false;
+        settings.usingSubstitutionTag = false;
+
+        // apply user-requested features
+        stringstream stream(options[Settings::Option::pulseFeatures_]);
+        string feature;
+        while(std::getline(stream, feature, ',')) {
+            if      (feature == "DeletionQV")      settings.usingDeletionQV = true;
+            else if (feature == "DeletionTag")     settings.usingDeletionTag = true;
+            else if (feature == "InsertionQV")     settings.usingInsertionQV = true;
+            else if (feature == "IPD")             settings.usingIPD = true;
+            else if (feature == "MergeQV")         settings.usingMergeQV = true;
+            else if (feature == "PulseWidth")      settings.usingPulseWidth = true;
+            else if (feature == "SubstitutionQV")  settings.usingSubstitutionQV = true;
+            else if (feature == "SubstitutionTag") settings.usingSubstitutionTag = true;
+            else
+                settings.errors.push_back(string("unknown pulse feature: ") + feature);
+        }
+    }
+
+#ifdef DEBUG_SETTINGS
+    // Compiled only when DEBUG_SETTINGS is defined: dump the effective settings to stderr.
+    string modeString;
+    if (settings.mode == Settings::SubreadMode)
+        modeString = "subread";
+    else if (settings.mode == Settings::HQRegionMode)
+        modeString = "hqRegion";
+    else if (settings.mode == Settings::PolymeraseMode)
+        modeString = "polymerase";
+    else
+        modeString = "ccs";
+
+    string platformString = settings.isSequelInput ? "Sequel" : "RS";
+
+    cerr << "CommandLine: " << settings.program << " " << settings.args << endl
+         << "Description: " << settings.description << endl
+         << "Version:     " << settings.version << endl
+         << "Mode:        " << modeString << endl
+         << "Platform:    " << platformString << endl
+         << "DeletionQV?:      " << ( settings.usingDeletionQV ? "yes" : "no" ) << endl
+         << "DeletionTag?:     " << ( settings.usingDeletionTag ? "yes" : "no" ) << endl
+         << "InsertionQV?:     " << ( settings.usingInsertionQV ? "yes" : "no" ) << endl
+         << "IPD?:             " << ( settings.usingIPD ? "yes" : "no" ) << endl
+         << "MergeQV?:         " << ( settings.usingMergeQV ? "yes" : "no" ) << endl
+         << "PulseWidth?:      " << ( settings.usingPulseWidth ? "yes" : "no" ) << endl
+         << "SubstitutionQV?:  " << ( settings.usingSubstitutionQV ? "yes" : "no" ) << endl
+         << "SubstitutionTag?: " << ( settings.usingSubstitutionTag ? "yes" : "no" ) << endl;
+#endif
+
+    return settings;
+}
diff --git a/utils/bax2bam/src/Settings.h b/utils/bax2bam/src/Settings.h
new file mode 100644
index 0000000..c060620
--- /dev/null
+++ b/utils/bax2bam/src/Settings.h
@@ -0,0 +1,87 @@
+// Author: Derek Barnett
+#ifndef SETTINGS_H
+#define SETTINGS_H
+
+#include <string>
+#include <vector>
+
+namespace optparse { class OptionParser; }
+
+class Settings  // Run configuration for bax2bam, built from the command line by FromCommandLine().
+{
+public:
+    enum Mode { SubreadMode      // which read type to output; main.cpp offers one flag per value
+              , HQRegionMode
+              , PolymeraseMode
+              , CCSMode
+              };
+
+    struct Option {  // option 'dest' keys; main.cpp passes these to .dest() when declaring options
+        static const char* datasetXml_;
+        static const char* hqRegionMode_;
+        static const char* input_;
+        static const char* fofn_;
+        static const char* losslessFrames_;
+        static const char* output_;
+        static const char* polymeraseMode_;
+        static const char* pulseFeatures_;
+        static const char* subreadMode_;
+        static const char* ccsMode_;
+        static const char* internalMode_;
+        static const char* outputXml_;
+        static const char* sequelPlatform_;
+    };
+
+public:
+    Settings(void);
+    static Settings FromCommandLine(optparse::OptionParser& parser,  // returns by value; problems are reported through 'errors'
+                                    int argc,
+                                    char* argv[]);
+
+public:
+    // input/output
+    std::vector<std::string> inputFilenames;
+    std::vector<std::string> inputBaxFilenames;
+    std::string datasetXmlFilename;
+    std::string fofnFilename;
+    std::string outputBamPrefix;
+    std::string outputBamFilename;
+    std::string scrapsBamFilename;
+    std::string outputXmlFilename;
+
+    // mode
+    Mode mode;
+    bool isInternal;  // SubreadConverter writes non-sequencing ZMWs to the scraps BAM only when this is set
+
+    // platform
+    bool isSequelInput;
+
+    // features (each flag is switched on when its name appears in the pulse-feature list)
+    bool usingDeletionQV;
+    bool usingDeletionTag;
+    bool usingInsertionQV;
+    bool usingIPD;
+    bool usingMergeQV;
+    bool usingPulseWidth;
+    bool usingSubstitutionQV;
+    bool usingSubstitutionTag;
+
+    // frame data encoding
+    bool losslessFrames;
+
+    // program info
+    std::string program;
+    std::string args;
+    std::string version;
+    std::string description;
+
+    // generated
+    std::string movieName;
+    std::string readGroupId;
+    std::string scrapsReadGroupId;
+
+    // command line parsing
+    std::vector<std::string> errors;  // one message per problem; main() prints them and exits when non-empty
+};
+
+#endif // SETTINGS_H
diff --git a/utils/bax2bam/src/SubreadConverter.cpp b/utils/bax2bam/src/SubreadConverter.cpp
new file mode 100644
index 0000000..931f94d
--- /dev/null
+++ b/utils/bax2bam/src/SubreadConverter.cpp
@@ -0,0 +1,395 @@
+// Author: Derek Barnett
+
+
+#include "SubreadConverter.h"
+#include "utils/RegionUtils.hpp"
+#include "HDFRegionTableReader.hpp"
+
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+
+#include <algorithm>
+#include <deque>
+#include <memory>
+
+#define MAX( A, B )     ( (A)>(B) ? (A) : (B) )
+#define MAX3( A, B, C ) MAX( MAX( A, B ), C )
+
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+SubreadConverter::SubreadConverter(Settings& settings)  // only forwards the settings to ConverterBase
+    : ConverterBase(settings)
+{ }
+
+SubreadConverter::~SubreadConverter(void) { }  // empty body: this class releases nothing itself
+
+// A span of a read (Start/End positions) together with flags telling whether
+// an adapter was found directly before and/or after it. Code in this file
+// treats an interval with End <= Start as empty.
+struct SubreadInterval
+{
+    size_t Start;
+    size_t End;
+    PacBio::BAM::LocalContextFlags LocalContextFlags;
+
+    // Empty interval at offset 0 with no adapter context.
+    SubreadInterval()
+        : Start(0), End(0), LocalContextFlags(NO_LOCAL_CONTEXT)
+    { }
+
+    // Interval from 'start' to 'end'; each adapter flag that is true adds the
+    // matching ADAPTER_BEFORE / ADAPTER_AFTER bit to LocalContextFlags.
+    SubreadInterval(size_t start, size_t end, bool adapterBefore = false, bool adapterAfter = false)
+        : Start(start)
+        , End(end)
+        , LocalContextFlags(
+              (adapterBefore ? ADAPTER_BEFORE : NO_LOCAL_CONTEXT) |
+              (adapterAfter  ? ADAPTER_AFTER  : NO_LOCAL_CONTEXT))
+    { }
+};
+
+namespace {
+
+// Orders ReadIntervals by ascending start; intervals with the same start are
+// ordered by ascending end.
+struct ReadIntervalComparer {
+    bool operator()(const ReadInterval& lhs, const ReadInterval& rhs) const {
+        return (lhs.start != rhs.start) ? (lhs.start < rhs.start)
+                                        : (lhs.end < rhs.end);
+    }
+};
+
+// Splits one ZMW's read into subread and adapter intervals.
+//
+//   intervals   - out: cleared, then filled (in read order) with the subreads
+//                 found inside the HQ region, each flagged with whether an
+//                 adapter precedes and/or follows it
+//   adapters    - out: cleared, then filled with the non-empty parts of the
+//                 adapter calls that overlap the HQ region
+//   regionTable - region annotations, looked up by hole number
+//   holeNumber  - the ZMW to process
+//   readLength  - length of the full read; used to repair an HQ region that
+//                 ends one base short of the read end
+//
+// Returns the (repaired) HQ interval. When the ZMW has no HQ region, or the
+// HQ region is empty/inverted, returns SubreadInterval(0, 0) and leaves both
+// output deques empty.
+SubreadInterval ComputeSubreadIntervals(deque<SubreadInterval>* const intervals,
+                                        deque<SubreadInterval>* const adapters,
+                                        RegionTable& regionTable,
+                                        const unsigned holeNumber,
+                                        const size_t readLength)
+{
+    // clear the outputs first
+    intervals->clear();
+    adapters->clear();
+
+    // region annotations of a zmw
+    RegionAnnotations zmwRegions = regionTable[holeNumber];
+
+    // Has non-empty HQregion or not?
+    if (!zmwRegions.HasHQRegion())
+        return SubreadInterval(0, 0);
+
+    const size_t hqStart = zmwRegions.HQStart();
+    size_t       hqEnd   = zmwRegions.HQEnd();
+
+    // Catch and repair 1-off errors in the HQ region
+    if (hqEnd == readLength-1)
+        hqEnd = readLength;
+
+    // Catch empty or invalid HQ regions and return empty
+    if (hqEnd <= hqStart)
+        return SubreadInterval(0, 0);
+
+    // adapter intervals of this zmw
+    vector<ReadInterval> adapterIntervals = zmwRegions.AdapterIntervals();
+
+    // Catch and trim overlapping adapter calls
+    // Shared starts indicate multiple alignments for the same adapter
+    // Unique starts indicate multiple overlapping adapters
+    // Therefore we trim adapter ends and remove any 0-length adapters
+    // such that the number of adapter regions == number of adapters
+    stable_sort(adapterIntervals.begin(), adapterIntervals.end(), ReadIntervalComparer());
+    for (size_t i = 1; i < adapterIntervals.size(); i++) {
+        if (adapterIntervals[i-1].end > adapterIntervals[i].start)
+            adapterIntervals[i-1].end = adapterIntervals[i].start;
+    }
+    adapterIntervals.erase(
+            std::remove_if(adapterIntervals.begin(), adapterIntervals.end(),
+                    [](const ReadInterval& interval) { return interval.start == interval.end; }),
+            adapterIntervals.end());
+
+    size_t subreadStart  = hqStart;
+    bool   adapterBefore = false;
+
+    for (const ReadInterval& adapter : adapterIntervals) {
+
+        const size_t adapterStart = adapter.start;
+        const size_t adapterEnd   = adapter.end;
+
+        // if we're not in the HQRegion yet, skip ahead
+        if (hqStart > adapterEnd)
+            continue;
+
+        // if the adapter is beyond the HQRegion, we're done
+        if (hqEnd < adapterStart)
+            break;
+
+        // If the subread is greater than length=0, save it
+        if (subreadStart < adapterStart)
+            intervals->emplace_back(subreadStart, adapterStart, adapterBefore, true);
+
+        // Save the region of the adapter that overlaps the HQ region.
+        // An adapter that only touches the HQ boundary has an empty overlap;
+        // it is not reported, because the internal-mode writer in ConvertFile
+        // does not filter empty adapters and asserts that a subread and an
+        // adapter never share a start position.
+        const size_t overlapStart = std::max({adapterStart, hqStart, subreadStart});
+        const size_t overlapEnd   = std::min(adapterEnd, hqEnd);
+        if (overlapStart < overlapEnd)
+            adapters->emplace_back(overlapStart, overlapEnd);
+
+        // the adapter still counts as context for the next subread,
+        // even when its overlap with the HQ region was empty
+        subreadStart  = adapterEnd;
+        adapterBefore = true;
+    }
+
+    // Save any region between the last adapter and the end of the HQ region as a subread
+    if (subreadStart < hqEnd)
+        intervals->emplace_back(subreadStart, hqEnd, adapterBefore, false);
+
+    return SubreadInterval(hqStart, hqEnd);
+}
+
+} // anon
+
+bool SubreadConverter::ConvertFile(HDFBasReader* reader,
+                                   PacBio::BAM::BamWriter* writer)
+{
+    return ConvertFile(reader, writer, nullptr);  // null scraps writer: the 3-argument overload then writes no scraps records
+}
+
+bool SubreadConverter::ConvertFile(HDFBasReader* reader,
+                                   PacBio::BAM::BamWriter* writer,
+                                   PacBio::BAM::BamWriter* scrapsWriter) // may be null: scraps output is then skipped
+{   // Converts every record of one input file into subread (and optionally scraps) BAM records; returns false on the first failure.
+    assert(reader);
+
+    // initialize with default values (shared across all unmapped subreads)
+    BamRecordImpl bamRecord;  // NOTE(review): never referenced again in this function
+
+    // read region table info
+    std::unique_ptr<HDFRegionTableReader> const regionTableReader(new HDFRegionTableReader);
+    RegionTable regionTable;
+    string fn = filenameForReader_[reader];
+    assert(!fn.empty());
+    if (regionTableReader->Initialize(fn) == 0) {
+        AddErrorMessage("could not read region table on "+fn);
+        return false;
+    }
+    regionTable.Reset();
+    regionTableReader->ReadTable(regionTable);
+    regionTableReader->Close();
+
+    // initialize read scores
+    InitReadScores(reader);
+
+    // fetch records from HDF5 file
+    SMRTSequence smrtRecord;  // reused for every record; Free() is called on each exit path and at the end of each iteration
+    while (reader->GetNext(smrtRecord)) {
+
+        // compute subread & adapter intervals
+        SubreadInterval hqInterval;
+        deque<SubreadInterval> subreadIntervals;
+        deque<SubreadInterval> adapterIntervals;
+        try {
+            hqInterval = ComputeSubreadIntervals(&subreadIntervals,
+                                                 &adapterIntervals,
+                                                 regionTable,
+                                                 smrtRecord.zmwData.holeNumber,
+                                                 smrtRecord.length);
+        } catch (runtime_error& e) {
+            AddErrorMessage(string(e.what()));
+            smrtRecord.Free();
+            return false;
+        }
+
+        // sequencing ZMW
+        if (IsSequencingZmw(smrtRecord))
+        {
+            // write subreads to main BAM file
+            for (const SubreadInterval& interval : subreadIntervals)
+            {
+                // skip invalid or 0-sized intervals
+                if (interval.End <= interval.Start)
+                    continue;
+
+                if (!WriteSubreadRecord(smrtRecord,
+                                        interval.Start,
+                                        interval.End,
+                                        ReadGroupId(),
+                                        static_cast<uint8_t>(interval.LocalContextFlags),
+                                        writer))
+                {
+                    smrtRecord.Free();
+                    return false;
+                }
+            }
+
+            // if scraps BAM file present
+            if (scrapsWriter)
+            {
+                // write 5-end LQ sequence
+                if (hqInterval.Start > 0)
+                {
+                    if (!WriteLowQualityRecord(smrtRecord,
+                                               0,
+                                               hqInterval.Start,
+                                               ScrapsReadGroupId(),
+                                               scrapsWriter))
+                    {
+                        smrtRecord.Free();
+                        return false;
+                    }
+                }
+
+                // write adapters
+                for (const SubreadInterval& interval : adapterIntervals) {
+
+                    // skip invalid or 0-sized adapters
+                    if (interval.End <= interval.Start)
+                        continue;
+
+                    if (!WriteAdapterRecord(smrtRecord,
+                                            interval.Start,
+                                            interval.End,
+                                            ScrapsReadGroupId(),
+                                            scrapsWriter))
+                    {
+                        smrtRecord.Free();
+                        return false;
+                    }
+                }
+
+                // write 3'-end LQ sequence
+                if (hqInterval.End < smrtRecord.length)
+                {
+                    if (!WriteLowQualityRecord(smrtRecord,
+                                               hqInterval.End,
+                                               smrtRecord.length,
+                                               ScrapsReadGroupId(),
+                                               scrapsWriter))
+                    {
+                        smrtRecord.Free();
+                        return false;
+                    }
+                }
+            }
+        } // sequencing ZMW
+
+        // non-sequencing ZMW
+        else
+        {
+            assert(!IsSequencingZmw(smrtRecord));
+
+            // only write these if scraps BAM present & we are in 'internal mode'
+            if (settings_.isInternal && scrapsWriter)
+            {
+                // write 5-end LQ sequence to scraps BAM
+                if (hqInterval.Start > 0)
+                {
+                    if (!WriteLowQualityRecord(smrtRecord,
+                                               0,
+                                               hqInterval.Start,
+                                               ScrapsReadGroupId(),
+                                               scrapsWriter))
+                    {
+                        smrtRecord.Free();
+                        return false;
+                    }
+                }
+
+                // write subreads & adapters to scraps BAM, sorted by query start
+                // NOTE(review): unlike the sequencing branch above, nothing here skips 0-sized intervals
+                while (!subreadIntervals.empty() && !adapterIntervals.empty()) {
+
+                    const SubreadInterval& subread = subreadIntervals.front();
+                    const SubreadInterval& adapter = adapterIntervals.front();
+                    assert(subread.Start != adapter.Start);
+
+                    if (subread.Start < adapter.Start)
+                    {
+                        if (!WriteFilteredRecord(smrtRecord,
+                                                 subread.Start,
+                                                 subread.End,
+                                                 ScrapsReadGroupId(),
+                                                 static_cast<uint8_t>(subread.LocalContextFlags),
+                                                 scrapsWriter))
+                        {
+                            smrtRecord.Free();
+                            return false;
+                        }
+
+                        subreadIntervals.pop_front();
+                    }
+                    else
+                    {
+                        if (!WriteAdapterRecord(smrtRecord,
+                                                adapter.Start,
+                                                adapter.End,
+                                                ScrapsReadGroupId(),
+                                                scrapsWriter))
+                        {
+                            smrtRecord.Free();
+                            return false;
+                        }
+                        adapterIntervals.pop_front();
+                    }
+                }
+
+                // flush any trailing subread intervals
+                while (!subreadIntervals.empty())
+                {
+                    assert(adapterIntervals.empty());
+                    const SubreadInterval& subread = subreadIntervals.front();
+                    if (!WriteFilteredRecord(smrtRecord,
+                                             subread.Start,
+                                             subread.End,
+                                             ScrapsReadGroupId(),
+                                             static_cast<uint8_t>(subread.LocalContextFlags),
+                                             scrapsWriter))
+                    {
+                        smrtRecord.Free();
+                        return false;
+                    }
+
+                    subreadIntervals.pop_front();
+                }
+
+                // flush any remaining adapter intervals
+                while (!adapterIntervals.empty())
+                {
+                    assert(subreadIntervals.empty());
+                    const SubreadInterval& adapter = adapterIntervals.front();
+                    if (!WriteAdapterRecord(smrtRecord,
+                                            adapter.Start,
+                                            adapter.End,
+                                            ScrapsReadGroupId(),
+                                            scrapsWriter))
+                    {
+                        smrtRecord.Free();
+                        return false;
+                    }
+                    adapterIntervals.pop_front();
+                }
+
+                // write 3'-end LQ sequence to scraps BAM
+                if (hqInterval.End < smrtRecord.length)
+                {
+                    if (!WriteLowQualityRecord(smrtRecord,
+                                               hqInterval.End,
+                                               smrtRecord.length,
+                                               ScrapsReadGroupId(),
+                                               scrapsWriter))
+                    {
+                        smrtRecord.Free();
+                        return false;
+                    }
+                }
+            }
+        } // non-sequencing ZMW
+
+        smrtRecord.Free();
+    }
+
+    // if we get here, all OK
+    return true; 
+} 
+
+string SubreadConverter::HeaderReadType(void) const  // read-type label for the main output; presumably used in the BAM header - confirm in ConverterBase
+{ return "SUBREAD"; }
+
+string SubreadConverter::ScrapsReadType(void) const  // read-type label for the scraps output
+{ return "SCRAP"; }
+
+string SubreadConverter::OutputFileSuffix(void) const  // filename suffix for the main BAM
+{ return ".subreads.bam"; }
+
+string SubreadConverter::ScrapsFileSuffix(void) const  // filename suffix for the scraps BAM
+{ return ".scraps.bam"; }
diff --git a/utils/bax2bam/src/SubreadConverter.h b/utils/bax2bam/src/SubreadConverter.h
new file mode 100644
index 0000000..3d231f1
--- /dev/null
+++ b/utils/bax2bam/src/SubreadConverter.h
@@ -0,0 +1,26 @@
+// Author: Derek Barnett
+
+#ifndef SUBREADCONVERTER_H
+#define SUBREADCONVERTER_H
+
+#include "ConverterBase.h"
+
+class SubreadConverter : public ConverterBase<>  // converter whose main output is subread records
+{
+public:
+    SubreadConverter(Settings& settings);
+    ~SubreadConverter(void);
+
+protected:
+    bool ConvertFile(HDFBasReader* reader,
+                     PacBio::BAM::BamWriter* writer);  // main output only; forwards with a null scraps writer
+    bool ConvertFile(HDFBasReader* reader,
+                     PacBio::BAM::BamWriter* writer,
+                     PacBio::BAM::BamWriter* scrapsWriter);  // scrapsWriter may be null; returns false on the first failure
+    std::string HeaderReadType(void) const;    // returns "SUBREAD"
+    std::string ScrapsReadType(void) const;    // returns "SCRAP"
+    std::string OutputFileSuffix(void) const;  // returns ".subreads.bam"
+    std::string ScrapsFileSuffix(void) const;  // returns ".scraps.bam"
+};
+
+#endif // SUBREADCONVERTER_H
diff --git a/utils/bax2bam/src/main.cpp b/utils/bax2bam/src/main.cpp
new file mode 100644
index 0000000..223004f
--- /dev/null
+++ b/utils/bax2bam/src/main.cpp
@@ -0,0 +1,118 @@
+// Author: Derek Barnett
+
+#include "Bax2Bam.h"
+#include "OptionParser.h"
+#include "Settings.h"
+#include <iostream>
+#include <string>
+#include <cstdlib>
+using namespace std;
+
+// Entry point of bax2bam: declares the command-line options, parses them into
+// a Settings object and runs the conversion.
+//
+// Returns EXIT_FAILURE, after printing every parse error followed by the help
+// text, when the command line is invalid; otherwise returns the result of
+// Bax2Bam::Run.
+int main(int argc, char* argv[])
+{
+    // setup help & options
+    optparse::OptionParser parser;
+    parser.description("bax2bam converts the legacy PacBio basecall format (bax.h5) into the BAM basecall format.");
+    parser.prog("bax2bam");
+    parser.version("0.0.8");
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto ioGroup = optparse::OptionGroup(parser, "Input/output files");
+    ioGroup.add_option("")
+           .dest(Settings::Option::input_)
+           .metavar("movie.1.bax.h5 movie.2.bax.h5 ...")
+           .help("Input files which should be from the same movie");
+    ioGroup.add_option("--xml")
+           .dest(Settings::Option::datasetXml_)
+           .metavar("STRING")
+           .help("DataSet XML file containing a list of movie names");
+    ioGroup.add_option("-f", "--fofn")
+           .dest(Settings::Option::fofn_)
+           .metavar("STRING")
+           .help("File-of-file-names containing a list of input files");
+    ioGroup.add_option("-o")
+           .dest(Settings::Option::output_)
+           .metavar("STRING")
+           .help("Prefix of output filenames. Movie name will be used if no prefix provided");
+    ioGroup.add_option("--output-xml")
+           .dest(Settings::Option::outputXml_)
+           .metavar("STRING")
+           .help("Explicit output XML name. If none provided via this arg, bax2bam will use -o prefix (<prefix>.dataset.xml). "
+                 "If that is not specified either, the output XML filename will be <moviename>.dataset.xml");
+    parser.add_option_group(ioGroup);
+
+    auto platformGroup = optparse::OptionGroup(parser, "Input sequencing platform");
+    platformGroup.add_option("--sequel-input")
+                 .dest(Settings::Option::sequelPlatform_)
+                 .action("store_true")
+                 .help("Specify that input data is from Sequel. "
+                       "bax2bam will assume RS unless this option is specified");
+    // register the group like every other group below; it was previously
+    // built but never handed to the parser
+    parser.add_option_group(platformGroup);
+
+    auto readModeGroup = optparse::OptionGroup(parser, "Output read types (mutually exclusive)");
+    readModeGroup.add_option("--subread")
+                 .dest(Settings::Option::subreadMode_)
+                 .action("store_true")
+                 .help("Output subreads (default)");
+    readModeGroup.add_option("--hqregion")
+                 .dest(Settings::Option::hqRegionMode_)
+                 .action("store_true")
+                 .help("Output HQ regions");
+    readModeGroup.add_option("--polymeraseread")
+                 .dest(Settings::Option::polymeraseMode_)
+                 .action("store_true")
+                 .help("Output full polymerase read");
+    readModeGroup.add_option("--ccs")
+                 .dest(Settings::Option::ccsMode_)
+                 .action("store_true")
+                 .help("Output CCS sequences");
+    parser.add_option_group(readModeGroup);
+
+    auto featureGroup = optparse::OptionGroup(parser, "Pulse feature options");
+    featureGroup.group_description("Configure pulse features in the output BAM. Supported features include:\n"
+                                   "    Pulse Feature:    BAM tag:  Default:\n"
+                                   "    DeletionQV        dq        Y\n"
+                                   "    DeletionTag       dt        Y\n"
+                                   "    InsertionQV       iq        Y\n"
+                                   "    IPD               ip        Y\n"
+                                   "    PulseWidth        pw        N\n"
+                                   "    MergeQV           mq        Y\n"
+                                   "    SubstitutionQV    sq        Y\n"
+                                   "    SubstitutionTag   st        N\n"
+                                   "If this option is used, then only those features listed will be included, "
+                                   "regardless of the default state."
+                                   );
+    featureGroup.add_option("--pulsefeatures")
+                .dest(Settings::Option::pulseFeatures_)
+                .metavar("STRING")
+                .help("Comma-separated list of desired pulse features, using the names listed above.\n");
+    featureGroup.add_option("--losslessframes")
+                .dest(Settings::Option::losslessFrames_)
+                .action("store_true")
+                .help("Store full, 16-bit IPD/PulseWidth data, instead of (default) downsampled, 8-bit encoding.");
+    parser.add_option_group(featureGroup);
+
+    auto bamModeGroup = optparse::OptionGroup(parser, "Output BAM file type");
+    bamModeGroup.add_option("--internal")
+                .dest(Settings::Option::internalMode_)
+                .action("store_true")
+                .help("Output BAMs in internal mode. Currently this indicates that "
+                      "non-sequencing ZMWs should be included in the output scraps "
+                      "BAM file, if applicable."
+                      );
+    parser.add_option_group(bamModeGroup);
+
+    // parse command line
+    Settings settings = Settings::FromCommandLine(parser, argc, argv);
+    if (!settings.errors.empty()) {
+        cerr << endl;
+        // iterate by reference so the messages are not copied
+        for (const auto& e : settings.errors)
+            cerr << "ERROR: " << e << endl;
+        cerr << endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // main conversion
+    return Bax2Bam::Run(settings);
+}
diff --git a/utils/bax2bam/tests/CMakeLists.txt b/utils/bax2bam/tests/CMakeLists.txt
new file mode 100644
index 0000000..8045024
--- /dev/null
+++ b/utils/bax2bam/tests/CMakeLists.txt
@@ -0,0 +1,88 @@
+file(MAKE_DIRECTORY  ${Bax2Bam_TestsDir}/bin)  # directory the test executable is written to and run from
+
+# Generate paths for test data (fills in the @...@ placeholders of TestData.h.in)
+configure_file(
+    ${Bax2Bam_TestsDir}/src/TestData.h.in
+    ${Bax2Bam_TestsDir}/src/TestData.h
+)
+include(files.cmake)  # defines Bax2BamTest_H and Bax2BamTest_CPP, used for SOURCES below
+include_directories(
+    ${BLASR_INCLUDE_DIRS}
+    ${Boost_INCLUDE_DIRS}
+    ${HDF5_INCLUDE_DIRS}
+    ${HTSLIB_INCLUDE_DIRS}
+    ${PacBioBAM_INCLUDE_DIRS}
+    ${PBDATA_INCLUDE_DIRS}
+    ${PBDATA_ROOT_DIR}
+    ${PBIHDF_INCLUDE_DIRS}
+    ${ZLIB_INCLUDE_DIRS}
+    ${gtest_SOURCE_DIR}/include 
+    ${gtest_SOURCE_DIR}
+#    ${Bax2BamTest_H}
+)
+
+# Grab postprimary test source files (headers and sources listed in files.cmake)
+
+set(SOURCES
+    ${Bax2BamTest_H}
+    ${Bax2BamTest_CPP}
+)
+
+# shared CXX flags for src & tests
+# Each optional -Wno-* flag is appended only when the compiler accepts it.
+include(CheckCXXCompilerFlag)
+set(Bax2Bam_CXX_FLAGS "-g -std=c++11 -Wall")
+
+# quash warnings from pbdata
+check_cxx_compiler_flag("-Wno-overloaded-virtual" HAS_NO_OVERLOADED_VIRTUAL)
+if(HAS_NO_OVERLOADED_VIRTUAL)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-overloaded-virtual")
+endif()
+check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+if(HAS_NO_UNUSED_PRIVATE_FIELD)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-unused-private-field")
+endif()
+check_cxx_compiler_flag("-Wno-unused-variable" HAS_NO_UNUSED_VARIABLE)
+if(HAS_NO_UNUSED_VARIABLE)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-unused-variable")
+endif()
+check_cxx_compiler_flag("-Wno-uninitialized" HAS_NO_UNINITIALIZED)
+if(HAS_NO_UNINITIALIZED)
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-uninitialized")
+endif()
+# NOTE: -Wno-unused-local-typedef used to quash clang warnings w/ Boost
+check_cxx_compiler_flag("-Wno-unused-local-typedef" HAS_NO_UNUSED_LOCAL_TYPEDEF)
+if(HAS_NO_UNUSED_LOCAL_TYPEDEF)
+    # append to Bax2Bam_CXX_FLAGS (the variable actually consumed below);
+    # the misspelled name "Bam2Bax_CXX_FLAGS" meant this flag was never applied
+    set(Bax2Bam_CXX_FLAGS "${Bax2Bam_CXX_FLAGS} -Wno-unused-local-typedef")
+endif()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Bax2Bam_CXX_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${Bax2Bam_EXE_LINKER_FLAGS}")
+
+add_executable(test_bax2bam ${SOURCES})  # single test binary built from all listed test sources
+# add_executable(test_bax2bam ${Bax2Bam_TestsDir}/src/test_ccs.cpp)
+set_target_properties(test_bax2bam PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${Bax2Bam_TestsDir}/bin
+)
+if (NOT APPLE)
+    set(MY_LIBRT -lrt)  # link librt everywhere except on Apple platforms, where MY_LIBRT stays unset
+else()
+endif()
+target_link_libraries(test_bax2bam
+    gtest
+    gtest_main
+    ${BLASR_LIBRARIES}
+    ${PBIHDF_LIBRARIES}
+    ${PBDATA_LIBRARIES} 
+    ${HDF5_CPP_LIBRARIES}
+    ${HDF5_LIBRARIES}
+    ${PacBioBAM_LIBRARIES}
+    ${HTSLIB_LIBRARIES}
+    ${ZLIB_LIBRARIES}
+    ${CMAKE_THREAD_LIBS_INIT} # quirky pthreads
+    ${MY_LIBRT}
+)
+# add_test(test_bax2bam test_bax2bam)
+add_test(
+    NAME UnitTests
+    WORKING_DIRECTORY ${Bax2Bam_TestsDir}/bin  # same directory the test binary is written to
+    COMMAND test_bax2bam
+)
diff --git a/utils/bax2bam/tests/bax2bam.t b/utils/bax2bam/tests/bax2bam.t
new file mode 100644
index 0000000..625ee6f
--- /dev/null
+++ b/utils/bax2bam/tests/bax2bam.t
@@ -0,0 +1,7 @@
+
+Simple test to make sure bax2bam runs properly.
+
+  $ cd $WORKSPACE
+  $ cd smrtanalysis/_output/modulebuilds/bioinformatics/staging/PostPrimary/bax2bam/_output/install/binwrap-build
+  $ rm -f tst1.*.bam
+  $ ./bax2bam -o tst1 /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5
diff --git a/utils/bax2bam/tests/files.cmake b/utils/bax2bam/tests/files.cmake
new file mode 100644
index 0000000..45d493b
--- /dev/null
+++ b/utils/bax2bam/tests/files.cmake
@@ -0,0 +1,52 @@
+
+# test case headers (TestData.h is generated from TestData.h.in by configure_file in tests/CMakeLists.txt)
+set( Bax2BamTest_H
+
+    ${Bax2Bam_TestsDir}/src/TestData.h
+    ${Bax2Bam_TestsDir}/src/TestUtils.h
+)
+
+# test case sources (compiled together into the test_bax2bam executable)
+set( Bax2BamTest_CPP
+
+    ${Bax2Bam_TestsDir}/src/test_ccs.cpp
+    ${Bax2Bam_TestsDir}/src/test_common.cpp
+    ${Bax2Bam_TestsDir}/src/test_hqregions.cpp
+    ${Bax2Bam_TestsDir}/src/test_polymerase.cpp
+    ${Bax2Bam_TestsDir}/src/test_subreads.cpp
+    ${Bax2Bam_TestsDir}/src/TestUtils.cpp
+)
+
+# GoogleTest headers
+# NOTE(review): GTest_H and GTest_CPP are not referenced by tests/CMakeLists.txt, which links gtest/gtest_main instead - confirm they are still needed
+set( GTest_H
+
+    ${GTest_IncludeDir}/gtest/gtest-death-test.h
+    ${GTest_IncludeDir}/gtest/gtest-message.h
+    ${GTest_IncludeDir}/gtest/gtest-param-test.h
+    ${GTest_IncludeDir}/gtest/gtest-printers.h
+    ${GTest_IncludeDir}/gtest/gtest-spi.h
+    ${GTest_IncludeDir}/gtest/gtest-test-part.h
+    ${GTest_IncludeDir}/gtest/gtest-typed-test.h
+    ${GTest_IncludeDir}/gtest/gtest.h
+    ${GTest_IncludeDir}/gtest/gtest_pred_impl.h
+    ${GTest_IncludeDir}/gtest/gtest_prod.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-death-test-internal.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-filepath.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-internal.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-linked_ptr.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-param-util-generated.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-param-util.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-port.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-string.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-tuple.h
+    ${GTest_IncludeDir}/gtest/internal/gtest-type-util.h
+
+    ${GTest_SourceDir}/gtest-internal-inl.h
+)
+
+# GoogleTest sources
+set( GTest_CPP
+    ${GTest_SourceDir}/gtest-all.cc
+    ${GTest_SourceDir}/gtest_main.cc
+)
+
diff --git a/utils/bax2bam/tests/src/TestData.h.in b/utils/bax2bam/tests/src/TestData.h.in
new file mode 100644
index 0000000..11fa1f8
--- /dev/null
+++ b/utils/bax2bam/tests/src/TestData.h.in
@@ -0,0 +1,17 @@
+// Author: Derek Barnett
+
+#ifndef TESTDATA_H
+#define TESTDATA_H
+
+#include <string>
+
+namespace tests {  // paths used by the test suite; @...@ placeholders are substituted by CMake's configure_file
+
+const std::string Bax2Bam_Exe  = std::string("@Bax2Bam_BinDir@/bax2bam");  // executable launched by RunBax2Bam
+const std::string Source_Dir   = std::string("@Bax2Bam_TestsDir@");
+const std::string Bin_Dir      = std::string("@CMAKE_CURRENT_BINARY_DIR@");
+const std::string Data_Dir     = std::string("/pbi/dept/secondary/siv/testdata/bax2bam/data");  // NOTE(review): hard-coded absolute path, not configurable from CMake
+
+} // namespace tests
+
+#endif // TESTDATA_H
diff --git a/utils/bax2bam/tests/src/TestUtils.cpp b/utils/bax2bam/tests/src/TestUtils.cpp
new file mode 100644
index 0000000..8724ea9
--- /dev/null
+++ b/utils/bax2bam/tests/src/TestUtils.cpp
@@ -0,0 +1,43 @@
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include "TestUtils.h"
+
+#include <gtest/gtest.h>
+#include <cstdio>
+#include <cstdlib>
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+// Deletes every file named in 'filenames'. The result of each remove() call
+// is ignored, so names that do not exist are silently skipped.
+void RemoveFiles(const vector<string>& filenames)
+{
+    // iterate by const reference: the names are only read, so copying each string is wasted work
+    for (const auto& fn : filenames)
+        remove(fn.c_str());
+}
+
+// Single-file convenience wrapper: wraps 'filename' in a one-element list
+// and hands it to RemoveFiles.
+void RemoveFile(const string& filename)
+{
+    const vector<string> single(1, filename);
+    RemoveFiles(single);
+}
+
+// Runs the bax2bam executable (tests::Bax2Bam_Exe) through system().
+//
+//   baxFilenames   - input files, appended to the command line in order
+//   outputType     - first argument after the executable, e.g. "--ccs"
+//   additionalArgs - extra arguments placed after outputType; omitted when empty
+//
+// Returns the value returned by system(). Arguments are joined with single
+// spaces and are not quoted.
+int RunBax2Bam(const vector<string>& baxFilenames,
+               const string& outputType,
+               const string& additionalArgs)
+{
+    // layout: <exe> <outputType> [additionalArgs] <bax file>...
+    string commandLine = tests::Bax2Bam_Exe + string(" ") + outputType;
+    if (!additionalArgs.empty()) {
+        commandLine += string(" ");
+        commandLine += additionalArgs;
+    }
+    // by const reference: the filenames are only read
+    for (const auto& fn : baxFilenames) {
+        commandLine += string(" ");
+        commandLine += fn;
+    }
+
+    return system(commandLine.c_str());
+}
diff --git a/utils/bax2bam/tests/src/TestUtils.h b/utils/bax2bam/tests/src/TestUtils.h
new file mode 100644
index 0000000..65bc771
--- /dev/null
+++ b/utils/bax2bam/tests/src/TestUtils.h
@@ -0,0 +1,13 @@
+// Author: Derek Barnett
+
+// Include guard: without it a second inclusion would repeat the default
+// argument of RunBax2Bam, which is a compile error.
+#ifndef TESTUTILS_H
+#define TESTUTILS_H
+
+#include "SMRTSequence.hpp"
+#include <pbbam/BamRecord.h>
+#include <string>
+#include <vector>
+
+// Deletes one file / every file in the list; failures to delete are ignored.
+void RemoveFile(const std::string& filename);
+void RemoveFiles(const std::vector<std::string>& filenames);
+
+// Launches the bax2bam executable with "<outputType> [additionalArgs] <files...>"
+// and returns the result of system().
+int RunBax2Bam(const std::vector<std::string>& baxFilenames,
+               const std::string& outputType,
+               const std::string& additionalArgs = std::string());
+
+#endif // TESTUTILS_H
diff --git a/utils/bax2bam/tests/src/test_ccs.cpp b/utils/bax2bam/tests/src/test_ccs.cpp
new file mode 100644
index 0000000..f388bca
--- /dev/null
+++ b/utils/bax2bam/tests/src/test_ccs.cpp
@@ -0,0 +1,219 @@
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include "TestUtils.h"
+
+#include "CCSSequence.hpp"
+#include "HDFCCSReader.hpp"
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/EntireFileQuery.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+TEST(CcsTest, EndToEnd_Multiple)
+{
+    // End-to-end check: run bax2bam --ccs on one .ccs.h5 input, then compare the BAM header and records with the source
+    const string movieName = "m131018_081703_42161_c100585152550000001823088404281404_s1_p0";
+
+    vector<string> baxFilenames;
+    baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.ccs.h5");
+
+    const string generatedBam = movieName + ".ccs.bam";
+
+    // run conversion
+    const int result = RunBax2Bam(baxFilenames, "--ccs");
+    EXPECT_EQ(0, result);
+
+    {   // ensure PBI exists
+        const BamFile generatedBamFile(generatedBam);
+        EXPECT_TRUE(generatedBamFile.PacBioIndexExists());
+    }
+
+    // open BAX reader on original data
+    HDFCCSReader<CCSSequence> baxReader;
+    baxReader.IncludeField("Basecall");
+    baxReader.IncludeField("QualityValue");
+    baxReader.IncludeField("DeletionQV");
+    baxReader.IncludeField("InsertionQV");
+    baxReader.IncludeField("SubstitutionQV");
+
+    string baxBasecallerVersion;
+    string baxBindingKit;
+    string baxSequencingKit;
+
+    // set magic bits
+    baxReader.SetReadBasesFromCCS();
+
+    const int initOk = baxReader.Initialize(baxFilenames.front());
+    EXPECT_EQ(1, initOk);
+    if (initOk == 1) {
+
+        if (baxReader.scanDataReader.fileHasScanData && baxReader.scanDataReader.initializedRunInfoGroup) {
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("BindingKit")) {
+                HDFAtom<std::string> bkAtom;
+                if (bkAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "BindingKit")) {
+                    bkAtom.Read(baxBindingKit);
+                    bkAtom.dataspace.close();
+                }
+            }
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("SequencingKit")) {
+                HDFAtom<std::string> skAtom;
+                if (skAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "SequencingKit")) {
+                    skAtom.Read(baxSequencingKit);
+                    skAtom.dataspace.close();
+                }
+            }
+
+            {
+                HDFGroup bcGroup;
+                if (baxReader.pulseDataGroup.ContainsObject("BaseCalls") &&
+                    bcGroup.Initialize(baxReader.pulseDataGroup.group, "BaseCalls"))
+                {
+                    HDFAtom<std::string> clAtom;
+                    if (bcGroup.ContainsAttribute("ChangeListID") &&
+                        clAtom.Initialize(bcGroup.group, "ChangeListID"))
+                    {
+                        clAtom.Read(baxBasecallerVersion);
+                        clAtom.dataspace.close();
+                    }
+                    bcGroup.Close();
+                }
+            }
+        }
+    }
+
+    EXPECT_NO_THROW(
+    {
+        // open BAM file
+        BamFile bamFile(generatedBam);
+
+        // check BAM header information
+        const BamHeader& header = bamFile.Header();
+        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(string("unknown"), header.SortOrder());
+        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_TRUE(header.Sequences().empty());
+        EXPECT_TRUE(header.Comments().empty());
+        ASSERT_FALSE(header.Programs().empty());
+
+        const vector<string> readGroupIds = header.ReadGroupIds();
+        ASSERT_FALSE(readGroupIds.empty());
+        const ReadGroupInfo& rg = header.ReadGroup(readGroupIds.front());
+
+        string rawId = movieName + "//CCS";
+        string md5Id;
+        MakeMD5(rawId, md5Id, 8);
+        EXPECT_EQ(md5Id, rg.Id());
+
+        EXPECT_EQ(string("PACBIO"), rg.Platform());
+        EXPECT_EQ(movieName, rg.MovieName());
+
+        EXPECT_TRUE(rg.SequencingCenter().empty());
+        EXPECT_TRUE(rg.Date().empty());
+        EXPECT_TRUE(rg.FlowOrder().empty());
+        EXPECT_TRUE(rg.KeySequence().empty());
+        EXPECT_TRUE(rg.Library().empty());
+        EXPECT_TRUE(rg.Programs().empty());
+        EXPECT_TRUE(rg.PredictedInsertSize().empty());
+        EXPECT_TRUE(rg.Sample().empty());
+
+        EXPECT_EQ("CCS", rg.ReadType());
+        EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
+        EXPECT_EQ(baxBindingKit, rg.BindingKit());
+        EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
+        EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+        EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
+        EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
+        EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::DELETION_TAG));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::IPD));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::MERGE_QV));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG));
+
+        // compare every BAM record against the next non-empty CCS read from the BAX file
+        CCSSequence baxRecord;
+        // (the hole number is read per record inside the loop below)
+
+        size_t numTested = 0;
+        EntireFileQuery entireFile(bamFile);
+        for (BamRecord& bamRecord : entireFile) {
+            while (baxReader.GetNext(baxRecord)) {  // advance past zero-length CCS reads
+                if (baxRecord.length > 0)
+                    goto compare;
+            }
+            goto cleanup;  // BAX reader ran out of reads: stop comparing
+
+compare:
+            EXPECT_GT(baxRecord.length, 0U);
+            const BamRecordImpl& bamRecordImpl = bamRecord.Impl();
+            EXPECT_EQ(4680U,bamRecordImpl.Bin());
+            EXPECT_EQ(0,   bamRecordImpl.InsertSize());
+            EXPECT_EQ(255, bamRecordImpl.MapQuality());
+            EXPECT_EQ(-1,  bamRecordImpl.MatePosition());
+            EXPECT_EQ(-1,  bamRecordImpl.MateReferenceId());
+            EXPECT_EQ(-1,  bamRecordImpl.Position());
+            EXPECT_EQ(-1,  bamRecordImpl.ReferenceId());
+            EXPECT_FALSE(bamRecordImpl.IsMapped());
+
+            const int holeNumber      = baxRecord.zmwData.holeNumber;
+            const int numPasses       = baxRecord.numPasses;
+            const string expectedName = baxRecord.GetName();
+            EXPECT_EQ(expectedName, bamRecordImpl.Name());
+
+            using PacBio::BAM::QualityValue;
+            using PacBio::BAM::QualityValues;
+
+            const DNALength length = baxRecord.length;
+
+            string expectedSequence;
+            expectedSequence.assign((const char*)baxRecord.seq, length);
+
+            QualityValues expectedQualities;
+            expectedQualities.assign((uint8_t*)baxRecord.qual.data, baxRecord.qual.data + length);
+
+            const string bamSequence = bamRecord.Sequence();
+            const QualityValues bamQualities = bamRecord.Qualities();
+            EXPECT_EQ(expectedSequence,  bamSequence);
+            EXPECT_EQ(expectedQualities, bamQualities);
+
+            const QualityValues bamDeletionQVs     = bamRecord.DeletionQV();
+            const QualityValues bamInsertionQVs    = bamRecord.InsertionQV();
+            const QualityValues bamSubstitutionQVs = bamRecord.SubstitutionQV();
+
+            for (size_t i = 0; i < length; ++i) {
+                EXPECT_EQ((QualityValue)baxRecord.GetDeletionQV(i),     bamDeletionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetInsertionQV(i),    bamInsertionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetSubstitutionQV(i), bamSubstitutionQVs.at(i));
+            }
+
+            EXPECT_EQ(md5Id,        bamRecord.ReadGroupId());
+            EXPECT_EQ(movieName,    bamRecord.MovieName());
+            EXPECT_EQ(numPasses,    bamRecord.NumPasses());
+            EXPECT_EQ(holeNumber,   bamRecord.HoleNumber());
+            EXPECT_FALSE(bamRecord.HasLocalContextFlags());
+            EXPECT_FALSE(bamRecord.HasSignalToNoise());
+            numTested++;
+        }
+
+cleanup:
+        EXPECT_GT(numTested, 1UL);
+
+        // cleanup -- NOTE(review): not reached if an ASSERT_* above fails or an exception is thrown, so the BAM/PBI stay on disk
+        baxReader.Close();
+        RemoveFile(generatedBam);
+        RemoveFile(generatedBam + ".pbi");
+
+    }); // EXPECT_NO_THROW
+}
diff --git a/utils/bax2bam/tests/src/test_common.cpp b/utils/bax2bam/tests/src/test_common.cpp
new file mode 100644
index 0000000..052f395
--- /dev/null
+++ b/utils/bax2bam/tests/src/test_common.cpp
@@ -0,0 +1,16 @@
+// Author: Derek Barnett
+
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+
+TEST(CommonTest, EmptyCommandLine)
+{
+    EXPECT_TRUE(true);  // placeholder: always passes; no command-line handling is exercised here
+}
+
+TEST(CommonTest, NormalCommandLine) 
+{
+    EXPECT_TRUE(true);  // placeholder: always passes; no command-line handling is exercised here
+}
+
diff --git a/utils/bax2bam/tests/src/test_hqregions.cpp b/utils/bax2bam/tests/src/test_hqregions.cpp
new file mode 100644
index 0000000..c443850
--- /dev/null
+++ b/utils/bax2bam/tests/src/test_hqregions.cpp
@@ -0,0 +1,421 @@
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include "TestUtils.h"
+
+#include "HDFBasReader.hpp"
+#include "alignment/utils/RegionUtils.hpp"
+#include "hdf/HDFRegionTableReader.hpp"
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/EntireFileQuery.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include <cstdio>
+#include <cstdlib>
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+TEST(HqRegionsTest, EndToEnd_Single)
+{
+    // End-to-end check: run bax2bam --hqregion on one bax.h5 input, then verify the hqregions and lqregions BAMs
+    const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0";
+
+    vector<string> baxFilenames;
+    baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5");
+
+    const string generatedBam = movieName + ".hqregions.bam";
+    const string scrapBam = movieName + ".lqregions.bam";
+
+    // run conversion
+    const int result = RunBax2Bam(baxFilenames, "--hqregion");
+    EXPECT_EQ(0, result);
+
+    {   // ensure PBIs exist
+        const BamFile generatedBamFile(generatedBam);
+        const BamFile scrapsBamFile(scrapBam);
+        EXPECT_TRUE(generatedBamFile.PacBioIndexExists());
+        EXPECT_TRUE(scrapsBamFile.PacBioIndexExists());
+    }
+
+    // open BAX reader on original data
+    HDFBasReader baxReader;
+    baxReader.IncludeField("Basecall");
+    baxReader.IncludeField("DeletionQV");
+    baxReader.IncludeField("DeletionTag");
+    baxReader.IncludeField("InsertionQV");
+    baxReader.IncludeField("PreBaseFrames");
+    baxReader.IncludeField("MergeQV");
+    baxReader.IncludeField("SubstitutionQV");
+    baxReader.IncludeField("HQRegionSNR");
+    // not using SubTag or PulseWidth here
+
+    string baxBasecallerVersion;
+    string baxBindingKit;
+    string baxSequencingKit;
+
+    const int initOk = baxReader.Initialize(baxFilenames.front());
+    EXPECT_EQ(1, initOk);
+    if (initOk == 1) {
+
+        if (baxReader.scanDataReader.fileHasScanData && baxReader.scanDataReader.initializedRunInfoGroup) {
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("BindingKit")) {
+                HDFAtom<std::string> bkAtom;
+                if (bkAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "BindingKit")) {
+                    bkAtom.Read(baxBindingKit);
+                    bkAtom.dataspace.close();
+                }
+            }
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("SequencingKit")) {
+                HDFAtom<std::string> skAtom;
+                if (skAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "SequencingKit")) {
+                    skAtom.Read(baxSequencingKit);
+                    skAtom.dataspace.close();
+                }
+            }
+        }
+
+        baxReader.GetChangeListID(baxBasecallerVersion);
+    }
+
+    // read the 1st BAX record; the first record of each BAM output is compared against it
+    SMRTSequence baxRecord;
+    EXPECT_TRUE(baxReader.GetNext(baxRecord) > 0);
+
+    // read region table info
+    std::unique_ptr<HDFRegionTableReader> const regionTableReader(new HDFRegionTableReader);
+    RegionTable regionTable;
+    std::string fn = baxFilenames.front();
+    EXPECT_TRUE(regionTableReader->Initialize(fn) != 0);
+    regionTable.Reset();
+    regionTableReader->ReadTable(regionTable);
+    regionTableReader->Close();
+
+    // Test primary, i.e. hqregions.bam, output
+    EXPECT_NO_THROW(
+    {
+        // open BAM file
+        BamFile bamFile(generatedBam);
+
+        // check BAM header information
+        const BamHeader& header = bamFile.Header();
+        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(string("unknown"), header.SortOrder());
+        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_TRUE(header.Sequences().empty());
+        EXPECT_TRUE(header.Comments().empty());
+        ASSERT_FALSE(header.Programs().empty());
+
+        const vector<string> readGroupIds = header.ReadGroupIds();
+        ASSERT_FALSE(readGroupIds.empty());
+        const ReadGroupInfo& rg = header.ReadGroup(readGroupIds.front());
+
+        string rawId = movieName + "//HQREGION";
+        string md5Id;
+        MakeMD5(rawId, md5Id, 8);
+        EXPECT_EQ(md5Id, rg.Id());
+
+        EXPECT_EQ(string("PACBIO"), rg.Platform());
+        EXPECT_EQ(movieName, rg.MovieName());
+
+        EXPECT_TRUE(rg.SequencingCenter().empty());
+        EXPECT_TRUE(rg.Date().empty());
+        EXPECT_TRUE(rg.FlowOrder().empty());
+        EXPECT_TRUE(rg.KeySequence().empty());
+        EXPECT_TRUE(rg.Library().empty());
+        EXPECT_TRUE(rg.Programs().empty());
+        EXPECT_TRUE(rg.PredictedInsertSize().empty());
+        EXPECT_TRUE(rg.Sample().empty());
+
+        EXPECT_EQ("HQREGION", rg.ReadType());
+        EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
+        EXPECT_EQ(baxBindingKit, rg.BindingKit());
+        EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
+        EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+        EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
+        EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
+        EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
+        EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD));
+        EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV));
+        EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG));
+        EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
+
+        int hqStart = 0;  // zero-initialized: the lookup result is only EXPECT-checked,
+        int hqEnd   = 0;  // so these values are still read below when the lookup fails
+        int hqScore = 0;
+        const bool lookupResult = LookupHQRegion(baxRecord.zmwData.holeNumber,
+                                                 regionTable,
+                                                 hqStart,
+                                                 hqEnd,
+                                                 hqScore);
+        EXPECT_TRUE(lookupResult);
+
+        vector<float> hqSnr;
+        hqSnr.push_back(baxRecord.HQRegionSnr('A'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('C'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('G'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('T'));
+
+        EXPECT_GT(hqSnr[0], 0);
+        EXPECT_GT(hqSnr[1], 0);
+        EXPECT_GT(hqSnr[2], 0);
+        EXPECT_GT(hqSnr[3], 0);
+
+        bool firstRecord = true;
+        EntireFileQuery entireFile(bamFile);
+        for ( BamRecord& bamRecord : entireFile) {
+            if (!firstRecord)  // only the first BAM record is checked
+                break;
+            firstRecord = false;
+
+            const BamRecordImpl& bamRecordImpl = bamRecord.Impl();
+            EXPECT_EQ(4680U,bamRecordImpl.Bin());
+            EXPECT_EQ(0,   bamRecordImpl.InsertSize());
+            EXPECT_EQ(255, bamRecordImpl.MapQuality());
+            EXPECT_EQ(-1,  bamRecordImpl.MatePosition());
+            EXPECT_EQ(-1,  bamRecordImpl.MateReferenceId());
+            EXPECT_EQ(-1,  bamRecordImpl.Position());
+            EXPECT_EQ(-1,  bamRecordImpl.ReferenceId());
+            EXPECT_FALSE(bamRecordImpl.IsMapped());
+
+            const int holeNumber    = baxRecord.zmwData.holeNumber;
+            const int subreadStart  = hqStart;  // expected record spans [hqStart, hqEnd)
+            const int subreadEnd    = hqEnd;
+
+            const string expectedName = movieName + "/" +
+                    to_string(holeNumber)   + "/" +
+                    to_string(subreadStart) + "_" +
+                    to_string(subreadEnd);
+            EXPECT_EQ(expectedName, bamRecordImpl.Name());
+
+            using PacBio::BAM::QualityValue;
+            using PacBio::BAM::QualityValues;
+
+            const DNALength length = subreadEnd - subreadStart;
+
+            string expectedSequence;
+            expectedSequence.assign((const char*)baxRecord.seq + subreadStart, length);
+
+            const string bamSequence = bamRecord.Sequence();
+            const QualityValues bamQualities = bamRecord.Qualities();
+            EXPECT_EQ(expectedSequence, bamSequence);
+            EXPECT_TRUE(bamQualities.empty());
+
+            const QualityValues bamDeletionQVs = bamRecord.DeletionQV();
+            const QualityValues bamInsertionQVs = bamRecord.InsertionQV();
+            const QualityValues bamMergeQVs = bamRecord.MergeQV();
+            const QualityValues bamSubstitutionQVs = bamRecord.SubstitutionQV();
+
+            for (size_t i = 0; i < length; ++i) {
+                const size_t pos = subreadStart + i;
+
+                EXPECT_EQ((QualityValue)baxRecord.GetDeletionQV(pos),     bamDeletionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetInsertionQV(pos),    bamInsertionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetMergeQV(pos),        bamMergeQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetSubstitutionQV(pos), bamSubstitutionQVs.at(i));
+            }
+
+            if (baxRecord.deletionTag)
+            {
+                string expectedDeletionTags;
+                expectedDeletionTags.assign((char*)baxRecord.deletionTag + subreadStart,
+                                            (char*)baxRecord.deletionTag + subreadStart + length);
+                const string& bamDeletionTags = bamRecord.DeletionTag();
+                EXPECT_EQ(expectedDeletionTags, bamDeletionTags);
+            }
+
+            if (baxRecord.substitutionTag)
+            {
+                string expectedSubstitutionTags;
+                expectedSubstitutionTags.assign((char*)baxRecord.substitutionTag + subreadStart,
+                                            (char*)baxRecord.substitutionTag + subreadStart + length);
+                const string& bamSubstitutionTags = bamRecord.SubstitutionTag();
+                EXPECT_EQ(expectedSubstitutionTags, bamSubstitutionTags);
+            }
+
+            // TODO: IPDs
+
+            EXPECT_EQ(md5Id,        bamRecord.ReadGroupId());
+            EXPECT_EQ(movieName,    bamRecord.MovieName());
+            EXPECT_EQ(1,            bamRecord.NumPasses());
+            EXPECT_EQ(holeNumber,   bamRecord.HoleNumber());
+            EXPECT_EQ(subreadStart, bamRecord.QueryStart());
+            EXPECT_EQ(subreadEnd,   bamRecord.QueryEnd());
+            EXPECT_EQ(hqSnr,        bamRecord.SignalToNoise());
+            EXPECT_FALSE(bamRecord.HasLocalContextFlags());
+        }
+
+    }); // EXPECT_NO_THROW
+
+    // Test secondary, i.e. lqregions.bam, output
+    EXPECT_NO_THROW(
+    {
+        // open BAM file
+        BamFile bamFile(scrapBam);
+
+        // check BAM header information
+        const BamHeader& header = bamFile.Header();
+        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(string("unknown"), header.SortOrder());
+        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_TRUE(header.Sequences().empty());
+        EXPECT_TRUE(header.Comments().empty());
+        ASSERT_FALSE(header.Programs().empty());
+
+        const vector<string> readGroupIds = header.ReadGroupIds();
+        ASSERT_FALSE(readGroupIds.empty());
+        const ReadGroupInfo& rg = header.ReadGroup(readGroupIds.front());
+
+        string rawId = movieName + "//SCRAP";
+        string md5Id;
+        MakeMD5(rawId, md5Id, 8);
+        EXPECT_EQ(md5Id, rg.Id());
+
+        EXPECT_EQ(string("PACBIO"), rg.Platform());
+        EXPECT_EQ(movieName, rg.MovieName());
+
+        EXPECT_TRUE(rg.SequencingCenter().empty());
+        EXPECT_TRUE(rg.Date().empty());
+        EXPECT_TRUE(rg.FlowOrder().empty());
+        EXPECT_TRUE(rg.KeySequence().empty());
+        EXPECT_TRUE(rg.Library().empty());
+        EXPECT_TRUE(rg.Programs().empty());
+        EXPECT_TRUE(rg.PredictedInsertSize().empty());
+        EXPECT_TRUE(rg.Sample().empty());
+
+        EXPECT_EQ("SCRAP", rg.ReadType());
+        EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
+        EXPECT_EQ(baxBindingKit, rg.BindingKit());
+        EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
+        EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+        EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
+        EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
+        EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
+        EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD));
+        EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV));
+        EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG));
+        EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
+
+        int hqStart = 0;  // zero-initialized: the lookup result is only EXPECT-checked,
+        int hqEnd   = 0;  // so these values are still read below when the lookup fails
+        int hqScore = 0;
+        const bool lookupResult = LookupHQRegion(baxRecord.zmwData.holeNumber,
+                                                 regionTable,
+                                                 hqStart,
+                                                 hqEnd,
+                                                 hqScore);
+        EXPECT_TRUE(lookupResult);
+
+        vector<float> hqSnr;
+        hqSnr.push_back(baxRecord.HQRegionSnr('A'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('C'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('G'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('T'));
+
+        EXPECT_GT(hqSnr[0], 0);
+        EXPECT_GT(hqSnr[1], 0);
+        EXPECT_GT(hqSnr[2], 0);
+        EXPECT_GT(hqSnr[3], 0);
+
+        bool firstRecord = true;
+        EntireFileQuery entireFile(bamFile);
+        for ( BamRecord& bamRecord : entireFile) {
+            if (!firstRecord)  // only the first BAM record is checked
+                break;
+            firstRecord = false;
+
+            const BamRecordImpl& bamRecordImpl = bamRecord.Impl();
+            EXPECT_EQ(4680U,bamRecordImpl.Bin());
+            EXPECT_EQ(0,   bamRecordImpl.InsertSize());
+            EXPECT_EQ(255, bamRecordImpl.MapQuality());
+            EXPECT_EQ(-1,  bamRecordImpl.MatePosition());
+            EXPECT_EQ(-1,  bamRecordImpl.MateReferenceId());
+            EXPECT_EQ(-1,  bamRecordImpl.Position());
+            EXPECT_EQ(-1,  bamRecordImpl.ReferenceId());
+            EXPECT_FALSE(bamRecordImpl.IsMapped());
+
+            const int holeNumber    = baxRecord.zmwData.holeNumber;
+            const int subreadStart  = 0;        // expected scrap record spans [0, hqStart)
+            const int subreadEnd    = hqStart;
+
+            const string expectedName = movieName + "/" +
+                    to_string(holeNumber)   + "/" +
+                    to_string(subreadStart) + "_" +
+                    to_string(subreadEnd);
+            EXPECT_EQ(expectedName, bamRecordImpl.Name());
+
+            using PacBio::BAM::QualityValue;
+            using PacBio::BAM::QualityValues;
+
+            const DNALength length = subreadEnd - subreadStart;
+
+            string expectedSequence;
+            expectedSequence.assign((const char*)baxRecord.seq + subreadStart, length);
+
+            const string bamSequence = bamRecord.Sequence();
+            const QualityValues bamQualities = bamRecord.Qualities();
+            EXPECT_EQ(expectedSequence, bamSequence);
+            EXPECT_TRUE(bamQualities.empty());
+
+            const QualityValues bamDeletionQVs = bamRecord.DeletionQV();
+            const QualityValues bamInsertionQVs = bamRecord.InsertionQV();
+            const QualityValues bamMergeQVs = bamRecord.MergeQV();
+            const QualityValues bamSubstitutionQVs = bamRecord.SubstitutionQV();
+
+            for (size_t i = 0; i < length; ++i) {
+                const size_t pos = subreadStart + i;
+
+                EXPECT_EQ((QualityValue)baxRecord.GetDeletionQV(pos),     bamDeletionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetInsertionQV(pos),    bamInsertionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetMergeQV(pos),        bamMergeQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetSubstitutionQV(pos), bamSubstitutionQVs.at(i));
+            }
+
+            if (baxRecord.deletionTag)
+            {
+                string expectedDeletionTags;
+                expectedDeletionTags.assign((char*)baxRecord.deletionTag + subreadStart,
+                                            (char*)baxRecord.deletionTag + subreadStart + length);
+                const string& bamDeletionTags = bamRecord.DeletionTag();
+                EXPECT_EQ(expectedDeletionTags, bamDeletionTags);
+            }
+
+            if (baxRecord.substitutionTag)
+            {
+                string expectedSubstitutionTags;
+                expectedSubstitutionTags.assign((char*)baxRecord.substitutionTag + subreadStart,
+                                            (char*)baxRecord.substitutionTag + subreadStart + length);
+                const string& bamSubstitutionTags = bamRecord.SubstitutionTag();
+                EXPECT_EQ(expectedSubstitutionTags, bamSubstitutionTags);
+            }
+
+            // TODO: IPDs
+            EXPECT_EQ(md5Id,        bamRecord.ReadGroupId());
+            EXPECT_EQ(movieName,    bamRecord.MovieName());
+            EXPECT_EQ(1,            bamRecord.NumPasses());
+            EXPECT_EQ(holeNumber,   bamRecord.HoleNumber());
+            EXPECT_EQ(subreadStart, bamRecord.QueryStart());
+            EXPECT_EQ(subreadEnd,   bamRecord.QueryEnd());
+            EXPECT_EQ(hqSnr,        bamRecord.SignalToNoise());
+            EXPECT_FALSE(bamRecord.HasLocalContextFlags());
+        }
+
+    }); // EXPECT_NO_THROW
+
+    // cleanup -- NOTE(review): not reached if an ASSERT_* above fails, so the generated BAM/PBI files stay on disk
+    EXPECT_NO_THROW(
+    {
+        baxReader.Close();
+        RemoveFile(generatedBam);
+        RemoveFile(scrapBam);
+        RemoveFile(generatedBam + ".pbi");
+        RemoveFile(scrapBam + ".pbi");
+    });
+}
diff --git a/utils/bax2bam/tests/src/test_polymerase.cpp b/utils/bax2bam/tests/src/test_polymerase.cpp
new file mode 100644
index 0000000..611962a
--- /dev/null
+++ b/utils/bax2bam/tests/src/test_polymerase.cpp
@@ -0,0 +1,236 @@
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include "TestUtils.h"
+
+#include "HDFBasReader.hpp"
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/EntireFileQuery.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include <cstdio>
+#include <cstdlib>
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+TEST(PolymeraseTest, EndToEnd_Single)
+{
+    // setup
+    const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0";
+
+    vector<string> baxFilenames;
+    baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5");
+
+    const string generatedBam = movieName + ".polymerase.bam";
+
+    // run conversion
+    const int result = RunBax2Bam(baxFilenames, "--polymeraseread");
+    EXPECT_EQ(0, result);
+
+    {   // ensure PBI exists
+        const BamFile generatedBamFile(generatedBam);
+        EXPECT_TRUE(generatedBamFile.PacBioIndexExists());
+    }
+
+    // open BAX reader on original data
+    HDFBasReader baxReader;
+    baxReader.IncludeField("Basecall");
+    baxReader.IncludeField("DeletionQV");
+    baxReader.IncludeField("DeletionTag");
+    baxReader.IncludeField("InsertionQV");
+    baxReader.IncludeField("PreBaseFrames");
+    baxReader.IncludeField("MergeQV");
+    baxReader.IncludeField("SubstitutionQV");
+    baxReader.IncludeField("HQRegionSNR");
+    // not using SubTag or PulseWidth here
+
+    string baxBasecallerVersion;
+    string baxBindingKit;
+    string baxSequencingKit;
+
+    const int initOk = baxReader.Initialize(baxFilenames.front());
+    EXPECT_EQ(1, initOk);
+    if (initOk == 1) {
+
+        if (baxReader.scanDataReader.fileHasScanData && baxReader.scanDataReader.initializedRunInfoGroup) {
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("BindingKit")) {
+                HDFAtom<std::string> bkAtom;
+                if (bkAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "BindingKit")) {
+                    bkAtom.Read(baxBindingKit);
+                    bkAtom.dataspace.close();
+                }
+            }
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("SequencingKit")) {
+                HDFAtom<std::string> skAtom;
+                if (skAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "SequencingKit")) {
+                    skAtom.Read(baxSequencingKit);
+                    skAtom.dataspace.close();
+                }
+            }
+        }
+
+        baxReader.GetChangeListID(baxBasecallerVersion);
+    }
+
+    EXPECT_NO_THROW(
+    {
+
+        // open BAM file
+        BamFile bamFile(generatedBam);
+
+        // check BAM header information
+        const BamHeader& header = bamFile.Header();
+        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(string("unknown"), header.SortOrder());
+        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_TRUE(header.Sequences().empty());
+        EXPECT_TRUE(header.Comments().empty());
+        ASSERT_FALSE(header.Programs().empty());
+
+        const vector<string> readGroupIds = header.ReadGroupIds();
+        ASSERT_FALSE(readGroupIds.empty());
+        const ReadGroupInfo& rg = header.ReadGroup(readGroupIds.front());
+
+        string rawId = movieName + "//POLYMERASE";
+        string md5Id;
+        MakeMD5(rawId, md5Id, 8);
+        EXPECT_EQ(md5Id, rg.Id());
+
+        EXPECT_EQ(string("PACBIO"), rg.Platform());
+        EXPECT_EQ(movieName, rg.MovieName());
+
+        EXPECT_TRUE(rg.SequencingCenter().empty());
+        EXPECT_TRUE(rg.Date().empty());
+        EXPECT_TRUE(rg.FlowOrder().empty());
+        EXPECT_TRUE(rg.KeySequence().empty());
+        EXPECT_TRUE(rg.Library().empty());
+        EXPECT_TRUE(rg.Programs().empty());
+        EXPECT_TRUE(rg.PredictedInsertSize().empty());
+        EXPECT_TRUE(rg.Sample().empty());
+
+        EXPECT_EQ("POLYMERASE", rg.ReadType());
+        EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
+        EXPECT_EQ(baxBindingKit, rg.BindingKit());
+        EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
+        EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+        EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
+        EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
+        EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
+        EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD));
+        EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV));
+        EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG));
+        EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
+
+        // compare 1st record from each file
+        SMRTSequence baxRecord;
+        EXPECT_TRUE(baxReader.GetNext(baxRecord) > 0);
+
+        vector<float> hqSnr;
+        hqSnr.push_back(baxRecord.HQRegionSnr('A'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('C'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('G'));
+        hqSnr.push_back(baxRecord.HQRegionSnr('T'));
+ 
+        EXPECT_GT(hqSnr[0], 0);
+        EXPECT_GT(hqSnr[1], 0);
+        EXPECT_GT(hqSnr[2], 0);
+        EXPECT_GT(hqSnr[3], 0);
+
+        bool firstRecord = true;
+        EntireFileQuery entireFile(bamFile);
+        for ( BamRecord& bamRecord : entireFile) {
+            if (!firstRecord)
+                break;
+            firstRecord = false;
+
+            const BamRecordImpl& bamRecordImpl = bamRecord.Impl();
+            EXPECT_EQ(4680U,bamRecordImpl.Bin());
+            EXPECT_EQ(0,   bamRecordImpl.InsertSize());
+            EXPECT_EQ(255, bamRecordImpl.MapQuality());
+            EXPECT_EQ(-1,  bamRecordImpl.MatePosition());
+            EXPECT_EQ(-1,  bamRecordImpl.MateReferenceId());
+            EXPECT_EQ(-1,  bamRecordImpl.Position());
+            EXPECT_EQ(-1,  bamRecordImpl.ReferenceId());
+            EXPECT_FALSE(bamRecordImpl.IsMapped());
+
+            const int holeNumber    = baxRecord.zmwData.holeNumber;
+            const int subreadStart  = 0;
+            const int subreadEnd    = baxRecord.length;
+
+            const string expectedName = movieName + "/" +
+                    to_string(holeNumber)   + "/" +
+                    to_string(subreadStart) + "_" +
+                    to_string(subreadEnd);
+            EXPECT_EQ(expectedName, bamRecordImpl.Name());
+
+            using PacBio::BAM::QualityValue;
+            using PacBio::BAM::QualityValues;
+
+            const DNALength length = subreadEnd - subreadStart;
+
+            string expectedSequence;
+            expectedSequence.assign((const char*)baxRecord.seq + subreadStart, length);
+
+            const string bamSequence = bamRecord.Sequence();
+            const QualityValues bamQualities = bamRecord.Qualities();
+            EXPECT_EQ(expectedSequence, bamSequence);
+            EXPECT_TRUE(bamQualities.empty());
+
+            const QualityValues bamDeletionQVs = bamRecord.DeletionQV();
+            const QualityValues bamInsertionQVs = bamRecord.InsertionQV();
+            const QualityValues bamMergeQVs = bamRecord.MergeQV();
+            const QualityValues bamSubstitutionQVs = bamRecord.SubstitutionQV();
+
+            for (size_t i = 0; i < length; ++i) {
+                const size_t pos = subreadStart + i;
+
+                EXPECT_EQ((QualityValue)baxRecord.GetDeletionQV(pos),     bamDeletionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetInsertionQV(pos),    bamInsertionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetMergeQV(pos),        bamMergeQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetSubstitutionQV(pos), bamSubstitutionQVs.at(i));
+            }
+
+            if (baxRecord.deletionTag)
+            {
+                string expectedDeletionTags;
+                expectedDeletionTags.assign((char*)baxRecord.deletionTag + subreadStart,
+                                            (char*)baxRecord.deletionTag + subreadStart + length);
+                const string& bamDeletionTags = bamRecord.DeletionTag();
+                EXPECT_EQ(expectedDeletionTags, bamDeletionTags);
+            }
+
+            if (baxRecord.substitutionTag)
+            {
+                string expectedSubstitutionTags;
+                expectedSubstitutionTags.assign((char*)baxRecord.substitutionTag + subreadStart,
+                                            (char*)baxRecord.substitutionTag + subreadStart + length);
+                const string& bamSubstitutionTags = bamRecord.SubstitutionTag();
+                EXPECT_EQ(expectedSubstitutionTags, bamSubstitutionTags);
+            }
+
+            // TODO: IPDs
+
+            EXPECT_EQ(md5Id,        bamRecord.ReadGroupId());
+            EXPECT_EQ(movieName,    bamRecord.MovieName());
+            EXPECT_EQ(1,            bamRecord.NumPasses());
+            EXPECT_EQ(holeNumber,   bamRecord.HoleNumber());
+            EXPECT_EQ(subreadStart, bamRecord.QueryStart());
+            EXPECT_EQ(subreadEnd,   bamRecord.QueryEnd());
+            EXPECT_EQ(hqSnr,        bamRecord.SignalToNoise());
+            EXPECT_FALSE(bamRecord.HasLocalContextFlags());
+        }
+
+        // cleanup
+        baxReader.Close();
+        RemoveFile(generatedBam);
+        RemoveFile(generatedBam + ".pbi");
+
+    }); // EXPECT_NO_THROW
+}
diff --git a/utils/bax2bam/tests/src/test_subreads.cpp b/utils/bax2bam/tests/src/test_subreads.cpp
new file mode 100644
index 0000000..681a17a
--- /dev/null
+++ b/utils/bax2bam/tests/src/test_subreads.cpp
@@ -0,0 +1,410 @@
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include "TestUtils.h"
+
+#include "HDFBasReader.hpp"
+#include "alignment/utils/RegionUtils.hpp"
+#include "hdf/HDFRegionTableReader.hpp"
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/EntireFileQuery.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+
+using namespace std;
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+// TODO: much of this is copypasta from src/SubreadConverter.cpp
+// A subread's base range within a ZMW read (the test below treats it as
+// [Start, End)), plus flags recording adapters seen before and/or after it.
+struct SubreadInterval
+{
+    size_t Start;
+    size_t End;
+    PacBio::BAM::LocalContextFlags LocalContextFlags;
+
+    // empty interval with no adapter context
+    SubreadInterval() : Start(0), End(0), LocalContextFlags(NO_LOCAL_CONTEXT) { }
+
+    // the two bools select ADAPTER_BEFORE / ADAPTER_AFTER; both default to off
+    SubreadInterval(size_t start, size_t end, bool adapterBefore = false, bool adapterAfter = false)
+        : Start(start)
+        , End(end)
+        , LocalContextFlags((adapterBefore ? ADAPTER_BEFORE : NO_LOCAL_CONTEXT) |
+                            (adapterAfter  ? ADAPTER_AFTER  : NO_LOCAL_CONTEXT))
+    { }
+};
+
+// Lexicographic less-than for region-table rows: compares hole number first,
+// then region type index, then region start.
+inline
+bool RegionComparer(const RegionAnnotation& lhs, const RegionAnnotation& rhs)
+{
+    constexpr int holeCol  = RegionAnnotation::HOLENUMBERCOL;
+    constexpr int typeCol  = RegionAnnotation::REGIONTYPEINDEXCOL;
+    constexpr int startCol = RegionAnnotation::REGIONSTARTCOL;
+
+    // a column decides the result as soon as the two rows differ in it
+    if (lhs.row[holeCol] < rhs.row[holeCol]) return true;
+    if (!(lhs.row[holeCol] == rhs.row[holeCol])) return false;
+
+    if (lhs.row[typeCol] < rhs.row[typeCol]) return true;
+    if (!(lhs.row[typeCol] == rhs.row[typeCol])) return false;
+
+    return lhs.row[startCol] < rhs.row[startCol];
+}
+
+// Fills *intervals with the subreads of ZMW `holeNumber`: the stretches of its
+// HQ region that lie between (and around) adapter hits from the region table.
+void ComputeSubreadIntervals(vector<SubreadInterval>* const intervals,
+                             RegionTable& regionTable,
+                             const unsigned holeNumber)
+{
+    // always start from an empty result
+    intervals->clear();
+
+    RegionAnnotations zmwRegions = regionTable[holeNumber];
+
+    // no HQ region: report no subreads rather than throwing
+    if (!zmwRegions.HasHQRegion())
+        return;
+
+    const size_t hqStart = zmwRegions.HQStart();
+    const size_t hqEnd   = zmwRegions.HQEnd();
+
+    // an empty HQ region yields no subreads either
+    if (hqEnd <= hqStart)
+        return;
+
+    // this logic mirrors that in the C# codebase for DelimitedSeqRegions rather
+    // than what's in src/SubreadConverter.cpp for verification purposes
+    //
+    // cursor: where the next subread begins -- the HQ start at first, then the
+    // end of the most recently accepted adapter
+    size_t cursor = hqStart;
+    bool afterAdapter = false;
+    const vector<ReadInterval> adapters = zmwRegions.AdapterIntervals();
+    for (const ReadInterval& adapter : adapters) {
+        const size_t adapterStart = adapter.start;
+        const size_t adapterEnd   = adapter.end;
+
+        if (adapterEnd < hqStart)
+            continue;  // adapter ends before the HQ region starts
+
+        if (adapterStart > hqEnd)
+            break;     // adapter starts after the HQ region ends; stop scanning
+
+        if (afterAdapter)
+            intervals->emplace_back(cursor, adapterStart, true, true);
+        else if (cursor < adapterStart)
+            intervals->emplace_back(cursor, adapterStart, false, true);
+
+        cursor = adapterEnd;
+        afterAdapter = true;
+    }
+
+    // trailing subread: from the last adapter (or the HQ start) to the HQ end
+    if (afterAdapter)
+        intervals->emplace_back(cursor, hqEnd, true, false);
+    else if (cursor < hqEnd)
+        intervals->emplace_back(cursor, hqEnd, false, false);
+}
+
+
+TEST(SubreadsTest, EndToEnd_Multiple)
+{
+    // End-to-end: run bax2bam --subread on one .bax.h5, then verify the BAM header and each subread record
+    const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0";
+
+    vector<string> baxFilenames;
+    baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5");
+
+    const string generatedBam = movieName + ".subreads.bam";
+    const string scrapBam = movieName + ".scraps.bam";
+
+    // run conversion
+    const int result = RunBax2Bam(baxFilenames, "--subread");
+    EXPECT_EQ(0, result);
+
+    {   // ensure PBIs exist
+        const BamFile generatedBamFile(generatedBam);
+        const BamFile scrapsBamFile(scrapBam);
+        EXPECT_TRUE(generatedBamFile.PacBioIndexExists());
+        EXPECT_TRUE(scrapsBamFile.PacBioIndexExists());
+    }
+
+    // open BAX reader on original data
+    HDFBasReader baxReader;
+    baxReader.IncludeField("Basecall");
+    baxReader.IncludeField("DeletionQV");
+    baxReader.IncludeField("DeletionTag");
+    baxReader.IncludeField("InsertionQV");
+    baxReader.IncludeField("PreBaseFrames");
+    baxReader.IncludeField("MergeQV");
+    baxReader.IncludeField("SubstitutionQV");
+    baxReader.IncludeField("HQRegionSNR");
+    // not using SubTag or PulseWidth here
+
+    string baxBasecallerVersion;
+    string baxBindingKit;
+    string baxSequencingKit;
+
+    const int initOk = baxReader.Initialize(baxFilenames.front());
+    EXPECT_EQ(1, initOk);
+    if (initOk == 1) {  // read kit names and basecaller version from the bax file for the read group checks below
+
+        if (baxReader.scanDataReader.fileHasScanData && baxReader.scanDataReader.initializedRunInfoGroup) {
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("BindingKit")) {
+                HDFAtom<std::string> bkAtom;
+                if (bkAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "BindingKit")) {
+                    bkAtom.Read(baxBindingKit);
+                    bkAtom.dataspace.close();
+                }
+            }
+
+            if (baxReader.scanDataReader.runInfoGroup.ContainsAttribute("SequencingKit")) {
+                HDFAtom<std::string> skAtom;
+                if (skAtom.Initialize(baxReader.scanDataReader.runInfoGroup, "SequencingKit")) {
+                    skAtom.Read(baxSequencingKit);
+                    skAtom.dataspace.close();
+                }
+            }
+        }
+
+        baxReader.GetChangeListID(baxBasecallerVersion);
+    }
+
+    // read region table info (input to ComputeSubreadIntervals in the record loop below)
+    std::unique_ptr<HDFRegionTableReader> const regionTableReader(new HDFRegionTableReader);
+    RegionTable regionTable;
+    std::string fn = baxFilenames.front();
+    EXPECT_TRUE(regionTableReader->Initialize(fn) != 0);
+    regionTable.Reset();
+    regionTableReader->ReadTable(regionTable);
+    regionTableReader->Close();
+
+    EXPECT_NO_THROW(  // an exception thrown anywhere in this block is reported as a test failure
+    {
+        // open BAM file
+        BamFile bamFile(generatedBam);
+
+        // check BAM header information
+        const BamHeader& header = bamFile.Header();
+        EXPECT_EQ(string("1.5"),     header.Version());
+        EXPECT_EQ(string("unknown"), header.SortOrder());
+        EXPECT_EQ(string("3.0.2"),   header.PacBioBamVersion());
+        EXPECT_TRUE(header.Sequences().empty());
+        EXPECT_TRUE(header.Comments().empty());
+        ASSERT_FALSE(header.Programs().empty());
+
+        const vector<string> readGroupIds = header.ReadGroupIds();
+        ASSERT_FALSE(readGroupIds.empty());
+        const ReadGroupInfo& rg = header.ReadGroup(readGroupIds.front());
+
+        string rawId = movieName + "//SUBREAD";
+        string md5Id;
+        MakeMD5(rawId, md5Id, 8);  // NOTE(review): presumably keeps 8 characters of the digest -- confirm in TestUtils
+        EXPECT_EQ(md5Id, rg.Id());
+
+        EXPECT_EQ(string("PACBIO"), rg.Platform());
+        EXPECT_EQ(movieName, rg.MovieName());
+
+        EXPECT_TRUE(rg.SequencingCenter().empty());
+        EXPECT_TRUE(rg.Date().empty());
+        EXPECT_TRUE(rg.FlowOrder().empty());
+        EXPECT_TRUE(rg.KeySequence().empty());
+        EXPECT_TRUE(rg.Library().empty());
+        EXPECT_TRUE(rg.Programs().empty());
+        EXPECT_TRUE(rg.PredictedInsertSize().empty());
+        EXPECT_TRUE(rg.Sample().empty());
+
+        EXPECT_EQ("SUBREAD", rg.ReadType());
+        EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion());
+        EXPECT_EQ(baxBindingKit, rg.BindingKit());
+        EXPECT_EQ(baxSequencingKit, rg.SequencingKit());
+        EXPECT_EQ(75, std::stod(rg.FrameRateHz()));
+        EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV));
+        EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG));
+        EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV));
+        EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD));
+        EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV));
+        EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV));
+        EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG));
+        EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
+
+        // walk every BAM record, pairing it with the next subread interval computed from the bax file
+        SMRTSequence baxRecord;
+        auto holeNumber = 0;  // deduced as int; updated for each bax record read
+        vector<float> hqSnr;
+
+        size_t intervalIdx = 0;  // index of the next expected interval within subreadIntervals
+        vector<SubreadInterval> subreadIntervals;
+
+        size_t numTested = 0;  // number of BAM records compared so far
+        EntireFileQuery entireFile(bamFile);
+        for (BamRecord& bamRecord : entireFile) {
+            if (intervalIdx >= subreadIntervals.size())  // all intervals of the current ZMW consumed (initially: none loaded)
+            {
+                while (baxReader.GetNext(baxRecord))  // advance to the next bax record that yields at least one subread
+                {
+                    holeNumber  = baxRecord.zmwData.holeNumber;
+
+                    ComputeSubreadIntervals(&subreadIntervals, regionTable, holeNumber);
+
+                    /* this is for debugging subread interval problems
+                    int hqStart = 0;
+                    int hqEnd = 0;
+                    int hqScore = 0;
+                    LookupHQRegion(holeNumber,
+                                   regionTable,
+                                   hqStart,
+                                   hqEnd,
+                                   hqScore);
+
+                    vector<ReadInterval> subreadIntervals_;
+                    CollectSubreadIntervals(baxRecord, &regionTable, subreadIntervals_);
+
+                    for (int i = subreadIntervals_.size() - 1; i >= 0; --i)
+                    {
+                        auto& in = subreadIntervals_[i];
+                        int inStart = max(hqStart, in.start);
+                        int inEnd   = min(hqEnd,   in.end);
+                        if (inEnd <= inStart)
+                            subreadIntervals_.erase(subreadIntervals_.begin() + i);
+                    }
+
+                    cerr << "hqRegion: " << hqStart << ", " << hqEnd << endl;
+                    cerr << "subreadRegions:" << endl;
+                    for (const auto& in : subreadIntervals_)
+                        cerr << "  l, r: " << in.start << ", " << in.end << endl;
+
+                    cerr << "adapterDerived:" << endl;
+                    for (const auto& in : subreadIntervals)
+                        cerr << "  l, r: " << in.Start << ", " << in.End << endl;
+
+                    cerr << endl;
+                    // */
+
+                    if (subreadIntervals.empty())
+                        continue;  // this ZMW produced no subreads; try the next bax record
+
+                    intervalIdx = 0;
+
+                    hqSnr.clear();
+                    hqSnr.push_back(baxRecord.HQRegionSnr('A'));
+                    hqSnr.push_back(baxRecord.HQRegionSnr('C'));
+                    hqSnr.push_back(baxRecord.HQRegionSnr('G'));
+                    hqSnr.push_back(baxRecord.HQRegionSnr('T'));
+
+                    EXPECT_GT(hqSnr[0], 0);
+                    EXPECT_GT(hqSnr[1], 0);
+                    EXPECT_GT(hqSnr[2], 0);
+                    EXPECT_GT(hqSnr[3], 0);
+
+                    goto compare;  // intervals are ready: compare the current BAM record against the first one
+                }
+
+                goto cleanup;  // GetNext returned 0: no more bax records, so stop comparing
+            }
+
+compare:  // bamRecord is checked against subreadIntervals[intervalIdx] of the current baxRecord
+            const BamRecordImpl& bamRecordImpl = bamRecord.Impl();
+            EXPECT_EQ(4680U,bamRecordImpl.Bin());
+            EXPECT_EQ(0,   bamRecordImpl.InsertSize());
+            EXPECT_EQ(255, bamRecordImpl.MapQuality());
+            EXPECT_EQ(-1,  bamRecordImpl.MatePosition());
+            EXPECT_EQ(-1,  bamRecordImpl.MateReferenceId());
+            EXPECT_EQ(-1,  bamRecordImpl.Position());
+            EXPECT_EQ(-1,  bamRecordImpl.ReferenceId());
+            EXPECT_FALSE(bamRecordImpl.IsMapped());
+
+            const int subreadStart = subreadIntervals[intervalIdx].Start;
+            const int subreadEnd   = subreadIntervals[intervalIdx].End;
+
+            const string expectedName = movieName + "/" +
+                    to_string(holeNumber)   + "/" +
+                    to_string(subreadStart) + "_" +
+                    to_string(subreadEnd);
+            EXPECT_EQ(expectedName, bamRecordImpl.Name());
+
+            using PacBio::BAM::QualityValue;
+            using PacBio::BAM::QualityValues;
+
+            const DNALength length = subreadEnd - subreadStart;
+
+            string expectedSequence;
+            expectedSequence.assign((const char*)baxRecord.seq + subreadStart, length);
+
+            const string bamSequence = bamRecord.Sequence();
+            const QualityValues bamQualities = bamRecord.Qualities();
+            EXPECT_EQ(expectedSequence, bamSequence);
+            EXPECT_TRUE(bamQualities.empty());
+
+            const QualityValues bamDeletionQVs = bamRecord.DeletionQV();
+            const QualityValues bamInsertionQVs = bamRecord.InsertionQV();
+            const QualityValues bamMergeQVs = bamRecord.MergeQV();
+            const QualityValues bamSubstitutionQVs = bamRecord.SubstitutionQV();
+
+            for (size_t i = 0; i < length; ++i) {
+                const size_t pos = subreadStart + i;  // pos indexes the full bax read, i indexes the BAM subread
+
+                EXPECT_EQ((QualityValue)baxRecord.GetDeletionQV(pos),     bamDeletionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetInsertionQV(pos),    bamInsertionQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetMergeQV(pos),        bamMergeQVs.at(i));
+                EXPECT_EQ((QualityValue)baxRecord.GetSubstitutionQV(pos), bamSubstitutionQVs.at(i));
+            }
+
+            if (baxRecord.deletionTag)
+            {
+                string expectedDeletionTags;
+                expectedDeletionTags.assign((char*)baxRecord.deletionTag + subreadStart,
+                                            (char*)baxRecord.deletionTag + subreadStart + length);
+                const string& bamDeletionTags = bamRecord.DeletionTag();
+                EXPECT_EQ(expectedDeletionTags, bamDeletionTags);
+            }
+
+            if (baxRecord.substitutionTag)
+            {
+                string expectedSubstitutionTags;
+                expectedSubstitutionTags.assign((char*)baxRecord.substitutionTag + subreadStart,
+                                            (char*)baxRecord.substitutionTag + subreadStart + length);
+                const string& bamSubstitutionTags = bamRecord.SubstitutionTag();
+                EXPECT_EQ(expectedSubstitutionTags, bamSubstitutionTags);
+            }
+
+            // TODO: IPDs
+            const LocalContextFlags ctxFlags = subreadIntervals[intervalIdx].LocalContextFlags;
+
+            EXPECT_EQ(md5Id,        bamRecord.ReadGroupId());
+            EXPECT_EQ(movieName,    bamRecord.MovieName());
+            EXPECT_EQ(1,            bamRecord.NumPasses());
+            EXPECT_EQ(holeNumber,   bamRecord.HoleNumber());
+            EXPECT_EQ(subreadStart, bamRecord.QueryStart());
+            EXPECT_EQ(subreadEnd,   bamRecord.QueryEnd());
+            EXPECT_EQ(hqSnr,        bamRecord.SignalToNoise());
+            EXPECT_EQ(ctxFlags,     bamRecord.LocalContextFlags());
+
+            numTested++;
+            intervalIdx++;
+        }
+
+cleanup:
+        EXPECT_GT(numTested, 1UL);  // require that at least two BAM records were compared
+
+        // cleanup: close the bax reader and delete the generated BAM and PBI files
+        baxReader.Close();
+        RemoveFile(generatedBam);
+        RemoveFile(scrapBam);
+        RemoveFile(generatedBam + ".pbi");
+        RemoveFile(scrapBam + ".pbi");
+
+    }); // EXPECT_NO_THROW
+}
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/fused-src/gtest/.deps/test_fused_gtest_test-gtest-all.Po b/utils/bax2bam/third-party/gtest-1.7.0/fused-src/gtest/.deps/test_fused_gtest_test-gtest-all.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/fused-src/gtest/.deps/test_fused_gtest_test-gtest-all.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/fused-src/gtest/.deps/test_fused_gtest_test-gtest_main.Po b/utils/bax2bam/third-party/gtest-1.7.0/fused-src/gtest/.deps/test_fused_gtest_test-gtest_main.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/fused-src/gtest/.deps/test_fused_gtest_test-gtest_main.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/lib/.dirstamp b/utils/bax2bam/third-party/gtest-1.7.0/lib/.dirstamp
new file mode 100644
index 0000000..e69de29
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/.dirstamp b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/.dirstamp
new file mode 100644
index 0000000..e69de29
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample1.Plo b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample1.Plo
new file mode 100644
index 0000000..d8d6134
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample1.Plo
@@ -0,0 +1,3 @@
+samples/sample1.lo: samples/sample1.cc samples/sample1.h
+
+samples/sample1.h:
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample10_unittest.Po b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample10_unittest.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample10_unittest.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample1_unittest.Po b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample1_unittest.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample1_unittest.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample2.Plo b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample2.Plo
new file mode 100644
index 0000000..793d7ba
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample2.Plo
@@ -0,0 +1,29 @@
+samples/sample2.lo: samples/sample2.cc samples/sample2.h \
+ /usr/include/string.h /usr/include/features.h \
+ /usr/include/bits/predefs.h /usr/include/sys/cdefs.h \
+ /usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
+ /usr/include/gnu/stubs-64.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h \
+ /usr/include/xlocale.h /usr/include/bits/string3.h
+
+samples/sample2.h:
+
+/usr/include/string.h:
+
+/usr/include/features.h:
+
+/usr/include/bits/predefs.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/bits/wordsize.h:
+
+/usr/include/gnu/stubs.h:
+
+/usr/include/gnu/stubs-64.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h:
+
+/usr/include/xlocale.h:
+
+/usr/include/bits/string3.h:
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample4.Plo b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample4.Plo
new file mode 100644
index 0000000..1cf00b2
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/sample4.Plo
@@ -0,0 +1,48 @@
+samples/sample4.lo: samples/sample4.cc /usr/include/stdio.h \
+ /usr/include/features.h /usr/include/bits/predefs.h \
+ /usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \
+ /usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h \
+ /usr/include/bits/types.h /usr/include/bits/typesizes.h \
+ /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stdarg.h \
+ /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \
+ /usr/include/bits/stdio.h /usr/include/bits/stdio2.h samples/sample4.h
+
+/usr/include/stdio.h:
+
+/usr/include/features.h:
+
+/usr/include/bits/predefs.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/bits/wordsize.h:
+
+/usr/include/gnu/stubs.h:
+
+/usr/include/gnu/stubs-64.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h:
+
+/usr/include/bits/types.h:
+
+/usr/include/bits/typesizes.h:
+
+/usr/include/libio.h:
+
+/usr/include/_G_config.h:
+
+/usr/include/wchar.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stdarg.h:
+
+/usr/include/bits/stdio_lim.h:
+
+/usr/include/bits/sys_errlist.h:
+
+/usr/include/bits/stdio.h:
+
+/usr/include/bits/stdio2.h:
+
+samples/sample4.h:
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/test_fused_gtest_test-sample1.Po b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/test_fused_gtest_test-sample1.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/test_fused_gtest_test-sample1.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/test_fused_gtest_test-sample1_unittest.Po b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/test_fused_gtest_test-sample1_unittest.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/samples/.deps/test_fused_gtest_test-sample1_unittest.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/samples/.dirstamp b/utils/bax2bam/third-party/gtest-1.7.0/samples/.dirstamp
new file mode 100644
index 0000000..e69de29
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/.dirstamp b/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/.dirstamp
new file mode 100644
index 0000000..e69de29
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/gtest-all.Plo b/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/gtest-all.Plo
new file mode 100644
index 0000000..1725dfe
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/gtest-all.Plo
@@ -0,0 +1,617 @@
+src/gtest-all.lo: src/gtest-all.cc include/gtest/gtest.h \
+ /usr/include/c++/4.4/limits \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/c++config.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/os_defines.h \
+ /usr/include/features.h /usr/include/bits/predefs.h \
+ /usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \
+ /usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/cpu_defines.h \
+ /usr/include/c++/4.4/ostream /usr/include/c++/4.4/ios \
+ /usr/include/c++/4.4/iosfwd /usr/include/c++/4.4/bits/stringfwd.h \
+ /usr/include/c++/4.4/bits/postypes.h /usr/include/c++/4.4/cwchar \
+ /usr/include/c++/4.4/cstddef \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h \
+ /usr/include/wchar.h /usr/include/stdio.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stdarg.h \
+ /usr/include/bits/wchar.h /usr/include/xlocale.h \
+ /usr/include/bits/wchar2.h /usr/include/c++/4.4/exception \
+ /usr/include/c++/4.4/bits/char_traits.h \
+ /usr/include/c++/4.4/bits/stl_algobase.h \
+ /usr/include/c++/4.4/bits/functexcept.h \
+ /usr/include/c++/4.4/exception_defines.h \
+ /usr/include/c++/4.4/bits/cpp_type_traits.h \
+ /usr/include/c++/4.4/ext/type_traits.h \
+ /usr/include/c++/4.4/ext/numeric_traits.h \
+ /usr/include/c++/4.4/bits/stl_pair.h /usr/include/c++/4.4/bits/move.h \
+ /usr/include/c++/4.4/bits/concept_check.h \
+ /usr/include/c++/4.4/bits/stl_iterator_base_types.h \
+ /usr/include/c++/4.4/bits/stl_iterator_base_funcs.h \
+ /usr/include/c++/4.4/bits/stl_iterator.h \
+ /usr/include/c++/4.4/debug/debug.h /usr/include/c++/4.4/bits/localefwd.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/c++locale.h \
+ /usr/include/c++/4.4/clocale /usr/include/locale.h \
+ /usr/include/bits/locale.h /usr/include/c++/4.4/cctype \
+ /usr/include/ctype.h /usr/include/bits/types.h \
+ /usr/include/bits/typesizes.h /usr/include/endian.h \
+ /usr/include/bits/endian.h /usr/include/bits/byteswap.h \
+ /usr/include/c++/4.4/bits/ios_base.h \
+ /usr/include/c++/4.4/ext/atomicity.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr-default.h \
+ /usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
+ /usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
+ /usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
+ /usr/include/bits/setjmp.h /usr/include/unistd.h \
+ /usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
+ /usr/include/bits/confname.h /usr/include/getopt.h \
+ /usr/include/bits/unistd.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/atomic_word.h \
+ /usr/include/c++/4.4/bits/locale_classes.h /usr/include/c++/4.4/string \
+ /usr/include/c++/4.4/bits/allocator.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/c++allocator.h \
+ /usr/include/c++/4.4/ext/new_allocator.h /usr/include/c++/4.4/new \
+ /usr/include/c++/4.4/bits/ostream_insert.h \
+ /usr/include/c++/4.4/cxxabi-forced.h \
+ /usr/include/c++/4.4/bits/stl_function.h \
+ /usr/include/c++/4.4/backward/binders.h \
+ /usr/include/c++/4.4/bits/basic_string.h \
+ /usr/include/c++/4.4/initializer_list \
+ /usr/include/c++/4.4/bits/basic_string.tcc \
+ /usr/include/c++/4.4/bits/locale_classes.tcc \
+ /usr/include/c++/4.4/streambuf /usr/include/c++/4.4/bits/streambuf.tcc \
+ /usr/include/c++/4.4/bits/basic_ios.h \
+ /usr/include/c++/4.4/bits/locale_facets.h /usr/include/c++/4.4/cwctype \
+ /usr/include/wctype.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_base.h \
+ /usr/include/c++/4.4/bits/streambuf_iterator.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_inline.h \
+ /usr/include/c++/4.4/bits/locale_facets.tcc \
+ /usr/include/c++/4.4/bits/basic_ios.tcc \
+ /usr/include/c++/4.4/bits/ostream.tcc /usr/include/c++/4.4/vector \
+ /usr/include/c++/4.4/bits/stl_construct.h \
+ /usr/include/c++/4.4/bits/stl_uninitialized.h \
+ /usr/include/c++/4.4/bits/stl_vector.h \
+ /usr/include/c++/4.4/bits/stl_bvector.h \
+ /usr/include/c++/4.4/bits/vector.tcc \
+ include/gtest/internal/gtest-internal.h \
+ include/gtest/internal/gtest-port.h /usr/include/stdlib.h \
+ /usr/include/bits/waitflags.h /usr/include/bits/waitstatus.h \
+ /usr/include/sys/types.h /usr/include/sys/select.h \
+ /usr/include/bits/select.h /usr/include/sys/sysmacros.h \
+ /usr/include/alloca.h /usr/include/bits/stdlib.h /usr/include/libio.h \
+ /usr/include/_G_config.h /usr/include/bits/stdio_lim.h \
+ /usr/include/bits/sys_errlist.h /usr/include/bits/stdio.h \
+ /usr/include/bits/stdio2.h /usr/include/string.h \
+ /usr/include/bits/string3.h /usr/include/sys/stat.h \
+ /usr/include/bits/stat.h /usr/include/c++/4.4/iostream \
+ /usr/include/c++/4.4/istream /usr/include/c++/4.4/bits/istream.tcc \
+ /usr/include/c++/4.4/sstream /usr/include/c++/4.4/bits/sstream.tcc \
+ /usr/include/strings.h /usr/include/regex.h \
+ /usr/include/gnu/option-groups.h /usr/include/c++/4.4/typeinfo \
+ /usr/include/c++/4.4/tr1/tuple /usr/include/c++/4.4/utility \
+ /usr/include/c++/4.4/bits/stl_relops.h /usr/include/sys/wait.h \
+ /usr/include/bits/signum.h /usr/include/bits/siginfo.h \
+ /usr/include/bits/sigaction.h /usr/include/bits/sigcontext.h \
+ /usr/include/bits/sigstack.h /usr/include/sys/ucontext.h \
+ /usr/include/bits/sigthread.h /usr/include/sys/resource.h \
+ /usr/include/bits/resource.h /usr/include/c++/4.4/stdexcept \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/float.h \
+ /usr/include/c++/4.4/iomanip /usr/include/c++/4.4/set \
+ /usr/include/c++/4.4/bits/stl_tree.h /usr/include/c++/4.4/bits/stl_set.h \
+ /usr/include/c++/4.4/bits/stl_multiset.h include/gtest/gtest-message.h \
+ include/gtest/internal/gtest-string.h \
+ include/gtest/internal/gtest-filepath.h \
+ include/gtest/internal/gtest-type-util.h /usr/include/c++/4.4/cxxabi.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/cxxabi_tweaks.h \
+ include/gtest/gtest-death-test.h \
+ include/gtest/internal/gtest-death-test-internal.h \
+ include/gtest/gtest-param-test.h \
+ include/gtest/internal/gtest-param-util.h /usr/include/c++/4.4/iterator \
+ /usr/include/c++/4.4/bits/stream_iterator.h \
+ include/gtest/internal/gtest-linked_ptr.h /usr/include/assert.h \
+ include/gtest/gtest-printers.h \
+ include/gtest/internal/gtest-param-util-generated.h \
+ include/gtest/gtest_prod.h include/gtest/gtest-test-part.h \
+ include/gtest/gtest-typed-test.h include/gtest/gtest_pred_impl.h \
+ src/gtest.cc include/gtest/gtest-spi.h /usr/include/math.h \
+ /usr/include/bits/huge_val.h /usr/include/bits/huge_valf.h \
+ /usr/include/bits/huge_vall.h /usr/include/bits/inf.h \
+ /usr/include/bits/nan.h /usr/include/bits/mathdef.h \
+ /usr/include/bits/mathcalls.h /usr/include/bits/mathinline.h \
+ /usr/include/c++/4.4/algorithm /usr/include/c++/4.4/bits/stl_algo.h \
+ /usr/include/c++/4.4/cstdlib /usr/include/c++/4.4/bits/algorithmfwd.h \
+ /usr/include/c++/4.4/bits/stl_heap.h \
+ /usr/include/c++/4.4/bits/stl_tempbuf.h /usr/include/fcntl.h \
+ /usr/include/bits/fcntl.h /usr/include/bits/uio.h \
+ /usr/include/bits/fcntl2.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed/limits.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed/syslimits.h \
+ /usr/include/limits.h /usr/include/bits/posix1_lim.h \
+ /usr/include/bits/local_lim.h /usr/include/linux/limits.h \
+ /usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
+ /usr/include/sys/mman.h /usr/include/bits/mman.h /usr/include/sys/time.h \
+ /usr/include/arpa/inet.h /usr/include/netinet/in.h /usr/include/stdint.h \
+ /usr/include/sys/socket.h /usr/include/sys/uio.h \
+ /usr/include/bits/socket.h /usr/include/bits/sockaddr.h \
+ /usr/include/asm/socket.h /usr/include/asm-generic/socket.h \
+ /usr/include/asm/sockios.h /usr/include/asm-generic/sockios.h \
+ /usr/include/bits/socket2.h /usr/include/bits/in.h /usr/include/netdb.h \
+ /usr/include/rpc/netdb.h /usr/include/bits/netdb.h \
+ src/gtest-internal-inl.h /usr/include/errno.h /usr/include/bits/errno.h \
+ /usr/include/linux/errno.h /usr/include/asm/errno.h \
+ /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \
+ src/gtest-death-test.cc src/gtest-filepath.cc \
+ /usr/include/c++/4.4/climits src/gtest-port.cc src/gtest-printers.cc \
+ src/gtest-test-part.cc src/gtest-typed-test.cc
+
+include/gtest/gtest.h:
+
+/usr/include/c++/4.4/limits:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/c++config.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/os_defines.h:
+
+/usr/include/features.h:
+
+/usr/include/bits/predefs.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/bits/wordsize.h:
+
+/usr/include/gnu/stubs.h:
+
+/usr/include/gnu/stubs-64.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/cpu_defines.h:
+
+/usr/include/c++/4.4/ostream:
+
+/usr/include/c++/4.4/ios:
+
+/usr/include/c++/4.4/iosfwd:
+
+/usr/include/c++/4.4/bits/stringfwd.h:
+
+/usr/include/c++/4.4/bits/postypes.h:
+
+/usr/include/c++/4.4/cwchar:
+
+/usr/include/c++/4.4/cstddef:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h:
+
+/usr/include/wchar.h:
+
+/usr/include/stdio.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stdarg.h:
+
+/usr/include/bits/wchar.h:
+
+/usr/include/xlocale.h:
+
+/usr/include/bits/wchar2.h:
+
+/usr/include/c++/4.4/exception:
+
+/usr/include/c++/4.4/bits/char_traits.h:
+
+/usr/include/c++/4.4/bits/stl_algobase.h:
+
+/usr/include/c++/4.4/bits/functexcept.h:
+
+/usr/include/c++/4.4/exception_defines.h:
+
+/usr/include/c++/4.4/bits/cpp_type_traits.h:
+
+/usr/include/c++/4.4/ext/type_traits.h:
+
+/usr/include/c++/4.4/ext/numeric_traits.h:
+
+/usr/include/c++/4.4/bits/stl_pair.h:
+
+/usr/include/c++/4.4/bits/move.h:
+
+/usr/include/c++/4.4/bits/concept_check.h:
+
+/usr/include/c++/4.4/bits/stl_iterator_base_types.h:
+
+/usr/include/c++/4.4/bits/stl_iterator_base_funcs.h:
+
+/usr/include/c++/4.4/bits/stl_iterator.h:
+
+/usr/include/c++/4.4/debug/debug.h:
+
+/usr/include/c++/4.4/bits/localefwd.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/c++locale.h:
+
+/usr/include/c++/4.4/clocale:
+
+/usr/include/locale.h:
+
+/usr/include/bits/locale.h:
+
+/usr/include/c++/4.4/cctype:
+
+/usr/include/ctype.h:
+
+/usr/include/bits/types.h:
+
+/usr/include/bits/typesizes.h:
+
+/usr/include/endian.h:
+
+/usr/include/bits/endian.h:
+
+/usr/include/bits/byteswap.h:
+
+/usr/include/c++/4.4/bits/ios_base.h:
+
+/usr/include/c++/4.4/ext/atomicity.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr-default.h:
+
+/usr/include/pthread.h:
+
+/usr/include/sched.h:
+
+/usr/include/time.h:
+
+/usr/include/bits/sched.h:
+
+/usr/include/bits/time.h:
+
+/usr/include/signal.h:
+
+/usr/include/bits/sigset.h:
+
+/usr/include/bits/pthreadtypes.h:
+
+/usr/include/bits/setjmp.h:
+
+/usr/include/unistd.h:
+
+/usr/include/bits/posix_opt.h:
+
+/usr/include/bits/environments.h:
+
+/usr/include/bits/confname.h:
+
+/usr/include/getopt.h:
+
+/usr/include/bits/unistd.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/atomic_word.h:
+
+/usr/include/c++/4.4/bits/locale_classes.h:
+
+/usr/include/c++/4.4/string:
+
+/usr/include/c++/4.4/bits/allocator.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/c++allocator.h:
+
+/usr/include/c++/4.4/ext/new_allocator.h:
+
+/usr/include/c++/4.4/new:
+
+/usr/include/c++/4.4/bits/ostream_insert.h:
+
+/usr/include/c++/4.4/cxxabi-forced.h:
+
+/usr/include/c++/4.4/bits/stl_function.h:
+
+/usr/include/c++/4.4/backward/binders.h:
+
+/usr/include/c++/4.4/bits/basic_string.h:
+
+/usr/include/c++/4.4/initializer_list:
+
+/usr/include/c++/4.4/bits/basic_string.tcc:
+
+/usr/include/c++/4.4/bits/locale_classes.tcc:
+
+/usr/include/c++/4.4/streambuf:
+
+/usr/include/c++/4.4/bits/streambuf.tcc:
+
+/usr/include/c++/4.4/bits/basic_ios.h:
+
+/usr/include/c++/4.4/bits/locale_facets.h:
+
+/usr/include/c++/4.4/cwctype:
+
+/usr/include/wctype.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_base.h:
+
+/usr/include/c++/4.4/bits/streambuf_iterator.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_inline.h:
+
+/usr/include/c++/4.4/bits/locale_facets.tcc:
+
+/usr/include/c++/4.4/bits/basic_ios.tcc:
+
+/usr/include/c++/4.4/bits/ostream.tcc:
+
+/usr/include/c++/4.4/vector:
+
+/usr/include/c++/4.4/bits/stl_construct.h:
+
+/usr/include/c++/4.4/bits/stl_uninitialized.h:
+
+/usr/include/c++/4.4/bits/stl_vector.h:
+
+/usr/include/c++/4.4/bits/stl_bvector.h:
+
+/usr/include/c++/4.4/bits/vector.tcc:
+
+include/gtest/internal/gtest-internal.h:
+
+include/gtest/internal/gtest-port.h:
+
+/usr/include/stdlib.h:
+
+/usr/include/bits/waitflags.h:
+
+/usr/include/bits/waitstatus.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/bits/select.h:
+
+/usr/include/sys/sysmacros.h:
+
+/usr/include/alloca.h:
+
+/usr/include/bits/stdlib.h:
+
+/usr/include/libio.h:
+
+/usr/include/_G_config.h:
+
+/usr/include/bits/stdio_lim.h:
+
+/usr/include/bits/sys_errlist.h:
+
+/usr/include/bits/stdio.h:
+
+/usr/include/bits/stdio2.h:
+
+/usr/include/string.h:
+
+/usr/include/bits/string3.h:
+
+/usr/include/sys/stat.h:
+
+/usr/include/bits/stat.h:
+
+/usr/include/c++/4.4/iostream:
+
+/usr/include/c++/4.4/istream:
+
+/usr/include/c++/4.4/bits/istream.tcc:
+
+/usr/include/c++/4.4/sstream:
+
+/usr/include/c++/4.4/bits/sstream.tcc:
+
+/usr/include/strings.h:
+
+/usr/include/regex.h:
+
+/usr/include/gnu/option-groups.h:
+
+/usr/include/c++/4.4/typeinfo:
+
+/usr/include/c++/4.4/tr1/tuple:
+
+/usr/include/c++/4.4/utility:
+
+/usr/include/c++/4.4/bits/stl_relops.h:
+
+/usr/include/sys/wait.h:
+
+/usr/include/bits/signum.h:
+
+/usr/include/bits/siginfo.h:
+
+/usr/include/bits/sigaction.h:
+
+/usr/include/bits/sigcontext.h:
+
+/usr/include/bits/sigstack.h:
+
+/usr/include/sys/ucontext.h:
+
+/usr/include/bits/sigthread.h:
+
+/usr/include/sys/resource.h:
+
+/usr/include/bits/resource.h:
+
+/usr/include/c++/4.4/stdexcept:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/float.h:
+
+/usr/include/c++/4.4/iomanip:
+
+/usr/include/c++/4.4/set:
+
+/usr/include/c++/4.4/bits/stl_tree.h:
+
+/usr/include/c++/4.4/bits/stl_set.h:
+
+/usr/include/c++/4.4/bits/stl_multiset.h:
+
+include/gtest/gtest-message.h:
+
+include/gtest/internal/gtest-string.h:
+
+include/gtest/internal/gtest-filepath.h:
+
+include/gtest/internal/gtest-type-util.h:
+
+/usr/include/c++/4.4/cxxabi.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/cxxabi_tweaks.h:
+
+include/gtest/gtest-death-test.h:
+
+include/gtest/internal/gtest-death-test-internal.h:
+
+include/gtest/gtest-param-test.h:
+
+include/gtest/internal/gtest-param-util.h:
+
+/usr/include/c++/4.4/iterator:
+
+/usr/include/c++/4.4/bits/stream_iterator.h:
+
+include/gtest/internal/gtest-linked_ptr.h:
+
+/usr/include/assert.h:
+
+include/gtest/gtest-printers.h:
+
+include/gtest/internal/gtest-param-util-generated.h:
+
+include/gtest/gtest_prod.h:
+
+include/gtest/gtest-test-part.h:
+
+include/gtest/gtest-typed-test.h:
+
+include/gtest/gtest_pred_impl.h:
+
+src/gtest.cc:
+
+include/gtest/gtest-spi.h:
+
+/usr/include/math.h:
+
+/usr/include/bits/huge_val.h:
+
+/usr/include/bits/huge_valf.h:
+
+/usr/include/bits/huge_vall.h:
+
+/usr/include/bits/inf.h:
+
+/usr/include/bits/nan.h:
+
+/usr/include/bits/mathdef.h:
+
+/usr/include/bits/mathcalls.h:
+
+/usr/include/bits/mathinline.h:
+
+/usr/include/c++/4.4/algorithm:
+
+/usr/include/c++/4.4/bits/stl_algo.h:
+
+/usr/include/c++/4.4/cstdlib:
+
+/usr/include/c++/4.4/bits/algorithmfwd.h:
+
+/usr/include/c++/4.4/bits/stl_heap.h:
+
+/usr/include/c++/4.4/bits/stl_tempbuf.h:
+
+/usr/include/fcntl.h:
+
+/usr/include/bits/fcntl.h:
+
+/usr/include/bits/uio.h:
+
+/usr/include/bits/fcntl2.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed/limits.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include-fixed/syslimits.h:
+
+/usr/include/limits.h:
+
+/usr/include/bits/posix1_lim.h:
+
+/usr/include/bits/local_lim.h:
+
+/usr/include/linux/limits.h:
+
+/usr/include/bits/posix2_lim.h:
+
+/usr/include/bits/xopen_lim.h:
+
+/usr/include/sys/mman.h:
+
+/usr/include/bits/mman.h:
+
+/usr/include/sys/time.h:
+
+/usr/include/arpa/inet.h:
+
+/usr/include/netinet/in.h:
+
+/usr/include/stdint.h:
+
+/usr/include/sys/socket.h:
+
+/usr/include/sys/uio.h:
+
+/usr/include/bits/socket.h:
+
+/usr/include/bits/sockaddr.h:
+
+/usr/include/asm/socket.h:
+
+/usr/include/asm-generic/socket.h:
+
+/usr/include/asm/sockios.h:
+
+/usr/include/asm-generic/sockios.h:
+
+/usr/include/bits/socket2.h:
+
+/usr/include/bits/in.h:
+
+/usr/include/netdb.h:
+
+/usr/include/rpc/netdb.h:
+
+/usr/include/bits/netdb.h:
+
+src/gtest-internal-inl.h:
+
+/usr/include/errno.h:
+
+/usr/include/bits/errno.h:
+
+/usr/include/linux/errno.h:
+
+/usr/include/asm/errno.h:
+
+/usr/include/asm-generic/errno.h:
+
+/usr/include/asm-generic/errno-base.h:
+
+src/gtest-death-test.cc:
+
+src/gtest-filepath.cc:
+
+/usr/include/c++/4.4/climits:
+
+src/gtest-port.cc:
+
+src/gtest-printers.cc:
+
+src/gtest-test-part.cc:
+
+src/gtest-typed-test.cc:
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/gtest_main.Plo b/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/gtest_main.Plo
new file mode 100644
index 0000000..6efd5bf
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/src/.deps/gtest_main.Plo
@@ -0,0 +1,460 @@
+src/gtest_main.lo: src/gtest_main.cc /usr/include/stdio.h \
+ /usr/include/features.h /usr/include/bits/predefs.h \
+ /usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \
+ /usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h \
+ /usr/include/bits/types.h /usr/include/bits/typesizes.h \
+ /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stdarg.h \
+ /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \
+ /usr/include/bits/stdio.h /usr/include/bits/stdio2.h \
+ include/gtest/gtest.h /usr/include/c++/4.4/limits \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/c++config.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/os_defines.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/cpu_defines.h \
+ /usr/include/c++/4.4/ostream /usr/include/c++/4.4/ios \
+ /usr/include/c++/4.4/iosfwd /usr/include/c++/4.4/bits/stringfwd.h \
+ /usr/include/c++/4.4/bits/postypes.h /usr/include/c++/4.4/cwchar \
+ /usr/include/c++/4.4/cstddef /usr/include/bits/wchar.h \
+ /usr/include/xlocale.h /usr/include/bits/wchar2.h \
+ /usr/include/c++/4.4/exception /usr/include/c++/4.4/bits/char_traits.h \
+ /usr/include/c++/4.4/bits/stl_algobase.h \
+ /usr/include/c++/4.4/bits/functexcept.h \
+ /usr/include/c++/4.4/exception_defines.h \
+ /usr/include/c++/4.4/bits/cpp_type_traits.h \
+ /usr/include/c++/4.4/ext/type_traits.h \
+ /usr/include/c++/4.4/ext/numeric_traits.h \
+ /usr/include/c++/4.4/bits/stl_pair.h /usr/include/c++/4.4/bits/move.h \
+ /usr/include/c++/4.4/bits/concept_check.h \
+ /usr/include/c++/4.4/bits/stl_iterator_base_types.h \
+ /usr/include/c++/4.4/bits/stl_iterator_base_funcs.h \
+ /usr/include/c++/4.4/bits/stl_iterator.h \
+ /usr/include/c++/4.4/debug/debug.h /usr/include/c++/4.4/bits/localefwd.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/c++locale.h \
+ /usr/include/c++/4.4/clocale /usr/include/locale.h \
+ /usr/include/bits/locale.h /usr/include/c++/4.4/cctype \
+ /usr/include/ctype.h /usr/include/endian.h /usr/include/bits/endian.h \
+ /usr/include/bits/byteswap.h /usr/include/c++/4.4/bits/ios_base.h \
+ /usr/include/c++/4.4/ext/atomicity.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr-default.h \
+ /usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
+ /usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
+ /usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
+ /usr/include/bits/setjmp.h /usr/include/unistd.h \
+ /usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
+ /usr/include/bits/confname.h /usr/include/getopt.h \
+ /usr/include/bits/unistd.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/atomic_word.h \
+ /usr/include/c++/4.4/bits/locale_classes.h /usr/include/c++/4.4/string \
+ /usr/include/c++/4.4/bits/allocator.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/c++allocator.h \
+ /usr/include/c++/4.4/ext/new_allocator.h /usr/include/c++/4.4/new \
+ /usr/include/c++/4.4/bits/ostream_insert.h \
+ /usr/include/c++/4.4/cxxabi-forced.h \
+ /usr/include/c++/4.4/bits/stl_function.h \
+ /usr/include/c++/4.4/backward/binders.h \
+ /usr/include/c++/4.4/bits/basic_string.h \
+ /usr/include/c++/4.4/initializer_list \
+ /usr/include/c++/4.4/bits/basic_string.tcc \
+ /usr/include/c++/4.4/bits/locale_classes.tcc \
+ /usr/include/c++/4.4/streambuf /usr/include/c++/4.4/bits/streambuf.tcc \
+ /usr/include/c++/4.4/bits/basic_ios.h \
+ /usr/include/c++/4.4/bits/locale_facets.h /usr/include/c++/4.4/cwctype \
+ /usr/include/wctype.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_base.h \
+ /usr/include/c++/4.4/bits/streambuf_iterator.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_inline.h \
+ /usr/include/c++/4.4/bits/locale_facets.tcc \
+ /usr/include/c++/4.4/bits/basic_ios.tcc \
+ /usr/include/c++/4.4/bits/ostream.tcc /usr/include/c++/4.4/vector \
+ /usr/include/c++/4.4/bits/stl_construct.h \
+ /usr/include/c++/4.4/bits/stl_uninitialized.h \
+ /usr/include/c++/4.4/bits/stl_vector.h \
+ /usr/include/c++/4.4/bits/stl_bvector.h \
+ /usr/include/c++/4.4/bits/vector.tcc \
+ include/gtest/internal/gtest-internal.h \
+ include/gtest/internal/gtest-port.h /usr/include/stdlib.h \
+ /usr/include/bits/waitflags.h /usr/include/bits/waitstatus.h \
+ /usr/include/sys/types.h /usr/include/sys/select.h \
+ /usr/include/bits/select.h /usr/include/sys/sysmacros.h \
+ /usr/include/alloca.h /usr/include/bits/stdlib.h /usr/include/string.h \
+ /usr/include/bits/string3.h /usr/include/sys/stat.h \
+ /usr/include/bits/stat.h /usr/include/c++/4.4/iostream \
+ /usr/include/c++/4.4/istream /usr/include/c++/4.4/bits/istream.tcc \
+ /usr/include/c++/4.4/sstream /usr/include/c++/4.4/bits/sstream.tcc \
+ /usr/include/strings.h /usr/include/regex.h \
+ /usr/include/gnu/option-groups.h /usr/include/c++/4.4/typeinfo \
+ /usr/include/c++/4.4/tr1/tuple /usr/include/c++/4.4/utility \
+ /usr/include/c++/4.4/bits/stl_relops.h /usr/include/sys/wait.h \
+ /usr/include/bits/signum.h /usr/include/bits/siginfo.h \
+ /usr/include/bits/sigaction.h /usr/include/bits/sigcontext.h \
+ /usr/include/bits/sigstack.h /usr/include/sys/ucontext.h \
+ /usr/include/bits/sigthread.h /usr/include/sys/resource.h \
+ /usr/include/bits/resource.h /usr/include/c++/4.4/stdexcept \
+ /usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/float.h \
+ /usr/include/c++/4.4/iomanip /usr/include/c++/4.4/set \
+ /usr/include/c++/4.4/bits/stl_tree.h /usr/include/c++/4.4/bits/stl_set.h \
+ /usr/include/c++/4.4/bits/stl_multiset.h include/gtest/gtest-message.h \
+ include/gtest/internal/gtest-string.h \
+ include/gtest/internal/gtest-filepath.h \
+ include/gtest/internal/gtest-type-util.h /usr/include/c++/4.4/cxxabi.h \
+ /usr/include/c++/4.4/x86_64-linux-gnu/bits/cxxabi_tweaks.h \
+ include/gtest/gtest-death-test.h \
+ include/gtest/internal/gtest-death-test-internal.h \
+ include/gtest/gtest-param-test.h \
+ include/gtest/internal/gtest-param-util.h /usr/include/c++/4.4/iterator \
+ /usr/include/c++/4.4/bits/stream_iterator.h \
+ include/gtest/internal/gtest-linked_ptr.h /usr/include/assert.h \
+ include/gtest/gtest-printers.h \
+ include/gtest/internal/gtest-param-util-generated.h \
+ include/gtest/gtest_prod.h include/gtest/gtest-test-part.h \
+ include/gtest/gtest-typed-test.h include/gtest/gtest_pred_impl.h
+
+/usr/include/stdio.h:
+
+/usr/include/features.h:
+
+/usr/include/bits/predefs.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/bits/wordsize.h:
+
+/usr/include/gnu/stubs.h:
+
+/usr/include/gnu/stubs-64.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h:
+
+/usr/include/bits/types.h:
+
+/usr/include/bits/typesizes.h:
+
+/usr/include/libio.h:
+
+/usr/include/_G_config.h:
+
+/usr/include/wchar.h:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stdarg.h:
+
+/usr/include/bits/stdio_lim.h:
+
+/usr/include/bits/sys_errlist.h:
+
+/usr/include/bits/stdio.h:
+
+/usr/include/bits/stdio2.h:
+
+include/gtest/gtest.h:
+
+/usr/include/c++/4.4/limits:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/c++config.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/os_defines.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/cpu_defines.h:
+
+/usr/include/c++/4.4/ostream:
+
+/usr/include/c++/4.4/ios:
+
+/usr/include/c++/4.4/iosfwd:
+
+/usr/include/c++/4.4/bits/stringfwd.h:
+
+/usr/include/c++/4.4/bits/postypes.h:
+
+/usr/include/c++/4.4/cwchar:
+
+/usr/include/c++/4.4/cstddef:
+
+/usr/include/bits/wchar.h:
+
+/usr/include/xlocale.h:
+
+/usr/include/bits/wchar2.h:
+
+/usr/include/c++/4.4/exception:
+
+/usr/include/c++/4.4/bits/char_traits.h:
+
+/usr/include/c++/4.4/bits/stl_algobase.h:
+
+/usr/include/c++/4.4/bits/functexcept.h:
+
+/usr/include/c++/4.4/exception_defines.h:
+
+/usr/include/c++/4.4/bits/cpp_type_traits.h:
+
+/usr/include/c++/4.4/ext/type_traits.h:
+
+/usr/include/c++/4.4/ext/numeric_traits.h:
+
+/usr/include/c++/4.4/bits/stl_pair.h:
+
+/usr/include/c++/4.4/bits/move.h:
+
+/usr/include/c++/4.4/bits/concept_check.h:
+
+/usr/include/c++/4.4/bits/stl_iterator_base_types.h:
+
+/usr/include/c++/4.4/bits/stl_iterator_base_funcs.h:
+
+/usr/include/c++/4.4/bits/stl_iterator.h:
+
+/usr/include/c++/4.4/debug/debug.h:
+
+/usr/include/c++/4.4/bits/localefwd.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/c++locale.h:
+
+/usr/include/c++/4.4/clocale:
+
+/usr/include/locale.h:
+
+/usr/include/bits/locale.h:
+
+/usr/include/c++/4.4/cctype:
+
+/usr/include/ctype.h:
+
+/usr/include/endian.h:
+
+/usr/include/bits/endian.h:
+
+/usr/include/bits/byteswap.h:
+
+/usr/include/c++/4.4/bits/ios_base.h:
+
+/usr/include/c++/4.4/ext/atomicity.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/gthr-default.h:
+
+/usr/include/pthread.h:
+
+/usr/include/sched.h:
+
+/usr/include/time.h:
+
+/usr/include/bits/sched.h:
+
+/usr/include/bits/time.h:
+
+/usr/include/signal.h:
+
+/usr/include/bits/sigset.h:
+
+/usr/include/bits/pthreadtypes.h:
+
+/usr/include/bits/setjmp.h:
+
+/usr/include/unistd.h:
+
+/usr/include/bits/posix_opt.h:
+
+/usr/include/bits/environments.h:
+
+/usr/include/bits/confname.h:
+
+/usr/include/getopt.h:
+
+/usr/include/bits/unistd.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/atomic_word.h:
+
+/usr/include/c++/4.4/bits/locale_classes.h:
+
+/usr/include/c++/4.4/string:
+
+/usr/include/c++/4.4/bits/allocator.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/c++allocator.h:
+
+/usr/include/c++/4.4/ext/new_allocator.h:
+
+/usr/include/c++/4.4/new:
+
+/usr/include/c++/4.4/bits/ostream_insert.h:
+
+/usr/include/c++/4.4/cxxabi-forced.h:
+
+/usr/include/c++/4.4/bits/stl_function.h:
+
+/usr/include/c++/4.4/backward/binders.h:
+
+/usr/include/c++/4.4/bits/basic_string.h:
+
+/usr/include/c++/4.4/initializer_list:
+
+/usr/include/c++/4.4/bits/basic_string.tcc:
+
+/usr/include/c++/4.4/bits/locale_classes.tcc:
+
+/usr/include/c++/4.4/streambuf:
+
+/usr/include/c++/4.4/bits/streambuf.tcc:
+
+/usr/include/c++/4.4/bits/basic_ios.h:
+
+/usr/include/c++/4.4/bits/locale_facets.h:
+
+/usr/include/c++/4.4/cwctype:
+
+/usr/include/wctype.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_base.h:
+
+/usr/include/c++/4.4/bits/streambuf_iterator.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/ctype_inline.h:
+
+/usr/include/c++/4.4/bits/locale_facets.tcc:
+
+/usr/include/c++/4.4/bits/basic_ios.tcc:
+
+/usr/include/c++/4.4/bits/ostream.tcc:
+
+/usr/include/c++/4.4/vector:
+
+/usr/include/c++/4.4/bits/stl_construct.h:
+
+/usr/include/c++/4.4/bits/stl_uninitialized.h:
+
+/usr/include/c++/4.4/bits/stl_vector.h:
+
+/usr/include/c++/4.4/bits/stl_bvector.h:
+
+/usr/include/c++/4.4/bits/vector.tcc:
+
+include/gtest/internal/gtest-internal.h:
+
+include/gtest/internal/gtest-port.h:
+
+/usr/include/stdlib.h:
+
+/usr/include/bits/waitflags.h:
+
+/usr/include/bits/waitstatus.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/bits/select.h:
+
+/usr/include/sys/sysmacros.h:
+
+/usr/include/alloca.h:
+
+/usr/include/bits/stdlib.h:
+
+/usr/include/string.h:
+
+/usr/include/bits/string3.h:
+
+/usr/include/sys/stat.h:
+
+/usr/include/bits/stat.h:
+
+/usr/include/c++/4.4/iostream:
+
+/usr/include/c++/4.4/istream:
+
+/usr/include/c++/4.4/bits/istream.tcc:
+
+/usr/include/c++/4.4/sstream:
+
+/usr/include/c++/4.4/bits/sstream.tcc:
+
+/usr/include/strings.h:
+
+/usr/include/regex.h:
+
+/usr/include/gnu/option-groups.h:
+
+/usr/include/c++/4.4/typeinfo:
+
+/usr/include/c++/4.4/tr1/tuple:
+
+/usr/include/c++/4.4/utility:
+
+/usr/include/c++/4.4/bits/stl_relops.h:
+
+/usr/include/sys/wait.h:
+
+/usr/include/bits/signum.h:
+
+/usr/include/bits/siginfo.h:
+
+/usr/include/bits/sigaction.h:
+
+/usr/include/bits/sigcontext.h:
+
+/usr/include/bits/sigstack.h:
+
+/usr/include/sys/ucontext.h:
+
+/usr/include/bits/sigthread.h:
+
+/usr/include/sys/resource.h:
+
+/usr/include/bits/resource.h:
+
+/usr/include/c++/4.4/stdexcept:
+
+/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/float.h:
+
+/usr/include/c++/4.4/iomanip:
+
+/usr/include/c++/4.4/set:
+
+/usr/include/c++/4.4/bits/stl_tree.h:
+
+/usr/include/c++/4.4/bits/stl_set.h:
+
+/usr/include/c++/4.4/bits/stl_multiset.h:
+
+include/gtest/gtest-message.h:
+
+include/gtest/internal/gtest-string.h:
+
+include/gtest/internal/gtest-filepath.h:
+
+include/gtest/internal/gtest-type-util.h:
+
+/usr/include/c++/4.4/cxxabi.h:
+
+/usr/include/c++/4.4/x86_64-linux-gnu/bits/cxxabi_tweaks.h:
+
+include/gtest/gtest-death-test.h:
+
+include/gtest/internal/gtest-death-test-internal.h:
+
+include/gtest/gtest-param-test.h:
+
+include/gtest/internal/gtest-param-util.h:
+
+/usr/include/c++/4.4/iterator:
+
+/usr/include/c++/4.4/bits/stream_iterator.h:
+
+include/gtest/internal/gtest-linked_ptr.h:
+
+/usr/include/assert.h:
+
+include/gtest/gtest-printers.h:
+
+include/gtest/internal/gtest-param-util-generated.h:
+
+include/gtest/gtest_prod.h:
+
+include/gtest/gtest-test-part.h:
+
+include/gtest/gtest-typed-test.h:
+
+include/gtest/gtest_pred_impl.h:
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/src/.dirstamp b/utils/bax2bam/third-party/gtest-1.7.0/src/.dirstamp
new file mode 100644
index 0000000..e69de29
diff --git a/utils/bax2bam/third-party/gtest-1.7.0/test/.deps/gtest_all_test.Po b/utils/bax2bam/third-party/gtest-1.7.0/test/.deps/gtest_all_test.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/utils/bax2bam/third-party/gtest-1.7.0/test/.deps/gtest_all_test.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/utils/ctest/.gitignore b/utils/ctest/.gitignore
new file mode 100644
index 0000000..6a3417b
--- /dev/null
+++ b/utils/ctest/.gitignore
@@ -0,0 +1 @@
+/out/
diff --git a/utils/makefile b/utils/makefile
index 88a7310..f2fabd7 100644
--- a/utils/makefile
+++ b/utils/makefile
@@ -4,20 +4,32 @@ SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
 -include ${CURDIR}/../defines.mk
 include ${SRCDIR}/../rules.mk
 
+CXXFLAGS ?= -O3 -g
 CXXOPTS := -std=c++0x -pedantic \
-           -Wall -Wuninitialized -Wno-div-by-zero \
-           -MMD -MP -w -fpermissive
-CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS}
+           -Wall -Wextra -Wno-overloaded-virtual \
+           -MMD -MP
+override CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS}
 
 EXE = loadPulses pls2fasta samtoh5 samtom4 samFilter toAfg sawriter sdpMatcher
 
-LD_LIBRARY_PATH=${HDF5_LIB}:${LIBBLASR_LIB}:${LIBPBIHDF_LIB}:${LIBPBDATA_LIB}
+LD_LIBRARY_PATH:=${HDF5_LIB}:${LIBBLASR_LIB}:${LIBPBIHDF_LIB}:${LIBPBDATA_LIB}:${LD_LIBRARY_PATH}
 export LD_LIBRARY_PATH
 
 vpath %.cpp ${SRCDIR}
 
 all: ${EXE}
 
+bam2bax: ${CURDIR}/bam2bax/bin/bam2bax
+
+${CURDIR}/bam2bax/bin/bam2bax:
+	@cd ${CURDIR}/bam2bax && make all
+
+bax2bam: ${CURDIR}/bax2bam/bin/bax2bam
+
+${CURDIR}/bax2bam/bin/bax2bam:
+	@cd ${CURDIR}/bax2bam && make all
+
+
 ${EXE}:
 	${CXX} -o $@ $< ${CXXFLAGS} ${CPPFLAGS} -MF"${@:%=%.d}" ${STATIC} ${LDFLAGS} ${LDLIBS}
 
@@ -45,3 +57,5 @@ cramfast: ${EXE}
 clean: 
 	@rm -f ${EXE}
 	@rm -f *.d *.o
+	@cd ${CURDIR}/bam2bax && make clean && cd ${CURDIR}
+	@cd ${CURDIR}/bax2bam && make clean && cd ${CURDIR}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/blasr.git



More information about the debian-med-commit mailing list