[med-svn] [libsmithwaterman] 01/02: Imported Upstream version 0.0+20151117

Andreas Tille tille at debian.org
Wed Jun 22 11:47:11 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository libsmithwaterman.

commit 7c48bcde4e68549156c6e3151319e3856d144546
Author: Andreas Tille <tille at debian.org>
Date:   Wed Jun 22 13:44:54 2016 +0200

    Imported Upstream version 0.0+20151117
---
 .gitignore              |   3 +
 BandedSmithWaterman.cpp | 670 +++++++++++++++++++++++++++++++++++++
 BandedSmithWaterman.h   | 111 +++++++
 IndelAllele.cpp         |  62 ++++
 IndelAllele.h           |  37 +++
 LeftAlign.cpp           | 853 ++++++++++++++++++++++++++++++++++++++++++++++++
 LeftAlign.h             |  32 ++
 Makefile                |  66 ++++
 Mosaik.h                |  73 +++++
 Repeats.cpp             |  69 ++++
 Repeats.h               |   8 +
 SWMain.cpp              | 126 +++++++
 SmithWatermanGotoh.cpp  | 741 +++++++++++++++++++++++++++++++++++++++++
 SmithWatermanGotoh.h    | 103 ++++++
 convert.h               |  22 ++
 disorder.cpp            | 191 +++++++++++
 disorder.h              |  62 ++++
 examples.txt            |  76 +++++
 libdisorder.LICENSE     | 339 +++++++++++++++++++
 smithwaterman.cpp       | 306 +++++++++++++++++
 20 files changed, 3950 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e36f800
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.*.swp
+*.o
+smithwaterman
diff --git a/BandedSmithWaterman.cpp b/BandedSmithWaterman.cpp
new file mode 100644
index 0000000..a961f7c
--- /dev/null
+++ b/BandedSmithWaterman.cpp
@@ -0,0 +1,670 @@
+#include "BandedSmithWaterman.h"
+
+// define our static constants
+const float CBandedSmithWaterman::FLOAT_NEGATIVE_INFINITY = (float)-1e+30;
+
+const DirectionType CBandedSmithWaterman::Directions_STOP     = 0;
+const DirectionType CBandedSmithWaterman::Directions_LEFT     = 1;
+const DirectionType CBandedSmithWaterman::Directions_DIAGONAL = 2;
+const DirectionType CBandedSmithWaterman::Directions_UP       = 3;
+
+const PositionType CBandedSmithWaterman::Position_REF_AND_QUERY_ZERO    = 0;
+const PositionType CBandedSmithWaterman::Position_REF_ZERO              = 1;
+const PositionType CBandedSmithWaterman::Position_QUERY_ZERO            = 2;
+const PositionType CBandedSmithWaterman::Position_REF_AND_QUERO_NONZERO = 3;
+
+// constructor
+CBandedSmithWaterman::CBandedSmithWaterman(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty, unsigned int bandWidth) 
+: mCurrentMatrixSize(0)
+, mCurrentAnchorSize(0)
+, mCurrentAQSumSize(0)
+, mBandwidth(bandWidth)
+, mPointers(NULL)
+, mMatchScore(matchScore)
+, mMismatchScore(mismatchScore)
+, mGapOpenPenalty(gapOpenPenalty)
+, mGapExtendPenalty(gapExtendPenalty)
+, mAnchorGapScores(NULL)
+, mBestScores(NULL)
+, mReversedAnchor(NULL)
+, mReversedQuery(NULL)
+, mUseHomoPolymerGapOpenPenalty(false)
+{
+	CreateScoringMatrix();
+
+	//if((bandWidth % 2) != 1) {
+		//printf("ERROR: The bandwidth must be an odd number.\n");
+		//exit(1);
+	//}
+
+	try {
+		mBestScores	 = new float[bandWidth + 2];
+		mAnchorGapScores = new float[bandWidth + 2];
+	} catch(bad_alloc) {
+		printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
+		exit(1);
+	}
+}
+
+// destructor
+CBandedSmithWaterman::~CBandedSmithWaterman(void) {
+	if(mPointers)              delete [] mPointers;
+	if(mAnchorGapScores)       delete [] mAnchorGapScores;
+	if(mBestScores)            delete [] mBestScores;
+	if(mReversedAnchor)        delete [] mReversedAnchor;
+	if(mReversedQuery)         delete [] mReversedQuery;
+}
+
+// aligns the query sequence to the anchor using the Smith Waterman Gotoh algorithm
+void CBandedSmithWaterman::Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr) {
+
+
+	
+	unsigned int rowStart = min(hr.first.first, (unsigned int)hr.second.first);
+	hr.first.first    -= rowStart;
+	hr.second.first   -= rowStart;
+	
+	//bool isLegalBandWidth = (s2.length() - hr.QueryBegin) > (mBandwidth / 2);
+	//     isLegalBandWidth = isLegalBandWidth && ((s1.length() - hr.Begin) > (mBandwidth / 2));
+
+
+
+	// check the lengths of the input sequences
+	//if( (s1.length() <= 0) || (s2.length() <= 0) || (s1.length() < s2.length()) ) {
+	//	printf("ERROR: An unexpected sequence length was encountered during pairwise alignment.\n");
+	//	printf("Sequence lengths are listed as following:\n");
+	//	printf("1. Reference length: %u\n2. Query length: %u\n", s1.length(), s2.length());
+		//printf("3. Hash region in reference:%4u-%4u\n", hr.Begin + rowStart, hr.End);
+		//printf("4. Hash region in query:    %4u-%4u\n", hr.QueryBegin + rowStart, hr.QueryEnd);
+	//	exit(1);
+	//}
+
+	
+	// determine the hash region type
+	unsigned int rowOffset;
+	unsigned int columnOffset;
+	PositionType positionType;
+
+	if(hr.first.first == 0) {
+		if(hr.second.first == 0) {
+			rowOffset    = 1;
+			columnOffset = (mBandwidth / 2) + 1;
+			positionType = Position_REF_AND_QUERY_ZERO;
+		} else {
+			rowOffset    = 1 - hr.second.first;
+			columnOffset = (mBandwidth / 2) + 1 + hr.second.first;
+			positionType = Position_REF_ZERO;
+		}
+	} else {
+		if(hr.second.first == 0) {
+			rowOffset    = 1;
+			columnOffset = (mBandwidth / 2) + 1 - hr.first.first;
+			positionType = Position_QUERY_ZERO;
+		} else {
+			rowOffset    = 1 - hr.second.first;
+			columnOffset = (mBandwidth / 2) + 1 + hr.second.first - hr.first.first;
+			positionType = Position_REF_AND_QUERO_NONZERO;
+		}
+	}
+
+	// =========================
+	// Reinitialize the matrices
+	// =========================
+	
+	ReinitializeMatrices(positionType, s1.length(), s2.length(), hr);
+
+	// =======================================
+	// Banded Smith-Waterman forward algorithm
+	// =======================================
+
+	unsigned int bestColumn	= 0;
+	unsigned int bestRow	= 0;
+	float bestScore         = FLOAT_NEGATIVE_INFINITY;
+	float currentQueryGapScore;
+
+	// rowNum and column indicate the row and column numbers in the Smith-Waterman matrix respectively
+	unsigned int rowNum    = hr.second.first;
+	unsigned int columnNum = hr.first.first;
+
+	// indicates how many rows including blank elements in the Banded SmithWaterman
+	int numBlankElements = (mBandwidth / 2) - columnNum;
+
+	//cout << numBlankElements << endl;
+	// upper triangle matrix in Banded Smith-Waterman
+	for( ; numBlankElements > 0; numBlankElements--, rowNum++){
+		// in the upper triangle matrix, we always start at the 0th column
+		columnNum = 0;
+
+		// columnEnd indicates how many columns which should be dealt with in the current row
+		unsigned int columnEnd = min((mBandwidth - numBlankElements), ((unsigned int) s1.length() - columnNum + 1) );
+		currentQueryGapScore = FLOAT_NEGATIVE_INFINITY;
+		for( unsigned int j = 0; j < columnEnd; j++){
+			float score = CalculateScore(s1, s2, rowNum, columnNum, currentQueryGapScore, rowOffset, columnOffset);
+			//cout << s1[columnNum] << s2[rowNum] << score << endl;
+			UpdateBestScore(bestRow, bestColumn, bestScore, rowNum, columnNum, score);
+			columnNum++;
+		}
+
+		// replace the columnNum to the middle column in the Smith-Waterman matrix
+		columnNum = columnNum - (mBandwidth / 2);
+	}
+	// complete matrix in Banded Smith-Waterman
+	unsigned int completeNum = min((s1.length() - columnNum - (mBandwidth / 2)), (s2.length() - rowNum));
+	//cout << completeNum << endl;
+	for(unsigned int i = 0; i < completeNum; i++, rowNum++){
+		columnNum = columnNum - (mBandwidth / 2);
+
+		// there are mBandwidth columns which should be dealt with in each row
+		currentQueryGapScore = FLOAT_NEGATIVE_INFINITY;
+
+		for(unsigned int j = 0; j < mBandwidth; j++){
+			float score = CalculateScore(s1, s2, rowNum, columnNum, currentQueryGapScore, rowOffset, columnOffset);
+			UpdateBestScore(bestRow, bestColumn, bestScore, rowNum, columnNum, score);
+			//cout << s1[columnNum] << s2[rowNum] << score << endl;
+			columnNum++;
+		}
+
+		// replace the columnNum to the middle column in the Smith-Waterman matrix
+		// because mBandwidth is an odd number, everytime the following equation shifts a column (pluses 1).
+		columnNum = columnNum - (mBandwidth / 2);
+	}
+	
+	// lower triangle matrix
+	numBlankElements = min(mBandwidth, ((unsigned int) s2.length() - rowNum));
+	columnNum = columnNum - (mBandwidth / 2);
+	for(unsigned int i = 0; numBlankElements > 0; i++, rowNum++, numBlankElements--) {
+
+		mBestScores[ mBandwidth - i ] = FLOAT_NEGATIVE_INFINITY;;
+		// columnEnd indicates how many columns which should be dealt with
+		currentQueryGapScore = FLOAT_NEGATIVE_INFINITY;
+
+		for( unsigned int j = columnNum; j < s1.length(); j++){
+			float score = CalculateScore(s1, s2, rowNum, columnNum, currentQueryGapScore, rowOffset, columnOffset);
+			UpdateBestScore(bestRow, bestColumn, bestScore, rowNum, columnNum, score);
+			//cout << s1[columnNum] << s2[rowNum] << score << endl;
+			columnNum++;
+		}
+
+		// replace the columnNum to the middle column in the Smith-Waterman matrix
+		columnNum = columnNum - mBandwidth + i + 2;
+	}
+
+	// =========================================
+	// Banded Smith-Waterman backtrace algorithm
+	// =========================================
+
+	Traceback(referenceAl, cigarAl, s1, s2, bestRow, bestColumn, rowOffset, columnOffset);
+
+}
+
+// calculates the score during the forward algorithm
+float CBandedSmithWaterman::CalculateScore(const string& s1, const string& s2, const unsigned int rowNum, const unsigned int columnNum, float& currentQueryGapScore, const unsigned int rowOffset, const unsigned int columnOffset) {
+
+	// initialize
+	const unsigned int row      = rowNum + rowOffset;
+	const unsigned int column   = columnOffset - rowNum + columnNum;
+	const unsigned int position = row * (mBandwidth + 2) + column;
+
+	// retrieve the similarity scores
+	const float similarityScore      = mScoringMatrix[s1[columnNum] - 'A'][s2[rowNum] - 'A'];
+	const float totalSimilarityScore = mBestScores[column] + similarityScore;
+
+	// ================================
+	// open a gap in the query sequence
+	// ================================
+
+	float queryGapExtendScore = currentQueryGapScore - mGapExtendPenalty;
+	float queryGapOpenScore   = mBestScores[column - 1] - mGapOpenPenalty;
+
+	// compute the homo-polymer gap score if enabled
+	if(mUseHomoPolymerGapOpenPenalty)
+		if((rowNum > 1) && (s2[rowNum] == s2[rowNum - 1]))
+			queryGapOpenScore = mBestScores[column - 1] - mHomoPolymerGapOpenPenalty;
+
+	if(queryGapExtendScore > queryGapOpenScore) {
+		currentQueryGapScore = queryGapExtendScore;
+		mPointers[position].mSizeOfHorizontalGaps = mPointers[position - 1].mSizeOfHorizontalGaps + 1;
+	} else currentQueryGapScore = queryGapOpenScore;
+
+
+	// ====================================
+	// open a gap in the reference sequence
+	// ====================================
+
+	
+	float anchorGapExtendScore = mAnchorGapScores[column + 1] - mGapExtendPenalty;
+	float anchorGapOpenScore   = mBestScores[column + 1] - mGapOpenPenalty;
+
+	// compute the homo-polymer gap score if enabled	
+	if(mUseHomoPolymerGapOpenPenalty)
+		if((columnNum > 1) && (s1[columnNum] == s1[columnNum - 1]))
+			anchorGapOpenScore = mBestScores[column + 1] - mHomoPolymerGapOpenPenalty;
+
+	if(anchorGapExtendScore > anchorGapOpenScore) {
+		mAnchorGapScores[column] = anchorGapExtendScore;
+		mPointers[position].mSizeOfVerticalGaps = mPointers[position - mBandwidth - 1].mSizeOfVerticalGaps + 1;
+	} else mAnchorGapScores[column] = anchorGapOpenScore;
+
+	// ======================================
+	// calculate the best score and direction
+	// ======================================
+
+	//mBestScores[column] = MaxFloats(totalSimilarityScore, mAnchorGapScores[column], currentQueryGapScore);
+	mBestScores[column] = MaxFloats(totalSimilarityScore, currentQueryGapScore, mAnchorGapScores[column]);
+
+	// determine the traceback direction
+	// diagonal (445364713) > stop (238960195) > up (214378647) > left (166504495)
+	if(mBestScores[column] == 0)                            mPointers[position].Direction = Directions_STOP;
+	else if(mBestScores[column] == totalSimilarityScore)    mPointers[position].Direction = Directions_UP;
+	else if(mBestScores[column] == currentQueryGapScore)   mPointers[position].Direction = Directions_LEFT;
+	else                                                    mPointers[position].Direction = Directions_DIAGONAL;
+
+	return mBestScores[column];
+}
+
+// corrects the homopolymer gap order for forward alignments
+void CBandedSmithWaterman::CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches) {
+
+
+	// this is only required for alignments with mismatches
+	//if(al.NumMismatches == 0) return;
+	if ( numMismatches == 0 ) return;
+
+	// localize the alignment data
+	//char* pReference = al.Reference.Data();
+	//char* pQuery     = al.Query.Data();
+	//const unsigned int numBases = al.Reference.Length();
+	char* pReference = mReversedAnchor;
+	char* pQuery     = mReversedQuery;
+
+	// initialize
+	bool hasReferenceGap = false, hasQueryGap = false;
+	char* pNonGapSeq = NULL;
+	char* pGapSeq    = NULL;
+	char nonGapBase  = 'J';
+
+	// identify gapped regions
+	for(unsigned int i = 0; i < numBases; i++) {
+
+		// check if the current position is gapped
+		hasReferenceGap = false;
+		hasQueryGap     = false;
+
+		if(pReference[i] == GAP) {
+			hasReferenceGap = true;
+			pNonGapSeq      = pQuery;
+			pGapSeq         = pReference;
+			nonGapBase      = pQuery[i];
+		}
+
+		if(pQuery[i] == GAP) {
+			hasQueryGap = true;
+			pNonGapSeq  = pReference;
+			pGapSeq     = pQuery;
+			nonGapBase  = pReference[i];
+		}
+
+		// continue if we don't have any gaps
+		if(!hasReferenceGap && !hasQueryGap) continue;
+
+		// sanity check
+		if(hasReferenceGap && hasQueryGap) {
+			printf("ERROR: Found a gap in both the reference sequence and query sequence.\n");
+			exit(1);
+		}
+
+		// find the non-gapped length (forward)
+		unsigned short numGappedBases = 0;
+		unsigned short nonGapLength   = 0;
+		unsigned short testPos = i;
+		while(testPos < numBases) {
+
+			const char gs  = pGapSeq[testPos];
+			const char ngs = pNonGapSeq[testPos];
+
+			bool isPartofHomopolymer = false;
+			if(((gs == nonGapBase) || (gs == GAP)) && (ngs == nonGapBase)) isPartofHomopolymer = true;
+			if(!isPartofHomopolymer) break;
+
+			if(gs == GAP) numGappedBases++;
+			else nonGapLength++;
+			testPos++;
+		}
+
+		// fix the gap order
+		if(numGappedBases != 0) {
+			char* pCurrentSequence = pGapSeq + i;
+			memset(pCurrentSequence, nonGapBase, nonGapLength);
+			pCurrentSequence += nonGapLength;
+			memset(pCurrentSequence, GAP, numGappedBases);
+		}
+
+		// increment
+		i += numGappedBases + nonGapLength - 1;
+	}
+}
+
+// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
+void CBandedSmithWaterman::CreateScoringMatrix(void) {
+
+	unsigned int nIndex = 13;
+	unsigned int xIndex = 23;
+
+	// define the N score to be 1/4 of the span between mismatch and match
+	//const short nScore = mMismatchScore + (short)(((mMatchScore - mMismatchScore) / 4.0) + 0.5);
+
+	// calculate the scoring matrix
+	for(unsigned char i = 0; i < MOSAIK_NUM_NUCLEOTIDES; i++) {
+		for(unsigned char j = 0; j < MOSAIK_NUM_NUCLEOTIDES; j++) {
+
+			// N.B. matching N to everything (while conceptually correct) leads to some
+			// bad alignments, lets make N be a mismatch instead.
+
+			// add the matches or mismatches to the hashtable (N is a mismatch)
+			if((i == nIndex) || (j == nIndex)) mScoringMatrix[i][j] = mMismatchScore;
+			else if((i == xIndex) || (j == xIndex)) mScoringMatrix[i][j] = mMismatchScore;
+			else if(i == j) mScoringMatrix[i][j] = mMatchScore;
+			else mScoringMatrix[i][j] = mMismatchScore;
+		}
+	}
+
+	// add ambiguity codes
+	mScoringMatrix['M' - 'A']['A' - 'A'] = mMatchScore;	// M - A
+	mScoringMatrix['A' - 'A']['M' - 'A'] = mMatchScore;
+	// add ambiguity codes
+	mScoringMatrix['M' - 'A']['A' - 'A'] = mMatchScore;	// M - A
+	mScoringMatrix['A' - 'A']['M' - 'A'] = mMatchScore;
+	mScoringMatrix['M' - 'A']['C' - 'A'] = mMatchScore; // M - C
+	mScoringMatrix['C' - 'A']['M' - 'A'] = mMatchScore;
+
+	mScoringMatrix['R' - 'A']['A' - 'A'] = mMatchScore;	// R - A
+	mScoringMatrix['A' - 'A']['R' - 'A'] = mMatchScore;
+	mScoringMatrix['R' - 'A']['G' - 'A'] = mMatchScore; // R - G
+	mScoringMatrix['G' - 'A']['R' - 'A'] = mMatchScore;
+
+	mScoringMatrix['W' - 'A']['A' - 'A'] = mMatchScore;	// W - A
+	mScoringMatrix['A' - 'A']['W' - 'A'] = mMatchScore;
+	mScoringMatrix['W' - 'A']['T' - 'A'] = mMatchScore; // W - T
+	mScoringMatrix['T' - 'A']['W' - 'A'] = mMatchScore;
+
+	mScoringMatrix['S' - 'A']['C' - 'A'] = mMatchScore;	// S - C
+	mScoringMatrix['C' - 'A']['S' - 'A'] = mMatchScore;
+	mScoringMatrix['S' - 'A']['G' - 'A'] = mMatchScore; // S - G
+	mScoringMatrix['G' - 'A']['S' - 'A'] = mMatchScore;
+
+	mScoringMatrix['Y' - 'A']['C' - 'A'] = mMatchScore;	// Y - C
+	mScoringMatrix['C' - 'A']['Y' - 'A'] = mMatchScore;
+	mScoringMatrix['Y' - 'A']['T' - 'A'] = mMatchScore; // Y - T
+	mScoringMatrix['T' - 'A']['Y' - 'A'] = mMatchScore;
+
+	mScoringMatrix['K' - 'A']['G' - 'A'] = mMatchScore;	// K - G
+	mScoringMatrix['G' - 'A']['K' - 'A'] = mMatchScore;
+	mScoringMatrix['K' - 'A']['T' - 'A'] = mMatchScore; // K - T
+	mScoringMatrix['T' - 'A']['K' - 'A'] = mMatchScore;
+
+	mScoringMatrix['V' - 'A']['A' - 'A'] = mMatchScore;	// V - A
+	mScoringMatrix['A' - 'A']['V' - 'A'] = mMatchScore;
+	mScoringMatrix['V' - 'A']['C' - 'A'] = mMatchScore; // V - C
+	mScoringMatrix['C' - 'A']['V' - 'A'] = mMatchScore;
+	mScoringMatrix['V' - 'A']['G' - 'A'] = mMatchScore; // V - G
+	mScoringMatrix['G' - 'A']['V' - 'A'] = mMatchScore;
+
+	mScoringMatrix['H' - 'A']['A' - 'A'] = mMatchScore;	// H - A
+	mScoringMatrix['A' - 'A']['H' - 'A'] = mMatchScore;
+	mScoringMatrix['H' - 'A']['C' - 'A'] = mMatchScore; // H - C
+	mScoringMatrix['C' - 'A']['H' - 'A'] = mMatchScore;
+	mScoringMatrix['H' - 'A']['T' - 'A'] = mMatchScore; // H - T
+	mScoringMatrix['T' - 'A']['H' - 'A'] = mMatchScore;
+
+	mScoringMatrix['D' - 'A']['A' - 'A'] = mMatchScore;	// D - A
+	mScoringMatrix['A' - 'A']['D' - 'A'] = mMatchScore;
+	mScoringMatrix['D' - 'A']['G' - 'A'] = mMatchScore; // D - G
+	mScoringMatrix['G' - 'A']['D' - 'A'] = mMatchScore;
+	mScoringMatrix['D' - 'A']['T' - 'A'] = mMatchScore; // D - T
+	mScoringMatrix['T' - 'A']['D' - 'A'] = mMatchScore;
+
+	mScoringMatrix['B' - 'A']['C' - 'A'] = mMatchScore;	// B - C
+	mScoringMatrix['C' - 'A']['B' - 'A'] = mMatchScore;
+	mScoringMatrix['B' - 'A']['G' - 'A'] = mMatchScore; // B - G
+	mScoringMatrix['G' - 'A']['B' - 'A'] = mMatchScore;
+	mScoringMatrix['B' - 'A']['T' - 'A'] = mMatchScore; // B - T
+	mScoringMatrix['T' - 'A']['B' - 'A'] = mMatchScore;
+}
+
+// enables homo-polymer scoring
+void CBandedSmithWaterman::EnableHomoPolymerGapPenalty(float hpGapOpenPenalty) {
+	mUseHomoPolymerGapOpenPenalty = true;
+	mHomoPolymerGapOpenPenalty    = hpGapOpenPenalty;
+}
+
+// reinitializes the matrices
+void CBandedSmithWaterman::ReinitializeMatrices(const PositionType& positionType, const unsigned int& s1Length, const unsigned int& s2Length, const pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr) {
+
+/*
+	try {
+		mBestScores	 = new float[mBandwidth + 2];
+		mAnchorGapScores = new float[mBandwidth + 2];
+	} catch(bad_alloc) {
+		printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
+		exit(1);
+	}
+*/
+
+	const unsigned int numColumns = mBandwidth + 2;
+	unsigned int numRows = 0;
+
+	switch(positionType) {
+		case Position_REF_AND_QUERY_ZERO:
+			numRows = s2Length + 1;
+			break;
+		case Position_REF_ZERO:
+			numRows = s2Length - hr.second.first + 2;
+			break;
+		case Position_QUERY_ZERO:
+			numRows = min(s2Length + 1, s1Length - hr.first.first + 2);
+			break;
+		case Position_REF_AND_QUERO_NONZERO:
+			numRows = min(s1Length - hr.first.first + 2, s2Length - hr.second.first + 2);
+			break;
+	}
+
+	// update the size of the backtrace matrix
+	if((numColumns * numRows) > mCurrentMatrixSize) {
+
+		mCurrentMatrixSize = numColumns * numRows;
+		if(mPointers) delete [] mPointers;
+
+		try {
+			mPointers = new ElementInfo[mCurrentMatrixSize];
+		} catch(bad_alloc) {
+			printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
+			exit(1);
+		}
+	}
+
+	// initialize our backtrace matrix
+	ElementInfo defaultElement;
+	defaultElement.Direction = Directions_STOP;
+	defaultElement.mSizeOfHorizontalGaps = 1;
+	defaultElement.mSizeOfVerticalGaps   = 1;
+
+	uninitialized_fill(mPointers, mPointers + mCurrentMatrixSize, defaultElement);
+
+	// update the sequence character arrays
+	if((s1Length + s2Length) > mCurrentAQSumSize) {
+
+		mCurrentAQSumSize = s1Length + s2Length;
+		if(mReversedAnchor)	delete [] mReversedAnchor;
+		if(mReversedQuery)	delete [] mReversedQuery;
+
+		try {
+			mReversedAnchor	= new char[mCurrentAQSumSize + 1]; // reversed sequence #1
+			mReversedQuery	= new char[mCurrentAQSumSize + 1]; // reversed sequence #2
+		} catch(bad_alloc) {
+			printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
+			exit(1);
+		}
+	}
+
+	// initialize the gap score and score vectors
+	uninitialized_fill(mAnchorGapScores, mAnchorGapScores + mBandwidth + 2, FLOAT_NEGATIVE_INFINITY);
+	memset((char*)mBestScores, 0, SIZEOF_FLOAT * (mBandwidth + 2));
+	mBestScores[0]              = FLOAT_NEGATIVE_INFINITY;
+	mBestScores[mBandwidth + 1] = FLOAT_NEGATIVE_INFINITY;
+}
+
+// performs the backtrace algorithm
+void CBandedSmithWaterman::Traceback(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2, unsigned int bestRow, unsigned int bestColumn, const unsigned int rowOffset, const unsigned int columnOffset){
+
+
+	unsigned int currentRow		 = bestRow;
+	unsigned int currentColumn	 = bestColumn;
+	unsigned int currentPosition = ((currentRow + rowOffset) * (mBandwidth + 2)) + (columnOffset - currentRow + currentColumn);
+
+
+	// record the numbers of row and column before the current row and column
+	unsigned int previousRow	= bestRow;
+	unsigned int previousColumn	= bestColumn;
+
+	unsigned int gappedAnchorLen = 0;
+	unsigned int gappedQueryLen  = 0;
+	unsigned int numMismatches   = 0;
+
+	bool keepProcessing = true;
+	while(keepProcessing) {
+		unsigned int nVerticalGap = 0;
+		unsigned int nHorizontalGap = 0;
+		switch(mPointers[currentPosition].Direction){
+			case Directions_DIAGONAL:
+				nVerticalGap = mPointers[currentPosition].mSizeOfVerticalGaps;
+				for(unsigned int i = 0; i < nVerticalGap; i++){
+					mReversedAnchor[gappedAnchorLen++] = GAP;
+					mReversedQuery[gappedQueryLen++]   = s2[currentRow];
+
+					numMismatches++;
+
+					previousRow = currentRow;
+					previousColumn = currentColumn;
+
+					currentRow--;
+				}
+				break;
+
+			case Directions_STOP:
+				keepProcessing = false;
+				//mReversedAnchor[gappedAnchorLen+1]='\0';
+				//mReversedQuery [gappedQueryLen+1]='\0';
+				break;
+
+			case Directions_UP:
+
+				mReversedAnchor[gappedAnchorLen++] = s1[currentColumn];
+				mReversedQuery[gappedQueryLen++]   = s2[currentRow];
+
+				if(s1[currentColumn] != s2[currentRow]) numMismatches++;
+				previousRow = currentRow;
+				previousColumn = currentColumn;
+
+				currentRow--;
+				currentColumn--;
+				break;
+
+			case Directions_LEFT:
+				nHorizontalGap =  mPointers[currentPosition].mSizeOfHorizontalGaps;
+				for(unsigned int i = 0; i < nHorizontalGap; i++){
+
+					mReversedAnchor[gappedAnchorLen++] = s1[currentColumn];
+					mReversedQuery[gappedQueryLen++]   = GAP;
+
+					numMismatches++;
+
+					previousRow = currentRow;
+					previousColumn = currentColumn;
+
+
+					currentColumn--;
+				}
+				break;
+		}
+		currentPosition = ((currentRow + rowOffset) * (mBandwidth + 2)) + (columnOffset - currentRow + currentColumn);
+	}
+
+	// correct the reference and query sequence order
+	mReversedAnchor[gappedAnchorLen] = 0;
+	mReversedQuery [gappedQueryLen] = 0;
+	reverse(mReversedAnchor, mReversedAnchor + gappedAnchorLen);
+	reverse(mReversedQuery,  mReversedQuery  + gappedQueryLen);
+
+	//alignment.Reference = mReversedAnchor;
+	//alignment.Query     = mReversedQuery;
+
+	// assign the alignment endpoints
+	//alignment.ReferenceBegin = previousColumn;
+	//alignment.ReferenceEnd   = bestColumn;
+	referenceAl  = previousColumn;
+	/*  
+	if(alignment.IsReverseComplement){
+		alignment.QueryBegin = s2.length() - bestRow - 1; 
+		alignment.QueryEnd   = s2.length() - previousRow - 1;
+	} else {
+		alignment.QueryBegin = previousRow; 
+		alignment.QueryEnd   = bestRow;
+	}
+	*/
+	
+	//alignment.QueryLength	= alignment.QueryEnd - alignment.QueryBegin + 1;
+	//alignment.NumMismatches = numMismatches;
+
+	const unsigned int alLength = strlen(mReversedAnchor);
+	unsigned int m = 0, d = 0, i = 0;
+	bool dashRegion = false;
+	ostringstream oCigar;
+
+	if ( previousRow != 0 )
+		oCigar << previousRow << 'S';
+
+	for ( unsigned int j = 0; j < alLength; j++ ) {
+		// m
+		if ( ( mReversedAnchor[j] != GAP ) && ( mReversedQuery[j] != GAP ) ) {
+			if ( dashRegion ) {
+				if ( d != 0 ) oCigar << d << 'D';
+				else          oCigar << i << 'I';
+			}
+			dashRegion = false;
+			m++;
+			d = 0;
+			i = 0;
+		}
+		// I or D
+		else {
+			if ( !dashRegion )
+				oCigar << m << 'M';
+			dashRegion = true;
+			m = 0;
+			if ( mReversedAnchor[j] == GAP ) {
+				if ( d != 0 ) oCigar << d << 'D';
+				i++;
+				d = 0;
+			}
+			else {
+				if ( i != 0 ) oCigar << i << 'I';
+				d++;
+				i = 0;
+			}
+		}
+	}
+	
+	if      ( m != 0 ) oCigar << m << 'M';
+	else if ( d != 0 ) oCigar << d << 'D';
+	else if ( i != 0 ) oCigar << i << 'I';
+
+	if ( ( bestRow + 1 ) != s2.length() )
+		oCigar << s2.length() - bestRow - 1 << 'S';
+
+	cigarAl = oCigar.str();
+	
+
+	// correct the homopolymer gap order
+	CorrectHomopolymerGapOrder(alLength, numMismatches);
+
+}
diff --git a/BandedSmithWaterman.h b/BandedSmithWaterman.h
new file mode 100644
index 0000000..ca78ac2
--- /dev/null
+++ b/BandedSmithWaterman.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <iostream>
+#include <algorithm>
+#include <memory>
+//#include "Alignment.h"
+#include "Mosaik.h"
+//#include "HashRegion.h"
+#include <string.h>
+#include <stdio.h>
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+#define MOSAIK_NUM_NUCLEOTIDES 26
+#define GAP '-'
+
+typedef unsigned char DirectionType;
+typedef unsigned char PositionType;
+
+struct ElementInfo {
+	unsigned int Direction             : 2;
+	unsigned int mSizeOfVerticalGaps   : 15;
+	unsigned int mSizeOfHorizontalGaps : 15;
+};
+
+class CBandedSmithWaterman {
+public:
+	// constructor
+	CBandedSmithWaterman(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty, unsigned int bandWidth);
+	// destructor
+	~CBandedSmithWaterman(void);
+	// aligns the query sequence to the anchor using the Smith Waterman Gotoh algorithm
+	void Align(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr);
+	// enables homo-polymer scoring
+	void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
+private:
+	// calculates the score during the forward algorithm
+	float CalculateScore(const string& s1, const string& s2, const unsigned int rowNum, const unsigned int columnNum, float& currentQueryGapScore, const unsigned int rowOffset, const unsigned int columnOffset);
+	// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
+	void CreateScoringMatrix(void);
+	// corrects the homopolymer gap order for forward alignments
+	void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
+	// returns the maximum floating point number
+	static inline float MaxFloats(const float& a, const float& b, const float& c);
+	// reinitializes the matrices
+	void ReinitializeMatrices(const PositionType& positionType, const unsigned int& s1Length, const unsigned int& s2Length, const pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr);
+	// performs the backtrace algorithm
+	void Traceback(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, unsigned int bestRow, unsigned int bestColumn, const unsigned int rowOffset, const unsigned int columnOffset);
+	// updates the best score during the forward algorithm
+	inline void UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score);
+	// our simple scoring matrix
+	float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
+	// keep track of maximum initialized sizes
+	unsigned int mCurrentMatrixSize;
+	unsigned int mCurrentAnchorSize;
+	unsigned int mCurrentAQSumSize;
+	unsigned int mBandwidth;
+	// define our backtrace directions
+	const static DirectionType Directions_STOP;
+	const static DirectionType Directions_LEFT;
+	const static DirectionType Directions_DIAGONAL;
+	const static DirectionType Directions_UP;
+	// store the backtrace pointers
+	ElementInfo* mPointers;
+	// define our position types
+	const static PositionType Position_REF_AND_QUERY_ZERO;
+	const static PositionType Position_REF_ZERO;
+	const static PositionType Position_QUERY_ZERO;
+	const static PositionType Position_REF_AND_QUERO_NONZERO;
+	// define scoring constants
+	const float mMatchScore;
+	const float mMismatchScore;
+	const float mGapOpenPenalty;
+	const float mGapExtendPenalty;
+	// score if xi aligns to a gap after yi
+	float* mAnchorGapScores;
+	// best score of alignment x1...xi to y1...yi
+	float* mBestScores;
+	// our reversed alignment
+	char* mReversedAnchor;
+	char* mReversedQuery;
+	// define static constants
+	static const float FLOAT_NEGATIVE_INFINITY;
+	// toggles the use of the homo-polymer gap open penalty
+	bool mUseHomoPolymerGapOpenPenalty;
+	float mHomoPolymerGapOpenPenalty;
+};
+
+// returns the maximum floating point number
+inline float CBandedSmithWaterman::MaxFloats(const float& a, const float& b, const float& c) {
+	float max = 0.0f;
+	if(a > max) max = a;
+	if(b > max) max = b;
+	if(c > max) max = c;
+	return max;
+}
+
+// updates the best score during the forward algorithm
+inline void CBandedSmithWaterman::UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score) {
+
+	//const unsigned int row    = rowNum + rowOffset;
+	//const unsigned int column = columnOffset - rowNum + columnNum;
+
+	if(score > bestScore) {
+		bestRow    = rowNum;
+		bestColumn = columnNum;
+		bestScore  = score;
+	}
+}
diff --git a/IndelAllele.cpp b/IndelAllele.cpp
new file mode 100644
index 0000000..80b5fac
--- /dev/null
+++ b/IndelAllele.cpp
@@ -0,0 +1,62 @@
+#include "IndelAllele.h"
+
+using namespace std;
+
+
+bool IndelAllele::homopolymer(void) {
+    string::iterator s = sequence.begin();
+    char c = *s++;
+    while (s != sequence.end()) {
+        if (c != *s++) return false;
+    }
+    return true;
+}
+
+int IndelAllele::readLength(void) {
+    if (insertion) {
+	return length;
+    } else {
+	return 0;
+    }
+}
+
+int IndelAllele::referenceLength(void) {
+    if (insertion) {
+	return 0;
+    } else {
+	return length;
+    }
+}
+
+bool homopolymer(string sequence) {
+    string::iterator s = sequence.begin();
+    char c = *s++;
+    while (s != sequence.end()) {
+        if (c != *s++) return false;
+    }
+    return true;
+}
+
+ostream& operator<<(ostream& out, const IndelAllele& indel) {
+    string t = indel.insertion ? "i" : "d";
+    out << t <<  ":" << indel.position << ":" << indel.readPosition << ":" << indel.length << ":" << indel.sequence;
+    return out;
+}
+
+bool operator==(const IndelAllele& a, const IndelAllele& b) {
+    return (a.insertion == b.insertion
+            && a.length == b.length
+            && a.position == b.position
+            && a.sequence == b.sequence);
+}
+
+bool operator!=(const IndelAllele& a, const IndelAllele& b) {
+    return !(a==b);
+}
+
+bool operator<(const IndelAllele& a, const IndelAllele& b) {
+    ostringstream as, bs;
+    as << a;
+    bs << b;
+    return as.str() < bs.str();
+}
diff --git a/IndelAllele.h b/IndelAllele.h
new file mode 100644
index 0000000..247c01f
--- /dev/null
+++ b/IndelAllele.h
@@ -0,0 +1,37 @@
+#ifndef __INDEL_ALLELE_H
+#define __INDEL_ALLELE_H
+
+#include <string>
+#include <iostream>
+#include <sstream>
+
+using namespace std;
+
+class IndelAllele {
+    friend ostream& operator<<(ostream&, const IndelAllele&);
+    friend bool operator==(const IndelAllele&, const IndelAllele&);
+    friend bool operator!=(const IndelAllele&, const IndelAllele&);
+    friend bool operator<(const IndelAllele&, const IndelAllele&);
+public:
+    bool insertion;
+    int length;
+    int referenceLength(void);
+    int readLength(void);
+    int position;
+    int readPosition;
+    string sequence;
+
+    bool homopolymer(void);
+
+    IndelAllele(bool i, int l, int p, int rp, string s)
+        : insertion(i), length(l), position(p), readPosition(rp), sequence(s)
+    { }
+};
+
+bool homopolymer(string sequence);
+ostream& operator<<(ostream& out, const IndelAllele& indel);
+bool operator==(const IndelAllele& a, const IndelAllele& b);
+bool operator!=(const IndelAllele& a, const IndelAllele& b);
+bool operator<(const IndelAllele& a, const IndelAllele& b);
+
+#endif
diff --git a/LeftAlign.cpp b/LeftAlign.cpp
new file mode 100644
index 0000000..3e526aa
--- /dev/null
+++ b/LeftAlign.cpp
@@ -0,0 +1,853 @@
+#include "LeftAlign.h"
+
+//bool debug;
+#define VERBOSE_DEBUG
+
+// Attempts to left-realign all the indels represented by the alignment cigar.
+//
+// This is done by shifting all indels as far left as they can go without
+// mismatch, then merging neighboring indels of the same class.  leftAlign
+// updates the alignment cigar with changes, and returns true if realignment
+// changed the alignment cigar.
+//
+// To left-align, we move multi-base indels left by their own length as long as
+// the preceding bases match the inserted or deleted sequence.  After this
+// step, we handle multi-base homopolymer indels by shifting them one base to
+// the left until they mismatch the reference.
+//
+// To merge neighboring indels, we iterate through the set of left-stabilized
+// indels.  For each indel we add a new cigar element to the new cigar.  If a
+// deletion follows a deletion, or an insertion occurs at the same place as
+// another insertion, we merge the events by extending the previous cigar
+// element.
+//
+// In practice, we must call this function until the alignment is stabilized.
+//
+bool leftAlign(string& querySequence, string& cigar, string& baseReferenceSequence, int& offset, bool debug) {
+
+    debug = false;
+
+    string referenceSequence = baseReferenceSequence.substr(offset);
+
+    int arsOffset = 0; // pointer to insertion point in aligned reference sequence
+    string alignedReferenceSequence, alignedQuerySequence;
+    if (debug) alignedReferenceSequence = referenceSequence;
+    if (debug) alignedQuerySequence = querySequence;
+    int aabOffset = 0;
+
+    // store information about the indels
+    vector<IndelAllele> indels;
+
+    int rp = 0;  // read position, 0-based relative to read
+    int sp = 0;  // sequence position
+
+    string softBegin;
+    string softEnd;
+
+    string cigarbefore = cigar;
+
+    vector<pair<int, string> > cigarData = splitCIGAR(cigar);
+    for (vector<pair<int, string> >::const_iterator c = cigarData.begin();
+        c != cigarData.end(); ++c) {
+        unsigned int l = c->first;
+        string t = c->second;
+	if (debug) cerr << l << t << " " << sp << " " << rp << endl;
+        if (t == "M") { // match or mismatch
+            sp += l;
+            rp += l;
+        } else if (t == "D") { // deletion
+            indels.push_back(IndelAllele(false, l, sp, rp, referenceSequence.substr(sp, l)));
+            if (debug) { cerr << indels.back() << endl;  alignedQuerySequence.insert(rp + aabOffset, string(l, '-')); }
+            aabOffset += l;
+            sp += l;  // update reference sequence position
+        } else if (t == "I") { // insertion
+            indels.push_back(IndelAllele(true, l, sp, rp, querySequence.substr(rp, l)));
+            if (debug) { cerr << indels.back() << endl; alignedReferenceSequence.insert(sp + softBegin.size() + arsOffset, string(l, '-')); }
+            arsOffset += l;
+            rp += l;
+        } else if (t == "S") { // soft clip, clipped sequence present in the read not matching the reference
+            // remove these bases from the refseq and read seq, but don't modify the alignment sequence
+            if (rp == 0) {
+                alignedReferenceSequence = string(l, '*') + alignedReferenceSequence;
+		//indels.push_back(IndelAllele(true, l, sp, rp, querySequence.substr(rp, l)));
+                softBegin = querySequence.substr(0, l);
+            } else {
+                alignedReferenceSequence = alignedReferenceSequence + string(l, '*');
+		//indels.push_back(IndelAllele(true, l, sp, rp, querySequence.substr(rp, l)));
+                softEnd = querySequence.substr(querySequence.size() - l, l);
+            }
+            rp += l;
+        } else if (t == "H") { // hard clip on the read, clipped sequence is not present in the read
+        } else if (t == "N") { // skipped region in the reference not present in read, aka splice
+            sp += l;
+        }
+    }
+
+
+    if (debug) cerr << "| " << cigarbefore << endl
+		    << "| " << alignedReferenceSequence << endl
+		    << "| " << alignedQuerySequence << endl;
+
+    // if no indels, return the alignment
+    if (indels.empty()) { return false; }
+
+    if (debug) {
+	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
+	cerr << endl;
+    }
+
+    // for each indel, from left to right
+    //     while the indel sequence repeated to the left and we're not matched up with the left-previous indel
+    //         move the indel left
+
+    vector<IndelAllele>::iterator previous = indels.begin();
+    for (vector<IndelAllele>::iterator id = indels.begin(); id != indels.end(); ++id) {
+
+        // left shift by repeats
+        //
+        // from 1 base to the length of the indel, attempt to shift left
+        // if the move would cause no change in alignment optimality (no
+        // introduction of mismatches, and by definition no change in gap
+        // length), move to the new position.
+        // in practice this moves the indel left when we reach the size of
+        // the repeat unit.
+        //
+        int steppos, readsteppos;
+        IndelAllele& indel = *id;
+        int i = 1;
+
+        while (i <= indel.length) {
+
+            int steppos = indel.position - i;
+            int readsteppos = indel.readPosition - i;
+
+            if (debug) {
+                if (steppos >= 0 && readsteppos >= 0) {
+                    cerr << "refseq flank " << referenceSequence.substr(steppos, indel.length) << endl;
+                    cerr << "qryseq flank " << querySequence.substr(readsteppos, indel.length) << endl;
+                    cerr << "indelseq     " << indel.sequence << endl;
+                }
+            }
+
+            while (steppos >= 0 && readsteppos >= 0
+                   && indel.sequence == referenceSequence.substr(steppos, indel.length)
+                   && indel.sequence == querySequence.substr(readsteppos, indel.length)
+                   && (id == indels.begin()
+                       || (previous->insertion && steppos >= previous->position)
+                       || (!previous->insertion && steppos >= previous->position + previous->length))) {
+                LEFTALIGN_DEBUG((indel.insertion ? "insertion " : "deletion ") << indel << " shifting " << i << "bp left" << endl);
+                indel.position -= i;
+                indel.readPosition -= i;
+                steppos = indel.position - i;
+                readsteppos = indel.readPosition - i;
+            }
+            do {
+                ++i;
+            } while (i <= indel.length && indel.length % i != 0);
+        }
+
+
+
+        // left shift indels with exchangeable flanking sequence
+        //
+        // for example:
+        //
+        //    GTTACGTT           GTTACGTT
+        //    GT-----T   ---->   G-----TT
+        //
+        // GTGTGACGTGT           GTGTGACGTGT
+        // GTGTG-----T   ---->   GTG-----TGT
+        //
+        // GTGTG-----T           GTG-----TGT
+        // GTGTGACGTGT   ---->   GTGTGACGTGT
+        //
+        //
+
+        steppos = indel.position - 1;
+        readsteppos = indel.readPosition - 1;
+        while (steppos >= 0 && readsteppos >= 0
+               && querySequence.at(readsteppos) == referenceSequence.at(steppos)
+	       && referenceSequence.size() > steppos + indel.length
+	       && indel.sequence.at((int) indel.sequence.size() - 1) == referenceSequence.at(steppos + indel.length) // are the exchanged bases going to match wrt. the reference?
+               && querySequence.at(readsteppos) == indel.sequence.at((int) indel.sequence.size() - 1)
+               && (id == indels.begin()
+                   || (previous->insertion && indel.position - 1 >= previous->position)
+                   || (!previous->insertion && indel.position - 1 >= previous->position + previous->length))) {
+            if (debug) cerr << (indel.insertion ? "insertion " : "deletion ") << indel << " exchanging bases " << 1 << "bp left" << endl;
+            indel.sequence = indel.sequence.at(indel.sequence.size() - 1) + indel.sequence.substr(0, indel.sequence.size() - 1);
+            indel.position -= 1;
+            indel.readPosition -= 1;
+	    if (debug) cerr << indel << endl;
+            steppos = indel.position - 1;
+            readsteppos = indel.readPosition - 1;
+	    //if (debug && steppos && readsteppos) cerr << querySequence.at(readsteppos) << " ==? " << referenceSequence.at(steppos) << endl;
+	    //if (debug && steppos && readsteppos) cerr << indel.sequence.at((int) indel.sequence.size() - 1) << " ==? " << referenceSequence.at(steppos + indel.length) << endl;
+        }
+        // tracks previous indel, so we don't run into it with the next shift
+        previous = id;
+    }
+
+    if (debug) {
+	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
+	cerr << endl;
+    }
+
+    if (debug) cerr << "bring together floating indels" << endl;
+
+    // bring together floating indels
+    // from left to right
+    // check if we could merge with the next indel
+    // if so, adjust so that we will merge in the next step
+    if (indels.size() > 1) {
+        previous = indels.begin();
+        for (vector<IndelAllele>::iterator id = (indels.begin() + 1); id != indels.end(); ++id) {
+            IndelAllele& indel = *id;
+            // parsimony: could we shift right and merge with the previous indel?
+            // if so, do it
+            int prev_end_ref = previous->insertion ? previous->position : previous->position + previous->length;
+            int prev_end_read = !previous->insertion ? previous->readPosition : previous->readPosition + previous->length;
+            if (previous->insertion == indel.insertion
+                    && ((previous->insertion
+                        && (previous->position < indel.position
+                        && previous->readPosition < indel.readPosition))
+                        ||
+                        (!previous->insertion
+                        && (previous->position + previous->length < indel.position)
+                        && (previous->readPosition < indel.readPosition)
+                        ))) {
+                if (previous->homopolymer()) {
+                    string seq = referenceSequence.substr(prev_end_ref, indel.position - prev_end_ref);
+                    string readseq = querySequence.substr(prev_end_read, indel.position - prev_end_ref);
+                    if (debug) cerr << "seq: " << seq << endl << "readseq: " << readseq << endl;
+                    if (previous->sequence.at(0) == seq.at(0)
+                            && homopolymer(seq)
+                            && homopolymer(readseq)) {
+                        if (debug) cerr << "moving " << *previous << " right to " 
+					<< (indel.insertion ? indel.position : indel.position - previous->length) << endl;
+                        previous->position = indel.insertion ? indel.position : indel.position - previous->length;
+			previous->readPosition = !indel.insertion ? indel.readPosition : indel.readPosition - previous->readLength(); // should this be readLength?
+                    }
+                }
+		/*
+                else {
+                    int pos = previous->position;
+		    int readpos = previous->readPosition;
+                    while (pos < (int) referenceSequence.length() &&
+                            ((previous->insertion && pos + previous->length <= indel.position)
+                            ||
+                            (!previous->insertion && pos + previous->length < indel.position))
+			   && previous->sequence == referenceSequence.substr(pos + previous->length, previous->length)
+			   && previous->sequence == querySequence.substr(readpos + previous->length, previous->length)
+			) {
+                        pos += previous->length;
+			readpos += previous->length;
+                    }
+		    string seq = previous->sequence;
+                    if (pos > previous->position) {
+			// wobble bases right to left as far as we can go
+			int steppos = previous->position + seq.size();
+			int readsteppos = previous->readPosition + seq.size();
+
+			while (querySequence.at(readsteppos) == referenceSequence.at(steppos)
+			       && querySequence.at(readsteppos) == seq.at(0)
+			       && (id == indels.begin()
+				   || (indel.insertion && pos + seq.size() - 1 <= indel.position)
+				   || (!previous->insertion && indel.position - 1 >= pos + previous->length))) {
+			    seq = seq.substr(1) + seq.at(0);
+			    ++pos;
+			    ++readpos;
+			    steppos = pos + 1;
+			    readsteppos = readpos + 1;
+			}
+
+			if (((previous->insertion && pos + previous->length == indel.position)
+			     ||
+			     (!previous->insertion && pos == indel.position - previous->length))
+			    ) {
+			    if (debug) cerr << "right-merging tandem repeat: moving " << *previous << " right to " << pos << endl;
+			    previous->position = pos;
+			    previous->readPosition = readpos;
+			    previous->sequence = seq;
+			}
+                    }
+                }
+		*/
+            }
+            previous = id;
+        }
+    }
+
+    if (debug) {
+	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
+	cerr << endl;
+    }
+
+
+    if (debug) cerr << "bring in indels at ends of read" << endl;
+
+    // try to "bring in" repeat indels at the end, for maximum parsimony
+    //
+    // e.g.
+    //
+    // AGAAAGAAAGAAAAAGAAAAAGAACCAAGAAGAAAA
+    // AGAAAG------AAAGAAAAAGAACCAAGAAGAAAA
+    //
+    //     has no information which distinguishes it from:
+    //
+    // AGAAAGAAAAAGAAAAAGAACCAAGAAGAAAA
+    // AGAAAG--AAAGAAAAAGAACCAAGAAGAAAA
+    //
+    // here we take the parsimonious explanation
+
+    if (!indels.empty()) {
+	// deal with the first indel
+	// the first deletion ... or the biggest deletion
+	vector<IndelAllele>::iterator a = indels.begin();
+	vector<IndelAllele>::iterator del = indels.begin();
+	for (; a != indels.end(); ++a) {
+	    //if (!a->insertion && a->length > biggestDel->length) biggestDel = a;
+	    if (!a->insertion && a->length) del = a;
+	if (!del->insertion) {
+	    //if (!indel.insertion) { // only handle deletions like this for now
+	    //if (!indel.insertion && !(indels.size() > 1 && indel.readPosition == indels.at(1).readPosition)) { // only handle deletions like this for now
+	    int insertedBpBefore = 0;
+	    int deletedBpBefore = 0;
+	    for (vector<IndelAllele>::iterator i = indels.begin(); i != del; ++i) {
+		if (i->insertion) insertedBpBefore += i->length;
+		else deletedBpBefore += i->length;
+	    }
+	    IndelAllele& indel = *del;
+	    int minsize = indel.length;
+	    int flankingLength = indel.readPosition;
+	    if (debug) cerr << indel << endl;
+	    string flanking = querySequence.substr(0, flankingLength);
+	    if (debug) cerr << flanking << endl;
+
+	    size_t p = referenceSequence.substr(0, indel.position + indel.length).rfind(flanking);
+	    if (p == string::npos) {
+		if (debug) cerr << "flanking not found" << endl;
+	    } else {
+		if (debug) {
+		    cerr << "flanking is at " << p << endl;
+		    cerr << "minsize would be " << (indel.position + indel.length) - ((int) p + flankingLength) << endl;
+		}
+		minsize = (indel.position + indel.length) - ((int) p + flankingLength);
+	    }
+
+	    if (debug) cerr << minsize << endl;
+
+	    if (minsize >= 0 && minsize < indel.length) {
+
+		int softdiff = softBegin.size();
+		if (!softBegin.empty()) { // remove soft clips if we can
+		    if (flankingLength < softBegin.size()) {
+			softBegin = softBegin.substr(0, flankingLength - softBegin.size());
+			softdiff -= softBegin.size();
+		    } else {
+			softBegin.clear();
+		    }
+		}
+
+		// the new read position == the current read position
+		// the new reference position == the flanking length size
+		// the positional offset of the reference sequence == the new position of the deletion - the flanking length
+
+		int diff = indel.length - minsize - softdiff  + deletedBpBefore - insertedBpBefore;
+		//int querydiff = indel.length - minsize - softBegin.size() - insertedBpBefore + deletedBpBefore;
+		if (debug) cerr << "adjusting " << indel.length <<" " << minsize << "  " << softdiff << " " << diff << endl;
+		offset += diff;
+		///
+		indel.length = minsize;
+		indel.sequence = indel.sequence.substr(indel.sequence.size() - minsize);
+		indel.position = flankingLength;
+		indel.readPosition = indel.position; // if we have removed all the sequence before, this should be ==
+		referenceSequence = referenceSequence.substr(diff);
+
+		for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
+		    if (i < del) {
+			i->length = 0; // remove
+		    } else if (i > del) {
+			i->position -= diff;
+		    }
+		}
+	    }
+	    if (debug) cerr << indel << endl;
+
+	    // now, do the same for the reverse
+	    if (indel.length > 0) {
+		int minsize = indel.length + 1;
+		int flankingLength = querySequence.size() - indel.readPosition + indel.readLength();
+		string flanking = querySequence.substr(indel.readPosition + indel.readLength(), flankingLength);
+		int indelRefEnd = indel.position + indel.referenceLength();
+
+		size_t p = referenceSequence.find(flanking, indel.position);
+		if (p == string::npos) {
+		    if (debug)
+			cerr << "flanking not found" << endl;
+		} else {
+		    if (debug) {
+			cerr << "flanking is at " << p << endl;
+			cerr << "minsize would be " << (int) p - indel.position << endl;
+		    }
+		    minsize = (int) p - indel.position;
+		}
+
+		if (debug) cerr << "minsize " << minsize << endl;
+		if (minsize >= 0 && minsize <= indel.length) {
+		    //referenceSequence = referenceSequence.substr(0, referenceSequence.size() - (indel.length - minsize));
+		    if (debug) cerr << "adjusting " << indel << endl;
+		    indel.length = minsize;
+		    indel.sequence = indel.sequence.substr(0, minsize);
+		    //cerr << indel << endl;
+		    if (!softEnd.empty()) { // remove soft clips if we can
+			if (flankingLength < softEnd.size()) {
+			    softEnd = softEnd.substr(flankingLength - softEnd.size());
+			} else {
+			    softEnd.clear();
+			}
+		    }
+		    for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
+			if (i > del) {
+			    i->length = 0; // remove
+			}
+		    }
+		}
+	    }
+	}
+	}
+    }
+
+    if (debug) {
+	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
+	cerr << endl;
+    }
+
+    if (debug) cerr << "parsing indels" << endl;
+
+    // if soft clipping can be reduced by adjusting the tailing indels in the read, do it
+    // TODO
+
+    /*
+    int numEmptyIndels = 0;
+
+    if (!indels.empty()) {
+	vector<IndelAllele>::iterator a = indels.begin();
+	while (a != indels.end()) {
+	    if (debug) cerr << "parsing " << *a << endl;
+	    if (!(a->length > 0 && a->position >= 0)) {
+		++numEmptyIndels;
+	    }
+	    ++a;
+	}
+    }
+    */
+
+    for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
+	if (i->length == 0) continue;
+	if (i->insertion) {
+	    if (querySequence.substr(i->readPosition, i->readLength()) != i->sequence) {
+		cerr << "failure: " << *i << " should be " << querySequence.substr(i->readPosition, i->readLength()) << endl;
+		cerr << baseReferenceSequence << endl;
+		cerr << querySequence << endl;
+		throw 1;
+	    }
+	} else {
+	    if (referenceSequence.substr(i->position, i->length) != i->sequence) {
+		cerr << "failure: " << *i << " should be " << referenceSequence.substr(i->position, i->length) << endl;
+		cerr << baseReferenceSequence << endl;
+		cerr << querySequence << endl;
+		throw 1;
+	    }
+	}
+    }
+
+    if (indels.size() > 1) {
+        vector<IndelAllele>::iterator id = indels.begin();
+	while ((id + 1) != indels.end()) {
+	    if (debug) {
+		cerr << "indels: ";
+		for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
+		cerr << endl;
+		//for (vector<IndelAllele>::iterator a = newIndels.begin(); a != newIndels.end(); ++a) cerr << *a << " ";
+		//cerr << endl;
+	    }
+
+	    // get the indels to try to merge
+	    while (id->length == 0 && (id + 1) != indels.end()) ++id;
+	    vector<IndelAllele>::iterator idn = (id + 1);
+	    while (idn != indels.end() && idn->length == 0) ++idn;
+	    if (idn == indels.end()) break;
+
+            IndelAllele& indel = *idn;
+	    IndelAllele& last = *id;
+	    if (debug) cerr << "trying " << last << " against " << indel << endl;
+
+	    int lastend = last.insertion ? last.position : (last.position + last.length);
+	    if (indel.position == lastend) {
+		if (debug) cerr << "indel.position " << indel.position << " lastend " << lastend << endl;
+		if (indel.insertion == last.insertion) {
+		    last.length += indel.length;
+		    last.sequence += indel.sequence;
+		    indel.length = 0;
+		    indel.sequence.clear();
+		    id = idn;
+		} else if (last.length && indel.length) { // if the end of the previous == the start of the current, cut it off of both the ins and the del
+		    if (debug) cerr << "Merging " << last << " " << indel << endl;
+		    int matchsize = 1;
+		    int biggestmatchsize = 0;
+
+		    while (matchsize <= last.sequence.size() && matchsize <= indel.sequence.size()) {
+			if (last.sequence.substr(last.sequence.size() - matchsize) == indel.sequence.substr(0, matchsize)) {
+			    biggestmatchsize = matchsize;
+			}
+			++matchsize;
+		    }
+		    if (debug) cerr << "biggestmatchsize " << biggestmatchsize << endl;
+
+		    last.sequence = last.sequence.substr(0, last.sequence.size() - biggestmatchsize);
+		    last.length -= biggestmatchsize;
+		    indel.sequence = indel.sequence.substr(biggestmatchsize);
+		    indel.length -= biggestmatchsize;
+		    if (indel.insertion) indel.readPosition += biggestmatchsize;
+		    else indel.position += biggestmatchsize;
+
+		    if (indel.length > 0) {
+			id = idn;
+		    }
+		}
+	    } else {
+		if (last.insertion != indel.insertion) {
+		    if (debug) cerr << "merging by overlap " << last << " " << indel << endl;
+		    // see if we can slide the sequence in between these two indels together
+		    string lastOverlapSeq;
+		    string indelOverlapSeq;
+
+		    if (last.insertion) {
+			lastOverlapSeq =
+			    last.sequence
+			    + querySequence.substr(last.readPosition + last.readLength(),
+						   indel.readPosition - (last.readPosition + last.readLength()));
+			indelOverlapSeq =
+			    referenceSequence.substr(last.position + last.referenceLength(),
+						     indel.position - (last.position + last.referenceLength()))
+			    + indel.sequence;
+		    } else {
+			lastOverlapSeq =
+			    last.sequence
+			    + referenceSequence.substr(last.position + last.referenceLength(),
+						       indel.position - (last.position + last.referenceLength()));
+			indelOverlapSeq =
+			    querySequence.substr(last.readPosition + last.readLength(),
+						 indel.readPosition - (last.readPosition + last.readLength()))
+			    + indel.sequence;
+		    }
+
+		    if (debug) {
+			if (!last.insertion) {
+			    if (last.insertion) cerr << string(last.length, '-');
+			    cerr << lastOverlapSeq;
+			    if (indel.insertion) cerr << string(indel.length, '-');
+			    cerr << endl;
+			    if (!last.insertion) cerr << string(last.length, '-');
+			    cerr << indelOverlapSeq;
+			    if (!indel.insertion) cerr << string(indel.length, '-');
+			    cerr << endl;
+			} else {
+			    if (last.insertion) cerr << string(last.length, '-');
+			    cerr << indelOverlapSeq;
+			    if (indel.insertion) cerr << string(indel.length, '-');
+			    cerr << endl;
+			    if (!last.insertion) cerr << string(last.length, '-');
+			    cerr << lastOverlapSeq;
+			    if (!indel.insertion) cerr << string(indel.length, '-');
+			    cerr << endl;
+			}
+		    }
+
+
+		    int dist = min(last.length, indel.length);
+		    int matchingInBetween = indel.position - (last.position + last.referenceLength());
+		    int previousMatchingInBetween = matchingInBetween;
+		    //int matchingInBetween = indel.position - last.position;
+		    if (debug) cerr << "matchingInBetween " << matchingInBetween << endl;
+		    if (debug) cerr << "dist " << dist << endl;
+		    int mindist = matchingInBetween - dist;
+		    if (lastOverlapSeq == indelOverlapSeq) {
+			matchingInBetween = lastOverlapSeq.size();
+		    } else {
+			// TODO change to use string::find()
+			for (int i = dist; i > 0; --i) {
+			    if (debug) cerr << "lastoverlap: "
+					    << lastOverlapSeq.substr(lastOverlapSeq.size() - previousMatchingInBetween - i)
+					    << " thisoverlap: "
+					    << indelOverlapSeq.substr(0, i + previousMatchingInBetween) << endl;
+			    if (lastOverlapSeq.substr(lastOverlapSeq.size() - previousMatchingInBetween - i)
+				== indelOverlapSeq.substr(0, i + previousMatchingInBetween)) {
+				matchingInBetween = previousMatchingInBetween + i;
+				break;
+			    }
+			}
+		    }
+		    //cerr << last << " " << indel << endl;
+		    if (matchingInBetween > 0 && matchingInBetween > previousMatchingInBetween) {
+			if (debug) cerr << "matching " << matchingInBetween  << "bp between " << last << " " << indel
+				        << " was matching " << previousMatchingInBetween << endl;
+			int diff = matchingInBetween - previousMatchingInBetween;
+			last.length -= diff;
+			last.sequence = last.sequence.substr(0, last.length);
+			indel.length -= diff;
+			indel.sequence = indel.sequence.substr(diff);
+			if (!indel.insertion) indel.position += diff;
+			else indel.readPosition += diff;
+			if (debug) cerr << last << " " << indel << endl;
+		    }// else if (matchingInBetween == 0 || matchingInBetween == indel.position - last.position) {
+			//if (!newIndels.empty()) newIndels.pop_back();
+		    //} //else { newIndels.push_back(indel); }
+		    id = idn;
+		    //newIndels.push_back(indel);
+		} else {
+		    id = idn;
+		    //newIndels.push_back(indel);
+		}
+	    }
+	}
+    }
+
+    vector<IndelAllele> newIndels;
+    for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
+	if (!i->insertion && i->position == 0) { offset += i->length;
+	} else if (i->length > 0) newIndels.push_back(*i); // remove dels at front
+    }
+
+    // for each indel
+    //     if ( we're matched up to the previous insertion (or deletion) 
+    //          and it's also an insertion or deletion )
+    //         merge the indels
+    //
+    // and simultaneously reconstruct the cigar
+    //
+    // if there are spurious deletions at the start and end of the read, remove them
+    // if there are spurious insertions after soft-clipped bases, make them soft clips
+
+    vector<pair<int, string> > newCigar;
+
+    if (!softBegin.empty()) {
+	newCigar.push_back(make_pair(softBegin.size(), "S"));
+    }
+
+    if (newIndels.empty()) {
+
+	int remainingReadBp = querySequence.size() - softEnd.size() - softBegin.size();
+	newCigar.push_back(make_pair(remainingReadBp, "M"));
+
+	if (!softEnd.empty()) {
+	    newCigar.push_back(make_pair(softEnd.size(), "S"));
+	}
+
+	cigar = joinCIGAR(newCigar);
+
+	// check if we're realigned
+	if (cigar == cigarbefore) {
+	    return false;
+	} else {
+	    return true;
+	}
+    }
+
+    vector<IndelAllele>::iterator id = newIndels.begin();
+    vector<IndelAllele>::iterator last = id++;
+
+    if (last->position > 0) {
+	newCigar.push_back(make_pair(last->position, "M"));
+	newCigar.push_back(make_pair(last->length, (last->insertion ? "I" : "D")));
+    } else if (last->position == 0) { // discard floating indels
+	if (last->insertion) newCigar.push_back(make_pair(last->length, "S"));
+	else  newCigar.push_back(make_pair(last->length, "D"));
+    } else {
+	cerr << "negative indel position " << *last << endl;
+    }
+
+    int lastend = last->insertion ? last->position : (last->position + last->length);
+    LEFTALIGN_DEBUG(*last << ",");
+
+    for (; id != newIndels.end(); ++id) {
+	IndelAllele& indel = *id;
+	if (indel.length == 0) continue; // remove 0-length indels
+	if (debug) cerr << indel << " " << *last << endl;
+	LEFTALIGN_DEBUG(indel << ",");
+	if ((id + 1) == newIndels.end()
+	    && (indel.insertion && indel.position == referenceSequence.size()
+		|| (!indel.insertion && indel.position + indel.length == referenceSequence.size()))
+	    ) {
+	    if (indel.insertion) {
+		if (!newCigar.empty() && newCigar.back().second == "S") {
+		    newCigar.back().first += indel.length;
+		} else {
+		    newCigar.push_back(make_pair(indel.length, "S"));
+		}
+	    }
+	} else if (indel.position < lastend) {
+	    cerr << "impossibility?: indel realigned left of another indel" << endl;
+	    return false;
+	} else if (indel.position == lastend) {
+	    // how?
+	    if (indel.insertion == last->insertion) {
+		pair<int, string>& op = newCigar.back();
+		op.first += indel.length;
+	    } else {
+		newCigar.push_back(make_pair(indel.length, (indel.insertion ? "I" : "D")));
+	    }
+        } else if (indel.position > lastend) {  // also catches differential indels, but with the same position
+	    if (!newCigar.empty() && newCigar.back().second == "M") newCigar.back().first += indel.position - lastend;
+	    else newCigar.push_back(make_pair(indel.position - lastend, "M"));
+            newCigar.push_back(make_pair(indel.length, (indel.insertion ? "I" : "D")));
+        }
+
+	last = id;
+	lastend = last->insertion ? last->position : (last->position + last->length);
+
+	if (debug) {
+	    for (vector<pair<int, string> >::iterator c = newCigar.begin(); c != newCigar.end(); ++c)
+		cerr << c->first << c->second;
+	    cerr << endl;
+	}
+
+    }
+
+    int remainingReadBp = querySequence.size() - (last->readPosition + last->readLength()) - softEnd.size();
+    if (remainingReadBp > 0) {
+	if (debug) cerr << "bp remaining = " << remainingReadBp << endl;
+	if (newCigar.back().second == "M") newCigar.back().first += remainingReadBp;
+	else newCigar.push_back(make_pair(remainingReadBp, "M"));
+    }
+
+    if (newCigar.back().second == "D") newCigar.pop_back(); // remove trailing deletions
+
+    if (!softEnd.empty()) {
+	if (newCigar.back().second == "S") newCigar.back().first += softEnd.size();
+	else newCigar.push_back(make_pair(softEnd.size(), "S"));
+    }
+
+    LEFTALIGN_DEBUG(endl);
+
+    cigar = joinCIGAR(newCigar);
+
+    LEFTALIGN_DEBUG(cigar << endl);
+
+    // check if we're realigned
+    if (cigar == cigarbefore) {
+        return false;
+    } else {
+        return true;
+    }
+
+}
+
+int countMismatches(string& querySequence, string& cigar, string referenceSequence) {
+
+    int mismatches = 0;
+    int sp = 0;
+    int rp = 0;
+    vector<pair<int, string> > cigarData = splitCIGAR(cigar);
+    for (vector<pair<int, string> >::const_iterator c = cigarData.begin();
+        c != cigarData.end(); ++c) {
+        unsigned int l = c->first;
+        string t = c->second;
+        if (t == "M") { // match or mismatch
+            for (int i = 0; i < l; ++i) {
+                if (querySequence.at(rp) != referenceSequence.at(sp))
+                    ++mismatches;
+                ++sp;
+                ++rp;
+            }
+        } else if (t == "D") { // deletion
+            sp += l;  // update reference sequence position
+        } else if (t == "I") { // insertion
+            rp += l;  // update read position
+        } else if (t == "S") { // soft clip, clipped sequence present in the read not matching the reference
+            rp += l;
+        } else if (t == "H") { // hard clip on the read, clipped sequence is not present in the read
+        } else if (t == "N") { // skipped region in the reference not present in read, aka splice
+            sp += l;
+        }
+    }
+
+    return mismatches;
+
+}
+
+// Iteratively left-aligns the indels in the alignment until we have a stable
+// realignment.  Returns true on realignment success or non-realignment.
+// Returns false if we exceed the maximum number of realignment iterations.
+//
+bool stablyLeftAlign(string querySequence, string& cigar, string referenceSequence, int& offset, int maxiterations, bool debug) {
+
+    if (!leftAlign(querySequence, cigar, referenceSequence, offset)) {
+
+        LEFTALIGN_DEBUG("did not realign" << endl);
+        return true;
+
+    } else {
+
+        while (leftAlign(querySequence, cigar, referenceSequence, offset) && --maxiterations > 0) {
+            LEFTALIGN_DEBUG("realigning ..." << endl);
+        }
+
+        if (maxiterations <= 0) {
+            return false;
+        } else {
+            return true;
+        }
+    }
+}
+
+string mergeCIGAR(const string& c1, const string& c2) {
+    vector<pair<int, string> > cigar1 = splitCIGAR(c1);
+    vector<pair<int, string> > cigar2 = splitCIGAR(c2);
+    // check if the middle elements are the same
+    if (cigar1.back().second == cigar2.front().second) {
+        cigar1.back().first += cigar2.front().first;
+        cigar2.erase(cigar2.begin());
+    }
+    for (vector<pair<int, string> >::iterator c = cigar2.begin(); c != cigar2.end(); ++c) {
+        cigar1.push_back(*c);
+    }
+    return joinCIGAR(cigar1);
+}
+
+vector<pair<int, string> > splitCIGAR(const string& cigarStr) {
+    vector<pair<int, string> > cigar;
+    string number;
+    string type;
+    // strings go [Number][Type] ...
+    for (string::const_iterator s = cigarStr.begin(); s != cigarStr.end(); ++s) {
+        char c = *s;
+        if (isdigit(c)) {
+            if (type.empty()) {
+                number += c;
+            } else {
+                // signal for next token, push back the last pair, clean up
+                cigar.push_back(make_pair(atoi(number.c_str()), type));
+                number.clear();
+                type.clear();
+                number += c;
+            }
+        } else {
+            type += c;
+        }
+    }
+    if (!number.empty() && !type.empty()) {
+        cigar.push_back(make_pair(atoi(number.c_str()), type));
+    }
+    return cigar;
+}
+
+string joinCIGAR(const vector<pair<int, string> >& cigar) {
+    string cigarStr;
+    for (vector<pair<int, string> >::const_iterator c = cigar.begin(); c != cigar.end(); ++c) {
+        if (c->first) {
+            cigarStr += convert(c->first) + c->second;
+        }
+    }
+    return cigarStr;
+}
diff --git a/LeftAlign.h b/LeftAlign.h
new file mode 100644
index 0000000..7fb796e
--- /dev/null
+++ b/LeftAlign.h
@@ -0,0 +1,32 @@
+#ifndef __LEFTALIGN_H
+#define __LEFTALIGN_H
+
+#include <algorithm>
+#include <map>
+#include <vector>
+#include <list>
+#include <utility>
+#include <sstream>
+
+#include "IndelAllele.h"
+#include "convert.h"
+
+#ifdef VERBOSE_DEBUG
+#define LEFTALIGN_DEBUG(msg) \
+    if (debug) { cerr << msg; }
+#else
+#define LEFTALIGN_DEBUG(msg)
+#endif
+
+using namespace std;
+
+bool leftAlign(string& alternateQuery, string& cigar, string& referenceSequence, int& offset, bool debug = false);
+bool stablyLeftAlign(string alternateQuery, string& cigar, string referenceSequence, int& offset, int maxiterations = 20, bool debug = false);
+int countMismatches(string& alternateQuery, string& cigar, string& referenceSequence);
+
+string mergeCIGAR(const string& c1, const string& c2);
+vector<pair<int, string> > splitCIGAR(const string& cigarStr);
+string joinCIGAR(const vector<pair<int, string> >& cigar);
+
+
+#endif
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3c62e93
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,66 @@
+# =========================================
+# MOSAIK Banded Smith-Waterman Makefile
+# (c) 2009 Michael Stromberg & Wan-Ping Lee
+# =========================================
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= smithwaterman.cpp BandedSmithWaterman.cpp SmithWatermanGotoh.cpp Repeats.cpp LeftAlign.cpp IndelAllele.cpp
+OBJECTS= $(SOURCES:.cpp=.o) disorder.o
+OBJECTS_NO_MAIN= disorder.o BandedSmithWaterman.o SmithWatermanGotoh.o Repeats.o LeftAlign.o IndelAllele.o
+
+# ----------------
+# compiler options
+# ----------------
+
+# Use ?= to allow overriding from the env or command-line
+CXX?=		c++
+CXXFLAGS?=	-O3
+OBJ?=		sw.o
+
+# I don't think := is useful here, since there is nothing to expand
+LDFLAGS:=	-Wl,-s
+#CXXFLAGS=-g
+EXE:=		smithwaterman
+LIBS=
+
+all: $(EXE) $(OBJ)
+
+.PHONY: all
+
+libsw.a: smithwaterman.o BandedSmithWaterman.o SmithWatermanGotoh.o LeftAlign.o Repeats.o IndelAllele.o disorder.o
+	ar rs $@ smithwaterman.o SmithWatermanGotoh.o disorder.o BandedSmithWaterman.o LeftAlign.o Repeats.o IndelAllele.o
+
+sw.o:  BandedSmithWaterman.o SmithWatermanGotoh.o LeftAlign.o Repeats.o IndelAllele.o disorder.o
+	ld -r $^ -o sw.o -L.
+	#$(CXX) $(CFLAGS) -c -o smithwaterman.cpp $(OBJECTS_NO_MAIN) -I.
+
+### @$(CXX) $(LDFLAGS) $(CFLAGS) -o $@ $^ -I.
+$(EXE): smithwaterman.o BandedSmithWaterman.o SmithWatermanGotoh.o disorder.o LeftAlign.o Repeats.o IndelAllele.o
+	$(CXX) $(CFLAGS) $^ -I. -o $@
+
+#smithwaterman: $(OBJECTS)
+#	$(CXX) $(CXXFLAGS) -o $@ $< -I.
+
+smithwaterman.o: smithwaterman.cpp disorder.o
+	$(CXX) $(CXXFLAGS) -c -o $@ smithwaterman.cpp -I.
+
+disorder.o: disorder.cpp disorder.h
+	$(CXX) $(CXXFLAGS) -c -o $@ $< -I.
+BandedSmithWaterman.o: BandedSmithWaterman.cpp BandedSmithWaterman.h
+	$(CXX) $(CXXFLAGS) -c -o $@ $< -I.
+SmithWatermanGotoh.o: SmithWatermanGotoh.cpp SmithWatermanGotoh.h disorder.o
+	$(CXX) $(CXXFLAGS) -c -o $@ $< -I.
+Repeats.o: Repeats.cpp
+	$(CXX) $(CXXFLAGS) -c -o $@ $< -I.
+LeftAlign.o: LeftAlign.cpp
+	$(CXX) $(CXXFLAGS) -c -o $@ $< -I.
+IndelAllele.o: IndelAllele.cpp
+	$(CXX) $(CXXFLAGS) -c -o $@ $< -I.
+
+.PHONY: clean
+
+clean:
+	@echo "Cleaning up."
+	@rm -f *.o $(PROGRAM) *~
diff --git a/Mosaik.h b/Mosaik.h
new file mode 100644
index 0000000..24d70d0
--- /dev/null
+++ b/Mosaik.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#ifndef WIN32
+//#include "SafeFunctions.h"
+#endif
+
+// ==============
+// MOSAIK version
+// ==============
+
+#define MOSAIK_VERSION_DATE "2009-02-11"
+
+// adopt a major.minor.build version number [1].[1].[3]
+const unsigned char  MOSAIK_MAJOR_VERSION = 0;
+const unsigned char  MOSAIK_MINOR_VERSION = 9;
+const unsigned short MOSAIK_BUILD_VERSION = 899;
+
+// ================================
+// Platform specific variable sizes
+// ================================
+
+// Windows Vista 32-bit
+// Fedora Core 7 32-bit
+// Fedora Core 6 64-bit
+// Itanium2      64-bit
+#define SIZEOF_CHAR          1
+#define SIZEOF_WCHAR         2
+#define SIZEOF_SHORT         2
+#define SIZEOF_INT           4
+#define SIZEOF_FLOAT         4
+#define SIZEOF_DOUBLE        8
+#define SIZEOF_UINT64        8
+#define MOSAIK_LITTLE_ENDIAN 1
+
+#ifdef WIN32
+typedef signed long long    int64_t;
+typedef unsigned long long uint64_t;
+#endif
+
+#define NEGATIVE_ONE_INT     0xffffffff
+#define NEGATIVE_TWO_INT     0xfffffffe
+#define NEGATIVE_THREE_INT   0xfffffffd
+#define NEGATIVE_FOUR_INT    0xfffffffc
+#define MAX_SHORT            0xffff
+
+// ==========================
+// Platform specific file I/O 
+// ==========================
+
+#ifdef WIN32
+const char OS_DIRECTORY_SEPARATOR = '\\';
+#else
+const char OS_DIRECTORY_SEPARATOR = '/';
+#endif
+
+#define DIRECTORY_NAME_LENGTH    255
+
+// ====================================
+// Enable unit test diagnostic messages
+// ====================================
+
+#ifdef UNITTEST
+#define SILENTMODE if(0)
+#else
+#define SILENTMODE
+#endif
+
+// =================
+// Aligner constants
+// =================
+
+const double HASH_REGION_EXTENSION_PERCENT     = 0.025;
+const unsigned char REFERENCE_SEQUENCE_QUALITY = 40;
diff --git a/Repeats.cpp b/Repeats.cpp
new file mode 100644
index 0000000..289e7c7
--- /dev/null
+++ b/Repeats.cpp
@@ -0,0 +1,69 @@
+#include "Repeats.h"
+
+map<string, int> repeatCounts(long int position, const string& sequence, int maxsize) {
+    map<string, int> counts;
+    for (int i = 1; i <= maxsize; ++i) {
+        // subseq here i bases
+        string seq = sequence.substr(position, i);
+        // go left.
+
+        int j = position - i;
+        int leftsteps = 0;
+        while (j >= 0 && seq == sequence.substr(j, i)) {
+            j -= i;
+            ++leftsteps;
+        }
+
+        // go right.
+        j = position;
+
+        int rightsteps = 0;
+        while (j + i <= sequence.size() && seq == sequence.substr(j, i)) {
+            j += i;
+            ++rightsteps;
+        }
+        // if we went left and right a non-zero number of times, 
+        if (leftsteps + rightsteps > 1) {
+            counts[seq] = leftsteps + rightsteps;
+        }
+    }
+
+    // filter out redundant repeat information
+    if (counts.size() > 1) {
+        map<string, int> filteredcounts;
+        map<string, int>::iterator c = counts.begin();
+        string prev = c->first;
+        filteredcounts[prev] = c->second;  // shortest sequence
+        ++c;
+        for (; c != counts.end(); ++c) {
+            int i = 0;
+            string seq = c->first;
+            while (i + prev.length() <= seq.length() && seq.substr(i, prev.length()) == prev) {
+                i += prev.length();
+            }
+            if (i < seq.length()) {
+                filteredcounts[seq] = c->second;
+                prev = seq;
+            }
+        }
+        return filteredcounts;
+    } else {
+        return counts;
+    }
+}
+
+bool isRepeatUnit(const string& seq, const string& unit) {
+
+    if (seq.size() % unit.size() != 0) {
+	return false;
+    } else {
+	int maxrepeats = seq.size() / unit.size();
+	for (int i = 0; i < maxrepeats; ++i) {
+	    if (seq.substr(i * unit.size(), unit.size()) != unit) {
+		return false;
+	    }
+	}
+	return true;
+    }
+
+}
diff --git a/Repeats.h b/Repeats.h
new file mode 100644
index 0000000..2efc0ea
--- /dev/null
+++ b/Repeats.h
@@ -0,0 +1,8 @@
+#include <iostream>
+#include <string>
+#include <map>
+
+using namespace std;
+
+map<string, int> repeatCounts(long int pos, const string& seq, int maxsize);
+bool isRepeatUnit(const string& seq, const string& unit);
diff --git a/SWMain.cpp b/SWMain.cpp
new file mode 100644
index 0000000..6ef3421
--- /dev/null
+++ b/SWMain.cpp
@@ -0,0 +1,126 @@
+#include <iostream>
+#include <string.h>
+//#include "Alignment.h"
+//#include "Benchmark.h"
+//#include "HashRegion.h"
+#include "SmithWatermanGotoh.h"
+#include "BandedSmithWaterman.h"
+
+using namespace std;
+
+int main(int argc, char* argv[]) {
+/*
+	printf("------------------------------------------------------------------------------\n");
+	printf("Banded Smith-Waterman Algorithm (worst case)\n");
+	printf("Michael Stromberg & Wan-Ping Lee  Marth Lab, Boston College Biology Department\n");
+	printf("------------------------------------------------------------------------------\n\n");
+*/
+	// this version simulates the worst case of only a fragment hashing to the
+	// reference sequence. Basically a non-centered diagonal in the Smith-Waterman
+	// dynamic programming matrix.
+
+	// here we simulate a region on the reference that occurs between position 4001
+	// and position 4136. During hashing, only the first 20 bases in the query 
+	// matched perfectly.
+
+	// define the start and end coordinates of the entire reference region
+	//const unsigned int start = 4001;
+	//const unsigned int end   = 4136;
+
+	//const unsigned int testStart = atoi(argv[1]);
+	//const unsigned int testEnd = atoi(argv[2]);
+	//const unsigned int testQueryStart = atoi(argv[3]);
+	//const unsigned int testQueryEnd = atoi(argv[4]);
+	
+	//cout << endl<< "=====================================================" << endl;
+	//cout << testStart << "\t" << testQueryStart << endl;
+	
+	// define the 20 b:q
+	// ases that matched perfectly
+	//HashRegion hr;
+	
+	//=====================================================
+	// defind the hash region
+	// first.first:   reference begin
+	// first.second:  reference end
+	// second.first:  query begin
+	// second.second: query end
+	//=====================================================
+	
+	pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr;
+	hr.first.first   = 5;
+	hr.first.second  = 13;
+	hr.second.first  = 0;
+	hr.second.second = 8;
+
+	//=====================================================
+
+	// for 76 bp reads, we expect as much as 12 mismatches - however this does not
+	// translate to a bandwidth of 12 * 2 + 1 since most of these will be
+	// substitution errors
+	const unsigned char bandwidth = 11;
+
+	// initialize
+	const char* pReference = "ATGGCGGGGATCGGGACACTCGCCGGTGCGGGTACCCTA";
+	const char* pQuery     =      "GGGGATCGGGACACTCGCTCTCCGGTGCGGGTA";
+	
+	const unsigned int referenceLen = strlen(pReference);
+	const unsigned int queryLen     = strlen(pQuery);
+
+	// ==============================================================================================
+	// benchmarking reference on koi.bc.edu when NUM_ITERATIONS = 38000 on 76 bp read (1 try):
+	// CPU time: 23.920 s, wall time: 24.012 s (1582.6 alignments/s)
+	// ==============================================================================================
+	//const unsigned int NUM_ITERATIONS = 38000;
+	//unsigned int NUM_ITERATIONS = 1;
+
+	// create a new Smith-Waterman alignment object
+	CSmithWatermanGotoh sw(10.0f, -9.0f, 15.0f, 6.66f);
+	CBandedSmithWaterman bsw(10.0f, -9.0f, 15.0f, 6.66f, bandwidth);
+
+	// start timing the algorithm
+	//CBenchmark bench;
+	//bench.Start();
+
+	// perform NUM_ITERATIONS alignments
+	//Alignment bswAl;
+	//Alignment swAl;
+	//   referenceBegin, referenceEnd
+	unsigned int referenceSW, referenceBSW;
+	string cigarSW, cigarBSW;
+	//for(unsigned int i = 0; i < NUM_ITERATIONS; i++) {
+	  sw.Align(referenceSW, cigarSW, pReference, referenceLen, pQuery, queryLen);
+	  bsw.Align(referenceBSW, cigarBSW, pReference, referenceLen, pQuery, queryLen, hr);
+	//}
+
+	// stop timing the algorithm
+	//bench.Stop();
+	
+	// calculate the alignments per second
+	//double elapsedWallTime = bench.GetElapsedWallTime();
+	//double alignmentsPerSecond = (double)NUM_ITERATIONS / elapsedWallTime;
+	
+	// show our results
+	//printf("%d\t%d\n", al.ReferenceBegin,al.QueryBegin);
+
+	printf("Smith-Waterman\n");
+	printf("reference:    %s %3u\n", cigarSW.c_str(), referenceSW);
+	printf("Banded Smith-Waterman\n");
+	printf("reference:    %s %3u\n", cigarBSW.c_str(), referenceBSW);
+	/*  
+	printf("Smith-Waterman\n");
+	printf("reference:    %s %3u %3u\n", swAl.Reference.CData(), swAl.ReferenceBegin, swAl.ReferenceEnd);
+        printf("query:        %s %3u %3u\n", swAl.Query.CData(), swAl.QueryBegin, swAl.QueryEnd);
+        printf("mismatches:   %u\n", swAl.NumMismatches);
+	printf("\n");	
+	printf("Banded Smith-Waterman\n");
+	printf("reference:    %s %3u %3u\n", bswAl.Reference.CData(), bswAl.ReferenceBegin, bswAl.ReferenceEnd);
+        printf("query:        %s %3u %3u\n", bswAl.Query.CData(), bswAl.QueryBegin, bswAl.QueryEnd);
+        printf("mismatches:   %u\n", bswAl.NumMismatches);
+	*/
+	//printf("alignments/s: %.1f\n\n", alignmentsPerSecond);
+
+	//bench.DisplayTime("BandedSmithWaterman");
+
+	return 0;
+}
diff --git a/SmithWatermanGotoh.cpp b/SmithWatermanGotoh.cpp
new file mode 100644
index 0000000..49f687f
--- /dev/null
+++ b/SmithWatermanGotoh.cpp
@@ -0,0 +1,741 @@
+#include "SmithWatermanGotoh.h"
+
+const float CSmithWatermanGotoh::FLOAT_NEGATIVE_INFINITY = (float)-1e+30;
+
+const char CSmithWatermanGotoh::Directions_STOP     = 0;
+const char CSmithWatermanGotoh::Directions_LEFT     = 1;
+const char CSmithWatermanGotoh::Directions_DIAGONAL = 2;
+const char CSmithWatermanGotoh::Directions_UP       = 3;
+
+const int CSmithWatermanGotoh::repeat_size_max      = 12;
+
+CSmithWatermanGotoh::CSmithWatermanGotoh(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty) 
+    : mCurrentMatrixSize(0)
+    , mCurrentAnchorSize(0)
+    , mCurrentQuerySize(0)
+    , mCurrentAQSumSize(0)
+    , mMatchScore(matchScore)
+    , mMismatchScore(mismatchScore)
+    , mGapOpenPenalty(gapOpenPenalty)
+    , mGapExtendPenalty(gapExtendPenalty)
+    , mPointers(NULL)
+    , mSizesOfVerticalGaps(NULL)
+    , mSizesOfHorizontalGaps(NULL)
+    , mQueryGapScores(NULL)
+    , mBestScores(NULL)
+    , mReversedAnchor(NULL)
+    , mReversedQuery(NULL)
+    , mUseHomoPolymerGapOpenPenalty(false)
+    , mUseEntropyGapOpenPenalty(false)
+    , mUseRepeatGapExtensionPenalty(false)
+{
+    CreateScoringMatrix();
+}
+
+CSmithWatermanGotoh::~CSmithWatermanGotoh(void) {
+    if(mPointers)              delete [] mPointers;
+    if(mSizesOfVerticalGaps)   delete [] mSizesOfVerticalGaps;
+    if(mSizesOfHorizontalGaps) delete [] mSizesOfHorizontalGaps;
+    if(mQueryGapScores)        delete [] mQueryGapScores;
+    if(mBestScores)            delete [] mBestScores;
+    if(mReversedAnchor)        delete [] mReversedAnchor;
+    if(mReversedQuery)         delete [] mReversedQuery;
+}
+
+// aligns the query sequence to the reference using the Smith Waterman Gotoh algorithm
+void CSmithWatermanGotoh::Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2) {
+
+    if((s1.length() == 0) || (s2.length() == 0)) {
+	cout << "ERROR: Found a read with a zero length." << endl;
+	exit(1);
+    }
+
+    unsigned int referenceLen      = s1.length() + 1;
+    unsigned int queryLen          = s2.length() + 1;
+    unsigned int sequenceSumLength = s1.length() + s2.length();
+
+    // reinitialize our matrices
+
+    if((referenceLen * queryLen) > mCurrentMatrixSize) {
+
+	// calculate the new matrix size
+	mCurrentMatrixSize = referenceLen * queryLen;
+
+	// delete the old arrays
+	if(mPointers)              delete [] mPointers;
+	if(mSizesOfVerticalGaps)   delete [] mSizesOfVerticalGaps;
+	if(mSizesOfHorizontalGaps) delete [] mSizesOfHorizontalGaps;
+
+	try {
+
+	    // initialize the arrays
+	    mPointers              = new char[mCurrentMatrixSize];
+	    mSizesOfVerticalGaps   = new short[mCurrentMatrixSize];
+	    mSizesOfHorizontalGaps = new short[mCurrentMatrixSize];
+
+	} catch(bad_alloc) {
+	    cout << "ERROR: Unable to allocate enough memory for the Smith-Waterman algorithm." << endl;
+	    exit(1);
+	}
+    }
+
+    // initialize the traceback matrix to STOP
+    memset((char*)mPointers, 0, SIZEOF_CHAR * queryLen);
+    for(unsigned int i = 1; i < referenceLen; i++) mPointers[i * queryLen] = 0;
+
+    // initialize the gap matrices to 1
+    uninitialized_fill(mSizesOfVerticalGaps, mSizesOfVerticalGaps + mCurrentMatrixSize, 1);
+    uninitialized_fill(mSizesOfHorizontalGaps, mSizesOfHorizontalGaps + mCurrentMatrixSize, 1);
+
+
+    // initialize our repeat counts if they are needed
+    vector<map<string, int> > referenceRepeats;
+    vector<map<string, int> > queryRepeats;
+    int queryBeginRepeatBases = 0;
+    int queryEndRepeatBases = 0;
+    if (mUseRepeatGapExtensionPenalty) {
+	for (unsigned int i = 0; i < queryLen; ++i)
+	    queryRepeats.push_back(repeatCounts(i, s2, repeat_size_max));
+	for (unsigned int i = 0; i < referenceLen; ++i)
+	    referenceRepeats.push_back(repeatCounts(i, s1, repeat_size_max));
+
+	// keep only the biggest repeat
+	vector<map<string, int> >::iterator q = queryRepeats.begin();
+	for (; q != queryRepeats.end(); ++q) {
+	    map<string, int>::iterator biggest = q->begin();
+	    map<string, int>::iterator z = q->begin();
+	    for (; z != q->end(); ++z)
+		if (z->first.size() > biggest->first.size()) biggest = z;
+	    z = q->begin();
+	    while (z != q->end()) {
+		if (z != biggest)
+		    q->erase(z++);
+		else ++z;
+	    }
+	}
+
+	q = referenceRepeats.begin();
+	for (; q != referenceRepeats.end(); ++q) {
+	    map<string, int>::iterator biggest = q->begin();
+	    map<string, int>::iterator z = q->begin();
+	    for (; z != q->end(); ++z)
+		if (z->first.size() > biggest->first.size()) biggest = z;
+	    z = q->begin();
+	    while (z != q->end()) {
+		if (z != biggest)
+		    q->erase(z++);
+		else ++z;
+	    }
+	}
+
+	// remove repeat information from ends of queries
+	// this results in the addition of spurious flanking deletions in repeats
+	map<string, int>& qrend = queryRepeats.at(queryRepeats.size() - 2);
+	if (!qrend.empty()) {
+	    int queryEndRepeatBases = qrend.begin()->first.size() * qrend.begin()->second;
+	    for (int i = 0; i < queryEndRepeatBases; ++i)
+		queryRepeats.at(queryRepeats.size() - 2 - i).clear();
+	}
+
+	map<string, int>& qrbegin = queryRepeats.front();
+	if (!qrbegin.empty()) {
+	    int queryBeginRepeatBases = qrbegin.begin()->first.size() * qrbegin.begin()->second;
+	    for (int i = 0; i < queryBeginRepeatBases; ++i)
+		queryRepeats.at(i).clear();
+	}
+
+    }
+
+    int entropyWindowSize = 8;
+    vector<float> referenceEntropies;
+    vector<float> queryEntropies;
+    if (mUseEntropyGapOpenPenalty) {
+	for (unsigned int i = 0; i < queryLen; ++i)
+	    queryEntropies.push_back(
+		shannon_H((char*) &s2[max(0, min((int) i - entropyWindowSize / 2, (int) queryLen - entropyWindowSize - 1))],
+			  entropyWindowSize));
+	for (unsigned int i = 0; i < referenceLen; ++i)
+	    referenceEntropies.push_back(
+		shannon_H((char*) &s1[max(0, min((int) i - entropyWindowSize / 2, (int) referenceLen - entropyWindowSize - 1))],
+			  entropyWindowSize));
+    }
+
+    // normalize entropies
+    /*
+    float qsum = 0;
+    float qnorm = 0;
+    float qmax = 0;
+    for (vector<float>::iterator q = queryEntropies.begin(); q != queryEntropies.end(); ++q) {
+	qsum += *q;
+	if (*q > qmax) qmax = *q;
+    }
+    qnorm = qsum / queryEntropies.size();
+    for (vector<float>::iterator q = queryEntropies.begin(); q != queryEntropies.end(); ++q)
+	*q = *q / qsum + qmax;
+
+    float rsum = 0;
+    float rnorm = 0;
+    float rmax = 0;
+    for (vector<float>::iterator r = referenceEntropies.begin(); r != referenceEntropies.end(); ++r) {
+	rsum += *r;
+	if (*r > rmax) rmax = *r;
+    }
+    rnorm = rsum / referenceEntropies.size();
+    for (vector<float>::iterator r = referenceEntropies.begin(); r != referenceEntropies.end(); ++r)
+	*r = *r / rsum + rmax;
+    */
+
+    //
+    // construct
+    //
+
+    // reinitialize our query-dependent arrays
+    if(s2.length() > mCurrentQuerySize) {
+
+	// calculate the new query array size
+	mCurrentQuerySize = s2.length();
+
+	// delete the old arrays
+	if(mQueryGapScores) delete [] mQueryGapScores;
+	if(mBestScores)     delete [] mBestScores;
+
+	// initialize the arrays
+	try {
+
+	    mQueryGapScores = new float[mCurrentQuerySize + 1];
+	    mBestScores     = new float[mCurrentQuerySize + 1];
+
+	} catch(bad_alloc) {
+	    cout << "ERROR: Unable to allocate enough memory for the Smith-Waterman algorithm." << endl;
+	    exit(1);
+	}
+    }
+
+    // reinitialize our reference+query-dependent arrays
+    if(sequenceSumLength > mCurrentAQSumSize) {
+
+	// calculate the new reference array size
+	mCurrentAQSumSize = sequenceSumLength;
+
+	// delete the old arrays
+	if(mReversedAnchor) delete [] mReversedAnchor;
+	if(mReversedQuery)  delete [] mReversedQuery;
+
+	// initialize the arrays
+	try {
+
+	    mReversedAnchor = new char[mCurrentAQSumSize + 1];	// reversed sequence #1
+	    mReversedQuery  = new char[mCurrentAQSumSize + 1];	// reversed sequence #2
+
+	} catch(bad_alloc) {
+	    cout << "ERROR: Unable to allocate enough memory for the Smith-Waterman algorithm." << endl;
+	    exit(1);
+	}
+    }
+
+    // initialize the gap score and score vectors
+    uninitialized_fill(mQueryGapScores, mQueryGapScores + queryLen, FLOAT_NEGATIVE_INFINITY);
+    memset((char*)mBestScores, 0, SIZEOF_FLOAT * queryLen);
+
+    float similarityScore, totalSimilarityScore, bestScoreDiagonal;
+    float queryGapExtendScore, queryGapOpenScore;
+    float referenceGapExtendScore, referenceGapOpenScore, currentAnchorGapScore;
+
+    unsigned int BestColumn = 0;
+    unsigned int BestRow    = 0;
+    BestScore               = FLOAT_NEGATIVE_INFINITY;
+
+    for(unsigned int i = 1, k = queryLen; i < referenceLen; i++, k += queryLen) {
+
+	currentAnchorGapScore = FLOAT_NEGATIVE_INFINITY;
+	bestScoreDiagonal = mBestScores[0];
+
+	for(unsigned int j = 1, l = k + 1; j < queryLen; j++, l++) {
+
+	    // calculate our similarity score
+	    similarityScore = mScoringMatrix[s1[i - 1] - 'A'][s2[j - 1] - 'A'];
+
+	    // fill the matrices
+	    totalSimilarityScore = bestScoreDiagonal + similarityScore;
+	    
+	    //cerr << "i: " << i << ", j: " << j << ", totalSimilarityScore: " << totalSimilarityScore << endl;
+
+	    queryGapExtendScore = mQueryGapScores[j] - mGapExtendPenalty;
+	    queryGapOpenScore   = mBestScores[j] - mGapOpenPenalty;
+	    
+	    // compute the homo-polymer gap score if enabled
+	    if(mUseHomoPolymerGapOpenPenalty)
+		if((j > 1) && (s2[j - 1] == s2[j - 2]))
+		    queryGapOpenScore = mBestScores[j] - mHomoPolymerGapOpenPenalty;
+	    
+	    // compute the entropy gap score if enabled
+	    if (mUseEntropyGapOpenPenalty) {
+		queryGapOpenScore = 
+		    mBestScores[j] - mGapOpenPenalty 
+		    * max(queryEntropies.at(j), referenceEntropies.at(i))
+		    * mEntropyGapOpenPenalty;
+	    }
+
+	    int gaplen = mSizesOfVerticalGaps[l - queryLen] + 1;
+
+	    if (mUseRepeatGapExtensionPenalty) {
+		map<string, int>& repeats = queryRepeats[j];
+		// does the sequence which would be inserted or deleted in this gap match the repeat structure which it is embedded in?
+		if (!repeats.empty()) {
+
+		    const pair<string, int>& repeat = *repeats.begin();
+		    int repeatsize = repeat.first.size();
+		    if (gaplen != repeatsize && gaplen % repeatsize != 0) {
+			gaplen = gaplen / repeatsize + repeatsize;
+		    }
+
+		    if ((repeat.first.size() * repeat.second) > 3 && gaplen + i < s1.length()) {
+			string gapseq = string(&s1[i], gaplen);
+			if (gapseq == repeat.first || isRepeatUnit(gapseq, repeat.first)) {
+			    queryGapExtendScore = mQueryGapScores[j]
+				+ mRepeatGapExtensionPenalty / (float) gaplen;
+				//    mMaxRepeatGapExtensionPenalty)
+			} else {
+			    queryGapExtendScore = mQueryGapScores[j] - mGapExtendPenalty;
+			}
+		    }
+		} else {
+		    queryGapExtendScore = mQueryGapScores[j] - mGapExtendPenalty;
+		}
+	    }
+		  
+	    if(queryGapExtendScore > queryGapOpenScore) {
+		mQueryGapScores[j] = queryGapExtendScore;
+		mSizesOfVerticalGaps[l] = gaplen;
+	    } else mQueryGapScores[j] = queryGapOpenScore;
+	    
+	    referenceGapExtendScore = currentAnchorGapScore - mGapExtendPenalty;
+	    referenceGapOpenScore   = mBestScores[j - 1] - mGapOpenPenalty;
+		  
+	    // compute the homo-polymer gap score if enabled
+	    if(mUseHomoPolymerGapOpenPenalty)
+		if((i > 1) && (s1[i - 1] == s1[i - 2]))
+		    referenceGapOpenScore = mBestScores[j - 1] - mHomoPolymerGapOpenPenalty;
+		  
+	    // compute the entropy gap score if enabled
+	    if (mUseEntropyGapOpenPenalty) {
+		referenceGapOpenScore = 
+		    mBestScores[j - 1] - mGapOpenPenalty 
+		    * max(queryEntropies.at(j), referenceEntropies.at(i))
+		    * mEntropyGapOpenPenalty;
+	    }
+
+	    gaplen = mSizesOfHorizontalGaps[l - 1] + 1;
+
+	    if (mUseRepeatGapExtensionPenalty) {
+		map<string, int>& repeats = referenceRepeats[i];
+		// does the sequence which would be inserted or deleted in this gap match the repeat structure which it is embedded in?
+		if (!repeats.empty()) {
+
+		    const pair<string, int>& repeat = *repeats.begin();
+		    int repeatsize = repeat.first.size();
+		    if (gaplen != repeatsize && gaplen % repeatsize != 0) {
+			gaplen = gaplen / repeatsize + repeatsize;
+		    }
+
+		    if ((repeat.first.size() * repeat.second) > 3 && gaplen + j < s2.length()) {
+			string gapseq = string(&s2[j], gaplen);
+			if (gapseq == repeat.first || isRepeatUnit(gapseq, repeat.first)) {
+			    referenceGapExtendScore = currentAnchorGapScore
+				+ mRepeatGapExtensionPenalty / (float) gaplen;
+				//mMaxRepeatGapExtensionPenalty)
+			} else {
+			    referenceGapExtendScore = currentAnchorGapScore - mGapExtendPenalty;
+			}
+		    }
+		} else {
+		    referenceGapExtendScore = currentAnchorGapScore - mGapExtendPenalty;
+		}
+	    }
+
+	    if(referenceGapExtendScore > referenceGapOpenScore) {
+		currentAnchorGapScore = referenceGapExtendScore;
+		mSizesOfHorizontalGaps[l] = gaplen;
+	    } else currentAnchorGapScore = referenceGapOpenScore;
+		  
+	    bestScoreDiagonal = mBestScores[j];
+	    mBestScores[j] = MaxFloats(totalSimilarityScore, mQueryGapScores[j], currentAnchorGapScore);
+		  
+		  
+	    // determine the traceback direction
+	    // diagonal (445364713) > stop (238960195) > up (214378647) > left (166504495)
+	    if(mBestScores[j] == 0)                         mPointers[l] = Directions_STOP;
+	    else if(mBestScores[j] == totalSimilarityScore) mPointers[l] = Directions_DIAGONAL;
+	    else if(mBestScores[j] == mQueryGapScores[j])   mPointers[l] = Directions_UP;
+	    else                                            mPointers[l] = Directions_LEFT;
+		  
+	    // set the traceback start at the current cell i, j and score
+	    if(mBestScores[j] > BestScore) {
+		BestRow    = i;
+		BestColumn = j;
+		BestScore  = mBestScores[j];
+	    }
+	}
+    }
+
+    //
+    // traceback
+    //
+
+    // aligned sequences
+    int gappedAnchorLen  = 0;   // length of sequence #1 after alignment
+    int gappedQueryLen   = 0;   // length of sequence #2 after alignment
+    int numMismatches    = 0;   // the mismatched nucleotide count
+
+    char c1, c2;
+
+    int ci = BestRow;
+    int cj = BestColumn;
+    int ck = ci * queryLen;
+
+    // traceback flag
+    bool keepProcessing = true;
+
+    while(keepProcessing) {
+	//cerr << ci << " " << cj << " " << ck << "  ... " << gappedAnchorLen << " " << gappedQueryLen <<  endl;
+
+	// diagonal (445364713) > stop (238960195) > up (214378647) > left (166504495)
+	switch(mPointers[ck + cj]) {
+
+	case Directions_DIAGONAL:
+	    c1 = s1[--ci];
+	    c2 = s2[--cj];
+	    ck -= queryLen;
+
+	    mReversedAnchor[gappedAnchorLen++] = c1;
+	    mReversedQuery[gappedQueryLen++]   = c2;
+
+	    // increment our mismatch counter
+	    if(mScoringMatrix[c1 - 'A'][c2 - 'A'] == mMismatchScore) numMismatches++;	
+	    break;
+
+	case Directions_STOP:
+	    keepProcessing = false;
+	    break;
+
+	case Directions_UP:
+	    for(unsigned int l = 0, len = mSizesOfVerticalGaps[ck + cj]; l < len; l++) {
+		if (ci <= 0) {
+		    keepProcessing = false;
+		    break;
+		}
+		mReversedAnchor[gappedAnchorLen++] = s1[--ci];
+		mReversedQuery[gappedQueryLen++]   = GAP;
+		ck -= queryLen;
+		numMismatches++;
+	    }
+	    break;
+
+	case Directions_LEFT:
+	    for(unsigned int l = 0, len = mSizesOfHorizontalGaps[ck + cj]; l < len; l++) {
+		if (cj <= 0) {
+		    keepProcessing = false;
+		    break;
+		}
+		mReversedAnchor[gappedAnchorLen++] = GAP;
+		mReversedQuery[gappedQueryLen++]   = s2[--cj];
+		numMismatches++;
+	    }
+	    break;
+	}
+    }
+
+    // define the reference and query sequences
+    mReversedAnchor[gappedAnchorLen] = 0;
+    mReversedQuery[gappedQueryLen]   = 0;
+
+    // catch sequences with different lengths
+    if(gappedAnchorLen != gappedQueryLen) {
+	cout << "ERROR: The aligned sequences have different lengths after Smith-Waterman-Gotoh algorithm." << endl;
+	exit(1);
+    }
+
+    // reverse the strings and assign them to our alignment structure
+    reverse(mReversedAnchor, mReversedAnchor + gappedAnchorLen);
+    reverse(mReversedQuery,  mReversedQuery  + gappedQueryLen);
+
+    //alignment.Reference = mReversedAnchor;
+    //alignment.Query     = mReversedQuery;
+
+    // set the reference endpoints
+    //alignment.ReferenceBegin = ci;
+    //alignment.ReferenceEnd   = BestRow - 1;
+    referenceAl = ci;
+
+    // set the query endpoints
+    /*  
+	if(alignment.IsReverseComplement) {
+	alignment.QueryBegin = s2Length - BestColumn;
+	alignment.QueryEnd   = s2Length - cj - 1;
+	// alignment.QueryLength= alignment.QueryBegin - alignment.QueryEnd + 1;
+	} else {
+	alignment.QueryBegin = cj;
+	alignment.QueryEnd   = BestColumn - 1;
+	// alignment.QueryLength= alignment.QueryEnd - alignment.QueryBegin + 1;
+	}
+    */
+
+    // set the query length and number of mismatches
+    //alignment.QueryLength = alignment.QueryEnd - alignment.QueryBegin + 1;
+    //alignment.NumMismatches  = numMismatches;
+
+    unsigned int alLength = strlen(mReversedAnchor);
+    unsigned int m = 0, d = 0, i = 0;
+    bool dashRegion = false;
+    ostringstream oCigar (ostringstream::out);
+    int insertedBases = 0;
+
+    if ( cj != 0 ) {
+	if ( cj > 0 ) {
+	    oCigar << cj << 'S';
+	} else { // how do we get negative cj's?
+	    referenceAl -= cj;
+	    alLength += cj;
+	}
+    }
+	
+    for ( unsigned int j = 0; j < alLength; j++ ) {
+	// m
+	if ( ( mReversedAnchor[j] != GAP ) && ( mReversedQuery[j] != GAP ) ) {
+	    if ( dashRegion ) {
+		if ( d != 0 ) oCigar << d << 'D';
+		else          { oCigar << i << 'I'; insertedBases += i; }
+	    }
+	    dashRegion = false;
+	    m++;
+	    d = 0;
+	    i = 0;
+	}
+	else {
+	    if ( !dashRegion && m )
+		oCigar << m << 'M';
+	    dashRegion = true;
+	    m = 0;
+	    if ( mReversedAnchor[j] == GAP ) {
+		if ( d != 0 ) oCigar << d << 'D';
+		i++;
+		d = 0;
+	    }
+	    else {
+		if ( i != 0) { oCigar << i << 'I'; insertedBases += i; }
+		d++;
+		i = 0;
+	    }
+	}
+    }
+    if      ( m != 0 ) oCigar << m << 'M';
+    else if ( d != 0 ) oCigar << d << 'D';
+    else if ( i != 0 ) oCigar << i << 'I';
+
+    if ( BestColumn != s2.length() )
+	oCigar << s2.length() - BestColumn << 'S';
+
+    cigarAl = oCigar.str();
+
+    // fix the gap order
+    CorrectHomopolymerGapOrder(alLength, numMismatches);
+
+    if (mUseEntropyGapOpenPenalty || mUseRepeatGapExtensionPenalty) {
+	int offset = 0;
+	string oldCigar;
+	try {
+	    oldCigar = cigarAl;
+	    stablyLeftAlign(s2, cigarAl, s1.substr(referenceAl, alLength - insertedBases), offset);
+	} catch (...) {
+	    cerr << "an exception occurred when left-aligning " << s1 << " " << s2 << endl;
+	    cigarAl = oldCigar; // undo the failed left-realignment attempt
+	    offset = 0;
+	}
+	referenceAl += offset;
+    }
+
+}
+
+// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
+void CSmithWatermanGotoh::CreateScoringMatrix(void) {
+
+    unsigned int nIndex = 13;
+    unsigned int xIndex = 23;
+
+    // define the N score to be 1/4 of the span between mismatch and match
+    //const short nScore = mMismatchScore + (short)(((mMatchScore - mMismatchScore) / 4.0) + 0.5);
+
+    // calculate the scoring matrix
+    for(unsigned char i = 0; i < MOSAIK_NUM_NUCLEOTIDES; i++) {
+	for(unsigned char j = 0; j < MOSAIK_NUM_NUCLEOTIDES; j++) {
+
+	    // N.B. matching N to everything (while conceptually correct) leads to some
+	    // bad alignments, lets make N be a mismatch instead.
+
+	    // add the matches or mismatches to the hashtable (N is a mismatch)
+	    if((i == nIndex) || (j == nIndex)) mScoringMatrix[i][j] = mMismatchScore;
+	    else if((i == xIndex) || (j == xIndex)) mScoringMatrix[i][j] = mMismatchScore;
+	    else if(i == j) mScoringMatrix[i][j] = mMatchScore;
+	    else mScoringMatrix[i][j] = mMismatchScore;
+	}
+    }
+
+    // add ambiguity codes
+    mScoringMatrix['M' - 'A']['A' - 'A'] = mMatchScore;	// M - A
+    mScoringMatrix['A' - 'A']['M' - 'A'] = mMatchScore;
+    mScoringMatrix['M' - 'A']['C' - 'A'] = mMatchScore; // M - C
+    mScoringMatrix['C' - 'A']['M' - 'A'] = mMatchScore;
+
+    mScoringMatrix['R' - 'A']['A' - 'A'] = mMatchScore;	// R - A
+    mScoringMatrix['A' - 'A']['R' - 'A'] = mMatchScore;
+    mScoringMatrix['R' - 'A']['G' - 'A'] = mMatchScore; // R - G
+    mScoringMatrix['G' - 'A']['R' - 'A'] = mMatchScore;
+
+    mScoringMatrix['W' - 'A']['A' - 'A'] = mMatchScore;	// W - A
+    mScoringMatrix['A' - 'A']['W' - 'A'] = mMatchScore;
+    mScoringMatrix['W' - 'A']['T' - 'A'] = mMatchScore; // W - T
+    mScoringMatrix['T' - 'A']['W' - 'A'] = mMatchScore;
+
+    mScoringMatrix['S' - 'A']['C' - 'A'] = mMatchScore;	// S - C
+    mScoringMatrix['C' - 'A']['S' - 'A'] = mMatchScore;
+    mScoringMatrix['S' - 'A']['G' - 'A'] = mMatchScore; // S - G
+    mScoringMatrix['G' - 'A']['S' - 'A'] = mMatchScore;
+
+    mScoringMatrix['Y' - 'A']['C' - 'A'] = mMatchScore;	// Y - C
+    mScoringMatrix['C' - 'A']['Y' - 'A'] = mMatchScore;
+    mScoringMatrix['Y' - 'A']['T' - 'A'] = mMatchScore; // Y - T
+    mScoringMatrix['T' - 'A']['Y' - 'A'] = mMatchScore;
+
+    mScoringMatrix['K' - 'A']['G' - 'A'] = mMatchScore;	// K - G
+    mScoringMatrix['G' - 'A']['K' - 'A'] = mMatchScore;
+    mScoringMatrix['K' - 'A']['T' - 'A'] = mMatchScore; // K - T
+    mScoringMatrix['T' - 'A']['K' - 'A'] = mMatchScore;
+
+    mScoringMatrix['V' - 'A']['A' - 'A'] = mMatchScore;	// V - A
+    mScoringMatrix['A' - 'A']['V' - 'A'] = mMatchScore;
+    mScoringMatrix['V' - 'A']['C' - 'A'] = mMatchScore; // V - C
+    mScoringMatrix['C' - 'A']['V' - 'A'] = mMatchScore;
+    mScoringMatrix['V' - 'A']['G' - 'A'] = mMatchScore; // V - G
+    mScoringMatrix['G' - 'A']['V' - 'A'] = mMatchScore;
+
+    mScoringMatrix['H' - 'A']['A' - 'A'] = mMatchScore;	// H - A
+    mScoringMatrix['A' - 'A']['H' - 'A'] = mMatchScore;
+    mScoringMatrix['H' - 'A']['C' - 'A'] = mMatchScore; // H - C
+    mScoringMatrix['C' - 'A']['H' - 'A'] = mMatchScore;
+    mScoringMatrix['H' - 'A']['T' - 'A'] = mMatchScore; // H - T
+    mScoringMatrix['T' - 'A']['H' - 'A'] = mMatchScore;
+
+    mScoringMatrix['D' - 'A']['A' - 'A'] = mMatchScore;	// D - A
+    mScoringMatrix['A' - 'A']['D' - 'A'] = mMatchScore;
+    mScoringMatrix['D' - 'A']['G' - 'A'] = mMatchScore; // D - G
+    mScoringMatrix['G' - 'A']['D' - 'A'] = mMatchScore;
+    mScoringMatrix['D' - 'A']['T' - 'A'] = mMatchScore; // D - T
+    mScoringMatrix['T' - 'A']['D' - 'A'] = mMatchScore;
+
+    mScoringMatrix['B' - 'A']['C' - 'A'] = mMatchScore;	// B - C
+    mScoringMatrix['C' - 'A']['B' - 'A'] = mMatchScore;
+    mScoringMatrix['B' - 'A']['G' - 'A'] = mMatchScore; // B - G
+    mScoringMatrix['G' - 'A']['B' - 'A'] = mMatchScore;
+    mScoringMatrix['B' - 'A']['T' - 'A'] = mMatchScore; // B - T
+    mScoringMatrix['T' - 'A']['B' - 'A'] = mMatchScore;
+}
+
+// enables homo-polymer scoring
+void CSmithWatermanGotoh::EnableHomoPolymerGapPenalty(float hpGapOpenPenalty) {
+    mUseHomoPolymerGapOpenPenalty = true;
+    mHomoPolymerGapOpenPenalty    = hpGapOpenPenalty;
+}
+
+// enables entropy-based gap open penalty
+void CSmithWatermanGotoh::EnableEntropyGapPenalty(float enGapOpenPenalty) {
+    mUseEntropyGapOpenPenalty = true;
+    mEntropyGapOpenPenalty    = enGapOpenPenalty;
+}
+
+// enables repeat-aware gap extension penalty
+void CSmithWatermanGotoh::EnableRepeatGapExtensionPenalty(float rGapExtensionPenalty, float rMaxGapRepeatExtensionPenaltyFactor) {
+    mUseRepeatGapExtensionPenalty = true;
+    mRepeatGapExtensionPenalty    = rGapExtensionPenalty;
+    mMaxRepeatGapExtensionPenalty = rMaxGapRepeatExtensionPenaltyFactor * rGapExtensionPenalty;
+}
+
+// corrects the homopolymer gap order for forward alignments
+void CSmithWatermanGotoh::CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches) {
+
+    // this is only required for alignments with mismatches
+    //if(al.NumMismatches == 0) return;
+    if ( numMismatches == 0 ) return;
+
+    // localize the alignment data
+    //char* pReference = al.Reference.Data();
+    //char* pQuery     = al.Query.Data();
+    //const unsigned int numBases = al.Reference.Length();
+    char* pReference = mReversedAnchor;
+    char* pQuery     = mReversedQuery;
+
+    // initialize
+    bool hasReferenceGap = false, hasQueryGap = false;
+    char* pNonGapSeq = NULL;
+    char* pGapSeq    = NULL;
+    char nonGapBase  = 'J';
+
+    // identify gapped regions
+    for(unsigned int i = 0; i < numBases; i++) {
+
+	// check if the current position is gapped
+	hasReferenceGap = false;
+	hasQueryGap     = false;
+
+	if(pReference[i] == GAP) {
+	    hasReferenceGap = true;
+	    pNonGapSeq      = pQuery;
+	    pGapSeq         = pReference;
+	    nonGapBase      = pQuery[i];
+	}
+
+	if(pQuery[i] == GAP) {
+	    hasQueryGap = true;
+	    pNonGapSeq  = pReference;
+	    pGapSeq     = pQuery;
+	    nonGapBase  = pReference[i];
+	}
+
+	// continue if we don't have any gaps
+	if(!hasReferenceGap && !hasQueryGap) continue;
+
+	// sanity check
+	if(hasReferenceGap && hasQueryGap) {
+	    printf("ERROR: Found a gap in both the reference sequence and query sequence.\n");
+	    exit(1);
+	}
+
+	// find the non-gapped length (forward)
+	unsigned short numGappedBases = 0;
+	unsigned short nonGapLength   = 0;
+	unsigned short testPos = i;
+	while(testPos < numBases) {
+
+	    const char gs  = pGapSeq[testPos];
+	    const char ngs = pNonGapSeq[testPos];
+
+	    bool isPartofHomopolymer = false;
+	    if(((gs == nonGapBase) || (gs == GAP)) && (ngs == nonGapBase)) isPartofHomopolymer = true;
+	    if(!isPartofHomopolymer) break;
+
+	    if(gs == GAP) numGappedBases++;
+	    else nonGapLength++;
+	    testPos++;
+	}
+
+	// fix the gap order
+	if(numGappedBases != 0) {
+	    char* pCurrentSequence = pGapSeq + i;
+	    memset(pCurrentSequence, nonGapBase, nonGapLength);
+	    pCurrentSequence += nonGapLength;
+	    memset(pCurrentSequence, GAP, numGappedBases);
+	}
+
+	// increment
+	i += numGappedBases + nonGapLength - 1;
+    }
+}
diff --git a/SmithWatermanGotoh.h b/SmithWatermanGotoh.h
new file mode 100644
index 0000000..da15036
--- /dev/null
+++ b/SmithWatermanGotoh.h
@@ -0,0 +1,103 @@
+#pragma once
+
+#include <iostream>
+#include <algorithm>
+#include <memory>
+//#include "Alignment.h"
+#include "Mosaik.h"
+#include <stdio.h>
+#include <string.h>
+#include <sstream>
+#include <string>
+#include "disorder.h"
+#include "Repeats.h"
+#include "LeftAlign.h"
+
+using namespace std;
+
+#define MOSAIK_NUM_NUCLEOTIDES 26
+#define GAP '-'
+
+class CSmithWatermanGotoh {
+public:
+    // constructor
+    CSmithWatermanGotoh(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty);
+    // destructor
+    ~CSmithWatermanGotoh(void);
+    // aligns the query sequence to the reference using the Smith Waterman Gotoh algorithm
+    void Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2);
+    // enables homo-polymer scoring
+    void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
+    // enables non-repeat gap open penalty
+    void EnableEntropyGapPenalty(float enGapOpenPenalty);
+    // enables repeat gap extension penalty
+    void EnableRepeatGapExtensionPenalty(float rGapExtensionPenalty, float rMaxGapRepeatExtensionPenaltyFactor = 10);
+    // record the best score for external use
+    float BestScore;
+private:
+    // creates a simple scoring matrix to align the nucleotides and the ambiguity code N
+    void CreateScoringMatrix(void);
+    // corrects the homopolymer gap order for forward alignments
+    void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
+    // returns the maximum floating point number
+    static inline float MaxFloats(const float& a, const float& b, const float& c);
+    // our simple scoring matrix
+    float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
+    // keep track of maximum initialized sizes
+    unsigned int mCurrentMatrixSize;
+    unsigned int mCurrentAnchorSize;
+    unsigned int mCurrentQuerySize;
+    unsigned int mCurrentAQSumSize;
+    // define our traceback directions
+    // N.B. This used to be defined as an enum, but gcc doesn't like being told
+    // which storage class to use
+    const static char Directions_STOP;
+    const static char Directions_LEFT;
+    const static char Directions_DIAGONAL;
+    const static char Directions_UP;
+    // repeat structure determination
+    const static int repeat_size_max;
+    // define scoring constants
+    const float mMatchScore;
+    const float mMismatchScore;
+    const float mGapOpenPenalty;
+    const float mGapExtendPenalty;
+    // store the backtrace pointers
+    char* mPointers;
+    // store the vertical gap sizes - assuming gaps are not longer than 32768 bases long
+    short* mSizesOfVerticalGaps;
+    // store the horizontal gap sizes - assuming gaps are not longer than 32768 bases long
+    short* mSizesOfHorizontalGaps;	
+    // score if xi aligns to a gap after yi
+    float* mQueryGapScores;
+    // best score of alignment x1...xi to y1...yi
+    float* mBestScores;
+    // our reversed alignment
+    char* mReversedAnchor;
+    char* mReversedQuery;
+    // define static constants
+    static const float FLOAT_NEGATIVE_INFINITY;
+    // toggles the use of the homo-polymer gap open penalty
+    bool mUseHomoPolymerGapOpenPenalty;
+    // specifies the homo-polymer gap open penalty
+    float mHomoPolymerGapOpenPenalty;
+    // toggles the use of the entropy gap open penalty
+    bool mUseEntropyGapOpenPenalty;
+    // specifies the entropy gap open penalty (multiplier)
+    float mEntropyGapOpenPenalty;
+    // toggles the use of the repeat gap extension penalty
+    bool mUseRepeatGapExtensionPenalty;
+    // specifies the repeat gap extension penalty
+    float mRepeatGapExtensionPenalty;
+    // specifies the max repeat gap extension penalty
+    float mMaxRepeatGapExtensionPenalty;
+};
+
+// returns the maximum floating point number
+inline float CSmithWatermanGotoh::MaxFloats(const float& a, const float& b, const float& c) {
+    float max = 0.0f;
+    if(a > max) max = a;
+    if(b > max) max = b;
+    if(c > max) max = c;
+    return max;
+}
diff --git a/convert.h b/convert.h
new file mode 100644
index 0000000..399bcea
--- /dev/null
+++ b/convert.h
@@ -0,0 +1,22 @@
+#ifndef __CONVERT_H
+#define __CONVERT_H
+
+#include <sstream>
+
+// converts the string into the specified type, setting r to the converted
+// value and returning true/false on success or failure
+template<typename T>
+bool convert(const std::string& s, T& r) {
+    std::istringstream iss(s);
+    iss >> r;
+    return iss.eof() ? true : false;
+}
+
+template<typename T>
+std::string convert(const T& r) {
+    std::ostringstream iss;
+    iss << r;
+    return iss.str();
+}
+
+#endif
diff --git a/disorder.cpp b/disorder.cpp
new file mode 100644
index 0000000..67684b4
--- /dev/null
+++ b/disorder.cpp
@@ -0,0 +1,191 @@
+/***************************************************************************
+ *  libdisorder: A Library for Measuring Byte Stream Entropy
+ *  Copyright (C) 2010 Michael E. Locasto
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the:
+ *       Free Software Foundation, Inc.
+ *       59 Temple Place, Suite 330
+ *       Boston, MA  02111-1307  USA
+ *
+ * $Id$
+ **************************************************************************/
+
+#include <math.h> //for log2()
+#include <stdio.h> //for NULL
+#include "disorder.h"
+
+#if defined(__FreeBSD__)
+#define        log2(x) (log((x)) * (1./M_LN2))
+#endif
+
+/** Frequecies for each byte */
+static int m_token_freqs[LIBDO_MAX_BYTES]; //frequency of each token in sample
+static float m_token_probs[LIBDO_MAX_BYTES]; //P(each token appearing)
+static int m_num_tokens = 0; //actual number of `seen' tokens, max 256 
+static float m_maxent = 0.0;
+static float m_ratio = 0.0;
+static int LIBDISORDER_INITIALIZED = 0;
+
+static void
+initialize_lib()
+{
+  int i = 0;
+  if(1==LIBDISORDER_INITIALIZED)
+    return;
+
+  m_num_tokens = 0;
+
+  for(i=0;i<LIBDO_MAX_BYTES;i++)
+  {
+    m_token_freqs[i]=0;
+    m_token_probs[i]=0.0;
+  }
+
+  LIBDISORDER_INITIALIZED = 1;
+}
+
+/**
+ * Set m_num_tokens by iterating over m_token_freq[] and maintaining
+ * a counter of each position that does not hold the value of zero.
+ */
+static void
+count_num_tokens()
+{
+  int i = 0;
+  int counter = 0;
+  for(i=0;i<LIBDO_MAX_BYTES;i++)
+  {
+    if(0!=m_token_freqs[i])
+    {
+      counter++;
+    }
+  }
+  m_num_tokens = counter;
+  return;
+}
+
+/**
+ * Sum frequencies for each token (i.e., byte values 0 through 255)
+ * We assume the `length' parameter is correct.
+ *
+ * This function is available only to functions in this file.
+ */
+static void
+get_token_frequencies(char* buf, 
+		      long long length)
+{
+  int i=0;
+  char* itr=NULL;
+  unsigned char c=0;
+
+  itr = buf;
+
+  //reset number of tokens
+  m_num_tokens = 0;
+
+  //make sure freqency and probability arrays are cleared
+  for(i=0;i<LIBDO_MAX_BYTES;i++)
+  {
+    m_token_freqs[i] = 0;
+    m_token_probs[i] = 0.0;
+  }
+
+  for(i=0;i<length;i++)
+  {
+    c = (unsigned char)*itr;
+    //assert(0<=c<LIBDO_MAX_BYTES);
+    m_token_freqs[c]++;
+    itr++;
+  }
+}
+
+/**
+ * Return entropy (in bits) of this buffer of bytes. We assume that the
+ * `length' parameter is correct. This implementation is a translation
+ * of the PHP code found here:
+ *
+ *    http://onlamp.com/pub/a/php/2005/01/06/entropy.html
+ *
+ * with a helpful hint on the `foreach' statement from here:
+ *
+ *    http://php.net/manual/en/control-structures.foreach.php
+ */
+float shannon_H(char* buf, 
+	  long long length)
+{
+  int i = 0;
+  float bits = 0.0;
+  char* itr=NULL; //values of itr should be zero to 255
+  unsigned char token;
+  int num_events = 0; //`length' parameter
+  float freq = 0.0; //loop variable for holding freq from m_token_freq[]
+  float entropy = 0.0; //running entropy sum
+
+  if(NULL==buf || 0==length)
+    return 0.0;
+
+  if(0==LIBDISORDER_INITIALIZED)
+    initialize_lib();
+
+  itr = buf;
+  m_maxent = 0.0;
+  m_ratio = 0.0;
+  num_events = length;
+  get_token_frequencies(itr, num_events); //modifies m_token_freqs[]
+  //set m_num_tokens by counting unique m_token_freqs entries
+  count_num_tokens(); 
+
+  if(m_num_tokens>LIBDO_MAX_BYTES)
+  {
+    //report error somehow?
+    return 0.0;
+  }
+
+  //iterate through whole m_token_freq array, but only count
+  //spots that have a registered token (i.e., freq>0)
+  for(i=0;i<LIBDO_MAX_BYTES;i++)
+  {
+    if(0!=m_token_freqs[i])
+    {
+      token = i;
+      freq = ((float)m_token_freqs[token]); 
+      m_token_probs[token] = (freq / ((float)num_events));
+      entropy += m_token_probs[token] * log2(m_token_probs[token]);
+    }
+  }
+
+  bits = -1.0 * entropy;
+  m_maxent = log2(m_num_tokens);
+  m_ratio = bits / m_maxent;
+
+  return bits;
+}
+
+int 
+get_num_tokens()
+{
+  return m_num_tokens;
+}
+
+float 
+get_max_entropy()
+{
+  return m_maxent;
+}
+
+float 
+get_entropy_ratio()
+{
+  return m_ratio;
+}
diff --git a/disorder.h b/disorder.h
new file mode 100644
index 0000000..3458774
--- /dev/null
+++ b/disorder.h
@@ -0,0 +1,62 @@
+/***************************************************************************
+ *  libdisorder: A Library for Measuring Byte Stream Entropy
+ *  Copyright (C) 2010 Michael E. Locasto
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the:
+ *       Free Software Foundation, Inc.
+ *       59 Temple Place, Suite 330
+ *       Boston, MA  02111-1307  USA
+ *
+ * $Id$
+ **************************************************************************/
+
+#ifndef __DISORDER_H_
+#define __DISORDER_H_
+
+/** Max number of bytes (i.e., tokens) */
+#define LIBDO_MAX_BYTES      256
+
+/** A convienance value for clients of this library. Feel free to change
+ * if you plan to use a larger buffer. You can also safely ignore it, as
+ * libdisorder does not use this value internally; it relies on the
+ * client-supplied `length' parameter.
+ *
+ * NB: Might become deprecated because it is potentially misleading and
+ * has zero relationship to any library internal state.
+ */
+#define LIBDO_BUFFER_LEN   16384
+
+/** 
+ * Given a pointer to an array of bytes, return a float indicating the
+ * level of entropy in bits (a number between zero and eight),
+ * assuming a space of 256 possible byte values. The second argument
+ * indicates the number of bytes in the sequence. If this sequence
+ * runs into unallocated memory, this function should fail with a
+ * SIGSEGV.
+ */
+float    shannon_H(char*, long long);
+
+/** Report the number of (unique) tokens seen. This is _not_ the
+    number of individual events seen. For example, if the library sees
+    the string `aaab', the number of events is 4 and the number of
+    tokens is 2. */
+int      get_num_tokens(void);
+
+/** Returns maximum entropy for byte distributions log2(256)=8 bits*/
+float    get_max_entropy(void);
+
+/** Returns the ratio of entropy to maxentropy */
+float    get_entropy_ratio(void);
+
+#endif
diff --git a/examples.txt b/examples.txt
new file mode 100644
index 0000000..aefe062
--- /dev/null
+++ b/examples.txt
@@ -0,0 +1,76 @@
+TCTGTGACCTCAAAGCCCAACTGTGCATACACAAGCATACACACACACACACACACACACACACACACACACACATACACACACA TCTGTGACCTCAAAGCCCAACTGTGCATACACAAGCATACACACACACACACACACA 
+AAAGGCTGGGGACCACTGATCTAAATACACCAATAAAAAGAAAAAGATTGTAAGATTGGATTTTAAAAGACCTGACTCTATACTGACCACAAAAAAAAACCCTCACT AAAGGCTGGGGACCACTGATCTAAATACACCAATAAAAAGAAAAAGATTGTAAGATTGGATTTTAAAAGACCTGACTCTATACTGACCACAAAAAAAACC 
+TGTGACCTCAAAGCCCAACTGTGCATACACAAGCATACACACACACACACACACACACACACACACACACACATACACA TGTGACCTCAAAGCCCAACTGTGCATACACAAGCATACACACACACACGCACACACACACATACACACAC 
+ATTTTTTAAATCAGGAATAACTTAGACCAGGGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACACACACACACACACACACATACACACATACACACAAATATATCTTCACTAATGTTCTTTTTTTCTTGTTTTTC GGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACATACACACATACACACAAAT 
+ATGCATGCCTCTCTCTCTCTCTCTGTCGCTCTCTCTCTCTCTCTCTCTCTCTCTCT ATGCATGCCTCTCTCTCTCTCTCTCGCTCTCTCTCTCTCTCTCTCT 
+ACACCAGCTGGGGTGTGTGTGTGTGTGTGTGTGTGTGCGTGTGTGTGTGTGTGTGTGATTCTCGTGCCT GGGGGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGATTCTCGTGC 
+GCCTGGGCAACATAGTGAGACCTTGTCTCTACAAATAGTTAAAAAAAAAAAAAAAATTAGCCAGGTGTGGTGGTGCACACATGT GCCTGGGCAACATAGTGAGACCTTGTCTCTACAAATAGTTAAAAAAAAAAAAAATTAGCCAGGTGTGGTGGTGCACACATGT 
+TTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGTGTGATCTCGGCTCACGGCAACCTCCG TTTTCTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCCGGAGTGCAATGGTGTGCTCTCGGCTCCCGGCAACCTCCG
+AGTAATGGAAATACTGTTTATCATCATTAGAGTTGGGTGTATACTACTACATTACTCTCTCTCTCTCTCTATATATATATATATATATATATATATTTTTTTTT AGTAATGGAAATACTGTTTATCATCATTAGAGTTGGGTGTATACTACTACATTACTCTCTCTCTCTCTCTCTCTATATATATATATATATATTTTTTTTT
+AGCCTGGGCGACAGGGCGAGACTCCGTCTCAAATAATAATAATAATAATAATAATAATAATAATAATAATAATAAAATAAAATAAAATAAAATAAAAATACAAAAAT AGCCTGGGCGACAGGGCGAGACTCCGTCTCAAATAATAATAATAATAATAATAATAATAATAAAATAAAATAAAATAAAATAAAAATACAAAAAT 
+CATACACACACACACACACACACACACACACACACACACACACACACACACACCTCATGTAGTGAACTTAATAAATTTAATCTGCAGCTCTGATGATTTCCTTAAGG CATACACACACACACACACACATACACACACACACACACACACACCTCATGTAGTGAACTTAATAAATTTAATCTGCAGCTCTGATGATTTCCTTAAGG 
+GGGAGGCTGAGGCAGGAGGATCACACCACTGCACTTTAGCCTGAATACTGAGTAACAAAGCAAAACCCTGTCTCTCTTAAAAAAAAAATTGGGGGGAAGGACAAGTCTTTTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGTGTGATC AGTAACAAAGCAAAACCCTGTCTCTCTTAAAAAAAAAATTGGGGGGAAGGACAAGTCTTTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTGTGATGGAATT 
+ATTTTTTAAATCAGGAATAACTTAGACCAGGGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACACACACACACACACACACATACACACATACACACAAATATATCTTCACTAATGTTCTTTTTTTCTTGTTTTTC GGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACATACACACATACACACAAAT 
+CATACACACACACACACACACACACACACACACACACACACACACACACACACCTCATGTAGTGAACTTAATAAATTTAATCTGCAGCTCTGATGATTTCCTTAAGGC CATACACACACACACACACACATACACACACACACACACACACACCTCATGTAGTGAACTTAATAAATTTAATCTGCAGCTCTGATGATTTCCTTAAGGC 
+TATACAGATTACTTTTATAGCTGATGAGGCAAGTCCTTCTATCATTGTTTCAAAGATTCCATGGCTTTTACTGAACATTTTCTTTTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTTTC TTTTCTTTTTTCTTTTTTTTTTTTTGAGACGGAGTTTC 
+GAGAGAGGCGCGCGCGCGCGTGCGCACGCACACACACACACACACACACATAACTAATATATATAAATACATATATATGTGGGTATATATTATTTATTTATG GAGAGAGGCGCGCGCGCGCGCGCGCACACACACACACACACACACACATAACTAATATATATAAATACATATATATGTGGGTATATATTATTTATTTATG 
+TATATGTATATATATGTGTGTATATATATGTATATATATGTGTGTATATATATGTGTATATATATGTGTGTGTGTGTGTGTGTGTGTGTATATATATATATATATATATATATCAGTTTGCCCTTGCTGGATAACAAA TATATGTATATATATGTGTGTATATATATGTATATATATGTGTGTATATATATGTGTATATATGTGTGTGTGTGTGTATATATATATCAGTTTGCCCTTG 
+AGCAAACACCTATTGTGCATTTTCTTTTCTTTCTTTCTTTCTTTCTTTTTTTTTTTTGAGACGGAGTTTCGCTCTTGTTGTCCAGGCTAGAGTACGATGG AGCAAACACCTATTGTGCATTTTCTTTTCCTTCTTTCTTTCTTTTTTTTTTTTTTTGAGACGGAGTTTCGCTCTTGTTGTCCAGTCTAGAGTCAGTGG 
+GATTTTGGTATATTGGTCTTACATTTTTTCACTTTGCTGAACTCATTTATTAGTTCTAATTCATGGGGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTCTGTGTGTGTGTGTGTCTGTGTGTT GATTTTGGTATATTGGTCTTACATTTTTTCACTTTGCTGAACTCATTTATTAGTTCTAATTCATGGGGTGTGTGTGTGTGTGTGTGTGTGTCTGTGTGTG 
+CCAGCCTGGGCGACAGAGTGAGACTCCATCTCAAAACAAACAAACAAACAAACAAACAAACAAACAACTCCACTAAGACTTTTGGGACAACCTGTACCTACCTGACCTGCCTTTCCATCTTTAATGCTCTTCT CCAGCCTGGGCGACAGAGTGAGACTCCATCTCAAAACAAACAAACAAACAAACAAACAAACAACTCCACTAAGACTTTTGGGACAACCTGTACCTATCT 
+TATATGTATATATATGTGTGTATATATATGTATATATATGTGTGTATATATATGTGTATATATATGTGTGTGTGTGTGTGTGTGTGTGTATATATATATATATATATATATATCAGTTTGCCCTTGCTGGATAACAAA TATATGTATATATATGTGTGTATATATATGTATATATATGTGTGTATATATATGTGTATATATGTGTGTGTGTGTGTATATATATATCAGTTTGCCCTTG 
+TAATATATATAATATCTAATATATATAATATCTAATATATATAATATATATATAGAGAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGGGAGAGACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTGCAATGGCGCGATCTC AATATATATATAGAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGAGGGAGAGACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTGCAATGGCGCGATCTC 
+GAGAAAGAAACTATTTAGCATTGTGGCTTTCATAATTTCTTTCTTTCTTTTTTTTTTTTTTTTTGAAGCAGAGTCTAGCTCTGTCGCCCAGGCTGGAAAGCAGTGGTGCAAACTC TTAGCATTGTGGCTTTCATAATTTCTTTCTTTTTTTTTTTTTTTTTTTTTAAGCAGAGTCTAGCTCTGTCCCCCCGGCTGGAAAGCAGTGGTGCAACCTC 
+GGTGACAGAGCAAGACTCCATCTCTCTCTCTCTCTCTCTCTCTCTATATATATATATATATATATACATATATATATGTATATATATGTATATATATGTGTATATATGTGTATATATATGTATATATGTGTATATATATAT GTGACAGAGCAAGACTCCATCTCTCTCTCTCTCTCTCTCTCTATATATATATATATACACACATATATATATGTATATATATGTATATATATATGTGTAT 
+GAGAGAGGCGCGCGCGCGCGTGCGCACGCACACACACACACACACACACATAACTAATATATATAAATACATATATATGTGGGTATATATTATTTATTTATG GAGAGAGGCGCGCGCGCGCGCGCGCACACACACACACACACACACACATAACTAATATATATAAATACATATATATGTGGGTATATATTATTTATTTATG 
+ATTTTTTAAATCAGGAATAACTTAGACCAGGGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACACACACACACACACACACATACACACATACACACAAATATATCTTCACTAATGTTCTTTTTTTCTTGTTTTTC GGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACATACACACATACACACAAAT 
+TTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGTGTGATCTCGGCTCACGGCAACCTCCGCCTCC TTTTCTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCCGGAGTGCAATGGTGTGCTCTCGGCTCCCGGCAACCTCCGCCTCT 
+GAGCCGAGATCGTGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAACAAAAAACAAGCAAACAAAAAAACAAAAAAAAACAAAAAATCCCCAGCA ATCGTGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAACAAAAAACAAGCAAACAAAAAGCAAAAAAAACAAAAAACCCCCAGCA 
+GAGCCGAGATCGTGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAACAAAAAACAAGCAAACAAAAAAACAAAAAAAAACAAAAAATCCCCAGCA ATCGTGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAACAAAAAACAAGCAAACAAAAAGCAAAAAAAACAAAAAACCCCCAGCA 
+TAATATATATAATATCTAATATATATAATATCTAATATATATAATATATATATAGAGAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGGGAGAGACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTG AATATATATATAGAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGAGGGAGAGACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTG 
+AAACCCCATCTCTGCTACAAATACAAATACAAATAATAATAATAATAATAATAATAATAATAATAATAATAATAATAATAGCCAGGCATGC AAACCCCATCTCTGCTACAAATACAAATACAAATAATAATAATAATAATAATAATAATAATAATAATAATAATAGCCAGGCATGC 
+TATATAATATCTAATATATATAATATATATATAGAGAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGGGAGAGACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTGC TATATAATATCTAATATATATAATATATATATATAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGGGAGAGA 
+AGAGGCAGTAGATTTAGGGACCACTCAACCTAGTGAGACACCAGCTGGGGTGTGTGTGTGTGTGTGTGTGTGTGCGTGTGTGTGTGTGTGTGTGATTCTCGTGCCTC TAGATTTAGGGAGCACTCAACCTAGTGAGACACCAGCTGGGGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGATT 
+ACACCAGCTGGGGTGTGTGTGTGTGTGTGTGTGTGTGCGTGTGTGTGTGTGTGTGTGATTCTCGTGCCTC GGGGGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGATTCTCGTGC 
+CAAGGCTCCAAAATTTTCAGATGAAATGTACAGAAAGAATACAATCTCATTTTAATAGTTTTTTTTTTTTTTAAAAAGGTCCTTGACCAATTCCCCAAGGTCCAT CAAGGCTCCAAAATTTTCAGATGAAATGTACAGAAAGAATACAATCTCATTTTAATAGTTTTTTTTTTTTAAAAAAGGTCCTTGACCAATTCCCCAAGGT 
+AAACCCCATCTCTGCTACAAATACAAATACAAATAATAATAATAATAATAATAATAATAATAATAATAATAATAATAATAGCCAGGCATGC AAACCCCATCTCTGCTACAAATACAAATACAAATAATAATAATAATAATAATAATAATAATAATAATAATAATAGCCAGGCATGC 
+TACTTCATATGTTCCACGCTCTGGTTGTTTTGTGGGGAGCAAAAGAGAAGTTCCCATTTCTGTTTATGTTAGAAACAAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAAAAACTGCTAAGAAAGAATAATGAGGT TTCCCATTTCTGTTTATGTTAAAAACAAAACAAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAA
+GGTTGTTTTGTGGGGAGCAAAAGAGAAGTTCCCATTTCTGTTTATGTTAGAAACAAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAAAAACTGCTAAGAAAGAATAATGAGGTAGGAGGGGGCTACTTGAGATCAGGTGGTCACGCTGAGAGTTGA TTAAAAACAAAACGAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAAAAACTGCTAAGAAAGAAT
+TTAGAAACAAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAAAAACTGCTAAGAAAGAAT TTAAAAACAAAACGAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAAAAACTGCTAAGAAAGAAT 
+ACCTTGAATATGAACACTCTAAATGCTCCACTTAAGAGGCACAGAGTAGCAAGTTGGAAAAGAAAAGAAAAGAAAAGAAAAGAAAACTCATCTGTCTGCAG ACCTTGAATATGAACACTCTAAATGCTCCACTTAAGAGGCACAGAGTAGCAAGTTGGAAAAGAAAAGAAAAGAAAAGAAAACAAAACAAAACTCATCTGT 
+TTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGTGTGATCTCGGCTCACGGCAAC TTTTCTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCCGGAGTGCAATGGTGTGCTCTCGGCTCCCGGCAAC 
+AGAAAGAAAGAAAAAGAAAAAGAACCAAGAAGAAAAAATAATCACCGGAGATTCCTCCCCTCCCCTAGAGCTAACTAGGCTAACATTTTGGTATATATCTTTCCAG AGAAAGAAAGAAAAAGAACCAAGAAGAAAAAATAATCACCGGAGATTCCTCCCCTCCCCTAGAGCTGACTAGGCTAACATTTTGGTATATATCTTTCCAG 
+GACCTTTCTATATATGGTTTTACAATCGGATCAATCGAGATCCCCTCCCCTCCTTAGAGGCCACTAATAAAAAAGAAGAACCAAGAAAAAGAAAAAGAAAGA GACCTTTCTATATATGGTTTTACAATCGGATCAATCGAGATCCCCTCCCCTCCTTAGAGGCCACTAATAAAAAAGAAGAACCAAGAAAAAGAAAAAGA
+AGAAAGAAAGAAAAAGAAAAAGAACCAAGAAGAAAAAATAATCACCGGAGATTCCTCCCCTCCCCTAGAGCTAACTAGGCTAACATTTTGGTATATATCTTTCCAG AGAAAGAAAGAAAAAGAACCAAGAAGAAAAAATAATCACCGGAGATTCCTCCCCTCCCCTAGAGCTGACTAGGCTAACATTTTGGTATATATCTTTCCAG 
+ATTTTTTAAATCAGGAATAACTTAGACCAGGGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACACACACACACACACACACATACACACATACACACAAATATATCTTCACTAATGTTCTTTTTTTCTTGTTTTTC GGTGAACAAACTACTGCTGTCAGGGCAAATCCAGCCCATAGCCTGCTTTTGGAAATAAATTTGTATTAGAACACACACACACACACACACACATACACACATACACACAAAT 
+GGTTGTTTTGTGGGGAGCAAAAGAGAAGTTCCCATTTCTGTTTATGTTAGAAACAAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAAAAACTGCTAAGAAAGAATAATGAGGTAGGAGGGGGCTACTTGAGATCAGGTGGTCACGCTGAGAGTTGA TTAAAAACAAAACGAAACAAGACAAAAAACAAATTAACAAGCAAATACATACTGATTATAAAATAATGTGATGTGAAGAAAAAAACTGCTAAGAAAGAAT 
+TAAGAAAGAATCGTCAAAAAAAGAAGTGTAGTGTAATAAAATATTAGTCATACATAAACGAACAATTAAACAAAAAACAGAACAAAGCACACACACACACACA TAAGAAAGAATCGTCAAAAAAAGAAGTGTAGTGTAATAAAATATTAGTCATACATAAACGAACAATTAAACAAAAAACAGAACAAAGCACACACACACACACA
+TAATATATATAATATCTAATATATATAATATCTAATATATATAATATATATATAGAGAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGGGAGAGACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTGCAATGGCGCGATCTC AATATATATATAGAGAGAGAGAGAGCGAGAGAGAGAGAGAGAGAGGGAGAGACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTGCAATGGCGCGATCTC 
+AAAGAAAGAAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAAAAAGAACCAAGAAGAAAAAATAATCACCGGAGATTCCTCCCCTCCCCTAGAGCTAACTAGGCTAACATTTTGGTATATATCTTTCCAGTCCGGATCCTGTGTGACTGAGTGTGTATATGCATATGTATTATTTTCAAC AAGAAAGAAAGAAAAAGAACCAAGAAGAAAAAATAATCACCCGAGATTCCTCCCCTCCCCTAGAGCTAACTAGGCTAACATTTTGGTATATATCTTTCCA 
+GAATGCACATTTGGTCCTTCATTAACTCATTTACTCACAATTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTTTCACTCTTGTCGCCCAGGCTGGAGTGCAATGGTGTGA ACAATTTTTTTTTTTTTTTTTTTGAGACGGAGTTTCACTCTTGTCGCCCAGGCTGGAGTGCAATGGTGTGA 
+CCACTGCACTCCAGCCTGGGCGACAGGGCGAGACTCCGTCTCAAATAATAATAATAATAATAATAATAATAATAATAATAATAATAAAATAAAATAAAA CCACTGCACTCCAGCCTGGGCGACAGGGCGAGACTCCGTCTCAAATAATAATAATAATAATAATAATAATAATAAAATAAAATAAAATAAAATAAAA 
+ATTCCAATACTATTCAATTGTTCCACAGCACAGAAAAGAAAAAATTCTAAATTCTTTCTATGGAACAAAAAAATCATCAATGACACCTGACCAAAGATGGCACACACACACACACACACACACACACACACACACACACACTACAGTCCAACCCCACTAATGAATACAAAAATCCTAACACTAGCA GGGACACACACACACACACACACACACACACACTACAGTCCAACCCCACTAATGAATACAAAAATCCTAACACTAGCA
+GGCAGGAGAATTGCTTGAACCCAGGGGGCAGAGGTGGCAGTGAGCCGGGATCATGCCACTTCACTCCAGCCTGGGTGAAAGAGCAAAACTCTGTCTCAAAAAAAAAAAAAAAAAAAGACAGCTGCAACAAATGTCAAGTTCTGTGTGTTTTCTTTTCTTTTCTTTTTTTTCTATTTAATTAATTTATT AAAAAAAAAAAAAAAAAGACAGCTGCAACAAATGTCAAGTTCTGTGTGTTTTCTTTTCTTTTCTTTTTTTTCTATTTAATTAATTTATT
+GGCAGGAGAATTGCTTGAACCCAGGGGGCAGAGGTGGCAGTGAGCCGGGATCATGCCACTTCACTCCAGCCTGGGTGAAAGAGCAAAACTCTGTCTCAAAAAAAAAAAAAAAAAAAGACAGCTGCAACAAATGTCAAGTTCTGTGTGTTTTCTTTTCTTTTCTTTTTTTTCTATTTAATTAATTTATT CAAAAAAAAAAAAAAAAAGACAGCTGCAACAAATGTCAAGTTCTGTGTGTTTTCTTTTCTTTTCTTTTTTTTCTATTTAATTAATTTATT
+ATTGCTTGAGCCCAGGAGTTCAGGGCTGCAATGAGCTATGATCATGCCACTGCACTCCAGCCTGGGCAACAGAGTGAGATCCTGTCTCTAAAATATGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATGTGTGTGTGTCTGTACATATACGTATATATATATGTGTGTATATATACAT AATATGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATGTGTGTGTGTCTGTACATATACGTATATATATATGTGTGTATATATACAT
+GCCCACCACGCTGCAGATGAATATGCCACAATGTCAACAGTGTTTAGGCTCATATATATATATATATATATATATATATATATATATATATATATATATATTTTAAGACAGTCTTGCTCTGTCACCC TCTCATATATATATATATATATATATATATTTTAAGACAGTCTTGCTCTGTCACCC
+GCCCACCACGCTGCAGATGAATATGCCACAATGTCAACAGTGTTTAGGCTCATATATATATATATATATATATATATATATATATATATATATATATATATTTTAAGACAGTCTTGCTCTGTCACCC TATATATATATATATATATATATATATATATATATTGAGACAGTCTCGCTCTGTCACCC 
+ATTCCAATACTATTCAATTGTTCCACAGCACAGAAAAGAAAAAATTCTAAATTCTTTCTATGGAACAAAAAAATCATCAATGACACCTGACCAAAGATGGCACACACACACACACACACACACACACACACACACACACACTACAGTCCAACCCCACTAATGAATACAAAAATCCTAACACTAGCA GGGACACACACACACACACACACACACACACACTACAGTCCAACCCCACTAATGAATACAAAAATCCTAACACTAGCA 
+GCCCACCACGCTGCAGATGAATATGCCACAATGTCAACAGTGTTTAGGCTCATATATATATATATATATATATATATATATATATATATATATATATATATTTTAAGACAGTCTTGCTCTGTCACCC TATATATATATATATATATATATATATATATATATTGAGACAGTCTCGCTCTGTCACCC 
+CACAAGAACTGCAATTCCTAGGCAACTGCTAGTGCTGTGCTGGGCTCAGAGGCAGTAGATTTAGGGACCACTCAACCTAGTGAGACACCAGCTGGGGTGTGTGTGTGTGTGTGTGTGTGTGCGTGTGTGTGTGTGTGTGTGATTCTCGTGCCTCAGCCTCCCAAGTAGCTGGTGATGGCAGTGGCAGCCCATCTGGAGTGGACGCTGCCATCAAGCCAGCTGCAGCAGGGAGGGACAGCTGGGGCTGCACAT GGTCAGAGAGAGTATATAGAGAGAGCACACACCAGAGAGAGACACGCTGGGGGGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGATTCTCGTGC 
+CGCCTGTAGTCTCAGCTATTAATATTTGGGAGGCTGAGGCAGGAGGATCACACCACTGCACTTTAGCCTGAATACTGAGTAACAAAGCAAAACCCTGTCTCTCTTAAAAAAAAAATTGGGGGGAAGGACAAGTCTTTTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGTGTGATCTCGGCTCACGGCAACCTCCGCCTCCTGGGTTCAAGCAATTCTGCTTCAGCCTCCCGAGTGGCTGGGATTATAGT CTCTTAAAAAAAAAATTGGGGGGAAGGACAAGTCTTTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTGGGTTGGAATTTTACTCTTGTTG 
+CTCGGCTCACTGCAAGCTCCACCTCCCGGGTTCACGCCATTCTCCTGCCTCAGTCTCCCAAGTAGCTGGGACTACAGGTGCCCACCACCACGTCTGGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGATTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACTTTGTGATCCACCCGCCTCAGCCTCCCAAAGCCTCCTTCACTTTTCTTTATTAGCCTCAACCCCATGATTCACCACTCCAAGTACTCCCTTGCCAGCATCCTCAAATCCCAATACCATTTTTAAAATTTTTTAA ATTTTTTTGTATTTTTAGTAGAGATGGGGATTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACTTTGTGATCCGCCCGCCTCAGCCTCCCAAAGCC 
+TTGGGGGGAAGGACAAGTCTTTTTTCTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCTGGAGTGCAATG TTCTTTTTTTTTTTGAGATGGAATTTCACTCTTGTTGCCCAGGCCGGAGTGCAATG 
+GGAGGCAGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCCATCTCTCTCTCTCTCTCTCTCTCTCTATATATATATATATATATATACATATATATATGTATATATATGTATATATATGTGTATATATGTGTATATATATGTATATATGTGTATATATATATGTAT CCTGTGTGACAGAGCAAGACTCCATCTCTCTCTCTCTCTCTCTCTCTCTATATATATATATATATATACATATATATATATATGTATATATATGTATATA
+TAATTTACAACAACACGTAAGTTGTTACTCTGTAAACCCTTGCCTCCCCCCCACCCCCCACCCAATTGGGTCTTTTTTTTTTTTCTCTCTCTCCATGCTTCTGCAGTGACTCTTAAGTAGCATTTTTAAAAACTTC CCACCCCCCACCCAATTGGGTCTTTTTTTTTTTTCTCTCTCTCCATGCTTCTGCAGTGACTCTTAAGTAGCATTTTTTAAAACTTCTATTTATTTTAAAA
+GTTCACACTTTTGGCCATACCCAGGGTCAGCCATGAAGATTGTTCTCAGAATGTTTTCTTTCTTCCTTCCTTTCTCTTTCTTTTCTTTCTTTCTTCCTTTCTTTCTTTCCTTTCTTTTTCTTTCTTCTCTTTCTTTTTTTCTTTTCTTCTTTCTCTCTCTTTCTTTCTCTCTCTCTCTCTCCTTCCTTCCTTCCTTCT ATGTTTTCTTTCTTCCTTCCTTTCTCTTTCTTTTCTTTCTTTCTTCCTTTCTTTCTTTCCTTTCTTTTTCTTTCTTCTCTTTCTTTCTTTTTTCTTTTCT
+GTTCACACTTTTGGCCATACCCAGGGTCAGCCATGAAGATTGTTCTCAGAATGTTTTCTTTCTTCCTTCCTTTCTCTTTCTTTTCTTTCTTTCTTCCTTTCTTTCTTTCCTTTCTTTTTCTTTCTTCTCTTTCTTTTTTTCTTTTCTTCTTTCTCTCTCTTTCTTTCTCTCTCTCTCTCTCCTTCCTTCCTTCCTTCT ATGTTTTCTTTCTTCCTTCCTTTCTCTTTCTTTTCTTTCTTTCTTCCTTTCTTTCTTTCCTTTCTTTTTCTTTCTTCTCTTTCTTTCTTTTTTCTTTTCT
+GGAGGCAGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCCATCTCTCTCTCTCTCTCTCTCTCTCTATATATATATATATATATATACATATATATATGTATATATATGTATATATATGTGTATATATGTGTATATATATGTATATATGTGTATATATATATGTAT CCTGTGTGACAGAGCAAGACTCCATCTCTCTCTCTCTCTCTCTCTCTCTATATATATATATATATATACATATATATATATATGTATATATATGTATATA
+CTCCCACTGGGGTAATCCATCTTTTCTTTTAATTATTTTCCTTTTGAGATATATTAAAAATGCAAAAAAAAAAATTTTTATTTTTTTGAGACGGAGTCTCGCTCTATCACCTAGGCTGGAGTGCAGTGGCATGATCTTGGCTCACTGCAACCTCCGCCTCCTGGGTTCAACTGATTCTCCTGCCTCAGCCTCCTGAATAG ATATTAAAAATGCAAAAAAAAATTTTTTTTATTTTTTTGAGACGGAGTCTCGCTCTATCGCCTAGGCTGGAGTGCAGTGGCATGATCTTGGCTCACTGCA
+GGCAGGAGAATTGCTTGAACCCAGGGGGCAGAGGTGGCAGTGAGCCGGGATCATGCCACTTCACTCCAGCCTGGGTGAAAGAGCAAAACTCTGTCTCAAAAAAAAAAAAAAAAAAAGACAGCTGCAACAAATGTCAAGTTCTGTGTGTTTTCTTTTCTTTTCTTTTTTTTCTATTTAATTAATTTATT CAAAAAAAAAAAAAAAAAGACAGCTGCAACAAATGTCAAGTTCTGTGTGTTTTCTTTTCTTTTCTTTTTTTTCTATTTAATTAATTTATT
+CAAGATCGTGCCGCTGCACTCCAGCCCAGGTGACAGAGCGAGACTTCATCTCAAAAAAAAAAAGGGCGCCAAACATCTACTGTGTACCCACAAAAATTAAAATTATAAAAAGACGGCATCAGCAATCCCAGGAGGTGATGTGTCCCTGGTTGGTGTACCTCAGGAGTTGCTGCATTTGCCTCACATCACCATGTGAGATAA CAAAAAAAAAAAAGGGCGCCAAACATCTACTGTGTACCCACAAAAATTAAAATTATAAAAAGACGGCATCAGCAATCCCAGGAGGTGATGTGTCCCTGGT
+CCTCTTAGGTGGTCACATCCTAGAGAGGGGGAAATTACATCAGAAAAGGACCAATGCCAAATTACAGCAACAAAGGGAAAGTAATCCTGGAAGCTGATTTAAGCTATGTGACTGTGTCTTCAATTAAAATATTCAGTCCCTTCCCCTCCCCCTCCCCCTCCCTTCCGTCTCCCTCTGTTGCTGAGGCTGGACTG CCAATGCCAAATTACAGCAACAAAGGGAAAGTAATCCTGGAAGCTGATTTAAGCTATGTGACTGTGTCTTCAATTAAAATATTCAGTCCCTCCCCCTCCC
+CCAAGTGACCCTTTCACCTCAGCTTCCCAAGTAGCTGGGATTACAGGTGCACACCAACTGTGCTTTGCAGTTTTGTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATTTTGTTTTTTTTACAAGCCCACAACATCATAGC TCCTCAGCGTCCCAAGTAGCTGGGAGTAGAGATGCACACCAACTGTGCTTTGCAGTTTTGTTTGTGTGTGTGTGTGTGTGTGTGTGTGTATTTTGTTTTT
+CAAGATTTGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTATCTCAAAAAAAAAAAATAAATAAATAAAGAGAATAAGGCATTTGATATAGTTTCTTTGCATAACAAAATACAAATAAAACCACATTCTATTTCATCTAAACACTTTCTCCCAAGTCATTCTCTCTTAGCCTCA GGACAGGGGGAGACTGTATCTCAAAAAAAAAAAAAAAATAAATAAAGAGAATAAGGCATTTGATATAGTTTCTTTGCATAACAAAATACAAATAAAACCA
+GATCATGCCACTGCACTCCAGCCTGGGCAAAAGAGCGAGACTCTGTCTCAAAAAAAAAAAAAAAATCCAGAAAGAATTGGCACACCTATGTTGTTAAGTTTTCCAATCCAAGAATACTGTATTCCTTATCATTTTT AAAAAAAAAAAAAAATCCAGAAAGAATTGGCACCCTTTTGTTGTAAAGTTTTCCATTCCAAGAAAACTGGATTCCTTCTTTTTTTTTTCTTTTTGTTTCT
+TCCCTCCCTTCCTCCCTTTCTCTCCCTCTCCCTCTCTTTCTTTCTCTCTCTCTCTTTCTCCCCTTCTTTCTTTCTTTCTCTCTCTCTTTTTCTTTCTTTCTTTCTTTCTTTC TCCCTCCCTTCCTCCCTTTCTCTCCCTCTCCCTCTCTTTCTTTCTCTCTCTCTCTTTCTCCCCTTCTTTTTTTCTCTCTCTCTCTTTTTTTTTCTTTCTC
diff --git a/libdisorder.LICENSE b/libdisorder.LICENSE
new file mode 100644
index 0000000..8bbcff9
--- /dev/null
+++ b/libdisorder.LICENSE
@@ -0,0 +1,339 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+

+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+

+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+

+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+

+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+

+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/smithwaterman.cpp b/smithwaterman.cpp
new file mode 100644
index 0000000..92aa73c
--- /dev/null
+++ b/smithwaterman.cpp
@@ -0,0 +1,306 @@
+#include <iostream>
+#include <string.h>
+#include <string>
+#include <sstream>
+#include <getopt.h>
+#include <utility>
+#include <vector>
+#include <stdlib.h>
+#include "SmithWatermanGotoh.h"
+#include "BandedSmithWaterman.h"
+
+using namespace std;
+
+/* Returns the Reverse Complement of a DNA Sequence, from the alphabet {A,T,C,G,N} */
+string reverseComplement(string read) {
+	
+	// Declare the (empty) reverse complement read as a string
+	string rc_read;	
+	
+	// Reverse Read
+	rc_read.assign(read.rbegin(), read.rend());
+	
+	// Complement.  Note that not IUPAC compliant. Uses the alphabet {A,T,C,G,N}
+	string::iterator t;
+	for (t = rc_read.begin(); t != rc_read.end(); ++t) {
+		switch (*t) {
+			case 'A':
+				*t = 'T';
+				break;
+			case 'T':
+				*t = 'A';
+				break;
+			case 'C':
+				*t = 'G';
+				break;
+			case 'G':
+				*t = 'C';
+				break;
+			case 'N':
+				*t = 'N';
+				break;
+			default:
+				cout << "Unknown Nucleotide!";
+				break;
+		}
+	}
+	
+	// Return the Read  (faster if done through pointers?)
+	return rc_read;
+}
+
+
+void printSummary(void) {
+    cerr << "usage: smithwaterman [options] <reference sequence> <query sequence>" << endl
+         << endl
+         << "options:" << endl 
+         << "    -m, --match-score         the match score (default 10.0)" << endl
+         << "    -n, --mismatch-score      the mismatch score (default -9.0)" << endl
+         << "    -g, --gap-open-penalty    the gap open penalty (default 15.0)" << endl
+         << "    -z, --entropy-gap-open-penalty  enable entropy scaling of the gap open penalty" << endl
+         << "    -e, --gap-extend-penalty  the gap extend penalty (default 6.66)" << endl
+         << "    -r, --repeat-gap-extend-penalty  use repeat information when generating gap extension penalties" << endl
+         << "    -b, --bandwidth           bandwidth to use (default 0, or non-banded algorithm)" << endl
+         << "    -p, --print-alignment     print out the alignment" << endl
+         << "    -R, --reverse-complement  report the reverse-complement alignment if it scores better" << endl
+         << endl
+         << "When called with literal reference and query sequences, smithwaterman" << endl
+         << "prints the cigar match positional string and the match position for the" << endl
+         << "query sequence against the reference sequence." << endl;
+}
+
+
+int main (int argc, char** argv) {
+
+    int c;
+
+    string reference;
+    string query;
+
+    int bandwidth = 0;
+
+    float matchScore = 10.0f;
+    float mismatchScore = -9.0f;
+    float gapOpenPenalty = 15.0f;
+    float gapExtendPenalty = 6.66f;
+    float entropyGapOpenPenalty = 0.0f;
+    bool useRepeatGapExtendPenalty = false;
+    float repeatGapExtendPenalty = 1.0f;
+    
+    bool print_alignment = false;
+    bool tryReverseComplement = false;
+
+    while (true) {
+        static struct option long_options[] =
+            {
+                {"help", no_argument, 0, 'h'},
+                {"match-score",  required_argument, 0, 'm'},
+                {"mismatch-score",  required_argument, 0, 'n'},
+                {"gap-open-penalty",  required_argument, 0, 'g'},
+                {"entropy-gap-open-penalty",  required_argument, 0, 'z'},
+                {"gap-extend-penalty",  required_argument, 0, 'e'},
+                {"repeat-gap-extend-penalty",  required_argument, 0, 'r'},
+                {"print-alignment",  required_argument, 0, 'p'},
+                {"bandwidth", required_argument, 0, 'b'},
+                {"reverse-complement", no_argument, 0, 'R'},
+                {0, 0, 0, 0}
+            };
+        int option_index = 0;
+
+        c = getopt_long (argc, argv, "hpRzm:n:g:r:e:b:r:",
+                         long_options, &option_index);
+
+        if (c == -1)
+            break;
+ 
+        switch (c)
+        {
+        case 0:
+            /* If this option set a flag, do nothing else now. */
+            if (long_options[option_index].flag != 0)
+                break;
+            printf ("option %s", long_options[option_index].name);
+            if (optarg)
+                printf (" with arg %s", optarg);
+            printf ("\n");
+            break;
+
+        case 'R':
+            tryReverseComplement = true;
+            break;
+
+        case 'm':
+            matchScore = atof(optarg);
+            break;
+ 
+        case 'n':
+            mismatchScore = atof(optarg);
+            break;
+ 
+        case 'g':
+            gapOpenPenalty = atof(optarg);
+            break;
+
+        case 'z':
+            entropyGapOpenPenalty = 1;
+            break;
+ 
+        case 'r':
+            useRepeatGapExtendPenalty = true;
+            repeatGapExtendPenalty = atof(optarg);
+            break;
+ 
+        case 'e':
+            gapExtendPenalty = atof(optarg);
+            break;
+
+        case 'b':
+            bandwidth = atoi(optarg);
+            break;
+
+        case 'p':
+            print_alignment = true;
+            break;
+ 
+        case 'h':
+            printSummary();
+            exit(0);
+            break;
+ 
+        case '?':
+            /* getopt_long already printed an error message. */
+            printSummary();
+            exit(1);
+            break;
+ 
+        default:
+            abort ();
+        }
+    }
+
+    /* Print any remaining command line arguments (not options). */
+    if (optind == argc - 2) {
+        //cerr << "fasta file: " << argv[optind] << endl;
+        reference = string(argv[optind]);
+        ++optind;
+        query = string(argv[optind]);
+    } else {
+        cerr << "please specify a reference and query sequence" << endl
+             << "execute " << argv[0] << " --help for command-line usage" << endl;
+        exit(1);
+    }
+
+    // initialize
+	
+    unsigned int referencePos;
+    string cigar;
+
+    float bestScore = 0;
+
+    bool alignedReverse = false;
+
+    // create a new Smith-Waterman alignment object
+    if (bandwidth > 0) {
+        pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr;
+        hr.first.first   = 2;
+        hr.first.second  = 18;
+        hr.second.first  = 1;
+        hr.second.second = 17;
+        CBandedSmithWaterman bsw(matchScore, mismatchScore, gapOpenPenalty, gapExtendPenalty, bandwidth);
+        bsw.Align(referencePos, cigar, reference, query, hr);
+    } else {
+        CSmithWatermanGotoh sw(matchScore, mismatchScore, gapOpenPenalty, gapExtendPenalty);
+        if (useRepeatGapExtendPenalty)
+            sw.EnableRepeatGapExtensionPenalty(repeatGapExtendPenalty);
+        if (entropyGapOpenPenalty > 0)
+            sw.EnableEntropyGapPenalty(entropyGapOpenPenalty);
+        sw.Align(referencePos, cigar, reference, query);
+        bestScore = sw.BestScore;
+        if (tryReverseComplement) {
+            string queryRevC = reverseComplement(query);
+            sw.Align(referencePos, cigar, reference, query);
+            if (sw.BestScore > bestScore) {
+                alignedReverse = true;
+                bestScore = sw.BestScore;
+                query = queryRevC;
+            }
+        }
+    }
+ 
+    printf("%s %3u %f %s\n", cigar.c_str(), referencePos, bestScore, (alignedReverse ? "-" : "+"));
+
+    // optionally print out the alignment
+    if (print_alignment) {
+        int alignmentLength = 0;
+        int len;
+        string slen;
+        vector<pair<int, char> > cigarData;
+        for (string::iterator c = cigar.begin(); c != cigar.end(); ++c) {
+            switch (*c) {
+            case 'I':
+                len = atoi(slen.c_str());
+                slen.clear();
+                cigarData.push_back(make_pair(len, *c));
+                break;
+            case 'D':
+                len = atoi(slen.c_str());
+                alignmentLength += len;
+                slen.clear();
+                cigarData.push_back(make_pair(len, *c));
+                break;
+            case 'M':
+                len = atoi(slen.c_str());
+                alignmentLength += len;
+                slen.clear();
+                cigarData.push_back(make_pair(len, *c));
+                break;
+            case 'S':
+                len = atoi(slen.c_str());
+                slen.clear();
+                cigarData.push_back(make_pair(len, *c));
+                break;
+            default:
+                len = 0;
+                slen += *c;
+                break;
+            }
+        }
+
+        string gapped_ref = string(reference).substr(referencePos, alignmentLength);
+        string gapped_query = string(query);
+
+        int refpos = 0;
+        int readpos = 0;
+        for (vector<pair<int, char> >::iterator c = cigarData.begin(); c != cigarData.end(); ++c) {
+            int len = c->first;
+            switch (c->second) {
+            case 'I':
+                gapped_ref.insert(refpos, string(len, '-'));
+                readpos += len;
+                refpos += len;
+                break;
+            case 'D':
+                gapped_query.insert(readpos, string(len, '-'));
+                refpos += len;
+                readpos += len;
+                break;
+            case 'M':
+                readpos += len;
+                refpos += len;
+                break;
+            case 'S':
+                readpos += len;
+                gapped_ref.insert(refpos, string(len, '*'));
+                refpos += len;
+                break;
+            default:
+                break;
+            }
+        }
+
+        cout << gapped_ref << endl << gapped_query << endl;
+    }
+
+	return 0;
+
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libsmithwaterman.git



More information about the debian-med-commit mailing list