[med-svn] [last-align] 01/03: Imported Upstream version 746

Charles Plessy plessy at moszumanska.debian.org
Fri Jul 15 04:35:24 UTC 2016


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch master
in repository last-align.

commit d1b9bf48ecde01dfd16f8942016c1b8c736a06e9
Author: Charles Plessy <plessy at debian.org>
Date:   Fri Jul 15 13:31:29 2016 +0900

    Imported Upstream version 746
---
 ChangeLog.txt                   |  51 ++++++++++++++++++-
 src/Centroid.cc                 | 110 ++++++++++++++++++++--------------------
 src/Centroid.hh                 |  17 ++++---
 src/GappedXdropAligner.cc       |  29 ++++-------
 src/GappedXdropAligner.hh       |  35 +++++++------
 src/GappedXdropAligner2qual.cc  |   6 +--
 src/GappedXdropAligner3frame.cc |  12 ++---
 src/GappedXdropAlignerPssm.cc   |   6 +--
 src/LastalArguments.cc          |  15 +++---
 src/SubsetSuffixArraySearch.cc  |   3 +-
 src/alp/sls_alp_data.cpp        |  20 +-------
 src/alp/sls_alp_data.hpp        |   4 --
 src/alp/sls_basic.cpp           |  26 +++++++++-
 src/alp/sls_basic.hpp           |   4 +-
 src/alp/sls_fsa1.cpp            |  19 +------
 src/alp/sls_fsa1_utils.cpp      |  18 +------
 src/alp/sls_fsa1_utils.hpp      |   6 ---
 src/lastal.cc                   |   3 ++
 src/version.hh                  |   2 +-
 19 files changed, 202 insertions(+), 184 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
index 42d258b..214187c 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,8 +1,57 @@
+2016-07-05  Martin C. Frith  <Martin C. Frith>
+
+	* src/SubsetSuffixArraySearch.cc:
+	Bugfix: lastal with -l0 was not reliable, I think.
+	[60738c7a24e3] [tip]
+
+	* src/LastalArguments.cc:
+	Allowed gap open costs < 0.
+	[4513e0b7e26d]
+
+2016-05-20  Martin C. Frith  <Martin C. Frith>
+
+	* src/lastal.cc:
+	Maybe made lastal faster in some cases.
+	[bb03593936dc]
+
+	* src/GappedXdropAligner.hh, src/GappedXdropAligner3frame.cc:
+	Refactoring.
+	[788dd86f42f0]
+
+	* src/Centroid.cc, src/GappedXdropAligner.cc,
+	src/GappedXdropAligner.hh, src/GappedXdropAligner2qual.cc,
+	src/GappedXdropAlignerPssm.cc:
+	Refactoring.
+	[eea957b121e8]
+
+	* src/Centroid.cc:
+	Refactoring.
+	[db4de3a2e6a3]
+
+	* src/Centroid.hh, src/GappedXdropAligner.cc,
+	src/GappedXdropAligner.hh, src/GappedXdropAligner3frame.cc:
+	Refactoring.
+	[a317e3410f24]
+
+2016-05-19  Martin C. Frith  <Martin C. Frith>
+
+	* src/Centroid.cc, src/Centroid.hh, src/GappedXdropAligner.cc,
+	src/GappedXdropAligner.hh, src/GappedXdropAligner3frame.cc:
+	Refactoring.
+	[12fa981e66d0]
+
+	* src/alp/sls_alp_data.cpp, src/alp/sls_alp_data.hpp,
+	src/alp/sls_basic.cpp, src/alp/sls_basic.hpp, src/alp/sls_fsa1.cpp,
+	src/alp/sls_fsa1_utils.cpp, src/alp/sls_fsa1_utils.hpp:
+	Tried to fix compile error on Cygwin (I don't fully understand
+	this).
+	[21d485934bef]
+
 2016-03-31  Martin C. Frith  <Martin C. Frith>
 
 	* doc/lastal.txt, src/LastalArguments.cc:
 	Tried to document lastal options more clearly.
-	[9ecd0219342e] [tip]
+	[9ecd0219342e]
 
 2016-03-30  Martin C. Frith  <Martin C. Frith>
 
diff --git a/src/Centroid.cc b/src/Centroid.cc
index ba8866a..792ed45 100644
--- a/src/Centroid.cc
+++ b/src/Centroid.cc
@@ -96,8 +96,8 @@ namespace cbrc{
   }
 
   void Centroid::initForwardMatrix(){
-    scale.assign ( numAntidiagonals, 1.0 ); // scaling
-    std::size_t n = xa.scoreEndIndex( numAntidiagonals );
+    scale.assign ( numAntidiagonals + 2, 1.0 ); // scaling
+    size_t n = xa.scoreEndIndex( numAntidiagonals );
 
     if ( fM.size() < n ) {
       fM.resize( n );
@@ -111,12 +111,12 @@ namespace cbrc{
 
   void Centroid::initBackwardMatrix(){
     pp.resize( fM.size() );
-    mD.assign( numAntidiagonals, 0.0 );
-    mI.assign( numAntidiagonals, 0.0 );
-    mX1.assign ( numAntidiagonals, 1.0 );
-    mX2.assign ( numAntidiagonals, 1.0 );
+    mD.assign( numAntidiagonals + 2, 0.0 );
+    mI.assign( numAntidiagonals + 2, 0.0 );
+    mX1.assign ( numAntidiagonals + 2, 1.0 );
+    mX2.assign ( numAntidiagonals + 2, 1.0 );
 
-    std::size_t n = xa.scoreEndIndex( numAntidiagonals );
+    size_t n = xa.scoreEndIndex( numAntidiagonals );
     bM.assign( n, 0.0 );
     bD.assign( n, 0.0 );
     bI.assign( n, 0.0 );
@@ -169,26 +169,26 @@ namespace cbrc{
 
     assert( gap.insExist == gap.delExist || eP <= 0.0 );
 
-    for( size_t k = 2; k < numAntidiagonals; ++k ){  // loop over antidiagonals
+    for( size_t k = 0; k < numAntidiagonals; ++k ){  // loop over antidiagonals
       double sum_f = 0.0; // sum of forward values
-      const size_t seq1beg = xa.seq1start( k );
-      const std::size_t seq2pos = k - 2 - seq1beg;
-      const double scale12 = 1.0 / ( scale[k-1] * scale[k-2] );
-      const double scale1  = 1.0 / scale[k-1];
+      const size_t seq1beg = seq1start( k );
+      const size_t seq2pos = k - seq1beg;
+      const double scale12 = 1.0 / ( scale[k+1] * scale[k] );
+      const double scale1  = 1.0 / scale[k+1];
 
       const double seE = eE * scale1;
       const double seEI = eEI * scale1;
       const double seP = eP * scale12;
 
-      const std::size_t scoreEnd = xa.scoreEndIndex( k );
+      const size_t scoreEnd = xa.scoreEndIndex( k );
       double* fM0 = &fM[ scoreEnd ];
       double* fD0 = &fD[ scoreEnd ];
       double* fI0 = &fI[ scoreEnd ];
       double* fP0 = &fP[ scoreEnd ];
 
-      const std::size_t horiBeg = xa.hori( k, seq1beg );
-      const std::size_t vertBeg = xa.vert( k, seq1beg );
-      const std::size_t diagBeg = xa.diag( k, seq1beg );
+      const size_t horiBeg = xa.hori( k, seq1beg );
+      const size_t vertBeg = xa.vert( k, seq1beg );
+      const size_t diagBeg = xa.diag( k, seq1beg );
       const double* fD1 = &fD[ horiBeg ];
       const double* fI1 = &fI[ vertBeg ];
       const double* fM2 = &fM[ diagBeg ];
@@ -270,12 +270,12 @@ namespace cbrc{
 	}
       }
       if( !globality ) Z += sum_f;
-      scale[k] = sum_f + 1.0;  // seems ugly
-      Z /= scale[k]; // scaling
+      scale[k+2] = sum_f + 1.0;  // seems ugly
+      Z /= scale[k+2]; // scaling
     } // k
     //std::cout << "# Z=" << Z << std::endl;
     assert( Z > 0.0 );
-    scale[ numAntidiagonals - 1 ] *= Z;  // this causes scaled Z to equal 1
+    scale[ numAntidiagonals + 1 ] *= Z;  // this causes scaled Z to equal 1
   }
 
   // added by M. Hamada
@@ -309,18 +309,18 @@ namespace cbrc{
 
     assert( gap.insExist == gap.delExist || eP <= 0.0 );
 
-    for( size_t k = numAntidiagonals-1; k > 1; --k ){
-      const size_t seq1beg = xa.seq1start( k );
-      const std::size_t seq2pos = k - 2 - seq1beg;
-      const double scale12 = 1.0 / ( scale[k-1] * scale[k-2] );
-      const double scale1  = 1.0 / scale[k-1];
-      scaledUnit /= scale[k];
+    for( size_t k = numAntidiagonals; k-- > 0; ){
+      const size_t seq1beg = seq1start( k );
+      const size_t seq2pos = k - seq1beg;
+      const double scale12 = 1.0 / ( scale[k+1] * scale[k] );
+      const double scale1  = 1.0 / scale[k+1];
+      scaledUnit /= scale[k+2];
 
       const double seE = eE * scale1;
       const double seEI = eEI * scale1;
       const double seP = eP * scale12;
 
-      const std::size_t scoreEnd = xa.scoreEndIndex( k );
+      const size_t scoreEnd = xa.scoreEndIndex( k );
       const double* bM0 = &bM[ scoreEnd + 1 ];
       const double* bD0 = &bD[ scoreEnd + 1 ];
       const double* bI0 = &bI[ scoreEnd + 1 ];
@@ -328,9 +328,9 @@ namespace cbrc{
 
       double* pp0 = &pp[ scoreEnd ];
 
-      const std::size_t horiBeg = xa.hori( k, seq1beg );
-      const std::size_t vertBeg = xa.vert( k, seq1beg );
-      const std::size_t diagBeg = xa.diag( k, seq1beg );
+      const size_t horiBeg = xa.hori( k, seq1beg );
+      const size_t vertBeg = xa.vert( k, seq1beg );
+      const size_t diagBeg = xa.diag( k, seq1beg );
       double* bD1 = &bD[ horiBeg ];
       double* bI1 = &bI[ vertBeg ];
       double* bM2 = &bM[ diagBeg ];
@@ -503,11 +503,11 @@ namespace cbrc{
 
     initDecodingMatrix();
 
-    for( size_t k = 3; k < numAntidiagonals; ++k ){  // loop over antidiagonals
-      const std::size_t scoreEnd = xa.scoreEndIndex( k );
+    for( size_t k = 1; k < numAntidiagonals; ++k ){  // loop over antidiagonals
+      const size_t scoreEnd = xa.scoreEndIndex( k );
       double* X0 = &X[ scoreEnd ];
       const double* P0 = &pp[ scoreEnd ];
-      size_t cur = xa.seq1start( k );
+      size_t cur = seq1start( k );
 
       const double* const x0end = X0 + xa.numCellsAndPads( k );
       const double* X1 = &X[xa.hori(k, cur)];
@@ -536,7 +536,7 @@ namespace cbrc{
     size_t i = bestPos1;
     size_t oldPos1 = i;
 
-    while( k > 2 ){
+    while( k > 0 ){
       const int m =
 	maxIndex( diagx( X, k, i ) + ( gamma + 1 ) * cellx( pp, k, i ) - 1,
                   horix( X, k, i ),
@@ -545,8 +545,8 @@ namespace cbrc{
 	k -= 2;
 	i -= 1;
       }
-      if( (m > 0 && oldPos1 != i) || k == 2 ){
-	chunks.push_back( SegmentPair( i, k - i - 2, oldPos1 - i ) );
+      if( (m > 0 && oldPos1 != i) || k == 0 ){
+	chunks.push_back( SegmentPair( i, k - i, oldPos1 - i ) );
       }
       if( m > 0 ){
 	k -= 1;
@@ -560,12 +560,12 @@ namespace cbrc{
 
     initDecodingMatrix();
 
-    for( size_t k = 3; k < numAntidiagonals; ++k ){  // loop over antidiagonals
-      const std::size_t scoreEnd = xa.scoreEndIndex( k );
+    for( size_t k = 1; k < numAntidiagonals; ++k ){  // loop over antidiagonals
+      const size_t scoreEnd = xa.scoreEndIndex( k );
       double* X0 = &X[ scoreEnd ];
       const double* P0 = &pp[ scoreEnd ];
-      size_t cur = xa.seq1start( k );
-      size_t seq2pos = k - 2 - cur;
+      size_t cur = seq1start( k );
+      size_t seq2pos = k - cur;
 
       const double* const x0end = X0 + xa.numCellsAndPads( k );
       const double* X1 = &X[ xa.hori(k, cur) ];
@@ -597,8 +597,8 @@ namespace cbrc{
     size_t i = bestPos1;
     size_t oldPos1 = i;
 
-    while( k > 2 ){
-      const size_t j = k - i - 2;
+    while( k > 0 ){
+      const size_t j = k - i;
       const double s = 2 * gamma * cellx( pp, k, i ) - ( mX1[ i ] + mX2[ j ] );
       const double t = gamma * mI[ j ] - mX2[ j ];
       const double u = gamma * mD[ i ] - mX1[ i ];
@@ -610,8 +610,8 @@ namespace cbrc{
 	k -= 2;
 	i -= 1;
       }
-      if( (m > 0 && oldPos1 != i) || k == 2 ){
-	chunks.push_back( SegmentPair( i, k - i - 2, oldPos1 - i ) );
+      if( (m > 0 && oldPos1 != i) || k == 0 ){
+	chunks.push_back( SegmentPair( i, k - i, oldPos1 - i ) );
       }
       if( m > 0 ){
 	k -= 1;
@@ -653,7 +653,7 @@ namespace cbrc{
       size_t seq2pos = i->end2();
 
       for( size_t j = 0; j < i->size; ++j ){
-	double p = cellx( pp, seq1pos + seq2pos + 2, seq1pos );
+	double p = cellx( pp, seq1pos + seq2pos, seq1pos );
 	ambiguityCodes.push_back( asciiProbability(p) );
 	--seq1pos;
 	--seq2pos;
@@ -678,8 +678,8 @@ namespace cbrc{
 
   double Centroid::logPartitionFunction() const{
     double x = 0.0;
-    for( std::size_t k = 2; k < numAntidiagonals; ++k ){
-      x += std::log( scale[k] );
+    for( size_t k = 0; k < numAntidiagonals; ++k ){
+      x += std::log( scale[k+2] );
     }
     return T * x;
   }
@@ -708,11 +708,11 @@ namespace cbrc{
 
     assert( gap.insExist == gap.delExist || eP <= 0.0 );
 
-    for( size_t k = 2; k < numAntidiagonals; ++k ){  // loop over antidiagonals
-      const size_t seq1beg = xa.seq1start( k );
-      const std::size_t seq2pos = k - 2 - seq1beg;
-      const double scale12 = 1.0 / ( scale[k-1] * scale[k-2] );
-      const double scale1  = 1.0 / scale[k-1];
+    for( size_t k = 0; k < numAntidiagonals; ++k ){  // loop over antidiagonals
+      const size_t seq1beg = seq1start( k );
+      const size_t seq2pos = k - seq1beg;
+      const double scale12 = 1.0 / ( scale[k+1] * scale[k] );
+      const double scale1  = 1.0 / scale[k+1];
 
       const double seE = eE * scale1;
       const double seEI = eEI * scale1;
@@ -721,15 +721,15 @@ namespace cbrc{
       const uchar* s1 = seqPtr( seq1, isForward, seq1beg );
       const uchar* s2 = seqPtr( seq2, isForward, seq2pos );
 
-      const std::size_t scoreEnd = xa.scoreEndIndex( k );
+      const size_t scoreEnd = xa.scoreEndIndex( k );
       const double* bM0 = &bM[ scoreEnd + 1 ];
       const double* bD0 = &bD[ scoreEnd + 1 ];
       const double* bI0 = &bI[ scoreEnd + 1 ];
       const double* bP0 = &bP[ scoreEnd + 1 ];
 
-      const std::size_t horiBeg = xa.hori( k, seq1beg );
-      const std::size_t vertBeg = xa.vert( k, seq1beg );
-      const std::size_t diagBeg = xa.diag( k, seq1beg );
+      const size_t horiBeg = xa.hori( k, seq1beg );
+      const size_t vertBeg = xa.vert( k, seq1beg );
+      const size_t diagBeg = xa.diag( k, seq1beg );
       const double* fD1 = &fD[ horiBeg ];
       const double* fI1 = &fI[ vertBeg ];
       const double* fM2 = &fM[ diagBeg ];
diff --git a/src/Centroid.hh b/src/Centroid.hh
index e597c4d..14cef9e 100644
--- a/src/Centroid.hh
+++ b/src/Centroid.hh
@@ -7,7 +7,6 @@
 #include "GeneralizedAffineGapCosts.hh"
 #include "SegmentPair.hh"
 #include "OneQualityScoreMatrix.hh"
-#include <cassert>
 #include <stddef.h>  // size_t
 #include <vector>
 #include <iostream> // for debug
@@ -123,30 +122,34 @@ namespace cbrc{
 
     void updateScore( double score, size_t antiDiagonal, size_t cur );
 
+    // start of the x-drop region (i.e. number of skipped seq1 letters
+    // before the x-drop region) for this antidiagonal
+    size_t seq1start( size_t antidiagonal ) const {
+      return xa.scoreEndIndex( antidiagonal ) - xa.scoreOrigin( antidiagonal );
+    }
+
     // get DP matrix value at the given position
     double cellx( const dvec_t& matrix,
                   size_t antiDiagonal, size_t seq1pos ) const{
-      return matrix[ xa.scoreEndIndex( antiDiagonal ) + seq1pos - xa.seq1start( antiDiagonal ) ];
+      return matrix[ xa.scoreOrigin( antiDiagonal ) + seq1pos ];
     }
 
     // get DP matrix value "left of" the given position
     double horix( const dvec_t& matrix,
                   size_t antiDiagonal, size_t seq1pos ) const{
-      assert( antiDiagonal > 0 );
-      return cellx( matrix, antiDiagonal-1, seq1pos );
+      return matrix[ xa.hori( antiDiagonal, seq1pos ) ];
     }
 
     // get DP matrix value "above" the given position
     double vertx( const dvec_t& matrix,
                   size_t antiDiagonal, size_t seq1pos ) const{
-      assert( antiDiagonal > 0 );
-      return cellx( matrix, antiDiagonal-1, seq1pos+1 );
+      return matrix[ xa.vert( antiDiagonal, seq1pos ) ];
     }
 
     // get DP matrix value "diagonal from" the given position
     double diagx( const dvec_t& matrix,
                   size_t antiDiagonal, size_t seq1pos ) const{
-      return cellx( matrix, antiDiagonal-2, seq1pos );
+      return matrix[ xa.diag( antiDiagonal, seq1pos ) ];
     }
 
     // get a pointer into a sequence, taking start and direction into account
diff --git a/src/GappedXdropAligner.cc b/src/GappedXdropAligner.cc
index 91d9f41..4c8a532 100644
--- a/src/GappedXdropAligner.cc
+++ b/src/GappedXdropAligner.cc
@@ -55,7 +55,7 @@ namespace cbrc {
 // Puts 2 "dummy" antidiagonals at the start, so that we can safely
 // look-back from subsequent antidiagonals.
 void GappedXdropAligner::init() {
-  seq1starts.resize(0);
+  scoreOrigins.resize(0);
   scoreEnds.resize(1);
 
   initAntidiagonal(0, 0, 0);
@@ -68,21 +68,15 @@ void GappedXdropAligner::init() {
   yScores[1] = -INF;
   zScores[1] = -INF;
 
-  bestAntidiagonal = 2;
+  bestAntidiagonal = 0;
 }
 
 void GappedXdropAligner::initAntidiagonal(std::size_t seq1beg,
                                           std::size_t scoreEnd,
                                           std::size_t numCells) {
+  scoreOrigins.push_back(scoreEnd - seq1beg);
   std::size_t newEnd = scoreEnd + numCells + 1;  // + 1 pad cell
-
-  if (xScores.size() < newEnd) {
-    xScores.resize(newEnd);
-    yScores.resize(newEnd);
-    zScores.resize(newEnd);
-  }
-
-  seq1starts.push_back(seq1beg);
+  resizeScoresIfSmaller(newEnd);
   scoreEnds.push_back(newEnd);
 }
 
@@ -108,12 +102,12 @@ int GappedXdropAligner::align(const uchar *seq1,
 
   int bestScore = 0;
   int bestEdgeScore = -INF;
-  std::size_t bestEdgeAntidiagonal = 2;
+  std::size_t bestEdgeAntidiagonal = 0;
   std::size_t bestEdgeSeq1position = 0;
 
   init();
 
-  for (std::size_t antidiagonal = 2; /* noop */; ++antidiagonal) {
+  for (std::size_t antidiagonal = 0; /* noop */; ++antidiagonal) {
     std::size_t seq1beg = std::min(maxSeq1begs[0], maxSeq1begs[1]);
     std::size_t seq1end = std::max(minSeq1ends[0], minSeq1ends[1]);
 
@@ -124,7 +118,7 @@ int GappedXdropAligner::align(const uchar *seq1,
 
     initAntidiagonal(seq1beg, scoreEnd, numCells);
 
-    std::size_t seq2pos = antidiagonal - 2 - seq1beg;
+    std::size_t seq2pos = antidiagonal - seq1beg;
 
     const uchar *s1 = isForward ? seq1 + seq1beg : seq1 - seq1beg - 1;
     const uchar *s2 = isForward ? seq2 + seq2pos : seq2 - seq2pos - 1;
@@ -243,18 +237,17 @@ bool GappedXdropAligner::getNextChunk(std::size_t &end1,
 				      int insExistenceCost,
 				      int insExtensionCost,
                                       int gapUnalignedCost) {
-  if (bestAntidiagonal == 2) return false;
+  if (bestAntidiagonal == 0) return false;
 
   end1 = bestSeq1position;
-  end2 = bestAntidiagonal - 2 - bestSeq1position;
+  end2 = bestAntidiagonal - bestSeq1position;
   const std::size_t undefined = -1;
   length = undefined;
 
   int state = 0;
 
   while (1) {
-    assert(bestAntidiagonal >= 2);
-    assert(bestSeq1position <= bestAntidiagonal - 2);
+    assert(bestSeq1position <= bestAntidiagonal);
 
     std::size_t h = hori(bestAntidiagonal, bestSeq1position);
     std::size_t v = vert(bestAntidiagonal, bestSeq1position);
@@ -278,7 +271,7 @@ bool GappedXdropAligner::getNextChunk(std::size_t &end1,
 
     state = maxIndex(x, y, z, a, b);
 
-    if (length == undefined && (state > 0 || bestAntidiagonal == 2)) {
+    if (length == undefined && (state > 0 || bestAntidiagonal == 0)) {
       length = end1 - bestSeq1position;
       assert(length != undefined);
     }
diff --git a/src/GappedXdropAligner.hh b/src/GappedXdropAligner.hh
index 8103344..5a0607f 100644
--- a/src/GappedXdropAligner.hh
+++ b/src/GappedXdropAligner.hh
@@ -165,57 +165,62 @@ class GappedXdropAligner {
   // The next 4 functions are for use by Centroid.  If the Centroid
   // code gets updated, it might make sense to change these functions too.
 
-  // The number of antidiagonals, including dummy ones at the beginning.
+  // The number of antidiagonals, excluding dummy ones at the beginning.
   std::size_t numAntidiagonals() const
-  { return seq1starts.size(); }
+  { return scoreOrigins.size() - 2; }
 
-  std::size_t seq1start(std::size_t antidiagonal) const
-  { return seq1starts[antidiagonal]; }
+  std::size_t scoreOrigin(std::size_t antidiagonal) const
+  { return scoreOrigins[antidiagonal + 2]; }
 
   std::size_t numCellsAndPads(std::size_t antidiagonal) const
-  { return scoreEnds[antidiagonal + 1] - scoreEnds[antidiagonal]; }
+  { return scoreEnds[antidiagonal + 3] - scoreEnds[antidiagonal + 2]; }
 
   std::size_t scoreEndIndex(std::size_t antidiagonal) const
-  { return scoreEnds[antidiagonal]; }
+  { return scoreEnds[antidiagonal + 2]; }
 
   // The index in the score vectors, of the previous "horizontal" cell.
   std::size_t hori(std::size_t antidiagonal, std::size_t seq1coordinate) const
-  { return scoreBase(antidiagonal - 1) + seq1coordinate; }
+  { return scoreOrigins[antidiagonal + 1] + seq1coordinate; }
 
   // The index in the score vectors, of the previous "vertical" cell.
   std::size_t vert(std::size_t antidiagonal, std::size_t seq1coordinate) const
-  { return scoreBase(antidiagonal - 1) + seq1coordinate + 1; }
+  { return scoreOrigins[antidiagonal + 1] + seq1coordinate + 1; }
 
   // The index in the score vectors, of the previous "diagonal" cell.
   std::size_t diag(std::size_t antidiagonal, std::size_t seq1coordinate) const
-  { return scoreBase(antidiagonal - 2) + seq1coordinate; }
+  { return scoreOrigins[antidiagonal] + seq1coordinate; }
 
   // The index in the score vectors, of the previous in-frame horizontal cell.
   std::size_t hori3(std::size_t antidiagonal, std::size_t seq1coordinate) const
-  { return scoreBase(antidiagonal - 3) + seq1coordinate + 1; }
+  { return scoreOrigins[antidiagonal - 3] + seq1coordinate; }
 
   // The index in the score vectors, of the previous in-frame vertical cell.
   std::size_t vert3(std::size_t antidiagonal, std::size_t seq1coordinate) const
-  { return scoreBase(antidiagonal - 3) + seq1coordinate + 2; }
+  { return scoreOrigins[antidiagonal - 3] + seq1coordinate + 1; }
 
   // The index in the score vectors, of the previous in-frame diagonal cell.
   std::size_t diag3(std::size_t antidiagonal, std::size_t seq1coordinate) const
-  { return scoreBase(antidiagonal - 6) + seq1coordinate + 1; }
+  { return scoreOrigins[antidiagonal - 6] + seq1coordinate; }
 
  private:
   std::vector<int> xScores;  // best score ending with aligned letters
   std::vector<int> yScores;  // best score ending with insertion in seq1
   std::vector<int> zScores;  // best score ending with insertion in seq2
 
-  std::vector<std::size_t> seq1starts;  // seq1 start pos for each antidiagonal
+  std::vector<std::size_t> scoreOrigins;  // score origin for each antidiagonal
   std::vector<std::size_t> scoreEnds;  // score end pos for each antidiagonal
 
   // Our position during the trace-back:
   std::size_t bestAntidiagonal;
   std::size_t bestSeq1position;
 
-  std::size_t scoreBase(std::size_t antidiagonal) const
-  { return scoreEnds[antidiagonal] - seq1starts[antidiagonal]; }
+  void resizeScoresIfSmaller(std::size_t size) {
+    if (xScores.size() < size) {
+      xScores.resize(size);
+      yScores.resize(size);
+      zScores.resize(size);
+    }
+  }
 
   void init();
 
diff --git a/src/GappedXdropAligner2qual.cc b/src/GappedXdropAligner2qual.cc
index e42c4f5..00c1559 100644
--- a/src/GappedXdropAligner2qual.cc
+++ b/src/GappedXdropAligner2qual.cc
@@ -34,12 +34,12 @@ int GappedXdropAligner::align2qual(const uchar *seq1,
 
   int bestScore = 0;
   int bestEdgeScore = -INF;
-  std::size_t bestEdgeAntidiagonal = 2;
+  std::size_t bestEdgeAntidiagonal = 0;
   std::size_t bestEdgeSeq1position = 0;
 
   init();
 
-  for (std::size_t antidiagonal = 2; /* noop */; ++antidiagonal) {
+  for (std::size_t antidiagonal = 0; /* noop */; ++antidiagonal) {
     std::size_t seq1beg = std::min(maxSeq1begs[0], maxSeq1begs[1]);
     std::size_t seq1end = std::max(minSeq1ends[0], minSeq1ends[1]);
 
@@ -50,7 +50,7 @@ int GappedXdropAligner::align2qual(const uchar *seq1,
 
     initAntidiagonal(seq1beg, scoreEnd, numCells);
 
-    std::size_t seq2pos = antidiagonal - 2 - seq1beg;
+    std::size_t seq2pos = antidiagonal - seq1beg;
 
     const uchar *s1 = isForward ? seq1 + seq1beg : seq1 - seq1beg - 1;
     const uchar *q1 = isForward ? qual1 + seq1beg : qual1 - seq1beg - 1;
diff --git a/src/GappedXdropAligner3frame.cc b/src/GappedXdropAligner3frame.cc
index ab0d70b..87d428a 100644
--- a/src/GappedXdropAligner3frame.cc
+++ b/src/GappedXdropAligner3frame.cc
@@ -59,7 +59,7 @@ namespace cbrc {
 // Puts 7 "dummy" antidiagonals at the start, so that we can safely
 // look-back from subsequent antidiagonals.
 void GappedXdropAligner::init3() {
-  seq1starts.resize(0);
+  scoreOrigins.resize(0);
   scoreEnds.resize(1);
 
   initAntidiagonal3(0, 0, 0);
@@ -82,15 +82,9 @@ void GappedXdropAligner::init3() {
 void GappedXdropAligner::initAntidiagonal3(std::size_t seq1beg,
                                            std::size_t scoreEnd,
                                            std::size_t numCells) {
+  scoreOrigins.push_back(scoreEnd - seq1beg + 1);
   std::size_t newEnd = scoreEnd + numCells + 2;  // + 2 pad cells
-
-  if (xScores.size() < newEnd) {
-    xScores.resize(newEnd);
-    yScores.resize(newEnd);
-    zScores.resize(newEnd);
-  }
-
-  seq1starts.push_back(seq1beg);
+  resizeScoresIfSmaller(newEnd);
   scoreEnds.push_back(newEnd);
 }
 
diff --git a/src/GappedXdropAlignerPssm.cc b/src/GappedXdropAlignerPssm.cc
index e6f2ff4..6d776d3 100644
--- a/src/GappedXdropAlignerPssm.cc
+++ b/src/GappedXdropAlignerPssm.cc
@@ -26,12 +26,12 @@ int GappedXdropAligner::alignPssm(const uchar *seq,
 
   int bestScore = 0;
   int bestEdgeScore = -INF;
-  std::size_t bestEdgeAntidiagonal = 2;
+  std::size_t bestEdgeAntidiagonal = 0;
   std::size_t bestEdgeSeq1position = 0;
 
   init();
 
-  for (std::size_t antidiagonal = 2; /* noop */; ++antidiagonal) {
+  for (std::size_t antidiagonal = 0; /* noop */; ++antidiagonal) {
     std::size_t seq1beg = std::min(maxSeq1begs[0], maxSeq1begs[1]);
     std::size_t seq1end = std::max(minSeq1ends[0], minSeq1ends[1]);
 
@@ -42,7 +42,7 @@ int GappedXdropAligner::alignPssm(const uchar *seq,
 
     initAntidiagonal(seq1beg, scoreEnd, numCells);
 
-    std::size_t seq2pos = antidiagonal - 2 - seq1beg;
+    std::size_t seq2pos = antidiagonal - seq1beg;
 
     const uchar *s1 = isForward ? seq + seq1beg : seq - seq1beg - 1;
     const ScoreMatrixRow *s2 = isForward ? pssm + seq2pos : pssm - seq2pos - 1;
diff --git a/src/LastalArguments.cc b/src/LastalArguments.cc
index 6a568fa..82ecca1 100644
--- a/src/LastalArguments.cc
+++ b/src/LastalArguments.cc
@@ -9,6 +9,7 @@
 #include <vector>
 #include <stdexcept>
 #include <cctype>
+#include <climits>
 #include <cmath>  // log
 #include <cstring>  // strtok
 #include <cstdlib>  // EXIT_SUCCESS
@@ -58,9 +59,9 @@ LastalArguments::LastalArguments() :
   minScoreGapless(-1),  // depends on minScoreGapped and the outputType
   matchScore(-1),  // depends on the alphabet
   mismatchCost(-1),  // depends on the alphabet
-  gapExistCost(-1),  // depends on the alphabet
+  gapExistCost(INT_MIN),  // depends on the alphabet
   gapExtendCost(-1),  // depends on the alphabet
-  insExistCost(-1),  // depends on gapExistCost
+  insExistCost(INT_MIN),  // depends on gapExistCost
   insExtendCost(-1),  // depends on gapExtendCost
   gapPairCost(-1),  // this means: OFF
   frameshiftCost(-1),  // this means: ordinary, non-translated alignment
@@ -199,7 +200,6 @@ LAST home page: http://last.cbrc.jp/\n\
       break;
     case 'a':
       unstringify( gapExistCost, optarg );
-      if( gapExistCost < 0 ) badopt( c, optarg );
       break;
     case 'b':
       unstringify( gapExtendCost, optarg );
@@ -207,7 +207,6 @@ LAST home page: http://last.cbrc.jp/\n\
       break;
     case 'A':
       unstringify( insExistCost, optarg );
-      if( insExistCost < 0 ) badopt( c, optarg );
       break;
     case 'B':
       unstringify( insExtendCost, optarg );
@@ -428,19 +427,19 @@ void LastalArguments::setDefaultsFromAlphabet( bool isDna, bool isProtein,
     // default match & mismatch scores: Blosum62 matrix
     if( matchScore < 0 && mismatchCost >= 0 ) matchScore   = 1;  // idiot-proof
     if( mismatchCost < 0 && matchScore >= 0 ) mismatchCost = 1;  // idiot-proof
-    if( gapExistCost   < 0 ) gapExistCost   =  11;
+    if( gapExistCost   == INT_MIN ) gapExistCost   =  11;
     if( gapExtendCost  < 0 ) gapExtendCost  =   2;
   }
   else if( !isQuality( inputFormat ) ){
     if( matchScore     < 0 ) matchScore     =   1;
     if( mismatchCost   < 0 ) mismatchCost   =   1;
-    if( gapExistCost   < 0 ) gapExistCost   =   7;
+    if( gapExistCost   == INT_MIN ) gapExistCost   =   7;
     if( gapExtendCost  < 0 ) gapExtendCost  =   1;
   }
   else{  // sequence quality scores will be used:
     if( matchScore     < 0 ) matchScore     =   6;
     if( mismatchCost   < 0 ) mismatchCost   =  18;
-    if( gapExistCost   < 0 ) gapExistCost   =  21;
+    if( gapExistCost   == INT_MIN ) gapExistCost   =  21;
     if( gapExtendCost  < 0 ) gapExtendCost  =   9;
     // With this scoring scheme for DNA, gapless lambda ~= ln(10)/10,
     // so these scores should be comparable to PHRED scores.
@@ -455,7 +454,7 @@ void LastalArguments::setDefaultsFromAlphabet( bool isDna, bool isProtein,
     maxEvalue = 1e18 / (numOfStrands() * r * queryLettersPerRandomAlignment);
   }
 
-  if( insExistCost < 0 ) insExistCost = gapExistCost;
+  if( insExistCost == INT_MIN ) insExistCost = gapExistCost;
   if( insExtendCost < 0 ) insExtendCost = gapExtendCost;
 
   if( tantanSetting < 0 ){
diff --git a/src/SubsetSuffixArraySearch.cc b/src/SubsetSuffixArraySearch.cc
index 0629559..2c0efff 100644
--- a/src/SubsetSuffixArraySearch.cc
+++ b/src/SubsetSuffixArraySearch.cc
@@ -39,7 +39,8 @@ void SubsetSuffixArray::match( const indexT*& begPtr, const indexT*& endPtr,
     uchar subset = oldMap[ queryPtr[depth-1] ];
     bucketPtr -= subset * bucketSteps[depth];
     indexT oldBeg = *bucketPtr;
-    indexT oldEnd = *(bucketPtr + bucketSteps[depth-1]);
+    indexT oldEnd =
+      (depth > 1) ? *(bucketPtr + bucketSteps[depth-1]) : suffixArray.size();
     if( oldEnd - oldBeg > maxHits ) break;
     subsetMap = oldMap;
     beg = oldBeg;
diff --git a/src/alp/sls_alp_data.cpp b/src/alp/sls_alp_data.cpp
index c752f07..6d32520 100644
--- a/src/alp/sls_alp_data.cpp
+++ b/src/alp/sls_alp_data.cpp
@@ -27,7 +27,7 @@
 
 File name: sls_alp_data.cpp
 
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
 
 Contents: Input data for the ascending ladder points simulation
 
@@ -228,23 +228,7 @@ bool insertions_after_deletions_)//if true, then insertions after deletions are
 
 		if(!d_rand_flag&&rand_<0)
 		{
-			
-			rand_=(long int)time(NULL);
-			#ifndef _MSC_VER //UNIX program
-				struct timeval tv;
-				struct timezone tz;
-				gettimeofday(&tv, &tz);
-				rand_+=tv.tv_usec*10000000;
-			#else
-				struct _timeb timebuffer;
-				char *timeline;
-				_ftime( &timebuffer );
-				timeline = ctime( & ( timebuffer.time ) );
-				rand_+=timebuffer.millitm*10000000;
-			#endif
-
-			rand_=abs(rand_);
-
+			rand_=sls_basic::random_seed_from_time();
 			d_rand_flag=false;
 			
 		};
diff --git a/src/alp/sls_alp_data.hpp b/src/alp/sls_alp_data.hpp
index 24e6150..39a81de 100644
--- a/src/alp/sls_alp_data.hpp
+++ b/src/alp/sls_alp_data.hpp
@@ -53,13 +53,9 @@ Contents: Contains input data
 
 
 #ifndef _MSC_VER //UNIX program
-#include <sys/time.h>
 #else
-#include <sys/timeb.h>
-
 #define _CRTDBG_MAP_ALLOC
 #include <crtdbg.h>
-
 #endif
 
 
diff --git a/src/alp/sls_basic.cpp b/src/alp/sls_basic.cpp
index f5c5a17..7b8ebd2 100644
--- a/src/alp/sls_basic.cpp
+++ b/src/alp/sls_basic.cpp
@@ -27,13 +27,19 @@
 
 File name: sls_basic.cpp
 
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
 
 Contents: Some basic functions and types
 
 ******************************************************************************/
 
+// 2016: this voodoo is needed to compile on Cygwin, with g++ options
+// such as -std=c++11 or -std=c++03, else it complains about gettimeofday
+#define _DEFAULT_SOURCE 1
+
 #include "sls_basic.hpp"
+#include <cstdlib>  // std::abs
+#include <ctime>
 
 using namespace Sls;
 
@@ -67,6 +73,24 @@ double &seconds_)
 #endif
 }
 
+long int sls_basic::random_seed_from_time()
+{
+	long int random_factor=(long int)std::time(NULL);
+#ifndef _MSC_VER //UNIX program
+	struct timeval tv;
+	struct timezone tz;
+	gettimeofday(&tv, &tz);
+	random_factor+=tv.tv_usec*10000000;
+#else
+	struct _timeb timebuffer;
+	char *timeline;
+	_ftime( &timebuffer );
+	timeline = ctime( & ( timebuffer.time ) );
+	random_factor+=timebuffer.millitm*10000000;
+#endif
+	return std::abs(random_factor);
+}
+
 double sls_basic::one_minus_exp_function(
 double y_)
 {
diff --git a/src/alp/sls_basic.hpp b/src/alp/sls_basic.hpp
index 1c6e7f1..d289ffd 100644
--- a/src/alp/sls_basic.hpp
+++ b/src/alp/sls_basic.hpp
@@ -30,7 +30,7 @@
 
 File name: sls_basic.hpp
 
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
 
 Contents: Some basic functions and types
 
@@ -184,6 +184,8 @@ namespace Sls {
 		static void get_current_time(
 		double &seconds_);
 
+		static long int random_seed_from_time();
+
 		static double one_minus_exp_function(
 		double y_);
 
diff --git a/src/alp/sls_fsa1.cpp b/src/alp/sls_fsa1.cpp
index 8b93f60..eb247e8 100644
--- a/src/alp/sls_fsa1.cpp
+++ b/src/alp/sls_fsa1.cpp
@@ -27,7 +27,7 @@
 
 File name: sls_repwords.cpp
 
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
 
 Contents: Frameshift alignment algorithms 
 
@@ -181,22 +181,7 @@ long int seq_number_)//number of tested alignments
 
 		if(random_factor<0)
 		{
-			random_factor=(long int)time(NULL);
-			#ifndef _MSC_VER //UNIX program
-				struct timeval tv;
-				struct timezone tz;
-				gettimeofday(&tv, &tz);
-				random_factor+=tv.tv_usec*10000000;
-			#else
-				struct _timeb timebuffer;
-				char *timeline;
-				_ftime( &timebuffer );
-				timeline = ctime( & ( timebuffer.time ) );
-				rand_+=timebuffer.millitm*10000000;
-			#endif
-
-			random_factor=abs(random_factor);
-
+			random_factor=sls_basic::random_seed_from_time();
 			d_rand_flag=false;
 
 		};
diff --git a/src/alp/sls_fsa1_utils.cpp b/src/alp/sls_fsa1_utils.cpp
index 18aa419..617e50b 100644
--- a/src/alp/sls_fsa1_utils.cpp
+++ b/src/alp/sls_fsa1_utils.cpp
@@ -27,7 +27,7 @@
 
 File name: sls_fsa1_utils.cpp
 
-Author: Sergey Sheetlin
+Author: Sergey Sheetlin, Martin Frith
 
 Contents: Frameshift alignment algorithms 
 
@@ -1881,21 +1881,7 @@ bool *rand_flag_)
 
 	if(random_factor_<0)
 	{
-		random_factor_=(long int)time(NULL);
-		#ifndef _MSC_VER //UNIX program
-			struct timeval tv;
-			struct timezone tz;
-			gettimeofday(&tv, &tz);
-			random_factor_+=tv.tv_usec*10000000;
-		#else
-			struct _timeb timebuffer;
-			char *timeline;
-			_ftime( &timebuffer );
-			timeline = ctime( & ( timebuffer.time ) );
-			random_factor_+=timebuffer.millitm*10000000;
-		#endif
-
-		random_factor_=abs(random_factor_);
+		random_factor_=sls_basic::random_seed_from_time();
 
 		if(rand_flag_)
 		{
diff --git a/src/alp/sls_fsa1_utils.hpp b/src/alp/sls_fsa1_utils.hpp
index e991047..45e6ed8 100644
--- a/src/alp/sls_fsa1_utils.hpp
+++ b/src/alp/sls_fsa1_utils.hpp
@@ -51,12 +51,6 @@ Contents: Frameshift alignment algorithms utilities
 #include <climits>
 #include <errno.h>
 
-#ifndef _MSC_VER //UNIX program
-#include <sys/time.h>
-#else
-#include <sys/timeb.h>
-#endif
-
 #include "njn_random.hpp"
 #include "njn_uniform.hpp"
 
diff --git a/src/lastal.cc b/src/lastal.cc
index 324833b..41732e5 100644
--- a/src/lastal.cc
+++ b/src/lastal.cc
@@ -826,6 +826,7 @@ void scan( LastAligner& aligner,
   SegmentPairPot gaplessAlns;
   alignGapless( aligner, gaplessAlns, queryNum, strand, querySeq );
   if( args.outputType == 1 ) return;  // we just want gapless alignments
+  if( gaplessAlns.size() == 0 ) return;
 
   if( args.maskLowercase == 1 || args.maskLowercase == 2 )
     makeQualityPssm( aligner, queryNum, strand, querySeq, false );
@@ -840,11 +841,13 @@ void scan( LastAligner& aligner,
 
   alignGapped( aligner, gappedAlns, gaplessAlns,
 	       queryNum, strand, querySeq, Phase::final );
+  if( gappedAlns.size() == 0 ) return;
 
   if (args.maskLowercase == 2) {
     makeQualityPssm(aligner, queryNum, strand, querySeq, true);
     eraseWeakAlignments(aligner, gappedAlns, queryNum, strand, querySeq);
     LOG2("lowercase-filtered alignments=" << gappedAlns.size());
+    if (gappedAlns.size() == 0) return;
     if (args.outputType > 3)
       makeQualityPssm(aligner, queryNum, strand, querySeq, false);
   }
diff --git a/src/version.hh b/src/version.hh
index aefb9eb..2db7e02 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"737"
+"746"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git



More information about the debian-med-commit mailing list