[med-svn] [last-align] 04/06: New upstream version 759

Andreas Tille tille at debian.org
Tue Oct 18 11:46:18 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository last-align.

commit efc61168e70f34c6119d92b9cf49414286aeeee5
Author: Andreas Tille <tille at debian.org>
Date:   Tue Oct 18 13:42:51 2016 +0200

    New upstream version 759
---
 ChangeLog.txt                 | 25 +++++++++++++++-
 doc/lastal.html               |  3 +-
 doc/lastal.txt                |  3 +-
 doc/lastdb.html               |  8 ++---
 doc/lastdb.txt                |  8 ++---
 scripts/last-dotplot          |  2 +-
 src/LastalArguments.cc        |  3 --
 src/gaplessPssmXdrop.cc       | 39 ++++++++++++++++++++++++
 src/gaplessPssmXdrop.hh       |  7 +++++
 src/gaplessTwoQualityXdrop.cc | 46 +++++++++++++++++++++++++++++
 src/gaplessTwoQualityXdrop.hh | 11 +++++++
 src/gaplessXdrop.cc           | 40 +++++++++++++++++++++++++
 src/gaplessXdrop.hh           | 15 ++++++++++
 src/lastal.cc                 | 69 ++++++++++++++++++++++++++++---------------
 src/version.hh                |  2 +-
 15 files changed, 241 insertions(+), 40 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
index d49aba7..00416a1 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,3 +1,26 @@
+2016-09-29  Martin C. Frith  <Martin C. Frith>
+
+	* test/last-test.out, test/last-test.sh:
+	Added (incomplete) tests for gapless overlap alignment.
+	[19b58d988059] [tip]
+
+	* src/LastalArguments.cc, src/gaplessPssmXdrop.cc,
+	src/gaplessPssmXdrop.hh, src/gaplessTwoQualityXdrop.cc,
+	src/gaplessTwoQualityXdrop.hh, src/gaplessXdrop.cc,
+	src/gaplessXdrop.hh, src/lastal.cc:
+	Enabled gapless overlap alignment.
+	[8f2bf0e299a1]
+
+	* src/lastal.cc:
+	Refactoring.
+	[64ac7ea995de]
+
+2016-09-16  Martin C. Frith  <Martin C. Frith>
+
+	* doc/lastal.txt, doc/lastdb.txt, scripts/last-dotplot:
+	Fixed last-dotplot axis labels for breaking change in Pillow 3.0.
+	[a0074597cb5d]
+
 2016-09-01  Martin C. Frith  <Martin C. Frith>
 
 	* doc/last-map-probs.txt, doc/last-pair-probs.txt, doc/last-
@@ -5,7 +28,7 @@
 	/last-bisulfite-paired.sh, examples/last-bisulfite.sh,
 	test/bs100.maf, test/maf-convert-test.out:
 	Replaced score thresholds with significance thresholds.
-	[d48e9d4a7268] [tip]
+	[d48e9d4a7268]
 
 	* doc/last-split.txt, src/split/last-split-main.cc, src/split/last-
 	split.cc, test/last-split-test.out:
diff --git a/doc/lastal.html b/doc/lastal.html
index 0dea52a..d58d90a 100644
--- a/doc/lastal.html
+++ b/doc/lastal.html
@@ -567,7 +567,8 @@ letters spanned by the match.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-k <var>STEP</var></span></kbd></td>
 <td>Look for initial matches starting only at every STEP-th position
-in the query.  This makes lastal faster but less sensitive.</td></tr>
+in each query (positions 0, STEP, 2×STEP, etc).  This makes
+lastal faster but less sensitive.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-W <var>SIZE</var></span></kbd></td>
 <td>Look for initial matches starting only at query positions that
diff --git a/doc/lastal.txt b/doc/lastal.txt
index a8f6b32..ce62516 100644
--- a/doc/lastal.txt
+++ b/doc/lastal.txt
@@ -224,7 +224,8 @@ Initial-match options
 
   -k STEP
       Look for initial matches starting only at every STEP-th position
-      in the query.  This makes lastal faster but less sensitive.
+      in each query (positions 0, STEP, 2×STEP, etc).  This makes
+      lastal faster but less sensitive.
 
   -W SIZE
       Look for initial matches starting only at query positions that
diff --git a/doc/lastdb.html b/doc/lastdb.html
index c5dfc0b..7fc9a57 100644
--- a/doc/lastdb.html
+++ b/doc/lastdb.html
@@ -414,10 +414,10 @@ lastal command line.</p>
 <tr><td class="option-group">
 <kbd><span class="option">-w <var>STEP</var></span></kbd></td>
 <td>Allow initial matches to start only at every STEP-th position in
-each of the sequences given to lastdb.  This reduces the memory
-usage of lastdb and lastal, and it makes lastdb faster.  Its
-effect on the speed and sensitivity of lastal is not entirely
-clear.</td></tr>
+each of the sequences given to lastdb (positions 0, STEP,
+2×STEP, etc).  This reduces the memory usage of lastdb and
+lastal, and it makes lastdb faster.  Its effect on the speed and
+sensitivity of lastal is not entirely clear.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-W <var>SIZE</var></span></kbd></td>
 <td><p class="first">Allow initial matches to start only at positions that are
diff --git a/doc/lastdb.txt b/doc/lastdb.txt
index b0485fb..caba336 100644
--- a/doc/lastdb.txt
+++ b/doc/lastdb.txt
@@ -78,10 +78,10 @@ Advanced Options
 
   -w STEP
       Allow initial matches to start only at every STEP-th position in
-      each of the sequences given to lastdb.  This reduces the memory
-      usage of lastdb and lastal, and it makes lastdb faster.  Its
-      effect on the speed and sensitivity of lastal is not entirely
-      clear.
+      each of the sequences given to lastdb (positions 0, STEP,
+      2×STEP, etc).  This reduces the memory usage of lastdb and
+      lastal, and it makes lastdb faster.  Its effect on the speed and
+      sensitivity of lastal is not entirely clear.
 
   -W SIZE
       Allow initial matches to start only at positions that are
diff --git a/scripts/last-dotplot b/scripts/last-dotplot
index f8948e8..52becbf 100755
--- a/scripts/last-dotplot
+++ b/scripts/last-dotplot
@@ -350,7 +350,7 @@ def lastDotplot(opts, args):
                                font, image_mode, opts)
         axis2 = get_axis_image(seq_names2, name_sizes2, seq_starts2, seq_pix2,
                                font, image_mode, opts)
-        axis2 = axis2.rotate(270)
+        axis2 = axis2.rotate(270, expand=1)
         im.paste(axis1, (0, 0))
         im.paste(axis2, (0, 0))
 
diff --git a/src/LastalArguments.cc b/src/LastalArguments.cc
index a4f71f2..f9b2dbf 100644
--- a/src/LastalArguments.cc
+++ b/src/LastalArguments.cc
@@ -354,9 +354,6 @@ LAST home page: http://last.cbrc.jp/\n\
   if( isTranslated() && isQueryStrandMatrix )
     ERR( "can't combine option -F with option -S 1" );
 
-  if( globality == 1 && outputType == 1 )
-    ERR( "can't combine option -T 1 with option -j 1" );
-
   if( isGreedy && outputType > 3 )
     ERR( "can't combine option -M with option -j > 3" );
 
diff --git a/src/gaplessPssmXdrop.cc b/src/gaplessPssmXdrop.cc
index b59ab83..e71dd4b 100644
--- a/src/gaplessPssmXdrop.cc
+++ b/src/gaplessPssmXdrop.cc
@@ -71,6 +71,45 @@ bool isOptimalGaplessPssmXdrop(const uchar *seq,
   return true;
 }
 
+int gaplessPssmXdropOverlap(const uchar *seq,
+			    const ScoreMatrixRow *pssm,
+			    int maxScoreDrop,
+			    size_t &reverseLength,
+			    size_t &forwardLength) {
+  int minScore = 0;
+  int maxScore = 0;
+  int score = 0;
+
+  const uchar *rs = seq;
+  const ScoreMatrixRow *rp = pssm;
+  while (true) {
+    --rs;  --rp;
+    int s = (*rp)[*rs];
+    if (s <= -INF) break;
+    score += s;
+    if (score > maxScore) maxScore = score;
+    else if (score < maxScore - maxScoreDrop) return -INF;
+    else if (score < minScore) minScore = score;
+  }
+
+  maxScore = score - minScore;
+
+  const uchar *fs = seq;
+  const ScoreMatrixRow *fp = pssm;
+  while (true) {
+    int s = (*fp)[*fs];
+    if (s <= -INF) break;
+    score += s;
+    if (score > maxScore) maxScore = score;
+    else if (score < maxScore - maxScoreDrop) return -INF;
+    ++fs;  ++fp;
+  }
+
+  reverseLength = seq - (rs + 1);
+  forwardLength = fs - seq;
+  return score;
+}
+
 int gaplessPssmAlignmentScore(const uchar *seq,
                               const uchar *seqEnd,
                               const ScoreMatrixRow *pssm) {
diff --git a/src/gaplessPssmXdrop.hh b/src/gaplessPssmXdrop.hh
index ebbcea2..53fd6e3 100644
--- a/src/gaplessPssmXdrop.hh
+++ b/src/gaplessPssmXdrop.hh
@@ -10,6 +10,7 @@
 #define GAPLESS_PSSM_XDROP_HH
 
 #include "ScoreMatrixRow.hh"
+#include <stddef.h>
 
 namespace cbrc {
 
@@ -36,6 +37,12 @@ bool isOptimalGaplessPssmXdrop(const uchar *seq,
                                const ScoreMatrixRow *pssm,
                                int maxScoreDrop);
 
+int gaplessPssmXdropOverlap(const uchar *seq,
+			    const ScoreMatrixRow *pssm,
+			    int maxScoreDrop,
+			    size_t &reverseLength,
+			    size_t &forwardLength);
+
 int gaplessPssmAlignmentScore(const uchar *seq,
                               const uchar *seqEnd,
                               const ScoreMatrixRow *pssm);
diff --git a/src/gaplessTwoQualityXdrop.cc b/src/gaplessTwoQualityXdrop.cc
index c089dd3..4989d41 100644
--- a/src/gaplessTwoQualityXdrop.cc
+++ b/src/gaplessTwoQualityXdrop.cc
@@ -87,6 +87,52 @@ bool isOptimalGaplessTwoQualityXdrop(const uchar *seq1,
   return true;
 }
 
+int gaplessTwoQualityXdropOverlap(const uchar *seq1,
+				  const uchar *qual1,
+				  const uchar *seq2,
+				  const uchar *qual2,
+				  const TwoQualityScoreMatrix &m,
+				  int maxScoreDrop,
+				  size_t &reverseLength,
+				  size_t &forwardLength) {
+  int minScore = 0;
+  int maxScore = 0;
+  int score = 0;
+
+  const uchar *rs1 = seq1;
+  const uchar *rq1 = qual1;
+  const uchar *rs2 = seq2;
+  const uchar *rq2 = qual2;
+  while (true) {
+    --rs1;  --rq1;  --rs2;  --rq2;
+    int s = m(*rs1, *rs2, *rq1, *rq2);
+    if (s <= -INF) break;
+    score += s;
+    if (score > maxScore) maxScore = score;
+    else if (score < maxScore - maxScoreDrop) return -INF;
+    else if (score < minScore) minScore = score;
+  }
+
+  maxScore = score - minScore;
+
+  const uchar *fs1 = seq1;
+  const uchar *fq1 = qual1;
+  const uchar *fs2 = seq2;
+  const uchar *fq2 = qual2;
+  while (true) {
+    int s = m(*fs1, *fs2, *fq1, *fq2);
+    if (s <= -INF) break;
+    score += s;
+    if (score > maxScore) maxScore = score;
+    else if (score < maxScore - maxScoreDrop) return -INF;
+    ++fs1;  ++fq1;  ++fs2;  ++fq2;
+  }
+
+  reverseLength = seq1 - (rs1 + 1);
+  forwardLength = fs1 - seq1;
+  return score;
+}
+
 int gaplessTwoQualityAlignmentScore(const uchar *seq1,
                                     const uchar *seq1end,
                                     const uchar *qual1,
diff --git a/src/gaplessTwoQualityXdrop.hh b/src/gaplessTwoQualityXdrop.hh
index a3e4606..afd5274 100644
--- a/src/gaplessTwoQualityXdrop.hh
+++ b/src/gaplessTwoQualityXdrop.hh
@@ -9,6 +9,8 @@
 #ifndef GAPLESS_TWO_QUALITY_XDROP_HH
 #define GAPLESS_TWO_QUALITY_XDROP_HH
 
+#include <stddef.h>
+
 namespace cbrc {
 
 typedef unsigned char uchar;
@@ -51,6 +53,15 @@ bool isOptimalGaplessTwoQualityXdrop(const uchar *seq1,
                                      const TwoQualityScoreMatrix &m,
                                      int maxScoreDrop);
 
+int gaplessTwoQualityXdropOverlap(const uchar *seq1,
+				  const uchar *qual1,
+				  const uchar *seq2,
+				  const uchar *qual2,
+				  const TwoQualityScoreMatrix &m,
+				  int maxScoreDrop,
+				  size_t &reverseLength,
+				  size_t &forwardLength);
+
 int gaplessTwoQualityAlignmentScore(const uchar *seq1,
                                     const uchar *seq1end,
                                     const uchar *qual1,
diff --git a/src/gaplessXdrop.cc b/src/gaplessXdrop.cc
index 0129a7d..09bdf17 100644
--- a/src/gaplessXdrop.cc
+++ b/src/gaplessXdrop.cc
@@ -76,6 +76,46 @@ bool isOptimalGaplessXdrop(const uchar *seq1,
   return true;
 }
 
+int gaplessXdropOverlap(const uchar *seq1,
+			const uchar *seq2,
+			const ScoreMatrixRow *scorer,
+			int maxScoreDrop,
+			size_t &reverseLength,
+			size_t &forwardLength) {
+  int minScore = 0;
+  int maxScore = 0;
+  int score = 0;
+
+  const uchar *r1 = seq1;
+  const uchar *r2 = seq2;
+  while (true) {
+    --r1;  --r2;
+    int s = scorer[*r1][*r2];
+    if (s <= -INF) break;
+    score += s;
+    if (score > maxScore) maxScore = score;
+    else if (score < maxScore - maxScoreDrop) return -INF;
+    else if (score < minScore) minScore = score;
+  }
+
+  maxScore = score - minScore;
+
+  const uchar *f1 = seq1;
+  const uchar *f2 = seq2;
+  while (true) {
+    int s = scorer[*f1][*f2];
+    if (s <= -INF) break;
+    score += s;
+    if (score > maxScore) maxScore = score;
+    else if (score < maxScore - maxScoreDrop) return -INF;
+    ++f1;  ++f2;
+  }
+
+  reverseLength = seq1 - (r1 + 1);
+  forwardLength = f1 - seq1;
+  return score;
+}
+
 int gaplessAlignmentScore(const uchar *seq1,
                           const uchar *seq1end,
                           const uchar *seq2,
diff --git a/src/gaplessXdrop.hh b/src/gaplessXdrop.hh
index e213d3b..7ef7df5 100644
--- a/src/gaplessXdrop.hh
+++ b/src/gaplessXdrop.hh
@@ -6,6 +6,7 @@
 #define GAPLESS_XDROP_HH
 
 #include "ScoreMatrixRow.hh"
+#include <stddef.h>
 
 namespace cbrc {
 
@@ -54,6 +55,20 @@ bool isOptimalGaplessXdrop(const uchar *seq1,
                            const ScoreMatrixRow *scorer,
                            int maxScoreDrop);
 
+// Returns the score, and sets the reverse and forward extension
+// lengths, for a gapless "overlap" alignment starting at (seq1,
+// seq2).  "Overlap" means that the alignment must extend, in each
+// direction, until it hits a score <= -INF (presumably from a
+// sentinel indicating a sequence end).  If the alignment would have
+// any region with score < -maxScoreDrop, -INF is returned and the
+// extension lengths are not set.
+int gaplessXdropOverlap(const uchar *seq1,
+			const uchar *seq2,
+			const ScoreMatrixRow *scorer,
+			int maxScoreDrop,
+			size_t &reverseLength,
+			size_t &forwardLength);
+
 // Calculate the score of the gapless alignment starting at (seq1,
 // seq2) and ending at seq1end.
 int gaplessAlignmentScore(const uchar *seq1,
diff --git a/src/lastal.cc b/src/lastal.cc
index 80f3886..3219c3e 100644
--- a/src/lastal.cc
+++ b/src/lastal.cc
@@ -464,6 +464,12 @@ struct Dispatcher{
          (e == Phase::gapped ) ? args.maxDropGapped : args.maxDropFinal ),
       z( t ? 2 : p ? 1 : 0 ){}
 
+  int gaplessOverlap( indexT x, indexT y, size_t &rev, size_t &fwd ) const{
+    if( z==0 ) return gaplessXdropOverlap( a+x, b+y, m, d, rev, fwd );
+    if( z==1 ) return gaplessPssmXdropOverlap( a+x, p+y, d, rev, fwd );
+    return gaplessTwoQualityXdropOverlap( a+x, i+x, b+y, j+y, t, d, rev, fwd );
+  }
+
   int forwardGaplessScore( indexT x, indexT y ) const{
     if( z==0 ) return forwardGaplessXdropScore( a+x, b+y, m, d );
     if( z==1 ) return forwardGaplessPssmXdropScore( a+x, p+y, d );
@@ -523,9 +529,18 @@ static void writeAlignment(LastAligner &aligner, const Alignment &aln,
     printAndDelete(a.text);
 }
 
+static void writeSegmentPair(LastAligner &aligner, const SegmentPair &s,
+			     size_t queryNum, char strand,
+			     const uchar* querySeq) {
+  Alignment a;
+  a.fromSegmentPair(s);
+  writeAlignment(aligner, a, queryNum, strand, querySeq);
+}
+
 // Find query matches to the suffix array, and do gapless extensions
 void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
 		   size_t queryNum, char strand, const uchar* querySeq ){
+  const bool isOverlap = (args.globality && args.outputType == 1);
   Dispatcher dis( Phase::gapless, aligner, queryNum, strand, querySeq );
   DiagonalTable dt;  // record already-covered positions on each diagonal
   countT matchCount = 0, gaplessExtensionCount = 0, gaplessAlignmentCount = 0;
@@ -563,37 +578,43 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
 	    args.maxGaplessAlignmentsPerQueryPosition ) break;
 
 	indexT j = *beg;  // coordinate in the reference sequence
-
 	if( dt.isCovered( i, j ) ) continue;
-
-	int fs = dis.forwardGaplessScore( j, i );
-	int rs = dis.reverseGaplessScore( j, i );
-	int score = fs + rs;
 	++gaplessExtensionCount;
 
-	// Tried checking the score after isOptimal & addEndpoint, but
-	// the number of extensions decreased by < 10%, and it was
-	// slower overall.
-	if( score < minScoreGapless ) continue;
-
-	indexT tEnd = dis.forwardGaplessEnd( j, i, fs );
-	indexT tBeg = dis.reverseGaplessEnd( j, i, rs );
-	indexT qBeg = i - (j - tBeg);
-	if( !dis.isOptimalGapless( tBeg, tEnd, qBeg ) ) continue;
-	SegmentPair sp( tBeg, qBeg, tEnd - tBeg, score );
-
-	if( args.outputType == 1 ){  // we just want gapless alignments
-	  Alignment aln;
-	  aln.fromSegmentPair(sp);
-	  writeAlignment( aligner, aln, queryNum, strand, querySeq );
-	}
-	else{
-	  gaplessAlns.add(sp);  // add the gapless alignment to the pot
+	if( isOverlap ){
+	  size_t revLen, fwdLen;
+	  int score = dis.gaplessOverlap( j, i, revLen, fwdLen );
+	  if( score < minScoreGapless ) continue;
+	  SegmentPair sp( j - revLen, i - revLen, revLen + fwdLen, score );
+	  dt.addEndpoint( sp.end2(), sp.end1() );
+	  writeSegmentPair( aligner, sp, queryNum, strand, querySeq );
+	}else{
+	  int fs = dis.forwardGaplessScore( j, i );
+	  int rs = dis.reverseGaplessScore( j, i );
+	  int score = fs + rs;
+
+	  // Tried checking the score after isOptimal & addEndpoint,
+	  // but the number of extensions decreased by < 10%, and it
+	  // was slower overall.
+	  if( score < minScoreGapless ) continue;
+
+	  indexT tEnd = dis.forwardGaplessEnd( j, i, fs );
+	  indexT tBeg = dis.reverseGaplessEnd( j, i, rs );
+	  indexT qBeg = i - (j - tBeg);
+	  if( !dis.isOptimalGapless( tBeg, tEnd, qBeg ) ) continue;
+	  SegmentPair sp( tBeg, qBeg, tEnd - tBeg, score );
+	  dt.addEndpoint( sp.end2(), sp.end1() );
+
+	  if( args.outputType == 1 ){  // we just want gapless alignments
+	    writeSegmentPair( aligner, sp, queryNum, strand, querySeq );
+	  }
+	  else{
+	    gaplessAlns.add(sp);  // add the gapless alignment to the pot
+	  }
 	}
 
 	++gaplessAlignmentsPerQueryPosition;
 	++gaplessAlignmentCount;
-	dt.addEndpoint( sp.end2(), sp.end1() );
       }
     }
   }
diff --git a/src/version.hh b/src/version.hh
index 7f9046d..406a230 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"755"
+"759"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git



More information about the debian-med-commit mailing list