[med-svn] [last-align] 01/08: New upstream version 921

Andreas Tille tille at debian.org
Mon Feb 5 09:44:33 UTC 2018


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository last-align.

commit 86ec81874c63279f774ca37a17478fe534085b7c
Author: Andreas Tille <tille at debian.org>
Date:   Mon Feb 5 10:29:16 2018 +0100

    New upstream version 921
---
 ChangeLog.txt                       | 177 +++++++-
 doc/last-dotplot.html               |  99 +++--
 doc/last-dotplot.txt                |  82 +++-
 doc/last-train.html                 |   3 +
 doc/last-train.txt                  |   2 +
 scripts/last-dotplot                | 829 ++++++++++++++++++++++++------------
 scripts/last-train                  |   3 +
 src/Alignment.cc                    |  36 +-
 src/Alignment.hh                    |  13 +-
 src/AlignmentWrite.cc               |  70 +--
 src/Centroid.cc                     | 290 ++++++-------
 src/Centroid.hh                     |  73 ++--
 src/CyclicSubsetSeed.cc             |   9 +-
 src/LastEvaluer.cc                  |  10 +-
 src/LastEvaluer.hh                  |   3 +-
 src/LastdbArguments.cc              |  13 +-
 src/LastdbArguments.hh              |   1 +
 src/MultiSequence.cc                |  92 +++-
 src/MultiSequence.hh                |  90 ++--
 src/MultiSequenceQual.cc            |  56 +--
 src/ScoreMatrix.cc                  |   7 +-
 src/alp/sls_pvalues.cpp             |   4 +-
 src/io.cc                           |  23 -
 src/io.hh                           |  61 +--
 src/last-pair-probs.cc              |  15 +-
 src/last.hh                         |  55 +++
 src/lastal.cc                       |  76 +---
 src/lastdb.cc                       |  69 ++-
 src/makefile                        |  45 +-
 src/mcf_zstream.hh                  |   4 +-
 src/split/cbrc_split_aligner.cc     |  10 +-
 src/split/cbrc_unsplit_alignment.cc |   3 +-
 src/split/cbrc_unsplit_alignment.hh |   2 +
 src/split/last-split.cc             |   2 +-
 src/version.hh                      |   2 +-
 src/zio.hh                          |  37 ++
 36 files changed, 1462 insertions(+), 904 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
index 7c46466..eb4f5b2 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,8 +1,183 @@
+2018-01-05  Martin C. Frith  <Martin C. Frith>
+
+	* src/Alignment.cc, src/Alignment.hh, src/Centroid.cc,
+	src/Centroid.hh:
+	Refactor
+	[8430dbf5abe7] [tip]
+
+	* src/Centroid.cc:
+	Try to calculate lastal probs a tiny bit faster
+	[1abf5130f283]
+
+	* src/Alignment.cc, src/Alignment.hh, src/AlignmentWrite.cc,
+	src/Centroid.cc, src/Centroid.hh:
+	Refactor
+	[57012f1307c8]
+
+	* src/Alignment.cc, src/Centroid.cc, src/Centroid.hh:
+	Refactor
+	[c98504d1610e]
+
+	* src/Centroid.cc, src/Centroid.hh:
+	Refactor
+	[c83131b68177]
+
+2017-12-21  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	last-dotplot: add strand/orientation options
+	[ffe68ba3d865]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[6079fabb49a5]
+
+2017-12-05  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	last-dotplot: add alignment-order options
+	[f4ca19d126e3]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[e87abc7ae9c6]
+
+2017-11-08  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot:
+	Tweak last-dotplot range-end pixels
+	[8834139fa8a8]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Add last-dotplot secondary alignments option
+	[4080f3f6f73d]
+
+	* scripts/last-dotplot:
+	Add gap-cutting options to last-dotplot
+	[42653029d900]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Remove last-dotplot trim options
+	[92dfad507286]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[6840398323e1]
+
+	* scripts/last-dotplot:
+	Tweak last-dotplot's sort options
+	[87991b57ed04]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[21fbf377641f]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[8d52cef22afa]
+
+	* scripts/last-dotplot:
+	Tweak dotplot labels
+	[c2f50cd4e29d]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Make last-dotplot try to find a nice font
+	[3af6b02e3f9c]
+
+	* src/CyclicSubsetSeed.cc, src/ScoreMatrix.cc, src/io.cc, src/io.hh,
+	src/last-pair-probs.cc, src/lastal.cc, src/lastdb.cc, src/makefile,
+	src/zio.hh:
+	Try to fix build fails on some computers
+	[db84a7e4ff8a]
+
+2017-11-02  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot:
+	Tweak last-dotplot's label placement
+	[2003b209d750]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[5ce3e786e05a]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[ccf8902bf86c]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Change last-dotplot's label options
+	[f6a9c15287ea]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[f7bc7d099e2b]
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[2238466f36bd]
+
+	* scripts/last-dotplot:
+	Tweak the dotplot colors
+	[8eb06508b3ed]
+
+2017-10-24  Martin C. Frith  <Martin C. Frith>
+
+	* src/CyclicSubsetSeed.cc, src/MultiSequence.cc, src/MultiSequence.hh,
+	src/MultiSequenceQual.cc, src/ScoreMatrix.cc:
+	Make the code a bit more robust
+	[efb59c80bd48]
+
+	* src/split/cbrc_split_aligner.cc,
+	src/split/cbrc_unsplit_alignment.cc,
+	src/split/cbrc_unsplit_alignment.hh, src/split/last-split.cc:
+	Make last-split work with lastdb strand option
+	[e6f6334cedb8]
+
+	* doc/last-train.txt, scripts/last-train, src/LastEvaluer.cc,
+	src/LastEvaluer.hh, src/alp/sls_pvalues.cpp, src/lastal.cc:
+	Fix some E-value & last-train fails
+	[c208904d9c3e]
+
+2017-10-20  Martin C. Frith  <Martin C. Frith>
+
+	* src/AlignmentWrite.cc, src/LastdbArguments.cc,
+	src/LastdbArguments.hh, src/MultiSequence.cc, src/MultiSequence.hh,
+	src/lastdb.cc:
+	Add lastdb strand option
+	[bef94bc275d5]
+
+	* src/MultiSequence.hh, src/last.hh, src/lastal.cc, src/lastdb.cc,
+	src/makefile, src/mcf_zstream.hh:
+	Refactor
+	[a4cdd7fbaee2]
+
+2017-10-19  Martin C. Frith  <Martin C. Frith>
+
+	* src/Alignment.hh, src/AlignmentWrite.cc, src/MultiSequence.cc,
+	src/MultiSequence.hh, src/lastal.cc:
+	Start implementing reverse strands in lastdb
+	[5187bd412fb6]
+
+	* src/MultiSequence.cc, src/MultiSequence.hh,
+	src/MultiSequenceQual.cc:
+	Refactor
+	[3ba00f2e93e0]
+
+	* src/MultiSequence.cc, src/MultiSequence.hh,
+	src/MultiSequenceQual.cc, src/lastdb.cc:
+	Refactor
+	[69da6e43d607]
+
+	* src/LastdbArguments.cc, src/MultiSequence.cc, src/MultiSequence.hh,
+	src/lastal.cc:
+	Refactor
+	[284c69b1bf3e]
+
 2017-10-10  Martin C. Frith  <Martin C. Frith>
 
 	* scripts/last-train:
 	Make last-train print the lastal version
-	[c2ce08b5a76a] [tip]
+	[c2ce08b5a76a]
 
 	* src/Alphabet.cc, src/Alphabet.hh, src/MultiSequence.hh,
 	src/lastal.cc, src/lastdb.cc:
diff --git a/doc/last-dotplot.html b/doc/last-dotplot.html
index 21cc1d9..8ebcced 100644
--- a/doc/last-dotplot.html
+++ b/doc/last-dotplot.html
@@ -329,16 +329,12 @@ last-dotplot my-alignments my-plot.png
 <pre class="literal-block">
 last-dotplot alns alns.gif
 </pre>
-<p>To get a nicer font, try something like:</p>
-<pre class="literal-block">
-last-dotplot -f /usr/share/fonts/liberation/LiberationSans-Regular.ttf alns alns.png
-</pre>
-<p>or:</p>
-<pre class="literal-block">
-last-dotplot -f /Library/Fonts/Arial.ttf alns alns.png
-</pre>
-<p>If the fonts are located somewhere different on your computer, change
-this as appropriate.</p>
+<div class="section" id="terminology">
+<h2>Terminology</h2>
+<p>last-dotplot shows alignments of one set of sequences against another
+set of sequences.  This document calls a "set of sequences" a
+"genome", though it need not actually be a genome.</p>
+</div>
 <div class="section" id="choosing-sequences">
 <h2>Choosing sequences</h2>
 <p>If there are too many sequences, the dotplot will be very cluttered,
@@ -387,6 +383,14 @@ last-dotplot -1 'chr?' -1 'chr??' alns alns.png
 last-dotplot -1 chr9:0-1000 alns alns.png
 </pre>
 </div>
+<div class="section" id="fonts">
+<h2>Fonts</h2>
+<p>last-dotplot tries to find a nice font on your computer, but may fail
+and use an ugly font.  You can specify a font like this:</p>
+<pre class="literal-block">
+last-dotplot -f /usr/share/fonts/liberation/LiberationSans-Regular.ttf alns alns.png
+</pre>
+</div>
 <div class="section" id="options">
 <h2>Options</h2>
 <blockquote>
@@ -419,25 +423,64 @@ last-dotplot -1 chr9:0-1000 alns alns.png
 <kbd><span class="option">-r <var>COLOR</var></span>, <span class="option">--reversecolor=<var>COLOR</var></span></kbd></td>
 <td>Color for reverse alignments.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">--alignments=<var>FILE</var></span></kbd></td>
+<td>Read secondary alignments.  For example: we could use primary
+alignment data with one human DNA read aligned to the human
+genome, and secondary alignment data with the whole chimpanzee
+versus human genomes.  last-dotplot will show the parts of the
+secondary alignments that are near the primary alignments.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">--sort1=<var>N</var></span></kbd></td>
 <td>Put the 1st genome's sequences left-to-right in order of: their
-appearance in the input (0), their names (1), their lengths (2).</td></tr>
+appearance in the input (0), their names (1), their lengths (2),
+the top-to-bottom order of (the midpoints of) their alignments
+(3).  You can use two colon-separated values, e.g. "2:1" to
+specify 2 for primary and 1 for secondary alignments.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">--sort2=<var>N</var></span></kbd></td>
 <td>Put the 2nd genome's sequences top-to-bottom in order of: their
-appearance in the input (0), their names (1), their lengths (2).</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">--trim1</span></kbd></td>
-<td>Trim unaligned sequence flanks from the 1st (horizontal) genome.</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">--trim2</span></kbd></td>
-<td>Trim unaligned sequence flanks from the 2nd (vertical) genome.</td></tr>
+appearance in the input (0), their names (1), their lengths (2),
+the left-to-right order of (the midpoints of) their alignments
+(3).</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--strands1=<var>N</var></span></kbd></td>
+<td>Put the 1st genome's sequences: in forwards orientation (0), in
+the orientation of most of their aligned bases (1).  In the
+latter case, the labels will be colored (in the same way as the
+alignments) to indicate the sequences' orientations.  You can
+use two colon-separated values for primary and secondary
+alignments.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--strands2=<var>N</var></span></kbd></td>
+<td>Put the 2nd genome's sequences: in forwards orientation (0), in
+the orientation of most of their aligned bases (1).</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--max-gap1=<var>FRAC</var></span></kbd></td>
+<td>Maximum unaligned gap in the 1st genome.  For example, if two
+parts of one DNA read align to widely-separated parts of one
+chromosome, it's probably best to cut the intervening region
+from the dotplot.  FRAC is a fraction of the length of the
+(primary) alignments.  You can specify "inf" to keep all
+unaligned gaps.  You can use two comma-separated values,
+e.g. "0.5,3" to specify 0.5 for end-gaps (unaligned sequence
+ends) and 3 for mid-gaps (between alignments).  You can use two
+colon-separated values (each of which may be comma-separated)
+for primary and secondary alignments.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--max-gap2=<var>FRAC</var></span></kbd></td>
+<td>Maximum unaligned gap in the 2nd genome.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--pad=<var>FRAC</var></span></kbd></td>
+<td>Length of pad to leave when cutting unaligned gaps.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">--border-pixels=<var>INT</var></span></kbd></td>
 <td>Number of pixels between sequences.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">--border-color=<var>COLOR</var></span></kbd></td>
 <td>Color for pixels between sequences.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--margin-color=<var>COLOR</var></span></kbd></td>
+<td>Color for the margins.</td></tr>
 </tbody>
 </table>
 </blockquote>
@@ -455,17 +498,21 @@ appearance in the input (0), their names (1), their lengths (2).</td></tr>
 <kbd><span class="option">-s <var>SIZE</var></span>, <span class="option">--fontsize=<var>SIZE</var></span></kbd></td>
 <td>TrueType or OpenType font size.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">--labels1=<var>N</var></span></kbd></td>
+<td>Label the displayed regions of the 1st genome with their:
+sequence name (0), name:length (1), name:start:length (2),
+name:start-end (3).</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--labels2=<var>N</var></span></kbd></td>
+<td>Label the displayed regions of the 2nd genome with their:
+sequence name (0), name:length (1), name:start:length (2),
+name:start-end (3).</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">--rot1=<var>ROT</var></span></kbd></td>
 <td>Text rotation for the 1st genome: h(orizontal) or v(ertical).</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">--rot2=<var>ROT</var></span></kbd></td>
 <td>Text rotation for the 2nd genome: h(orizontal) or v(ertical).</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">--lengths1</span></kbd></td>
-<td>Show sequence lengths for the 1st (horizontal) genome.</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">--lengths2</span></kbd></td>
-<td>Show sequence lengths for the 2nd (vertical) genome.</td></tr>
 </tbody>
 </table>
 </blockquote>
@@ -554,11 +601,11 @@ of unknown sequence.</p>
 <p>An unsequenced gap will be shown only if it covers at least one whole
 pixel.</p>
 </div>
+</div>
 <div class="section" id="colors">
-<h3>Colors</h3>
+<h2>Colors</h2>
 <p>Colors can be specified in <a class="reference external" href="http://effbot.org/imagingbook/imagecolor.htm">various ways described here</a>.</p>
 </div>
 </div>
-</div>
 </body>
 </html>
diff --git a/doc/last-dotplot.txt b/doc/last-dotplot.txt
index 2c1ff46..a908a88 100644
--- a/doc/last-dotplot.txt
+++ b/doc/last-dotplot.txt
@@ -12,16 +12,12 @@ The output can be in any format supported by the Imaging Library::
 
   last-dotplot alns alns.gif
 
-To get a nicer font, try something like::
+Terminology
+-----------
 
-  last-dotplot -f /usr/share/fonts/liberation/LiberationSans-Regular.ttf alns alns.png
-
-or::
-
-  last-dotplot -f /Library/Fonts/Arial.ttf alns alns.png
-
-If the fonts are located somewhere different on your computer, change
-this as appropriate.
+last-dotplot shows alignments of one set of sequences against another
+set of sequences.  This document calls a "set of sequences" a
+"genome", though it need not actually be a genome.
 
 Choosing sequences
 ------------------
@@ -59,6 +55,14 @@ You can also specify a sequence range; for example this gets the first
 
   last-dotplot -1 chr9:0-1000 alns alns.png
 
+Fonts
+-----
+
+last-dotplot tries to find a nice font on your computer, but may fail
+and use an ugly font.  You can specify a font like this::
+
+  last-dotplot -f /usr/share/fonts/liberation/LiberationSans-Regular.ttf alns alns.png
+
 Options
 -------
 
@@ -78,20 +82,54 @@ Options
       Color for forward alignments.
   -r COLOR, --reversecolor=COLOR
       Color for reverse alignments.
+  --alignments=FILE
+      Read secondary alignments.  For example: we could use primary
+      alignment data with one human DNA read aligned to the human
+      genome, and secondary alignment data with the whole chimpanzee
+      versus human genomes.  last-dotplot will show the parts of the
+      secondary alignments that are near the primary alignments.
   --sort1=N
       Put the 1st genome's sequences left-to-right in order of: their
-      appearance in the input (0), their names (1), their lengths (2).
+      appearance in the input (0), their names (1), their lengths (2),
+      the top-to-bottom order of (the midpoints of) their alignments
+      (3).  You can use two colon-separated values, e.g. "2:1" to
+      specify 2 for primary and 1 for secondary alignments.
   --sort2=N
       Put the 2nd genome's sequences top-to-bottom in order of: their
-      appearance in the input (0), their names (1), their lengths (2).
-  --trim1
-      Trim unaligned sequence flanks from the 1st (horizontal) genome.
-  --trim2
-      Trim unaligned sequence flanks from the 2nd (vertical) genome.
+      appearance in the input (0), their names (1), their lengths (2),
+      the left-to-right order of (the midpoints of) their alignments
+      (3).
+  --strands1=N
+      Put the 1st genome's sequences: in forwards orientation (0), in
+      the orientation of most of their aligned bases (1).  In the
+      latter case, the labels will be colored (in the same way as the
+      alignments) to indicate the sequences' orientations.  You can
+      use two colon-separated values for primary and secondary
+      alignments.
+  --strands2=N
+      Put the 2nd genome's sequences: in forwards orientation (0), in
+      the orientation of most of their aligned bases (1).
+  --max-gap1=FRAC
+      Maximum unaligned gap in the 1st genome.  For example, if two
+      parts of one DNA read align to widely-separated parts of one
+      chromosome, it's probably best to cut the intervening region
+      from the dotplot.  FRAC is a fraction of the length of the
+      (primary) alignments.  You can specify "inf" to keep all
+      unaligned gaps.  You can use two comma-separated values,
+      e.g. "0.5,3" to specify 0.5 for end-gaps (unaligned sequence
+      ends) and 3 for mid-gaps (between alignments).  You can use two
+      colon-separated values (each of which may be comma-separated)
+      for primary and secondary alignments.
+  --max-gap2=FRAC
+      Maximum unaligned gap in the 2nd genome.
+  --pad=FRAC
+      Length of pad to leave when cutting unaligned gaps.
   --border-pixels=INT
       Number of pixels between sequences.
   --border-color=COLOR
       Color for pixels between sequences.
+  --margin-color=COLOR
+      Color for the margins.
 
 Text options
 ~~~~~~~~~~~~
@@ -100,14 +138,18 @@ Text options
       TrueType or OpenType font file.
   -s SIZE, --fontsize=SIZE
       TrueType or OpenType font size.
+  --labels1=N
+      Label the displayed regions of the 1st genome with their:
+      sequence name (0), name:length (1), name:start:length (2),
+      name:start-end (3).
+  --labels2=N
+      Label the displayed regions of the 2nd genome with their:
+      sequence name (0), name:length (1), name:start:length (2),
+      name:start-end (3).
   --rot1=ROT
       Text rotation for the 1st genome: h(orizontal) or v(ertical).
   --rot2=ROT
       Text rotation for the 2nd genome: h(orizontal) or v(ertical).
-  --lengths1
-      Show sequence lengths for the 1st (horizontal) genome.
-  --lengths2
-      Show sequence lengths for the 2nd (vertical) genome.
 
 Annotation options
 ~~~~~~~~~~~~~~~~~~
@@ -166,7 +208,7 @@ An unsequenced gap will be shown only if it covers at least one whole
 pixel.
 
 Colors
-~~~~~~
+------
 
 Colors can be specified in `various ways described here
 <http://effbot.org/imagingbook/imagecolor.htm>`_.
diff --git a/doc/last-train.html b/doc/last-train.html
index f510a51..ee0fee2 100644
--- a/doc/last-train.html
+++ b/doc/last-train.html
@@ -346,6 +346,9 @@ zcat queries.fasta.gz | last-train mydb
 <tr><td class="option-group">
 <kbd><span class="option">-h</span>, <span class="option">--help</span></kbd></td>
 <td>Show a help message, with default option values, and exit.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">-v</span>, <span class="option">--verbose</span></kbd></td>
+<td>Show more details of intermediate steps.</td></tr>
 </tbody>
 </table>
 </blockquote>
diff --git a/doc/last-train.txt b/doc/last-train.txt
index c965ff3..89ae977 100644
--- a/doc/last-train.txt
+++ b/doc/last-train.txt
@@ -27,6 +27,8 @@ Options
 
   -h, --help
          Show a help message, with default option values, and exit.
+  -v, --verbose
+         Show more details of intermediate steps.
 
 Training options
 ~~~~~~~~~~~~~~~~
diff --git a/scripts/last-dotplot b/scripts/last-dotplot
index f29fed2..419ba82 100755
--- a/scripts/last-dotplot
+++ b/scripts/last-dotplot
@@ -9,14 +9,21 @@
 # according to the number of aligned nt-pairs within it, but the
 # result is too faint.  How can this be done better?
 
+import collections
+import functools
 import gzip
-import fnmatch, itertools, optparse, os, re, sys
+from fnmatch import fnmatchcase
+from operator import itemgetter
+import subprocess
+import itertools, optparse, os, re, sys
 
 # Try to make PIL/PILLOW work:
 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
 
 def myOpen(fileName):  # faster than fileinput
+    if fileName is None:
+        return []
     if fileName == "-":
         return sys.stdin
     if fileName.endswith(".gz"):
@@ -28,20 +35,29 @@ def warn(message):
         prog = os.path.basename(sys.argv[0])
         sys.stderr.write(prog + ": " + message + "\n")
 
-def croppedBlocks(blocks, range1, range2):
-    cropBeg1, cropEnd1 = range1
-    cropBeg2, cropEnd2 = range2
-    if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
-    if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
-    for beg1, beg2, size in blocks:
-        b1 = max(cropBeg1, beg1)
-        e1 = min(cropEnd1, beg1 + size)
-        if b1 >= e1: continue
-        offset = beg2 - beg1
-        b2 = max(cropBeg2, b1 + offset)
-        e2 = min(cropEnd2, e1 + offset)
-        if b2 >= e2: continue
-        yield b2 - offset, b2, e2 - b2
+def groupByFirstItem(things):
+    for k, v in itertools.groupby(things, itemgetter(0)):
+        yield k, [i[1:] for i in v]
+
+def croppedBlocks(blocks, ranges1, ranges2):
+    headBeg1, headBeg2, headSize = blocks[0]
+    for r1 in ranges1:
+        for r2 in ranges2:
+            cropBeg1, cropEnd1 = r1
+            if headBeg1 < 0:
+                cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
+            cropBeg2, cropEnd2 = r2
+            if headBeg2 < 0:
+                cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
+            for beg1, beg2, size in blocks:
+                b1 = max(cropBeg1, beg1)
+                e1 = min(cropEnd1, beg1 + size)
+                if b1 >= e1: continue
+                offset = beg2 - beg1
+                b2 = max(cropBeg2, b1 + offset)
+                e2 = min(cropEnd2, e1 + offset)
+                if b2 >= e2: continue
+                yield b2 - offset, b2, e2 - b2
 
 def tabBlocks(beg1, beg2, blocks):
     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
@@ -102,7 +118,7 @@ def alignmentInput(lines):
                 yield chr1, seqlen1, chr2, seqlen2, blocks
                 mafCount = 0
 
-def seqRangeFromText(text):
+def seqRequestFromText(text):
     if ":" in text:
         pattern, interval = text.rsplit(":", 1)
         if "-" in interval:
@@ -110,52 +126,154 @@ def seqRangeFromText(text):
             return pattern, int(beg), int(end)  # beg may be negative
     return text, 0, sys.maxsize
 
-def rangeFromSeqName(seqRanges, name, seqLen):
-    if not seqRanges: return 0, seqLen
-    base = name.split(".")[-1]  # allow for names like hg19.chr7
-    for pat, beg, end in seqRanges:
-        if fnmatch.fnmatchcase(name, pat) or fnmatch.fnmatchcase(base, pat):
-            return max(beg, 0), min(end, seqLen)
-    return None
-
-def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
-    if seqName not in seqLimits:
-        seqNames.append(seqName)
-    if isTrim:
-        beg = blocks[0][index]
-        end = blocks[-1][index] + blocks[-1][2]
-        if beg < 0: beg, end = -end, -beg
-        if seqName in seqLimits:
-            b, e = seqLimits[seqName]
-            seqLimits[seqName] = min(b, beg), max(e, end)
-        else:
-            seqLimits[seqName] = beg, end
+def rangesFromSeqName(seqRequests, name, seqLen):
+    if seqRequests:
+        base = name.split(".")[-1]  # allow for names like hg19.chr7
+        for pat, beg, end in seqRequests:
+            if fnmatchcase(name, pat) or fnmatchcase(base, pat):
+                yield max(beg, 0), min(end, seqLen)
+    else:
+        yield 0, seqLen
+
+def updateSeqs(coverDict, seqRanges, seqName, ranges, coveredRange):
+    beg, end = coveredRange
+    if beg < 0:
+        coveredRange = -end, -beg
+    if seqName in coverDict:
+        coverDict[seqName].append(coveredRange)
     else:
-        seqLimits[seqName] = seqRange
+        coverDict[seqName] = [coveredRange]
+        for beg, end in ranges:
+            r = seqName, beg, end
+            seqRanges.append(r)
 
 def readAlignments(fileName, opts):
     '''Get alignments and sequence limits, from MAF or tabular format.'''
-    seqRanges1 = map(seqRangeFromText, opts.seq1)
-    seqRanges2 = map(seqRangeFromText, opts.seq2)
+    seqRequests1 = map(seqRequestFromText, opts.seq1)
+    seqRequests2 = map(seqRequestFromText, opts.seq2)
 
     alignments = []
-    seqNames1 = []
-    seqNames2 = []
-    seqLimits1 = {}
-    seqLimits2 = {}
+    seqRanges1 = []
+    seqRanges2 = []
+    coverDict1 = {}
+    coverDict2 = {}
     lines = myOpen(fileName)
     for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
-        range1 = rangeFromSeqName(seqRanges1, seqName1, seqLen1)
-        if not range1: continue
-        range2 = rangeFromSeqName(seqRanges2, seqName2, seqLen2)
-        if not range2: continue
-        b = list(croppedBlocks(list(blocks), range1, range2))
+        ranges1 = sorted(rangesFromSeqName(seqRequests1, seqName1, seqLen1))
+        if not ranges1: continue
+        ranges2 = sorted(rangesFromSeqName(seqRequests2, seqName2, seqLen2))
+        if not ranges2: continue
+        b = list(croppedBlocks(list(blocks), ranges1, ranges2))
+        if not b: continue
+        aln = seqName1, seqName2, b
+        alignments.append(aln)
+        coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
+        updateSeqs(coverDict1, seqRanges1, seqName1, ranges1, coveredRange1)
+        coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
+        updateSeqs(coverDict2, seqRanges2, seqName2, ranges2, coveredRange2)
+    return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
+
+def nameAndRangesFromDict(cropDict, seqName):
+    if seqName in cropDict:
+        return seqName, cropDict[seqName]
+    n = seqName.split(".")[-1]
+    if n in cropDict:
+        return n, cropDict[n]
+    return seqName, []
+
+def rangesForSecondaryAlignments(primaryRanges, seqLen):
+    if primaryRanges:
+        return primaryRanges
+    return [(0, seqLen)]
+
+def readSecondaryAlignments(opts, cropRanges1, cropRanges2):
+    cropDict1 = dict(groupByFirstItem(cropRanges1))
+    cropDict2 = dict(groupByFirstItem(cropRanges2))
+
+    alignments = []
+    seqRanges1 = []
+    seqRanges2 = []
+    coverDict1 = {}
+    coverDict2 = {}
+    lines = myOpen(opts.alignments)
+    for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
+        seqName1, ranges1 = nameAndRangesFromDict(cropDict1, seqName1)
+        seqName2, ranges2 = nameAndRangesFromDict(cropDict2, seqName2)
+        if not ranges1 and not ranges2:
+            continue
+        r1 = rangesForSecondaryAlignments(ranges1, seqLen1)
+        r2 = rangesForSecondaryAlignments(ranges2, seqLen2)
+        b = list(croppedBlocks(list(blocks), r1, r2))
         if not b: continue
         aln = seqName1, seqName2, b
         alignments.append(aln)
-        updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
-        updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
-    return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
+        if not ranges1:
+            coveredRange1 = b[0][0], b[-1][0] + b[-1][2]
+            updateSeqs(coverDict1, seqRanges1, seqName1, r1, coveredRange1)
+        if not ranges2:
+            coveredRange2 = b[0][1], b[-1][1] + b[-1][2]
+            updateSeqs(coverDict2, seqRanges2, seqName2, r2, coveredRange2)
+    return alignments, seqRanges1, coverDict1, seqRanges2, coverDict2
+
+def twoValuesFromOption(text, separator):
+    if separator in text:
+        return text.split(separator)
+    return text, text
+
+def mergedRanges(ranges):
+    oldBeg, maxEnd = ranges[0]
+    for beg, end in ranges:
+        if beg > maxEnd:
+            yield oldBeg, maxEnd
+            oldBeg = beg
+            maxEnd = end
+        elif end > maxEnd:
+            maxEnd = end
+    yield oldBeg, maxEnd
+
+def mergedRangesPerSeq(coverDict):
+    for k, v in coverDict.iteritems():
+        v.sort()
+        yield k, list(mergedRanges(v))
+
+def coveredLength(mergedCoverDict):
+    return sum(sum(e - b for b, e in v) for v in mergedCoverDict.itervalues())
+
+def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
+    maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
+    maxEndGap = max(float(maxEndGapFrac) * minAlignedBases, endPad * 1.0)
+    maxMidGap = max(float(maxMidGapFrac) * minAlignedBases, midPad * 2.0)
+
+    for seqName, rangeBeg, rangeEnd in seqRanges:
+        seqBlocks = coverDict[seqName]
+        blocks = [i for i in seqBlocks if i[0] < rangeEnd and i[1] > rangeBeg]
+        if blocks[0][0] - rangeBeg > maxEndGap:
+            rangeBeg = blocks[0][0] - endPad
+        for j, y in enumerate(blocks):
+            if j:
+                x = blocks[j - 1]
+                if y[0] - x[1] > maxMidGap:
+                    yield seqName, rangeBeg, x[1] + midPad
+                    rangeBeg = y[0] - midPad
+        if rangeEnd - blocks[-1][1] > maxEndGap:
+            rangeEnd = blocks[-1][1] + endPad
+        yield seqName, rangeBeg, rangeEnd
+
+def rangesWithStrandInfo(seqRanges, strandOpt, alignments, seqIndex):
+    if strandOpt == "1":
+        forwardMinusReverse = collections.defaultdict(int)
+        for i in alignments:
+            blocks = i[2]
+            beg1, beg2, size = blocks[0]
+            numOfAlignedLetterPairs = sum(i[2] for i in blocks)
+            if (beg1 < 0) != (beg2 < 0):  # opposite-strand alignment
+                numOfAlignedLetterPairs *= -1
+            forwardMinusReverse[i[seqIndex]] += numOfAlignedLetterPairs
+    strandNum = 0
+    for seqName, beg, end in seqRanges:
+        if strandOpt == "1":
+            strandNum = 1 if forwardMinusReverse[seqName] >= 0 else 2
+        yield seqName, beg, end, strandNum
 
 def natural_sort_key(my_string):
     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
@@ -163,17 +281,93 @@ def natural_sort_key(my_string):
     parts[1::2] = map(int, parts[1::2])
     return parts
 
-def textDimensions(imageDraw, font, textRot, text):
-    x, y = imageDraw.textsize(text, font=font)
-    return (y, x) if textRot else (x, y)
-
-def get_text_sizes(my_strings, font, fontsize, image_mode, textRot):
-    '''Get widths & heights, in pixels, of some strings.'''
-    if fontsize == 0: return [(0, 0) for i in my_strings]
-    image_size = 1, 1
-    im = Image.new(image_mode, image_size)
-    draw = ImageDraw.Draw(im)
-    return [textDimensions(draw, font, textRot, i) for i in my_strings]
+def nameKey(oneSeqRanges):
+    return natural_sort_key(oneSeqRanges[0][0])
+
+def sizeKey(oneSeqRanges):
+    return sum(b - e for n, b, e in oneSeqRanges), nameKey(oneSeqRanges)
+
+def alignmentKey(seqNamesToLists, oneSeqRanges):
+    seqName = oneSeqRanges[0][0]
+    alignmentsOfThisSequence = seqNamesToLists[seqName]
+    numOfAlignedLetterPairs = sum(i[3] for i in alignmentsOfThisSequence)
+    toMiddle = numOfAlignedLetterPairs // 2
+    for i in alignmentsOfThisSequence:
+        toMiddle -= i[3]
+        if toMiddle < 0:
+            return i[1:3]  # sequence-rank and "position" of this alignment
+
+def rankAndFlipPerSeq(seqRanges):
+    rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
+    for rank, group in enumerate(rangesGroupedBySeqName):
+        seqName, ranges = group
+        strandNum = next(ranges)[3]
+        flip = 1 if strandNum < 2 else -1
+        yield seqName, (rank, flip)
+
+def alignmentSortData(alignments, seqIndex, otherNamesToRanksAndFlips):
+    otherIndex = 1 - seqIndex
+    for i in alignments:
+        blocks = i[2]
+        otherRank, otherFlip = otherNamesToRanksAndFlips[i[otherIndex]]
+        otherPos = otherFlip * abs(blocks[0][otherIndex] +
+                                   blocks[-1][otherIndex] + blocks[-1][2])
+        numOfAlignedLetterPairs = sum(i[2] for i in blocks)
+        yield i[seqIndex], otherRank, otherPos, numOfAlignedLetterPairs
+
+def mySortedRanges(seqRanges, sortOpt, seqIndex, alignments, otherRanges):
+    rangesGroupedBySeqName = itertools.groupby(seqRanges, itemgetter(0))
+    g = [list(ranges) for seqName, ranges in rangesGroupedBySeqName]
+    for i in g:
+        if i[0][3] > 1:
+            i.reverse()
+    if sortOpt == "1":
+        g.sort(key=nameKey)
+    if sortOpt == "2":
+        g.sort(key=sizeKey)
+    if sortOpt == "3":
+        otherNamesToRanksAndFlips = dict(rankAndFlipPerSeq(otherRanges))
+        alns = sorted(alignmentSortData(alignments, seqIndex,
+                                        otherNamesToRanksAndFlips))
+        alnsGroupedBySeqName = itertools.groupby(alns, itemgetter(0))
+        seqNamesToLists = dict((k, list(v)) for k, v in alnsGroupedBySeqName)
+        g.sort(key=functools.partial(alignmentKey, seqNamesToLists))
+    return [j for i in g for j in i]
+
+def allSortedRanges(opts, alignments, alignmentsB,
+                    seqRanges1, seqRangesB1, seqRanges2, seqRangesB2):
+    o1, oB1 = twoValuesFromOption(opts.strands1, ":")
+    o2, oB2 = twoValuesFromOption(opts.strands2, ":")
+    if o1 == "1" and o2 == "1":
+        raise Exception("the strand options have circular dependency")
+    seqRanges1 = list(rangesWithStrandInfo(seqRanges1, o1, alignments, 0))
+    seqRanges2 = list(rangesWithStrandInfo(seqRanges2, o2, alignments, 1))
+    seqRangesB1 = list(rangesWithStrandInfo(seqRangesB1, oB1, alignmentsB, 0))
+    seqRangesB2 = list(rangesWithStrandInfo(seqRangesB2, oB2, alignmentsB, 1))
+
+    o1, oB1 = twoValuesFromOption(opts.sort1, ":")
+    o2, oB2 = twoValuesFromOption(opts.sort2, ":")
+    if o1 == "3" and o2 == "3":
+        raise Exception("the sort options have circular dependency")
+    if o1 != "3":
+        s1 = mySortedRanges(seqRanges1, o1, None, None, None)
+    if o2 != "3":
+        s2 = mySortedRanges(seqRanges2, o2, None, None, None)
+    if o1 == "3":
+        s1 = mySortedRanges(seqRanges1, o1, 0, alignments, s2)
+    if o2 == "3":
+        s2 = mySortedRanges(seqRanges2, o2, 1, alignments, s1)
+    t1 = mySortedRanges(seqRangesB1, oB1, 0, alignmentsB, s2)
+    t2 = mySortedRanges(seqRangesB2, oB2, 1, alignmentsB, s1)
+    return s1 + t1, s2 + t2
+
+def prettyNum(n):
+    t = str(n)
+    groups = []
+    while t:
+        groups.append(t[-3:])
+        t = t[:-3]
+    return ",".join(reversed(groups))
 
 def sizeText(size):
     suffixes = "bp", "kb", "Mb", "Gb"
@@ -184,67 +378,79 @@ def sizeText(size):
         if size < j * 1000 or i == len(suffixes) - 1:
             return "%.0f" % (1.0 * size / j) + x
 
-def seqNameAndSizeText(seqName, seqSize):
-    return seqName + ": " + sizeText(seqSize)
-
-def getSeqInfo(sortOpt, seqNames, seqLimits,
-               font, fontsize, image_mode, isShowSize, textRot):
-    '''Return miscellaneous information about the sequences.'''
-    if sortOpt == 1:
-        seqNames.sort(key=natural_sort_key)
-    seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
-    for i in seqNames:
-        r = seqLimits[i]
-        out = i, str(r[0]), str(r[1])
+def labelText(seqRange, labelOpt):
+    seqName, beg, end, strandNum = seqRange
+    if labelOpt == 1:
+        return seqName + ": " + sizeText(end - beg)
+    if labelOpt == 2:
+        return seqName + ":" + prettyNum(beg) + ": " + sizeText(end - beg)
+    if labelOpt == 3:
+        return seqName + ":" + prettyNum(beg) + "-" + prettyNum(end)
+    return seqName
+
+def rangeLabels(seqRanges, labelOpt, font, fontsize, image_mode, textRot):
+    if fontsize:
+        image_size = 1, 1
+        im = Image.new(image_mode, image_size)
+        draw = ImageDraw.Draw(im)
+    x = y = 0
+    for r in seqRanges:
+        text = labelText(r, labelOpt)
+        if fontsize:
+            x, y = draw.textsize(text, font=font)
+            if textRot:
+                x, y = y, x
+        yield text, x, y, r[3]
+
+def dataFromRanges(sortedRanges, font, fontSize, imageMode, labelOpt, textRot):
+    for seqName, rangeBeg, rangeEnd, strandNum in sortedRanges:
+        out = [seqName, str(rangeBeg), str(rangeEnd)]
+        if strandNum > 0:
+            out.append(".+-"[strandNum])
         warn("\t".join(out))
     warn("")
-    if sortOpt == 2:
-        seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
-        seqSizes = [i[0] for i in seqRecords]
-        seqNames = [i[1] for i in seqRecords]
-    if isShowSize:
-        seqLabels = map(seqNameAndSizeText, seqNames, seqSizes)
-    else:
-        seqLabels = seqNames
-    labelSizes = get_text_sizes(seqLabels, font, fontsize, image_mode, textRot)
-    margin = max(i[1] for i in labelSizes)
+    rangeSizes = [e - b for n, b, e, s in sortedRanges]
+    labs = list(rangeLabels(sortedRanges, labelOpt, font, fontSize,
+                            imageMode, textRot))
+    margin = max(i[2] for i in labs)
     # xxx the margin may be too big, because some labels may get omitted
-    return seqNames, seqSizes, seqLabels, labelSizes, margin
+    return rangeSizes, labs, margin
 
 def div_ceil(x, y):
     '''Return x / y rounded up.'''
     q, r = divmod(x, y)
     return q + (r != 0)
 
-def get_bp_per_pix(seq_sizes, pix_tween_seqs, pix_limit):
+def get_bp_per_pix(rangeSizes, pixTweenRanges, maxPixels):
     '''Get the minimum bp-per-pixel that fits in the size limit.'''
-    seq_num = len(seq_sizes)
-    seq_pix_limit = pix_limit - pix_tween_seqs * (seq_num - 1)
-    if seq_pix_limit < seq_num:
+    warn("choosing bp per pixel...")
+    numOfRanges = len(rangeSizes)
+    maxPixelsInRanges = maxPixels - pixTweenRanges * (numOfRanges - 1)
+    if maxPixelsInRanges < numOfRanges:
         raise Exception("can't fit the image: too many sequences?")
-    negLimit = -seq_pix_limit
-    negBpPerPix = sum(seq_sizes) // negLimit
+    negLimit = -maxPixelsInRanges
+    negBpPerPix = sum(rangeSizes) // negLimit
     while True:
-        if sum(i // negBpPerPix for i in seq_sizes) >= negLimit:
+        if sum(i // negBpPerPix for i in rangeSizes) >= negLimit:
             return -negBpPerPix
         negBpPerPix -= 1
 
-def get_seq_starts(seq_pix, pix_tween_seqs, margin):
-    '''Get the start pixel for each sequence.'''
-    seq_starts = []
-    pix_tot = margin - pix_tween_seqs
-    for i in seq_pix:
-        pix_tot += pix_tween_seqs
-        seq_starts.append(pix_tot)
+def getRangePixBegs(rangePixLens, pixTweenRanges, margin):
+    '''Get the start pixel for each range.'''
+    rangePixBegs = []
+    pix_tot = margin - pixTweenRanges
+    for i in rangePixLens:
+        pix_tot += pixTweenRanges
+        rangePixBegs.append(pix_tot)
         pix_tot += i
-    return seq_starts
+    return rangePixBegs
 
-def get_pix_info(seq_sizes, bp_per_pix, pix_tween_seqs, margin):
-    '''Return pixel information about the sequences.'''
-    seq_pix = [div_ceil(i, bp_per_pix) for i in seq_sizes]
-    seq_starts = get_seq_starts(seq_pix, pix_tween_seqs, margin)
-    tot_pix = seq_starts[-1] + seq_pix[-1]
-    return seq_pix, seq_starts, tot_pix
+def pixelData(rangeSizes, bp_per_pix, pixTweenRanges, margin):
+    '''Return pixel information about the ranges.'''
+    rangePixLens = [div_ceil(i, bp_per_pix) for i in rangeSizes]
+    rangePixBegs = getRangePixBegs(rangePixLens, pixTweenRanges, margin)
+    tot_pix = rangePixBegs[-1] + rangePixLens[-1]
+    return rangePixBegs, rangePixLens, tot_pix
 
 def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
     while True:
@@ -258,7 +464,6 @@ def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
         size -= next_pix
 
 def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
-    beg2 = -1 - beg2
     while True:
         q1, r1 = divmod(beg1, bp_per_pix)
         q2, r2 = divmod(beg2, bp_per_pix)
@@ -269,21 +474,31 @@ def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
         beg2 -= next_pix
         size -= next_pix
 
-def alignmentPixels(width, height, alignments, bp_per_pix, origins1, origins2):
+def strandAndOrigin(ranges, beg, size):
+    isReverseStrand = (beg < 0)
+    if isReverseStrand:
+        beg = -(beg + size)
+    for rangeBeg, rangeEnd, isReverseRange, origin in ranges:
+        if rangeEnd > beg:
+            return (isReverseStrand != isReverseRange), origin
+
+def alignmentPixels(width, height, alignments, bp_per_pix,
+                    rangeDict1, rangeDict2):
     hits = [0] * (width * height)  # the image data
     for seq1, seq2, blocks in alignments:
-        ori1 = origins1[seq1]
-        ori2 = origins2[seq2]
+        beg1, beg2, size = blocks[0]
+        isReverse1, ori1 = strandAndOrigin(rangeDict1[seq1], beg1, size)
+        isReverse2, ori2 = strandAndOrigin(rangeDict2[seq2], beg2, size)
         for beg1, beg2, size in blocks:
-            if beg1 < 0:
+            if isReverse1:
                 beg1 = -(beg1 + size)
                 beg2 = -(beg2 + size)
-            if beg2 >= 0:
+            if isReverse1 == isReverse2:
                 drawLineForward(hits, width, bp_per_pix,
-                                beg1 + ori1, beg2 + ori2, size)
+                                ori1 + beg1, ori2 + beg2, size)
             else:
                 drawLineReverse(hits, width, bp_per_pix,
-                                beg1 + ori1, beg2 - ori2, size)
+                                ori1 + beg1, ori2 - beg2 - 1, size)
     return hits
 
 def expandedSeqDict(seqDict):
@@ -297,40 +512,41 @@ def expandedSeqDict(seqDict):
             newDict[base] = x
     return newDict
 
-def readBed(fileName, seqLimits):
-    if not fileName: return
+def readBed(fileName, rangeDict):
     for line in myOpen(fileName):
         w = line.split()
         if not w: continue
         seqName = w[0]
-        if seqName not in seqLimits: continue
+        if seqName not in rangeDict: continue
         beg = int(w[1])
         end = int(w[2])
         layer = 900
-        color = "#ffe4ff"
+        color = "#fbf"
         if len(w) > 4:
             if w[4] != ".":
                 layer = float(w[4])
             if len(w) > 5:
                 if len(w) > 8 and w[8].count(",") == 2:
                     color = "rgb(" + w[8] + ")"
-                elif w[5] == "+":
-                    color = "#fff4f4"
-                elif w[5] == "-":
-                    color = "#f4f4ff"
+                else:
+                    strand = w[5]
+                    isRev = rangeDict[seqName][0][2]
+                    if strand == "+" and not isRev or strand == "-" and isRev:
+                        color = "#ffe8e8"
+                    if strand == "-" and not isRev or strand == "+" and isRev:
+                        color = "#e8e8ff"
         yield layer, color, seqName, beg, end
 
 def commaSeparatedInts(text):
     return map(int, text.rstrip(",").split(","))
 
-def readGenePred(opts, fileName, seqLimits):
-    if not fileName: return
+def readGenePred(opts, fileName, rangeDict):
     for line in myOpen(fileName):
         fields = line.split()
         if not fields: continue
         if fields[2] not in "+-": fields = fields[1:]
         seqName = fields[1]
-        if seqName not in seqLimits: continue
+        if seqName not in rangeDict: continue
         #strand = fields[2]
         cdsBeg = int(fields[5])
         cdsEnd = int(fields[6])
@@ -342,20 +558,19 @@ def readGenePred(opts, fileName, seqLimits):
             e = min(end, cdsEnd)
             if b < e: yield 400, opts.cds_color, seqName, b, e
 
-def readRmsk(fileName, seqLimits):
-    if not fileName: return
+def readRmsk(fileName, rangeDict):
     for line in myOpen(fileName):
         fields = line.split()
         if len(fields) == 17:  # rmsk.txt
             seqName = fields[5]
-            if seqName not in seqLimits: continue  # do this ASAP for speed
+            if seqName not in rangeDict: continue  # do this ASAP for speed
             beg = int(fields[6])
             end = int(fields[7])
             strand = fields[9]
             repeatClass = fields[11]
         elif len(fields) == 15:  # .out
             seqName = fields[4]
-            if seqName not in seqLimits: continue
+            if seqName not in rangeDict: continue
             beg = int(fields[5]) - 1
             end = int(fields[6])
             strand = fields[8]
@@ -363,25 +578,24 @@ def readRmsk(fileName, seqLimits):
         else:
             continue
         if repeatClass in ("Low_complexity", "Simple_repeat"):
-            yield 200, "#ffe4ff", seqName, beg, end
-        elif strand == "+":
-            yield 100, "#fff4f4", seqName, beg, end
+            yield 200, "#fbf", seqName, beg, end
+        elif (strand == "+") != rangeDict[seqName][0][2]:
+            yield 100, "#ffe8e8", seqName, beg, end
         else:
-            yield 100, "#f4f4ff", seqName, beg, end
+            yield 100, "#e8e8ff", seqName, beg, end
 
 def isExtraFirstGapField(fields):
     return fields[4].isdigit()
 
-def readGaps(opts, fileName, seqLimits):
+def readGaps(opts, fileName, rangeDict):
     '''Read locations of unsequenced gaps, from an agp or gap file.'''
-    if not fileName: return
     for line in myOpen(fileName):
         w = line.split()
         if not w or w[0][0] == "#": continue
         if isExtraFirstGapField(w): w = w[1:]
         if w[4] not in "NU": continue
         seqName = w[0]
-        if seqName not in seqLimits: continue
+        if seqName not in rangeDict: continue
         end = int(w[2])
         beg = end - int(w[5])  # zero-based coordinate
         if w[7] == "yes":
@@ -389,144 +603,222 @@ def readGaps(opts, fileName, seqLimits):
         else:
             yield 2000, opts.unbridged_color, seqName, beg, end
 
-def bedBoxes(beds, seqLimits, origins, margin, edge, isTop, bpPerPix):
-    for layer, color, seqName, beg, end in beds:
-        cropBeg, cropEnd = seqLimits[seqName]
-        beg = max(beg, cropBeg)
-        end = min(end, cropEnd)
-        if beg >= end: continue
-        ori = origins[seqName]
-        if layer <= 1000:
-            # include partly-covered pixels
-            b = (ori + beg) // bpPerPix
-            e = div_ceil(ori + end, bpPerPix)
-        else:
-            # exclude partly-covered pixels
-            b = div_ceil(ori + beg, bpPerPix)
-            e = (ori + end) // bpPerPix
-            if e <= b: continue
-        if isTop:
-            box = b, margin, e, edge
-        else:
-            box = margin, b, edge, e
-        yield layer, color, box
+def bedBoxes(beds, rangeDict, margin, edge, isTop, bpPerPix):
+    for layer, color, seqName, bedBeg, bedEnd in beds:
+        for rangeBeg, rangeEnd, isReverseRange, origin in rangeDict[seqName]:
+            beg = max(bedBeg, rangeBeg)
+            end = min(bedEnd, rangeEnd)
+            if beg >= end: continue
+            if isReverseRange:
+                beg, end = -end, -beg
+            if layer <= 1000:
+                # include partly-covered pixels
+                b = (origin + beg) // bpPerPix
+                e = div_ceil(origin + end, bpPerPix)
+            else:
+                # exclude partly-covered pixels
+                b = div_ceil(origin + beg, bpPerPix)
+                e = (origin + end) // bpPerPix
+                if e <= b: continue
+                if bedEnd >= rangeEnd:  # include partly-covered end pixels
+                    if isReverseRange:
+                        b = (origin + beg) // bpPerPix
+                    else:
+                        e = div_ceil(origin + end, bpPerPix)
+            if isTop:
+                box = b, margin, e, edge
+            else:
+                box = margin, b, edge, e
+            yield layer, color, box
 
 def drawAnnotations(im, boxes):
     # xxx use partial transparency for different-color overlaps?
     for layer, color, box in boxes:
         im.paste(color, box)
 
-def make_label(text, text_size, range_start, range_size):
-    '''Return an axis label with endpoint & sort-order information.'''
-    text_width, text_height = text_size
-    label_start = range_start + (range_size - text_width) // 2
-    label_end   = label_start + text_width
-    sort_key    = text_width - range_size
-    return sort_key, label_start, label_end, text, text_height
-
-def get_nonoverlapping_labels(labels, label_space):
+def placedLabels(labels, rangePixBegs, rangePixLens, beg, end):
+    '''Return axis labels with endpoint & sort-order information.'''
+    maxWidth = end - beg
+    for i, j, k in zip(labels, rangePixBegs, rangePixLens):
+        text, textWidth, textHeight, strandNum = i
+        if textWidth > maxWidth:
+            continue
+        labelBeg = j + (k - textWidth) // 2
+        labelEnd = labelBeg + textWidth
+        sortKey = textWidth - k
+        if labelBeg < beg:
+            sortKey += maxWidth * (beg - labelBeg)
+            labelBeg = beg
+            labelEnd = beg + textWidth
+        if labelEnd > end:
+            sortKey += maxWidth * (labelEnd - end)
+            labelEnd = end
+            labelBeg = end - textWidth
+        yield sortKey, labelBeg, labelEnd, text, textHeight, strandNum
+
+def nonoverlappingLabels(labels, minPixTweenLabels):
     '''Get a subset of non-overlapping axis labels, greedily.'''
-    nonoverlapping_labels = []
+    out = []
     for i in labels:
-        if True not in [i[1] < j[2] + label_space and j[1] < i[2] + label_space
-                        for j in nonoverlapping_labels]:
-            nonoverlapping_labels.append(i)
-    return nonoverlapping_labels
-
-def get_axis_image(seqNames, name_sizes, seq_starts, seq_pix, textRot,
-                   textAln, font, image_mode, opts):
+        beg = i[1] - minPixTweenLabels
+        end = i[2] + minPixTweenLabels
+        if all(j[2] <= beg or j[1] >= end for j in out):
+            out.append(i)
+    return out
+
+def axisImage(labels, rangePixBegs, rangePixLens, textRot,
+              textAln, font, image_mode, opts):
     '''Make an image of axis labels.'''
-    min_pos = seq_starts[0]
-    max_pos = seq_starts[-1] + seq_pix[-1]
-    margin = max(i[1] for i in name_sizes)
-    labels = map(make_label, seqNames, name_sizes, seq_starts, seq_pix)
-    labels = [i for i in labels if i[1] >= min_pos and i[2] <= max_pos]
-    labels.sort()
+    beg = rangePixBegs[0]
+    end = rangePixBegs[-1] + rangePixLens[-1]
+    margin = max(i[2] for i in labels)
+    labels = sorted(placedLabels(labels, rangePixBegs, rangePixLens, beg, end))
     minPixTweenLabels = 0 if textRot else opts.label_space
-    labels = get_nonoverlapping_labels(labels, minPixTweenLabels)
-    image_size = (margin, max_pos) if textRot else (max_pos, margin)
-    im = Image.new(image_mode, image_size, opts.border_color)
+    labels = nonoverlappingLabels(labels, minPixTweenLabels)
+    image_size = (margin, end) if textRot else (end, margin)
+    im = Image.new(image_mode, image_size, opts.margin_color)
     draw = ImageDraw.Draw(im)
-    for i in labels:
-        base = margin - i[4] if textAln else 0
-        position = (base, i[1]) if textRot else (i[1], base)
-        draw.text(position, i[3], font=font, fill=opts.text_color)
+    for sortKey, labelBeg, labelEnd, text, textHeight, strandNum in labels:
+        base = margin - textHeight if textAln else 0
+        position = (base, labelBeg) if textRot else (labelBeg, base)
+        fill = ("black", opts.forwardcolor, opts.reversecolor)[strandNum]
+        draw.text(position, text, font=font, fill=fill)
     return im
 
-def seqOrigins(seqNames, seq_starts, seqLimits, bp_per_pix):
-    for i, j in zip(seqNames, seq_starts):
-        yield i, bp_per_pix * j - seqLimits[i][0]
+def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
+    for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
+        seqName, rangeBeg, rangeEnd, strandNum = i
+        isReverseRange = (strandNum > 1)
+        if isReverseRange:
+            origin = bpPerPix * (j + k) + rangeBeg
+        else:
+            origin = bpPerPix * j - rangeBeg
+        yield seqName, (rangeBeg, rangeEnd, isReverseRange, origin)
+
+def rangesPerSeq(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
+    a = rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix)
+    for k, v in itertools.groupby(a, itemgetter(0)):
+        yield k, [i[1] for i in v]
+
+def getFont(opts):
+    if opts.fontfile:
+        return ImageFont.truetype(opts.fontfile, opts.fontsize)
+    fileNames = []
+    try:
+        x = ["fc-match", "-f%{file}", "arial"]
+        p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = p.communicate()
+        fileNames.append(out)
+    except OSError as e:
+        warn("fc-match error: " + str(e))
+    fileNames.append("/Library/Fonts/Arial.ttf")  # for Mac
+    for i in fileNames:
+        try:
+            font = ImageFont.truetype(i, opts.fontsize)
+            warn("font: " + i)
+            return font
+        except IOError as e:
+            warn("font load error: " + str(e))
+    return ImageFont.load_default()
 
 def lastDotplot(opts, args):
-    if opts.fontfile:  font = ImageFont.truetype(opts.fontfile, opts.fontsize)
-    else:              font = ImageFont.load_default()
-
+    font = getFont(opts)
     image_mode = 'RGB'
     forward_color = ImageColor.getcolor(opts.forwardcolor, image_mode)
     reverse_color = ImageColor.getcolor(opts.reversecolor, image_mode)
     zipped_colors = zip(forward_color, reverse_color)
     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
 
+    maxGap1, maxGapB1 = twoValuesFromOption(opts.max_gap1, ":")
+    maxGap2, maxGapB2 = twoValuesFromOption(opts.max_gap2, ":")
+
     warn("reading alignments...")
-    alignmentInfo = readAlignments(args[0], opts)
-    alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alignmentInfo
-    warn("done")
+    alnData = readAlignments(args[0], opts)
+    alignments, seqRanges1, coverDict1, seqRanges2, coverDict2 = alnData
     if not alignments: raise Exception("there are no alignments")
+    warn("cutting...")
+    coverDict1 = dict(mergedRangesPerSeq(coverDict1))
+    coverDict2 = dict(mergedRangesPerSeq(coverDict2))
+    minAlignedBases = min(coveredLength(coverDict1), coveredLength(coverDict2))
+    pad = int(opts.pad * minAlignedBases)
+    cutRanges1 = list(trimmed(seqRanges1, coverDict1, minAlignedBases,
+                              maxGap1, pad, pad))
+    cutRanges2 = list(trimmed(seqRanges2, coverDict2, minAlignedBases,
+                              maxGap2, pad, pad))
+
+    warn("reading secondary alignments...")
+    alnDataB = readSecondaryAlignments(opts, cutRanges1, cutRanges2)
+    alignmentsB, seqRangesB1, coverDictB1, seqRangesB2, coverDictB2 = alnDataB
+    warn("cutting...")
+    coverDictB1 = dict(mergedRangesPerSeq(coverDictB1))
+    coverDictB2 = dict(mergedRangesPerSeq(coverDictB2))
+    cutRangesB1 = trimmed(seqRangesB1, coverDictB1, minAlignedBases,
+                          maxGapB1, 0, 0)
+    cutRangesB2 = trimmed(seqRangesB2, coverDictB2, minAlignedBases,
+                          maxGapB2, 0, 0)
+
+    warn("sorting...")
+    sortOut = allSortedRanges(opts, alignments, alignmentsB,
+                              cutRanges1, cutRangesB1, cutRanges2, cutRangesB2)
+    sortedRanges1, sortedRanges2 = sortOut
 
     textRot1 = "vertical".startswith(opts.rot1)
-    i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
-                    font, opts.fontsize, image_mode, opts.lengths1, textRot1)
-    seqNames1, seqSizes1, seqLabels1, labelSizes1, tMargin = i1
+    i1 = dataFromRanges(sortedRanges1, font,
+                        opts.fontsize, image_mode, opts.labels1, textRot1)
+    rangeSizes1, labelData1, tMargin = i1
 
     textRot2 = "horizontal".startswith(opts.rot2)
-    i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
-                    font, opts.fontsize, image_mode, opts.lengths2, textRot2)
-    seqNames2, seqSizes2, seqLabels2, labelSizes2, lMargin = i2
-
-    warn("choosing bp per pixel...")
-    pix_limit1 = opts.width  - lMargin
-    pix_limit2 = opts.height - tMargin
-    bpPerPix1 = get_bp_per_pix(seqSizes1, opts.border_pixels, pix_limit1)
-    bpPerPix2 = get_bp_per_pix(seqSizes2, opts.border_pixels, pix_limit2)
+    i2 = dataFromRanges(sortedRanges2, font,
+                        opts.fontsize, image_mode, opts.labels2, textRot2)
+    rangeSizes2, labelData2, lMargin = i2
+
+    maxPixels1 = opts.width  - lMargin
+    maxPixels2 = opts.height - tMargin
+    bpPerPix1 = get_bp_per_pix(rangeSizes1, opts.border_pixels, maxPixels1)
+    bpPerPix2 = get_bp_per_pix(rangeSizes2, opts.border_pixels, maxPixels2)
     bpPerPix = max(bpPerPix1, bpPerPix2)
     warn("bp per pixel = " + str(bpPerPix))
 
-    seq_pix1, seq_starts1, width  = get_pix_info(seqSizes1, bpPerPix,
-                                                 opts.border_pixels, lMargin)
-    seq_pix2, seq_starts2, height = get_pix_info(seqSizes2, bpPerPix,
-                                                 opts.border_pixels, tMargin)
+    p1 = pixelData(rangeSizes1, bpPerPix, opts.border_pixels, lMargin)
+    rangePixBegs1, rangePixLens1, width = p1
+    rangeDict1 = dict(rangesPerSeq(sortedRanges1, rangePixBegs1,
+                                   rangePixLens1, bpPerPix))
+
+    p2 = pixelData(rangeSizes2, bpPerPix, opts.border_pixels, tMargin)
+    rangePixBegs2, rangePixLens2, height = p2
+    rangeDict2 = dict(rangesPerSeq(sortedRanges2, rangePixBegs2,
+                                   rangePixLens2, bpPerPix))
+
     warn("width:  " + str(width))
     warn("height: " + str(height))
 
-    origins1 = dict(seqOrigins(seqNames1, seq_starts1, seqLimits1, bpPerPix))
-    origins2 = dict(seqOrigins(seqNames2, seq_starts2, seqLimits2, bpPerPix))
-
     warn("processing alignments...")
-    hits = alignmentPixels(width, height, alignments, bpPerPix,
-                           origins1, origins2)
-    warn("done")
+    hits = alignmentPixels(width, height, alignments + alignmentsB, bpPerPix,
+                           rangeDict1, rangeDict2)
 
-    image_size = width, height
-    im = Image.new(image_mode, image_size, opts.background_color)
+    warn("reading annotations...")
 
-    seqLimits1 = expandedSeqDict(seqLimits1)
-    seqLimits2 = expandedSeqDict(seqLimits2)
-    origins1 = expandedSeqDict(origins1)
-    origins2 = expandedSeqDict(origins2)
+    rangeDict1 = expandedSeqDict(rangeDict1)
+    beds1 = itertools.chain(readBed(opts.bed1, rangeDict1),
+                            readRmsk(opts.rmsk1, rangeDict1),
+                            readGenePred(opts, opts.genePred1, rangeDict1),
+                            readGaps(opts, opts.gap1, rangeDict1))
+    b1 = bedBoxes(beds1, rangeDict1, tMargin, height, True, bpPerPix)
 
-    beds1 = itertools.chain(readBed(opts.bed1, seqLimits1),
-                            readRmsk(opts.rmsk1, seqLimits1),
-                            readGenePred(opts, opts.genePred1, seqLimits1),
-                            readGaps(opts, opts.gap1, seqLimits1))
-    b1 = bedBoxes(beds1, seqLimits1, origins1, tMargin, height, True, bpPerPix)
-
-    beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
-                            readRmsk(opts.rmsk2, seqLimits2),
-                            readGenePred(opts, opts.genePred2, seqLimits2),
-                            readGaps(opts, opts.gap2, seqLimits2))
-    b2 = bedBoxes(beds2, seqLimits2, origins2, lMargin, width, False, bpPerPix)
+    rangeDict2 = expandedSeqDict(rangeDict2)
+    beds2 = itertools.chain(readBed(opts.bed2, rangeDict2),
+                            readRmsk(opts.rmsk2, rangeDict2),
+                            readGenePred(opts, opts.genePred2, rangeDict2),
+                            readGaps(opts, opts.gap2, rangeDict2))
+    b2 = bedBoxes(beds2, rangeDict2, lMargin, width, False, bpPerPix)
 
     boxes = sorted(itertools.chain(b1, b2))
+
+    warn("drawing...")
+
+    image_size = width, height
+    im = Image.new(image_mode, image_size, opts.background_color)
+
     drawAnnotations(im, boxes)
 
     for i in range(height):
@@ -538,22 +830,22 @@ def lastDotplot(opts, args):
             elif store_value == 3: im.putpixel(xy, overlap_color)
 
     if opts.fontsize != 0:
-        axis1 = get_axis_image(seqLabels1, labelSizes1, seq_starts1, seq_pix1,
-                               textRot1, False, font, image_mode, opts)
+        axis1 = axisImage(labelData1, rangePixBegs1, rangePixLens1,
+                          textRot1, False, font, image_mode, opts)
         if textRot1:
             axis1 = axis1.transpose(Image.ROTATE_90)
-        axis2 = get_axis_image(seqLabels2, labelSizes2, seq_starts2, seq_pix2,
-                               textRot2, textRot2, font, image_mode, opts)
+        axis2 = axisImage(labelData2, rangePixBegs2, rangePixLens2,
+                          textRot2, textRot2, font, image_mode, opts)
         if not textRot2:
             axis2 = axis2.transpose(Image.ROTATE_270)
         im.paste(axis1, (0, 0))
         im.paste(axis2, (0, 0))
 
-    for i in seq_starts1[1:]:
+    for i in rangePixBegs1[1:]:
         box = i - opts.border_pixels, tMargin, i, height
         im.paste(opts.border_color, box)
 
-    for i in seq_starts2[1:]:
+    for i in rangePixBegs2[1:]:
         box = lMargin, i - opts.border_pixels, width, i
         im.paste(opts.border_color, box)
 
@@ -583,35 +875,51 @@ if __name__ == "__main__":
                   help="color for forward alignments (default: %default)")
     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
                   help="color for reverse alignments (default: %default)")
-    op.add_option("--sort1", type="int", default=1, metavar="N",
+    op.add_option("--alignments", metavar="FILE", help="secondary alignments")
+    op.add_option("--sort1", default="1", metavar="N",
                   help="genome1 sequence order: 0=input order, 1=name order, "
-                  "2=length order (default=%default)")
-    op.add_option("--sort2", type="int", default=1, metavar="N",
+                  "2=length order, 3=alignment order (default=%default)")
+    op.add_option("--sort2", default="1", metavar="N",
                   help="genome2 sequence order: 0=input order, 1=name order, "
-                  "2=length order (default=%default)")
-    op.add_option("--trim1", action="store_true",
-                  help="trim unaligned sequence flanks from the 1st genome")
-    op.add_option("--trim2", action="store_true",
-                  help="trim unaligned sequence flanks from the 2nd genome")
+                  "2=length order, 3=alignment order (default=%default)")
+    op.add_option("--strands1", default="0", metavar="N", help=
+                  "genome1 sequence orientation: 0=forward orientation, "
+                  "1=alignment orientation (default=%default)")
+    op.add_option("--strands2", default="0", metavar="N", help=
+                  "genome2 sequence orientation: 0=forward orientation, "
+                  "1=alignment orientation (default=%default)")
+    op.add_option("--max-gap1", metavar="FRAC", default="0.5,2", help=
+                  "maximum unaligned (end,mid) gap in genome1: "
+                  "fraction of aligned length (default=%default)")
+    op.add_option("--max-gap2", metavar="FRAC", default="0.5,2", help=
+                  "maximum unaligned (end,mid) gap in genome2: "
+                  "fraction of aligned length (default=%default)")
+    op.add_option("--pad", metavar="FRAC", type="float", default=0.04, help=
+                  "pad length when cutting unaligned gaps: "
+                  "fraction of aligned length (default=%default)")
     op.add_option("--border-pixels", metavar="INT", type="int", default=1,
                   help="number of pixels between sequences (default=%default)")
-    op.add_option("--border-color", metavar="COLOR", default="#dcdcdc",
+    op.add_option("--border-color", metavar="COLOR", default="black",
                   help="color for pixels between sequences (default=%default)")
-    # xxx --margin-color?
+    # --break-color and/or --break-pixels for intra-sequence breaks?
+    op.add_option("--margin-color", metavar="COLOR", default="#dcdcdc",
+                  help="margin color")
 
     og = optparse.OptionGroup(op, "Text options")
     og.add_option("-f", "--fontfile", metavar="FILE",
                   help="TrueType or OpenType font file")
-    og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=11,
+    og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=14,
                   help="TrueType or OpenType font size (default: %default)")
+    og.add_option("--labels1", type="int", default=0, metavar="N", help=
+                  "genome1 labels: 0=name, 1=name:length, "
+                  "2=name:start:length, 3=name:start-end (default=%default)")
+    og.add_option("--labels2", type="int", default=0, metavar="N", help=
+                  "genome2 labels: 0=name, 1=name:length, "
+                  "2=name:start:length, 3=name:start-end (default=%default)")
     og.add_option("--rot1", metavar="ROT", default="h",
                   help="text rotation for the 1st genome (default=%default)")
     og.add_option("--rot2", metavar="ROT", default="v",
                   help="text rotation for the 2nd genome (default=%default)")
-    og.add_option("--lengths1", action="store_true",
-                  help="show sequence lengths for the 1st (horizontal) genome")
-    og.add_option("--lengths2", action="store_true",
-                  help="show sequence lengths for the 2nd (vertical) genome")
     op.add_option_group(og)
 
     og = optparse.OptionGroup(op, "Annotation options")
@@ -630,9 +938,9 @@ if __name__ == "__main__":
                   help="read genome1 genes from genePred file")
     og.add_option("--genePred2", metavar="FILE",
                   help="read genome2 genes from genePred file")
-    og.add_option("--exon-color", metavar="COLOR", default="#dfd",
+    og.add_option("--exon-color", metavar="COLOR", default="PaleGreen",
                   help="color for exons (default=%default)")
-    og.add_option("--cds-color", metavar="COLOR", default="#bdb",
+    og.add_option("--cds-color", metavar="COLOR", default="LimeGreen",
                   help="color for protein-coding regions (default=%default)")
     op.add_option_group(og)
 
@@ -643,18 +951,17 @@ if __name__ == "__main__":
                   help="read genome2 unsequenced gaps from agp or gap file")
     og.add_option("--bridged-color", metavar="COLOR", default="yellow",
                   help="color for bridged gaps (default: %default)")
-    og.add_option("--unbridged-color", metavar="COLOR", default="pink",
+    og.add_option("--unbridged-color", metavar="COLOR", default="orange",
                   help="color for unbridged gaps (default: %default)")
     op.add_option_group(og)
     (opts, args) = op.parse_args()
     if len(args) != 2: op.error("2 arguments needed")
 
-    opts.text_color = "black"
     opts.background_color = "white"
     opts.label_space = 5     # minimum number of pixels between axis labels
 
     try: lastDotplot(opts, args)
     except KeyboardInterrupt: pass  # avoid silly error message
-    except Exception, e:
+    except Exception as e:
         prog = os.path.basename(sys.argv[0])
         sys.exit(prog + ": error: " + str(e))
diff --git a/scripts/last-train b/scripts/last-train
index 4b360c7..a324194 100755
--- a/scripts/last-train
+++ b/scripts/last-train
@@ -360,6 +360,7 @@ def fixedLastalArgs(opts, lastalProgName):
     if opts.k: x.append("-k" + opts.k)
     if opts.P: x.append("-P" + opts.P)
     if opts.Q: x.append("-Q" + opts.Q)
+    if opts.verbose: x.append("-" + "v" * opts.verbose)
     return x
 
 def doTraining(opts, args):
@@ -452,6 +453,8 @@ if __name__ == "__main__":
     usage = "%prog [options] lastdb-name sequence-file(s)"
     description = "Try to find suitable score parameters for aligning the given sequences."
     op = optparse.OptionParser(usage=usage, description=description)
+    op.add_option("-v", "--verbose", action="count",
+                  help="show more details of intermediate steps")
     og = optparse.OptionGroup(op, "Training options")
     og.add_option("--revsym", action="store_true",
                   help="force reverse-complement symmetry")
diff --git a/src/Alignment.cc b/src/Alignment.cc
index 0ef6c6c..551a642 100644
--- a/src/Alignment.cc
+++ b/src/Alignment.cc
@@ -96,7 +96,7 @@ void Alignment::makeXdrop( Centroid& centroid,
   }
 
   // extend a gapped alignment in the left/reverse direction from the seed:
-  std::vector<uchar>& columnAmbiguityCodes = extras.columnAmbiguityCodes;
+  std::vector<char>& columnAmbiguityCodes = extras.columnAmbiguityCodes;
   extend( blocks, columnAmbiguityCodes, centroid, greedyAligner, isGreedy,
 	  seq1, seq2, seed.beg1(), seed.beg2(), false, globality,
 	  scoreMatrix, smMax, maxDrop, gap, frameshiftCost,
@@ -116,7 +116,7 @@ void Alignment::makeXdrop( Centroid& centroid,
 
   // extend a gapped alignment in the right/forward direction from the seed:
   std::vector<SegmentPair> forwardBlocks;
-  std::vector<uchar> forwardAmbiguities;
+  std::vector<char> forwardAmbiguities;
   extend( forwardBlocks, forwardAmbiguities, centroid, greedyAligner, isGreedy,
 	  seq1, seq2, seed.end1(), seed.end2(), true, globality,
 	  scoreMatrix, smMax, maxDrop, gap, frameshiftCost,
@@ -262,13 +262,12 @@ bool Alignment::hasGoodSegment(const uchar *seq1, const uchar *seq2,
   return false;
 }
 
-static void getColumnAmbiguities(const Centroid& centroid,
-				 std::vector<uchar>& ambiguityCodes,
-				 const std::vector<SegmentPair>& chunks,
-				 bool isForward) {
+static void getColumnCodes(const Centroid& centroid, std::vector<char>& codes,
+			   const std::vector<SegmentPair>& chunks,
+			   bool isForward) {
   for (size_t i = 0; i < chunks.size(); ++i) {
     const SegmentPair& x = chunks[i];
-    centroid.getMatchAmbiguities(ambiguityCodes, x.end1(), x.end2(), x.size);
+    centroid.getMatchAmbiguities(codes, x.end1(), x.end2(), x.size);
     size_t j = i + 1;
     bool isNext = (j < chunks.size());
     size_t end1 = isNext ? chunks[j].end1() : 0;
@@ -276,17 +275,17 @@ static void getColumnAmbiguities(const Centroid& centroid,
     // ASSUMPTION: if there is an insertion adjacent to a deletion,
     // the deletion will get printed first.
     if (isForward) {
-      centroid.getInsertAmbiguities(ambiguityCodes, x.beg2(), end2);
-      centroid.getDeleteAmbiguities(ambiguityCodes, x.beg1(), end1);
+      centroid.getInsertAmbiguities(codes, x.beg2(), end2);
+      centroid.getDeleteAmbiguities(codes, x.beg1(), end1);
     } else {
-      centroid.getDeleteAmbiguities(ambiguityCodes, x.beg1(), end1);
-      centroid.getInsertAmbiguities(ambiguityCodes, x.beg2(), end2);
+      centroid.getDeleteAmbiguities(codes, x.beg1(), end1);
+      centroid.getInsertAmbiguities(codes, x.beg2(), end2);
     }
   }
 }
 
 void Alignment::extend( std::vector< SegmentPair >& chunks,
-			std::vector< uchar >& ambiguityCodes,
+			std::vector< char >& columnCodes,
 			Centroid& centroid,
 			GreedyXdropAligner& greedyAligner, bool isGreedy,
 			const uchar* seq1, const uchar* seq2,
@@ -358,7 +357,7 @@ void Alignment::extend( std::vector< SegmentPair >& chunks,
 
   score += extensionScore;
 
-  if( outputType < 5 || outputType == 7 ){  // ordinary max-score alignment
+  if( outputType < 5 || outputType > 6 ){  // ordinary max-score alignment
     size_t end1, end2, size;
     if( isGreedy ){
       while( greedyAligner.getNextChunk( end1, end2, size ) )
@@ -375,16 +374,19 @@ void Alignment::extend( std::vector< SegmentPair >& chunks,
   if( outputType > 3 ){  // calculate match probabilities
     assert( !isGreedy );
     assert( !sm2qual );
-    centroid.reset();
-    centroid.forward( seq1, seq2, start1, start2, isForward, globality, gap );
-    centroid.backward( seq1, seq2, start1, start2, isForward, globality, gap );
+    if (!isForward) {
+      --start1;
+      --start2;
+    }
+    centroid.doForwardBackwardAlgorithm(seq1, seq2, start1, start2, isForward,
+					gap, globality);
 
     if( outputType > 4 && outputType < 7 ){  // gamma-centroid / LAMA alignment
       centroid.dp( gamma );
       centroid.traceback( chunks, gamma );
     }
 
-    getColumnAmbiguities( centroid, ambiguityCodes, chunks, isForward );
+    getColumnCodes(centroid, columnCodes, chunks, isForward);
     extras.fullScore += centroid.logPartitionFunction();
 
     if( outputType == 7 ){
diff --git a/src/Alignment.hh b/src/Alignment.hh
index 4d1e9fc..de4e580 100644
--- a/src/Alignment.hh
+++ b/src/Alignment.hh
@@ -62,7 +62,7 @@ struct AlignmentText {
 struct AlignmentExtras {
   // Optional (probabilistic) attributes of an alignment.
   // To save memory, these are outside the main Alignment struct.
-  std::vector<uchar> columnAmbiguityCodes;  // char or uchar?
+  std::vector<char> columnAmbiguityCodes;
   std::vector<double> expectedCounts;  // expected emission & transition counts
   double fullScore;  // a.k.a. forward score, sum-of-paths score
   AlignmentExtras() : fullScore(0) {}
@@ -111,7 +111,7 @@ struct Alignment{
 		      const uchar *qual1, const uchar *qual2) const;
 
   AlignmentText write(const MultiSequence& seq1, const MultiSequence& seq2,
-		      size_t seqNum2, char strand, const uchar* seqData2,
+		      size_t seqNum2, const uchar* seqData2,
 		      bool isTranslated, const Alphabet& alph,
 		      const LastEvaluer& evaluer, int format,
 		      const AlignmentExtras& extras) const;
@@ -127,7 +127,7 @@ struct Alignment{
   size_t end2() const{ return blocks.back().end2(); }
 
   void extend( std::vector< SegmentPair >& chunks,
-	       std::vector< uchar >& ambiguityCodes,
+	       std::vector< char >& columnCodes,
 	       Centroid& centroid,
 	       GreedyXdropAligner& greedyAligner, bool isGreedy,
 	       const uchar* seq1, const uchar* seq2,
@@ -143,20 +143,19 @@ struct Alignment{
 	       double gamma, int outputType );
 
   AlignmentText writeTab(const MultiSequence& seq1, const MultiSequence& seq2,
-			 size_t seqNum2, char strand, bool isTranslated,
+			 size_t seqNum2, bool isTranslated,
 			 const LastEvaluer& evaluer,
 			 const AlignmentExtras& extras) const;
 
   AlignmentText writeMaf(const MultiSequence& seq1, const MultiSequence& seq2,
-			 size_t seqNum2, char strand, const uchar* seqData2,
+			 size_t seqNum2, const uchar* seqData2,
 			 bool isTranslated, const Alphabet& alph,
 			 const LastEvaluer& evaluer,
 			 const AlignmentExtras& extras) const;
 
   AlignmentText writeBlastTab(const MultiSequence& seq1,
 			      const MultiSequence& seq2,
-			      size_t seqNum2, char strand,
-			      const uchar* seqData2,
+			      size_t seqNum2, const uchar* seqData2,
 			      bool isTranslated, const Alphabet& alph,
 			      const LastEvaluer& evaluer,
 			      bool isExtraColumns) const;
diff --git a/src/AlignmentWrite.cc b/src/AlignmentWrite.cc
index 51b7a32..010db57 100644
--- a/src/AlignmentWrite.cc
+++ b/src/AlignmentWrite.cc
@@ -90,21 +90,19 @@ private:
 
 AlignmentText Alignment::write(const MultiSequence& seq1,
 			       const MultiSequence& seq2,
-			       size_t seqNum2, char strand,
-			       const uchar* seqData2,
+			       size_t seqNum2, const uchar* seqData2,
 			       bool isTranslated, const Alphabet& alph,
 			       const LastEvaluer& evaluer, int format,
 			       const AlignmentExtras& extras) const {
   assert( !blocks.empty() );
 
   if( format == 'm' )
-    return writeMaf( seq1, seq2, seqNum2, strand, seqData2,
+    return writeMaf( seq1, seq2, seqNum2, seqData2,
 		     isTranslated, alph, evaluer, extras );
   if( format == 't' )
-    return writeTab( seq1, seq2, seqNum2, strand,
-		     isTranslated, evaluer, extras );
+    return writeTab( seq1, seq2, seqNum2, isTranslated, evaluer, extras );
   else
-    return writeBlastTab( seq1, seq2, seqNum2, strand, seqData2,
+    return writeBlastTab( seq1, seq2, seqNum2, seqData2,
 			  isTranslated, alph, evaluer, format == 'B' );
 }
 
@@ -178,8 +176,7 @@ static char* writeTags( const LastEvaluer& evaluer, double queryLength,
 
 AlignmentText Alignment::writeTab(const MultiSequence& seq1,
 				  const MultiSequence& seq2,
-				  size_t seqNum2, char strand,
-				  bool isTranslated,
+				  size_t seqNum2, bool isTranslated,
 				  const LastEvaluer& evaluer,
 				  const AlignmentExtras& extras) const {
   size_t alnBeg1 = beg1();
@@ -197,7 +194,9 @@ AlignmentText Alignment::writeTab(const MultiSequence& seq1,
 
   IntText sc(score);
   std::string n1 = seq1.seqName(seqNum1);
+  char strand1 = seq1.strand(seqNum1);
   std::string n2 = seq2.seqName(seqNum2);
+  char strand2 = seq2.strand(seqNum2);
   IntText b1(alnBeg1 - seqStart1);
   IntText b2(alnBeg2 - seqStart2);
   IntText r1(alnEnd1 - alnBeg1);
@@ -220,13 +219,13 @@ AlignmentText Alignment::writeTab(const MultiSequence& seq1,
   char *text = new char[textLen + 1];
   Writer w(text);
   w << sc << t;
-  w << n1 << t << b1 << t << r1 << t << '+'    << t << s1 << t;
-  w << n2 << t << b2 << t << r2 << t << strand << t << s2 << t;
+  w << n1 << t << b1 << t << r1 << t << strand1 << t << s1 << t;
+  w << n2 << t << b2 << t << r2 << t << strand2 << t << s2 << t;
   w.copy(&blockText[0] + blockText.size() - blockLen, blockLen);
   w.copy(tags, tagLen);
   w << '\0';
 
-  return AlignmentText(seqNum2, alnBeg2, alnEnd2, strand, score, 0, 0, text);
+  return AlignmentText(seqNum2, alnBeg2, alnEnd2, strand2, score, 0, 0, text);
 }
 
 static void putLeft(Writer &w, const std::string &t, size_t width) {
@@ -274,13 +273,6 @@ static void writeMafHeadQ(char *out,
   w.fill(qLineBlankLen, ' ');
 }
 
-// Write the first part of a "p" line:
-static void writeMafHeadP(char *out, size_t pLineBlankLen) {
-  Writer w(out);
-  w << 'p' << ' ';
-  w.fill(pLineBlankLen, ' ');
-}
-
 // Write a "c" line
 static void writeMafLineC(std::vector<char> &cLine,
 			  const std::vector<double> &counts) {
@@ -297,13 +289,12 @@ static void writeMafLineC(std::vector<char> &cLine,
 
 AlignmentText Alignment::writeMaf(const MultiSequence& seq1,
 				  const MultiSequence& seq2,
-				  size_t seqNum2, char strand,
-				  const uchar* seqData2,
+				  size_t seqNum2, const uchar* seqData2,
 				  bool isTranslated, const Alphabet& alph,
 				  const LastEvaluer& evaluer,
 				  const AlignmentExtras& extras) const {
   double fullScore = extras.fullScore;
-  const std::vector<uchar>& columnAmbiguityCodes = extras.columnAmbiguityCodes;
+  const std::vector<char>& columnAmbiguityCodes = extras.columnAmbiguityCodes;
 
   size_t alnBeg1 = beg1();
   size_t alnEnd1 = end1();
@@ -323,7 +314,9 @@ AlignmentText Alignment::writeMaf(const MultiSequence& seq1,
   size_t aLineLen = aLineEnd - aLine;
 
   const std::string n1 = seq1.seqName(seqNum1);
+  char strand1 = seq1.strand(seqNum1);
   const std::string n2 = seq2.seqName(seqNum2);
+  char strand2 = seq2.strand(seqNum2);
   IntText b1(alnBeg1 - seqStart1);
   IntText b2(alnBeg2 - seqStart2);
   IntText r1(alnEnd1 - alnBeg1);
@@ -356,7 +349,7 @@ AlignmentText Alignment::writeMaf(const MultiSequence& seq1,
 
   char *dest = std::copy(aLine, aLineEnd, text);
 
-  writeMafHeadS(dest, n1, nw, b1, bw, r1, rw, '+', s1, sw);
+  writeMafHeadS(dest, n1, nw, b1, bw, r1, rw, strand1, s1, sw);
   dest = writeTopSeq(seq1.seqReader(), alph, 0, frameSize2, dest + headLen);
   *dest++ = '\n';
 
@@ -367,7 +360,7 @@ AlignmentText Alignment::writeMaf(const MultiSequence& seq1,
     *dest++ = '\n';
   }
 
-  writeMafHeadS(dest, n2, nw, b2, bw, r2, rw, strand, s2, sw);
+  writeMafHeadS(dest, n2, nw, b2, bw, r2, rw, strand2, s2, sw);
   dest = writeBotSeq(seqData2, alph, 0, frameSize2, dest + headLen);
   *dest++ = '\n';
 
@@ -379,25 +372,25 @@ AlignmentText Alignment::writeMaf(const MultiSequence& seq1,
     *dest++ = '\n';
   }
 
+  Writer w(dest);
+
   if (!columnAmbiguityCodes.empty()) {
-    writeMafHeadP(dest, pLineBlankLen);
-    dest = copy(columnAmbiguityCodes.begin(),
-		columnAmbiguityCodes.end(), dest + headLen);
-    *dest++ = '\n';
+    w << 'p' << ' ';
+    w.fill(pLineBlankLen, ' ');
+    w.copy(&columnAmbiguityCodes[0], columnAmbiguityCodes.size());
+    w << '\n';
   }
 
-  dest = copy(cLine.begin(), cLine.end(), dest);
+  if (!cLine.empty()) w.copy(&cLine[0], cLine.size());
 
-  *dest++ = '\n';  // blank line afterwards
-  *dest++ = '\0';
+  w << '\n' << '\0';  // blank line afterwards
 
-  return AlignmentText(seqNum2, alnBeg2, alnEnd2, strand, score, 0, 0, text);
+  return AlignmentText(seqNum2, alnBeg2, alnEnd2, strand2, score, 0, 0, text);
 }
 
 AlignmentText Alignment::writeBlastTab(const MultiSequence& seq1,
 				       const MultiSequence& seq2,
-				       size_t seqNum2, char strand,
-				       const uchar* seqData2,
+				       size_t seqNum2, const uchar* seqData2,
 				       bool isTranslated, const Alphabet& alph,
 				       const LastEvaluer& evaluer,
 				       bool isExtraColumns) const {
@@ -406,6 +399,7 @@ AlignmentText Alignment::writeBlastTab(const MultiSequence& seq1,
   size_t seqNum1 = seq1.whichSequence(alnBeg1);
   size_t seqStart1 = seq1.seqBeg(seqNum1);
   size_t seqLen1 = seq1.seqLen(seqNum1);
+  char strand1 = seq1.strand(seqNum1);
 
   size_t size2 = seq2.padLen(seqNum2);
   size_t frameSize2 = isTranslated ? (size2 / 3) : 0;
@@ -413,6 +407,7 @@ AlignmentText Alignment::writeBlastTab(const MultiSequence& seq1,
   size_t alnEnd2 = aaToDna( end2(), frameSize2 );
   size_t seqStart2 = seq2.seqBeg(seqNum2) - seq2.padBeg(seqNum2);
   size_t seqLen2 = seq2.seqLen(seqNum2);
+  char strand2 = seq2.strand(seqNum2);
 
   size_t alnSize = numColumns( frameSize2 );
   size_t matches = matchCount( blocks, seq1.seqReader(), seqData2,
@@ -423,9 +418,14 @@ AlignmentText Alignment::writeBlastTab(const MultiSequence& seq1,
 
   size_t blastAlnBeg1 = alnBeg1 + 1;  // 1-based coordinate
   size_t blastAlnEnd1 = alnEnd1;
+  if (strand1 == '-') {
+    blastAlnBeg1 = seqStart1 + seqLen1 - alnBeg1;
+    blastAlnEnd1 = seqStart1 + seqLen1 - alnEnd1 + 1;  // 1-based coordinate
+    seqStart1 = 0;
+  }
   size_t blastAlnBeg2 = alnBeg2 + 1;  // 1-based coordinate
   size_t blastAlnEnd2 = alnEnd2;
-  if (strand == '-') {
+  if (strand2 == '-') {
     blastAlnBeg2 = size2 - alnBeg2;
     blastAlnEnd2 = size2 - alnEnd2 + 1;  // 1-based coordinate
     /*
@@ -474,7 +474,7 @@ AlignmentText Alignment::writeBlastTab(const MultiSequence& seq1,
   if (isExtraColumns)   w << t << s2 << t << s1 << t << sc;
   w << '\n' << '\0';
 
-  return AlignmentText(seqNum2, alnBeg2, alnEnd2, strand, score,
+  return AlignmentText(seqNum2, alnBeg2, alnEnd2, strand2, score,
 		       alnSize, matches, text);
 }
 
diff --git a/src/Centroid.cc b/src/Centroid.cc
index 36edd35..98303be 100644
--- a/src/Centroid.cc
+++ b/src/Centroid.cc
@@ -96,7 +96,6 @@ namespace cbrc{
   }
 
   void Centroid::initForwardMatrix(){
-    scale.assign ( numAntidiagonals + 2, 1.0 ); // scaling
     size_t n = xa.scoreEndIndex( numAntidiagonals );
 
     if ( fM.size() < n ) {
@@ -110,9 +109,6 @@ namespace cbrc{
   }
 
   void Centroid::initBackwardMatrix(){
-    mD.assign( numAntidiagonals + 2, 0.0 );
-    mI.assign( numAntidiagonals + 2, 0.0 );
-
     size_t n = xa.scoreEndIndex( numAntidiagonals );
     bM.assign( n, 0.0 );
     bD.assign( n, 0.0 );
@@ -120,51 +116,20 @@ namespace cbrc{
     bP.assign( n, 0.0 );
   }
 
-  void Centroid::initDecodingMatrix(){
-    X.resize( fM.size() );
-  }
-
-  void Centroid::updateScore( double score, size_t antiDiagonal, size_t cur ){
-    if( bestScore < score ){
-      bestScore = score;
-      bestAntiDiagonal = antiDiagonal;
-      bestPos1 = cur;
-    }
-  }
-
-  // xxx this will go wrong for non-delimiters with severe mismatch scores
-  static bool isDelimiter(uchar c, const double *expScores) {
-    return expScores[c] <= 0.0;
-  }
-
-  void Centroid::forward( const uchar* seq1, const uchar* seq2,
-			  size_t start1, size_t start2,
-			  bool isForward, int globality,
-			  const GeneralizedAffineGapCosts& gap ){
-
-    //std::cout << "[forward] start1=" << start1 << "," << "start2=" << start2 << "," << "isForward=" << isForward << std::endl;
-    seq1 += start1;
-    seq2 += start2;
-    const ExpMatrixRow* pssm = isPssm ? pssmExp2 + start2 : 0;
-    const int seqIncrement = isForward ? 1 : -1;
-
+  void Centroid::forward(const uchar* seq1, const uchar* seq2,
+			 const ExpMatrixRow* pssm, bool isExtendFwd,
+			 const GeneralizedAffineGapCosts& gap,
+			 int globality) {
     initForwardMatrix();
-
-    double Z = 0.0;  // partion function of forward values
-
+    const int seqIncrement = isExtendFwd ? 1 : -1;
     const bool isAffine = gap.isAffine();
-    const int E = gap.delExtend;
-    const int F = gap.delExist;
-    const int EI = gap.insExtend;
-    const int FI = gap.insExist;
-    const int P = gap.pairExtend;
-    const double eE = EXP ( - E / T );
-    const double eF = EXP ( - F / T );
-    const double eEI = EXP ( - EI / T );
-    const double eFI = EXP ( - FI / T );
-    const double eP = EXP ( - P / T );
-
+    const double eE = EXP ( - gap.delExtend / T );
+    const double eF = EXP ( - gap.delExist / T );
+    const double eEI = EXP ( - gap.insExtend / T );
+    const double eFI = EXP ( - gap.insExist / T );
+    const double eP = EXP ( - gap.pairExtend / T );
     assert( gap.insExist == gap.delExist || eP <= 0.0 );
+    double Z = 0.0;  // partion function of forward values
 
     for( size_t k = 0; k < numAntidiagonals; ++k ){  // loop over antidiagonals
       const size_t seq1beg = seq1start( k );
@@ -193,26 +158,28 @@ namespace cbrc{
 
       const double* fM0last = fM0 + xa.numCellsAndPads( k ) - 1;
 
-      const uchar* s1 = seqPtr( seq1, isForward, seq1beg );
+      const uchar* s1 = isExtendFwd ? seq1 + seq1beg : seq1 - seq1beg;
 
       *fM0++ = *fD0++ = *fI0++ = *fP0++ = 0.0;  // add one pad cell
 
       if (! isPssm) {
-	const uchar* s2 = seqPtr( seq2, isForward, seq2pos );
+	const uchar* s2 = isExtendFwd ? seq2 + seq2pos : seq2 - seq2pos;
 
 	if (isAffine) {
 	  while (1) {
 	    const double xM = *fM2 * scale12;
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
+	    const unsigned letter1 = *s1;
+	    const unsigned letter2 = *s2;
+	    const double matchProb = match_score[letter1][letter2];
 	    *fD0 = xM * eF + xD;
 	    *fI0 = (xM + xD) * eFI + xI;
-	    *fM0 = (xM + xD + xI) * match_score[ *s1 ][ *s2 ];
+	    const double total = xM + xD + xI;
+	    *fM0 = total * matchProb;
 	    sum_f += xM;
-	    if( globality && (isDelimiter(*s2, *match_score) ||
-			      isDelimiter(*s1, *match_score)) ){
-	      Z += xM + xD + xI;
-	    }
+	    if (globality && matchProb <= 0) Z += total;  // xxx
+
 	    if (fM0 == fM0last) break;
 	    fM0++; fD0++; fI0++;
 	    fM2++; fD1++; fI1++;
@@ -225,15 +192,17 @@ namespace cbrc{
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
 	    const double xP = *fP2 * seP;
-	    *fD0 = xM * eF + xD + xP;
-	    *fI0 = (xM + xD) * eFI + xI + xP;
-	    *fM0 = (xM + xD + xI + xP) * match_score[ *s1 ][ *s2 ];
+	    const unsigned letter1 = *s1;
+	    const unsigned letter2 = *s2;
+	    const double matchProb = match_score[letter1][letter2];
 	    *fP0 = xM * eF + xP;
+	    *fD0 = xM * eF + xP + xD;
+	    *fI0 = (xM + xD) * eFI + (xI + xP);
+	    const double total = (xM + xD) + (xI + xP);
+	    *fM0 = total * matchProb;
 	    sum_f += xM;
-	    if( globality && (isDelimiter(*s2, *match_score) ||
-			      isDelimiter(*s1, *match_score)) ){
-	      Z += xM + xD + xI + xP;
-	    }
+	    if (globality && matchProb <= 0) Z += total;  // xxx
+
 	    if (fM0 == fM0last) break;
 	    fM0++; fD0++; fI0++; fP0++;
 	    fM2++; fD1++; fI1++; fP2++;
@@ -242,21 +211,23 @@ namespace cbrc{
 	  }
 	}
       } else {
-	const ExpMatrixRow* p2 = seqPtr( pssm, isForward, seq2pos );
+	const ExpMatrixRow* p2 = isExtendFwd ? pssm + seq2pos : pssm - seq2pos;
 
 	if (isAffine) {
 	  while (1) {
 	    const double xM = *fM2 * scale12;
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
+	    const unsigned letter1 = *s1;
+	    const double *matchProbs = *p2;
+	    const double matchProb = matchProbs[letter1];
 	    *fD0 = xM * eF + xD;
 	    *fI0 = (xM + xD) * eFI + xI;
-	    *fM0 = (xM + xD + xI) * (*p2)[ *s1 ];
+	    const double total = xM + xD + xI;
+	    *fM0 = total * matchProb;
 	    sum_f += xM;
-	    if ( globality && (isDelimiter(0, *p2) ||
-			       isDelimiter(*s1, *pssm)) ){
-	      Z += xM + xD + xI;
-	    }
+	    if (globality && matchProb <= 0) Z += total;  // xxx
+
 	    if (fM0 == fM0last) break;
 	    fM0++; fD0++; fI0++;
 	    fM2++; fD1++; fI1++;
@@ -269,15 +240,17 @@ namespace cbrc{
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
 	    const double xP = *fP2 * seP;
-	    *fD0 = xM * eF + xD + xP;
-	    *fI0 = (xM + xD) * eFI + xI + xP;
-	    *fM0 = (xM + xD + xI + xP) * (*p2)[ *s1 ];
+	    const unsigned letter1 = *s1;
+	    const double *matchProbs = *p2;
+	    const double matchProb = matchProbs[letter1];
 	    *fP0 = xM * eF + xP;
+	    *fD0 = xM * eF + xP + xD;
+	    *fI0 = (xM + xD) * eFI + (xI + xP);
+	    const double total = (xM + xD) + (xI + xP);
+	    *fM0 = total * matchProb;
 	    sum_f += xM;
-	    if ( globality && (isDelimiter(0, *p2) ||
-			       isDelimiter(*s1, *pssm)) ){
-	      Z += xM + xD + xI + xP;
-	    }
+	    if (globality && matchProb <= 0) Z += total;  // xxx
+
 	    if (fM0 == fM0last) break;
 	    fM0++; fD0++; fI0++; fP0++;
 	    fM2++; fD1++; fI1++; fP2++;
@@ -299,33 +272,20 @@ namespace cbrc{
 
   // added by M. Hamada
   // compute posterior probabilities while executing backward algorithm
-  void Centroid::backward( const uchar* seq1, const uchar* seq2,
-			   size_t start1, size_t start2,
-			   bool isForward, int globality,
-			   const GeneralizedAffineGapCosts& gap ){
-
-    //std::cout << "[backward] start1=" << start1 << "," << "start2=" << start2 << "," << "isForward=" << isForward << std::endl;
-    seq1 += start1;
-    seq2 += start2;
-    const ExpMatrixRow* pssm = isPssm ? pssmExp2 + start2 : 0;
-    const int seqIncrement = isForward ? 1 : -1;
-
+  void Centroid::backward(const uchar* seq1, const uchar* seq2,
+			  const ExpMatrixRow* pssm, bool isExtendFwd,
+			  const GeneralizedAffineGapCosts& gap,
+			  int globality) {
     initBackwardMatrix();
-
+    const int seqIncrement = isExtendFwd ? 1 : -1;
     const bool isAffine = gap.isAffine();
-    const int E = gap.delExtend;
-    const int F = gap.delExist;
-    const int EI = gap.insExtend;
-    const int FI = gap.insExist;
-    const int P = gap.pairExtend;
-    const double eE = EXP ( - E / T );
-    const double eF = EXP ( - F / T );
-    const double eEI = EXP ( - EI / T );
-    const double eFI = EXP ( - FI / T );
-    const double eP = EXP ( - P / T );
-    double scaledUnit = 1.0;
-
+    const double eE = EXP ( - gap.delExtend / T );
+    const double eF = EXP ( - gap.delExist / T );
+    const double eEI = EXP ( - gap.insExtend / T );
+    const double eFI = EXP ( - gap.insExist / T );
+    const double eP = EXP ( - gap.pairExtend / T );
     assert( gap.insExist == gap.delExist || eP <= 0.0 );
+    double scaledUnit = 1.0;
 
     for( size_t k = numAntidiagonals; k-- > 0; ){
       const size_t seq1beg = seq1start( k );
@@ -361,22 +321,24 @@ namespace cbrc{
       double* mDout = &mD[ seq1beg ];
       double* mIout = &mI[ seq2pos ];
 
-      const uchar* s1 = seqPtr( seq1, isForward, seq1beg );
+      const uchar *s1 = isExtendFwd ? seq1 + seq1beg : seq1 - seq1beg;
 
       if (! isPssm ) {
-	const uchar* s2 = seqPtr( seq2, isForward, seq2pos );
+	const uchar *s2 = isExtendFwd ? seq2 + seq2pos : seq2 - seq2pos;
 
 	if (isAffine) {
 	  while (1) {
-	    double yM = *bM0 * match_score[ *s1 ][ *s2 ];
-	    double yD = *bD0;
-	    double yI = *bI0;
+	    const double matchProb = match_score[*s1][*s2];
+	    const double yM = (*bM0) * matchProb;
+	    const double yD = *bD0;
+	    const double yI = *bI0;
 	    double zM = yM + yD * eF + yI * eFI;
 	    double zD = yM + yD + yI * eFI;
 	    double zI = yM + yI;
 	    if( globality ){
-	      if( isDelimiter(*s2, *match_score) ||
-		  isDelimiter(*s1, *match_score) ){
+	      if( matchProb <= 0 ){
+		// xxx should get here only at delimiters, but will get
+		// here for non-delimiters with severe mismatch scores
 		zM += scaledUnit;  zD += scaledUnit;  zI += scaledUnit;
 	      }
 	    }else{
@@ -399,17 +361,17 @@ namespace cbrc{
 	  }
 	} else {
 	  while (1) {
-	    double yM = *bM0 * match_score[ *s1 ][ *s2 ];
-	    double yD = *bD0;
-	    double yI = *bI0;
-	    double yP = *bP0;
+	    const double matchProb = match_score[*s1][*s2];
+	    const double yM = (*bM0) * matchProb;
+	    const double yD = *bD0;
+	    const double yI = *bI0;
+	    const double yP = *bP0;
 	    double zM = yM + yD * eF + yI * eFI + yP * eF;
 	    double zD = yM + yD + yI * eFI;
 	    double zI = yM + yI;
-	    double zP = yM + yP + yD + yI;
+	    double zP = yM + yD + yI + yP;
 	    if( globality ){
-	      if( isDelimiter(*s2, *match_score) ||
-		  isDelimiter(*s1, *match_score) ){
+	      if( matchProb <= 0 ){  // xxx
 		zM += scaledUnit;  zD += scaledUnit;  zI += scaledUnit;
 		zP += scaledUnit;
 	      }
@@ -435,19 +397,19 @@ namespace cbrc{
 	  }
 	}
       } else {
-	const ExpMatrixRow* p2 = seqPtr( pssm, isForward, seq2pos );
+	const ExpMatrixRow *p2 = isExtendFwd ? pssm + seq2pos : pssm - seq2pos;
 
 	if (isAffine) {
 	  while (1) {
-	    double yM = *bM0 * ( *p2 )[ *s1 ];
-	    double yD = *bD0;
-	    double yI = *bI0;
+	    const double matchProb = (*p2)[*s1];
+	    const double yM = (*bM0) * matchProb;
+	    const double yD = *bD0;
+	    const double yI = *bI0;
 	    double zM = yM + yD * eF + yI * eFI;
 	    double zD = yM + yD + yI * eFI;
 	    double zI = yM + yI;
 	    if( globality ){
-	      if( isDelimiter(0, *p2) ||
-		  isDelimiter(*s1, *pssm) ){
+	      if( matchProb <= 0 ){  // xxx
 		zM += scaledUnit;  zD += scaledUnit;  zI += scaledUnit;
 	      }
 	    }else{
@@ -470,17 +432,17 @@ namespace cbrc{
 	  }
 	} else {
 	  while (1) {
-	    double yM = *bM0 * ( *p2 )[ *s1 ];
-	    double yD = *bD0;
-	    double yI = *bI0;
-	    double yP = *bP0;
+	    const double matchProb = (*p2)[*s1];
+	    const double yM = (*bM0) * matchProb;
+	    const double yD = *bD0;
+	    const double yI = *bI0;
+	    const double yP = *bP0;
 	    double zM = yM + yD * eF + yI * eFI + yP * eF;
 	    double zD = yM + yD + yI * eFI;
 	    double zI = yM + yI;
-	    double zP = yM + yP + yD + yI;
+	    double zP = yM + yD + yI + yP;
 	    if( globality ){
-	      if( isDelimiter(0, *p2) ||
-		  isDelimiter(*s1, *pssm) ){
+	      if( matchProb <= 0 ){  // xxx
 		zM += scaledUnit;  zD += scaledUnit;  zI += scaledUnit;
 		zP += scaledUnit;
 	      }
@@ -509,27 +471,19 @@ namespace cbrc{
     }
 
     //std::cout << "# bM[0]=" << bM[0] << std::endl;
-    //ExpectedCount ec;
-    //computeExpectedCounts ( seq1, seq2, start1, start2, isForward, gap, ec );
-    //ec.write (std::cerr);
   }
 
   double Centroid::dp( double gamma ){
-    if (outputType == 5 ) return dp_centroid( gamma );
-    else if (outputType == 6 ) return dp_ama (gamma);
+    bestScore = 0;
+    bestAntiDiagonal = 0;
+    bestPos1 = 0;
+    X.resize(fM.size());
+    if (outputType == 5) return dp_centroid(gamma);
+    if (outputType == 6) return dp_ama(gamma);
     return 0;
   }
 
-  void Centroid::traceback( std::vector< SegmentPair >& chunks,
-			    double gamma ) const{
-    if (outputType==5) traceback_centroid( chunks, gamma );
-    else if (outputType==6) traceback_ama( chunks, gamma);
-  }
-
   double Centroid::dp_centroid( double gamma ){
-
-    initDecodingMatrix();
-
     for( size_t k = 1; k < numAntidiagonals; ++k ){  // loop over antidiagonals
       const size_t scoreEnd = xa.scoreEndIndex( k );
       double* X0 = &X[ scoreEnd ];
@@ -589,9 +543,6 @@ namespace cbrc{
   }
 
   double Centroid::dp_ama( double gamma ){
-
-    initDecodingMatrix();
-
     mX1.assign ( numAntidiagonals + 2, 1.0 );
     mX2.assign ( numAntidiagonals + 2, 1.0 );
 
@@ -684,7 +635,7 @@ namespace cbrc{
   // Return an ASCII code representing an error probability.  The
   // printable codes are 33--126, but here we use a maximum of 125, so
   // that 126 is reserved for special cases.
-  static uchar asciiProbability( double probCorrect ){
+  static char asciiProbability( double probCorrect ){
     assert( probCorrect >= 0 );
     //assert( probCorrect <= 1 );  // can fail: floating point is imperfect
     double e = 1 - probCorrect;
@@ -693,10 +644,10 @@ namespace cbrc{
     int i = static_cast<int>(g);  // round fractions down
     int j = i + 33;
     int k = std::min( j, 125 );
-    return static_cast<uchar>(k);
+    return static_cast<char>(k);
   }
 
-  void Centroid::getMatchAmbiguities(std::vector<uchar>& ambiguityCodes,
+  void Centroid::getMatchAmbiguities(std::vector<char>& ambiguityCodes,
 				     size_t seq1end, size_t seq2end,
 				     size_t length) const {
     while (length) {
@@ -707,13 +658,13 @@ namespace cbrc{
     }
   }
 
-  void Centroid::getDeleteAmbiguities(std::vector<uchar>& ambiguityCodes,
+  void Centroid::getDeleteAmbiguities(std::vector<char>& ambiguityCodes,
 				      size_t seq1end, size_t seq1beg) const {
     for (size_t i = seq1end; i > seq1beg; --i)
       ambiguityCodes.push_back(asciiProbability(mD[i]));
   }
 
-  void Centroid::getInsertAmbiguities(std::vector<uchar>& ambiguityCodes,
+  void Centroid::getInsertAmbiguities(std::vector<char>& ambiguityCodes,
 				      size_t seq2end, size_t seq2beg) const {
     for (size_t i = seq2end; i > seq2beg; --i)
       ambiguityCodes.push_back(asciiProbability(mI[i]));
@@ -729,25 +680,20 @@ namespace cbrc{
 
   void Centroid::computeExpectedCounts ( const uchar* seq1, const uchar* seq2,
 					 size_t start1, size_t start2,
-					 bool isForward,
+					 bool isExtendFwd,
 					 const GeneralizedAffineGapCosts& gap,
 					 ExpectedCount& c ) const{
     seq1 += start1;
     seq2 += start2;
     const ExpMatrixRow* pssm = isPssm ? pssmExp2 + start2 : 0;
-    const int seqIncrement = isForward ? 1 : -1;
+    const int seqIncrement = isExtendFwd ? 1 : -1;
 
     const bool isAffine = gap.isAffine();
-    const int E = gap.delExtend;
-    const int F = gap.delExist;
-    const int EI = gap.insExtend;
-    const int FI = gap.insExist;
-    const int P = gap.pairExtend;
-    const double eE = EXP ( - E / T );
-    const double eF = EXP ( - F / T );
-    const double eEI = EXP ( - EI / T );
-    const double eFI = EXP ( - FI / T );
-    const double eP = EXP ( - P / T );
+    const double eE = EXP ( - gap.delExtend / T );
+    const double eF = EXP ( - gap.delExist / T );
+    const double eEI = EXP ( - gap.insExtend / T );
+    const double eFI = EXP ( - gap.insExist / T );
+    const double eP = EXP ( - gap.pairExtend / T );
 
     assert( gap.insExist == gap.delExist || eP <= 0.0 );
 
@@ -761,8 +707,8 @@ namespace cbrc{
       const double seEI = eEI * scale1;
       const double seP = eP * scale12;
 
-      const uchar* s1 = seqPtr( seq1, isForward, seq1beg );
-      const uchar* s2 = seqPtr( seq2, isForward, seq2pos );
+      const uchar* s1 = isExtendFwd ? seq1 + seq1beg : seq1 - seq1beg;
+      const uchar* s2 = isExtendFwd ? seq2 + seq2pos : seq2 - seq2pos;
 
       const size_t scoreEnd = xa.scoreEndIndex( k );
       const double* bM0 = &bM[ scoreEnd + 1 ];
@@ -786,10 +732,12 @@ namespace cbrc{
 	    const double xM = *fM2 * scale12;
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
-	    const double yM = *bM0 * match_score[ *s1 ][ *s2 ];
+	    const unsigned letter1 = *s1;
+	    const unsigned letter2 = *s2;
+	    const double yM = *bM0 * match_score[letter1][letter2];
 	    const double yD = *bD0;
 	    const double yI = *bI0;
-	    c.emit[*s1][*s2] += (xM + xD + xI) * yM;
+	    c.emit[letter1][letter2] += (xM + xD + xI) * yM;
 	    c.MM += xM * yM;
 	    c.DM += xD * yM;
 	    c.IM += xI * yM;
@@ -810,11 +758,13 @@ namespace cbrc{
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
 	    const double xP = *fP2 * seP;
-	    const double yM = *bM0 * match_score[ *s1 ][ *s2 ];
+	    const unsigned letter1 = *s1;
+	    const unsigned letter2 = *s2;
+	    const double yM = *bM0 * match_score[letter1][letter2];
 	    const double yD = *bD0;
 	    const double yI = *bI0;
 	    const double yP = *bP0;
-	    c.emit[*s1][*s2] += (xM + xD + xI + xP) * yM;
+	    c.emit[letter1][letter2] += (xM + xD + xI + xP) * yM;
 	    c.MM += xM * yM;
 	    c.DM += xD * yM;
 	    c.IM += xI * yM;
@@ -837,17 +787,18 @@ namespace cbrc{
 	}
       }
       else {
-	const ExpMatrixRow* p2 = seqPtr( pssm, isForward, seq2pos );
+	const ExpMatrixRow* p2 = isExtendFwd ? pssm + seq2pos : pssm - seq2pos;
 
 	if (isAffine) {
 	  while (1) { // inner most loop
 	    const double xM = *fM2 * scale12;
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
-	    const double yM = *bM0 * ( *p2 )[ *s1 ];
+	    const unsigned letter1 = *s1;
+	    const double yM = *bM0 * (*p2)[letter1];
 	    const double yD = *bD0;
 	    const double yI = *bI0;
-	    c.emit[*s1][*s2] += (xM + xD + xI) * yM;  // xxx
+	    c.emit[letter1][*s2] += (xM + xD + xI) * yM;  // xxx
 	    c.MM += xM * yM;
 	    c.DM += xD * yM;
 	    c.IM += xI * yM;
@@ -869,11 +820,12 @@ namespace cbrc{
 	    const double xD = *fD1 * seE;
 	    const double xI = *fI1 * seEI;
 	    const double xP = *fP2 * seP;
-	    const double yM = *bM0 * ( *p2 )[ *s1 ];
+	    const unsigned letter1 = *s1;
+	    const double yM = *bM0 * (*p2)[letter1];
 	    const double yD = *bD0;
 	    const double yI = *bI0;
 	    const double yP = *bP0;
-	    c.emit[*s1][*s2] += (xM + xD + xI + xP) * yM;  // xxx
+	    c.emit[letter1][*s2] += (xM + xD + xI + xP) * yM;  // xxx
 	    c.MM += xM * yM;
 	    c.DM += xD * yM;
 	    c.IM += xI * yM;
diff --git a/src/Centroid.hh b/src/Centroid.hh
index 9c3b8f5..afb7603 100644
--- a/src/Centroid.hh
+++ b/src/Centroid.hh
@@ -25,6 +25,7 @@ namespace cbrc{
     ExpectedCount ();
     std::ostream& write (std::ostream& os) const;
   };
+
   /**
    * (1) Forward and backward algorithm on the DP region given by Xdrop algorithm
    * (2) \gamma-centroid decoding
@@ -40,23 +41,31 @@ namespace cbrc{
                    const uchar* sequenceBeg, const uchar* qualityBeg );
     void setOutputType( int m ) { outputType = m; }
 
-    void reset( ) {
+    // start1 is the index of the first letter to look at in seq1
+    // start2 is the index of the first letter to look at in seq2
+
+    void doForwardBackwardAlgorithm(const uchar* seq1, const uchar* seq2,
+				    size_t start1, size_t start2,
+				    bool isExtendFwd,
+				    const GeneralizedAffineGapCosts& gap,
+				    int globality) {
+      seq1 += start1;
+      seq2 += start2;
+      const ExpMatrixRow *pssm = isPssm ? pssmExp2 + start2 : 0;
       numAntidiagonals = xa.numAntidiagonals();
-      bestScore = 0;
-      bestAntiDiagonal = 0;
-      bestPos1 =0;
+      scale.assign(numAntidiagonals + 2, 1.0);
+      forward(seq1, seq2, pssm, isExtendFwd, gap, globality);
+      mD.assign(numAntidiagonals + 2, 0.0);
+      mI.assign(numAntidiagonals + 2, 0.0);
+      backward(seq1, seq2, pssm, isExtendFwd, gap, globality);
     }
 
-    void forward( const uchar* seq1, const uchar* seq2,
-		  size_t start1, size_t start2, bool isForward,
-		  int globality, const GeneralizedAffineGapCosts& gap );
-
-    void backward( const uchar* seq1, const uchar* seq2,
-		   size_t start1, size_t start2, bool isForward,
-		   int globality, const GeneralizedAffineGapCosts& gap );
-
     double dp( double gamma );
-    void traceback( std::vector< SegmentPair >& chunks, double gamma ) const;
+
+    void traceback(std::vector<SegmentPair> &chunks, double gamma) const {
+      if (outputType==5) traceback_centroid(chunks, gamma);
+      if (outputType==6) traceback_ama(chunks, gamma);
+    }
 
     double dp_centroid( double gamma );
     void traceback_centroid( std::vector< SegmentPair >& chunks, double gamma ) const;
@@ -64,23 +73,23 @@ namespace cbrc{
     double dp_ama( double gamma );
     void traceback_ama( std::vector< SegmentPair >& chunks, double gamma ) const;
 
-    void getMatchAmbiguities(std::vector<uchar>& ambiguityCodes,
+    void getMatchAmbiguities(std::vector<char>& ambiguityCodes,
 			     size_t seq1end, size_t seq2end,
 			     size_t length) const;
 
-    void getDeleteAmbiguities(std::vector<uchar>& ambiguityCodes,
+    void getDeleteAmbiguities(std::vector<char>& ambiguityCodes,
 			      size_t seq1end, size_t seq1beg) const;
 
-    void getInsertAmbiguities(std::vector<uchar>& ambiguityCodes,
+    void getInsertAmbiguities(std::vector<char>& ambiguityCodes,
 			      size_t seq2end, size_t seq2beg) const;
 
     double logPartitionFunction() const;  // a.k.a. full score, forward score
 
     // Added by MH (2008/10/10) : compute expected counts for transitions and emissions
-    void computeExpectedCounts ( const uchar* seq1, const uchar* seq2,
-				 size_t start1, size_t start2, bool isForward,
-				 const GeneralizedAffineGapCosts& gap,
-				 ExpectedCount& count ) const;
+    void computeExpectedCounts(const uchar* seq1, const uchar* seq2,
+			       size_t start1, size_t start2, bool isExtendFwd,
+			       const GeneralizedAffineGapCosts& gap,
+			       ExpectedCount& count) const;
 
   private:
     typedef double ExpMatrixRow[scoreMatrixRowSize];
@@ -119,24 +128,30 @@ namespace cbrc{
     size_t bestAntiDiagonal;
     size_t bestPos1;
 
+    void forward(const uchar* seq1, const uchar* seq2,
+		 const ExpMatrixRow* pssm, bool isExtendFwd,
+		 const GeneralizedAffineGapCosts& gap, int globality);
+
+    void backward(const uchar* seq1, const uchar* seq2,
+		  const ExpMatrixRow* pssm, bool isExtendFwd,
+		  const GeneralizedAffineGapCosts& gap, int globality);
+
     void initForwardMatrix();
     void initBackwardMatrix();
-    void initDecodingMatrix();
 
-    void updateScore( double score, size_t antiDiagonal, size_t cur );
+    void updateScore(double score, size_t antiDiagonal, size_t cur) {
+      if (bestScore < score) {
+	bestScore = score;
+	bestAntiDiagonal = antiDiagonal;
+	bestPos1 = cur;
+      }
+    }
 
     // start of the x-drop region (i.e. number of skipped seq1 letters
     // before the x-drop region) for this antidiagonal
     size_t seq1start( size_t antidiagonal ) const {
       return xa.scoreEndIndex( antidiagonal ) - xa.scoreOrigin( antidiagonal );
     }
-
-    // get a pointer into a sequence, taking start and direction into account
-    template < typename T >
-    static const T* seqPtr( const T* seq, bool isForward, size_t pos ){
-      if( isForward ) return seq + pos;
-      else            return seq - pos - 1;
-    }
   };
 
 }
diff --git a/src/CyclicSubsetSeed.cc b/src/CyclicSubsetSeed.cc
index 8017fde..bc2f525 100644
--- a/src/CyclicSubsetSeed.cc
+++ b/src/CyclicSubsetSeed.cc
@@ -2,7 +2,7 @@
 
 #include "CyclicSubsetSeed.hh"
 #include "CyclicSubsetSeedData.hh"
-#include "io.hh"
+#include "zio.hh"
 #include "stringify.hh"
 #include <algorithm>  // sort
 #include <sstream>
@@ -23,7 +23,7 @@ std::string CyclicSubsetSeed::stringFromName( const std::string& name ){
     if( name == subsetSeeds[i].name )
       return subsetSeeds[i].text;
 
-  return slurp( name );
+  return slurp( name.c_str() );
 }
 
 std::string
@@ -122,8 +122,9 @@ void CyclicSubsetSeed::appendPosition( std::istream& inputLine,
     std::string subset;
 
     for( size_t i = 0; i < inputWord.size(); ++i ){
-      uchar upper = std::toupper( inputWord[i] );
-      uchar lower = std::tolower( inputWord[i] );
+      uchar c = inputWord[i];
+      uchar upper = std::toupper(c);
+      uchar lower = std::tolower(c);
       addLetter( &numbersToSubsets[0], upper, subsetNum, letterCode );
       subset += upper;
       if( !isMaskLowercase && lower != upper ){
diff --git a/src/LastEvaluer.cc b/src/LastEvaluer.cc
index 72b1dae..372123a 100644
--- a/src/LastEvaluer.cc
+++ b/src/LastEvaluer.cc
@@ -6,6 +6,7 @@
 
 #include <algorithm>
 #include <cstring>
+#include <iostream>
 
 #define COUNTOF(a) (sizeof (a) / sizeof *(a))
 
@@ -263,7 +264,8 @@ void LastEvaluer::init(const char *matrixName,
 		       int insEpen,
 		       int frameshiftCost,
 		       const GeneticCode &geneticCode,
-		       bool isStandardGeneticCode) {
+		       bool isStandardGeneticCode,
+		       int verbosity) {
   const double lambdaTolerance = 0.01;
   const double kTolerance = 0.05;
   const double maxMegabytes = 500;
@@ -359,14 +361,20 @@ void LastEvaluer::init(const char *matrixName,
       evaluer.set_gapped_computation_parameters_simplified(maxSeconds);
       for (int i = 0; ; ++i) {
 	double t = Sls::default_importance_sampling_temperature + 0.01 * i;
+	if (verbosity > 0) std::cerr << "try temperature=" << t << " ";
 	try {
 	  evaluer.initGapped(alphabetSize, &matrix[0],
 			     letterFreqs2, letterFreqs1,
 			     delOpen, delEpen, insOpen, insEpen,
 			     true, lambdaTolerance, kTolerance,
 			     0, maxMegabytes, randomSeed, t);
+	  if (verbosity > 0) std::cerr << "OK\n";
 	  break;
 	} catch (const Sls::error& e) {
+	  if (verbosity > 0) std::cerr << "NG\n";
+	  if (verbosity > 1) {
+	    std::cerr << "ALP: " << e.error_code << ": " << e.st;
+	  }
 	  if (i == 20) throw;
 	}
       }
diff --git a/src/LastEvaluer.hh b/src/LastEvaluer.hh
index eee084e..4b05d1f 100644
--- a/src/LastEvaluer.hh
+++ b/src/LastEvaluer.hh
@@ -50,7 +50,8 @@ public:
 	    int insEpen,
 	    int frameshiftCost,
 	    const GeneticCode &geneticCode,
-	    bool isStandardGeneticCode);
+	    bool isStandardGeneticCode,
+	    int verbosity);
 
   void setSearchSpace(double databaseLength,  // number of database letters
 		      double numOfStrands) {  // 1 or 2
diff --git a/src/LastdbArguments.cc b/src/LastdbArguments.cc
index 95060ec..66535f7 100644
--- a/src/LastdbArguments.cc
+++ b/src/LastdbArguments.cc
@@ -31,6 +31,7 @@ LastdbArguments::LastdbArguments() :
   tantanSetting(0),
   isCaseSensitive(false),
   seedPatterns(0),
+  strand(1),
   volumeSize(-1),
   indexStep(1),
   minimizerWindow(1),
@@ -66,9 +67,11 @@ Advanced Options (default settings):\n\
     + stringify(indexStep) + ")\n\
 -W: use \"minimum\" positions in sliding windows of W consecutive positions ("
     + stringify(minimizerWindow) + ")\n\
+-S: strand: 0=reverse, 1=forward, 2=both ("
+    + stringify(strand) + ")\n\
 -s: volume size (unlimited)\n\
 -Q: input format: 0=fasta, 1=fastq-sanger, 2=fastq-solexa, 3=fastq-illumina ("
-      + stringify(inputFormat) + ")\n\
+    + stringify(inputFormat) + ")\n\
 -P: number of parallel threads ("
     + stringify(numOfThreads) + ")\n\
 -m: seed pattern\n\
@@ -86,8 +89,10 @@ Report bugs to: last-align (ATmark) googlegroups (dot) com\n\
 LAST home page: http://last.cbrc.jp/\n\
 ";
 
+  static const char sOpts[] = "hVpR:cm:S:s:w:W:P:u:a:i:b:C:xvQ:";
+
   int c;
-  while( (c = myGetopt(argc, argv, "hVpR:cm:s:w:W:P:u:a:i:b:C:xvQ:")) != -1 ) {
+  while ((c = myGetopt(argc, argv, sOpts)) != -1) {
     switch(c){
     case 'h':
       std::cout << help;
@@ -113,6 +118,10 @@ LAST home page: http://last.cbrc.jp/\n\
     case 'm':
       seedPatterns.push_back(optarg);
       break;
+    case 'S':
+      unstringify( strand, optarg );
+      if( strand < 0 || strand > 2 ) badopt( c, optarg );
+      break;
     case 's':
       unstringifySize( volumeSize, optarg );
       break;
diff --git a/src/LastdbArguments.hh b/src/LastdbArguments.hh
index 4eb25db..95d7a72 100644
--- a/src/LastdbArguments.hh
+++ b/src/LastdbArguments.hh
@@ -34,6 +34,7 @@ struct LastdbArguments{
   int tantanSetting;
   bool isCaseSensitive;
   std::vector< std::string > seedPatterns;
+  int strand;
   size_t volumeSize;
   size_t indexStep;
   size_t minimizerWindow;
diff --git a/src/MultiSequence.cc b/src/MultiSequence.cc
index 1f1d05a..463ca8a 100644
--- a/src/MultiSequence.cc
+++ b/src/MultiSequence.cc
@@ -19,12 +19,21 @@ void MultiSequence::initForAppending( indexT padSizeIn ){
 }
 
 void MultiSequence::reinitForAppending(){
-  seq.v.erase( seq.v.begin(), seq.v.begin() + ends.v.back() - padSize );
-  names.v.erase( names.v.begin(),
-                 names.v.begin() + nameEnds.v[ finishedSequences() ] );
+  size_t n = finishedSequences();
+  size_t s = padBeg(n);
+
+  seq.v.erase(seq.v.begin(), seq.v.begin() + s);
+  names.v.erase(names.v.begin(), names.v.begin() + nameEnds.v[n]);
   ends.v.resize(1);
   nameEnds.v.resize(1);
   if( !names.v.empty() ) nameEnds.v.push_back( names.v.size() );
+
+  qualityScores.v.erase(qualityScores.v.begin(),
+			qualityScores.v.begin() + s * qualsPerLetter());
+
+  if (!pssm.empty()) {
+    pssm.erase(pssm.begin(), pssm.begin() + s * scoreMatrixRowSize);
+  }
 }
 
 void MultiSequence::fromFiles( const std::string& baseName, indexT seqCount,
@@ -91,34 +100,71 @@ MultiSequence::appendFromFasta( std::istream& stream, indexT maxSeqLen ){
     ++inpos;
   }
 
-  if( isFinishable(maxSeqLen) ) finish();
+  if (isRoomToAppendPad(maxSeqLen)) finish();
   return stream;
 }
 
-void MultiSequence::finish(){
-  assert( !isFinished() );
-  seq.v.insert( seq.v.end(), padSize, ' ' );
-  ends.v.push_back( seq.v.size() );
-  assert( ends.v.back() == seq.v.size() );
+MultiSequence::indexT MultiSequence::whichSequence( indexT coordinate ) const{
+  const indexT* u = std::upper_bound( ends.begin(), ends.end(), coordinate );
+  assert( u != ends.begin() && u != ends.end() );
+  return u - ends.begin() - 1;
 }
 
-void MultiSequence::unfinish(){
-  assert( isFinished() );
-  ends.v.pop_back();
-  seq.v.erase( seq.v.end() - padSize, seq.v.end() );
+static void reverseComplementPssm(int *beg, int *end,
+				  const uchar *complement) {
+  while (beg < end) {
+    end -= scoreMatrixRowSize;
+    for (unsigned i = 0; i < scoreMatrixRowSize; ++i) {
+      unsigned j = complement[i];
+      if (beg < end || i < j) std::swap(beg[i], end[j]);
+    }
+    beg += scoreMatrixRowSize;
+  }
 }
 
-bool MultiSequence::isFinishable( indexT maxSeqLen ) const{
-  return seq.v.size() + padSize <= maxSeqLen;
-}
+void MultiSequence::reverseComplementOneSequence(indexT seqNum,
+						 const uchar *complement) {
+  size_t b = seqBeg(seqNum);
+  size_t e = seqEnd(seqNum);
 
-MultiSequence::indexT MultiSequence::whichSequence( indexT coordinate ) const{
-  const indexT* u = std::upper_bound( ends.begin(), ends.end(), coordinate );
-  assert( u != ends.begin() && u != ends.end() );
-  return u - ends.begin() - 1;
+  uchar *s = seqWriter();
+  std::reverse(s + b, s + e);
+  for (size_t i = b; i < e; ++i) {
+    s[i] = complement[s[i]];
+  }
+
+  reverse(qualityScores.v.begin() + b * qualsPerLetter(),
+	  qualityScores.v.begin() + e * qualsPerLetter());
+
+  if (!pssm.empty()) {
+    int *p = &pssm[0];
+    reverseComplementPssm(p + b * scoreMatrixRowSize,
+			  p + e * scoreMatrixRowSize, complement);
+  }
+
+  char &strandChar = names.v[nameEnds.v[seqNum + 1] - 1];
+  strandChar = "\n\t"[strandChar == '\n'];
 }
 
-std::string MultiSequence::seqName( indexT seqNum ) const{
-  return std::string( names.begin() + nameEnds[ seqNum ],
-		      names.begin() + nameEnds[ seqNum + 1 ] );
+void MultiSequence::duplicateOneSequence(indexT seqNum) {
+  indexT nameBeg = nameEnds[seqNum];
+  indexT nameEnd = nameEnds[seqNum + 1];
+  for (indexT i = nameBeg; i < nameEnd; ++i) {
+    names.v.push_back(names.v[i]);
+  }
+  finishName();
+
+  size_t b = seqBeg(seqNum);
+  size_t e = padEnd(seqNum);
+
+  for (size_t i = b; i < e; ++i) {
+    seq.v.push_back(seq.v[i]);
+  }
+  ends.v.push_back(seq.v.size());
+
+  for (size_t i = b * qualsPerLetter(); i < e * qualsPerLetter(); ++i) {
+    qualityScores.v.push_back(qualityScores.v[i]);
+  }
+
+  assert(pssm.empty());  // implement this if & when needed
 }
diff --git a/src/MultiSequence.hh b/src/MultiSequence.hh
index 3876058..465634a 100644
--- a/src/MultiSequence.hh
+++ b/src/MultiSequence.hh
@@ -29,6 +29,12 @@ class MultiSequence{
   // re-initialize, but keep the last sequence if it is unfinished
   void reinitForAppending();
 
+  void eraseAllButTheLastSequence() {
+    ends.v.pop_back();
+    reinitForAppending();
+    ends.v.push_back(seq.v.size());
+  }
+
   // read seqCount finished sequences, and their names, from binary files
   void fromFiles( const std::string& baseName, indexT seqCount,
                   size_t qualitiesPerLetter );
@@ -53,21 +59,12 @@ class MultiSequence{
                                 const uchar* lettersToNumbers,
                                 bool isMaskLowercase );
 
-  // finish the last sequence: add final pad and end coordinate
-  void finish();
-
-  // unfinish the last sequence: remove final pad and end coordinate
-  void unfinish();
-
   // did we finish reading the last sequence?
   bool isFinished() const{ return ends.size() == nameEnds.size(); }
 
   // how many finished sequences are there?
   indexT finishedSequences() const{ return ends.size() - 1; }
 
-  // total length of finished sequences plus delimiters
-  indexT finishedSize() const{ return ends.back(); }
-
   // total length of finished and unfinished sequences plus delimiters
   size_t unfinishedSize() const{ return seq.size(); }
 
@@ -80,7 +77,21 @@ class MultiSequence{
   indexT padEnd( indexT seqNum ) const{ return ends[seqNum+1]; }
   indexT seqLen( indexT seqNum ) const{ return seqEnd(seqNum)-seqBeg(seqNum); }
   indexT padLen( indexT seqNum ) const{ return padEnd(seqNum)-padBeg(seqNum); }
-  std::string seqName( indexT seqNum ) const;
+
+  std::string seqName(indexT seqNum) const {
+    const char *n = names.begin();
+    indexT b = nameEnds[seqNum];
+    indexT e = nameEnds[seqNum + 1];
+    if (e > b && n[e-1] <= ' ') --e;
+    return std::string(n + b, n + e);
+  }
+
+  char strand(indexT seqNum) const {
+    const char *n = names.begin();
+    indexT b = nameEnds[seqNum];
+    indexT e = nameEnds[seqNum + 1];
+    return (e > b && n[e-1] == '\t') ? '-' : '+';
+  }
 
   // get a pointer to the start of the sequence data
   const uchar* seqReader() const{ return seq.begin(); }
@@ -94,19 +105,17 @@ class MultiSequence{
         : reinterpret_cast< const ScoreMatrixRow* >( &pssm[0] );
   }
 
-  /* */ ScoreMatrixRow* pssmWriter()      {
-    return pssm.empty() ? 0
-        : reinterpret_cast<       ScoreMatrixRow* >( &pssm[0] );
-  }
-
   // get a pointer to the start of the quality data
   const uchar* qualityReader() const{ return qualityScores.begin(); }
-  /***/ uchar* qualityWriter()      { return &qualityScores.v[0]; }
 
   // How many quality scores are there per letter?  There might be
   // none at all, one per letter, or several (e.g. 4) per letter.
   size_t qualsPerLetter() const { return qualityScoresPerLetter; }
 
+  void reverseComplementOneSequence(indexT seqNum, const uchar *complement);
+
+  void duplicateOneSequence(indexT seqNum);
+
  private:
   indexT padSize;  // number of delimiter chars between sequences
   VectorOrMmap<uchar> seq;  // concatenated sequences
@@ -140,57 +149,30 @@ class MultiSequence{
 
   void addName(const std::string &name) {  // add a new sequence name
     names.v.insert(names.v.end(), name.begin(), name.end());
+    names.v.push_back('\n');
     finishName();
   }
 
-  void finishQual() {  // add delimiter to the end of the quality scores
+  void appendQualPad() {  // add delimiter to the end of the quality scores
     uchar padQualityScore = 64;  // should never be used, but a valid value
     size_t s = padSize * qualityScoresPerLetter;
     qualityScores.v.insert(qualityScores.v.end(), s, padQualityScore);
   }
 
-  void finishPssm() {  // add delimiter to the end of the PSSM
+  void appendPssmPad() {  // add delimiter to the end of the PSSM
     pssm.insert(pssm.end(), padSize * scoreMatrixRowSize, -INF);
   }
 
-  // can we finish the last sequence and stay within the memory limit?
-  bool isFinishable( indexT maxSeqLen ) const;
-};
-
-inline void reverseComplementPssm(ScoreMatrixRow *beg, ScoreMatrixRow *end,
-				  const uchar *complement) {
-  while (beg < end) {
-    --end;
-    for (unsigned i = 0; i < scoreMatrixRowSize; ++i) {
-      unsigned j = complement[i];
-      if (beg < end || i < j) std::swap((*beg)[i], (*end)[j]);
-    }
-    ++beg;
+  bool isRoomToAppendPad(indexT maxSeqLen) const {
+    return seq.v.size() <= maxSeqLen && padSize <= maxSeqLen - seq.v.size();
   }
-}
-
-inline void reverseComplementOneSequence(MultiSequence &m,
-					 const uchar *complement,
-					 size_t seqNum) {
-  size_t b = m.seqBeg(seqNum);
-  size_t e = m.seqEnd(seqNum);
 
-  uchar *s = m.seqWriter();
-  std::reverse(s + b, s + e);
-  for (size_t i = b; i < e; ++i) {
-    s[i] = complement[s[i]];
-  }
-
-  size_t qpl = m.qualsPerLetter();
-  if (qpl) {
-    std::reverse(m.qualityWriter() + b * qpl, m.qualityWriter() + e * qpl);
-  }
-
-  ScoreMatrixRow *p = m.pssmWriter();
-  if (p) {
-    reverseComplementPssm(p + b, p + e, complement);
+  // finish the last sequence: add final pad and end coordinate
+  void finish() {
+    seq.v.insert(seq.v.end(), padSize, ' ');
+    ends.v.push_back(seq.v.size());
   }
-}
+};
 
 // Divide the sequences into a given number of roughly-equally-sized
 // chunks, and return the first sequence in the Nth chunk.
@@ -198,7 +180,7 @@ inline size_t firstSequenceInChunk(const MultiSequence &m,
 				   size_t numOfChunks, size_t chunkNum) {
   size_t numOfSeqs = m.finishedSequences();
   size_t beg = m.seqBeg(0);
-  size_t end = m.padEnd(numOfSeqs - 1) - 1;
+  size_t end = m.seqBeg(numOfSeqs) - 1;
   unsigned long long len = end - beg;  // try to avoid overflow
   size_t pos = beg + len * chunkNum / numOfChunks;
   size_t seqNum = m.whichSequence(pos);
diff --git a/src/MultiSequenceQual.cc b/src/MultiSequenceQual.cc
index 415fa14..d83f7da 100644
--- a/src/MultiSequenceQual.cc
+++ b/src/MultiSequenceQual.cc
@@ -6,9 +6,6 @@
 #include <cctype>  // toupper
 #include <limits>  // numeric_limits
 
-// make C++ tolerable:
-#define CI(type) std::vector<type>::const_iterator
-
 #define ERR(x) throw std::runtime_error(x)
 
 using namespace cbrc;
@@ -17,12 +14,7 @@ std::istream&
 MultiSequence::appendFromFastq( std::istream& stream, indexT maxSeqLen ){
   // initForAppending:
   qualityScoresPerLetter = 1;
-  if( qualityScores.v.empty() ) finishQual();
-
-  // reinitForAppending:
-  if( qualityScores.v.size() > seq.v.size() )
-    qualityScores.v.erase( qualityScores.v.begin(),
-                           qualityScores.v.end() - seq.v.size() );
+  if( qualityScores.v.empty() ) appendQualPad();
 
   if( isFinished() ){
     uchar c = '@';
@@ -46,9 +38,9 @@ MultiSequence::appendFromFastq( std::istream& stream, indexT maxSeqLen ){
     if( seq.v.size() != qualityScores.v.size() ) ERR( "bad FASTQ data" );
   }
 
-  if( isFinishable(maxSeqLen) ){
+  if (isRoomToAppendPad(maxSeqLen)) {
     finish();
-    finishQual();
+    appendQualPad();
   }
 
   return stream;
@@ -57,16 +49,9 @@ MultiSequence::appendFromFastq( std::istream& stream, indexT maxSeqLen ){
 std::istream&
 MultiSequence::appendFromPrb( std::istream& stream, indexT maxSeqLen,
 			      unsigned alphSize, const uchar decode[] ){
-  size_t qualSize = seq.v.size() * alphSize;
-
   // initForAppending:
   qualityScoresPerLetter = alphSize;
-  if( qualityScores.v.empty() ) finishQual();
-
-  // reinitForAppending:
-  if( qualityScores.v.size() > qualSize )
-    qualityScores.v.erase( qualityScores.v.begin(),
-                           qualityScores.v.end() - qualSize );
+  if( qualityScores.v.empty() ) appendQualPad();
 
   if( isFinished() ){
     std::string line;
@@ -78,6 +63,8 @@ MultiSequence::appendFromPrb( std::istream& stream, indexT maxSeqLen,
     std::string name = stringify( ++lineCount );
     addName(name);
 
+    size_t oldSize = qualityScores.v.size();
+
     std::istringstream iss(line);
     int q;
     while( iss >> q ){
@@ -86,18 +73,19 @@ MultiSequence::appendFromPrb( std::istream& stream, indexT maxSeqLen,
       qualityScores.v.push_back( q + 64 );  // ASCII-encode the quality score
     }
 
-    if( qualityScores.v.size() % alphSize != 0 ) ERR( "bad PRB data" );
+    size_t newSize = qualityScores.v.size();
+    if (newSize % qualityScoresPerLetter != 0) ERR("bad PRB data");
 
-    for( CI(uchar) i = qualityScores.v.begin() + qualSize;
-	 i < qualityScores.v.end(); i += alphSize ){
-      unsigned maxIndex = std::max_element( i, i + alphSize ) - i;
+    for (size_t i = oldSize; i < newSize; i += qualityScoresPerLetter) {
+      const uchar *q = &qualityScores.v[i];
+      unsigned maxIndex = std::max_element(q, q + qualityScoresPerLetter) - q;
       seq.v.push_back( decode[ maxIndex ] );
     }
   }
 
-  if( isFinishable(maxSeqLen) ){
+  if (isRoomToAppendPad(maxSeqLen)) {
     finish();
-    finishQual();
+    appendQualPad();
   }
 
   return stream;
@@ -118,7 +106,8 @@ std::istream& MultiSequence::readPssmHeader( std::istream& stream ){
 
     while( iss >> word ){
       if( word.size() == 1 ){
-        uchar letter = std::toupper( word[0] );
+	uchar c = word[0];
+        uchar letter = std::toupper(c);
         // allow for PSI-BLAST format, with repeated letters:
         if( pssmColumnLetters.size() && pssmColumnLetters[0] == letter ) break;
         pssmColumnLetters.push_back(letter);
@@ -140,14 +129,8 @@ std::istream&
 MultiSequence::appendFromPssm( std::istream& stream, indexT maxSeqLen,
                                const uchar* lettersToNumbers,
                                bool isMaskLowercase ){
-  size_t pssmSize = seq.v.size() * scoreMatrixRowSize;
-
   // initForAppending:
-  if( pssm.empty() ) finishPssm();
-
-  // reinitForAppending:
-  if( pssm.size() > pssmSize )
-    pssm.erase( pssm.begin(), pssm.end() - pssmSize );
+  if( pssm.empty() ) appendPssmPad();
 
   if( isFinished() ){
     readPssmHeader(stream);
@@ -181,16 +164,17 @@ MultiSequence::appendFromPssm( std::istream& stream, indexT maxSeqLen,
       if( isMaskLowercase ) scores[i] = std::min(scores[i], 0);
       if( maskColumn != column ) row[maskColumn] = scores[i];
     }
-    unsigned delimiterColumn = lettersToNumbers[' '];
+    uchar delimiter = ' ';
+    unsigned delimiterColumn = lettersToNumbers[delimiter];
     assert( delimiterColumn < scoreMatrixRowSize );
     row[delimiterColumn] = -INF;
 
     stream.ignore( std::numeric_limits<std::streamsize>::max(), '\n' );
   }
 
-  if( isFinishable(maxSeqLen) ){
+  if (isRoomToAppendPad(maxSeqLen)) {
     finish();
-    finishPssm();
+    appendPssmPad();
   }
 
   if( !stream.bad() ) stream.clear();
diff --git a/src/ScoreMatrix.cc b/src/ScoreMatrix.cc
index 8c68032..6f984ec 100644
--- a/src/ScoreMatrix.cc
+++ b/src/ScoreMatrix.cc
@@ -2,7 +2,7 @@
 
 #include "ScoreMatrix.hh"
 #include "ScoreMatrixData.hh"
-#include "io.hh"
+#include "zio.hh"
 #include <sstream>
 #include <iomanip>
 #include <algorithm>  // min, max
@@ -32,7 +32,7 @@ std::string ScoreMatrix::stringFromName( const std::string& name ){
     if( n == scoreMatrices[i].name )
       return scoreMatrices[i].text;
 
-  return slurp( n );
+  return slurp( n.c_str() );
 }
 
 void ScoreMatrix::matchMismatch( int match, int mismatch,
@@ -104,7 +104,8 @@ void ScoreMatrix::init( const uchar encode[] ){
   }
 
   // set a hugely negative score for the delimiter symbol:
-  uchar z = encode[' '];
+  uchar delimiter = ' ';
+  uchar z = encode[delimiter];
   assert( z < MAT );
   for( unsigned i = 0; i < MAT; ++i ){
     caseSensitive[z][i] = -INF;
diff --git a/src/alp/sls_pvalues.cpp b/src/alp/sls_pvalues.cpp
index 515a41c..2220477 100644
--- a/src/alp/sls_pvalues.cpp
+++ b/src/alp/sls_pvalues.cpp
@@ -1213,14 +1213,14 @@ ALP_set_of_parameters &gumbel_params_)
 bool pvalues::assert_Gumbel_parameters(
 const ALP_set_of_parameters &par_)//a set of Gumbel parameters
 {
-		if(par_.lambda<=0||
+		if(!(par_.lambda>0)||
 		par_.lambda_error<0||
 
 		//the parameters C and K_C are not necessary for the P-value calculation
 		//par_.C<0||
 		//par_.C_error<0||
 
-		par_.K<=0||
+		!(par_.K>0)||
 		par_.K_error<0||
 
 		par_.a_I<0||
diff --git a/src/io.cc b/src/io.cc
deleted file mode 100644
index f1abcce..0000000
--- a/src/io.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2008, 2009, 2010 Martin C. Frith
-
-#include "io.hh"
-#include <iostream>
-#include <iterator>  // istreambuf_iterator
-
-namespace cbrc{
-
-std::istream& openIn( const std::string& fileName, mcf::izstream& ifs ){
-  if( fileName == "-" ) return std::cin;
-  ifs.open( fileName.c_str() );
-  if( !ifs ) throw std::runtime_error("can't open file: " + fileName);
-  return ifs;
-}
-
-std::string slurp( const std::string& fileName ){
-  mcf::izstream inFileStream;
-  std::istream& in = openIn( fileName, inFileStream );
-  std::istreambuf_iterator<char> beg(in), end; 
-  return std::string( beg, end );
-}
-
-}  // end namespace cbrc
diff --git a/src/io.hh b/src/io.hh
index c08cd8c..24eadf9 100644
--- a/src/io.hh
+++ b/src/io.hh
@@ -1,14 +1,8 @@
 // Copyright 2008, 2009, 2010 Martin C. Frith
 
-// Generally useful input/output functions, mostly for binary reading
-// and writing of vectors.
+#ifndef IO_HH
+#define IO_HH
 
-#ifndef IO_H
-#define IO_H
-
-#include "mcf_zstream.hh"
-
-#include <vector>
 #include <string>
 #include <stdexcept>
 #include <fstream>
@@ -16,30 +10,6 @@
 
 namespace cbrc{
 
-// open an input file, but if the name is "-", just return cin
-std::istream& openIn( const std::string& fileName, mcf::izstream& ifs );
-
-// read a file into a string, but if the name is "-", read cin
-std::string slurp( const std::string& fileName );
-
-template <typename T>  // T should be a vector-iterator or a pointer
-void memoryFromStream( T beg, T end, std::istream& s ){
-  assert( beg < end );
-  enum { CHUNK_SIZE = 1073741824 };  // need to do big reads in chunks: why?
-  char * b = reinterpret_cast<char*>(&(*beg));
-  char * e = reinterpret_cast<char*>(&(*end));
-  while( e - b > CHUNK_SIZE ){
-    s.read( b, CHUNK_SIZE );
-    b += CHUNK_SIZE;
-  }
-  s.read( b, e - b );
-}
-
-template<typename T>
-void vectorFromStream( std::vector<T>& v, std::istream& s ){
-  memoryFromStream( v.begin(), v.end(), s );
-}
-
 template<typename T>  // T should be a vector-iterator or a pointer
 void memoryToStream( T beg, T end, std::ostream& s ){
   assert( beg < end );
@@ -53,26 +23,6 @@ void memoryToStream( T beg, T end, std::ostream& s ){
   s.write( b, e - b );
 }
 
-template<typename T>
-void vectorToStream( const std::vector<T>& v, std::ostream& s ){
-  memoryToStream( v.begin(), v.end(), s );
-}
-
-template<typename T>  // T should be a vector-iterator or a pointer
-void memoryFromBinaryFile( T beg, T end, const std::string& fileName ){
-  if( beg == end ) return;
-  std::ifstream file( fileName.c_str(), std::ios::binary );
-  if( !file ) throw std::runtime_error( "can't open file: " + fileName );
-  memoryFromStream( beg, end, file );
-  if( !file ) throw std::runtime_error( "can't read file: " + fileName );
-}
-
-template<typename T>
-void vectorFromBinaryFile( std::vector<T>& v,
-			   const std::string& fileName ){
-  memoryFromBinaryFile( v.begin(), v.end(), fileName );
-}
-
 template<typename T>  // T should be a vector-iterator or a pointer
 void memoryToBinaryFile( T beg, T end, const std::string& fileName ){
   if( beg == end ) return;
@@ -83,11 +33,6 @@ void memoryToBinaryFile( T beg, T end, const std::string& fileName ){
   if( !file ) throw std::runtime_error( "can't write file: " + fileName );
 }
 
-template<typename T>
-void vectorToBinaryFile( const std::vector<T>& v,
-			 const std::string& fileName ){
-  memoryToBinaryFile( v.begin(), v.end(), fileName );
 }
 
-}  // end namespace cbrc
-#endif  // IO_H
+#endif
diff --git a/src/last-pair-probs.cc b/src/last-pair-probs.cc
index 21adc33..220d3d0 100644
--- a/src/last-pair-probs.cc
+++ b/src/last-pair-probs.cc
@@ -3,7 +3,7 @@
 
 #include "last-pair-probs.hh"
 
-#include "io.hh"
+#include "zio.hh"
 #include "stringify.hh"
 
 #include <algorithm>
@@ -12,7 +12,6 @@
 #include <cmath>
 #include <cstdlib>  // atof
 #include <cstring>  // strncmp
-#include <fstream>
 #include <iostream>
 #include <sstream>
 #include <stdexcept>
@@ -617,8 +616,10 @@ void lastPairProbs(LastPairProbsOptions& opts) {
 
   if (!opts.isFraglen || !opts.isSdev) {
     mcf::izstream inFile1, inFile2;
-    std::istream& in1 = (n > 0) ? cbrc::openIn(inputs[0], inFile1) : std::cin;
-    std::istream& in2 = (n > 1) ? cbrc::openIn(inputs[1], inFile2) : in1;
+    std::istream& in1 =
+      (n > 0) ? cbrc::openIn(inputs[0].c_str(), inFile1) : std::cin;
+    std::istream& in2 =
+      (n > 1) ? cbrc::openIn(inputs[1].c_str(), inFile2) : in1;
     if (n < 2) skipOneBatchMarker(in1);
     std::vector<long> lengths = readQueryPairs1pass(in1, in2, 1.0, 1.0,
 						    opts.circular);
@@ -627,8 +628,10 @@ void lastPairProbs(LastPairProbsOptions& opts) {
 
   if (!opts.estdist) {
     mcf::izstream inFile1, inFile2;
-    std::istream& in1 = (n > 0) ? cbrc::openIn(inputs[0], inFile1) : std::cin;
-    std::istream& in2 = (n > 1) ? cbrc::openIn(inputs[1], inFile2) : in1;
+    std::istream& in1 =
+      (n > 0) ? cbrc::openIn(inputs[0].c_str(), inFile1) : std::cin;
+    std::istream& in2 =
+      (n > 1) ? cbrc::openIn(inputs[1].c_str(), inFile2) : in1;
     AlignmentParameters params1 = readHeaderOrDie(in1);
     AlignmentParameters params2 = (n > 1) ? readHeaderOrDie(in2) : params1;
     calculateScorePieces(opts, params1, params2);
diff --git a/src/last.hh b/src/last.hh
new file mode 100644
index 0000000..6fd3f69
--- /dev/null
+++ b/src/last.hh
@@ -0,0 +1,55 @@
+// Copyright 2017 Martin C. Frith
+
+#ifndef LAST_HH
+#define LAST_HH
+
+#include "Alphabet.hh"
+#include "MultiSequence.hh"
+#include "SequenceFormat.hh"
+#include "qualityScoreUtil.hh"
+
+namespace cbrc {
+
+typedef MultiSequence::indexT indexT;
+
+inline void err(const char *s) { throw std::runtime_error(s); }
+
+// Read the next sequence, adding it to the MultiSequence
+inline std::istream &appendSequence(MultiSequence &m, std::istream &in,
+				    indexT maxSeqLen, sequenceFormat::Enum f,
+				    const Alphabet &a, bool isKeepLowercase,
+				    bool isMaskLowercase) {
+  if (m.finishedSequences() == 0) maxSeqLen = -1;
+
+  size_t oldSize = m.unfinishedSize();
+
+  if (f == sequenceFormat::fasta) {
+    m.appendFromFasta(in, maxSeqLen);
+  } else if (f == sequenceFormat::prb) {
+    m.appendFromPrb(in, maxSeqLen, a.size, a.decode);
+  } else if (f == sequenceFormat::pssm) {
+    m.appendFromPssm(in, maxSeqLen, a.encode, isMaskLowercase);
+  } else {
+    m.appendFromFastq(in, maxSeqLen);
+  }
+
+  if (!m.isFinished() && m.finishedSequences() == 0) {
+    err("encountered a sequence that's too long");
+  }
+
+  size_t newSize = m.unfinishedSize();
+
+  // encode the newly-read sequence
+  a.tr(m.seqWriter() + oldSize, m.seqWriter() + newSize, isKeepLowercase);
+
+  if (isPhred(f)) {
+    checkQualityCodes(m.qualityReader() + oldSize,
+		      m.qualityReader() + newSize, qualityOffset(f));
+  }  // assumes one quality code per letter
+
+  return in;
+}
+
+}
+
+#endif
diff --git a/src/lastal.cc b/src/lastal.cc
index 47bff6e..cda3415 100644
--- a/src/lastal.cc
+++ b/src/lastal.cc
@@ -2,11 +2,12 @@
 
 // BLAST-like pair-wise sequence alignment, using suffix arrays.
 
+#include "last.hh"
+
 #include "LastalArguments.hh"
 #include "QualityPssmMaker.hh"
 #include "OneQualityScoreMatrix.hh"
 #include "TwoQualityScoreMatrix.hh"
-#include "qualityScoreUtil.hh"
 #include "LambdaCalculator.hh"
 #include "LastEvaluer.hh"
 #include "GeneticCode.hh"
@@ -18,8 +19,6 @@
 #include "SegmentPairPot.hh"
 #include "SegmentPair.hh"
 #include "ScoreMatrix.hh"
-#include "Alphabet.hh"
-#include "MultiSequence.hh"
 #include "TantanMasker.hh"
 #include "DiagonalTable.hh"
 #include "GeneralizedAffineGapCosts.hh"
@@ -27,7 +26,7 @@
 #include "gaplessXdrop.hh"
 #include "gaplessPssmXdrop.hh"
 #include "gaplessTwoQualityXdrop.hh"
-#include "io.hh"
+#include "zio.hh"
 #include "stringify.hh"
 #include "threadUtil.hh"
 #include <iomanip>  // setw
@@ -54,7 +53,6 @@ struct LastAligner {  // data that changes between queries
 };
 
 namespace {
-  typedef MultiSequence::indexT indexT;
   typedef unsigned long long countT;
 
   LastalArguments args;
@@ -280,13 +278,12 @@ void calculateScoreStatistics( const std::string& matrixName,
                   p1, p2, isGapped,
                   gapCosts.delExist, gapCosts.delExtend,
                   gapCosts.insExist, gapCosts.insExtend,
-                  args.frameshiftCost, geneticCode, isStandardGeneticCode );
+                  args.frameshiftCost, geneticCode, isStandardGeneticCode,
+		  args.verbosity );
     evaluer.setSearchSpace( refLetters, args.numOfStrands() );
     if( args.verbosity > 0 ) evaluer.writeParameters( std::cerr );
   }catch( const Sls::error& e ){
     LOG( "can't get E-value parameters for this scoring scheme" );
-    if( args.verbosity > 1 )
-      std::cerr << "ALP: " << e.error_code << ": " << e.st;
   }
 }
 
@@ -526,9 +523,9 @@ static void printAndDelete(char *text) {
 }
 
 static void writeAlignment(LastAligner &aligner, const Alignment &aln,
-			   size_t queryNum, char strand, const uchar* querySeq,
+			   size_t queryNum, const uchar* querySeq,
 			   const AlignmentExtras &extras = AlignmentExtras()) {
-  AlignmentText a = aln.write(text, query, queryNum, strand, querySeq,
+  AlignmentText a = aln.write(text, query, queryNum, querySeq,
 			      args.isTranslated(), alph, evaluer,
 			      args.outputFormat, extras);
   if (isCollatedAlignments() || aligners.size() > 1)
@@ -538,11 +535,10 @@ static void writeAlignment(LastAligner &aligner, const Alignment &aln,
 }
 
 static void writeSegmentPair(LastAligner &aligner, const SegmentPair &s,
-			     size_t queryNum, char strand,
-			     const uchar* querySeq) {
+			     size_t queryNum, const uchar* querySeq) {
   Alignment a;
   a.fromSegmentPair(s);
-  writeAlignment(aligner, a, queryNum, strand, querySeq);
+  writeAlignment(aligner, a, queryNum, querySeq);
 }
 
 // Find query matches to the suffix array, and do gapless extensions
@@ -597,7 +593,7 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
 	  if( score < minScoreGapless ) continue;
 	  SegmentPair sp( j - revLen, i - revLen, revLen + fwdLen, score );
 	  dt.addEndpoint( sp.end2(), sp.end1() );
-	  writeSegmentPair( aligner, sp, queryNum, strand, querySeq );
+	  writeSegmentPair( aligner, sp, queryNum, querySeq );
 	}else{
 	  int fs = dis.forwardGaplessScore( j, i );
 	  int rs = dis.reverseGaplessScore( j, i );
@@ -616,7 +612,7 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
 	  dt.addEndpoint( sp.end2(), sp.end1() );
 
 	  if( args.outputType == 1 ){  // we just want gapless alignments
-	    writeSegmentPair( aligner, sp, queryNum, strand, querySeq );
+	    writeSegmentPair( aligner, sp, queryNum, querySeq );
 	  }
 	  else{
 	    gaplessAlns.add(sp);  // add the gapless alignment to the pot
@@ -747,7 +743,7 @@ void alignFinish( LastAligner& aligner, const AlignmentPot& gappedAlns,
   for( size_t i = 0; i < gappedAlns.size(); ++i ){
     const Alignment& aln = gappedAlns.items[i];
     if( args.outputType < 4 ){
-      writeAlignment( aligner, aln, queryNum, strand, querySeq );
+      writeAlignment( aligner, aln, queryNum, querySeq );
     }
     else{  // calculate match probabilities:
       Alignment probAln;
@@ -760,7 +756,7 @@ void alignFinish( LastAligner& aligner, const AlignmentPot& gappedAlns,
 			 dis.p, dis.t, dis.i, dis.j, alph, extras,
 			 args.gamma, args.outputType );
       assert( aln.score != -INF );
-      writeAlignment( aligner, probAln, queryNum, strand, querySeq, extras );
+      writeAlignment( aligner, probAln, queryNum, querySeq, extras );
     }
   }
 }
@@ -953,13 +949,13 @@ void translateAndScan( LastAligner& aligner, size_t queryNum, char strand ){
 static void alignOneQuery(LastAligner &aligner,
 			  size_t queryNum, bool isFirstVolume) {
   if (args.strand == 2 && !isFirstVolume)
-    reverseComplementOneSequence(query, queryAlph.complement, queryNum);
+    query.reverseComplementOneSequence(queryNum, queryAlph.complement);
 
   if (args.strand != 0)
     translateAndScan(aligner, queryNum, '+');
 
   if (args.strand == 2 || (args.strand == 0 && isFirstVolume))
-    reverseComplementOneSequence(query, queryAlph.complement, queryNum);
+    query.reverseComplementOneSequence(queryNum, queryAlph.complement);
 
   if (args.strand != 1)
     translateAndScan(aligner, queryNum, '-');
@@ -1098,40 +1094,6 @@ void writeHeader( countT refSequences, countT refLetters, std::ostream& out ){
   }
 }
 
-// Read the next sequence, adding it to the MultiSequence
-std::istream& appendFromFasta( std::istream& in ){
-  indexT maxSeqLen = args.batchSize;
-  if( maxSeqLen < args.batchSize ) maxSeqLen = indexT(-1);
-  if( query.finishedSequences() == 0 ) maxSeqLen = indexT(-1);
-
-  size_t oldSize = query.unfinishedSize();
-
-  /**/ if( args.inputFormat == sequenceFormat::fasta )
-    query.appendFromFasta( in, maxSeqLen );
-  else if( args.inputFormat == sequenceFormat::prb )
-    query.appendFromPrb( in, maxSeqLen, queryAlph.size, queryAlph.decode );
-  else if( args.inputFormat == sequenceFormat::pssm )
-    query.appendFromPssm( in, maxSeqLen, queryAlph.encode,
-                          args.maskLowercase > 1 );
-  else
-    query.appendFromFastq( in, maxSeqLen );
-
-  if( !query.isFinished() && query.finishedSequences() == 0 )
-    ERR( "encountered a sequence that's too long" );
-
-  // encode the newly-read sequence
-  uchar* seq = query.seqWriter();
-  size_t newSize = query.unfinishedSize();
-  queryAlph.tr( seq + oldSize, seq + newSize, args.isKeepLowercase );
-
-  if( isPhred( args.inputFormat ) )  // assumes one quality code per letter:
-    checkQualityCodes( query.qualityReader() + oldSize,
-                       query.qualityReader() + newSize,
-                       qualityOffset( args.inputFormat ) );
-
-  return in;
-}
-
 void lastal( int argc, char** argv ){
   args.fromArgs( argc, argv );
   args.resetCumulativeOptions();  // because we will do fromArgs again
@@ -1209,8 +1171,7 @@ void lastal( int argc, char** argv ){
   if( !isMultiVolume ) args.minScoreGapless = minScoreGapless;
   if( args.outputType > 0 ) makeQualityScorers();
 
-  queryAlph.tr( query.seqWriter(),
-                query.seqWriter() + query.unfinishedSize() );
+  queryAlph.tr(query.seqWriter(), query.seqWriter() + query.seqBeg(0));
 
   if( volumes+1 == 0 ) readIndex( args.lastdbName, refSequences );
 
@@ -1219,6 +1180,8 @@ void lastal( int argc, char** argv ){
   out.precision(3);  // print non-integers more compactly
   countT queryBatchCount = 0;
   countT sequenceCount = 0;
+  indexT maxSeqLen = args.batchSize;
+  if (maxSeqLen < args.batchSize) maxSeqLen = -1;
 
   char defaultInputName[] = "-";
   char* defaultInput[] = { defaultInputName, 0 };
@@ -1229,7 +1192,8 @@ void lastal( int argc, char** argv ){
     std::istream& in = openIn( *i, inFileStream );
     LOG( "reading " << *i << "..." );
 
-    while( appendFromFasta( in ) ){
+    while (appendSequence(query, in, maxSeqLen, args.inputFormat, queryAlph,
+			  args.isKeepLowercase, args.maskLowercase > 1)) {
       if( query.isFinished() ){
 	++sequenceCount;
       }else{
diff --git a/src/lastdb.cc b/src/lastdb.cc
index daea418..d20572b 100644
--- a/src/lastdb.cc
+++ b/src/lastdb.cc
@@ -3,13 +3,12 @@
 // Read fasta-format sequences; construct a suffix array of them; and
 // write the results to files.
 
+#include "last.hh"
+
 #include "LastdbArguments.hh"
 #include "SubsetSuffixArray.hh"
-#include "Alphabet.hh"
-#include "MultiSequence.hh"
 #include "TantanMasker.hh"
-#include "io.hh"
-#include "qualityScoreUtil.hh"
+#include "zio.hh"
 #include "stringify.hh"
 #include "threadUtil.hh"
 #include <stdexcept>
@@ -23,7 +22,6 @@
 
 using namespace cbrc;
 
-typedef MultiSequence::indexT indexT;
 typedef unsigned long long countT;
 
 // Set up an alphabet (e.g. DNA or protein), based on the user options
@@ -185,7 +183,7 @@ void makeVolume( std::vector< CyclicSubsetSeed >& seeds,
 		 const std::string& seedText, const std::string& baseName ){
   size_t numOfIndexes = seeds.size();
   size_t numOfSequences = multi.finishedSequences();
-  size_t textLength = multi.finishedSize();
+  size_t textLength = multi.seqBeg(numOfSequences);
   const uchar* seq = multi.seqReader();
 
   std::vector<countT> letterCountsInThisVolume(alph.size);
@@ -252,34 +250,11 @@ static indexT maxLettersPerVolume( const LastdbArguments& args,
   return z;
 }
 
-// Read the next sequence, adding it to the MultiSequence
-std::istream&
-appendFromFasta( MultiSequence& multi, indexT maxSeqLen,
-		 const LastdbArguments& args, const Alphabet& alph,
-		 std::istream& in ){
-  if( multi.finishedSequences() == 0 ) maxSeqLen = indexT(-1);
-
-  size_t oldSize = multi.unfinishedSize();
-
-  if ( args.inputFormat == sequenceFormat::fasta )
-    multi.appendFromFasta( in, maxSeqLen );
-  else
-    multi.appendFromFastq( in, maxSeqLen );
-
-  if( !multi.isFinished() && multi.finishedSequences() == 0 )
-    ERR( "encountered a sequence that's too long" );
-
-  // encode the newly-read sequence
-  uchar* seq = multi.seqWriter();
-  size_t newSize = multi.unfinishedSize();
-  alph.tr( seq + oldSize, seq + newSize, args.isKeepLowercase );
-
-  if( isPhred( args.inputFormat ) )  // assumes one quality code per letter:
-    checkQualityCodes( multi.qualityReader() + oldSize,
-                       multi.qualityReader() + newSize,
-                       qualityOffset( args.inputFormat ) );
-
-  return in;
+static bool isRoomToDuplicateTheLastSequence(const MultiSequence &multi,
+					     size_t maxSeqLen) {
+  size_t n = multi.finishedSequences();
+  size_t s = multi.seqBeg(n);
+  return s <= maxSeqLen && s - multi.seqBeg(n - 1) <= maxSeqLen - s;
 }
 
 void lastdb( int argc, char** argv ){
@@ -297,16 +272,16 @@ void lastdb( int argc, char** argv ){
   unsigned numOfThreads =
     decideNumberOfThreads(args.numOfThreads, args.programName, args.verbosity);
   Alphabet alph;
-  MultiSequence multi;
-  std::vector< CyclicSubsetSeed > seeds;
   makeAlphabet( alph, args );
   TantanMasker tantanMasker;
   if( args.tantanSetting )
     tantanMasker.init( alph.isProtein(), args.tantanSetting > 1,
 		       alph.letters, alph.encode );
+  std::vector< CyclicSubsetSeed > seeds;
   makeSubsetSeeds( seeds, seedText, args, alph );
+  MultiSequence multi;
   multi.initForAppending(1);
-  alph.tr( multi.seqWriter(), multi.seqWriter() + multi.unfinishedSize() );
+  alph.tr(multi.seqWriter(), multi.seqWriter() + multi.seqBeg(0));
   unsigned volumeNumber = 0;
   countT sequenceCount = 0;
   std::vector<countT> letterCounts( alph.size );
@@ -321,13 +296,31 @@ void lastdb( int argc, char** argv ){
     std::istream& in = openIn( *i, inFileStream );
     LOG( "reading " << *i << "..." );
 
-    while( appendFromFasta( multi, maxSeqLen, args, alph, in ) ){
+    while (appendSequence(multi, in, maxSeqLen, args.inputFormat, alph,
+			  args.isKeepLowercase, 0)) {
       if( !args.isProtein && args.userAlphabet.empty() &&
           sequenceCount == 0 && isDubiousDna( alph, multi ) ){
         std::cerr << args.programName << ": that's some funny-lookin DNA\n";
       }
 
       if( multi.isFinished() ){
+	if (args.strand != 1) {
+	  if (args.strand == 2) {
+	    ++sequenceCount;
+	    if (isRoomToDuplicateTheLastSequence(multi, maxSeqLen)) {
+	      size_t lastSeq = multi.finishedSequences() - 1;
+	      multi.duplicateOneSequence(lastSeq);
+	    } else {
+	      std::string baseName =
+		args.lastdbName + stringify(volumeNumber++);
+	      makeVolume(seeds, multi, args, alph, letterCounts,
+			 tantanMasker, numOfThreads, seedText, baseName);
+	      multi.eraseAllButTheLastSequence();
+	    }
+	  }
+	  size_t lastSeq = multi.finishedSequences() - 1;
+	  multi.reverseComplementOneSequence(lastSeq, alph.complement);
+	}
         ++sequenceCount;
       }
       else{
diff --git a/src/makefile b/src/makefile
index de49694..98d6831 100644
--- a/src/makefile
+++ b/src/makefile
@@ -14,15 +14,15 @@ alp/njn_dynprogproblim.o alp/njn_ioutil.o alp/njn_random.o	\
 alp/sls_falp_alignment_evaluer.o alp/sls_fsa1_pvalues.o		\
 alp/sls_fsa1_utils.o alp/sls_fsa1.o alp/sls_fsa1_parameters.o
 
-indexObj0 = Alphabet.o CyclicSubsetSeed.o LambdaCalculator.o		\
-ScoreMatrix.o SubsetMinimizerFinder.o TantanMasker.o fileMap.o io.o	\
+indexObj0 = Alphabet.o CyclicSubsetSeed.o LambdaCalculator.o	\
+ScoreMatrix.o SubsetMinimizerFinder.o TantanMasker.o fileMap.o	\
 tantan.o LastdbArguments.o
 
 indexObj4 = MultiSequence.o MultiSequenceQual.o SubsetSuffixArray.o	\
 SubsetSuffixArraySort.o lastdb.o
 
 alignObj0 = Alphabet.o CyclicSubsetSeed.o LambdaCalculator.o		\
-ScoreMatrix.o SubsetMinimizerFinder.o TantanMasker.o fileMap.o io.o	\
+ScoreMatrix.o SubsetMinimizerFinder.o TantanMasker.o fileMap.o		\
 tantan.o LastalArguments.o GappedXdropAligner.o				\
 GappedXdropAlignerPssm.o GappedXdropAligner2qual.o			\
 GappedXdropAligner3frame.o GeneralizedAffineGapCosts.o GeneticCode.o	\
@@ -41,7 +41,7 @@ split/cbrc_unsplit_alignment.o split/last-split-main.o
 splitObj4 = MultiSequence.o split/cbrc_split_aligner.o	\
 split/last-split.o
 
-PPOBJ = last-pair-probs.o last-pair-probs-main.o io.o
+PPOBJ = last-pair-probs.o last-pair-probs-main.o
 
 MBOBJ = last-merge-batches.o
 
@@ -148,7 +148,7 @@ Centroid.o Centroid.o8: Centroid.cc Centroid.hh GappedXdropAligner.hh \
  ScoreMatrixRow.hh GeneralizedAffineGapCosts.hh SegmentPair.hh \
  OneQualityScoreMatrix.hh GappedXdropAlignerInl.hh
 CyclicSubsetSeed.o CyclicSubsetSeed.o8: CyclicSubsetSeed.cc CyclicSubsetSeed.hh \
- CyclicSubsetSeedData.hh io.hh mcf_zstream.hh stringify.hh
+ CyclicSubsetSeedData.hh zio.hh mcf_zstream.hh stringify.hh
 DiagonalTable.o DiagonalTable.o8: DiagonalTable.cc DiagonalTable.hh
 GappedXdropAligner.o GappedXdropAligner.o8: GappedXdropAligner.cc GappedXdropAligner.hh \
  ScoreMatrixRow.hh GappedXdropAlignerInl.hh
@@ -176,7 +176,7 @@ LastalArguments.o LastalArguments.o8: LastalArguments.cc LastalArguments.hh \
 LastdbArguments.o LastdbArguments.o8: LastdbArguments.cc LastdbArguments.hh \
  SequenceFormat.hh stringify.hh getoptUtil.hh version.hh
 MultiSequence.o MultiSequence.o8: MultiSequence.cc MultiSequence.hh ScoreMatrixRow.hh \
- VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh io.hh mcf_zstream.hh
+ VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh io.hh
 MultiSequenceQual.o MultiSequenceQual.o8: MultiSequenceQual.cc MultiSequence.hh \
  ScoreMatrixRow.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh
 OneQualityScoreMatrix.o OneQualityScoreMatrix.o8: OneQualityScoreMatrix.cc \
@@ -184,7 +184,7 @@ OneQualityScoreMatrix.o OneQualityScoreMatrix.o8: OneQualityScoreMatrix.cc \
  stringify.hh
 QualityPssmMaker.o QualityPssmMaker.o8: QualityPssmMaker.cc QualityPssmMaker.hh \
  ScoreMatrixRow.hh qualityScoreUtil.hh stringify.hh
-ScoreMatrix.o ScoreMatrix.o8: ScoreMatrix.cc ScoreMatrix.hh ScoreMatrixData.hh io.hh \
+ScoreMatrix.o ScoreMatrix.o8: ScoreMatrix.cc ScoreMatrix.hh ScoreMatrixData.hh zio.hh \
  mcf_zstream.hh
 SegmentPair.o SegmentPair.o8: SegmentPair.cc SegmentPair.hh
 SegmentPairPot.o SegmentPairPot.o8: SegmentPairPot.cc SegmentPairPot.hh SegmentPair.hh
@@ -192,7 +192,7 @@ SubsetMinimizerFinder.o SubsetMinimizerFinder.o8: SubsetMinimizerFinder.cc \
  SubsetMinimizerFinder.hh CyclicSubsetSeed.hh
 SubsetSuffixArray.o SubsetSuffixArray.o8: SubsetSuffixArray.cc SubsetSuffixArray.hh \
  CyclicSubsetSeed.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh \
- SubsetMinimizerFinder.hh io.hh mcf_zstream.hh
+ SubsetMinimizerFinder.hh io.hh
 SubsetSuffixArraySearch.o SubsetSuffixArraySearch.o8: SubsetSuffixArraySearch.cc \
  SubsetSuffixArray.hh CyclicSubsetSeed.hh VectorOrMmap.hh Mmap.hh \
  fileMap.hh stringify.hh
@@ -209,29 +209,28 @@ gaplessPssmXdrop.o gaplessPssmXdrop.o8: gaplessPssmXdrop.cc gaplessPssmXdrop.hh
 gaplessTwoQualityXdrop.o gaplessTwoQualityXdrop.o8: gaplessTwoQualityXdrop.cc \
  gaplessTwoQualityXdrop.hh TwoQualityScoreMatrix.hh ScoreMatrixRow.hh
 gaplessXdrop.o gaplessXdrop.o8: gaplessXdrop.cc gaplessXdrop.hh ScoreMatrixRow.hh
-io.o io.o8: io.cc io.hh mcf_zstream.hh
 last-pair-probs-main.o last-pair-probs-main.o8: last-pair-probs-main.cc last-pair-probs.hh \
  stringify.hh version.hh
-last-pair-probs.o last-pair-probs.o8: last-pair-probs.cc last-pair-probs.hh io.hh \
+last-pair-probs.o last-pair-probs.o8: last-pair-probs.cc last-pair-probs.hh zio.hh \
  mcf_zstream.hh stringify.hh
-lastal.o lastal.o8: lastal.cc LastalArguments.hh SequenceFormat.hh \
- QualityPssmMaker.hh ScoreMatrixRow.hh OneQualityScoreMatrix.hh \
- TwoQualityScoreMatrix.hh qualityScoreUtil.hh stringify.hh \
+lastal.o lastal.o8: lastal.cc last.hh Alphabet.hh MultiSequence.hh \
+ ScoreMatrixRow.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh \
+ SequenceFormat.hh qualityScoreUtil.hh LastalArguments.hh \
+ QualityPssmMaker.hh OneQualityScoreMatrix.hh TwoQualityScoreMatrix.hh \
  LambdaCalculator.hh LastEvaluer.hh alp/sls_alignment_evaluer.hpp \
  alp/sls_pvalues.hpp alp/sls_basic.hpp alp/sls_falp_alignment_evaluer.hpp \
  alp/sls_fsa1_pvalues.hpp GeneticCode.hh SubsetMinimizerFinder.hh \
- SubsetSuffixArray.hh CyclicSubsetSeed.hh VectorOrMmap.hh Mmap.hh \
- fileMap.hh Centroid.hh GappedXdropAligner.hh \
- GeneralizedAffineGapCosts.hh SegmentPair.hh AlignmentPot.hh Alignment.hh \
- SegmentPairPot.hh ScoreMatrix.hh Alphabet.hh MultiSequence.hh \
+ SubsetSuffixArray.hh CyclicSubsetSeed.hh Centroid.hh \
+ GappedXdropAligner.hh GeneralizedAffineGapCosts.hh SegmentPair.hh \
+ AlignmentPot.hh Alignment.hh SegmentPairPot.hh ScoreMatrix.hh \
  TantanMasker.hh tantan.hh DiagonalTable.hh GreedyXdropAligner.hh \
- gaplessXdrop.hh gaplessPssmXdrop.hh gaplessTwoQualityXdrop.hh io.hh \
+ gaplessXdrop.hh gaplessPssmXdrop.hh gaplessTwoQualityXdrop.hh zio.hh \
  mcf_zstream.hh threadUtil.hh version.hh
-lastdb.o lastdb.o8: lastdb.cc LastdbArguments.hh SequenceFormat.hh \
- SubsetSuffixArray.hh CyclicSubsetSeed.hh VectorOrMmap.hh Mmap.hh \
- fileMap.hh stringify.hh Alphabet.hh MultiSequence.hh ScoreMatrixRow.hh \
- TantanMasker.hh tantan.hh io.hh mcf_zstream.hh qualityScoreUtil.hh \
- threadUtil.hh version.hh
+lastdb.o lastdb.o8: lastdb.cc last.hh Alphabet.hh MultiSequence.hh \
+ ScoreMatrixRow.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh \
+ SequenceFormat.hh qualityScoreUtil.hh LastdbArguments.hh \
+ SubsetSuffixArray.hh CyclicSubsetSeed.hh TantanMasker.hh tantan.hh \
+ zio.hh mcf_zstream.hh threadUtil.hh version.hh
 tantan.o tantan.o8: tantan.cc tantan.hh
 last-merge-batches.o: last-merge-batches.c version.hh
 alp/njn_dynprogprob.o: alp/njn_dynprogprob.cpp alp/njn_dynprogprob.hpp \
diff --git a/src/mcf_zstream.hh b/src/mcf_zstream.hh
index fd527c5..24c7ec3 100644
--- a/src/mcf_zstream.hh
+++ b/src/mcf_zstream.hh
@@ -4,8 +4,8 @@
 // you give it a gzip-compressed file, it will decompress what it
 // reads.
 
-#ifndef MCF_ZSTREAM
-#define MCF_ZSTREAM
+#ifndef MCF_ZSTREAM_HH
+#define MCF_ZSTREAM_HH
 
 #include <zlib.h>
 
diff --git a/src/split/cbrc_split_aligner.cc b/src/split/cbrc_split_aligner.cc
index 7cfe2c7..fa42d95 100644
--- a/src/split/cbrc_split_aligner.cc
+++ b/src/split/cbrc_split_aligner.cc
@@ -390,7 +390,7 @@ void SplitAligner::traceBack(long viterbiScore,
     // procedure for tied scores?
 
     bool isStay = (score == Vmat[ij] + Dmat[ij]);
-    if (isStay && alns[i].qstrand == '+') continue;
+    if (isStay && alns[i].isForwardStrand()) continue;
 
     long s = score - spliceEndScore(ij);
     long t = s - restartScore;
@@ -736,7 +736,7 @@ void SplitAligner::initSpliceSignals(unsigned i) {
   unsigned char *endSignals = &spliceEndSignals[rowBeg];
   unsigned dpLen = dpEnd(i) - dpBeg(i);
 
-  if (a.qstrand == '+') {
+  if (a.isForwardStrand()) {
     for (unsigned j = 0; j <= dpLen; ++j) {
       begSignals[j] = spliceBegSignalFwd(chromBeg + begCoords[j], toUnmasked);
       endSignals[j] = spliceEndSignalFwd(chromBeg + endCoords[j], toUnmasked);
@@ -783,7 +783,7 @@ void SplitAligner::spliceBegSignal(char *out,
 				   unsigned alnNum, unsigned queryPos,
 				   bool isSenseStrand) const {
   const UnsplitAlignment& a = alns[alnNum];
-  bool isForwardStrand = (a.qstrand == '+');
+  bool isForwardStrand = a.isForwardStrand();
   StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
   size_t v = f->second % maxGenomeVolumes();
   size_t c = f->second / maxGenomeVolumes();
@@ -799,7 +799,7 @@ void SplitAligner::spliceEndSignal(char *out,
 				   unsigned alnNum, unsigned queryPos,
 				   bool isSenseStrand) const {
   const UnsplitAlignment& a = alns[alnNum];
-  bool isForwardStrand = (a.qstrand == '+');
+  bool isForwardStrand = a.isForwardStrand();
   StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
   size_t v = f->second % maxGenomeVolumes();
   size_t c = f->second / maxGenomeVolumes();
@@ -1194,6 +1194,8 @@ void SplitAligner::readGenomeVolume(const std::string& baseName,
   genome[volumeNumber].fromFiles(baseName, seqCount, 0);
 
   for (unsigned long long i = 0; i < seqCount; ++i) {
+    char s = genome[volumeNumber].strand(i);
+    if (s == '-') continue;
     std::string n = genome[volumeNumber].seqName(i);
     unsigned long long j = i * maxGenomeVolumes() + volumeNumber;
     if (!chromosomeIndex.insert(std::make_pair(n, j)).second)
diff --git a/src/split/cbrc_unsplit_alignment.cc b/src/split/cbrc_unsplit_alignment.cc
index f85d2bd..090b51b 100644
--- a/src/split/cbrc_unsplit_alignment.cc
+++ b/src/split/cbrc_unsplit_alignment.cc
@@ -192,12 +192,13 @@ void UnsplitAlignment::init() {
       if (s == 1) {
 	rstart = start;
 	rend = start + len;
+	qstrand = (strand == '-') * 2;
 	rname = i->c_str() + (d - c);
 	ralign = i->c_str() + (f - c);
       } else if (s == 2) {
 	qstart = start;
 	qend = start + len;
-	qstrand = strand;
+	if (strand == '-') qstrand = 3 - qstrand;
 	qname = i->c_str() + (d - c);
 	qalign = i->c_str() + (f - c);
       }
diff --git a/src/split/cbrc_unsplit_alignment.hh b/src/split/cbrc_unsplit_alignment.hh
index 76f64b7..aebc114 100644
--- a/src/split/cbrc_unsplit_alignment.hh
+++ b/src/split/cbrc_unsplit_alignment.hh
@@ -34,6 +34,8 @@ public:
     UnsplitAlignment(StringIt linesBegIn, StringIt linesEndIn)
       : linesBeg(linesBegIn), linesEnd(linesEndIn) { init(); }
     void init();
+    bool isForwardStrand() const { return qstrand < 2; }
+    bool isFlipped() const { return qstrand % 2; }
 };
 
 void flipMafStrands(StringIt linesBeg, StringIt linesEnd);
diff --git a/src/split/last-split.cc b/src/split/last-split.cc
index 619f54c..fcea62d 100644
--- a/src/split/last-split.cc
+++ b/src/split/last-split.cc
@@ -147,7 +147,7 @@ static void doOneAlignmentPart(cbrc::SplitAligner& sa,
   }
   std::cout << "\n" << std::setprecision(6);
 
-  if (a.qstrand == '-') cbrc::flipMafStrands(s.begin(), s.end());
+  if (a.isFlipped()) cbrc::flipMafStrands(s.begin(), s.end());
   if (opts.no_split && a.linesEnd[-1][0] == 'c') s.push_back(a.linesEnd[-1]);
   cbrc::printMaf(s);
 }
diff --git a/src/version.hh b/src/version.hh
index 0fe2c52..96659c2 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"885"
+"921"
diff --git a/src/zio.hh b/src/zio.hh
new file mode 100644
index 0000000..0602350
--- /dev/null
+++ b/src/zio.hh
@@ -0,0 +1,37 @@
+// Copyright 2017 Martin C. Frith
+
+#ifndef ZIO_HH
+#define ZIO_HH
+
+#include "mcf_zstream.hh"
+
+#include <iostream>
+#include <iterator>  // istreambuf_iterator
+#include <stdexcept>
+#include <string>
+
+namespace cbrc {
+
+// open an input file, but if the name is "-", just return cin
+inline std::istream &openIn(const char *fileName, mcf::izstream &z) {
+  if (fileName[0] == '-' && fileName[1] == 0) {
+    return std::cin;
+  }
+  z.open(fileName);
+  if (!z) {
+    throw std::runtime_error(std::string("can't open file: ") + fileName);
+  }
+  return z;
+}
+
+// read a file into a string, but if the name is "-", read cin
+inline std::string slurp(const char *fileName) {
+  mcf::izstream z;
+  std::istream &s = openIn(fileName, z);
+  std::istreambuf_iterator<char> beg(s), end;
+  return std::string(beg, end);
+}
+
+}
+
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git



More information about the debian-med-commit mailing list