[med-svn] [gmap] 04/06: Imported Upstream version 2016-08-08

Alex Mestiashvili malex-guest at moszumanska.debian.org
Tue Aug 9 09:08:05 UTC 2016


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository gmap.

commit 84f4c61401075141553d1c63d88e9fbc0af453f3
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Tue Aug 9 10:48:35 2016 +0200

    Imported Upstream version 2016-08-08
---
 ChangeLog          |  169 +++--
 VERSION            |    2 +-
 configure          |   24 +-
 src/atoi.c         |    6 +-
 src/atoiindex.c    |    2 +-
 src/cmet.c         |    6 +-
 src/comp.h         |    2 +-
 src/doublelist.c   |    2 +-
 src/doublelist.h   |    2 +-
 src/filestring.c   |   94 +--
 src/genome_sites.c |   26 +-
 src/gmap.c         |    2 +-
 src/gsnap.c        |    2 +-
 src/inbuffer.c     |    2 +-
 src/intlist.c      |    2 +-
 src/intlist.h      |    2 +-
 src/pair.c         |    2 +-
 src/pairpool.c     |    2 +-
 src/samprint.c     |   54 +-
 src/sarray-read.c  |    2 +-
 src/sedgesort.c    |    2 +-
 src/sedgesort.h    |    2 +-
 src/shortread.c    |    2 +-
 src/splice.c       |   61 +-
 src/stage1hr.c     | 1752 ++++++++++++++++++++++++++++++++++++++++------------
 src/stage3.c       |   13 +-
 src/stage3.h       |    2 +-
 src/stage3hr.c     |    2 +-
 src/stage3hr.h     |    2 +-
 src/substring.c    |  137 ++--
 src/substring.h    |   10 +-
 src/uintlist.c     |    2 +-
 src/uintlist.h     |    2 +-
 src/uniqscan.c     |    2 +-
 src/univdiag.h     |    2 +-
 35 files changed, 1731 insertions(+), 667 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index d5366eb..f9cb1d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,110 +1,145 @@
+2016-08-08  twu
+
+    * atoi.c, cmet.c: Fixed reduce procedures for 64-bit oligos
+
+    * stage1hr.c: Fixed values of splice_pos_start and splice_pos_end given to
+      Genome_donor_positions and related functions
+
+    * filestring.c: Handling the case where stringlen is negative
+
+    * stage3.c: Merged revision 195962 from trunk to fix an issue where we tried
+      to use pairs_pretrim after path_trim altered the pairs
+
+    * samprint.c, substring.c, substring.h: Merged revision 195960 from trunk to
+      fix XT field to have correct fusion coordinates
+
+2016-08-04  twu
+
+    * 2016-08-02-long-read-fusions, comp.h, config.site.rescomp.prd, pair.c,
+      pairpool.c, sarray-read.c, src, stage3.c, util: Merged revisions 195492
+      through 195762 from branches/2016-07-01-better-triage to get latest fixes
+
+    * 2016-08-02-long-read-fusions, Makefile.gsnaptoo.am, comp.h,
+      config.site.rescomp.prd, configure.ac, filestring.c, gsnap.c, pair.c,
+      pairpool.c, samprint.c, sarray-read.c, sedgesort.c, sedgesort.h,
+      shortread.c, src, stage1hr.c, stage3.c, stage3hr.c, stage3hr.h,
+      substring.c, substring.h, univdiag.c, univdiag.h: Merged revisions 193240
+      to 195491 from branches/2016-07-01-better-triage for better performance
+
+2016-08-03  twu
+
+    * stage1hr.c: Hard-coded some values for plusp
+
+    * splice.c: Using new interface to Substring_new_donor and
+      Substring_new_acceptor
+
+    * stage1hr.c: In computing spliceable segments, using a variable for holding
+      previous spliceable information, to resolve writing to an uninitialized
+      ptr at end.  Using a streamlined version of splicing for distant RNA.
+
+    * substring.c, substring.h: Added parameters substring_querystart and
+      substring_queryend to Substring_new_donor and Substring_new_acceptor, so
+      we can handle splicing segments in the middle of the read
+
+    * genome_sites.c: Added debugging statements
+
+    * stage1hr.c: Allowing fusions to occur between middle segments that are
+      spliceable on their distal ends
+
 2016-08-02  twu
 
-    * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in, util: Merged
-      revision 195495 from trunk to use preference order of desired keys, rather
-      than the text
+    * 2016-08-02-long-read-fusions: Created branch to find fusions in long reads
 
-    * comp.h, pair.c, pairpool.c, src, stage3.c, stage3hr.c: Merging revisions
-      195548 through 195550 from trunk to revert recent changes to extraexon
-      comp, addition of dual breaks, and restoring final procedure in
+    * stage3hr.c: Restoring final procedure based on nmatches in
       Stage3pair_optimal_score
 
-    * sarray-read.c: Limiting suffix array procedure by nmisses_allowed again
+    * stage3.c: Reverting from revision 195487 to allow extraexon comps again
+      and from revision 193238 to always insert dual break alignments
 
-    * inbuffer.c: Fixed problem where --force-single-end terminated when file
-      had reads that were a multiple of --input-buffer-size
+    * comp.h, pair.c, pairpool.c: Reverting from revision 195484 to allow
+      extraexon comps again
 
-    * shortread.c: Changed debugging statements to write to stderr
+    * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in: Imposing
+      preference order based on desired keys, rather than the text
 
-    * atoiindex.c: Merged revision 194848 from trunk to fix calculation of oligo
-      using new block algorithm
+    * inbuffer.c, shortread.c, src: Merged revisions 195492 and 195493 to fix
+      problem where --force-single-end terminated when a file had reads that
+      were a multiple of --input-buffer-size
 
-    * stage3.c: Merged revision 195487 from trunk to use shortgap comp instead
-      of extraexon comp for representing dual breaks
+    * stage3.c: Using shortgap comp instead of extraexon comp for representing
+      dual breaks
 
-    * comp.h, pair.c, pairpool.c: Merged revision 195484 from trunk to use
-      shortgap comp instead of extraexon comp for representing dual breaks
+    * comp.h, pair.c, pairpool.c: Using shortgap comp instead of extraexon comp
+      for representing dual breaks
 
-    * shortread.c: Merged revision 195483 from trunk to fix issues in reading
-      multiple pairs of files from command line
+    * shortread.c: Fixed issues in reading multiple pairs of files from command
+      line
 
-2016-07-18  twu
+2016-07-23  twu
 
-    * filestring.c: Implemented right-justified strings
+    * atoiindex.c: Fixed calculation of oligo using new block algorithm
 
 2016-07-12  twu
 
-    * 2016-07-01-better-triage, archive.html, doublelist.c, doublelist.h,
-      gmap.c, gsnap.c, intlist.c, intlist.h, sarray-read.c, src, stage1hr.c,
-      stage3.c, stage3.h, stage3hr.c, substring.c, uintlist.c, uintlist.h,
-      uniqscan.c: Merged revisions from trunk
+    * stage1hr.c: For paired terminals, assigning final pairtype to be
+      concordant
 
-    * config.site.rescomp.prd, configure.ac: Added -fomit-frame-pointer
+    * archive.html: Exposed version 2015-07-23.  Improved formatting.
 
-    * univdiag.c, univdiag.h: Implemented Univdiag_diagonal_cmp
+    * stage3hr.c, substring.c: Handling the special case when alignstart or
+      alignend is requested on an ambiguous substring
 
-    * substring.c: Using new format types for filestrings
+    * stage3hr.c: Computing insertlength and concordance properly for
+      overlapping dual GMAP alignments
 
-    * stage1hr.c: Separated code for heaps and loser trees.  Reverted to using
-      heaps.
+    * stage1hr.c: For dynamic programming of anchor segments, proceeding from
+      closest to farthest segments, to favor shorter splice distances.  Resolved
+      uninitialized variable when completeset algorithm is called but
+      spanningset was not called, so read_oligos was not called.  Skipping
+      re-alignment of hits that are already have a type of GMAP.
 
-    * shortread.c: Using new format types for filestrings
+    * sarray-read.c: Resolving ambiguous ends if one dominates by both
+      probability and splice distance
 
-    * sarray-read.c: Using Sedgewick sort.  For left and right diagonals, trying
-      to consolidate segments on the same diagonal
-
-    * samprint.c: Consolidated some calls to Filestring_put
+    * gmap.c, gsnap.c, uniqscan.c: Using new interface to Stage3_setup
 
-    * gsnap.c: Making --end-detail=low the default
+    * stage3.c, stage3.h: Adding dual break for both SAM and non-SAM output,
+      needed to give the correct CIGAR starting coordinate
 
-    * stage3.c: Restored backwards motion of ptr
+    * doublelist.c, doublelist.h, intlist.c, intlist.h, uintlist.c, uintlist.h:
+      Implemented procedures for keeping a single item in the list
 
-    * stage3hr.h: Added Stage3pair_determine_pairtype
+2016-07-11  twu
 
-    * stage3hr.c: Changed trim threshold for paired terminals from 15 to 8. 
-      Solving ends if the trims are 30 or more, regardless of --end-detail
-      value.
+    * sarray-read.c: Among ambiguous splice segments, ranking by probability and
+      selecting closest one if it is less than half the distance of the second
+      one
 
-    * pair.c: Added debugging macros for hiding method information
+    * substring.c: Improved debugging statements
 
-    * filestring.c: Made transfer_string routine more efficient.  Added ability
-      to transfer strings in reverse and reverse complement format.
+    * stage3hr.c: Improved debugging statements
 
-    * Makefile.gsnaptoo.am, sedgesort.c, sedgesort.h: Added Sedgewick sort
-      routines
+    * sarray-read.c: Fixed value of substring1p passed to Substring_new_ambig
+      for alignments on the minus strand, which resulted in problems with the
+      --merge-overlap feature
 
 2016-07-06  twu
 
-    * stage1hr.c: Replaced heap in spanningset algorithm with a loser tree. 
-      Allowing different anchor length criteria for different mods.  Changing
-      paired result priorities so concordant results take priority over
-      concordant terminals, which then take priority over samechr and
-      translocations.
-
-2016-07-01  twu
+    * stage3.c: Restored backward movement of ptr
 
-    * substring.c, substring.h: Defining Substring_nmismatches_region
+    * stage1hr.c: Fixed infinite loop due to circular list
 
-    * stage3hr.c: Using max_trim instead of total_trim to determine whether a
-      pair is a concordant terminal.  Revised criteria for performing GMAP
-      algorithm on middle part of substrings, now based on mismatches in any
-      substring region being greater than 2.
+    * gsnap.c: Changed default end detail to be medium
 
-    * stage1hr.c: Revised criteria for performing spanningset and completeset
-      algorithms.  Spanningset run if a terminal end has total trim > 15.
-      Completeset run if a terminal score is greater than done_level.
+    * substring.c: Fixed standard GSNAP output for deletions, by reducing the
+      number of final dashes
 
-    * sarray-read.c: Ignoring value of nmisses_allowed, thereby allowing more
-      solutions to come from the suffix array algorithm
+    * stage3hr.c: Added debugging statements
 
-    * pair.c, stage3.c: Restored backwards movement in processing pairs, but
-      making an exception when we reach the last pair
+    * pair.c: Restored backward movement of ptr
 
 2016-06-30  twu
 
-    * 2016-07-01-better-triage: Created branch to triage better for GSNAP or
-      GMAP
-
     * pair.c, stage3.c: No longer going backward after an indel, which could
       cause an infinite loop
 
diff --git a/VERSION b/VERSION
index d04a41d..6f5cbc7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-07-11
\ No newline at end of file
+2016-08-08
\ No newline at end of file
diff --git a/configure b/configure
index 54ac889..1e873be 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-07-11.
+# Generated by GNU Autoconf 2.69 for gmap 2016-08-08.
 #
 # Report bugs to <Thomas Wu <twu at gene.com>>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='gmap'
 PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-07-11'
-PACKAGE_STRING='gmap 2016-07-11'
+PACKAGE_VERSION='2016-08-08'
+PACKAGE_STRING='gmap 2016-08-08'
 PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
 PACKAGE_URL=''
 
@@ -1368,7 +1368,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures gmap 2016-07-11 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-08-08 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1439,7 +1439,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of gmap 2016-07-11:";;
+     short | recursive ) echo "Configuration of gmap 2016-08-08:";;
    esac
   cat <<\_ACEOF
 
@@ -1574,7 +1574,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-gmap configure 2016-07-11
+gmap configure 2016-08-08
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2180,7 +2180,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by gmap $as_me 2016-07-11, which was
+It was created by gmap $as_me 2016-08-08, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2530,8 +2530,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
 $as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-07-11" >&5
-$as_echo "2016-07-11" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-08-08" >&5
+$as_echo "2016-08-08" >&6; }
 
 
 ### Read defaults
@@ -4396,7 +4396,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='gmap'
- VERSION='2016-07-11'
+ VERSION='2016-08-08'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -20072,7 +20072,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by gmap $as_me 2016-07-11, which was
+This file was extended by gmap $as_me 2016-08-08, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -20138,7 +20138,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-gmap config.status 2016-07-11
+gmap config.status 2016-08-08
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/src/atoi.c b/src/atoi.c
index c5fce58..386c4a1 100644
--- a/src/atoi.c
+++ b/src/atoi.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoi.c 181327 2015-12-19 13:07:25Z twu $";
+static char rcsid[] = "$Id: atoi.c 195988 2016-08-08 19:29:00Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -16403,7 +16403,7 @@ Atoi_reduce_tc (Oligospace_T oligo) {
 
 #ifdef HAVE_64_BIT
   /* Handles 48 bits, or k-mer up to 24 */
-  reduced = (Oligospace_T) convert_tc[oligo >> 32];
+  reduced = (Oligospace_T) convert_tc[oligo >> 32 & 0xFFFF];
   reduced <<= 32;		/* Put in separate line to make it clear we want 64 bits */
   reduced |= (Oligospace_T) (convert_tc[oligo >> 16 & 0xFFFF] << 16);
   reduced |= (Oligospace_T) convert_tc[oligo & 0xFFFF];
@@ -16423,7 +16423,7 @@ Atoi_reduce_ag (Oligospace_T oligo) {
 
 #ifdef HAVE_64_BIT
   /* Handles 48 bits, or k-mer up to 24 */
-  reduced = (Oligospace_T) convert_ag[oligo >> 32];
+  reduced = (Oligospace_T) convert_ag[oligo >> 32 & 0xFFFF];
   reduced <<= 32;		/* Put in separate line to make it clear we want 64 bits */
   reduced |= (Oligospace_T) (convert_ag[oligo >> 16 & 0xFFFF] << 16);
   reduced |= (Oligospace_T) convert_ag[oligo & 0xFFFF];
diff --git a/src/atoiindex.c b/src/atoiindex.c
index 793d1f7..20b16a8 100644
--- a/src/atoiindex.c
+++ b/src/atoiindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoiindex.c 195489 2016-08-02 00:16:53Z twu $";
+static char rcsid[] = "$Id: atoiindex.c 194848 2016-07-23 21:11:43Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/cmet.c b/src/cmet.c
index dfd41b3..ea9f6f5 100644
--- a/src/cmet.c
+++ b/src/cmet.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: cmet.c 181327 2015-12-19 13:07:25Z twu $";
+static char rcsid[] = "$Id: cmet.c 195988 2016-08-08 19:29:00Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -16406,7 +16406,7 @@ Cmet_reduce_ct (Oligospace_T oligo) {
 
 #ifdef HAVE_64_BIT
   /* Handles 48 bits, or k-mer up to 24 */
-  reduced = (Oligospace_T) convert_ct[oligo >> 32];
+  reduced = (Oligospace_T) convert_ct[oligo >> 32 & 0xFFFF];
   reduced <<= 32;		/* Put in separate line to make it clear we want 64 bits */
   reduced |= (Oligospace_T) (convert_ct[oligo >> 16 & 0xFFFF] << 16);
   reduced |= (Oligospace_T) convert_ct[oligo & 0xFFFF];
@@ -16425,7 +16425,7 @@ Cmet_reduce_ga (Oligospace_T oligo) {
 
 #ifdef HAVE_64_BIT
   /* Handles 48 bits, or k-mer up to 24 */
-  reduced = (Oligospace_T) convert_ga[oligo >> 32];
+  reduced = (Oligospace_T) convert_ga[oligo >> 32 & 0xFFFF];
   reduced <<= 32;		/* Put in separate line to make it clear we want 64 bits */
   reduced |= (Oligospace_T) (convert_ga[oligo >> 16 & 0xFFFF] << 16);
   reduced |= (Oligospace_T) convert_ga[oligo & 0xFFFF];
diff --git a/src/comp.h b/src/comp.h
index e3aa9ef..5115764 100644
--- a/src/comp.h
+++ b/src/comp.h
@@ -1,4 +1,4 @@
-/* $Id: comp.h 195553 2016-08-02 17:32:39Z twu $ */
+/* $Id: comp.h 195763 2016-08-04 01:37:20Z twu $ */
 #ifndef COMP_INCLUDED
 #define COMP_INCLUDED
 
diff --git a/src/doublelist.c b/src/doublelist.c
index 0dd3c3a..e9f95a0 100644
--- a/src/doublelist.c
+++ b/src/doublelist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: doublelist.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: doublelist.c 193875 2016-07-12 02:43:38Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/doublelist.h b/src/doublelist.h
index e6e271f..c03c51a 100644
--- a/src/doublelist.h
+++ b/src/doublelist.h
@@ -1,4 +1,4 @@
-/* $Id: doublelist.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: doublelist.h 193875 2016-07-12 02:43:38Z twu $ */
 #ifndef DOUBLELIST_INCLUDED
 #define DOUBLELIST_INCLUDED
 
diff --git a/src/filestring.c b/src/filestring.c
index ebd9651..aea43ac 100644
--- a/src/filestring.c
+++ b/src/filestring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: filestring.c 194346 2016-07-18 17:06:16Z twu $";
+static char rcsid[] = "$Id: filestring.c 195969 2016-08-08 17:01:27Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -249,21 +249,23 @@ static void
 transfer_string (T this, char *string, int stringlen) {
   char *block, *q;
 
-  q = string;
-  while (this->nleft <= stringlen) {
-    strncpy(this->ptr,q,this->nleft);
-    q += this->nleft;
-    stringlen -= this->nleft;
+  if (stringlen > 0) {
+    q = string;
+    while (this->nleft <= stringlen) {
+      strncpy(this->ptr,q,this->nleft);
+      q += this->nleft;
+      stringlen -= this->nleft;
     
-    block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
-    this->blocks = List_push_out(this->blocks,(void *) block);
-    this->nleft = BLOCKSIZE;
-    this->ptr = &(block[0]);
-  }
+      block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+      this->blocks = List_push_out(this->blocks,(void *) block);
+      this->nleft = BLOCKSIZE;
+      this->ptr = &(block[0]);
+    }
 
-  strncpy(this->ptr,q,stringlen);
-  this->ptr += stringlen;
-  this->nleft -= stringlen;
+    strncpy(this->ptr,q,stringlen);
+    this->ptr += stringlen;
+    this->nleft -= stringlen;
+  }
 
   return;
 }
@@ -290,23 +292,25 @@ static void
 transfer_string_reverse (T this, char *string, int stringlen) {
   char *block, *q;
 
-  q = &(string[stringlen]);
-  while (this->nleft <= stringlen) {
-    q -= this->nleft;
-    strncpy(this->ptr,q,this->nleft);
-    reverse_inplace(this->ptr,this->nleft);
-    stringlen -= this->nleft;
+  if (stringlen > 0) {
+    q = &(string[stringlen]);
+    while (this->nleft <= stringlen) {
+      q -= this->nleft;
+      strncpy(this->ptr,q,this->nleft);
+      reverse_inplace(this->ptr,this->nleft);
+      stringlen -= this->nleft;
     
-    block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
-    this->blocks = List_push_out(this->blocks,(void *) block);
-    this->nleft = BLOCKSIZE;
-    this->ptr = &(block[0]);
-  }
+      block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+      this->blocks = List_push_out(this->blocks,(void *) block);
+      this->nleft = BLOCKSIZE;
+      this->ptr = &(block[0]);
+    }
 
-  strncpy(this->ptr,string,stringlen);
-  reverse_inplace(this->ptr,stringlen);
-  this->ptr += stringlen;
-  this->nleft -= stringlen;
+    strncpy(this->ptr,string,stringlen);
+    reverse_inplace(this->ptr,stringlen);
+    this->ptr += stringlen;
+    this->nleft -= stringlen;
+  }
 
   return;
 }
@@ -338,23 +342,25 @@ static void
 transfer_string_revcomp (T this, char *string, int stringlen) {
   char *block, *q;
 
-  q = &(string[stringlen]);
-  while (this->nleft <= stringlen) {
-    q -= this->nleft;
-    strncpy(this->ptr,q,this->nleft);
-    revcomp_inplace(this->ptr,this->nleft);
-    stringlen -= this->nleft;
+  if (stringlen > 0) {
+    q = &(string[stringlen]);
+    while (this->nleft <= stringlen) {
+      q -= this->nleft;
+      strncpy(this->ptr,q,this->nleft);
+      revcomp_inplace(this->ptr,this->nleft);
+      stringlen -= this->nleft;
     
-    block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
-    this->blocks = List_push_out(this->blocks,(void *) block);
-    this->nleft = BLOCKSIZE;
-    this->ptr = &(block[0]);
-  }
+      block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+      this->blocks = List_push_out(this->blocks,(void *) block);
+      this->nleft = BLOCKSIZE;
+      this->ptr = &(block[0]);
+    }
 
-  strncpy(this->ptr,string,stringlen);
-  revcomp_inplace(this->ptr,stringlen);
-  this->ptr += stringlen;
-  this->nleft -= stringlen;
+    strncpy(this->ptr,string,stringlen);
+    revcomp_inplace(this->ptr,stringlen);
+    this->ptr += stringlen;
+    this->nleft -= stringlen;
+  }
 
   return;
 }
diff --git a/src/genome_sites.c b/src/genome_sites.c
index 32f041f..e3724dd 100644
--- a/src/genome_sites.c
+++ b/src/genome_sites.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome_sites.c 184186 2016-02-12 20:38:12Z twu $";
+static char rcsid[] = "$Id: genome_sites.c 195749 2016-08-03 23:35:09Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -24670,7 +24670,7 @@ static const Genomecomp_T antiacceptor_bits[] =
 
 /* Code starts here */
 
-#if 0
+#ifdef DEBUG2
 static void
 write_chars_comp (UINT4 high, UINT4 low, UINT4 flags) {
   char Buffer[33];
@@ -24711,7 +24711,7 @@ write_chars_comp (UINT4 high, UINT4 low, UINT4 flags) {
 #endif
 
 
-#if 0
+#ifdef DEBUG2
 static void
 Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T endpos) {
   /* Chrpos_T length = endpos - startpos; */
@@ -25045,10 +25045,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
 #ifdef HAVE_BUILTIN_CTZ
       pos = offset + (relpos = __builtin_ctz(found));
       while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       }
       if (*knownpos == pos) {
+	debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       } else {
@@ -25060,10 +25062,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
       debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
       pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
       while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       }
       if (*knownpos == pos) {
+	debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       } else {
@@ -25106,10 +25110,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
 #ifdef HAVE_BUILTIN_CTZ
       pos = offset + (relpos = __builtin_ctz(found));
       while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       }
       if (*knownpos == pos) {
+	debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       } else {
@@ -25121,10 +25127,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
       debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
       pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
       while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       }
       if (*knownpos == pos) {
+	debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       } else {
@@ -25152,10 +25160,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
 	debug2(printf("low_halfsite & prev_high_halfsite => offset %llu - 1\n",(unsigned long long) offset));
 	pos = offset - 1;	/* verified that this should be offset - 1 */
 	while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	  site_knowni[nfound] = *knowni++;
 	  site_positions[nfound++] = *knownpos++;
 	}
 	if (*knownpos == pos) {
+	  debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	  site_knowni[nfound] = *knowni++;
 	  site_positions[nfound++] = *knownpos++;
 	} else {
@@ -25168,10 +25178,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
 #ifdef HAVE_BUILTIN_CTZ
 	pos = offset + (relpos = __builtin_ctz(found));
 	while (*knownpos < pos) {
+	  debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	  site_knowni[nfound] = *knowni++;
 	  site_positions[nfound++] = *knownpos++;
 	}
 	if (*knownpos == pos) {
+	  debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	  site_knowni[nfound] = *knowni++;
 	  site_positions[nfound++] = *knownpos++;
 	} else {
@@ -25183,10 +25195,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
 	debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
 	pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
 	while (*knownpos < pos) {
+	  debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	  site_knowni[nfound] = *knowni++;
 	  site_positions[nfound++] = *knownpos++;
 	}
 	if (*knownpos == pos) {
+	  debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	  site_knowni[nfound] = *knowni++;
 	  site_positions[nfound++] = *knownpos++;
 	} else {
@@ -25217,10 +25231,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
       debug2(printf("low_halfsite & prev_high_halfsite => offset %llu - 1\n",(unsigned long long) offset));
       pos = offset - 1;		/* verified that this should be offset - 1 */
       while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       }
       if (*knownpos == pos) {
+	debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       } else {
@@ -25233,10 +25249,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
 #ifdef HAVE_BUILTIN_CTZ
       pos = offset + (relpos = __builtin_ctz(found));
       while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       }
       if (*knownpos == pos) {
+	debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       } else {
@@ -25248,10 +25266,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
       debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
       pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
       while (*knownpos < pos) {
+	debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       }
       if (*knownpos == pos) {
+	debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
 	site_knowni[nfound] = *knowni++;
 	site_positions[nfound++] = *knownpos++;
       } else {
diff --git a/src/gmap.c b/src/gmap.c
index 5141a2c..39c14f4 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: gmap.c 193877 2016-07-12 02:46:33Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/gsnap.c b/src/gsnap.c
index e0b4048..264c34c 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: gsnap.c 195760 2016-08-04 00:12:04Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/inbuffer.c b/src/inbuffer.c
index 064d817..a9be1b7 100644
--- a/src/inbuffer.c
+++ b/src/inbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: inbuffer.c 195493 2016-08-02 02:01:39Z twu $";
+static char rcsid[] = "$Id: inbuffer.c 195494 2016-08-02 02:03:20Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/intlist.c b/src/intlist.c
index 0dfd82d..e3c040c 100644
--- a/src/intlist.c
+++ b/src/intlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: intlist.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: intlist.c 193875 2016-07-12 02:43:38Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/intlist.h b/src/intlist.h
index ba85895..537c32e 100644
--- a/src/intlist.h
+++ b/src/intlist.h
@@ -1,4 +1,4 @@
-/* $Id: intlist.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: intlist.h 193875 2016-07-12 02:43:38Z twu $ */
 #ifndef INTLIST_INCLUDED
 #define INTLIST_INCLUDED
 
diff --git a/src/pair.c b/src/pair.c
index e0a0d5c..c24366a 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: pair.c 195763 2016-08-04 01:37:20Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/pairpool.c b/src/pairpool.c
index aa61a0a..7572f6e 100644
--- a/src/pairpool.c
+++ b/src/pairpool.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pairpool.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: pairpool.c 195763 2016-08-04 01:37:20Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/samprint.c b/src/samprint.c
index 3ca6985..d2fce93 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 193892 2016-07-12 04:07:27Z twu $";
+static char rcsid[] = "$Id: samprint.c 195961 2016-08-08 16:36:34Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -858,7 +858,8 @@ print_cigar (Filestring_T fp, char type, int stringlength, int querypos, int que
 	debug1(printf("  Pushing %dS\n",matchlength));
 	FPRINTF(fp,"%dS",matchlength);
       } else {
-	debug1(printf("  Pushing %dH\n",matchlength));
+	debug1(printf("  Pushing %dH because matchlength %d != trimlength %d\n",
+		      matchlength,matchlength,trimlength));
 	FPRINTF(fp,"%dH",matchlength);
       }
     }
@@ -909,7 +910,8 @@ print_cigar (Filestring_T fp, char type, int stringlength, int querypos, int que
 	debug1(printf("  Pushing %dS\n",matchlength));
 	FPRINTF(fp,"%dS",matchlength);
       } else {
-	debug1(printf("  Pushing %dH\n",matchlength));
+	debug1(printf("  Pushing %dH because matchlength %d != trimlength %d\n",
+		      matchlength,matchlength,trimlength));
 	FPRINTF(fp,"%dH",matchlength);
       }
     }
@@ -2173,7 +2175,7 @@ halfacceptor_dinucleotide (char *acceptor2, char *acceptor1, Substring_T accepto
 
 
 static void
-print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T this, Stage3end_T mate,
+print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
 		 char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
 		 int absmq_score, int first_absmq, int second_absmq, int mapq_score,
 		 Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
@@ -2667,7 +2669,8 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T t
   /* 12. TAGS: XT */
   if (print_xt_p == true) {
     FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
-    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
+    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor),
+	    acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor));
   }
 
   /* 12. TAGS: XC */
@@ -2687,7 +2690,7 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T t
 
 
 static void
-print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
+print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
 		    char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
 		    int absmq_score, int first_absmq, int second_absmq, int mapq_score,
 		    Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
@@ -3171,7 +3174,8 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T acceptor, Stage3e
   /* 12. TAGS: XT */
   if (print_xt_p == true) {
     FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
-    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
+    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor),
+	    acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor));
   }
 
 
@@ -3270,7 +3274,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
     /* NEEDS WORK: Need to decide whether to split halfdonor or halfacceptor */
     /* Not sure if circular chromosomes should participate in distant splicing anyway */
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3280,7 +3284,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 		      /*circularp*/true);
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
@@ -3291,7 +3295,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 		      /*circularp*/true);
     } else {
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3304,7 +3308,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
     }
 
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3314,7 +3318,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 			 /*circularp*/true);
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
@@ -3325,7 +3329,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 			 /*circularp*/true);
     } else {
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3339,7 +3343,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 
   } else if (Stage3end_sensedir(this) == SENSE_ANTI) {
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3349,7 +3353,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 			 /*circularp*/true);
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
@@ -3360,7 +3364,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 			 /*circularp*/true);
     } else {
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3373,7 +3377,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
     }
 
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3383,7 +3387,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 		      /*circularp*/true);
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
@@ -3394,7 +3398,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 		      /*circularp*/true);
     } else {
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3409,7 +3413,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
   } else {
     /* SENSE_NULL (DNA distant chimera) */
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3419,7 +3423,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 			 /*circularp*/true);
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
@@ -3430,7 +3434,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 			 /*circularp*/true);
     } else {
-      print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
 			 chromosome_iit,queryseq,pairedlength,
 			 donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3443,7 +3447,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
     }
 
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3453,7 +3457,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 		      /*circularp*/true);
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
@@ -3464,7 +3468,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
 		      /*circularp*/true);
     } else {
-      print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+      print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
 		      chromosome_iit,queryseq,pairedlength,
 		      donor_chrpos,acceptor_chrpos,mate_chrpos,
diff --git a/src/sarray-read.c b/src/sarray-read.c
index bc01892..2062400 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 195552 2016-08-02 17:31:06Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 195763 2016-08-04 01:37:20Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/sedgesort.c b/src/sedgesort.c
index 5fde4cc..3d7da76 100644
--- a/src/sedgesort.c
+++ b/src/sedgesort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sedgesort.c 193883 2016-07-12 03:14:35Z twu $";
+static char rcsid[] = "$Id: sedgesort.c 195760 2016-08-04 00:12:04Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/sedgesort.h b/src/sedgesort.h
index 28512a3..39167de 100644
--- a/src/sedgesort.h
+++ b/src/sedgesort.h
@@ -1,4 +1,4 @@
-/* $Id: sedgesort.h 193883 2016-07-12 03:14:35Z twu $ */
+/* $Id: sedgesort.h 195760 2016-08-04 00:12:04Z twu $ */
 #ifndef SEDGESORT_INCLUDED
 #define SEDGESORT_INCLUDED
 
diff --git a/src/shortread.c b/src/shortread.c
index 66d6389..f434d24 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 195492 2016-08-02 02:00:54Z twu $";
+static char rcsid[] = "$Id: shortread.c 195760 2016-08-04 00:12:04Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/splice.c b/src/splice.c
index 5605e64..2755863 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 193235 2016-06-30 22:34:59Z twu $";
+static char rcsid[] = "$Id: splice.c 195753 2016-08-03 23:44:46Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1051,21 +1051,25 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
 		    best_prob,best_splice_pos,best_donor_splicecoord,best_acceptor_splicecoord));
       if (orig_plusp == true) {
 	/* Originally from plus strand.  No complement. */
+
 	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+	assert(plusp == true);
 	assert(sensedir == SENSE_FORWARD);
 
 	donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
-				    best_splice_pos,best_segmenti_nmismatches,
+				    best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+				    best_segmenti_nmismatches,
 				    best_donor_prob,/*left*/segmenti_left,query_compress,
-				    querylength,plusp,genestrand,sensedir,
+				    querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
 				    segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
 
 	acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
-					  best_splice_pos,best_segmentj_nmismatches,
+					  best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					  best_segmentj_nmismatches,
 					  best_acceptor_prob,/*left*/segmentj_left,query_compress,
-					  querylength,plusp,genestrand,sensedir,
+					  querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
 					  segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
-
+	  
 	if (donor == NULL || acceptor == NULL) {
 	  if (donor != NULL) Substring_free(&donor);
 	  if (acceptor != NULL) Substring_free(&acceptor);
@@ -1120,18 +1124,21 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
       } else {
 	/* Originally from minus strand.  Complement. */
 	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+	assert(plusp == false);
 	assert(sensedir == SENSE_FORWARD);
 
 	donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
-				    best_splice_pos,best_segmentj_nmismatches,
+				    best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+				    best_segmentj_nmismatches,
 				    best_donor_prob,/*left*/segmentj_left,query_compress,
-				    querylength,plusp,genestrand,sensedir,
+				    querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
 				    segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
 
 	acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
-					  best_splice_pos,best_segmenti_nmismatches,
+					  best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					  best_segmenti_nmismatches,
 					  best_acceptor_prob,/*left*/segmenti_left,query_compress,
-					  querylength,plusp,genestrand,sensedir,
+					  querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
 					  segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
 
 	if (donor == NULL || acceptor == NULL) {
@@ -1508,18 +1515,21 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
       if (orig_plusp == true) {
 	/* Originally from plus strand.  No complement. */
 	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+	assert(plusp == false);
 	assert(sensedir == SENSE_ANTI);
 
 	donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
-				    best_splice_pos,best_segmenti_nmismatches,
+				    best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+				    best_segmenti_nmismatches,
 				    best_donor_prob,/*left*/segmenti_left,query_compress,
-				    querylength,plusp,genestrand,sensedir,
+				    querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
 				    segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
 
 	acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
-					  best_splice_pos,best_segmentj_nmismatches,
+					  best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					  best_segmentj_nmismatches,
 					  best_acceptor_prob,/*left*/segmentj_left,query_compress,
-					  querylength,plusp,genestrand,sensedir,
+					  querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
 					  segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
 
 	if (donor == NULL || acceptor == NULL) {
@@ -1576,18 +1586,21 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
       } else {
 	/* Originally from minus strand.  Complement. */
 	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+	assert(plusp == true);
 	assert(sensedir == SENSE_ANTI);
 
 	donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
-				    best_splice_pos,best_segmentj_nmismatches,
+				    best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+				    best_segmentj_nmismatches,
 				    best_donor_prob,/*left*/segmentj_left,query_compress,
-				    querylength,plusp,genestrand,sensedir,
+				    querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
 				    segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
 
 	acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
-					  best_splice_pos,best_segmenti_nmismatches,
+					  best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					  best_segmenti_nmismatches,
 					  best_acceptor_prob,/*left*/segmenti_left,query_compress,
-					  querylength,plusp,genestrand,sensedir,
+					  querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
 					  segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
 
 	if (donor == NULL || acceptor == NULL) {
@@ -2120,7 +2133,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
 	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
 
 	donor = Substring_new_donor(best_donor1_splicecoord,best_donor1_knowni,
-				    best_splice_pos_1,best_segmenti_nmismatches,
+				    best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength,
+				    best_segmenti_nmismatches,
 				    best_donor1_prob,/*left*/segmenti_left,query_compress,
 				    querylength,plusp,genestrand,sensedir,
 				    segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
@@ -2135,7 +2149,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
 					    segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
 
 	acceptor = Substring_new_acceptor(best_acceptor2_splicecoord,best_acceptor2_knowni,
-					  best_splice_pos_2,best_segmentj_nmismatches,
+					  best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					  best_segmentj_nmismatches,
 					  best_acceptor2_prob,/*left*/segmentj_left,query_compress,
 					  querylength,plusp,genestrand,sensedir,
 					  segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
@@ -2199,7 +2214,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
 	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
 
 	donor = Substring_new_donor(best_donor2_splicecoord,best_donor2_knowni,
-				    best_splice_pos_2,best_segmentj_nmismatches,
+				    best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength,
+				    best_segmentj_nmismatches,
 				    best_donor2_prob,/*left*/segmentj_left,query_compress,
 				    querylength,plusp,genestrand,sensedir,
 				    segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
@@ -2213,7 +2229,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
 					    segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
 
 	acceptor = Substring_new_acceptor(best_acceptor1_splicecoord,best_acceptor1_knowni,
-					  best_splice_pos_1,best_segmenti_nmismatches,
+					  best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					  best_segmenti_nmismatches,
 					  best_acceptor1_prob,/*left*/segmenti_left,query_compress,
 					  querylength,plusp,genestrand,sensedir,
 					  segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
diff --git a/src/stage1hr.c b/src/stage1hr.c
index ae138e9..665fc5a 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 195972 2016-08-08 17:11:50Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -446,6 +446,8 @@ struct Segment_T {
   int rightmost;		/* For segmentj of local splice */
   bool left_splice_p; /* Set by find_singlesplices, used by find_doublesplices for speed */
   bool right_splice_p; /* Set by find_singlesplices, used by find_doublesplices for speed */
+  bool spliceable_low_p;
+  bool spliceable_high_p;
 
   bool usedp;
   bool pairablep;
@@ -4127,6 +4129,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
 
   Segment_T ptr, ptr_chrstart;
   Segment_T *ptr_spliceable;
+  bool last_spliceable_p = false;
   /* bool next_spliceable_p; */
 #ifdef DEBUG19
   Segment_T ptr0;
@@ -4557,6 +4560,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
 	ptr->floor_right = floor_right;
 	ptr->leftmost = ptr->rightmost = -1;
 	ptr->left_splice_p = ptr->right_splice_p = false;
+	ptr->spliceable_low_p = last_spliceable_p;
 #if 0
 	ptr->leftspan = ptr->rightspan = -1;
 #endif
@@ -4572,6 +4576,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
 	    /* Not spliceable */
 	  } else if (diagonal <= last_diagonal + max_distance) {
 	    *ptr_spliceable++ = ptr;
+	    ptr->spliceable_high_p = last_spliceable_p = true;
 	  }
 	} else {
 	  /* For minus-strand splicing, require segmenti->querypos5 > segmentj->querypos3,
@@ -4580,23 +4585,25 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
 	    /* Not spliceable */
 	  } else if (diagonal <= last_diagonal + max_distance) {
 	    *ptr_spliceable++ = ptr;
+	    ptr->spliceable_high_p = last_spliceable_p = true;
 	  }
 	}
 #endif
 	if (diagonal <= last_diagonal + max_distance) {
 	  *ptr_spliceable++ = ptr;
+	  ptr->spliceable_high_p = last_spliceable_p = true;
 	  debug4s(printf("%s diagonal %u is spliceable because next one is at %u\n",
 			 plusp ? "plus" : "minus",last_diagonal,diagonal));
 	} else {
+	  ptr->spliceable_high_p = last_spliceable_p = false;
 	  debug4s(printf("%s diagonal %u is not spliceable because next one is at %u\n",
 			 plusp ? "plus" : "minus",last_diagonal,diagonal));
 	}
 	debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal-chroffset,ptr->querypos5,ptr->querypos3));
 	*ptr_all++ = ptr;
 	if (last_querypos >= first_querypos + /*min_segment_length*/1) {
-
-	  *ptr_anchor++ = ptr;
 	  debug14(printf(" ANCHOR"));
+	  *ptr_anchor++ = ptr;
 	}
 	debug14(printf("\n"));
 	ptr++;
@@ -4837,6 +4844,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
     ptr->floor_right = floor_right;
     ptr->leftmost = ptr->rightmost = -1;
     ptr->left_splice_p = ptr->right_splice_p = false;
+    ptr->spliceable_low_p = last_spliceable_p;
 #if 0
     ptr->leftspan = ptr->rightspan = -1;
 #endif
@@ -4961,7 +4969,8 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
   printf("%d selected anchor segments\n",*nanchors);
   for (p = &(*anchor_segments)[0]; p< &((*anchor_segments)[*nanchors]); p++) {
     segment = (Segment_T) *p;
-    printf("%u %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3);
+    printf("%u %d..%d spliceable_low:%d spliceable_high:%d\n",
+	   segment->diagonal,segment->querypos5,segment->querypos3,segment->spliceable_low_p,segment->spliceable_high_p);
   }
 #endif
 
@@ -10067,6 +10076,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
       if (shortexon_orig_plusp == true) {
 	debug4k(printf("Short exon candidate, orig_plusp.  Saw short exon acceptor...donor on segment i\n"));
 	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+	assert(plusp == true);
+	assert(sensedir == SENSE_FORWARD);
 
 	for (j1 = joffset; j1 < j; j1++) {
 	  if (splicetypes[j1] == ACCEPTOR) {
@@ -10075,7 +10086,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 	    if ((splicesites_i_left =
 		 Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1,
 				      /*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset,
-				      query_compress,queryptr,querylength,max_mismatches_allowed,plusp,genestrand,first_read_p,
+				      query_compress,queryptr,querylength,max_mismatches_allowed,/*plusp*/true,
+				      genestrand,first_read_p,
 				      /*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) {
 	      ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false;
 
@@ -10091,7 +10103,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 					     /*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh,
 					     query_compress,queryptr,
 					     max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle,
-					     plusp,genestrand,first_read_p,
+					     /*plusp*/true,genestrand,first_read_p,
 					     /*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
 		    ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
 
@@ -10107,7 +10119,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  nmismatches_shortexon_middle,
 							  /*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
+							  querylength,/*plusp*/true,genestrand,
 							  sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 		      if (shortexon != NULL) {
@@ -10150,15 +10162,17 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  nmismatches_shortexon_middle,
 							  /*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
+							  querylength,/*plusp*/true,genestrand,
 							  sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      debug4k(printf("acceptor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
 		      acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
-							/*splice_pos*/rightpos,nmismatches_shortexon_right,
+							/*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+							nmismatches_shortexon_right,
 							/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
-							query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+							query_compress,querylength,/*plusp*/true,genestrand,
+							/*sensedir*/SENSE_FORWARD,segmentm->chrnum,
 							segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      if (shortexon == NULL || acceptor == NULL) {
@@ -10194,9 +10208,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 
 		      debug4k(printf("donor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
 		      donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
-						  /*splice_pos*/leftpos,nmismatches_shortexon_left,
+						  /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+						  nmismatches_shortexon_left,
 						  /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
-						  query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+						  query_compress,querylength,/*plusp*/true,genestrand,
+						  /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
 						  segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      debug4k(printf("shortexon with acceptor at %d (%llu) ... amb_donor %d (%llu)\n",
@@ -10207,8 +10223,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  nmismatches_shortexon_middle,
 							  /*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
-							  sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true,
+							  querylength,/*plusp*/true,genestrand,
+							  /*sensedir*/SENSE_FORWARD,/*acceptor_ambp*/false,/*donor_ambp*/true,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      if (donor == NULL || shortexon == NULL) {
@@ -10242,9 +10258,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 		      best_left_j = Intlist_head(splicesites_i_left);
 		      best_right_j = Intlist_head(splicesites_i_right);
 		      donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
-						  /*splice_pos*/leftpos,nmismatches_shortexon_left,
+						  /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+						  nmismatches_shortexon_left,
 						  /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
-						  query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+						  query_compress,querylength,/*plusp*/true,genestrand,
+						  /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
 						  segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
@@ -10252,14 +10270,16 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  /*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
 							  nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
+							  querylength,/*plusp*/true,genestrand,
 							  sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 		      
 		      acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
-							/*splice_pos*/rightpos,nmismatches_shortexon_right,
+							/*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+							nmismatches_shortexon_right,
 							/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
-							query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+							query_compress,querylength,/*plusp*/true,genestrand,
+							/*sensedir*/SENSE_FORWARD,segmentm->chrnum,
 							segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      if (donor == NULL || shortexon == NULL || acceptor == NULL) {
@@ -10298,6 +10318,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
       if (shortexon_orig_minusp == true) {
 	debug4k(printf("Short exon candidate, orig_minusp.  Saw short exon antidonor...antiacceptor on segment i\n"));
 	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+	assert(plusp == false);
+	assert(sensedir == SENSE_ANTI);
 
 	for (j1 = joffset; j1 < j; j1++) {
 	  if (splicetypes[j1] == ANTIDONOR) {
@@ -10307,7 +10329,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 		 Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1,
 				      /*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset,
 				      query_compress,queryptr,querylength,max_mismatches_allowed,
-				      plusp,genestrand,first_read_p,
+				      /*plusp*/false,genestrand,first_read_p,
 				      /*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) {
 	      ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false;
 	      
@@ -10317,13 +10339,13 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 		  debug4k(printf("  Doing Splicetrie_find_right from rightpos %d (minus)\n",rightpos));
 		  if ((nmismatches_shortexon_middle =
 		       Genome_count_mismatches_substring(query_compress,segmentm_left,/*pos5*/leftpos,/*pos3*/rightpos,
-							 plusp,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left &&
+							 /*plusp*/false,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left &&
 		      (splicesites_i_right =
 		       Splicetrie_find_right(&nmismatches_shortexon_right,&nmismatches_list_right,j2,
 					     /*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh,
 					     query_compress,queryptr,
 					     max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle,
-					     plusp,genestrand,first_read_p,
+					     /*plusp*/false,genestrand,first_read_p,
 					     /*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
 		    ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
 
@@ -10338,7 +10360,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
 							  /*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
+							  querylength,/*plusp*/false,genestrand,
 							  sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 		      if (shortexon != NULL) {
@@ -10380,15 +10402,17 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
 							  /*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
+							  querylength,/*plusp*/false,genestrand,
 							  sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      debug4k(printf("donor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
 		      donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
-						  /*splice_pos*/rightpos,nmismatches_shortexon_right,
+						  /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+						  nmismatches_shortexon_right,
 						  /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
-						  query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+						  query_compress,querylength,/*plusp*/false,genestrand,
+						  /*sensedir*/SENSE_ANTI,segmentm->chrnum,
 						  segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      if (donor == NULL || shortexon == NULL) {
@@ -10422,9 +10446,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 
 		      debug4k(printf("acceptor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
 		      acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
-							/*splice_pos*/leftpos,nmismatches_shortexon_left,
+							/*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+							nmismatches_shortexon_left,
 							/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
-							query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+							query_compress,querylength,/*plusp*/false,genestrand,
+							/*sensedir*/SENSE_ANTI,segmentm->chrnum,
 							segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      debug4k(printf("shortexon with donor at %d (%llu) ... amb_acceptor at %d (%llu)\n",
@@ -10434,7 +10460,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
 							  /*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
+							  querylength,/*plusp*/false,genestrand,
 							  sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
@@ -10469,9 +10495,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 		      best_left_j = Intlist_head(splicesites_i_left);
 		      best_right_j = Intlist_head(splicesites_i_right);
 		      acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
-							/*splice_pos*/leftpos,nmismatches_shortexon_left,
+							/*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+							nmismatches_shortexon_left,
 							/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
-							query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+							query_compress,querylength,/*plusp*/false,genestrand,
+							/*sensedir*/SENSE_ANTI,segmentm->chrnum,
 							segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
@@ -10479,14 +10507,16 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
 							  /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,
 							  nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
 							  /*left*/segmentm_left,query_compress,
-							  querylength,plusp,genestrand,
+							  querylength,/*plusp*/false,genestrand,
 							  sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
 							  segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
-						  /*splice_pos*/rightpos,nmismatches_shortexon_right,
+						  /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+						  nmismatches_shortexon_right,
 						  /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
-						  query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+						  query_compress,querylength,/*plusp*/false,genestrand,
+						  /*sensedir*/SENSE_ANTI,segmentm->chrnum,
 						  segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
 
 		      if (acceptor == NULL || shortexon == NULL || donor == NULL) {
@@ -10565,7 +10595,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
   int i;
 #endif
 
-  debug4e(printf("Entering find_spliceends_shortend with %d segments\n",nsegments));
+  debug4e(printf("Entering find_spliceends_shortend with %d anchor segments\n",nanchors));
 
   if (floors != NULL) {
     floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -10613,7 +10643,6 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
 	  } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
 	    splice_pos_end = querylength - 1;
 	  }
-	  splice_pos_end = querylength - 1;
 
 	  debug4e(printf("Search for splice sites from %d up (%llu) to %d (%llu)\n",
 			 splice_pos_start,(unsigned long long) segment_left+splice_pos_start,
@@ -10648,8 +10677,9 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
 			     segment_left,(unsigned long long) splice_pos,nmismatches,splice_pos_end));
 	      sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
 
-	      if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
-					     /*prob*/2.0,/*left*/segment_left,query_compress,
+	      if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,
+					     splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					     nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
 					     querylength,plusp,genestrand,
 					     sensedir,segment->chrnum,segment->chroffset,
 					     segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10665,7 +10695,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
 	      sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
 
 	      if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
-						splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+						splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+						nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
 						querylength,plusp,genestrand,
 						sensedir,segment->chrnum,segment->chroffset,
 						segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10700,6 +10731,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
 	  } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
 	    splice_pos_start = 1;
 	  }
+	  /* splice_pos_start = 1; */
 
 	  debug4e(printf("Search for splice sites from %d (%llu) down to %d (%llu)\n",
 			 splice_pos_end,(unsigned long long) segment_left+splice_pos_end,
@@ -10735,7 +10767,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
 	      sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
 
 	      if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
-						splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+						splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+						nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
 						querylength,plusp,genestrand,
 						sensedir,segment->chrnum,segment->chroffset,
 						segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10750,8 +10783,9 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
 			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
 	      sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
 
-	      if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
-					     /*prob*/2.0,/*left*/segment_left,query_compress,
+	      if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,
+					     splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+					     nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
 					     querylength,plusp,genestrand,
 					     sensedir,segment->chrnum,segment->chroffset,
 					     segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10780,6 +10814,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
 				  int max_mismatches_allowed, int genestrand) {
 #ifdef DEBUG4E
   char *gbuffer;
+  int i;
 #endif
 
   Segment_T segment, *p;
@@ -10800,7 +10835,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
 #endif
 
 
-  debug4e(printf("Entering find_spliceends_distant_dna with %d segments\n",nsegments));
+  debug4e(printf("Entering find_spliceends_distant_dna with %d anchor segments\n",nanchors));
 
   if (floors != NULL) {
     floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -10811,7 +10846,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
       assert(segment->diagonal != (Univcoord_T) -1);
 
       segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
-      debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+      debug4e(printf("find_spliceends_distant_dna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
 		     max_mismatches_allowed,(unsigned long long) segment->diagonal,
 		     segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
 		     floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
@@ -10914,7 +10949,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
 	while (splice_pos >= splice_pos_start && nmismatches <= max_mismatches_allowed) {
 	  debug4e(printf(" splice pos %d, nmismatches (quick) %d, nmismatches (goldstd) %d\n",splice_pos,nmismatches,
 			 Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
-							   /*plusp*/true,genestrand,first_read_p)));
+							   /*plusp*/true,genestrand)));
 	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
 								  /*plusp*/true,genestrand));
 	  if ((hit = Substring_new_endfrag(/*endfrag_coord*/segment_left + splice_pos,
@@ -10952,6 +10987,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
 				   int max_mismatches_allowed, int genestrand) {
 #ifdef DEBUG4E
   char *gbuffer;
+  int i;
 #endif
 
   Segment_T segment, *p;
@@ -10972,7 +11008,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
 #endif
 
 
-  debug4e(printf("Entering find_spliceends_distant_dna with %d segments\n",nsegments));
+  debug4e(printf("Entering find_spliceends_distant_dna with %d anchor segments\n",nanchors));
 
   if (floors != NULL) {
     floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -10983,7 +11019,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
       assert(segment->diagonal != (Univcoord_T) -1);
 
       segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
-      debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+      debug4e(printf("find_spliceends_distant_dna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
 		     max_mismatches_allowed,(unsigned long long) segment->diagonal,
 		     segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
 		     floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
@@ -11086,7 +11122,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
 	while (splice_pos >= splice_pos_start && nmismatches <= max_mismatches_allowed) {
 	  debug4e(printf(" splice pos %d, nmismatches (quick) %d, nmismatches (goldstd) %d\n",splice_pos,nmismatches,
 			 Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
-							   /*plusp*/false,genestrand,first_read_p)));
+							   /*plusp*/false,genestrand)));
 	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
 								  /*plusp*/false,genestrand));
 	  if ((hit = Substring_new_startfrag(/*startfrag_coord*/segment_left + splice_pos,
@@ -11113,6 +11149,8 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
 }
 
 
+#if 0
+/* Using a streamlined version instead */
 /* Produces lists of distant_donors and distant_acceptors that are substrings */
 /* TODO: Change to lists of Stage3end_T objects, including GMAP.
    Change definition of a chimera to be two Stage3end_T objects, instead
@@ -11134,7 +11172,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
   Substring_T hit;
   Univcoord_T segment_left;
   int nmismatches, j, i;
-  int splice_pos;
+  int splice_pos, last_querypos;
   double prob;
 
   int nmismatches_left, nmismatches_right;
@@ -11171,7 +11209,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
   int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
 
 
-  debug4e(printf("Entering find_spliceends_distant_rna with %d segments\n",nsegments));
+  debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors));
 
   if (floors != NULL) {
     floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -11182,10 +11220,13 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
       assert(segment->diagonal != (Univcoord_T) -1);
 
       segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
-      debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+      last_querypos = segment->querypos3 + index1part;
+
+      debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n",
 		     max_mismatches_allowed,(unsigned long long) segment->diagonal,
 		     segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
-		     floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
+		     floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3],
+		     plusp));
 
       debug4e(
 	      gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
@@ -11195,406 +11236,1267 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
 	      FREE(gbuffer);
 	      );
 
-      /* Splice ends from left to splice site */
-      if ((plusp == true && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed) ||
-	  (plusp == false && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed)) {
+      /* Find splices on genomic right */
+      if (plusp) {
+	/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+	if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+	  /* genomic left anchor */
+	  debug4e(printf("Searching genomic right: plus genomic left anchor\n"));
+	  nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+						    query_compress,/*left*/segment_left,
+#if 0
+						    /*pos5*/0,/*pos3*/querylength,
+#else
+						    /*pos5 (was 0)*/segment->querypos5,/*pos3*/querylength,
+#endif
+						    plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on left (%d allowed) at:",
+			 nmismatches_left,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_left; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
 
-	/* pos3 was trimpos */
-	nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
-						  query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
-						  plusp,genestrand);
+#if 0
+	  splice_pos_start = index1part;
+#else
+	  splice_pos_start = segment->querypos5;
+#endif
+	  if (nmismatches_left <= max_mismatches_allowed) {
+	    splice_pos_end = querylength - 1;
+	  } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+	    splice_pos_end = querylength - 1;
+	  }
+	  
+	} else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+	  /* genomic right anchor.  No need to find splices on genomic right */
+	  debug4e(printf("Searching genomic right: plus genomic right anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	  
+	} else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+		   segment->spliceable_low_p == true) {
+	  /* middle anchor */
+	  debug4e(printf("Searching genomic right: plus middle anchor\n"));
+	  nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+						    query_compress,/*left*/segment_left,
+						    /*pos5*/segment->querypos5,/*pos3*/querylength,
+						    plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on left (%d allowed) at:",
+			 nmismatches_left,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_left; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
 
-	debug4e(
-		printf("%d mismatches on left (%d allowed) at:",
-		       nmismatches_left,max_mismatches_allowed);
-		for (i = 0; i <= nmismatches_left; i++) {
-		  printf(" %d",mismatch_positions[i]);
-		}
-		printf("\n");
-		);
+	  splice_pos_start = segment->querypos5;
+	  if (nmismatches_left <= max_mismatches_allowed) {
+	    splice_pos_end = querylength - 1;
+	  } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+	    splice_pos_end = querylength - 1;
+	  }
 
-	splice_pos_start = index1part;
-	if (nmismatches_left <= max_mismatches_allowed) {
-	  splice_pos_end = querylength - 1;
-	} else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
-	  splice_pos_end = querylength - 1;
+	} else {
+	  /* full anchor */
+	  debug4e(printf("Searching genomic right: plus full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
 	}
 
-	if (splice_pos_start <= splice_pos_end) {
-	  debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
-
-	  segment_donor_nknown = 0;
-	  segment_antiacceptor_nknown = 0;
-	  if ((j = segment->splicesites_i) >= 0) {
-	    /* Known splicing */
-	    /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
-	    while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
-	      if (splicetypes[j] == DONOR) {
-		segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
-		segment_donor_knowni[segment_donor_nknown++] = j;
-	      } else if (splicetypes[j] == ANTIACCEPTOR) {
-		segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
-		segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
-	      }
-	      j++;
-	    }
+      } else {
+	/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+	if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+	  /* genomic right anchor.  No need to find splices on genomic right */
+	  debug4e(printf("Searching genomic right: minus genomic right anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+
+	} else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+	  /* genomic left anchor */
+	  debug4e(printf("Searching genomic right: minus genomic left anchor\n"));
+	  nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+						    query_compress,/*left*/segment_left,
+#if 0
+						    /*pos5*/0,/*pos3*/querylength,
+#else
+						    /*pos5 (was 0)*/querylength - last_querypos,
+						    /*pos3*/querylength,
+#endif
+						    plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on left (%d allowed) at:",
+			 nmismatches_left,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_left; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
+
+#if 0
+	  splice_pos_start = index1part;
+#else
+	  splice_pos_start = querylength - last_querypos;
+#endif
+	  if (nmismatches_left <= max_mismatches_allowed) {
+	    splice_pos_end = querylength - 1;
+	  } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+	    splice_pos_end = querylength - 1;
 	  }
-	  segment_donor_knownpos[segment_donor_nknown] = querylength;
-	  segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
-
-	  /* Originally on plus strand.  No complement */
-	  sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
-
-	  if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
-	    donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
-						   segment_donor_knownpos,segment_donor_knowni,
-						   segment_left,splice_pos_start,splice_pos_end+1);
-	    donori_positions = positions_alloc;
-	    donori_knowni = knowni_alloc;
-	    debug4e(
-		    printf("Donor dinucleotides:");
-		    for (i = 0; i < donori_nsites; i++) {
-		      printf(" %d",donori_positions[i]);
-		    }
-		    printf("\n");
-		    );
-	  } else {
-	    donori_nsites = segment_donor_nknown;
-	    donori_positions = segment_donor_knownpos;
-	    donori_knowni = segment_donor_knowni;
+
+	} else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+		   segment->spliceable_low_p == true) {
+	  /* middle anchor */
+	  debug4e(printf("Searching genomic right: minus middle anchor\n"));
+	  nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+						    query_compress,/*left*/segment_left,
+						    /*pos5*/querylength - segment->querypos3 - index1part,
+						    /*pos3*/querylength,plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on left (%d allowed) at:",
+			 nmismatches_left,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_left; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
+
+	  splice_pos_start = querylength - last_querypos;
+	  if (nmismatches_left <= max_mismatches_allowed) {
+	    splice_pos_end = querylength - 1;
+	  } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+	    splice_pos_end = querylength - 1;
 	  }
 
-	  i = 0;
-	  nmismatches = 0;
-	  while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
-	    splice_pos = donori_positions[i];
-	    while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
-	      debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
-	      nmismatches++;
+	} else {
+	  /* full anchor */
+	  debug4e(printf("Searching genomic right: minus full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	}
+      }
+
+      if (splice_pos_start <= splice_pos_end) {
+	debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
+
+	segment_donor_nknown = 0;
+	segment_antiacceptor_nknown = 0;
+	if ((j = segment->splicesites_i) >= 0) {
+	  /* Known splicing */
+	  /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
+	  while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+	    if (splicetypes[j] == DONOR) {
+	      segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
+	      segment_donor_knowni[segment_donor_nknown++] = j;
+	    } else if (splicetypes[j] == ANTIACCEPTOR) {
+	      segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
+	      segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
 	    }
-	    debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+	    j++;
+	  }
+	}
+	segment_donor_knownpos[segment_donor_nknown] = querylength;
+	segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
+
+	/* Originally on plus strand.  No complement */
+	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+	if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+	  donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
+						 segment_donor_knownpos,segment_donor_knowni,
+						 segment_left,splice_pos_start,splice_pos_end);
+	  donori_positions = positions_alloc;
+	  donori_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Donor dinucleotides:");
+		  for (i = 0; i < donori_nsites; i++) {
+		    printf(" %d",donori_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	} else {
+	  donori_nsites = segment_donor_nknown;
+	  donori_positions = segment_donor_knownpos;
+	  donori_knowni = segment_donor_knowni;
+	}
+
+	i = 0;
+	nmismatches = 0;
+	while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = donori_positions[i];
+	  while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
+	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
 #if 0
-	    assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
-								    plusp,genestrand,first_read_p));
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+								  plusp,genestrand,first_read_p));
 #endif
-	    if (nmismatches <= max_mismatches_allowed) {
-	      if (donori_knowni[i] >= 0) {
-		debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
-			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (donori_knowni[i] >= 0) {
+	      debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
 		
-		if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
-					       splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+	      if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
+					     splice_pos,/*substring_querystart*/segment->querypos5,
+					     /*substring_queryend*/last_querypos,
+					     nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+					     querylength,plusp,genestrand,
+					     sensedir,segment->chrnum,segment->chroffset,
+					     segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
+	      }
+
+	    } else {
+	      prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+		if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+					       splice_pos,/*substring_querystart*/segment->querypos5,
+					       /*substring_queryend*/last_querypos,
+					       nmismatches,prob,/*left*/segment_left,query_compress,
 					       querylength,plusp,genestrand,
 					       sensedir,segment->chrnum,segment->chroffset,
 					       segment->chrhigh,segment->chrlength)) != NULL) {
-		  debug4e(printf("=> %s donor: %f at %d (%d mismatches)\n",
-				 plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
-				 Substring_chimera_pos(hit),nmismatches));
+		  debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
 		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
 		  (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
 		}
-
-	      } else {
-		prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
-		debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
-			       splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
-		if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
-		  debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
-				 (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
-		  if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
-						 splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
-						 querylength,plusp,genestrand,
-						 sensedir,segment->chrnum,segment->chroffset,
-						 segment->chrhigh,segment->chrlength)) != NULL) {
-		    debug4e(printf("=> %s donor: %f at %d (%d mismatches)\n",
-				   plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
-		    debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
-		    (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
-		  }
-		}
 	      }
 	    }
-
-	    i++;
 	  }
 
+	  i++;
+	}
+
 
-	  /* Splicing originally on minus strand.  Complement */
-	  sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+	/* Splicing originally on minus strand.  Complement */
+	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
 
-	  if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
-	    antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
-								 segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
-								 segment_left,splice_pos_start,splice_pos_end+1);
-	    antiacceptori_positions = positions_alloc;
-	    antiacceptori_knowni = knowni_alloc;
-	    debug4e(
-		    printf("Antiacceptor dinucleotides:");
-		    for (i = 0; i < antiacceptori_nsites; i++) {
-		      printf(" %d",antiacceptori_positions[i]);
-		    }
-		    printf("\n");
-		    );
-	  } else {
-	    antiacceptori_nsites = segment_antiacceptor_nknown;
-	    antiacceptori_positions = segment_antiacceptor_knownpos;
-	    antiacceptori_knowni = segment_antiacceptor_knowni;
-	  }
+	if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+	  antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
+							       segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
+							       segment_left,splice_pos_start,splice_pos_end);
+	  antiacceptori_positions = positions_alloc;
+	  antiacceptori_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Antiacceptor dinucleotides:");
+		  for (i = 0; i < antiacceptori_nsites; i++) {
+		    printf(" %d",antiacceptori_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	} else {
+	  antiacceptori_nsites = segment_antiacceptor_nknown;
+	  antiacceptori_positions = segment_antiacceptor_knownpos;
+	  antiacceptori_knowni = segment_antiacceptor_knowni;
+	}
 
-	  i = 0;
-	  nmismatches = 0;
-	  while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
-	    splice_pos = antiacceptori_positions[i];
-	    while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
-	      debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
-	      nmismatches++;
-	    }
-	    debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+	i = 0;
+	nmismatches = 0;
+	while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = antiacceptori_positions[i];
+	  while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
+	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
 #if 0
-	    assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
-								    plusp,genestrand,first_read_p));
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+								  plusp,genestrand,first_read_p));
 #endif
-	    if (nmismatches <= max_mismatches_allowed) {
-	      if (antiacceptori_knowni[i] >= 0) {
-		debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (antiacceptori_knowni[i] >= 0) {
+	      debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+	      if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
+						splice_pos,/*substring_querystart*/segment->querypos5,
+						/*substring_queryend*/last_querypos,
+						nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+						querylength,plusp,genestrand,
+						sensedir,segment->chrnum,segment->chroffset,
+						segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
+	      }
+
+	    } else {
+	      prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
 			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
-		if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
-						  splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+		if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+						  splice_pos,/*substring_querystart*/segment->querypos5,
+						  /*substring_queryend*/last_querypos,
+						  nmismatches,prob,/*left*/segment_left,query_compress,
 						  querylength,plusp,genestrand,
 						  sensedir,segment->chrnum,segment->chroffset,
 						  segment->chrhigh,segment->chrlength)) != NULL) {
-		  debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches)\n",
-				 plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
-				 Substring_chimera_pos(hit),nmismatches));
+		  debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
 		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
 		  (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
 		}
-
-	      } else {
-		prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
-		debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
-			       splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
-		if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
-		  debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
-				 (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
-		  if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
-						    splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
-						    querylength,plusp,genestrand,
-						    sensedir,segment->chrnum,segment->chroffset,
-						    segment->chrhigh,segment->chrlength)) != NULL) {
-		    debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches)\n",
-				   plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
-		    debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
-		    (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
-		  }
-		}
 	      }
 	    }
-
-	    i++;
 	  }
-	}
 
+	  i++;
+	}
       }
 
-      /* Splice ends from splice site to right end */
-      if ((plusp == true && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) ||
-	  (plusp == false && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed)) {
-
-	/* pos5 was trimpos+1 */
-	nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
-						    query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
-						    plusp,genestrand);
 
-	debug4e(
-		printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
-		for (i = 0; i <= nmismatches_right; i++) {
-		  printf(" %d",mismatch_positions[i]);
-		}
-		printf("\n");
-		);
+      /* Find splices on genomic left */
+      if (plusp) {
+	/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+	if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+	  /* genomic left anchor.  No need to find splices on genomic left. */
+	  debug4e(printf("Searching genomic left: plus genomic left anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
 
-	splice_pos_end = query_lastpos;
-	if (nmismatches_right <= max_mismatches_allowed) {
-	  splice_pos_start = 1;
-	} else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
-	  splice_pos_start = 1;
-	}
+	} else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+	  /* genomic right anchor */
+	  debug4e(printf("Searching genomic left: plus genomic right anchor\n"));
+	  nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+						      query_compress,/*left*/segment_left,
+#if 0
+						      /*pos5*/0,/*pos3*/querylength,
+#else
+						      /*pos5*/0,/*pos3 (was querylength)*/last_querypos,
+#endif
+						      plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_right; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
 
-	if (splice_pos_start <= splice_pos_end) {
-	  debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
-
-	  segment_acceptor_nknown = 0;
-	  segment_antidonor_nknown = 0;
-	  if ((j = segment->splicesites_i) >= 0) {
-	    /* Known splicing */
-	    while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
-	      if (splicetypes[j] == ACCEPTOR) {
-		debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
-		segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
-		segment_acceptor_knowni[segment_acceptor_nknown++] = j;
-	      } else if (splicetypes[j] == ANTIDONOR) {
-		debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
-		segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
-		segment_antidonor_knowni[segment_antidonor_nknown++] = j;
-	      }
-	      j++;
-	    }
+#if 0
+	  splice_pos_end = query_lastpos;
+#else
+	  splice_pos_end = last_querypos;
+#endif
+	  if (nmismatches_right <= max_mismatches_allowed) {
+	    splice_pos_start = 1;
+	  } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+	    splice_pos_start = 1;
 	  }
-	  segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
-	  segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
 
+	} else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+		   segment->spliceable_high_p == true) {
+	  /* middle anchor */
+	  debug4e(printf("Searching genomic left: plus middle anchor\n"));
+	  nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+						      query_compress,/*left*/segment_left,
+						      /*pos5*/0,/*pos3*/last_querypos,
+						      plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_right; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
 
-	  /* Splicing originally on plus strand.  No complement. */
-	  sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+	  splice_pos_end = last_querypos;
+	  if (nmismatches_right <= max_mismatches_allowed) {
+	    splice_pos_start = 1;
+	  } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+	    splice_pos_start = 1;
+	  }
 
-	  if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
-	    acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
-							 segment_acceptor_knownpos,segment_acceptor_knowni,
-							 segment_left,splice_pos_start,splice_pos_end+1);
-	    acceptorj_positions = positions_alloc;
-	    acceptorj_knowni = knowni_alloc;
-	    debug4e(
-		    printf("Acceptor dinucleotides:");
-		    for (i = 0; i < acceptorj_nsites; i++) {
-		      printf(" %d",acceptorj_positions[i]);
-		    }
-		    printf("\n");
-		    );
-	  } else {
-	    acceptorj_nsites = segment_acceptor_nknown;
-	    acceptorj_positions = segment_acceptor_knownpos;
-	    acceptorj_knowni = segment_acceptor_knowni;
+	} else {
+	  /* full anchor */
+	  debug4e(printf("Searching genomic left: plus full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	}
+
+      } else {
+	/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+	if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+	  /* genomic right anchor */
+	  debug4e(printf("Searching genomic left: minus genomic right anchor\n"));
+	  nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+						      query_compress,/*left*/segment_left,
+#if 0
+						      /*pos5*/0,/*pos3*/querylength,
+#else						      
+						      /*pos5*/0,/*pos3 (was querylength)*/querylength - segment->querypos5,
+#endif
+						      plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_right; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
+
+#if 0
+	  splice_pos_end = query_lastpos;
+#else
+	  splice_pos_end = querylength - segment->querypos5;
+#endif
+	  if (nmismatches_right <= max_mismatches_allowed) {
+	    splice_pos_start = 1;
+	  } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+	    splice_pos_start = 1;
 	  }
 
-	  i = acceptorj_nsites - 1;
-	  nmismatches = 0;
-	  while (i >= 0 && nmismatches <= max_mismatches_allowed) {
-	    splice_pos = acceptorj_positions[i];
-	    while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
-	      debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
-	      nmismatches++;
+	} else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+	  /* genomic left anchor.  No need to find splices on genomic left. */
+	  debug4e(printf("Searching genomic left: minus genomic left anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+
+	} else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+		   segment->spliceable_high_p == true) {
+	  /* middle anchor */
+	  debug4e(printf("Searching genomic left: minus middle anchor\n"));
+	  nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+						      query_compress,/*left*/segment_left,
+						      /*pos5*/0,/*pos3*/querylength - segment->querypos5,
+						      plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_right; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
+
+	  splice_pos_end = querylength - segment->querypos5;
+	  if (nmismatches_right <= max_mismatches_allowed) {
+	    splice_pos_start = 1;
+	  } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+	    splice_pos_start = 1;
+	  }
+
+	} else {
+	  /* full anchor */
+	  debug4e(printf("Searching genomic left: minus full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	}
+      }
+
+      if (splice_pos_start <= splice_pos_end) {
+	debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
+
+	segment_acceptor_nknown = 0;
+	segment_antidonor_nknown = 0;
+	if ((j = segment->splicesites_i) >= 0) {
+	  /* Known splicing */
+	  while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+	    if (splicetypes[j] == ACCEPTOR) {
+	      debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+	      segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
+	      segment_acceptor_knowni[segment_acceptor_nknown++] = j;
+	    } else if (splicetypes[j] == ANTIDONOR) {
+	      debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+	      segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
+	      segment_antidonor_knowni[segment_antidonor_nknown++] = j;
 	    }
-	    debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+	    j++;
+	  }
+	}
+	segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
+	segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
+
+	/* Splicing originally on plus strand.  No complement. */
+	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+	if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+	  acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
+						       segment_acceptor_knownpos,segment_acceptor_knowni,
+						       segment_left,splice_pos_start,splice_pos_end);
+	  acceptorj_positions = positions_alloc;
+	  acceptorj_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Acceptor dinucleotides:");
+		  for (i = 0; i < acceptorj_nsites; i++) {
+		    printf(" %d",acceptorj_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	} else {
+	  acceptorj_nsites = segment_acceptor_nknown;
+	  acceptorj_positions = segment_acceptor_knownpos;
+	  acceptorj_knowni = segment_acceptor_knowni;
+	}
+
+	i = acceptorj_nsites - 1;
+	nmismatches = 0;
+	while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = acceptorj_positions[i];
+	  while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
+	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
 #if 0
-	    assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
-								    plusp,genestrand,first_read_p));
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+								  plusp,genestrand,first_read_p));
 #endif
-	    if (nmismatches <= max_mismatches_allowed) {
-	      if (acceptorj_knowni[i] >= 0) {
-		debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (acceptorj_knowni[i] >= 0) {
+	      debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+	      if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
+						splice_pos,/*substring_querystart*/segment->querypos5,
+						/*substring_queryend*/last_querypos,
+						nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+						querylength,plusp,genestrand,
+						sensedir,segment->chrnum,segment->chroffset,
+						segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
+	      }
+
+	    } else {
+	      prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
 			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
-		if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
-						  splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+		if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+						  splice_pos,/*substring_querystart*/segment->querypos5,
+						  /*substring_queryend*/last_querypos,
+						  nmismatches,prob,/*left*/segment_left,query_compress,
 						  querylength,plusp,genestrand,
 						  sensedir,segment->chrnum,segment->chroffset,
 						  segment->chrhigh,segment->chrlength)) != NULL) {
-		  debug4e(printf("=> %s acceptor: %f at %d (%d mismatches)\n",
-				 plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
-				 Substring_chimera_pos(hit),nmismatches));
+		  debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
 		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
 		  (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
 		}
+	      }
+	    }
+	  }
+	  
+	  i--;
+	}
 
-	      } else {
-		prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
-		debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
-			       splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
-		if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
-		  debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
-				 (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
-		  if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
-						    splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
-						    querylength,plusp,genestrand,
-						    sensedir,segment->chrnum,segment->chroffset,
-						    segment->chrhigh,segment->chrlength)) != NULL) {
-		    debug4e(printf("=> %s acceptor: %f at %d (%d mismatches)\n",
-				   plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
-		    debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
-		    (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
+
+	/* Splicing originally on minus strand.  Complement.  */
+	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+	if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+	  antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
+							 segment_antidonor_knownpos,segment_antidonor_knowni,
+							 segment_left,splice_pos_start,splice_pos_end);
+	  antidonorj_positions = positions_alloc;
+	  antidonorj_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Antidonor dinucleotides:");
+		  for (i = 0; i < antidonorj_nsites; i++) {
+		    printf(" %d",antidonorj_positions[i]);
 		  }
+		  printf("\n");
+		  );
+	} else {
+	  antidonorj_nsites = segment_antidonor_nknown;
+	  antidonorj_positions = segment_antidonor_knownpos;
+	  antidonorj_knowni = segment_antidonor_knowni;
+	}
+
+	i = antidonorj_nsites - 1;
+	nmismatches = 0;
+	while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = antidonorj_positions[i];
+	  while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
+	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+								  plusp,genestrand,first_read_p));
+#endif
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (antidonorj_knowni[i] >= 0) {
+	      debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+	      if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
+					     splice_pos,/*substring_querystart*/segment->querypos5,
+					     /*substring_queryend*/last_querypos,
+					     nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+					     querylength,plusp,genestrand,
+					     sensedir,segment->chrnum,segment->chroffset,
+					     segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+	      }
+		  
+	    } else {
+	      prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+		if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+					       splice_pos,/*substring_querystart*/segment->querypos5,
+					       /*substring_queryend*/last_querypos,
+					       nmismatches,prob,/*left*/segment_left,query_compress,
+					       querylength,plusp,genestrand,
+					       sensedir,segment->chrnum,segment->chroffset,
+					       segment->chrhigh,segment->chrlength)) != NULL) {
+		  debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
+		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		  (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
 		}
 	      }
 	    }
-	  
-	    i--;
 	  }
 
+	  i--;
+	}
+      }
+    }
+  }
 
-	  /* Splicing originally on minus strand.  Complement.  */
-	  sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+  return;
+}
+#endif
 
-	  if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
-	    antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
-							   segment_antidonor_knownpos,segment_antidonor_knowni,
-							   segment_left,splice_pos_start,splice_pos_end+1);
-	    antidonorj_positions = positions_alloc;
-	    antidonorj_knowni = knowni_alloc;
-	    debug4e(
-		    printf("Antidonor dinucleotides:");
-		    for (i = 0; i < antidonorj_nsites; i++) {
-		      printf(" %d",antidonorj_positions[i]);
-		    }
-		    printf("\n");
-		    );
-	  } else {
-	    antidonorj_nsites = segment_antidonor_nknown;
-	    antidonorj_positions = segment_antidonor_knownpos;
-	    antidonorj_knowni = segment_antidonor_knowni;
+
+
+/* Streamlined version */
+/* Produces lists of distant_donors and distant_acceptors that are substrings */
+/* Note: Call to Genome_donor_positions and similar functions can
+   yield positions at (splice_pos_start - 1) through (splice_pos_end +
+   1).  Therefore, need to have splice_pos_start > 0 and
+   splice_pos_end < querylength. */
+/* TODO: Change to lists of Stage3end_T objects, including GMAP.
+   Change definition of a chimera to be two Stage3end_T objects, instead
+   of two substrings. */
+static void
+find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonors,
+			     List_T **distant_acceptors, List_T **distant_antiacceptors,
+			     Segment_T *anchor_segments, int nanchors,
+#ifdef DEBUG4E
+			     char *queryptr,
+#endif
+			     Floors_T floors, int querylength, int query_lastpos, Compress_T query_compress,
+			     int max_mismatches_allowed, bool plusp, int genestrand) {
+#ifdef DEBUG4E
+  char *gbuffer;
+#endif
+
+  Segment_T segment, *p;
+  Substring_T hit;
+  Univcoord_T segment_left;
+  int nmismatches, j, i;
+  int splice_pos, last_querypos;
+  double prob;
+
+  int nmismatches_left, nmismatches_right;
+  int *floors_from_neg3, *floors_to_pos3;
+  int sensedir;
+
+  int splice_pos_start, splice_pos_end;
+
+#ifdef HAVE_ALLOCA
+  int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+  int *knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+#else
+  int mismatch_positions[MAX_READLENGTH+1];
+  int segment_donor_knownpos[MAX_READLENGTH+1], segment_acceptor_knownpos[MAX_READLENGTH+1];
+  int segment_antidonor_knownpos[MAX_READLENGTH+1], segment_antiacceptor_knownpos[MAX_READLENGTH+1];
+  int segment_donor_knowni[MAX_READLENGTH+1], segment_acceptor_knowni[MAX_READLENGTH+1];
+  int segment_antidonor_knowni[MAX_READLENGTH+1], segment_antiacceptor_knowni[MAX_READLENGTH+1];
+  int positions_alloc[MAX_READLENGTH+1];
+  int knowni_alloc[MAX_READLENGTH+1];
+#endif
+
+  int segment_donor_nknown, segment_acceptor_nknown, segment_antidonor_nknown, segment_antiacceptor_nknown;
+  int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
+  int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
+  int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+
+
+  debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors));
+
+  if (floors != NULL) {
+    floors_from_neg3 = floors->scorefrom[-index1interval];
+    floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+
+    for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+      segment = *p;
+      assert(segment->diagonal != (Univcoord_T) -1);
+
+      segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+      last_querypos = segment->querypos3 + index1part;
+      assert(last_querypos <= querylength);
+
+      debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n",
+		     max_mismatches_allowed,(unsigned long long) segment->diagonal,
+		     segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
+		     floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3],
+		     plusp));
+
+      debug4e(
+	      gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
+	      Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
+	      printf("genome 0..: %s\n",gbuffer);
+	      printf("query  0..: %s\n",queryptr);
+	      FREE(gbuffer);
+	      );
+
+      /* Find splices on genomic right */
+      if (plusp) {
+	/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+	if (last_querypos < query_lastpos &&
+	    (segment->querypos5 < index1part || segment->spliceable_low_p == true)) {
+	  /* genomic left anchor or middle anchor */
+	  debug4e(printf("Searching genomic right: plus genomic left anchor or middle anchor\n"));
+	  nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+						    query_compress,/*left*/segment_left,
+						    /*pos5*/segment->querypos5,/*pos3*/querylength,
+						    plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on left (%d allowed) at:",
+			 nmismatches_left,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_left; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
+
+	  splice_pos_start = segment->querypos5 + 1;
+	  if (nmismatches_left <= max_mismatches_allowed) {
+	    splice_pos_end = querylength - 1 - 1;
+	  } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1 - 1) {
+	    splice_pos_end = querylength - 1 - 1;
 	  }
+	  
+	} else {
+	  /* genomic right anchor or full anchor */
+	  debug4e(printf("Searching genomic right: plus genomic right anchor or full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	}
 
-	  i = antidonorj_nsites - 1;
-	  nmismatches = 0;
-	  while (i >= 0 && nmismatches <= max_mismatches_allowed) {
-	    splice_pos = antidonorj_positions[i];
-	    while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
-	      debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
-	      nmismatches++;
+      } else {
+	/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+	if (segment->querypos5 > index1part &&
+	    (last_querypos > query_lastpos || segment->spliceable_low_p == true)) {
+	  /* genomic left anchor or middle anchor */
+	  debug4e(printf("Searching genomic right: minus genomic left anchor or middle anchor\n"));
+	  nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+						    query_compress,/*left*/segment_left,
+						    /*pos5*/querylength - last_querypos,
+						    /*pos3*/querylength,plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on left (%d allowed) at:",
+			 nmismatches_left,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_left; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
+
+	  splice_pos_start = querylength - last_querypos + 1;
+	  if (nmismatches_left <= max_mismatches_allowed) {
+	    splice_pos_end = querylength - 1 - 1;
+	  } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1 - 1) {
+	    splice_pos_end = querylength - 1 - 1;
+	  }
+
+	} else {
+	  /* genomic right anchor or full anchor */
+	  debug4e(printf("Searching genomic right: minus genomic right anchor or full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	}
+      }
+
+      if (splice_pos_start <= splice_pos_end) {
+	debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
+
+	segment_donor_nknown = 0;
+	segment_antiacceptor_nknown = 0;
+	if ((j = segment->splicesites_i) >= 0) {
+	  /* Known splicing */
+	  /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
+	  while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+	    if (splicetypes[j] == DONOR) {
+	      segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
+	      segment_donor_knowni[segment_donor_nknown++] = j;
+	    } else if (splicetypes[j] == ANTIACCEPTOR) {
+	      segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
+	      segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
 	    }
-	    debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+	    j++;
+	  }
+	}
+	segment_donor_knownpos[segment_donor_nknown] = querylength;
+	segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
+
+	/* Originally on plus strand.  No complement */
+	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+	if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+	  assert(splice_pos_start > 0);
+	  assert(splice_pos_end < querylength - 1);
+	  donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
+						 segment_donor_knownpos,segment_donor_knowni,
+						 segment_left,splice_pos_start,splice_pos_end);
+	  donori_positions = positions_alloc;
+	  donori_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Donor dinucleotides:");
+		  for (i = 0; i < donori_nsites; i++) {
+		    printf(" %d",donori_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	} else {
+	  donori_nsites = segment_donor_nknown;
+	  donori_positions = segment_donor_knownpos;
+	  donori_knowni = segment_donor_knowni;
+	}
+
+	i = 0;
+	nmismatches = 0;
+	while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = donori_positions[i];
+	  while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
+	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
 #if 0
-	    assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
-								    plusp,genestrand,first_read_p));
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+								  plusp,genestrand,first_read_p));
 #endif
-	    if (nmismatches <= max_mismatches_allowed) {
-	      if (antidonorj_knowni[i] >= 0) {
-		debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
-			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
-		if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
-					       splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (donori_knowni[i] >= 0) {
+	      debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+		
+	      if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
+					     splice_pos,/*substring_querystart*/segment->querypos5,
+					     /*substring_queryend*/last_querypos,
+					     nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+					     querylength,plusp,genestrand,
+					     sensedir,segment->chrnum,segment->chroffset,
+					     segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
+	      }
+
+	    } else {
+	      prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+		if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+					       splice_pos,/*substring_querystart*/segment->querypos5,
+					       /*substring_queryend*/last_querypos,
+					       nmismatches,prob,/*left*/segment_left,query_compress,
 					       querylength,plusp,genestrand,
 					       sensedir,segment->chrnum,segment->chroffset,
 					       segment->chrhigh,segment->chrlength)) != NULL) {
-		  debug4e(printf("=> %s antidonor: %f at %d (%d mismatches)\n",
-				 plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
-				 Substring_chimera_pos(hit),nmismatches));
+		  debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
 		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
-		  (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+		  (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
 		}
-		  
-	      } else {
-		prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
-		debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
-			       splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
-		if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
-		  debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
-				 (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
-		  if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
-						 splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
-						 querylength,plusp,genestrand,
-						 sensedir,segment->chrnum,segment->chroffset,
-						 segment->chrhigh,segment->chrlength)) != NULL) {
-		    debug4e(printf("=> %s antidonor: %f at %d (%d mismatches)\n",
-				   plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
-		    debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
-		    (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+	      }
+	    }
+	  }
+
+	  i++;
+	}
+
+
+	/* Splicing originally on minus strand.  Complement */
+	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+	if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+	  assert(splice_pos_start > 0);
+	  assert(splice_pos_end < querylength - 1);
+	  antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
+							       segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
+							       segment_left,splice_pos_start,splice_pos_end);
+	  antiacceptori_positions = positions_alloc;
+	  antiacceptori_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Antiacceptor dinucleotides:");
+		  for (i = 0; i < antiacceptori_nsites; i++) {
+		    printf(" %d",antiacceptori_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	} else {
+	  antiacceptori_nsites = segment_antiacceptor_nknown;
+	  antiacceptori_positions = segment_antiacceptor_knownpos;
+	  antiacceptori_knowni = segment_antiacceptor_knowni;
+	}
+
+	i = 0;
+	nmismatches = 0;
+	while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = antiacceptori_positions[i];
+	  while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
+	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+								  plusp,genestrand,first_read_p));
+#endif
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (antiacceptori_knowni[i] >= 0) {
+	      debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+	      if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
+						splice_pos,/*substring_querystart*/segment->querypos5,
+						/*substring_queryend*/last_querypos,
+						nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+						querylength,plusp,genestrand,
+						sensedir,segment->chrnum,segment->chroffset,
+						segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
+	      }
+
+	    } else {
+	      prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+		if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+						  splice_pos,/*substring_querystart*/segment->querypos5,
+						  /*substring_queryend*/last_querypos,
+						  nmismatches,prob,/*left*/segment_left,query_compress,
+						  querylength,plusp,genestrand,
+						  sensedir,segment->chrnum,segment->chroffset,
+						  segment->chrhigh,segment->chrlength)) != NULL) {
+		  debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
+		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		  (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
+		}
+	      }
+	    }
+	  }
+
+	  i++;
+	}
+      }
+
+
+      /* Find splices on genomic left */
+      if (plusp) {
+	/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+	if (segment->querypos5 > index1part && 
+	    (last_querypos > query_lastpos || segment->spliceable_high_p == true)) {
+	  /* genomic right anchor or middle anchor */
+	  debug4e(printf("Searching genomic left: plus genomic right anchor or middle anchor\n"));
+	  nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+						      query_compress,/*left*/segment_left,
+						      /*pos5*/0,/*pos3*/last_querypos,
+						      plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_right; i++) {
+		    printf(" %d",mismatch_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	  
+	  splice_pos_end = last_querypos - 1 - 1;
+	  if (nmismatches_right <= max_mismatches_allowed) {
+	    splice_pos_start = 1;
+	  } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+	    splice_pos_start = 1;
+	  }
+	  
+	} else {
+	  /* genomic left anchor or full anchor */
+	  debug4e(printf("Searching genomic left: plus genomic left anchor or full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	}
+
+      } else {
+	/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+	if (last_querypos < query_lastpos &&
+	    (segment->querypos5 < index1part || segment->spliceable_high_p == true)) {
+	  /* genomic right anchor or middle anchor*/
+	  debug4e(printf("Searching genomic left: minus genomic right anchor or middle anchor\n"));
+	  nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+						      query_compress,/*left*/segment_left,
+						      /*pos5*/0,/*pos3*/querylength - segment->querypos5,
+						      plusp,genestrand);
+	  debug4e(
+		  printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+		  for (i = 0; i <= nmismatches_right; i++) {
+		    printf(" %d",mismatch_positions[i]);
 		  }
+		  printf("\n");
+		  );
+
+	  splice_pos_end = querylength - segment->querypos5 - 1 - 1;
+	  if (nmismatches_right <= max_mismatches_allowed) {
+	    splice_pos_start = 1;
+	  } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+	    splice_pos_start = 1;
+	  }
+
+	} else {
+	  /* genomic left anchor or full anchor */
+	  debug4e(printf("Searching genomic left: minus genomic left anchor or full anchor\n"));
+	  splice_pos_start = querylength;
+	  splice_pos_end = 0;
+	}
+      }
+
+      if (splice_pos_start <= splice_pos_end) {
+	debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
+
+	segment_acceptor_nknown = 0;
+	segment_antidonor_nknown = 0;
+	if ((j = segment->splicesites_i) >= 0) {
+	  /* Known splicing */
+	  while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+	    if (splicetypes[j] == ACCEPTOR) {
+	      debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+	      segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
+	      segment_acceptor_knowni[segment_acceptor_nknown++] = j;
+	    } else if (splicetypes[j] == ANTIDONOR) {
+	      debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+	      segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
+	      segment_antidonor_knowni[segment_antidonor_nknown++] = j;
+	    }
+	    j++;
+	  }
+	}
+	segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
+	segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
+
+	/* Splicing originally on plus strand.  No complement. */
+	sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+	if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+	  assert(splice_pos_start > 0);
+	  assert(splice_pos_end < querylength - 1);
+	  acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
+						       segment_acceptor_knownpos,segment_acceptor_knowni,
+						       segment_left,splice_pos_start,splice_pos_end);
+	  acceptorj_positions = positions_alloc;
+	  acceptorj_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Acceptor dinucleotides:");
+		  for (i = 0; i < acceptorj_nsites; i++) {
+		    printf(" %d",acceptorj_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	} else {
+	  acceptorj_nsites = segment_acceptor_nknown;
+	  acceptorj_positions = segment_acceptor_knownpos;
+	  acceptorj_knowni = segment_acceptor_knowni;
+	}
+
+	i = acceptorj_nsites - 1;
+	nmismatches = 0;
+	while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = acceptorj_positions[i];
+	  while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
+	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+								  plusp,genestrand,first_read_p));
+#endif
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (acceptorj_knowni[i] >= 0) {
+	      debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+	      if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
+						splice_pos,/*substring_querystart*/segment->querypos5,
+						/*substring_queryend*/last_querypos,
+						nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+						querylength,plusp,genestrand,
+						sensedir,segment->chrnum,segment->chroffset,
+						segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
+	      }
+
+	    } else {
+	      prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+		if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+						  splice_pos,/*substring_querystart*/segment->querypos5,
+						  /*substring_queryend*/last_querypos,
+						  nmismatches,prob,/*left*/segment_left,query_compress,
+						  querylength,plusp,genestrand,
+						  sensedir,segment->chrnum,segment->chroffset,
+						  segment->chrhigh,segment->chrlength)) != NULL) {
+		  debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
+		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		  (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
 		}
 	      }
 	    }
+	  }
+	  
+	  i--;
+	}
+
+
+	/* Splicing originally on minus strand.  Complement.  */
+	sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+	if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+	  assert(splice_pos_start > 0);
+	  assert(splice_pos_end < querylength - 1);
+	  antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
+							 segment_antidonor_knownpos,segment_antidonor_knowni,
+							 segment_left,splice_pos_start,splice_pos_end);
+	  antidonorj_positions = positions_alloc;
+	  antidonorj_knowni = knowni_alloc;
+	  debug4e(
+		  printf("Antidonor dinucleotides:");
+		  for (i = 0; i < antidonorj_nsites; i++) {
+		    printf(" %d",antidonorj_positions[i]);
+		  }
+		  printf("\n");
+		  );
+	} else {
+	  antidonorj_nsites = segment_antidonor_nknown;
+	  antidonorj_positions = segment_antidonor_knownpos;
+	  antidonorj_knowni = segment_antidonor_knowni;
+	}
 
-	    i--;
+	i = antidonorj_nsites - 1;
+	nmismatches = 0;
+	while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+	  splice_pos = antidonorj_positions[i];
+	  while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+	    debug4e(printf("  mismatch at %d\n",mismatch_positions[nmismatches]));
+	    nmismatches++;
 	  }
+	  debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+	  assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+								  plusp,genestrand,first_read_p));
+#endif
+	  if (nmismatches <= max_mismatches_allowed) {
+	    if (antidonorj_knowni[i] >= 0) {
+	      debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			     (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+	      if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
+					     splice_pos,/*substring_querystart*/segment->querypos5,
+					     /*substring_queryend*/last_querypos,
+					     nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+					     querylength,plusp,genestrand,
+					     sensedir,segment->chrnum,segment->chroffset,
+					     segment->chrhigh,segment->chrlength)) != NULL) {
+		debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+			       plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
+			       Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+		debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		(*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+	      }
+		  
+	    } else {
+	      prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
+	      debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+			     splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+	      if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+		debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+			       (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+		if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+					       splice_pos,/*substring_querystart*/segment->querypos5,
+					       /*substring_queryend*/last_querypos,
+					       nmismatches,prob,/*left*/segment_left,query_compress,
+					       querylength,plusp,genestrand,
+					       sensedir,segment->chrnum,segment->chroffset,
+					       segment->chrhigh,segment->chrlength)) != NULL) {
+		  debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+				 plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+				 Substring_querystart(hit),Substring_queryend(hit)));
+		  debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+		  (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+		}
+	      }
+	    }
+	  }
+
+	  i--;
 	}
       }
     }
@@ -12331,8 +13233,11 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
       startfrag = (Substring_T) p->first;
       endfrag = (Substring_T) q->first;
-      debug4ld(printf("end1-end2: startfrag at %llu and endfrag at %llu\n",
-		      (unsigned long long) Substring_genomicstart(startfrag),(unsigned long long) Substring_genomicstart(endfrag)));
+      debug4ld(printf("end1-end2: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
+		      (unsigned long long) Substring_genomicstart(startfrag),
+		      Substring_querystart(startfrag),Substring_queryend(startfrag),
+		      (unsigned long long) Substring_genomicstart(endfrag),
+		      Substring_querystart(endfrag),Substring_queryend(endfrag)));
 
       if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
@@ -12423,9 +13328,11 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
       startfrag = (Substring_T) p->first;
       endfrag = (Substring_T) q->first;
-      debug4ld(printf("end3-end4: startfrag at %llu and endfrag at %llu\n",
+      debug4ld(printf("end3-end4: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(startfrag),
-		      (unsigned long long) Substring_genomicstart(endfrag)));
+		      Substring_querystart(startfrag),Substring_queryend(startfrag),
+		      (unsigned long long) Substring_genomicstart(endfrag),
+		      Substring_querystart(endfrag),Substring_queryend(endfrag)));
 
       if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
@@ -12521,9 +13428,11 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
       startfrag = (Substring_T) p->first;
       endfrag = (Substring_T) q->first;
-      debug4ld(printf("end1-end4: startfrag at %llu and endfrag at %llu\n",
+      debug4ld(printf("end1-end4: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(startfrag),
-		      (unsigned long long) Substring_genomicstart(endfrag)));
+		      Substring_querystart(startfrag),Substring_queryend(startfrag),
+		      (unsigned long long) Substring_genomicstart(endfrag),
+		      Substring_querystart(endfrag),Substring_queryend(endfrag)));
 
       if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
@@ -12583,10 +13492,12 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
       startfrag = (Substring_T) p->first;
       endfrag = (Substring_T) q->first;
-      debug4ld(printf("end3-end2: startfrag at %llu and endfrag at %llu\n",
+      debug4ld(printf("end3-end2: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(startfrag),
-		      (unsigned long long) Substring_genomicstart(endfrag)));
-
+		      Substring_querystart(startfrag),Substring_queryend(startfrag),
+		      (unsigned long long) Substring_genomicstart(endfrag),
+		      Substring_querystart(endfrag),Substring_queryend(endfrag)));
+		      
       if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
 	p = p->rest;
@@ -12709,8 +13620,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end1-end2: donor at %llu and acceptor at %llu\n",
-		      (unsigned long long) Substring_genomicstart(donor),(unsigned long long) Substring_genomicstart(acceptor)));
+      debug4ld(printf("end1-end2: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
+		      (unsigned long long) Substring_genomicstart(donor),
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -12801,9 +13715,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end3-end4: donor at %llu and acceptor at %llu\n",
+      debug4ld(printf("end3-end4: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(donor),
-		      (unsigned long long) Substring_genomicstart(acceptor)));
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -12891,9 +13807,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end5-end6: donor at %llu and acceptor at %llu\n",
+      debug4ld(printf("end5-end6: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(donor),
-		      (unsigned long long) Substring_genomicstart(acceptor)));
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -12982,9 +13900,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end7-end8: donor at %llu and acceptor at %llu\n",
+      debug4ld(printf("end7-end8: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(donor),
-		      (unsigned long long) Substring_genomicstart(acceptor)));
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -13078,9 +13998,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end1-end4: donor at %llu and acceptor at %llu\n",
+      debug4ld(printf("end1-end4: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(donor),
-		      (unsigned long long) Substring_genomicstart(acceptor)));
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -13140,9 +14062,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end3-end2: donor at %llu and acceptor at %llu\n",
+      debug4ld(printf("end3-end2: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(donor),
-		      (unsigned long long) Substring_genomicstart(acceptor)));
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -13203,9 +14127,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end5-end8: donor at %llu and acceptor at %llu\n",
+      debug4ld(printf("end5-end8: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(donor),
-		      (unsigned long long) Substring_genomicstart(acceptor)));
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -13265,9 +14191,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
     while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
       donor = (Substring_T) p->first;
       acceptor = (Substring_T) q->first;
-      debug4ld(printf("end7-end6: donor at %llu and acceptor at %llu\n",
+      debug4ld(printf("end7-end6: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
 		      (unsigned long long) Substring_genomicstart(donor),
-		      (unsigned long long) Substring_genomicstart(acceptor)));
+		      Substring_querystart(donor),Substring_queryend(donor),
+		      (unsigned long long) Substring_genomicstart(acceptor),
+		      Substring_querystart(acceptor),Substring_queryend(acceptor)));
 
       if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
 	debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -13440,8 +14368,8 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - support;
 	      if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
-						     Substring_chimera_pos(donor),nmismatches_shortend,
-						     /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+						     Substring_chimera_pos(donor),/*substring_querystart*/0,/*substring_queryend*/querylength,
+						     nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
 						     querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
 						     Substring_chrnum(donor),Substring_chroffset(donor),
 						     Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13516,8 +14444,8 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - endlength;
 	      if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
-					       Substring_chimera_pos(acceptor),nmismatches_shortend,
-					       /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+					       Substring_chimera_pos(acceptor),/*substring_querystart*/0,/*substring_queryend*/querylength,
+					       nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
 					       querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
 					       Substring_chrnum(acceptor),Substring_chroffset(acceptor),
 					       Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
@@ -13592,8 +14520,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - endlength;
 	      if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
-						     querylength-Substring_chimera_pos(donor),nmismatches_shortend,
-						     /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+						     querylength-Substring_chimera_pos(donor),
+						     /*substring_querystart*/0,/*substring_queryend*/querylength,
+						     nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
 						     querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
 						     Substring_chrnum(donor),Substring_chroffset(donor),
 						     Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13669,8 +14598,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - support;
 	      if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
-					       querylength-Substring_chimera_pos(acceptor),nmismatches_shortend,
-					       /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+					       querylength-Substring_chimera_pos(acceptor),
+					       /*substring_querystart*/0,/*substring_queryend*/querylength,
+					       nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
 					       querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
 					       Substring_chrnum(acceptor),Substring_chroffset(acceptor),
 					       Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
@@ -13745,8 +14675,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - endlength;
 	      if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
-						     Substring_chimera_pos(donor),nmismatches_shortend,
-						     /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+						     Substring_chimera_pos(donor),
+						     /*substring_querystart*/0,/*substring_queryend*/querylength,
+						     nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
 						     querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
 						     Substring_chrnum(donor),Substring_chroffset(donor),
 						     Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13822,8 +14753,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - support;
 	      if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
-					       Substring_chimera_pos(acceptor),nmismatches_shortend,
-					       /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+					       Substring_chimera_pos(acceptor),
+					       /*substring_querystart*/0,/*substring_queryend*/querylength,
+					       nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
 					       querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
 					       Substring_chrnum(acceptor),Substring_chroffset(acceptor),
 					       Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
@@ -13899,8 +14831,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - support;
 	      if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
-						     querylength-Substring_chimera_pos(donor),nmismatches_shortend,
-						     /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+						     querylength-Substring_chimera_pos(donor),
+						     /*substring_querystart*/0,/*substring_queryend*/querylength,
+						     nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
 						     querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
 						     Substring_chrnum(donor),Substring_chroffset(donor),
 						     Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13975,8 +14908,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
 	      bestj = Intlist_head(splicesites_i);
 	      bestleft = splicesites[bestj] - endlength;
 	      if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
-					       querylength-Substring_chimera_pos(acceptor),nmismatches_shortend,
-					       /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+					       querylength-Substring_chimera_pos(acceptor),
+					       /*substring_querystart*/0,/*substring_queryend*/querylength,
+					       nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
 					       querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
 					       Substring_chrnum(acceptor),Substring_chroffset(acceptor),
 					       Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
diff --git a/src/stage3.c b/src/stage3.c
index b13c3ab..ef171c2 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: stage3.c 195963 2016-08-08 16:38:05Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -12404,7 +12404,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 		int sense_try, int sense_filter,
 		Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
   struct Pair_T *pairarray1;
-  List_T p;
+  List_T pairs_fwd_copy, pairs_rev_copy, p;
   Chrpos_T *last_genomedp5_fwd = NULL, *last_genomedp3_fwd = NULL, *last_genomedp5_rev = NULL, *last_genomedp3_rev = NULL;
   List_T pairs_pretrim, pairs_fwd, pairs_rev, best_pairs, temp_pairs, path_fwd, path_rev, best_path, temp_path;
   List_T copy;
@@ -12921,10 +12921,13 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 
     *cdna_direction = +1;
     *sensedir = SENSE_FORWARD;
+
+    /* path_trim alters pairs_fwd, so make a copy in case we use it for pairs_pretrim */
+    pairs_fwd_copy = Pairpool_copy(pairs_fwd,pairpool);
     *finalpairs1 = path_trim(defect_rate_fwd,&(*ambig_end_length_5_1),&(*ambig_end_length_3_1),
 			     &(*ambig_splicetype_5_1),&(*ambig_splicetype_3_1),
 			     &(*ambig_prob_5_1),&(*ambig_prob_3_1),
-			     pairs_fwd,&(*cdna_direction),watsonp,
+			     pairs_fwd_copy,&(*cdna_direction),watsonp,
 			     jump_late_p,querylength,
 #ifdef GSNAP
 			     &(*sensedir),
@@ -12942,10 +12945,11 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 
     *cdna_direction = -1;
     *sensedir = SENSE_ANTI;
+    pairs_rev_copy = Pairpool_copy(pairs_rev,pairpool);
     *finalpairs2 = path_trim(defect_rate_rev,&(*ambig_end_length_5_2),&(*ambig_end_length_3_2),
 			     &(*ambig_splicetype_5_2),&(*ambig_splicetype_3_2),
 			     &(*ambig_prob_5_2),&(*ambig_prob_3_2),
-			     pairs_rev,&(*cdna_direction),watsonp,
+			     pairs_rev_copy,&(*cdna_direction),watsonp,
 			     jump_late_p,querylength,
 #ifdef GSNAP
 			     &(*sensedir),
@@ -13044,6 +13048,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 #endif
   }
 
+  /* Okay for path_trim to alter pairs_pretrim */
   *finalpairs1 = path_trim(defect_rate,&(*ambig_end_length_5_1),&(*ambig_end_length_3_1),
 			   &(*ambig_splicetype_5_1),&(*ambig_splicetype_3_1),
 			   &(*ambig_prob_5_1),&(*ambig_prob_3_1),
diff --git a/src/stage3.h b/src/stage3.h
index ba18dae..4707bc3 100644
--- a/src/stage3.h
+++ b/src/stage3.h
@@ -1,4 +1,4 @@
-/* $Id: stage3.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: stage3.h 193876 2016-07-12 02:46:04Z twu $ */
 #ifndef STAGE3_INCLUDED
 #define STAGE3_INCLUDED
 
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 006780a..d5fba8b 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 195760 2016-08-04 00:12:04Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 5278244..15cbc60 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 193887 2016-07-12 03:23:17Z twu $ */
+/* $Id: stage3hr.h 195760 2016-08-04 00:12:04Z twu $ */
 #ifndef STAGE3HR_INCLUDED
 #define STAGE3HR_INCLUDED
 
diff --git a/src/substring.c b/src/substring.c
index aaa657b..5ec012f 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: substring.c 195961 2016-08-08 16:36:34Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -3410,6 +3410,11 @@ Substring_splicecoord (T this) {
   return this->splicecoord;
 }
 
+Chrpos_T
+Substring_chr_splicecoord (T this) {
+  return (Chrpos_T) (this->splicecoord - this->chroffset);
+}
+
 int
 Substring_splicesites_knowni (T this) {
   return this->splicesites_knowni;
@@ -4186,13 +4191,15 @@ Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatche
 
 
 T
-Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, int donor_nmismatches,
+Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
+		     int substring_querystart, int substring_queryend, int donor_nmismatches,
 		     double donor_prob, Univcoord_T left, Compress_T query_compress,
 		     int querylength, bool plusp, int genestrand, int sensedir,
 		     Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
   T new;
   int querystart, queryend;
-  Univcoord_T genomicstart, genomicend, alignstart, alignend;
+  Univcoord_T genomicstart, alignstart, alignend;
+  /* Univcoord_T genomicend; */
   Endtype_T start_endtype, end_endtype;
   Trimaction_T trim_left_action, trim_right_action;
 
@@ -4206,16 +4213,18 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
 
   } else if (plusp == true) {
     genomicstart = left;
-    genomicend = left + querylength;
+    /* genomicend = left + querylength; */
     if (sensedir == SENSE_FORWARD) {
       start_endtype = END;
       end_endtype = DON;
 
-      querystart = 0;
+      querystart = substring_querystart; /* 0, for an end piece */
       queryend = donor_pos;
-      alignstart = genomicstart;
-      alignend = genomicstart + donor_pos;
-      trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      if (querystart == 0) {
+	trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      } else {
+	trim_left_action = PRE_TRIMMED;
+      }
       trim_right_action = NO_TRIM;
 
     } else if (sensedir == SENSE_ANTI) {
@@ -4223,28 +4232,35 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
       end_endtype = END;
 
       querystart = donor_pos;
-      queryend = querylength;
-      alignstart = genomicstart + donor_pos;
-      alignend = genomicend;
+      queryend = substring_queryend; /* querylength, for an end piece */
       trim_left_action = NO_TRIM;
-      trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      if (queryend == querylength) {
+	trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      } else {
+	trim_right_action = PRE_TRIMMED;
+      }
 
     } else {
       abort();
     }
 
+    alignstart = genomicstart + querystart;
+    alignend = genomicstart + queryend;
+
   } else {
     genomicstart = left + querylength;
-    genomicend = left;
+    /* genomicend = left; */
     if (sensedir == SENSE_FORWARD) {
       start_endtype = END;
       end_endtype = DON;
 
-      querystart = 0;
+      querystart = substring_querystart; /* 0, for an end piece */
       queryend = querylength - donor_pos;
-      alignstart = genomicstart;
-      alignend = genomicstart - (querylength - donor_pos);
-      trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      if (querystart == 0) {
+	trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      } else {
+	trim_left_action = PRE_TRIMMED;
+      }
       trim_right_action = NO_TRIM;
 
     } else if (sensedir == SENSE_ANTI) {
@@ -4252,15 +4268,20 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
       end_endtype = END;
 
       querystart = querylength - donor_pos;
-      queryend = querylength;
-      alignstart = genomicstart - (querylength - donor_pos);
-      alignend = genomicend;
+      queryend = substring_queryend; /* querylength, for an end piece */
       trim_left_action = NO_TRIM;
-      trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      if (queryend == querylength) {
+	trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      } else {
+	trim_right_action = PRE_TRIMMED;
+      }
 
     } else {
       abort();
     }
+
+    alignstart = genomicstart - querystart;
+    alignend = genomicstart - queryend;
   }
 
   if ((new = Substring_new(donor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
@@ -4302,13 +4323,15 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
 
 
 T
-Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos, int acceptor_nmismatches,
+Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos,
+			int substring_querystart, int substring_queryend, int acceptor_nmismatches,
 			double acceptor_prob, Univcoord_T left, Compress_T query_compress,
 			int querylength, bool plusp, int genestrand, int sensedir,
 			Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
   T new;
   int querystart, queryend;
-  Univcoord_T genomicstart, genomicend, alignstart, alignend;
+  Univcoord_T genomicstart, alignstart, alignend;
+  /* Univcoord_T genomicend; */
   Endtype_T start_endtype, end_endtype;
   Trimaction_T trim_left_action, trim_right_action;
 
@@ -4322,61 +4345,75 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
 
   } else if (plusp == true) {
     genomicstart = left;
-    genomicend = left + querylength;
+    /* genomicend = left + querylength; */
     if (sensedir == SENSE_FORWARD) {
       start_endtype = ACC;
       end_endtype = END;
 
       querystart = acceptor_pos;
-      queryend = querylength;
-      alignstart = genomicstart + acceptor_pos;
-      alignend = genomicend;
+      queryend = substring_queryend; /* querylength, for an end piece */
       trim_left_action = NO_TRIM;
-      trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      if (queryend == querylength) {
+	trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      } else {
+	trim_right_action = PRE_TRIMMED;
+      }
 
     } else if (sensedir == SENSE_ANTI) {
       start_endtype = END;
       end_endtype = ACC;
 
-      querystart = 0;
+      querystart = substring_querystart; /* 0, for an end piece */
       queryend = acceptor_pos;
-      alignstart = genomicstart;
-      alignend = genomicstart + acceptor_pos;
-      trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      if (querystart == 0) {
+	trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      } else {
+	trim_left_action = PRE_TRIMMED;
+      }
       trim_right_action = NO_TRIM;
 
     } else {
       abort();
     }
 
+    alignstart = genomicstart + querystart;
+    alignend = genomicstart + queryend;
+
   } else {
     genomicstart = left + querylength;
-    genomicend = left;
+    /* genomicend = left; */
     if (sensedir == SENSE_FORWARD) {
       start_endtype = ACC;
       end_endtype = END;
 
       querystart = querylength - acceptor_pos;
-      queryend = querylength;
-      alignstart = genomicstart - (querylength - acceptor_pos);
-      alignend = genomicend;
+      queryend = substring_queryend; /* querylength, for an end piece */
       trim_left_action = NO_TRIM;
-      trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      if (queryend == querylength) {
+	trim_right_action = COMPUTE_TRIM;	/* queryend == querylength */
+      } else {
+	trim_right_action = PRE_TRIMMED;
+      }
 
     } else if (sensedir == SENSE_ANTI) {
       start_endtype = END;
       end_endtype = ACC;
 
-      querystart = 0;
+      querystart = substring_querystart; /* 0, for an end piece */
       queryend = querylength - acceptor_pos;
-      alignstart = genomicstart;
-      alignend = genomicstart - (querylength - acceptor_pos);
-      trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      if (querystart == 0) {
+	trim_left_action = COMPUTE_TRIM;	/* querystart == 0 */
+      } else {
+	trim_left_action = PRE_TRIMMED;
+      }
       trim_right_action = NO_TRIM;
 
     } else {
       abort();
     }
+
+    alignstart = genomicstart - querystart;
+    alignend = genomicstart - queryend;
   }
 
   if ((new = Substring_new(acceptor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
@@ -4389,6 +4426,8 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
 
   debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n",
 		acceptor_knowni,acceptor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset));
+  debug2(printf("Original bounds were %d..%d\n",substring_querystart,substring_queryend));
+
   new->splicecoord = acceptor_coord;
   new->splicesites_knowni = acceptor_knowni;
 
@@ -4542,16 +4581,16 @@ Substring_assign_donor_prob (T donor) {
 
   } else if (donor->chimera_sensedir == SENSE_FORWARD) {
     if (donor->plusp == true) {
-      donor->chimera_prob = Maxent_hr_donor_prob(donor->chimera_modelpos,donor->chroffset);
+      donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset);
     } else {
-      donor->chimera_prob = Maxent_hr_antidonor_prob(donor->chimera_modelpos,donor->chroffset);
+      donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset);
     }
 
   } else if (donor->chimera_sensedir == SENSE_ANTI) {
     if (donor->plusp == true) {
-      donor->chimera_prob = Maxent_hr_antidonor_prob(donor->chimera_modelpos,donor->chroffset);
+      donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset);
     } else {
-      donor->chimera_prob = Maxent_hr_donor_prob(donor->chimera_modelpos,donor->chroffset);
+      donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset);
     }
 
   } else {
@@ -4573,16 +4612,16 @@ Substring_assign_acceptor_prob (T acceptor) {
 
   } else if (acceptor->chimera_sensedir == SENSE_FORWARD) {
     if (acceptor->plusp == true) {
-      acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+      acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset);
     } else {
-      acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+      acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset);
     }
     
   } else if (acceptor->chimera_sensedir == SENSE_ANTI) {
     if (acceptor->plusp == true) {
-      acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+      acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset);
     } else {
-      acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+      acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset);
     }
 
   } else {
diff --git a/src/substring.h b/src/substring.h
index 3975d6e..56936c6 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 193327 2016-07-01 19:24:12Z twu $ */
+/* $Id: substring.h 195961 2016-08-08 16:36:34Z twu $ */
 #ifndef SUBSTRING_INCLUDED
 #define SUBSTRING_INCLUDED
 
@@ -108,6 +108,8 @@ extern Univcoord_T
 Substring_left (T this);
 extern Univcoord_T
 Substring_splicecoord (T this);
+extern Chrpos_T
+Substring_chr_splicecoord (T this);
 extern int
 Substring_splicesites_knowni (T this);
 extern Univcoord_T
@@ -273,12 +275,14 @@ Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatche
 		       Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
 
 extern T
-Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, int donor_nmismatches,
+Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, 
+		     int substring_querystart, int substring_queryend, int donor_nmismatches,
 		     double donor_prob, Univcoord_T left, Compress_T query_compress,
 		     int querylength, bool plusp, int genestrand, int sensedir,
 		     Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
 extern T
-Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos, int acceptor_nmismatches,
+Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos,
+			int substring_querystart, int substring_queryend, int acceptor_nmismatches,
 			double acceptor_prob, Univcoord_T left, Compress_T query_compress,
 			int querylength, bool plusp, int genestrand, int sensedir,
 			Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
diff --git a/src/uintlist.c b/src/uintlist.c
index e35d892..f0111d1 100644
--- a/src/uintlist.c
+++ b/src/uintlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uintlist.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: uintlist.c 193875 2016-07-12 02:43:38Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/uintlist.h b/src/uintlist.h
index 83b43e5..2afcae5 100644
--- a/src/uintlist.h
+++ b/src/uintlist.h
@@ -1,4 +1,4 @@
-/* $Id: uintlist.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: uintlist.h 193875 2016-07-12 02:43:38Z twu $ */
 #ifndef UINTLIST_INCLUDED
 #define UINTLIST_INCLUDED
 
diff --git a/src/uniqscan.c b/src/uniqscan.c
index cc0fc87..67e63ef 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 193877 2016-07-12 02:46:33Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/univdiag.h b/src/univdiag.h
index 6fa8ff7..b10fab2 100644
--- a/src/univdiag.h
+++ b/src/univdiag.h
@@ -1,4 +1,4 @@
-/* $Id: univdiag.h 193897 2016-07-12 04:12:45Z twu $ */
+/* $Id: univdiag.h 195760 2016-08-04 00:12:04Z twu $ */
 #ifndef UNIVDIAG_INCLUDED
 #define UNIVDIAG_INCLUDED
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git



More information about the debian-med-commit mailing list