[med-svn] [gmap] 01/05: New upstream version 2017-09-30

Alex Mestiashvili malex-guest at moszumanska.debian.org
Mon Oct 2 11:39:31 UTC 2017


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository gmap.

commit b588a6101504d84da4d6f572f90a8e5668277f8c
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Mon Oct 2 09:59:27 2017 +0200

    New upstream version 2017-09-30
---
 ChangeLog           |  32 +++++++
 VERSION             |   2 +-
 configure           |  24 ++---
 src/Makefile.am     |   4 +-
 src/Makefile.in     |  30 ++++---
 src/chimera.c       |   4 +-
 src/gmap.c          |  41 ++++++---
 src/gsnap.c         |   4 +-
 src/pair.c          | 150 +++++++++++++++++++++++++------
 src/pair.h          |   7 +-
 src/stage3.c        |  44 ++++++---
 src/translation.c   |  54 +++++++++--
 src/uinttableuint.c | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/uinttableuint.h |  33 +++++++
 src/uniqscan.c      |   4 +-
 15 files changed, 597 insertions(+), 88 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f8bfc3b..e5cd7b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2017-09-29  twu
+
+    * VERSION, public-2017-09-05, src: Updated version number
+
+    * stage3.c: Merged revision 210194 from trunk to move build_dual_breaks step
+
+2017-09-27  twu
+
+    * stage3.c: Merged revision 210170 from trunk to stop over-aggressive use of
+      maxintronlen_end from wrong side of the sequence
+
+    * chimera.c: Merged revision 210169 from trunk to initialize a variable
+
+2017-09-23  twu
+
+    * VERSION, public-2017-09-05, src, uinttableuint.c, uinttableuint.h: Merged
+      revision 210071 from trunk to add files
+
+    * gsnap.c, pair.c, pair.h, public-2017-09-05, src, uniqscan.c: Merged
+      revision 210062 from trunk to fix computation of cds bounds for GFF3
+      output
+
+    * translation.c: Merged revision 210057 from trunk to assign aaphase_g for
+      final genomic codon
+
+    * gmap.c: Merged revision 210059 from trunk to restore MAX_CHIMERA_ITER to
+      3, but not iterating multiple times for middle pieces.  Added option
+      --gff3-cds
+
+    * Makefile.gsnaptoo.am: Merged revision 210058 from trunk to add
+      uinttableuint to library
+
 2017-09-11  twu
 
     * VERSION: Updated version number
diff --git a/VERSION b/VERSION
index 5c57d19..2079994 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2017-09-11
\ No newline at end of file
+2017-09-30
\ No newline at end of file
diff --git a/configure b/configure
index 9f64c4b..1400e2e 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2017-09-11.
+# Generated by GNU Autoconf 2.69 for gmap 2017-09-30.
 #
 # Report bugs to <Thomas Wu <twu at gene.com>>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='gmap'
 PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2017-09-11'
-PACKAGE_STRING='gmap 2017-09-11'
+PACKAGE_VERSION='2017-09-30'
+PACKAGE_STRING='gmap 2017-09-30'
 PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
 PACKAGE_URL=''
 
@@ -1369,7 +1369,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures gmap 2017-09-11 to adapt to many kinds of systems.
+\`configure' configures gmap 2017-09-30 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1440,7 +1440,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of gmap 2017-09-11:";;
+     short | recursive ) echo "Configuration of gmap 2017-09-30:";;
    esac
   cat <<\_ACEOF
 
@@ -1577,7 +1577,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-gmap configure 2017-09-11
+gmap configure 2017-09-30
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2183,7 +2183,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by gmap $as_me 2017-09-11, which was
+It was created by gmap $as_me 2017-09-30, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2533,8 +2533,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
 $as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-09-11" >&5
-$as_echo "2017-09-11" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-09-30" >&5
+$as_echo "2017-09-30" >&6; }
 
 
 ### Read defaults
@@ -4401,7 +4401,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='gmap'
- VERSION='2017-09-11'
+ VERSION='2017-09-30'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -19978,7 +19978,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by gmap $as_me 2017-09-11, which was
+This file was extended by gmap $as_me 2017-09-30, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -20044,7 +20044,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-gmap config.status 2017-09-11
+gmap config.status 2017-09-30
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/src/Makefile.am b/src/Makefile.am
index f73ce68..cf8f009 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,7 +21,7 @@ include_HEADERS = fopen.h bool.h types.h separator.h comp.h \
  interval.h uintlist.h uint8list.h \
  iitdef.h iit-read.h iit-write.h parserange.h \
  univinterval.h iit-read-univ.h \
- table.h tableuint.h uinttable.h \
+ table.h tableuint.h uinttable.h uinttableuint.h \
  stopwatch.h semaphore.h access.h \
  chrom.h filestring.h \
  md5.h complement.h bzip2.h sequence.h \
@@ -88,7 +88,7 @@ LIBGMAP_LA_FILES = fopen.h bool.h types.h separator.h comp.h \
  iit-read.c iit-read.h iit-write.c iit-write.h parserange.c parserange.h \
  univinterval.c univinterval.h iit-read-univ.c iit-read-univ.h \
  stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
- table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h \
+ table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h uinttableuint.c uinttableuint.h\
  chrom.c chrom.h filestring.c filestring.h \
  md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
  genomicpos.c genomicpos.h \
diff --git a/src/Makefile.in b/src/Makefile.in
index 5b6c083..0712337 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -188,15 +188,15 @@ am__objects_1 = libgmap_la-except.lo libgmap_la-assert.lo \
 	libgmap_la-iit-read-univ.lo libgmap_la-stopwatch.lo \
 	libgmap_la-semaphore.lo libgmap_la-access.lo \
 	libgmap_la-table.lo libgmap_la-tableuint.lo \
-	libgmap_la-uinttable.lo libgmap_la-chrom.lo \
-	libgmap_la-filestring.lo libgmap_la-md5.lo libgmap_la-bzip2.lo \
-	libgmap_la-sequence.lo libgmap_la-genomicpos.lo \
-	libgmap_la-bitpack64-read.lo libgmap_la-bitpack64-readtwo.lo \
-	libgmap_la-maxent_hr.lo libgmap_la-popcount.lo \
-	libgmap_la-genome128_hr.lo libgmap_la-compress.lo \
-	libgmap_la-bytecoding.lo libgmap_la-sarray-read.lo \
-	libgmap_la-chrnum.lo libgmap_la-genome.lo \
-	libgmap_la-datadir.lo
+	libgmap_la-uinttable.lo libgmap_la-uinttableuint.lo \
+	libgmap_la-chrom.lo libgmap_la-filestring.lo libgmap_la-md5.lo \
+	libgmap_la-bzip2.lo libgmap_la-sequence.lo \
+	libgmap_la-genomicpos.lo libgmap_la-bitpack64-read.lo \
+	libgmap_la-bitpack64-readtwo.lo libgmap_la-maxent_hr.lo \
+	libgmap_la-popcount.lo libgmap_la-genome128_hr.lo \
+	libgmap_la-compress.lo libgmap_la-bytecoding.lo \
+	libgmap_la-sarray-read.lo libgmap_la-chrnum.lo \
+	libgmap_la-genome.lo libgmap_la-datadir.lo
 dist_libgmap_la_OBJECTS = $(am__objects_1)
 libgmap_la_OBJECTS = $(dist_libgmap_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
@@ -2760,7 +2760,7 @@ include_HEADERS = fopen.h bool.h types.h separator.h comp.h \
  interval.h uintlist.h uint8list.h \
  iitdef.h iit-read.h iit-write.h parserange.h \
  univinterval.h iit-read-univ.h \
- table.h tableuint.h uinttable.h \
+ table.h tableuint.h uinttable.h uinttableuint.h \
  stopwatch.h semaphore.h access.h \
  chrom.h filestring.h \
  md5.h complement.h bzip2.h sequence.h \
@@ -2781,7 +2781,7 @@ LIBGMAP_LA_FILES = fopen.h bool.h types.h separator.h comp.h \
  iit-read.c iit-read.h iit-write.c iit-write.h parserange.c parserange.h \
  univinterval.c univinterval.h iit-read-univ.c iit-read-univ.h \
  stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
- table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h \
+ table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h uinttableuint.c uinttableuint.h\
  chrom.c chrom.h filestring.c filestring.h \
  md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
  genomicpos.c genomicpos.h \
@@ -6498,6 +6498,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uint8list.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uintlist.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uinttable.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uinttableuint.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-univinterval.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sam_sort-access.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sam_sort-assert.Po@am__quote@
@@ -6908,6 +6909,13 @@ libgmap_la-uinttable.lo: uinttable.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-uinttable.lo `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
 
+libgmap_la-uinttableuint.lo: uinttableuint.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-uinttableuint.lo -MD -MP -MF $(DEPDIR)/libgmap_la-uinttableuint.Tpo -c -o libgmap_la-uinttableuint.lo `test -f 'uinttableuint.c' || echo '$(srcdir)/'`uinttableuint.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-uinttableuint.Tpo $(DEPDIR)/libgmap_la-uinttableuint.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uinttableuint.c' object='libgmap_la-uinttableuint.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-uinttableuint.lo `test -f 'uinttableuint.c' || echo '$(srcdir)/'`uinttableuint.c
+
 libgmap_la-chrom.lo: chrom.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-chrom.lo -MD -MP -MF $(DEPDIR)/libgmap_la-chrom.Tpo -c -o libgmap_la-chrom.lo `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-chrom.Tpo $(DEPDIR)/libgmap_la-chrom.Plo
diff --git a/src/chimera.c b/src/chimera.c
index ba7a23f..86dc981 100644
--- a/src/chimera.c
+++ b/src/chimera.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: chimera.c 206139 2017-05-11 03:28:35Z twu $";
+static char rcsid[] = "$Id: chimera.c 210171 2017-09-27 22:22:11Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -699,7 +699,7 @@ Chimera_find_breakpoint (int *chimeraequivpos, int *rangelow, int *rangehigh,
 
 
   /* Use secondbest to find a range for exon-exon searching */
-  *rangelow = 0;
+  *rangelow = *rangehigh = 0;
   for (pos = 0; pos < queryntlength - 1; pos++) {
     if (gapp_sub1[pos] == false) {
       if (gapp_sub2[pos+1] == false) {
diff --git a/src/gmap.c b/src/gmap.c
index 8c9e698..f649c12 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 209801 2017-09-11 21:58:12Z twu $";
+static char rcsid[] = "$Id: gmap.c 210067 2017-09-23 00:16:06Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -120,7 +120,11 @@ static char rcsid[] = "$Id: gmap.c 209801 2017-09-11 21:58:12Z twu $";
 #define MAX_BADOLIGOS 0.30	/* Setting to 1.0 effectively turns this check off */
 #define MAX_REPOLIGOS 0.40	/* Setting to 1.0 effectively turns this check off */
 
-#define MAX_CHIMERA_ITER 1	/* Values larger than 1 can lead to very long (or infinite?) run times */
+/* Value of 1 can miss end exons, but values larger than 1 can lead to
+   very long (or infinite?) run times when combined with
+   --intronlength */
+#define MAX_CHIMERA_ITER 3
+
 #define CHIMERA_PENALTY 30	/* A small value for chimera_margin will reduce this  */
 #define CHIMERA_IDENTITY 0.98
 #define CHIMERA_PVALUE 0.01
@@ -390,6 +394,7 @@ static bool require_splicedir_p = false;
 /* GFF3 */
 static bool gff3_separators_p = true;
 static bool gff3_phase_swap_p = false;
+static CDStype_T cdstype = CDS_CDNA;
 
 /* SAM */
 /* Applicable to PMAP? */
@@ -571,6 +576,7 @@ static struct option long_options[] = {
 
   {"gff3-add-separators", required_argument, 0, 0}, /* gff3_separators_p */
   {"gff3-swap-phase", required_argument, 0, 0}, /* gff3_phase_swap_p */
+  {"gff3-cds", required_argument, 0, 0}, /* cdstype */
 
 #ifndef PMAP
   {"quality-protocol", required_argument, 0, 0}, /* quality_shift */
@@ -3898,7 +3904,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 	
 	if (*mergedp == true) {
 	  testlocalp = true;	/* Local merge */
-	} else {
+	} else if (iter == 1) {
+	  /* Check for middle pieces only on first iteration */
 	  debug2(printf("Checking for middle piece local, starting with list length %d\n",List_length(stage3list)));
 	  stage3list = check_middle_piece_local(&foundp,stage3list,queryseq,queryuc,
 #ifdef PMAP
@@ -3910,6 +3917,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 	    /* Iterate */
 	    testlocalp = true;
 	  }
+	} else {
+	  testlocalp = false;
 	}
       }
     }
@@ -3987,7 +3996,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 	} else {
 	  if (*mergedp == true) {
 	    testchimerap = true;	/* Local merge */
-	  } else {
+	  } else if (iter == 1) {
+	    /* Check for middle pieces only on first iteration */
 	    debug2(printf("Checking for middle piece chimera, starting with list length %d\n",List_length(stage3list)));
 	    stage3list = check_middle_piece_chimera(&foundp,stage3list,queryseq,queryuc,
 #ifdef PMAP
@@ -4001,6 +4011,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 	    } else {
 	      testchimerap = false;
 	    }
+	  } else {
+	    testchimerap = false;
 	  }
 	}
 	debug2(printf("testchimerap is %d\n",testchimerap));
@@ -5410,11 +5422,6 @@ parse_command_line (int argc, char *argv[], int optind) {
       } else if (!strcmp(long_name,"min-intronlength")) {
 	min_intronlength = atoi(check_valid_int(optarg));
 
-      } else if (!strcmp(long_name,"intronlength")) {
-	/*  Included for backwards compatibility.  Sets both
-	    --max-intronlength-middle and --max-intronlength-ends */
-	maxintronlen = maxintronlen_ends = atoi(check_valid_int(optarg));
-
       } else if (!strcmp(long_name,"max-intronlength-middle")) {
 	maxintronlen = atoi(check_valid_int(optarg));
 
@@ -5553,6 +5560,16 @@ parse_command_line (int argc, char *argv[], int optind) {
 	  return 9;
 	}
 
+      } else if (!strcmp(long_name,"gff3-cds")) {
+	if (!strcmp(optarg,"cdna")) {
+	  cdstype = CDS_CDNA;
+	} else if (!strcmp(optarg,"genomic")) {
+	  cdstype = CDS_GENOMIC;
+	} else {
+	  fprintf(stderr,"--gff3-cds flag must be cdna or genomic\n");
+	  return 9;
+	}
+
 #ifndef PMAP
       } else if (!strcmp(long_name,"no-sam-headers")) {
 	sam_headers_p = false;
@@ -5663,6 +5680,8 @@ parse_command_line (int argc, char *argv[], int optind) {
       }
       break;
 
+    case 'K': maxintronlen = maxintronlen_ends = atoi(check_valid_int(optarg)); break;
+
     case 'w': shortsplicedist = strtoul(check_valid_int(optarg),NULL,10); break;
 
     case 'L': maxtotallen_bound = atoi(check_valid_int(optarg)); break;
@@ -6823,7 +6842,7 @@ main (int argc, char *argv[]) {
 	     force_xs_direction_p,md_lowercase_variant_p,
 	     /*snps_p*/genomecomp_alt ? true : false,
 	     /*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength,
-	     gff3_phase_swap_p,sam_cigar_extended_p);
+	     gff3_phase_swap_p,cdstype,sam_cigar_extended_p);
   Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
 	       require_splicedir_p,splicing_iit,splicing_divint_crosstable,
 	       donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
@@ -7452,6 +7471,8 @@ Output options\n\
   --gff3-swap-phase=INT          Whether to swap phase (0 => 0, 1 => 2, 2 => 1) in gff3_gene format\n\
                                    Needed by some analysis programs, but deviates from GFF3 specification\n\
                                    Values: 0 (no, default), 1 (yes)\n\
+  --gff3-cds=STRING              Whether to use cDNA or genomic translation for the CDS coordinates\n\
+                                   Values: cdna (default), genomic\n\
 ");
   fprintf(stdout,"\n");
 
diff --git a/src/gsnap.c b/src/gsnap.c
index 130a893..cd61704 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 207853 2017-06-29 20:33:16Z twu $";
+static char rcsid[] = "$Id: gsnap.c 210070 2017-09-23 00:17:54Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -3442,7 +3442,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
 	     force_xs_direction_p,md_lowercase_variant_p,
 	     /*snps_p*/snps_iit ? true : false,print_nsnpdiffs_p,
 	     Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
-	     /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
+	     /*gff3_phase_swap_p*/false,/*cdstype*/CDS_CDNA,/*cigar_extended_p*/false);
   Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
 	       /*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
 	       donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
diff --git a/src/pair.c b/src/pair.c
index 1d7fd5e..d75929e 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 209799 2017-09-11 21:45:19Z twu $";
+static char rcsid[] = "$Id: pair.c 210070 2017-09-23 00:17:54Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -151,6 +151,7 @@ static bool print_nsnpdiffs_p;
 static double genomelength;	/* For BLAST E-value */
 
 static bool gff3_phase_swap_p;
+static CDStype_T cdstype;
 static bool cigar_extended_p;
 
 
@@ -158,7 +159,8 @@ void
 Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
 	    bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
 	    bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
-	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in) {
+	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, CDStype_T cdstype_in,
+	    bool cigar_extended_p_in) {
   trim_mismatch_score = trim_mismatch_score_in;
   trim_indel_score = trim_indel_score_in;
   gff3_separators_p = gff3_separators_p_in;
@@ -169,6 +171,7 @@ Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
   print_nsnpdiffs_p = print_nsnpdiffs_p_in;
   genomelength = (double) genomelength_in;
   gff3_phase_swap_p = gff3_phase_swap_p_in;
+  cdstype = cdstype_in;
   cigar_extended_p = cigar_extended_p_in;
 
   return;
@@ -2713,7 +2716,7 @@ print_gff3_exons_forward (Filestring_T fp, struct T *pairs, int npairs, int path
   struct T *ptr, *this = NULL;
   int exon_querystart = -1, exon_queryend, exon_phase;
   Chrpos_T exon_genomestart = -1, exon_genomeend, intron_start, intron_end;
-  int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, i;
+  int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, last_valid_i, i;
   int Mlength = 0, Ilength = 0, Dlength = 0;
   List_T tokens = NULL;
   char token[11];
@@ -2724,26 +2727,74 @@ print_gff3_exons_forward (Filestring_T fp, struct T *pairs, int npairs, int path
   int last_querypos = -1;
   Chrpos_T last_genomepos = -1U;
 
+  endi = npairs;
   if (cds_p == false) {
     starti = 0;
-    endi = npairs;
 
-  } else {
+  } else if (cdstype == CDS_CDNA) {
     i = 0;
-    while (i < npairs && (pairs[i].gapp == false && pairs[i].cdna != ' ' && pairs[i].aaphase_e == -1)) {
-      i++;
+    while (i < npairs) {
+      if (pairs[i].gapp == true) {
+	i++;
+      } else if (pairs[i].cdna == ' ') {
+	i++;
+      } else if (pairs[i].aaphase_e == -1) {
+	i++;
+      } else {
+	starti = i;
+	last_valid_i = i;
+	while (i < npairs) {
+	  if (pairs[i].gapp == true) {
+	    i++;
+	  } else if (pairs[i].cdna == ' ') {
+	    i++;
+	  } else if (pairs[i].aaphase_e != -1) {
+	    last_valid_i = i;
+	    i++;
+	  } else {
+	    endi = last_valid_i; /* inclusive */
+	    i = npairs;
+	  }
+	}
+      }
     }
-    starti = i;
-    
-    while (i < npairs && (pairs[i].gapp == true || pairs[i].cdna == ' ' || pairs[i].aaphase_e != -1)) {
-      i++;
+
+  } else if (cdstype == CDS_GENOMIC) {
+    i = 0;
+    while (i < npairs) {
+      if (pairs[i].gapp == true) {
+	i++;
+      } else if (pairs[i].genome == ' ') {
+	i++;
+      } else if (pairs[i].aaphase_g == -1) {
+	i++;
+      } else {
+	starti = i;
+	last_valid_i = i;
+	while (i < npairs) {
+	  if (pairs[i].gapp == true) {
+	    i++;
+	  } else if (pairs[i].genome == ' ') {
+	    i++;
+	  } else if (pairs[i].aaphase_g != -1) {
+	    last_valid_i = i;
+	    i++;
+	  } else {
+	    endi = last_valid_i; /* inclusive */
+	    i = npairs;
+	  }
+	}
+      }
     }
-    endi = i;
+
+  } else {
+    fprintf(stderr,"Do not recognize cdstype %d\n",cdstype);
+    abort();
   }
 
 
   ptr = &(pairs[starti]);
-  for (i = starti; i < endi; i++) {
+  for (i = starti; i <= endi; i++) {
     /* prev = this; */
     this = ptr++;
 
@@ -2975,7 +3026,7 @@ print_gff3_exons_backward (Filestring_T fp, struct T *pairs, int npairs, int pat
   struct T *ptr, *this = NULL;
   int exon_querystart = -1, exon_queryend, exon_phase;
   Chrpos_T exon_genomestart = -1, exon_genomeend;
-  int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, i;
+  int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, last_valid_i, i;
 #if 0
   int intronno = 0;
   Chrpos_T intron_start, intron_end;
@@ -2983,26 +3034,73 @@ print_gff3_exons_backward (Filestring_T fp, struct T *pairs, int npairs, int pat
   int last_querypos = -1;
   Chrpos_T last_genomepos = -1U;
 
+  endi = npairs;
   if (cds_p == false) {
     starti = 0;
-    endi = npairs;
 
-  } else {
+  } else if (cdstype == CDS_CDNA) {
     i = 0;
-    while (i < npairs && (pairs[i].gapp == false && pairs[i].cdna != ' ' && pairs[i].aaphase_e == -1)) {
-      i++;
+    while (i < npairs) {
+      if (pairs[i].gapp == true) {
+	i++;
+      } else if (pairs[i].cdna == ' ') {
+	i++;
+      } else if (pairs[i].aaphase_e == -1) {
+	i++;
+      } else {
+	starti = i;
+	last_valid_i = i;
+	while (i < npairs) {
+	  if (pairs[i].gapp == true) {
+	    i++;
+	  } else if (pairs[i].cdna == ' ') {
+	    i++;
+	  } else if (pairs[i].aaphase_e != -1) {
+	    last_valid_i = i;
+	    i++;
+	  } else {
+	    endi = last_valid_i; /* inclusive */
+	    i = npairs;
+	  }
+	}
+      }
     }
-    starti = i;
-    
-    while (i < npairs && (pairs[i].gapp == true || pairs[i].cdna == ' ' || pairs[i].aaphase_e != -1)) {
-      i++;
+
+  } else if (cdstype == CDS_GENOMIC) {
+    i = 0;
+    while (i < npairs) {
+      if (pairs[i].gapp == true) {
+	i++;
+      } else if (pairs[i].genome == ' ') {
+	i++;
+      } else if (pairs[i].aaphase_g == -1) {
+	i++;
+      } else {
+	starti = i;
+	last_valid_i = i;
+	while (i < npairs) {
+	  if (pairs[i].gapp == true) {
+	    i++;
+	  } else if (pairs[i].genome == ' ') {
+	    i++;
+	  } else if (pairs[i].aaphase_g != -1) {
+	    last_valid_i = i;
+	    i++;
+	  } else {
+	    endi = last_valid_i; /* inclusive */
+	    i = npairs;
+	  }
+	}
+      }
     }
-    endi = i;
-  }
 
+  } else {
+    fprintf(stderr,"Do not recognize cdstype %d\n",cdstype);
+    abort();
+  }
 
-  ptr = &(pairs[endi-1]);
-  for (i = endi-1; i >= starti; i--) {
+  ptr = &(pairs[endi]);
+  for (i = endi; i >= starti; i--) {
     /* prev = this; */
     this = ptr--;
 
diff --git a/src/pair.h b/src/pair.h
index e9e110b..62494b6 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 207201 2017-06-12 18:40:57Z twu $ */
+/* $Id: pair.h 210070 2017-09-23 00:17:54Z twu $ */
 #ifndef PAIR_INCLUDED
 #define PAIR_INCLUDED
 
@@ -27,13 +27,16 @@ typedef struct Pair_T *Pair_T;
 
 #define MATCHESPERGAP 3
 
+typedef enum {CDS_CDNA, CDS_GENOMIC} CDStype_T;
+
 #define T Pair_T
 
 extern void
 Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
 	    bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
 	    bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
-	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in);
+	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, CDStype_T cdstype,
+	    bool cigar_extended_p_in);
 extern int
 Pair_querypos (T this);
 extern Chrpos_T
diff --git a/src/stage3.c b/src/stage3.c
index a3d2a74..58d2638 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 208645 2017-07-28 00:55:34Z twu $";
+static char rcsid[] = "$Id: stage3.c 210195 2017-09-29 15:12:33Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -102,6 +102,7 @@ static char rcsid[] = "$Id: stage3.c 208645 2017-07-28 00:55:34Z twu $";
 #define MICROEXON_PROB_MISMATCH 0.80
 
 #define END_MIN_EXONLENGTH 12
+#define END_SUFFICIENT_EXONLENGTH 24 /* Defines length beyond which we can ignore maxintronlen_ends */
 
 #if 0
 /* No longer used.  Not sure why it was used before */
@@ -4022,7 +4023,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
 #endif
 
   } else {
-    if (splice->genomejump > maxintronlen_ends) {
+    if (nmatches < END_SUFFICIENT_EXONLENGTH && splice->genomejump > maxintronlen_ends) {
       debug3(printf("End intron is too long, so trimming it\n"));
       path = (List_T) NULL;
       *trim5p = true;
@@ -4450,7 +4451,7 @@ trim_end3_exons (bool *indelp, bool *trim3p, int ambig_end_length, List_T path,
 #endif
     
   } else {
-    if (splice->genomejump > maxintronlen_ends) {
+    if (nmatches < END_SUFFICIENT_EXONLENGTH && splice->genomejump > maxintronlen_ends) {
       debug3(printf("End intron is too long, so trimming it\n"));
       pairs = (List_T) NULL;
       *trim3p = true;
@@ -9046,10 +9047,7 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
 
     } else if (*finalscore <= 0) {
       *knownsplicep = false;
-#if 0
       return (List_T) NULL;
-#endif
-      return continuous_gappairs_distalgap;
     } else {
       return continuous_gappairs_distalgap;
     }
@@ -9268,10 +9266,7 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
       
     } else if (*finalscore <= 0) {
       *knownsplicep = false;
-#if 0
       return (List_T) NULL;
-#endif
-      return continuous_gappairs_distalgap;
     } else {
       return continuous_gappairs_distalgap;
     }
@@ -12398,6 +12393,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
 #endif
 
 
+#if 0
+    /* Moved per version 2016-09-14 to case 99 below */
     /* Pass 4: Fix dual breaks */
     debug(printf("\n*** Pass 4 (dir %d): Fix dual breaks.  Iteration0 %d\n",cdna_direction,iter0));
     /* pairs = remove_indel_gaps(path); */
@@ -12420,6 +12417,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
       return path;
     }
 #endif
+#endif
 
 
     /* Pass 5: introns */
@@ -12526,15 +12524,38 @@ path_compute_dir (double *defect_rate, List_T pairs,
     }
 #endif
 
+
+#if 1
+    /* Moved per version 2016-09-14 from case 4 above */
+    /* Pass 99: Fix dual breaks */
+    /* >>pairs */
+    debug(printf("\n*** Pass 99 (dir %d): Fix dual breaks.  Iteration0 %d\n",cdna_direction,iter0));
+    /* pairs = remove_indel_gaps(path); */
+    path = List_reverse(pairs);
+
+    pairs = build_dual_breaks(&dual_break_p,&dynprogindex_minor,&dynprogindex_major,path,
+			      chrnum,chroffset,chrhigh,
+#ifdef PMAP
+			      queryaaseq_ptr,
+#endif
+			      queryseq_ptr,queryuc_ptr,querylength,
+			      cdna_direction,watsonp,genestrand,jump_late_p,pairpool,
+			      dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,maxpeelback,
+			      oligoindices_minor,diagpool,cellpool,
+			      *defect_rate,/*finalp*/false,/*simplep*/true);
+#endif
+
+
+#if 0
     path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
 			     /*finalp*/false);
     debug(Pair_dump_list(path,/*zerobasedp*/true));
-
     pairs = List_reverse(path);
     debug14(printf("Result of build_dual_breaks\n"));
     debug14(Pair_dump_list(pairs,true));
     debug(printf("Result of build_dual_breaks\n"));
     debug(Pair_dump_list(pairs,true));
+#endif
       
 #ifdef GSNAP
     /* Too expensive to loop */
@@ -12545,7 +12566,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
     debug(printf("At end of outer loop: dual_break_p %d\n",dual_break_p));
   }
 
-  path = List_reverse(pairs);
+  path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			   /*finalp*/false);
   return path;
 }
 
diff --git a/src/translation.c b/src/translation.c
index 8112765..d81e423 100644
--- a/src/translation.c
+++ b/src/translation.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: translation.c 188718 2016-04-30 01:53:47Z twu $";
+static char rcsid[] = "$Id: translation.c 210069 2017-09-23 00:16:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -79,7 +79,7 @@ Translation_array_new (struct Pair_T *pairs, int translationlen) {
   return new;
 }
 
-#if 0
+#ifdef DEBUG
 static void
 Translation_dump (struct Pair_T *pairs, struct T *translation, int translationlen) {
   int i;
@@ -1255,7 +1255,7 @@ assign_cdna_forward (int ncdna, struct Pair_T *pairs, int npairs, bool revcompp,
     pair = &(pairs[i]);
     codon = pair->aa_e = get_codon_forward(&nexti,pairs,npairs,i,revcompp);
     debug2(Pair_dump_one(pair,true));
-    debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+    debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
     i = nexti;
     j += 3;
   }
@@ -1276,7 +1276,7 @@ terminate_cdna_forward (struct Pair_T *pairs, int npairs, bool revcompp, int sta
     pair = &(pairs[i]);
     lastcodon = pair->aa_e = get_codon_forward(&nexti,pairs,npairs,i,revcompp);
     debug2(Pair_dump_one(pair,true));
-    debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+    debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
     i = nexti;
   }
   return;
@@ -1296,7 +1296,7 @@ assign_cdna_backward (int ncdna, struct Pair_T *pairs, int npairs, bool revcompp
     pair = &(pairs[i]);
     codon = pair->aa_e = get_codon_backward(&nexti,pairs,i,revcompp);
     debug2(Pair_dump_one(pair,true));
-    debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+    debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
     i = nexti;
     j += 3;
   }
@@ -1319,7 +1319,7 @@ terminate_cdna_backward (struct Pair_T *pairs, int npairs, bool revcompp, int st
     pair = &(pairs[i]);
     lastcodon = pair->aa_e = get_codon_backward(&nexti,pairs,i,revcompp);
     debug2(Pair_dump_one(pair,true));
-    debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+    debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
     i = nexti;
   }
   return;
@@ -1335,6 +1335,8 @@ assign_genomic (int ngenomic, struct Pair_T *pairs, int npairs, int starti) {
   while (j < ngenomic) {
     pair = &(pairs[i]);
     codon = pair->aa_g = get_codon_genomic(&nexti,pairs,npairs,i);
+    debug2(Pair_dump_one(pair,true));
+    debug2(printf(" marked with amino acid %c for genomic\n",pair->aa_g));
     i = nexti;
     j += 3;
   }
@@ -1351,6 +1353,8 @@ terminate_genomic (struct Pair_T *pairs, int npairs, int starti) {
   while (i <= npairs - 3 && lastcodon != '*') {
     pair = &(pairs[i]);
     lastcodon = pair->aa_g = get_codon_genomic(&nexti,pairs,npairs,i);
+    debug2(Pair_dump_one(pair,true));
+    debug2(printf(" marked with amino acid %c for genomic\n",pair->aa_g));
     i = nexti;
   }
   return;
@@ -1658,7 +1662,7 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
   char lastaa;
   struct T *translation;
   bool endstopp;
-  int i, aapos = 0;
+  int i, j, aapos = 0;
   Frame_T translation_frame;
   int translation_starti = 0, translation_endi = 0, phase;
   int minpos, maxpos;
@@ -1738,12 +1742,14 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
 	      }
 	      lastaa = pairs[i].aa_g;
 	      aapos++;
+	      /* printf("Assigning aaphase_g of zero to %d\n",i); */
 	      pairs[i].aaphase_g = 0;
 	    }
 	  } else if (translation[i].frame != 3) {
 	    if ((phase = translation_frame - translation[i].frame) < 0) {
 	      phase += 3;
 	    }
+	    /* printf("Assigning aaphase_g of %d to %d\n",phase,i); */
 	    pairs[i].aaphase_g = phase;
 	  }
 	}
@@ -1768,6 +1774,22 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
       }
 #endif
     
+      j = i;
+      while (j < npairs && pairs[j].genome == ' ') {
+	j++;
+      }
+      if (j < npairs) {
+	/* printf("Assigning aaphase_g of one to %d\n",j); */
+	pairs[j++].aaphase_g = 1;
+      }
+      while (j < npairs && pairs[j].genome == ' ') {
+	j++;
+      }
+      if (j < npairs) {
+	/* printf("Assigning aaphase_g of two to %d\n",j); */
+	pairs[j].aaphase_g = 2;
+      }
+
       /* Fill in aapos to the end */
       for ( ; i < npairs; i++) {
 	pairs[i].aapos = aapos;
@@ -1790,12 +1812,14 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
 	      }
 	      lastaa = pairs[i].aa_g;
 	      aapos++;
+	      /* printf("Assigning aaphase_g of zero to %d\n",i); */
 	      pairs[i].aaphase_g = 0;
 	    }
 	  } else if (translation[i].frame != 3) {
 	    if ((phase = translation_frame - translation[i].frame) < 0) {
 	      phase += 3;
 	    }
+	    /* printf("Assigning aaphase_g of %d to %d\n",phase,i); */
 	    pairs[i].aaphase_g = phase;
 	  }
 	}
@@ -1820,6 +1844,22 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
       }
 #endif
 
+      j = i;
+      while (j >= 0 && pairs[j].genome == ' ') {
+	j--;
+      }
+      if (j >= 0) {
+	/* printf("Assigning aaphase_g of one to %d\n",j); */
+	pairs[j--].aaphase_g = 1;
+      }
+      while (j >= 0 && pairs[j].genome == ' ') {
+	j--;
+      }
+      if (j >= 0) {
+	/* printf("Assigning aaphase_g of two to %d\n",j); */
+	pairs[j].aaphase_g = 2;
+      }
+
       /* Fill in aapos to the end */
       for ( ; i >= 0; --i) {
 	pairs[i].aapos = aapos;
diff --git a/src/uinttableuint.c b/src/uinttableuint.c
new file mode 100644
index 0000000..1f3ff3f
--- /dev/null
+++ b/src/uinttableuint.c
@@ -0,0 +1,252 @@
+static char rcsid[] = "$Id: uinttableuint.c 210072 2017-09-23 00:41:47Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "uinttableuint.h"
+#include <stdio.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdlib.h>		/* For qsort */
+#include <string.h>		/* For strcmp */
+#include "mem.h"
+#include "assert.h"
+
+#define T Uinttableuint_T
+struct T {
+  int size;
+  int length;
+  unsigned int timestamp;
+  struct binding {
+    struct binding *link;
+    unsigned int key;
+    unsigned int value;
+    unsigned int timeindex;
+  } **buckets;
+};
+
+
+
+T 
+Uinttableuint_new (int hint) {
+  T table;
+  int i;
+  static int primes[] = { 509, 509, 1021, 2053, 4093,
+			  8191, 16381, 32771, 65521, INT_MAX };
+
+  assert(hint >= 0);
+  for (i = 1; primes[i] < hint; i++) {
+  }
+  table = (T) MALLOC(sizeof(*table) +
+		     primes[i-1]*sizeof(table->buckets[0]));
+  table->size = primes[i-1];
+  table->buckets = (struct binding **)(table + 1);
+  for (i = 0; i < table->size; i++) {
+    table->buckets[i] = NULL;
+  }
+  table->length = 0;
+  table->timestamp = 0;
+  return table;
+}
+
+unsigned int
+Uinttableuint_get (T table, const unsigned int key) {
+  int i;
+  struct binding *p;
+
+  assert(table);
+  /* assert(key); -- Doesn't hold for atomic 0 */
+  i = key % table->size;
+  /* printf("Doing Uinttableuint_get on %s at bucket %d\n",(char *) key, i); */
+  for (p = table->buckets[i]; p; p = p->link) {
+    /* printf("  Comparing %s with %s at %p, key = %p\n",(char *) key, (char *) p->key, p, p->key); */
+    if (key == p->key) {
+      break;
+    }
+  }
+  return p ? p->value : 0;
+}
+
+unsigned int
+Uinttableuint_put (T table, const unsigned int key, unsigned int value) {
+  int i;
+  struct binding *p;
+  unsigned int prev;
+
+  assert(table);
+  /* assert(key); -- Doesn't hold for atomic 0 */
+  i = key % table->size;
+  for (p = table->buckets[i]; p; p = p->link) {
+    if (key == p->key) {
+      break;
+    }
+  }
+  if (p == NULL) {
+    NEW(p);
+    p->key = key;
+    /* printf("Doing Uinttable_put at %p, key = %p\n",p,p->key); */
+    p->link = table->buckets[i];
+    table->buckets[i] = p;
+    table->length++;
+    prev = 0;
+  } else {
+    prev = p->value;
+  }
+  p->value = value;
+  p->timeindex = table->timestamp;
+  table->timestamp++;
+  return prev;
+}
+
+int 
+Uinttableuint_length (T table) {
+  assert(table);
+  return table->length;
+}
+
+void 
+Uinttableuint_map (T table,
+	       void (*apply)(const unsigned int key, unsigned int *value, void *cl),
+	       void *cl) {
+  int i;
+  struct binding *p;
+
+  assert(table);
+  assert(apply);
+  for (i = 0; i < table->size; i++)
+    for (p = table->buckets[i]; p; p = p->link) {
+      apply(p->key, &p->value, cl);
+    }
+}
+
+unsigned int
+Uinttableuint_remove (T table, const unsigned int key) {
+  int i;
+  struct binding **pp;
+
+  assert(table);
+  /* assert(key); -- Doesn't hold for atomic 0 */
+  table->timestamp++;
+  i = key % table->size;
+  for (pp = &table->buckets[i]; *pp; pp = &(*pp)->link) {
+    if (key == (*pp)->key) {
+      struct binding *p = *pp;
+      unsigned int value = p->value;
+      *pp = p->link;
+      FREE(p);
+      table->length--;
+      return value;
+    }
+  }
+  return 0;
+}
+
+static int
+uint_compare (const void *a, const void *b) {
+  unsigned int x = * (unsigned int *) a;
+  unsigned int y = * (unsigned int *) b;
+
+  if (x < y) {
+    return -1;
+  } else if (y < x) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+
+unsigned int *
+Uinttableuint_keys (T table, bool sortp) {
+  unsigned int *keyarray;
+  int i, j = 0;
+  struct binding *p;
+
+  assert(table);
+  keyarray = (unsigned int *) CALLOC(table->length+1,sizeof(unsigned int));
+  for (i = 0; i < table->size; i++) {
+    for (p = table->buckets[i]; p; p = p->link) {
+      keyarray[j++] = p->key;
+    }
+  }
+
+  if (sortp == true) {
+    qsort(keyarray,table->length,sizeof(unsigned int),uint_compare);
+  }
+
+  return keyarray;
+}
+
+
+static int
+timeindex_cmp (const void *x, const void *y) {
+  struct binding *a = * (struct binding **) x;
+  struct binding *b = * (struct binding **) y;
+
+  if (a->timeindex < b->timeindex) {
+    return -1;
+  } else if (a->timeindex > b->timeindex) {
+    return +1;
+  } else {
+    return 0;
+  }
+}
+
+
+unsigned int *
+Uinttableuint_keys_by_timeindex (T table) {
+  unsigned int *keyarray;
+  int i, j = 0;
+  struct binding **buckets, *p;
+
+  assert(table);
+  buckets = (struct binding **) CALLOC(table->length+1,sizeof(struct binding *));
+  for (i = 0; i < table->size; i++) {
+    for (p = table->buckets[i]; p; p = p->link) {
+      buckets[j++] = p;
+    }
+  }
+  qsort(buckets,table->length,sizeof(struct binding *),timeindex_cmp);
+
+  keyarray = (unsigned int *) CALLOC(table->length,sizeof(unsigned int));
+  for (j = 0; j < table->length; j++) {
+    p = buckets[j];
+    keyarray[j] = p->key;
+  }
+  FREE(buckets);
+
+  return keyarray;
+}
+
+
+unsigned int *
+Uinttableuint_values (T table) {
+  unsigned int *valuearray;
+  int i, j = 0;
+  struct binding *p;
+
+  assert(table);
+  valuearray = (unsigned int *) CALLOC(table->length,sizeof(unsigned int));
+  for (i = 0; i < table->size; i++) {
+    for (p = table->buckets[i]; p; p = p->link) {
+      valuearray[j++] = p->value;
+    }
+  }
+  return valuearray;
+}
+
+void 
+Uinttableuint_free (T *table) {
+  assert(table && *table);
+  if ((*table)->length > 0) {
+    int i;
+    struct binding *p, *q;
+    for (i = 0; i < (*table)->size; i++) {
+      for (p = (*table)->buckets[i]; p; p = q) {
+	q = p->link;
+	FREE(p);
+      }
+    }
+  }
+  FREE(*table);
+}
diff --git a/src/uinttableuint.h b/src/uinttableuint.h
new file mode 100644
index 0000000..217c8a2
--- /dev/null
+++ b/src/uinttableuint.h
@@ -0,0 +1,33 @@
+/* $Id: uinttableuint.h 210072 2017-09-23 00:41:47Z twu $ */
+#ifndef UINTTABLEUINT_INCLUDED
+#define UINTTABLEUINT_INCLUDED
+#include "bool.h"
+
+#define T Uinttableuint_T
+typedef struct T *T;
+
+extern T
+Uinttableuint_new (int hint);
+extern void 
+Uinttableuint_free (T *table);
+extern int   
+Uinttableuint_length (T table);
+extern unsigned int
+Uinttableuint_put (T table, const unsigned int key, unsigned int value);
+extern unsigned int
+Uinttableuint_get (T table, const unsigned int key);
+extern unsigned int
+Uinttableuint_remove (T table, const unsigned int key);
+extern void   
+Uinttableuint_map (T table,
+	       void (*apply)(const unsigned int key, unsigned int *value, void *cl),
+	       void *cl);
+extern unsigned int *
+Uinttableuint_keys (T table, bool sortp);
+extern unsigned int *
+Uinttableuint_keys_by_timeindex (T table);
+extern unsigned int *
+Uinttableuint_values (T table);
+
+#undef T
+#endif
diff --git a/src/uniqscan.c b/src/uniqscan.c
index 121d20b..a7689d1 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 207328 2017-06-14 19:47:20Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 210070 2017-09-23 00:17:54Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1327,7 +1327,7 @@ main (int argc, char *argv[]) {
 	     /*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false,
 	     /*snps_p*/snps_iit ? true : false,/*print_nsnpdiffs_p*/snps_iit ? true : false,
 	     Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
-	     /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
+	     /*gff3_phase_swap_p*/false,/*cdstype*/CDS_CDNA,/*cigar_extended_p*/false);
   Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
 	       /*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable,
 	       donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git



More information about the debian-med-commit mailing list