[med-svn] [dascrubber] 01/05: New upstream version 0~20180108

Afif Elghraoui afif at moszumanska.debian.org
Sun Feb 4 18:55:46 UTC 2018


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository dascrubber.

commit 717ed4840fa40ce73cc1007fafbd76863278a991
Author: Afif Elghraoui <afif at debian.org>
Date:   Sun Feb 4 13:51:29 2018 -0500

    New upstream version 0~20180108
---
 DASedit.c    |  21 ++-
 DASmap.c     |   6 +-
 DASpatch.c   |  44 +++---
 DASqv.c      |  28 ++--
 DASrealign.c |  23 +--
 DAStrim.c    | 497 ++++++++++++++++++++++++++++++++++++++++-------------------
 DB.c         | 325 +++++++++++++++++++++++++++++---------
 DB.h         | 208 ++++++++++++++++++++-----
 REPqv.c      |   4 +-
 REPtrim.c    |  89 ++++++-----
 align.c      | 128 +++++++++++----
 align.h      |  13 +-
 12 files changed, 990 insertions(+), 396 deletions(-)

diff --git a/DASedit.c b/DASedit.c
index 63e7a6f..2106401 100644
--- a/DASedit.c
+++ b/DASedit.c
@@ -82,7 +82,7 @@ static char *Usage = "[-v] [-x<int>] <source:db> <target:db>";
 
   //  Read-only
 
-static HITS_DB _DB, *DB = &_DB;    //  Data base
+static DAZZ_DB _DB, *DB = &_DB;    //  Data base
 
 static int64   *TRIM_IDX;
 static int     *TRIM;
@@ -191,7 +191,7 @@ static int Load_Model(int *patch, char *target, int depth)
 }
 
 int main(int argc, char *argv[])
-{ HITS_READ  *reads;
+{ DAZZ_READ  *reads;
   int         nreads;
   int64       boff;
 
@@ -368,7 +368,7 @@ int main(int argc, char *argv[])
   reads  = DB->reads;
   nreads = DB->nreads;
 
-  { HITS_TRACK *track;
+  { DAZZ_TRACK *track;
     int         i;
 
     track = Load_Track(DB,"trim");
@@ -498,7 +498,12 @@ int main(int argc, char *argv[])
     for (i = 0; i < STACK_SIZE; i++)
       BSTACK[i] = NULL;
 
-    target = New_Read_Buffer(DB);
+    { int ml = DB->maxlen;
+      DB->maxlen = 1.5*ml + 10000;
+      target = New_Read_Buffer(DB);
+      DB->maxlen = ml;
+    }
+
     aseq   = BSTACK[0] = New_Read_Buffer(DB);
     segfate[0] = GOOD_LAST;
 
@@ -756,8 +761,8 @@ int main(int argc, char *argv[])
   { int       i, s;
     int       bi, gb;
     int       tlen, first;
-    HITS_DB   NB;
-    HITS_READ newrec;
+    DAZZ_DB   NB;
+    DAZZ_READ newrec;
 #ifdef DEBUG_INDEX
     int       ni;
 #endif
@@ -772,7 +777,7 @@ int main(int argc, char *argv[])
     NB.totlen = ntot;
     NB.maxlen = nmax;
 
-    fwrite(&NB,sizeof(HITS_DB),1,IDX_FILE);
+    fwrite(&NB,sizeof(DAZZ_DB),1,IDX_FILE);
 
 #ifdef DEBUG_INDEX
     printf("\nINDEXING\n");
@@ -807,7 +812,7 @@ int main(int argc, char *argv[])
             newrec.flags  = (reads[i].flags & DB_QV) | DB_BEST;
             if (segfate[bi] > TRIMMED) 
               newrec.flags |= DB_CSS;
-            fwrite(&newrec,sizeof(HITS_READ),1,IDX_FILE);
+            fwrite(&newrec,sizeof(DAZZ_READ),1,IDX_FILE);
             boff += COMPRESSED_LEN(tlen);
 
             if (s == GOOD)
diff --git a/DASmap.c b/DASmap.c
index 83adfa9..5177a0e 100644
--- a/DASmap.c
+++ b/DASmap.c
@@ -57,7 +57,7 @@ int next_read(File_Iterator *it)
   if (fgets(nbuffer,MAX_BUFFER,it->input) == NULL)
     { if (feof(it->input))
         return (1);
-      SYSTEM_ERROR;
+      SYSTEM_READ_ERROR;
     }
   if ((eol = index(nbuffer,'\n')) == NULL)
     { fprintf(stderr,"%s: Line %d in read list is longer than %d chars!\n",
@@ -77,8 +77,8 @@ int next_read(File_Iterator *it)
 }
 
 int main(int argc, char *argv[])
-{ HITS_DB     _db, *db = &_db;
-  HITS_TRACK *map;
+{ DAZZ_DB     _db, *db = &_db;
+  DAZZ_TRACK *map;
 
   int            reps, *pts;
   int            input_pts;
diff --git a/DASpatch.c b/DASpatch.c
index 53294b5..ad7aec0 100644
--- a/DASpatch.c
+++ b/DASpatch.c
@@ -58,7 +58,7 @@ static int ANCHOR_THRESH;
 
 static int     TRACE_SPACING;  //  Trace spacing (from .las file)
 
-static HITS_DB _DB, *DB  = &_DB;   //  Data base
+static DAZZ_DB _DB, *DB  = &_DB;   //  Data base
 static int     DB_FIRST;           //  First read of DB to process
 static int     DB_LAST;            //  Last read of DB to process (+1)
 static int     DB_PART;            //  0 if all, otherwise block #
@@ -549,11 +549,9 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
   else
     tbytes = sizeof(uint16);
 
-  if (novl <= 0)
-    return (0);
-
-  Read_Overlap(input,ovls);
-  if (trace)
+  if (Read_Overlap(input,ovls) != 0)
+    ovls[0].aread = INT32_MAX;
+  else if (trace)
     { if (ovls[0].path.tlen > pmax)
         { pmax  = 1.2*(ovls[0].path.tlen)+10000;
           paths = (uint16 *) Realloc(paths,sizeof(uint16)*pmax,"Expanding path buffer");
@@ -627,6 +625,12 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
       ACTION(j,ovls,n);
     }
 
+  if (ovls[n].aread < INT32_MAX)
+    { fprintf(stderr,"%s: .las file overlaps don't correspond to reads in block %d of DB\n",
+                     Prog_Name,DB_PART);
+      exit (1);
+    }
+
   return (max);
 }
 
@@ -635,9 +639,8 @@ int main(int argc, char *argv[])
   char       *root, *dpwd;
   char       *las, *lpwd;
   int64       novl;
-  HITS_TRACK *track;
+  DAZZ_TRACK *track;
   int         c;
-  int         COVERAGE;
 
   //  Process arguments
 
@@ -714,19 +717,12 @@ int main(int argc, char *argv[])
         if (extra == 4*sizeof(int))
           { fread(&GOOD_QV,sizeof(int),1,afile);
             fread(&BAD_QV,sizeof(int),1,afile);
-            fread(&COVERAGE,sizeof(int),1,afile);
+            fread(&HGAP_MIN,sizeof(int),1,afile);
             fread(&HGAP_MIN,sizeof(int),1,afile);
           }
-        else if (extra == 3*sizeof(int))
-          { fread(&GOOD_QV,sizeof(int),1,afile);
-            fread(&BAD_QV,sizeof(int),1,afile);
-            fread(&COVERAGE,sizeof(int),1,afile);
-            HGAP_MIN = 0;
-          }
-        else if (extra == 2*sizeof(int))
+        else if (extra == 2*sizeof(int) || extra == 3*sizeof(int))
           { fread(&GOOD_QV,sizeof(int),1,afile);
             fread(&BAD_QV,sizeof(int),1,afile);
-            COVERAGE = -1;
             HGAP_MIN = 0;
           }
         else
@@ -802,19 +798,19 @@ int main(int argc, char *argv[])
                 if (dbfile == NULL)
                   exit (1);
                 if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 for (i = 0; i < nfiles; i++)
                   if (fgets(buffer,2*MAX_NAME+100,dbfile) == NULL)
-                    SYSTEM_ERROR
+                    SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_NBLOCK,&nblocks) != 1)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_PARAMS,&size,&cutoff,&all) != 3)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 for (i = 1; i <= part; i++)
                   if (fscanf(dbfile,DB_BDATA,&oindx,&DB_FIRST) != 2)
-                    SYSTEM_ERROR
+                    SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_BDATA,&oindx,&DB_LAST) != 2)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 fclose(dbfile);
                 DB_PART = part;
                 *p = '\0';
@@ -849,7 +845,7 @@ int main(int argc, char *argv[])
 
       //  Open overlap file
 
-      lpwd = PathTo(argv[2]);
+      lpwd = PathTo(argv[c]);
       if (DB_PART)
         input = Fopen(Catenate(lpwd,"/",las,Numbered_Suffix(".",DB_PART,".las")),"r");
       else
diff --git a/DASqv.c b/DASqv.c
index 96b3d6f..806a5db 100644
--- a/DASqv.c
+++ b/DASqv.c
@@ -38,7 +38,7 @@ static int     HGAP_MIN;  //  Under this length do not process for HGAP
 static int     TRACE_SPACING;  //  Trace spacing (from .las file)
 static int     TBYTES;         //  Bytes per trace segment (from .las file)
 
-static HITS_DB _DB, *DB  = &_DB;   //  Data base
+static DAZZ_DB _DB, *DB  = &_DB;   //  Data base
 static int     DB_FIRST;           //  First read of DB to process
 static int     DB_LAST;            //  Last read of DB to process (+1)
 static int     DB_PART;            //  0 if all, otherwise block #
@@ -306,11 +306,9 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
   else
     TBYTES = sizeof(uint16);
 
-  if (novl <= 0)
-    return (0);
-
-  Read_Overlap(input,ovls);
-  if (trace)
+  if (Read_Overlap(input,ovls) != 0)
+    ovls[0].aread = INT32_MAX;
+  else if (trace)
     { if (ovls[0].path.tlen > pmax)
         { pmax  = 1.2*(ovls[0].path.tlen)+10000;
           paths = (uint16 *) Realloc(paths,sizeof(uint16)*pmax,"Expanding path buffer");
@@ -384,6 +382,12 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
       ACTION(j,ovls,n);
     }
 
+  if (ovls[n].aread < INT32_MAX)
+    { fprintf(stderr,"%s: .las file overlaps don't correspond to reads in block %d of DB\n",
+                     Prog_Name,DB_PART);
+      exit (1);
+    }
+
   return (max);
 }
 
@@ -509,19 +513,19 @@ int main(int argc, char *argv[])
                 if (dbfile == NULL)
                   exit (1);
                 if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 for (i = 0; i < nfiles; i++)
                   if (fgets(buffer,2*MAX_NAME+100,dbfile) == NULL)
-                    SYSTEM_ERROR
+                    SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_NBLOCK,&nblocks) != 1)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_PARAMS,&size,&cutoff,&all) != 3)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 for (i = 1; i <= part; i++)
                   if (fscanf(dbfile,DB_BDATA,&oindx,&DB_FIRST) != 2)
-                    SYSTEM_ERROR
+                    SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_BDATA,&oindx,&DB_LAST) != 2)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 fclose(dbfile);
                 DB_PART = part;
                 *p = '\0';
diff --git a/DASrealign.c b/DASrealign.c
index 69b7a20..dd7329f 100644
--- a/DASrealign.c
+++ b/DASrealign.c
@@ -70,8 +70,8 @@ static int     TBYTES;         //  Bytes per trace segment (from .las file)
 static int     SMALL;          //  Trace points can fit in a byte
 static int     MIN_LEN;        //  Minimum piece length
 
-static HITS_DB _ADB, *ADB = &_ADB;    //  A-read database
-static HITS_DB _BDB, *BDB = &_BDB;    //  B-read database
+static DAZZ_DB _ADB, *ADB = &_ADB;    //  A-read database
+static DAZZ_DB _BDB, *BDB = &_BDB;    //  B-read database
 
 static int ADB_ofirst, ADB_olast;
 static int BDB_ofirst, BDB_olast;
@@ -536,7 +536,7 @@ static void EXTENDER(int aread, Overlap *ovls, int novl)
     return;
 
   if (work == NULL)
-    { spec = New_Align_Spec(.70,100,ADB->freq);
+    { spec = New_Align_Spec(.70,100,ADB->freq,1);
       work = New_Work_Data();
       ralign->path = &rpath;
       falign->path = &fpath;
@@ -897,11 +897,9 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
       SMALL  = 0;
     }
 
-  if (novl <= 0)
-    return (0);
-
-  Read_Overlap(input,ovls);
-  if (trace)
+  if (Read_Overlap(input,ovls) != 0)
+    ovls[0].aread = INT32_MAX;
+  else if (trace)
     { if (ovls[0].path.tlen > pmax)
         { pmax = 1.2*(ovls[0].path.tlen)+10000;
           paths = (uint16 *) Realloc(paths,sizeof(uint16)*pmax,"Expanding path buffer");
@@ -971,12 +969,18 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
         ACTION(j,ovls,n);
     }
 
+  if (ovls[n].aread < INT32_MAX)
+    { fprintf(stderr,"%s: .las file overlaps don't correspond to reads in block %d of DB\n",
+                     Prog_Name,ADB->part);
+      exit (1);
+    }
+
   return (max);
 }
 
 
 int main(int argc, char *argv[])
-{ HITS_TRACK *map1, *map2;
+{ DAZZ_TRACK *map1, *map2;
 
   //  Process arguments
 
@@ -1047,7 +1051,6 @@ int main(int argc, char *argv[])
         }
       Trim_DB(BDB);
 
-
       Read_All_Sequences(BDB,0);
     }
   AFIRST = ADB->tfirst;
diff --git a/DAStrim.c b/DAStrim.c
index ef01807..18a0c35 100644
--- a/DAStrim.c
+++ b/DAStrim.c
@@ -25,20 +25,24 @@
 #define PATHSEP "/"
 #endif
 
-#undef   DEBUG_HQ_BLOCKS     //  Various DEBUG flags (normally all off)
+#undef    DEBUG_HQ_BLOCKS     //  Various DEBUG flags (normally all off)
 #undef     SHOW_EVENTS
-#undef   DEBUG_HOLE_FINDER
-#undef   DEBUG_GAP_STATUS
+#undef    DEBUG_HOLE_FINDER
+#undef    DEBUG_GAP_STATUS
 #undef     SHOW_PAIRS
-#undef   DEBUG_SUMMARY
+#undef    DEBUG_PATCHING
+#undef    DEBUG_SUMMARY
 
-#undef   ANNOTATE   //  Output annotation tracks for DaViewer
+#undef    DEBUG_CLASS
+
+#define  ANNOTATE   //  Output annotation tracks for DaViewer
 
 
 //  Command format and global parameter variables
 
 static char *Usage = " [-v] -g<int> -b<int> <source:db> <overlaps:las> ...";
 
+static int     COVERAGE;   //  estimated coverage
 static int     BAD_QV;     //  qv >= and you are "bad"
 static int     GOOD_QV;    //  qv <= and you are "good"
 static int     HGAP_MIN;   //  less than this length do not process for HGAP
@@ -66,6 +70,8 @@ static int AdSuf = ADSUF;
 #define  COVER_LEN     400  //  An overlap covers a point if it extends COVER_LEN to either side.
 #define  ANCHOR_MATCH  .25  //  Delta in trace interval at both ends of patch must be < this %.
 
+#define  MIN_OVERLAP   900  //  Was COVER_LEN, too small?
+
 //  Wall Constants
 
 #define MIN_PNT 5        //  Minimum # of events in a wall
@@ -79,7 +85,7 @@ static int AdSuf = ADSUF;
 
 static int     TRACE_SPACING;  //  Trace spacing (from .las file)
 
-static HITS_DB _DB, *DB  = &_DB;   //  Data base
+static DAZZ_DB _DB, *DB  = &_DB;   //  Data base
 static int     DB_FIRST;           //  First read of DB to process
 static int     DB_LAST;            //  Last read of DB to process (+1)
 static int     DB_PART;            //  0 if all, otherwise block #
@@ -707,9 +713,9 @@ static int FIND_HOLES(int aread, Overlap *ovls, int novl, Interval *block, int n
                   printf("  Dev %5d-%3d-%5d -> %5d",queue[lflank].pos,nend,queue[first].pos,dpos);
 #endif
 
-                //  Second, look for the rightmost RGT-(LFT-)wall that overlaps the left (right)
+                //  Second, look for the leftmost RGT-(LFT-)wall that overlaps the left (right)
                 //    flank, i.e. queue[lflank,first].pos (queue[last,rflank].pos), and if found
-                //    take the average position of the wall.
+                //    take the average position of the flank position within the wall.
 
                   for (d = dnum-1; d >= 0; d--)
                     if (dwall[d].beg <= queue[first].pos)
@@ -728,7 +734,7 @@ static int FIND_HOLES(int aread, Overlap *ovls, int novl, Interval *block, int n
                           }
                       dpos = sum/n;
 #ifdef DEBUG_HOLE_FINDER
-                      printf(" [%5d,%5d] -> %4d\n",dwall[d].beg,dwall[d].end,dpos);
+                      printf(" Map [%5d,%5d] -> %4d\n",dwall[d].beg,dwall[d].end,dpos);
 #endif
                       dcov = dwall[d].cov + dwall[d].cnt;
 		      d -= 1;
@@ -779,7 +785,7 @@ static int FIND_HOLES(int aread, Overlap *ovls, int novl, Interval *block, int n
                           }
                       apos = sum/n;
 #ifdef DEBUG_HOLE_FINDER
-                      printf(" [%5d,%5d] -> %4d\n",awall[a].beg,awall[a].end,apos);
+                      printf(" Map [%5d,%5d] -> %4d\n",awall[a].beg,awall[a].end,apos);
 #endif
                       acov = awall[a].cov + awall[a].cnt;
                       a += 1;
@@ -804,14 +810,14 @@ static int FIND_HOLES(int aread, Overlap *ovls, int novl, Interval *block, int n
                               { dcov = dwall[d].cov + dwall[d].cnt;
                                 dpos = dwall[d--].beg;
 #ifdef DEBUG_HOLE_FINDER
-                                printf("  <- %d\n",dpos);
+                                printf("  Push <- %d\n",dpos);
 #endif
                               }
                             else
                               { acov = awall[a].cov + awall[a].cnt;
                                 apos = awall[a++].end;
 #ifdef DEBUG_HOLE_FINDER
-                                printf("  -> %d\n",apos);
+                                printf("  Push -> %d\n",apos);
 #endif
                               }
                           }
@@ -819,7 +825,7 @@ static int FIND_HOLES(int aread, Overlap *ovls, int novl, Interval *block, int n
                           { dcov = dwall[d].cov + dwall[d].cnt;
                             dpos = dwall[d--].beg;
 #ifdef DEBUG_HOLE_FINDER
-                            printf("  <- %d\n",dpos);
+                            printf("  Push <- %d\n",dpos);
 #endif
                           }
                       else
@@ -827,7 +833,7 @@ static int FIND_HOLES(int aread, Overlap *ovls, int novl, Interval *block, int n
                           { acov = awall[a].cov + awall[a].cnt;
                             apos = awall[a++].end;
 #ifdef DEBUG_HOLE_FINDER
-                            printf("  -> %d\n",apos);
+                            printf("  Push -> %d\n",apos);
 #endif
                           }
                         else
@@ -1022,6 +1028,9 @@ typedef struct
   { int lidx;    //  left LA index
     int ridx;    //  right LA index
     int delta;   //  Difference between A-gap and B-gap
+    int soft;    //  0 (soft) = pair is not close to gap border or adjacent gap border on both sides
+                 //  1 (anciliary) = pair is not close to gap border but is to adjacent gap border
+                 //  2 (hard) = pair is close to gap border on both sides
   } Spanner;
 
 typedef struct
@@ -1174,14 +1183,103 @@ static Patch *compute_patch(int lft, int rgt, Overlap *lov, Overlap *rov)
   return (&ans);
 }
 
+//  Examine the spanning pairs for a gap.  Group those with sufficient density
+//    i.e. with 20 + 10% of the last one.  If test == 1, keep groups that have at
+//    least 4 members, and are either 60% hard or at least 8 hard pairs, but trim
+//    away any extremal non-hard pairs.  If test == 0, keep the largest group that has
+//    at least 4 members, and is either 60% not soft or at least 8 non-soft pairs,
+//    but trim awya ny extermal soft pairs.  If "move" is non-zero then compress gsort
+//    accordingly, return the size of gsort after trimming in all instances.
+
+static int analyze_gap_pairs(int gsize, Spanner *gsort, Overlap *ovls,
+                             int gcnt, int scnt, int test, int move)
+{ int j, l, c, x, w;
+  int bord, soft, keeper;
+  int ncnt, biggest;
+
+  (void) ovls;
+
+  biggest = 0;
+  ncnt = 0;
+#ifdef SHOW_PAIRS
+  if (move)
+    printf("  Gsort: %d\n",gsize);
+#endif
+  c = gsize - gsort[0].delta;
+  w = 0;
+  for (j = 0; j <= gcnt; j++)
+    { l = c;
+      if (j >= gcnt)
+        bord = 1;
+      else
+        { c = gsize - gsort[j].delta;
+          if (l < 0)
+            bord = (l-c >= 20-.1*c);
+          else
+            bord = (l-c >= 20+.1*l);
+        }
+      if (bord)
+        { soft = 0;
+          for (x = w; x < j; x++)
+            soft += (gsort[x].soft <= test);
+          keeper = (j-w >= 4 && (soft < .4*(j-w) || (j-w)-soft >= 8));
+          if (test == 0)
+            { if (keeper && j-w > biggest)
+                { biggest = j-w;
+                  ncnt = 0;
+                }
+              else
+                keeper = 0;
+            }
+#ifdef SHOW_PAIRS
+          if (move)
+            { printf("----\n");
+              for (x = w; x < j; x++)
+                { printf("  %3d: %5d %5d",x,gsort[x].delta,gsize-gsort[x].delta);
+                  printf(" %5d",ovls[gsort[x].lidx].bread);
+                  if (gsort[x].soft == 0)
+                    printf(" @");
+                  else if (gsort[x].soft == 1)
+                    printf(" #");
+                  else
+                    printf("  ");
+                  if (!keeper)
+                    printf(" X");
+                  printf("\n");
+                }
+            }
+#endif
+          if (keeper)
+            { for (x = w; x < j; x++)
+                if (gsort[x].soft > test)
+                  break;
+              for (w = j; gsort[w-1].soft <= test; w--)
+                ;
+              if (move)
+                while (x < w)
+                  gsort[ncnt++] = gsort[x++];
+              else
+                ncnt += w-x;
+            }
+          w = j;
+        }
+    }
+  if (move)
+    for (x = gcnt; x < gcnt+scnt; x++)
+      gsort[ncnt++] = gsort[x];
+  else
+    ncnt += scnt;
+
+  return (ncnt);
+}
+
 //  Categorize each gap and if appropriate return the best patch for each
 
 static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rblock,
                       int *p_lft, int *p_rgt)
 { static int  nmax = 0;
 
-  static Spanner *gsort = NULL;       //  A-B delta and idx-pair for all B-reads spanning a gap
-  static int     *asort = NULL;       //  A-B delta for all B-reads spanning a gap
+  static Spanner *gsort = NULL;   //  A-B delta and idx-pair for all B-reads spanning a gap
 
   static int      ANCHOR_THRESH;
 
@@ -1197,8 +1295,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
     { if (novl > nmax)
         { nmax = 1.2*novl + 500;
           gsort = (Spanner *) Realloc(gsort,nmax*sizeof(Spanner),"Allocating gap vector");
-          asort = (int *) Realloc(asort,nmax*sizeof(int),"Allocating adaptemer position vector");
-          if (gsort == NULL || asort == NULL)
+          if (gsort == NULL)
             exit (1);
           ANCHOR_THRESH = ANCHOR_MATCH * TRACE_SPACING;
         }
@@ -1257,30 +1354,33 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
 #endif
     }
 
-  { int bread, bcomp, blen;
+  { int bread, bcomp, blen, blast;
     int ab, ae;
+    int lstack[10], ltop;
+    int rstack[10], rtop;
     int lcnt, rcnt, scnt, gcnt, acnt;
-    int lidx, ridx, sidx, cidx;
+    int lidx, ridx, sidx, Lidx, Ridx;
     int k;
 
     //  Find LA pairs or LAs spanning the gap flank [lcv,rcv]
 
+    bread = -1;
     lcnt = rcnt = scnt = gcnt = acnt = 0;
     for (j = 0; j < novl; j = k)
-      { bread = ovls[j].bread;
+      { blast = bread;
+        bread = ovls[j].bread;
         blen  = DB->reads[bread].rlen;
         bcomp = COMP(ovls[j].flags);
-        if (bcomp)
-          cidx = j;
+        Lidx  = lidx;
+        Ridx  = ridx;
 
+        ltop = rtop = 0;
         lidx = ridx = sidx = -1;    //  For all LA's with same b-read
         for (k = j; k < novl; k++)
           { if (ovls[k].bread != bread)
               break;
             if (COMP(ovls[k].flags) != (uint32) bcomp)   //  Note when b switches orientation
-              { cidx  = k;
-                bcomp = COMP(ovls[k].flags);
-              }
+              break;
             ab = ovls[k].path.abpos;
             ae = ovls[k].path.aepos;
 
@@ -1293,47 +1393,82 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
 #endif
 
             //  Is LA a spanner, left-partner, or right partner
+            //    A partner is hard if end=point is within COVER_LEN of the gap boundary
+            //    Record rigthmost/leftmost left/right hard partners (if any)
 
             if (ab <= lcv && ae >= rcv)
               { sidx = k;
                 lidx = ridx = -1;
+                ltop = rtop = 0;
                 continue;
               }
 
+            if (sidx >= 0)
+              continue;
+            
+            if (ae >= rcv && ab > lft)
+              { if (rtop < 10)
+                  rstack[rtop++] = k;
 #ifdef SHOW_PAIRS
-           if (ae >= rcv && ab <= rcv && ab - ovls[k].path.bbpos <= lft - COVER_LEN)
-             printf("r");
-           else
-             printf(" ");
-           if (ab <= lcv && ae >= lcv && ae + (blen-ovls[j].path.bepos) >= rgt + COVER_LEN)
-             printf("l");
-           else
-             printf(" ");
-#endif
-
-            if (ae >= rcv && ab <= rcv && ab - ovls[k].path.bbpos <= lft - COVER_LEN)
-              ridx = k;
-            if (ab <= lcv && ae >= lcv && ae + (blen-ovls[j].path.bepos) >= rgt + COVER_LEN)
-              lidx = k;
-          }
-        if (! bcomp)
-          cidx = k;
+                printf("r");
+#endif
+                if (ab <= rcv && ridx < 0)
+                  { ridx = k;
+#ifdef SHOW_PAIRS
+                    printf("+");
+#endif
+                  }
+              }
 
+            if (ab <= lcv && ae < rgt)
+              { if (ltop < 10)
+                  lstack[ltop++] = k;
 #ifdef SHOW_PAIRS
-        printf(" =");
-        if (sidx >= 0)
-          printf(" S");
-        if (lidx >= 0)
-          printf(" L");
-        if (ridx >= 0)
-          printf(" R");
-        if (0 <= lidx && lidx < ridx && (ridx < cidx || lidx >= cidx))
-          printf(" G");
-        if ((0<=ridx && ridx<cidx && cidx<=lidx) || (0<=lidx && lidx<cidx && cidx<=ridx))
-          printf(" A");
+                printf("l");
 #endif
+                if (ae >= lcv)
+                  { lidx = k;
+#ifdef SHOW_PAIRS
+                    printf("+");
+#endif
+                  }
+	      }
+          }
 
-        //  Add spanning LA or spanning pair to gsort list.  Add contra pairs to asort list.
+        //  Check for a hard contra pair and if found add
+        //  Then check for a spanner and if so then add to gsort list.
+        //  Then check for a spanning pair: use hard pair if available, otherwise
+        //    use tightest pair and term it anciliary if endpoints are within an adjacent
+        //    gap boundary, or soft otherwise.
+        //  Finally, if left or right hard (but unpaired) then record as a conflict if
+        //    projection extends MIN_OVERLAP past the other side.
+
+        if (blast == bread)
+          { if (ridx < 0)
+              { if (lidx >= 0 && Ridx >= 0 && Lidx < 0)
+                  { acnt += 1;
+                    if (Ridx >= 0 && ovls[Ridx].path.abpos - ovls[Ridx].path.bbpos
+                                             <= lft - MIN_OVERLAP)
+                      rcnt -= 1;
+#ifdef SHOW_PAIRS
+                    printf(" = A");
+#endif
+                    continue;
+                  }
+              }
+            else
+              { if (lidx < 0 && Ridx < 0 && Lidx >= 0)
+                  { acnt += 1;
+                    if (Lidx >= 0 && ovls[Lidx].path.aepos + (blen-ovls[Lidx].path.bepos)
+                                            >= rgt + MIN_OVERLAP)
+                      lcnt -= 1;
+#ifdef SHOW_PAIRS
+                    printf(" = A");
+#endif
+                    continue;
+                  }
+              }
+          }
 
         if (sidx >= 0)
           { gsort[gcnt].delta = DB->maxlen;
@@ -1341,102 +1476,148 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
             gsort[gcnt].ridx  = sidx;
             gcnt += 1;
             scnt += 1;
+#ifdef SHOW_PAIRS
+            printf(" = S");
+#endif
+            continue;
           }
-        else
-          { if (lidx >= 0)
-              lcnt += 1;
-            if (ridx >= 0)
-              rcnt += 1;
-            if (0 <= lidx && lidx < ridx && (ridx < cidx || cidx <= lidx))
-              { gsort[gcnt].delta = (ovls[ridx].path.abpos - ovls[lidx].path.aepos)
-                                  - (ovls[ridx].path.bbpos - ovls[lidx].path.bepos);
-                gsort[gcnt].lidx  = lidx;
-                gsort[gcnt].ridx  = ridx;
-                gcnt += 1;
+
+        if (ltop > 0 && rtop > 0)
+          { int lok, rok, x;
+
+            if (lidx < 0 || ridx < 0)
+              { int dif, bst;
+                int x, y;
+
+                bst = 0x7fffffff;
+                for (ltop--; ltop >= 0; ltop--)
+                  { x = lstack[ltop];
+                    for (rtop--; rtop >= 0; rtop--)
+                      { y = rstack[rtop];
+                        dif = (ovls[y].path.abpos - ovls[x].path.aepos)
+                           - (ovls[y].path.bbpos - ovls[x].path.bepos);
+                        dif = abs(dif);
+                        if (dif < bst)
+                          { bst  = dif;
+                            lidx = x;
+                            ridx = y;
+                            dif = (ovls[ridx].path.abpos - ovls[lidx].path.aepos)
+                                - (ovls[ridx].path.bbpos - ovls[lidx].path.bepos);
+#ifdef SHOW_PAIRS
+                            printf(" C(%d,%d = %d)",x,y,dif);
+#endif
+                          }
+                      }
+                  }
+              }
+
+            lok = 2;
+            if (ovls[lidx].path.aepos < lcv)
+              { x = ovls[lidx].path.aepos;
+                lok = (lblock > FirstB && x <= lblock->beg && x >= lblock[-1].end - COVER_LEN);
+              }
+            rok = 2;
+            if (ovls[ridx].path.abpos > rcv)
+              { x = ovls[ridx].path.abpos;
+                rok = (rblock < LastB && x >= rblock->end && x <= rblock[1].beg + COVER_LEN);
               }
-            else if ((0<=ridx && ridx<cidx && cidx<=lidx) || (0<=lidx && lidx<cidx && cidx<=ridx))
-              asort[acnt++] = (((blen-ovls[ridx].path.bbpos) - ovls[lidx].path.bepos)
-                            + (ovls[lidx].path.aepos + ovls[ridx].path.abpos))/2;
+  
+            if (lok >= 2 && rok >= 2)
+              gsort[gcnt].soft = 2;
+            else if (lok >= 1 && rok >= 1)
+              gsort[gcnt].soft = 1;
+            else
+              gsort[gcnt].soft = 0;
+            gsort[gcnt].delta = (ovls[ridx].path.abpos - ovls[lidx].path.aepos)
+                              - (ovls[ridx].path.bbpos - ovls[lidx].path.bepos);
+            gsort[gcnt].lidx  = lidx;
+            gsort[gcnt].ridx  = ridx;
+            gcnt += 1;
+#ifdef SHOW_PAIRS
+            printf(" = G%d",gsort[gcnt-1].delta);
+#endif
+            continue;
           }
-      }
 
+        if (ridx >= 0 && ovls[ridx].path.abpos - ovls[ridx].path.bbpos <= lft - MIN_OVERLAP)
+          { rcnt += 1;
 #ifdef SHOW_PAIRS
-    printf("\n");
+            printf(" = R");
+#endif
+          }
+        if (lidx >= 0 && ovls[lidx].path.aepos + (blen-ovls[lidx].path.bepos) >= rgt + MIN_OVERLAP)
+          { lcnt += 1;
+#ifdef SHOW_PAIRS
+            printf(" = L");
 #endif
+          }
+      }
 
-#ifdef DEBUG_GAP_STATUS
-    printf("    lcnt = %d  scnt = %d(%d)  rcnt = %d acnt = %d\n",lcnt,gcnt-scnt,scnt,rcnt,acnt);
+#ifdef SHOW_PAIRS
+    printf("\n");
 #endif
 
-    { int64 med, dev;
-      int   std, avg;
-      int   low, hgh;
+    { int ccnt, ocnt;
 
       if (lcnt < rcnt)
-        rcnt = lcnt;
+        ccnt = lcnt;
+      else
+        ccnt = rcnt;
+
+      //  Analyze pair list gsort: if standard analysis (only hard pairs count) does not yield
+      //    a span, then consider anciliary pair spanners (rarely makes a difference but does
+      //    save a few.
+  
+      qsort(gsort,gcnt,sizeof(Spanner),GSORT);
+      gcnt -= scnt;
+
+      ocnt = gcnt;
+      gcnt = analyze_gap_pairs(rgt-lft,gsort,ovls,gcnt,scnt,1,0);
+      if (scnt < 4 && gcnt < 10 && gcnt < ccnt)
+        {
+#ifdef SHOW_PAIRS
+          printf("  SPECIAL\n");
+#endif
+          gcnt = analyze_gap_pairs(rgt-lft,gsort,ovls,ocnt,scnt,0,1);
+#ifdef SHOW_PAIRS
+          if (gcnt >= 10 || gcnt >= ccnt)
+            printf("  SWITCH\n");
+#endif
+        }
+      else
+        analyze_gap_pairs(rgt-lft,gsort,ovls,ocnt,scnt,1,1);
+
+#ifdef DEBUG_GAP_STATUS
+      printf("    lcnt = %d  gcnt = %d  scnt = %d  rcnt = %d acnt = %d\n",
+             lcnt,gcnt-scnt,scnt,rcnt,acnt);
+#endif
 
       //  Lots of contra pairs and less spanning support, call it an adaptamer gap.
   
-      if (acnt >= .4*rcnt && gcnt < .3*acnt) 
+      if (acnt >= .3*ccnt && gcnt < acnt) 
         {
 #ifdef DEBUG_GAP_STATUS
-          qsort(asort,acnt,sizeof(int),ASORT);
-          med = asort[acnt/2];
-          low = acnt*.25;
-          hgh = acnt*.75;
-          dev = 0;
-          for (j = low; j <= hgh; j++)
-            dev += (asort[j]-med)*(asort[j]-med);
-          std = sqrt((1.*dev)/acnt);
-          if (std > 200)
-            printf("  UNCERTAIN, std. dev. large\n");
           printf("    ADAPT %3d\n",std);
 #endif
           return (ADAPT);
         }
 
-      //  Examine the spanning pairs for the gap and compute average and deviation
-      //     of gap deltas for second and third quartile
-  
-      qsort(gsort,gcnt,sizeof(Spanner),GSORT);
-      gcnt -= scnt;
+      //  If there is insufficient evidence for a span, then split.
 
-      if (gcnt >= 4)
-        { med = gsort[gcnt/2].delta;
-          low = gcnt*.25;
-          hgh = gcnt*.75;
-          dev = 0;
-          avg = 0;
-          for (j = low; j <= hgh; j++)
-            { dev += (gsort[j].delta-med)*(gsort[j].delta-med);
-              avg += gsort[j].delta;
-            }
-          std = sqrt((1.*dev)/gcnt);
-          avg = avg/((hgh-low)+1);
-          low = avg-4*std;
-          hgh = avg+4*std;
-        }
-      else
-        std = 0;
-
-      //  If the pairing gap deviation is too large or there are too few pairs or
-      //    most potential partners are not paired, then be safe and split it.
-
-      gcnt += scnt;
-      if (std >= 150 || gcnt < 10 || (gcnt < .4*rcnt && gcnt < 20))
+      if (scnt < 4 && gcnt < 10 && gcnt <= ccnt)
         {
 #ifdef DEBUG_GAP_STATUS
-          if (rcnt >= 20)
+          if (ccnt >= 20)
             printf("    STRONG SPLIT\n");
           else
             printf("    WEAK SPLIT\n");
           if (gcnt >= 10)
-            printf("  UNCERTAIN %5.1f %5d %3d\n",(scnt+gcnt)/(1.*rcnt),rgt-lft,gcnt);
+            printf("  UNCERTAIN %5.1f %5d %3d\n",gcnt/(1.*ccnt),rgt-lft,gcnt);
 #endif
           return (SPLIT);
         }
 
-      //  Otherwise consider the gap linkable and try to find a viable patch, declaring a split
+      //  Otherwise consider the gap spannable and try to find a viable patch, declaring a split
       //    iff all patch attemtps fail
   
       else
@@ -1506,7 +1687,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
             { lidx = gsort[j].lidx;
               ridx = gsort[j].ridx;
 
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
               if (lidx != ridx)
                 printf("       %5d [%5d,%5d] [%5d,%5d] %4d",
                        ovls[lidx].bread,ovls[lidx].path.abpos,ovls[lidx].path.aepos,
@@ -1520,17 +1701,17 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
 
               if (can != NULL)
                 {
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                   printf(" %d",can->end-can->beg);
 #endif
                   if (can->anc <= ANCHOR_THRESH)
                     { ncand += 1;
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                       printf("  AA %d %d(%d)",can->anc,can->bad,can->avg);
 #endif
                     }
                 }
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
               printf("\n");
 #endif
             } 
@@ -1542,7 +1723,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
             { int x, best, nlft, nrgt;
               int nanchor, ntry;
 
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
               printf("     NOT ENOUGH\n");
 #endif
 
@@ -1569,7 +1750,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
                     for (j = 0; j < gcnt; j++)
                       if (eval_lft_anchor(x,ovls+gsort[j].lidx) <= ANCHOR_THRESH)
                         nanchor += 1;
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                     printf("       %5d: %3d\n",x,nanchor);
 #endif
                     if (nanchor > best)
@@ -1577,7 +1758,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
                         nlft = x;
                       }
                   }
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
               printf("          %5d->%5d\n",lft,nlft);
 #endif
 
@@ -1597,7 +1778,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
                     for (j = 0; j < gcnt; j++)
                       if (eval_rgt_anchor(x,ovls+gsort[j].ridx) <= ANCHOR_THRESH)
                         nanchor += 1;
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                     printf("       %5d: %3d\n",x,nanchor);
 #endif
                     if (nanchor > best)
@@ -1605,7 +1786,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
                         nrgt = x;
                       }
                   }
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
               printf("          %5d->%5d\n",rgt,nrgt);
 #endif
 
@@ -1628,7 +1809,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
                 { lidx = gsort[j].lidx;
                   ridx = gsort[j].ridx;
 
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                   if (lidx != ridx)
                     printf("       %5d [%5d,%5d] [%5d,%5d] %4d",
                            ovls[lidx].bread,ovls[lidx].path.abpos,ovls[lidx].path.aepos,
@@ -1643,18 +1824,18 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
     
                       if (can != NULL)
                         {
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                           printf(" %d",can->end-can->beg);
 #endif
                           if (can->anc <= ANCHOR_THRESH)
                             { ncand += 1;
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                               printf("  AA %d %d(%d)",can->anc,can->bad,can->avg);
 #endif
                             }
                         }
                     }
-#ifdef DEBUG_GAP_STATUS
+#ifdef DEBUG_PATCHING
                   printf("\n");
 #endif
                 } 
@@ -1674,7 +1855,7 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
           *p_rgt = rgt;
 
 #ifdef DEBUG_GAP_STATUS
-          printf("    SPAN %3d %5d:  PATCHABLE\n",std,rgt-lft);
+          printf("    SPAN %5d:  PATCHABLE\n",rgt-lft);
 #endif
           return (SPAN);
         }
@@ -1682,11 +1863,11 @@ static int gap_status(Overlap *ovls, int novl, Interval *lblock, Interval *rbloc
   }
 }
 
-static int *GAP_ANALYSIS(Overlap *ovls, int novl, Interval *block, int *p_nblk)
+static int *GAP_ANALYSIS(int aread, Overlap *ovls, int novl, Interval *block, int *p_nblk)
 { static int  bmax = 0;
   static int *status = NULL;      //  Status of gaps between HQ_blocks
  
-#ifdef DEBUG_SUMMARY
+#if defined(DEBUG_SUMMARY) || defined(DEBUG_CLASS)
   static char *status_string[4] = { "LOWQ", "SPAN", "SPLIT", "ADAPT" };
 #endif
 
@@ -1694,6 +1875,8 @@ static int *GAP_ANALYSIS(Overlap *ovls, int novl, Interval *block, int *p_nblk)
   int i, j;
   int slft = 0, srgt = 0;
 
+  (void) aread;
+
   nblk = *p_nblk;
   if (nblk > bmax)
     { bmax = 1.2*nblk + 100;
@@ -1733,6 +1916,11 @@ static int *GAP_ANALYSIS(Overlap *ovls, int novl, Interval *block, int *p_nblk)
   for (i = 1; i < nblk; i++)
     printf(" %s [%d,%d]",status_string[status[i]],block[i].beg,block[i].end);
 #endif
+
+#ifdef DEBUG_CLASS
+  for (i = 1; i < nblk; i++)
+    printf("AREAD %d  %s  [%d,%d]\n",aread,status_string[status[i]],block[i-1].end,block[i].beg);
+#endif
   
   *p_nblk  = nblk;
   return (status);
@@ -1759,11 +1947,7 @@ static void GAPS(int aread, Overlap *ovls, int novl)
   Interval *block;
   int      *status;
 
-#if defined(DEBUG_HQ_BLOCKS) || defined(DEBUG_HOLE_FINDER)
-  printf("\n");
-  printf("AREAD %d\n",aread);
-#endif
-#if  defined(DEBUG_GAP_STATUS) || defined(DEBUG_SUMMARY)
+#if defined(DEBUG_HQ_BLOCKS) || defined(DEBUG_HOLE_FINDER) || defined(DEBUG_GAP_STATUS) || defined(DEBUG_SUMMARY)
   printf("\n");
   printf("AREAD %d\n",aread);
 #endif
@@ -1803,7 +1987,7 @@ static void GAPS(int aread, Overlap *ovls, int novl)
   //  Determine the status of each gap between a pair of blocks
 
   if (nblk > 0)
-    status = GAP_ANALYSIS(ovls,novl,block,&nblk);
+    status = GAP_ANALYSIS(aread,ovls,novl,block,&nblk);
 
   //  No blocks? ==> nothing to do
 
@@ -2010,11 +2194,9 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
   else
     tbytes = sizeof(uint16);
 
-  if (novl <= 0)
-    return (0);
-
-  Read_Overlap(input,ovls);
-  if (trace)
+  if (Read_Overlap(input,ovls) != 0)
+    ovls[0].aread = INT32_MAX;
+  else if (trace)
     { if (ovls[0].path.tlen > pmax)
         { pmax  = 1.2*(ovls[0].path.tlen)+10000;
           paths = (uint16 *) Realloc(paths,sizeof(uint16)*pmax,"Expanding path buffer");
@@ -2088,6 +2270,12 @@ static int make_a_pass(FILE *input, void (*ACTION)(int, Overlap *, int), int tra
       ACTION(j,ovls,n);
     }
 
+  if (ovls[n].aread < INT32_MAX)
+    { fprintf(stderr,"%s: .las file overlaps don't correspond to reads in block %d of DB\n",
+                     Prog_Name,DB_PART);
+      exit (1);
+    }
+
   return (max);
 }
 
@@ -2096,9 +2284,8 @@ int main(int argc, char *argv[])
   char       *root, *dpwd;
   char       *las, *lpwd;
   int64       novl;
-  HITS_TRACK *track;
+  DAZZ_TRACK *track;
   int         c;
-  int         COVERAGE;
 
   //  Process arguments
 
@@ -2260,19 +2447,19 @@ int main(int argc, char *argv[])
                 if (dbfile == NULL)
                   exit (1);
                 if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 for (i = 0; i < nfiles; i++)
                   if (fgets(buffer,2*MAX_NAME+100,dbfile) == NULL)
-                    SYSTEM_ERROR
+                    SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_NBLOCK,&nblocks) != 1)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_PARAMS,&size,&cutoff,&all) != 3)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 for (i = 1; i <= part; i++)
                   if (fscanf(dbfile,DB_BDATA,&oindx,&DB_FIRST) != 2)
-                    SYSTEM_ERROR
+                    SYSTEM_READ_ERROR
                 if (fscanf(dbfile,DB_BDATA,&oindx,&DB_LAST) != 2)
-                  SYSTEM_ERROR
+                  SYSTEM_READ_ERROR
                 fclose(dbfile);
                 DB_PART = part;
                 *p = '\0';
@@ -2321,7 +2508,7 @@ int main(int argc, char *argv[])
 
       //  Open overlap file
 
-      lpwd = PathTo(argv[2]);
+      lpwd = PathTo(argv[c]);
       if (DB_PART)
         input = Fopen(Catenate(lpwd,"/",las,Numbered_Suffix(".",DB_PART,".las")),"r");
       else
diff --git a/DB.c b/DB.c
index 69060d0..9b5c85f 100644
--- a/DB.c
+++ b/DB.c
@@ -41,6 +41,24 @@ char Ebuffer[1000];
 
 #endif
 
+int Count_Args(char *var)
+{ int   cnt, lev;
+  char *s;
+
+  cnt = 1;
+  lev = 0;
+  for (s = var; *s != '\0'; s++)
+    if (*s == ',')
+      { if (lev == 0)
+          cnt += 1;
+      }
+    else if (*s == '(')
+      lev += 1;
+    else if (*s == ')')
+      lev -= 1;
+  return (cnt);
+}
+
 void *Malloc(int64 size, char *mesg)
 { void *p;
 
@@ -382,7 +400,7 @@ void Number_Arrow(char *s)
  ********************************************************************************************/
 
 
-// Open the given database or dam, "path" into the supplied HITS_DB record "db". If the name has
+// Open the given database or dam, "path" into the supplied DAZZ_DB record "db". If the name has
 //   a part # in it then just the part is opened.  The index array is allocated (for all or
 //   just the part) and read in.
 // Return status of routine:
@@ -390,8 +408,8 @@ void Number_Arrow(char *s)
 //     0: Open of DB proceeded without mishap
 //     1: Open of DAM proceeded without mishap
 
-int Open_DB(char* path, HITS_DB *db)
-{ HITS_DB dbcopy;
+int Open_DB(char* path, DAZZ_DB *db)
+{ DAZZ_DB dbcopy;
   char   *root, *pwd, *bptr, *fptr, *cat;
   int     nreads;
   FILE   *index, *dbvis;
@@ -437,7 +455,7 @@ int Open_DB(char* path, HITS_DB *db)
 
   if ((index = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r")) == NULL)
     goto error1;
-  if (fread(db,sizeof(HITS_DB),1,index) != 1)
+  if (fread(db,sizeof(DAZZ_DB),1,index) != 1)
     { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
       goto error2;
     }
@@ -505,28 +523,28 @@ int Open_DB(char* path, HITS_DB *db)
 
   nreads = ulast-ufirst;
   if (part <= 0)
-    { db->reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+    { db->reads = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*(nreads+2),"Allocating Open_DB index");
       if (db->reads == NULL)
         goto error2;
       db->reads += 1;
-      if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
+      if (fread(db->reads,sizeof(DAZZ_READ),nreads,index) != (size_t) nreads)
         { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
           free(db->reads-1);
           goto error2;
         }
     }
   else
-    { HITS_READ *reads;
+    { DAZZ_READ *reads;
       int        i, r, maxlen;
       int64      totlen;
 
-      reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+      reads = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*(nreads+2),"Allocating Open_DB index");
       if (reads == NULL)
         goto error2;
       reads += 1;
 
-      fseeko(index,sizeof(HITS_READ)*ufirst,SEEK_CUR);
-      if (fread(reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
+      fseeko(index,sizeof(DAZZ_READ)*ufirst,SEEK_CUR);
+      if (fread(reads,sizeof(DAZZ_READ),nreads,index) != (size_t) nreads)
         { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
           free(reads-1);
           goto error2;
@@ -580,13 +598,13 @@ error:
 //   of the current DB partition.  Reallocate smaller memory blocks for the information kept
 //   for the retained reads.
 
-void Trim_DB(HITS_DB *db)
+void Trim_DB(DAZZ_DB *db)
 { int         i, j, r;
   int         allflag, cutoff;
   int64       totlen;
   int         maxlen, nreads;
-  HITS_TRACK *record;
-  HITS_READ  *reads;
+  DAZZ_TRACK *record;
+  DAZZ_READ  *reads;
 
   if (db->trimmed) return;
 
@@ -603,7 +621,7 @@ void Trim_DB(HITS_DB *db)
 
   for (record = db->tracks; record != NULL; record = record->next)
     if (strcmp(record->name,". at qvs") == 0)
-      { uint16 *table = ((HITS_QV *) record)->table;
+      { uint16 *table = ((DAZZ_QV *) record)->table;
 
         j = 0;
         for (i = 0; i < db->nreads; i++)
@@ -675,7 +693,7 @@ void Trim_DB(HITS_DB *db)
   db->trimmed = 1;
 
   if (j < nreads)
-    { db->reads = Realloc(reads-1,sizeof(HITS_READ)*(j+2),NULL);
+    { db->reads = Realloc(reads-1,sizeof(DAZZ_READ)*(j+2),NULL);
       db->reads += 1;
     }
 }
@@ -683,12 +701,12 @@ void Trim_DB(HITS_DB *db)
 // The DB has already been trimmed, but a track over the untrimmed DB needs to be loaded.
 //   Trim the track by rereading the untrimmed DB index from the file system.
 
-static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
+static int Late_Track_Trim(DAZZ_DB *db, DAZZ_TRACK *track, int ispart)
 { int         i, j, r;
   int         allflag, cutoff;
   int         ureads;
   char       *root;
-  HITS_READ   read;
+  DAZZ_READ   read;
   FILE       *indx;
 
   if (!db->trimmed) return (0);
@@ -703,7 +721,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
 
   root = rindex(db->path,'/') + 2;
   indx = Fopen(Catenate(db->path,"","",".idx"),"r");
-  fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*db->ufirst,SEEK_SET);
+  fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*db->ufirst,SEEK_SET);
   if (ispart)
     ureads = ((int *) (db->reads))[-1];
   else
@@ -725,7 +743,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
       { anno = (char *) track->anno;
         j = r = 0;
         for (i = r = 0; i < ureads; i++, r += size)
-          { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+          { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
               { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
                 fclose(indx);
                 EXIT(1);
@@ -744,7 +762,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
         anno4 = (int *) (track->anno);
         j = anno4[0] = 0;
         for (i = 0; i < ureads; i++)
-          { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+          { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
               { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
                 fclose(indx);
                 EXIT(1);
@@ -764,7 +782,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
         anno8 = (int64 *) (track->anno);
         j = anno8[0] = 0;
         for (i = 0; i < ureads; i++)
-          { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+          { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
               { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
                 fclose(indx);
                 EXIT(1);
@@ -789,8 +807,8 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
 //   and any open file pointers.  The record pointed at by db however remains (the user
 //   supplied it and so should free it).
 
-void Close_DB(HITS_DB *db)
-{ HITS_TRACK *t, *p;
+void Close_DB(DAZZ_DB *db)
+{ DAZZ_TRACK *t, *p;
 
   if (db->loaded)
     free(((char *) (db->bases)) - 1);
@@ -813,19 +831,19 @@ void Close_DB(HITS_DB *db)
 
 // Return the size in bytes of the memory occupied by a given DB
 
-int64 sizeof_DB(HITS_DB *db)
+int64 sizeof_DB(DAZZ_DB *db)
 { int64       s;
-  HITS_TRACK *t;
+  DAZZ_TRACK *t;
 
-  s = sizeof(HITS_DB)
-    + sizeof(HITS_READ)*(db->nreads+2)
+  s = sizeof(DAZZ_DB)
+    + sizeof(DAZZ_READ)*(db->nreads+2)
     + strlen(db->path)+1
     + (db->totlen+db->nreads+4);
 
   t = db->tracks;
   if (t != NULL && strcmp(t->name,". at qvs") == 0)
-    { HITS_QV *q = (HITS_QV *) t;
-      s += sizeof(HITS_QV)
+    { DAZZ_QV *q = (DAZZ_QV *) t;
+      s += sizeof(DAZZ_QV)
          + sizeof(uint16) * db->nreads
          + q->ncodes * sizeof(QVcoding)
          + 6;
@@ -833,7 +851,7 @@ int64 sizeof_DB(HITS_DB *db)
     }
 
   for (; t != NULL; t = t->next)
-    { s += sizeof(HITS_TRACK)
+    { s += sizeof(DAZZ_TRACK)
          + strlen(t->name)+1
          + t->size * (db->nreads+1);
       if (t->data != NULL)
@@ -854,14 +872,14 @@ int64 sizeof_DB(HITS_DB *db)
  *
  ********************************************************************************************/
 
-HITS_DB *Active_DB = NULL;  //  Last db/qv used by "Load_QVentry"
-HITS_QV *Active_QV;         //    Becomes invalid after closing
+DAZZ_DB *Active_DB = NULL;  //  Last db/qv used by "Load_QVentry"
+DAZZ_QV *Active_QV;         //    Becomes invalid after closing
 
-int Load_QVs(HITS_DB *db)
+int Load_QVs(DAZZ_DB *db)
 { FILE        *quiva, *istub, *indx;
   char        *root;
   uint16      *table;
-  HITS_QV     *qvtrk;
+  DAZZ_QV     *qvtrk;
   QVcoding    *coding, *nx;
   int          ncodes = 0;
 
@@ -956,7 +974,7 @@ int Load_QVs(HITS_DB *db)
             goto error;
           }
 
-        //  Carefully get the first coding scheme (its offset is most likely in a HITS_RECORD
+        //  Carefully get the first coding scheme (its offset is most likely in a DAZZ_RECORD
         //    in .idx that is *not* in memory).  Get all the other coding schemes normally and
         //    assign the tables # for each read in the block in "tables".
 
@@ -974,10 +992,10 @@ int Load_QVs(HITS_DB *db)
 
             i = n-fbeg;
             if (first < pfirst)
-              { HITS_READ read;
+              { DAZZ_READ read;
 
-                fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*first,SEEK_SET);
-                if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+                fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*first,SEEK_SET);
+                if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
                   { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
                     ncodes = i;
                     goto error;
@@ -1050,17 +1068,17 @@ int Load_QVs(HITS_DB *db)
           }
       }
 
-    //  Allocate and fill in the HITS_QV record and add it to the front of the
+    //  Allocate and fill in the DAZZ_QV record and add it to the front of the
     //    track list
 
-    qvtrk = (HITS_QV *) Malloc(sizeof(HITS_QV),"Allocating QV pseudo-track");
+    qvtrk = (DAZZ_QV *) Malloc(sizeof(DAZZ_QV),"Allocating QV pseudo-track");
     if (qvtrk == NULL)
       goto error;
     qvtrk->name   = Strdup(". at qvs","Allocating QV pseudo-track name");
     if (qvtrk->name == NULL)
       goto error;
     qvtrk->next   = db->tracks;
-    db->tracks    = (HITS_TRACK *) qvtrk;
+    db->tracks    = (DAZZ_TRACK *) qvtrk;
     qvtrk->ncodes = ncodes;
     qvtrk->table  = table;
     qvtrk->coding = coding;
@@ -1091,16 +1109,16 @@ error:
 
 // Close the QV stream, free the QV pseudo track and all associated memory
 
-void Close_QVs(HITS_DB *db)
-{ HITS_TRACK *track;
-  HITS_QV    *qvtrk;
+void Close_QVs(DAZZ_DB *db)
+{ DAZZ_TRACK *track;
+  DAZZ_QV    *qvtrk;
   int         i;
 
   Active_DB = NULL;
 
   track = db->tracks;
   if (track != NULL && strcmp(track->name,". at qvs") == 0)
-    { qvtrk = (HITS_QV *) track;
+    { qvtrk = (DAZZ_QV *) track;
       for (i = 0; i < qvtrk->ncodes; i++)
         Free_QVcoding(qvtrk->coding+i);
       free(qvtrk->coding);
@@ -1125,7 +1143,7 @@ void Close_QVs(HITS_DB *db)
 //    -1: Track is not the right size of DB either trimmed or untrimmed
 //    -2: Could not find the track 
 
-int Check_Track(HITS_DB *db, char *track, int *kind)
+int Check_Track(DAZZ_DB *db, char *track, int *kind)
 { FILE       *afile;
   int         tracklen, size, ispart;
   int         ureads, treads;
@@ -1181,10 +1199,10 @@ int Check_Track(HITS_DB *db, char *track, int *kind)
 
 // If track is not already in the db's track list, then allocate all the storage for it,
 //   read it in from the appropriate file, add it to the track list, and return a pointer
-//   to the newly created HITS_TRACK record.  If the track does not exist or cannot be
+//   to the newly created DAZZ_TRACK record.  If the track does not exist or cannot be
 //   opened for some reason, then NULL is returned.
 
-HITS_TRACK *Load_Track(HITS_DB *db, char *track)
+DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track)
 { FILE       *afile, *dfile;
   int         tracklen, size;
   int         nreads, ispart;
@@ -1192,7 +1210,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
   void       *anno;
   void       *data;
   char       *name;
-  HITS_TRACK *record;
+  DAZZ_TRACK *record;
 
   if (track[0] == '.')
     { EPRINTF(EPLACE,"%s: Track name, '%s', cannot begin with a .\n",Prog_Name,track);
@@ -1340,7 +1358,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
 
   fclose(afile);
 
-  record = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),"Allocating Track Record");
+  record = (DAZZ_TRACK *) Malloc(sizeof(DAZZ_TRACK),"Allocating Track Record");
   if (record == NULL)
     goto error;
   record->name = Strdup(track,"Allocating Track Name");
@@ -1379,8 +1397,161 @@ error:
   EXIT (NULL);
 }
 
-void Close_Track(HITS_DB *db, char *track)
-{ HITS_TRACK *record, *prev;
+// Assumming file pointer for afile is correctly positioned at the start of a extra item,
+//   and aname is the name of the .anno file, decode the value present and places it in
+//   extra if extra->nelem == 0, otherwise reduce the value just read into extra according 
+//   according the to the directive given by 'accum'.  Leave the read poinrt at the next
+//   extra or end-of-file.
+//   Returns:
+//      1 if at the end of file,
+//      0 if item was read and folded correctly,
+//     -1 if there was a system IO or allocation error (if interactive), and
+//     -2 if the new value could not be reduced into the currenct value of extra (interactive)
+
+int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra)
+{ int   vtype, nelem, accum, slen;
+  char *name;
+  void *value;
+
+#define EREAD(v,s,n,file,ret)                                                           \
+  { if (fread(v,s,n,file) != (size_t) n)                                                \
+      { if (ferror(file))                                                               \
+          fprintf(stderr,"%s: System error, read failed!\n",Prog_Name);       		\
+        else if (ret)                                                                   \
+          return (1);									\
+        else										\
+          fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,aname);		\
+        EXIT(-1);									\
+      }                                                                                 \
+  }
+
+  EREAD(&vtype,sizeof(int),1,afile,1)
+  EREAD(&nelem,sizeof(int),1,afile,0)
+  EREAD(&accum,sizeof(int),1,afile,0)
+  EREAD(&slen,sizeof(int),1,afile,0)
+
+  if (extra == NULL)
+    { if (fseeko(afile,slen+8*nelem,SEEK_CUR) < 0)
+        { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name);
+          EXIT(-1);
+        }
+      return (0);
+    }
+
+  name  = (char *) Malloc(slen+1,"Allocating extra name");
+  value = Malloc(8*nelem,"Allocating extra value");
+  if (name == NULL || value == NULL)
+    EXIT(-1);
+
+  EREAD(name,1,slen,afile,0);
+  EREAD(value,8,nelem,afile,0);
+  name[slen] = '\0';
+  
+  if (extra->nelem == 0)
+    { extra->vtype = vtype;
+      extra->nelem = nelem;
+      extra->accum = accum;
+      extra->name  = name;
+      extra->value = value;
+      return (0);
+    }
+
+  if (vtype != extra->vtype)
+    { fprintf(stderr,"%s: Type of extra %s does not agree with previous .anno block files\n",
+                     Prog_Name,name);
+      goto error;
+    }
+  if (nelem != extra->nelem)
+    { fprintf(stderr,"%s: Length of extra %s does not agree with previous .anno block files\n",
+                     Prog_Name,name);
+      goto error;
+    }
+  if (accum != extra->accum)
+    { fprintf(stderr,"%s: Reduction indicator of extra %s does not agree with",Prog_Name,name);
+      fprintf(stderr," previos .anno block files\n");
+      goto error;
+    }
+  if (strcmp(name,extra->name) != 0)
+    { fprintf(stderr,"%s: Expecting extra %s in .anno block file, not %s\n",
+                     Prog_Name,extra->name,name);
+      goto error;
+    }
+
+  if (vtype == DB_INT)
+    { int64 *ival = (int64 *) value;
+      int64 *eval = (int64 *) (extra->value);
+      int    j;
+
+      if (accum == DB_EXACT)
+        { for (j = 0; j < nelem; j++)
+            if (eval[j] != ival[j])
+              { fprintf(stderr,"%s: Value of extra %s doe not agree",Prog_Name,name);
+                fprintf(stderr," with previous .anno block files\n");
+                goto error;
+              }
+        }
+      else
+        { for (j = 0; j < nelem; j++)
+            eval[j] += ival[j];
+        }
+    }
+
+  else
+    { double *ival = (double *) value;
+      double *eval = (double *) (extra->value);
+      int     j;
+
+      if (accum == DB_EXACT)
+        { for (j = 0; j < nelem; j++)
+            if (eval[j] != ival[j])
+              { fprintf(stderr,"%s: Value of extra %s doe not agree",Prog_Name,name);
+                fprintf(stderr," with previous .anoo block files\n");
+                goto error;
+              }
+        }
+      else
+        { for (j = 0; j < nelem; j++)
+            eval[j] += ival[j];
+        }
+    }
+
+  free(value);
+  free(name);
+  return (0);
+
+error:
+  free(value);
+  free(name);
+  EXIT(1);
+}
+
+//  Write extra record to end of file afile and advance write pointer
+//  If interactive, then return non-zero on error, if bash, then print
+//  and halt if an error
+
+int Write_Extra(FILE *afile, DAZZ_EXTRA *extra)
+{ int slen; 
+
+#define EWRITE(v,s,n,file)                                                      \
+  { if (fwrite(v,s,n,file) != (size_t) n)                                       \
+      { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name);           \
+        EXIT(1);								\
+      }									 	\
+  }
+
+  EWRITE(&(extra->vtype),sizeof(int),1,afile)
+  FWRITE(&(extra->nelem),sizeof(int),1,afile)
+  FWRITE(&(extra->accum),sizeof(int),1,afile)
+  slen = strlen(extra->name);
+  FWRITE(&slen,sizeof(int),1,afile)
+  FWRITE(extra->name,1,slen,afile)
+  FWRITE(extra->value,8,extra->nelem,afile)
+
+  return (0);
+}
+
+void Close_Track(DAZZ_DB *db, char *track)
+{ DAZZ_TRACK *record, *prev;
 
   prev = NULL;
   for (record = db->tracks; record != NULL; record = record->next)
@@ -1410,7 +1581,7 @@ void Close_Track(HITS_DB *db, char *track)
 // Allocate and return a buffer big enough for the largest read in 'db', leaving room
 //   for an initial delimiter character
 
-char *New_Read_Buffer(HITS_DB *db)
+char *New_Read_Buffer(DAZZ_DB *db)
 { char *read;
 
   read = (char *) Malloc(db->maxlen+4,"Allocating New Read Buffer");
@@ -1425,11 +1596,11 @@ char *New_Read_Buffer(HITS_DB *db)
 //
 // **NB**, the byte before read will be set to a delimiter character!
 
-int Load_Read(HITS_DB *db, int i, char *read, int ascii)
+int Load_Read(DAZZ_DB *db, int i, char *read, int ascii)
 { FILE      *bases  = (FILE *) db->bases;
   int64      off;
   int        len, clen;
-  HITS_READ *r = db->reads;
+  DAZZ_READ *r = db->reads;
 
   if (i >= db->nreads)
     { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
@@ -1472,14 +1643,14 @@ int Load_Read(HITS_DB *db, int i, char *read, int ascii)
 //   and as a numeric string otherwise.
 //
 
-HITS_DB *Arrow_DB = NULL;         //  Last db/arw used by "Load_Arrow"
+DAZZ_DB *Arrow_DB = NULL;         //  Last db/arw used by "Load_Arrow"
 FILE    *Arrow_File = NULL;       //    Becomes invalid after closing
 
-int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
+int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii)
 { FILE      *arrow;
   int64      off;
   int        len, clen;
-  HITS_READ *r = db->reads;
+  DAZZ_READ *r = db->reads;
 
   if (i >= db->nreads)
     { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
@@ -1519,12 +1690,12 @@ int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
   return (0);
 }
 
-char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
+char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii)
 { FILE      *bases  = (FILE *) db->bases;
   int64      off;
   int        len, clen;
   int        bbeg, bend;
-  HITS_READ *r = db->reads;
+  DAZZ_READ *r = db->reads;
 
   if (i >= db->nreads)
     { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
@@ -1578,7 +1749,7 @@ char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
 
 // Allocate and return a buffer of 5 vectors big enough for the largest read in 'db'
 
-char **New_QV_Buffer(HITS_DB *db)
+char **New_QV_Buffer(DAZZ_DB *db)
 { char **entry;
   char  *qvs;
   int    i;
@@ -1595,8 +1766,8 @@ char **New_QV_Buffer(HITS_DB *db)
 // Load into entry the QV streams for the i'th read from db.  The parameter ascii applies to
 //  the DELTAG stream as described for Load_Read.
 
-int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii)
-{ HITS_READ *reads;
+int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii)
+{ DAZZ_READ *reads;
   FILE      *quiva;
   int        rlen;
 
@@ -1605,7 +1776,7 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii)
         { EPRINTF(EPLACE,"%s: QV's are not loaded (Load_QVentry)\n",Prog_Name);
           EXIT(1);
         }
-      Active_QV = (HITS_QV *) db->tracks;
+      Active_QV = (DAZZ_QV *) db->tracks;
       Active_DB = db;
     }
   if (i >= db->nreads)
@@ -1655,10 +1826,10 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii)
 //   non-zero then the reads are converted to ACGT ascii, otherwise the reads are left
 //   as numeric strings over 0(A), 1(C), 2(G), and 3(T).
 
-int Read_All_Sequences(HITS_DB *db, int ascii)
+int Read_All_Sequences(DAZZ_DB *db, int ascii)
 { FILE      *bases;
   int        nreads = db->nreads;
-  HITS_READ *reads = db->reads;
+  DAZZ_READ *reads = db->reads;
   void     (*translate)(char *s);
 
   char  *seq;
@@ -1713,6 +1884,16 @@ int Read_All_Sequences(HITS_DB *db, int ascii)
   return (0);
 }
 
+// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
+//   those of the form "prefix/[.]root.part" and call actor with the complete path to each file
+//   pointed at by path, and the suffix of the path by extension.  The . proceeds the root
+//   name if the defined constant HIDE_FILES is set.  Always the first call is with the
+//   path "prefix/root.[db|dam]" and extension "db" or "dam".  There will always be calls for
+//   "prefix/[.]root.idx" and "prefix/[.]root.bps".  All other calls are for *tracks* and
+//   so this routine gives one a way to know all the tracks associated with a given DB.
+//   -1 is returned if the path could not be found, and 1 is returned if an error (reported
+//   to EPLACE) occured and INTERACTIVE is defined.  Otherwise a 0 is returned.
+
 int List_DB_Files(char *path, void actor(char *path, char *extension))
 { int            status, plen, rlen, dlen;
   char          *root, *pwd, *name;
@@ -1750,19 +1931,9 @@ int List_DB_Files(char *path, void actor(char *path, char *extension))
         { isdam = 1;
           break;
         }
-      if (strcasecmp(name,Catenate("","",root,".db")) == 0)
-        { strncpy(root,name,rlen);
-          break;
-        }
-      if (strcasecmp(name,Catenate("","",root,".dam")) == 0)
-        { strncpy(root,name,rlen);
-          isdam = 1;
-          break;
-        }
     }
   if (dp == NULL)
-    { EPRINTF(EPLACE,"%s: Cannot find %s (List_DB_Files)\n",Prog_Name,pwd);
-      status = -1;
+    { status = -1;
       closedir(dirp);
       goto error;
     }
diff --git a/DB.h b/DB.h
index dc281de..ff41e9f 100644
--- a/DB.h
+++ b/DB.h
@@ -12,9 +12,9 @@
  *
  ********************************************************************************************/
 
-#ifndef _HITS_DB
+#ifndef _DAZZ_DB
 
-#define _HITS_DB
+#define _DAZZ_DB
 
 #include <stdio.h>
 
@@ -74,11 +74,6 @@ extern char Ebuffer[];
 
 #endif
 
-#define SYSTEM_ERROR							\
-  { EPRINTF(EPLACE,"%s: System error, read failed!\n",Prog_Name);	\
-    exit (2);								\
-  }
-
 #define ARG_INIT(name)                  \
   Prog_Name = Strdup(name,"");          \
   for (i = 0; i < 128; i++)             \
@@ -125,6 +120,108 @@ extern char Ebuffer[];
       exit (1);                                                                         \
     }
 
+
+/*******************************************************************************************
+ *
+ *  GUARDED BATCH IO MACROS
+ *
+ ********************************************************************************************/
+
+    //  Utilitieis
+
+int Count_Args(char *arg);
+
+#define SYSTEM_READ_ERROR						\
+  { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name);	\
+    exit (2);								\
+  }
+
+#define SYSTEM_WRITE_ERROR						\
+  { fprintf(stderr,"%s: System error, write failed!\n",Prog_Name);	\
+    exit (2);								\
+  }
+
+#define SYSTEM_CLOSE_ERROR						\
+  { fprintf(stderr,"%s: System error, file close failed!\n",Prog_Name);	\
+    exit (2);								\
+  }
+
+    //  Output
+
+#define FWRITE(v,s,n,file)			\
+  { if (fwrite(v,s,n,file) != (size_t) n)	\
+      SYSTEM_WRITE_ERROR			\
+  }
+
+#define FPRINTF(file,...)		\
+  { if (fprintf(file,__VA_ARGS__) < 0)	\
+      SYSTEM_WRITE_ERROR		\
+  }
+
+#define PRINTF(...)			\
+  { if (printf(__VA_ARGS__) < 0)	\
+      SYSTEM_WRITE_ERROR		\
+  }
+
+#define FPUTS(x,file)			\
+  { if (fputs(x,file) == EOF)		\
+      SYSTEM_WRITE_ERROR		\
+  }
+
+    //  Close
+
+#define FCLOSE(file)		\
+  { if (fclose(file) != 0)	\
+      SYSTEM_CLOSE_ERROR	\
+  }
+
+    //  Input
+
+#define FREAD(v,s,n,file)								\
+  { if (fread(v,s,n,file) != (size_t) n)						\
+      { if (ferror(file))								\
+          SYSTEM_READ_ERROR								\
+        else										\
+          { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name);	\
+            exit (1);									\
+          }										\
+      }											\
+  }
+
+#define FSCANF(file,...)								\
+  { if (fscanf(file,__VA_ARGS__) != Count_Args(#__VA_ARGS__)-1)				\
+      { if (ferror(file))								\
+          SYSTEM_READ_ERROR								\
+        else										\
+          { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name);	\
+            exit (1);									\
+          }										\
+      }											\
+  }
+
+#define FGETS(v,n,file)									\
+  { if (fgets(v,n,file) == NULL)							\
+      { if (ferror(file))								\
+          SYSTEM_READ_ERROR								\
+        else										\
+          { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name);	\
+            exit (1);									\
+          }										\
+      }											\
+  }
+
+#define FSEEKO(file,p,d)	\
+  { if (fseeko(file,p,d) < 0)	\
+      SYSTEM_READ_ERROR		\
+  }
+
+#define FTELLO(file)		\
+  ( { int x = ftello(file);	\
+      if (x < 0)		\
+        SYSTEM_READ_ERROR	\
+      ; x; 			\
+  } )
+
 /*******************************************************************************************
  *
  *  UTILITIES
@@ -193,7 +290,7 @@ typedef struct
                     //  Offset (in bytes) of scaffold header string in '.hdr' file (DAM)
                     //  4 compressed shorts containing snr info if an arrow DB.
     int     flags;  //  QV of read + flags above (DB only)
-  } HITS_READ;
+  } DAZZ_READ;
 
 //  A track can be of 3 types:
 //    data == NULL: there are nreads 'anno' records of size 'size'.
@@ -208,9 +305,31 @@ typedef struct _track
     int            size;  //  Size in bytes of anno records
     void          *anno;  //  over [0,nreads]: read i annotation: int, int64, or 'size' records 
     void          *data;  //     data[anno[i] .. anno[i+1]-1] is data if data != NULL
-  } HITS_TRACK;
+  } DAZZ_TRACK;
+
+//  The tailing part of a .anno track file can contain meta-information produced by the
+//    command that produced the track.  For example, the coverage, or good/bad parameters
+//    for trimming, or even say a histogram of QV values.  Each item is an array of 'nelem'
+//    64-bit ints or floats ('vtype' = DB_INT or DB_REAL), has a 'name' string that
+//    describes it, and an indicator as to whether the values should be equal accross all
+//    block tracks, or summed accross all block tracks (by Catrack).  'value' points at the
+//    array of values
+
+#define DB_INT  0
+#define DB_REAL 1
 
-//  The information for accessing QV streams is in a HITS_QV record that is a "pseudo-track"
+#define DB_EXACT 0
+#define DB_SUM   1
+
+typedef struct
+  { int   vtype;  //  INT64 or FLOAST64
+    int   nelem;  //  >= 1
+    int   accum;  //  EXACT, SUM
+    char *name;
+    void *value;
+  } DAZZ_EXTRA;
+
+//  The information for accessing QV streams is in a DAZZ_QV record that is a "pseudo-track"
 //    named ". at qvs" and is always the first track record in the list (if present).  Since normal
 //    track names cannot begin with a . (this is enforced), this pseudo-track is never confused
 //    with a normal track.
@@ -223,11 +342,11 @@ typedef struct
     uint16        *table;   //  for i in [0,db->nreads-1]: read i should be decompressed with
                             //    scheme coding[table[i]]
     FILE          *quiva;   //  the open file pointer to the .qvs file
-  } HITS_QV;
+  } DAZZ_QV;
 
 //  The DB record holds all information about the current state of an active DB including an
-//    array of HITS_READS, one per read, and a linked list of HITS_TRACKs the first of which
-//    is always a HITS_QV pseudo-track (if the QVs have been loaded).
+//    array of DAZZ_READS, one per read, and a linked list of DAZZ_TRACKs the first of which
+//    is always a DAZZ_QV pseudo-track (if the QVs have been loaded).
 
 typedef struct
   { int         ureads;     //  Total number of reads in untrimmed DB
@@ -257,9 +376,9 @@ typedef struct
     int         loaded;     //  Are reads loaded in memory?
     void       *bases;      //  file pointer for bases file (to fetch reads from),
                             //    or memory pointer to uncompressed block of all sequences.
-    HITS_READ  *reads;      //  Array [-1..nreads] of HITS_READ
-    HITS_TRACK *tracks;     //  Linked list of loaded tracks
-  } HITS_DB; 
+    DAZZ_READ  *reads;      //  Array [-1..nreads] of DAZZ_READ
+    DAZZ_TRACK *tracks;     //  Linked list of loaded tracks
+  } DAZZ_DB; 
 
 
 /*******************************************************************************************
@@ -294,7 +413,7 @@ typedef struct
   //          contain N-separated contigs), and .fpulse the first base of the contig in the
   //          fasta entry
 
-  // Open the given database or dam, "path" into the supplied HITS_DB record "db". If the name has
+  // Open the given database or dam, "path" into the supplied DAZZ_DB record "db". If the name has
   //   a part # in it then just the part is opened.  The index array is allocated (for all or
   //   just the part) and read in.
   // Return status of routine:
@@ -302,34 +421,34 @@ typedef struct
   //     0: Open of DB proceeded without mishap
   //     1: Open of DAM proceeded without mishap
 
-int Open_DB(char *path, HITS_DB *db);
+int Open_DB(char *path, DAZZ_DB *db);
 
   // Trim the DB or part thereof and all loaded tracks according to the cutoff and all settings
   //   of the current DB partition.  Reallocate smaller memory blocks for the information kept
   //   for the retained reads.
 
-void Trim_DB(HITS_DB *db);
+void Trim_DB(DAZZ_DB *db);
 
   // Shut down an open 'db' by freeing all associated space, including tracks and QV structures,
   //   and any open file pointers.  The record pointed at by db however remains (the user
   //   supplied it and so should free it).
 
-void Close_DB(HITS_DB *db);
+void Close_DB(DAZZ_DB *db);
 
   // Return the size in bytes of the given DB
 
-int64 sizeof_DB(HITS_DB *db);
+int64 sizeof_DB(DAZZ_DB *db);
 
   // If QV pseudo track is not already in db's track list, then load it and set it up.
   //   The database must not have been trimmed yet.  -1 is returned if a .qvs file is not
   //   present, and 1 is returned if an error (reported to EPLACE) occured and INTERACTIVE
   //   is defined.  Otherwise a 0 is returned.
 
-int Load_QVs(HITS_DB *db);
+int Load_QVs(DAZZ_DB *db);
 
   // Remove the QV pseudo track, all space associated with it, and close the .qvs file.
 
-void Close_QVs(HITS_DB *db);
+void Close_QVs(DAZZ_DB *db);
 
   // Look up the file and header in the file of the indicated track.  Return:
   //     1: Track is for trimmed DB
@@ -344,28 +463,47 @@ void Close_QVs(HITS_DB *db);
 #define CUSTOM_TRACK 0
 #define   MASK_TRACK 1
 
-int Check_Track(HITS_DB *db, char *track, int *kind);
+int Check_Track(DAZZ_DB *db, char *track, int *kind);
 
   // If track is not already in the db's track list, then allocate all the storage for it,
   //   read it in from the appropriate file, add it to the track list, and return a pointer
-  //   to the newly created HITS_TRACK record.  If the track does not exist or cannot be
+  //   to the newly created DAZZ_TRACK record.  If the track does not exist or cannot be
   //   opened for some reason, then NULL is returned if INTERACTIVE is defined.  Otherwise
   //   the routine prints an error message to stderr and exits if an error occurs, and returns
   //   with NULL only if the track does not exist.
 
-HITS_TRACK *Load_Track(HITS_DB *db, char *track);
+DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track);
+
+  // Assumming file pointer for afile is correctly positioned at the start of a extra item,
+  //   and aname is the name of the .anno file, decode the value present and places it in
+  //   extra if extra->nelem == 0, otherwise reduce the value just read into extra according
+  //   according the to the directive given by 'accum'.  Leave the read poinrt at the next
+  //   extra or end-of-file.
+  //   Returns:
+  //      1 if at the end of file,
+  //      0 if item was read and folded correctly,
+  //     -1 if there was a system IO or allocation error (if interactive), and
+  //     -2 if the new value could not be reduced into the currenct value of extra (interactive)
+
+int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra);
+
+//  Write extra record to end of file afile and advance write pointer
+//  If interactive, then return non-zero on error, if bash, then print
+//  and halt if an error
+
+int Write_Extra(FILE *afile, DAZZ_EXTRA *extra);
 
   // If track is on the db's track list, then it is removed and all storage associated with it
   //   is freed.
 
-void Close_Track(HITS_DB *db, char *track);
+void Close_Track(DAZZ_DB *db, char *track);
 
   // Allocate and return a buffer big enough for the largest read in 'db'.
   // **NB** free(x-1) if x is the value returned as *prefix* and suffix '\0'(4)-byte
   // are needed by the alignment algorithms.  If cannot allocate memory then return NULL
   // if INTERACTIVE is defined, or print error to stderr and exit otherwise.
 
-char *New_Read_Buffer(HITS_DB *db);
+char *New_Read_Buffer(DAZZ_DB *db);
 
   // Load into 'read' the i'th read in 'db'.  As a lower case ascii string if ascii is 1, an
   //   upper case ascii string if ascii is 2, and a numeric string over 0(A), 1(C), 2(G), and 3(T)
@@ -373,12 +511,12 @@ char *New_Read_Buffer(HITS_DB *db);
   //   for traversals in either direction.  A non-zero value is returned if an error occured
   //   and INTERACTIVE is defined.
 
-int  Load_Read(HITS_DB *db, int i, char *read, int ascii);
+int  Load_Read(DAZZ_DB *db, int i, char *read, int ascii);
 
   // Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence,
   //   and there is only a choice between numeric (0) or ascii (1);
 
-int  Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
+int  Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii);
 
   // Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
   //   the start of the subinterval (not necessarily = to read !!! ).  As a lower case ascii
@@ -387,7 +525,7 @@ int  Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
   //   the string holding the substring so it has a delimeter for traversals in either direction.
   //   A NULL pointer is returned if an error occured and INTERACTIVE is defined.
 
-char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii);
+char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii);
 
   // Allocate a set of 5 vectors large enough to hold the longest QV stream that will occur
   //   in the database.  If cannot allocate memory then return NULL if INTERACTIVE is defined,
@@ -399,13 +537,13 @@ char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii);
 #define SUB_QV  3   //  The substitution QVs
 #define MRG_QV  4   //  The merge QVs
 
-char **New_QV_Buffer(HITS_DB *db);
+char **New_QV_Buffer(DAZZ_DB *db);
 
   // Load into 'entry' the 5 QV vectors for i'th read in 'db'.  The deletion tag or characters
   //   are converted to a numeric or upper/lower case ascii string as per ascii.  Return with
   //   a zero, except when an error occurs and INTERACTIVE is defined in which case return wtih 1.
 
-int   Load_QVentry(HITS_DB *db, int i, char **entry, int ascii);
+int   Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii);
 
   // Allocate a block big enough for all the uncompressed sequences, read them into it,
   //   reset the 'off' in each read record to be its in-memory offset, and set the
@@ -415,7 +553,7 @@ int   Load_QVentry(HITS_DB *db, int i, char **entry, int ascii);
   //   Return with a zero, except when an error occurs and INTERACTIVE is defined in which
   //   case return wtih 1.
 
-int Read_All_Sequences(HITS_DB *db, int ascii);
+int Read_All_Sequences(DAZZ_DB *db, int ascii);
 
   // For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
   //   those of the form "prefix/[.]root.part" and call actor with the complete path to each file
@@ -429,4 +567,4 @@ int Read_All_Sequences(HITS_DB *db, int ascii);
 
 int List_DB_Files(char *path, void actor(char *path, char *extension));
 
-#endif // _HITS_DB
+#endif // _DAZZ_DB
diff --git a/REPqv.c b/REPqv.c
index 2b51f4a..f183e7b 100644
--- a/REPqv.c
+++ b/REPqv.c
@@ -25,7 +25,7 @@ static char *Usage = "<source:db> ...";
 #define  MAXQV   50     //  Max QV score is 50
 #define  MAXQV1  51
 
-static HITS_DB _DB, *DB  = &_DB;   //  Data base
+static DAZZ_DB _DB, *DB  = &_DB;   //  Data base
 
 static int64  *QV_IDX;     //  qual track index
 static uint8  *QV;         //  qual track values
@@ -48,7 +48,7 @@ int main(int argc, char *argv[])
     { int         status;
       char       *root;
       int         i, bval, gval, cover, hgap_min;
-      HITS_TRACK *track;
+      DAZZ_TRACK *track;
       int64       nreads, totlen;
       int64       qgram[MAXQV1];
       int64       qsum, qtotal;
diff --git a/REPtrim.c b/REPtrim.c
index 5e74674..831c677 100644
--- a/REPtrim.c
+++ b/REPtrim.c
@@ -30,10 +30,9 @@ static char *Usage = "<source:db> ...";
 #define ADSUF 4   //  Gap is due to adaptemer, trim suffix interval to right
 
 
-
 //  Global Variables (must exist across the processing of each pile)
 
-static HITS_DB _DB, *DB  = &_DB;   //  Data base
+static DAZZ_DB _DB, *DB  = &_DB;   //  Data base
 
 static int64  *TRIM_IDX;   //  trim track index
 static int    *TRIM;       //  trim track values
@@ -57,7 +56,7 @@ int main(int argc, char *argv[])
       char       *root;
       int         i, a, tb, te;
       int         alen;
-      HITS_TRACK *track;
+      DAZZ_TRACK *track;
       int64       nreads, totlen;
       int64       nelim, nelimbp;
       int64       n5trm, n5trmbp;
@@ -67,7 +66,8 @@ int main(int argc, char *argv[])
       int64       nlowq, nlowqbp;
       int64       nspan, nspanbp;
       int64       nchim, nchimbp;
-      int         BAD_QV, GOOD_QV, COVERAGE, HGAP_MIN;
+      int         rlog, blog;
+      int         BAD_QV, GOOD_QV, HGAP_MIN;
 
       status = Open_DB(argv[c],DB);
       if (status < 0)
@@ -104,25 +104,17 @@ int main(int argc, char *argv[])
           if (extra == 4*sizeof(int))
             { fread(&GOOD_QV,sizeof(int),1,afile);
               fread(&BAD_QV,sizeof(int),1,afile);
-              fread(&COVERAGE,sizeof(int),1,afile);
+              fread(&HGAP_MIN,sizeof(int),1,afile);
               fread(&HGAP_MIN,sizeof(int),1,afile);
             }
-          else if (extra == 3*sizeof(int))
-            { fread(&GOOD_QV,sizeof(int),1,afile);
-              fread(&BAD_QV,sizeof(int),1,afile);
-              fread(&COVERAGE,sizeof(int),1,afile);
-              HGAP_MIN = 0;
-            }
-          else if (extra == 2*sizeof(int))
+          else if (extra == 3*sizeof(int) || extra == 2*sizeof(int))
             { fread(&GOOD_QV,sizeof(int),1,afile);
               fread(&BAD_QV,sizeof(int),1,afile);
-              COVERAGE = -1;
               HGAP_MIN = 0;
             }
           else
             { GOOD_QV  = -1;
               BAD_QV   = -1;
-              COVERAGE = -1;
               HGAP_MIN = 0;
             }
           fclose(afile);
@@ -216,10 +208,35 @@ int main(int argc, char *argv[])
         printf(" [-H%d]",HGAP_MIN);
       printf(" %s\n\n",root);
 
+      { int64 mult;
+
+        rlog = 0;
+        mult = 1;
+        while (mult <= nreads || mult <= ngaps)
+          { mult *= 10;
+            rlog += 1;
+          }
+        if (rlog <= 3)
+          rlog = 3;
+        else
+          rlog += (rlog-1)/3;
+  
+        blog = 0;
+        mult = 1;
+        while (mult <= totlen)
+          { mult *= 10;
+            blog += 1;
+          }
+        if (blog <= 3)
+          blog = 3;
+        else
+          blog += (blog-1)/3;
+      }
+
       printf("  Input:    ");
-      Print_Number((int64) nreads,7,stdout);
+      Print_Number((int64) nreads,rlog,stdout);
       printf(" (100.0%%) reads     ");
-      Print_Number(totlen,12,stdout);
+      Print_Number(totlen,blog,stdout);
       printf(" (100.0%%) bases");
       if (HGAP_MIN > 0)
         { printf(" (another ");
@@ -229,69 +246,69 @@ int main(int argc, char *argv[])
       printf("\n");
 
       printf("  Trimmed:  ");
-      Print_Number(nelim,7,stdout);
+      Print_Number(nelim,rlog,stdout);
       printf(" (%5.1f%%) reads     ",(100.*nelim)/nreads);
-      Print_Number(nelimbp,12,stdout);
+      Print_Number(nelimbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*nelimbp)/totlen);
 
       printf("  5' trim:  ");
-      Print_Number(n5trm,7,stdout);
+      Print_Number(n5trm,rlog,stdout);
       printf(" (%5.1f%%) reads     ",(100.*n5trm)/nreads);
-      Print_Number(n5trmbp,12,stdout);
+      Print_Number(n5trmbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*n5trmbp)/totlen);
 
       printf("  3' trim:  ");
-      Print_Number(n3trm,7,stdout);
+      Print_Number(n3trm,rlog,stdout);
       printf(" (%5.1f%%) reads     ",(100.*n3trm)/nreads);
-      Print_Number(n3trmbp,12,stdout);
+      Print_Number(n3trmbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*n3trmbp)/totlen);
 
       if (natrm > 0)
         { printf("  Adapter:  ");
-          Print_Number(natrm,7,stdout);
+          Print_Number(natrm,rlog,stdout);
           printf(" (%5.1f%%) reads     ",(100.*natrm)/nreads);
-          Print_Number(natrmbp,12,stdout);
+          Print_Number(natrmbp,blog,stdout);
           printf(" (%5.1f%%) bases\n",(100.*natrmbp)/totlen);
         }
 
       printf("\n");
 
       printf("  Gaps:     ");
-      Print_Number(ngaps,7,stdout);
+      Print_Number(ngaps,rlog,stdout);
       printf(" (%5.1f%%) gaps      ",(100.*(ngaps))/nreads);
-      Print_Number(ngapsbp,12,stdout);
+      Print_Number(ngapsbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*(ngapsbp))/totlen);
 
       printf("    Low QV: ");
-      Print_Number(nlowq,7,stdout);
+      Print_Number(nlowq,rlog,stdout);
       printf(" (%5.1f%%) gaps      ",(100.*(nlowq))/nreads);
-      Print_Number(nlowqbp,12,stdout);
+      Print_Number(nlowqbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*(nlowqbp))/totlen);
 
       printf("    Span'd: ");
-      Print_Number(nspan,7,stdout);
+      Print_Number(nspan,rlog,stdout);
       printf(" (%5.1f%%) gaps      ",(100.*(nspan))/nreads);
-      Print_Number(nspanbp,12,stdout);
+      Print_Number(nspanbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*(nspanbp))/totlen);
 
       printf("    Break:  ");
-      Print_Number(nchim,7,stdout);
+      Print_Number(nchim,rlog,stdout);
       printf(" (%5.1f%%) gaps      ",(100.*(nchim))/nreads);
-      Print_Number(nchimbp,12,stdout);
+      Print_Number(nchimbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*(nchimbp))/totlen);
 
       printf("\n");
 
       printf("  Clipped:  ");
-      Print_Number(n5trm+n3trm+nelim+nchim,7,stdout);
+      Print_Number(n5trm+n3trm+nelim+nchim,rlog,stdout);
       printf(" clips              ");
-      Print_Number(n5trmbp+n3trmbp+nelimbp+nchimbp,12,stdout);
+      Print_Number(n5trmbp+n3trmbp+nelimbp+nchimbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*(n5trmbp+n3trmbp+nelimbp+nchimbp))/totlen);
 
       printf("  Patched:  ");
-      Print_Number(nlowq+nspan,7,stdout);
+      Print_Number(nlowq+nspan,rlog,stdout);
       printf(" patches            ");
-      Print_Number(nlowqbp+nspanbp,12,stdout);
+      Print_Number(nlowqbp+nspanbp,blog,stdout);
       printf(" (%5.1f%%) bases\n",(100.*(nlowqbp+nspanbp))/totlen);
 
       free(root);
diff --git a/align.c b/align.c
index d4fdfe8..e408eb9 100644
--- a/align.c
+++ b/align.c
@@ -161,6 +161,7 @@ static double Bias_Factor[10] = { .690, .690, .690, .690, .780,
 typedef struct
   { double ave_corr;
     int    trace_space;
+    int    reach;
     float  freq[4];
     int    ave_path;
     int16 *score;
@@ -196,7 +197,7 @@ static void set_table(int bit, int prefix, int score, int max, Table_Bits *parms
 
 /* Create an alignment specification record including path tip tables & values */
 
-Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq)
+Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq, int reach)
 { _Align_Spec *spec;
   Table_Bits   parms;
   double       match;
@@ -208,6 +209,7 @@ Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq)
 
   spec->ave_corr    = ave_corr;
   spec->trace_space = trace_space;
+  spec->reach       = reach;
   spec->freq[0]     = freq[0];
   spec->freq[1]     = freq[1];
   spec->freq[2]     = freq[2];
@@ -257,6 +259,9 @@ int Trace_Spacing(Align_Spec *espec)
 float *Base_Frequencies(Align_Spec *espec)
 { return (((_Align_Spec *) espec)->freq); }
 
+int Overlap_If_Possible(Align_Spec *espec)
+{ return (((_Align_Spec *) espec)->reach); }
+
 
 /****************************************************************************************\
 *                                                                                        *
@@ -343,6 +348,7 @@ static int forward_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
 
   int     TRACE_SPACE = spec->trace_space;
   int     PATH_AVE    = spec->ave_path;
+  int     REACH       = spec->reach;
   int16  *SCORE       = spec->score;
   int16  *TABLE       = spec->table;
 
@@ -874,7 +880,7 @@ static int forward_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
     int     a, b, k, h;
     int     d, e;
 
-    if (morem >= 0)
+    if (morem >= 0 && REACH)
       { trimx  = morea-morey;
         trimy  = morey;
         trimd  = mored;
@@ -1004,6 +1010,7 @@ static int reverse_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
 
   int     TRACE_SPACE = spec->trace_space;
   int     PATH_AVE    = spec->ave_path;
+  int     REACH       = spec->reach;
   int16  *SCORE       = spec->score;
   int16  *TABLE       = spec->table;
 
@@ -1527,7 +1534,7 @@ static int reverse_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
     int     a, b, k, h;
     int     d, e;
 
-    if (morem >= 0)
+    if (morem >= 0 && REACH)
       { trimx  = morea-morey;
         trimy  = morey;
         trimd  = mored;
@@ -4194,7 +4201,10 @@ static int ToA[4] = { 'a', 'c', 'g', 't' };
 
 #endif
 
-static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
+static char *TP_Align =
+         "Bad alignment between trace points (Compute_Trace), source DB likely incorrect";
+
+static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode, int dmax)
 { int  **PVF = wave->PVF; 
   int  **PHF = wave->PHF;
   int    D;
@@ -4226,17 +4236,21 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
         hgh = 0;
       }
 
-    posl = -INT32_MAX;
-    posh =  INT32_MAX;
+    posl = -dmax;
+    posh =  dmax;
     if (wave->Aabs == wave->Babs)
       { if (B == A)
-          { EPRINTF(EPLACE,"Error: self comparison starts on diagonal 0 (Compute_Trace)\n");
+          { EPRINTF(EPLACE,"%s: self comparison starts on diagonal 0 (Compute_Trace)\n",Prog_Name);
             EXIT(-1);
           }
         else if (B < A)
-          posl = (B-A)+1;
+          { if ((B-A)+1 > posl)
+              posl = (B-A)+1;
+          }
         else
-          posh = (B-A)-1;
+          { if ((B-A)-1 < posh)
+              posh = (B-A)-1;
+          }
       }
 
     F1 = PVF[-2];
@@ -4254,6 +4268,11 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
         int   am, ac, ap;
         char *a;
 
+        if (D > dmax)
+          { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Align);
+            EXIT(-1);
+          }
+
         F2 = F1;
         F1 = F0;
         F0 = PVF[D];
@@ -4523,7 +4542,7 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
   return (D + abs(del));
 }
 
-static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
+static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode, int dmax)
 { int  **PVF = wave->PVF; 
   int  **PHF = wave->PHF;
   int    D;
@@ -4555,17 +4574,21 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode
         hgh = 0;
       }
 
-    posl = -INT32_MAX;
-    posh =  INT32_MAX;
+    posl = -dmax;
+    posh =  dmax;
     if (wave->Aabs == wave->Babs)
       { if (B == A)
-          { EPRINTF(EPLACE,"Error: self comparison starts on diagonal 0 (Compute_Trace)\n");
+          { EPRINTF(EPLACE,"%s: self comparison starts on diagonal 0 (Compute_Trace)\n",Prog_Name);
             EXIT(1);
           }
         else if (B < A)
-          posl = (B-A)+1;
+          { if ((B-A)+1 > posl)
+              posl = (B-A)+1;
+          }
         else
-          posh = (B-A)-1;
+          { if ((B-A)-1 < posh)
+              posh = (B-A)-1;
+          }
       }
 
     F1 = PVF[-2];
@@ -4583,6 +4606,11 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode
         int   am, ac, ap;
         char *a;
 
+        if (D > dmax)
+          { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Align);
+            EXIT(-1);
+          }
+
         F2 = F1;
         F1 = F0;
         F0 = PVF[D];
@@ -4795,14 +4823,20 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode
 *                                                                                        *
 \****************************************************************************************/
 
+static char *TP_Error = "Trace point out of bounds (Compute_Trace), source DB likely incorrect";
+
 int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
 { _Work_Data *work = (_Work_Data *) ework;
   Trace_Waves wave;
 
   Path *path;
   char *aseq, *bseq;
+  int   alen, blen;
   int   M, N, D;
+  int   dmax;
 
+  alen   = align->alen;
+  blen   = align->blen;
   path = align->path;
   aseq = align->aseq;
   bseq = align->bseq;
@@ -4812,7 +4846,6 @@ int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
   
   { int64 s;
     int   d;
-    int   dmax;
     int   **PVF, **PHF;
 
     if (M < N)
@@ -4851,7 +4884,11 @@ int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
   wave.Aabs = aseq;
   wave.Babs = bseq;
 
-  D = iter_np(aseq+path->abpos,M,bseq+path->bbpos,N,&wave,GREEDIEST);
+  if (path->aepos > alen || path->bepos > blen)
+    { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+      EXIT(1);
+    }
+  D = iter_np(aseq+path->abpos,M,bseq+path->bbpos,N,&wave,GREEDIEST,dmax);
   if (D < 0)
     EXIT(1);
   path->diffs = D;
@@ -4867,12 +4904,15 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
 
   Path   *path;
   char   *aseq, *bseq;
+  int     alen, blen;
   uint16 *points;
   int     tlen;
   int     ab, bb;
   int     ae, be;
-  int     diffs;
+  int     diffs, dmax;
 
+  alen   = align->alen;
+  blen   = align->blen;
   path   = align->path;
   aseq   = align->aseq;
   bseq   = align->bseq;
@@ -4882,7 +4922,7 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
   { int64 s;
     int   d;
     int   M, N;
-    int   dmax, nmax;
+    int   nmax;
     int   **PVF, **PHF;
 
     M = path->aepos-path->abpos;
@@ -4938,7 +4978,11 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
     for (i = 1; i < tlen; i += 2)
       { ae = ae + trace_spacing;
         be = bb + points[i];
-        d  = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
+        if (ae > alen || be > blen)
+          { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+            EXIT(1);
+          }
+        d  = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax);
         if (d < 0)
           EXIT(1);
         diffs += d;
@@ -4947,7 +4991,11 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
       }
     ae = path->aepos;
     be = path->bepos;
-    d  = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
+    if (ae > alen || be > blen)
+      { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+        EXIT(1);
+      }
+    d  = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax);
     if (d < 0)
       EXIT(1);
     diffs += d;
@@ -4966,12 +5014,15 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
 
   Path   *path;
   char   *aseq, *bseq;
+  int     alen, blen;
   uint16 *points;
   int     tlen;
   int     ab, bb;
   int     ae, be;
-  int     diffs;
+  int     diffs, dmax;
 
+  alen   = align->alen;
+  blen   = align->blen;
   path   = align->path;
   aseq   = align->aseq;
   bseq   = align->bseq;
@@ -4981,7 +5032,7 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
   { int64 s;
     int   d;
     int   M, N;
-    int   dmax, nmax;
+    int   nmax;
     int   **PVF, **PHF;
 
     M = path->aepos-path->abpos;
@@ -5039,11 +5090,15 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
     for (i = 1; i < tlen; i += 2) 
       { ae = ae + trace_spacing;
         be = bb + points[i];
-        if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode))
+        if (ae > alen || be > blen)
+          { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+            EXIT(1);
+          }
+        if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax))
           EXIT(1);
         af = wave.mida;
         bf = wave.midb;
-        d  = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode);
+        d  = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode,dmax);
         if (d < 0)
           EXIT(1);
         diffs += d;
@@ -5056,18 +5111,22 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
     ae = path->aepos;
     be = path->bepos;
 
-    if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode))
+    if (ae > alen || be > blen)
+      { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+        EXIT(1);
+      }
+    if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax))
       EXIT(1);
     af = wave.mida;
     bf = wave.midb;
-    d  = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode);
+    d  = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode,dmax);
     if (d < 0)
       EXIT(1);
     diffs += d;
     as = af;
     bs = bf;
     
-    d += iter_np(aseq+af,ae-as,bseq+bf,be-bs,&wave,mode);
+    d += iter_np(aseq+af,ae-as,bseq+bf,be-bs,&wave,mode,dmax);
     if (d < 0)
       EXIT(1);
     diffs += d;
@@ -5086,12 +5145,15 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
 
   Path   *path;
   char   *aseq, *bseq;
+  int     alen, blen;
   uint16 *points;
   int     tlen;
   int     ab, bb;
   int     ae, be;
-  int     diffs;
+  int     diffs, dmax;
 
+  alen   = align->alen;
+  blen   = align->blen;
   path   = align->path;
   aseq   = align->aseq;
   bseq   = align->bseq;
@@ -5101,7 +5163,7 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
   { int64 s;
     int   d;
     int   M, N;
-    int   mmax, nmax, dmax;
+    int   mmax, nmax;
     int   **PVF, **PHF;
 
     M = path->aepos-path->abpos;
@@ -5160,7 +5222,11 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
     for (i = 0; i < tlen; i += 2)
       { ae = ab + points[i];
         be = bb + points[i+1];
-        d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
+        if (ae > alen || be > blen)
+          { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+            EXIT(1);
+          }
+        d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax);
         if (d < 0)
           EXIT(1);
         diffs += d;
diff --git a/align.h b/align.h
index daa9151..d5f49a9 100644
--- a/align.h
+++ b/align.h
@@ -138,6 +138,10 @@ typedef struct
 #define CHAIN_NEXT(x)   ((x) & NEXT_FLAG)
 #define BEST_CHAIN(x)   ((x) & BEST_FLAG)
 
+#define ELIM_FLAG  0x20  //  This LA should be ignored
+
+#define ELIM(x)  ((x) & ELIM_FLAG)
+
 typedef struct
   { Path   *path;
     uint32  flags;        /* Pipeline status and complementation flags          */
@@ -173,7 +177,9 @@ void Complement_Seq(char *a, int n);
                     description of 'trace' for Paths above)
      freq[4]:     a 4-element vector where afreq[0] = frequency of A, f(A), freq[1] = f(C),
                     freq[2] = f(G), and freq[3] = f(T).  This vector is part of the header
-                    of every HITS database (see db.h).
+                    of every DAZZ database (see db.h).
+     reach:       a boolean, if set alignment extend to the boundary when reasonable, otherwise
+                    the terminate only at suffix-positive points.
 
      If an alignment cannot reach the boundary of the d.p. matrix with this condition (i.e.
      overlap), then the last/first 30 columns of the alignment are guaranteed to be
@@ -187,13 +193,14 @@ void Complement_Seq(char *a, int n);
 
   typedef void Align_Spec;
 
-  Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq);
+  Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq, int reach);
 
   void        Free_Align_Spec(Align_Spec *spec);
 
   int    Trace_Spacing      (Align_Spec *spec);
   double Average_Correlation(Align_Spec *spec);
   float *Base_Frequencies   (Align_Spec *spec);
+  int    Overlap_If_Possible(Align_Spec *spec);
 
   /* Local_Alignment finds the longest significant local alignment between the sequences in
      'align' subject to:
@@ -303,7 +310,7 @@ void Complement_Seq(char *a, int n);
 /*** OVERLAP ABSTRACTION:
 
      Externally, between modules an Alignment is modeled by an "Overlap" record, which
-     (a) replaces the pointers to the two sequences with their ID's in the HITS data bases,
+     (a) replaces the pointers to the two sequences with their ID's in the DAZZ data bases,
      (b) does not contain the length of the 2 sequences (must fetch from DB), and
      (c) contains its path as a subrecord rather than as a pointer (indeed, typically the
      corresponding Alignment record points at the Overlap's path sub-record).  The trace pointer

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/dascrubber.git



More information about the debian-med-commit mailing list