[med-svn] [dazzdb] 01/10: Imported Upstream version 1.0+20160930

Afif Elghraoui afif at moszumanska.debian.org
Wed Oct 12 09:04:23 UTC 2016


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository dazzdb.

commit f9c46a18a31b3ee59aa4e76b12d60981c2dd8a26
Author: Afif Elghraoui <afif at debian.org>
Date:   Tue Oct 11 21:24:32 2016 -0700

    Imported Upstream version 1.0+20160930
---
 Catrack.c                |  57 ++---
 DAM2fasta.c              |  60 ++----
 DB.c                     | 284 ++++++++++++++++++------
 DB.h                     |  78 +++----
 DB2fasta.c               | 108 +++++-----
 DB2quiva.c               |  97 ++++-----
 DBshow.c => DBdump.c     | 504 +++++++++++++++++++++++++++++--------------
 DBdust.c                 |  39 +---
 DBrm.c                   |  37 ----
 DBshow.c                 |  55 +----
 DBsplit.c                |  68 ++----
 DBstats.c                | 178 +++++++--------
 DBupgrade.Dec.31.2014.c  | 115 ----------
 DBupgrade.Sep.25.2014.c  | 125 -----------
 DUSTupgrade.Jan.1.2015.c | 117 ----------
 LICENSE                  |  34 +++
 Makefile                 |  28 ++-
 QV.c                     |  93 ++++----
 QV.h                     |  55 ++---
 README                   | 442 --------------------------------------
 README.md                | 501 +++++++++++++++++++++++++++++++++++++++++++
 fasta2DAM.c              | 477 ++++++++++++++++++++++++++++++-----------
 fasta2DB.c               | 290 +++++++++++++++----------
 quiva2DB.c               | 546 +++++++++++++++++++++++++++++------------------
 rangen.c                 | 182 ++++++++++++++++
 simulator.c              | 473 ++++++++++++++++++++++++----------------
 26 files changed, 2771 insertions(+), 2272 deletions(-)

diff --git a/Catrack.c b/Catrack.c
index 8df7e3e..14d9b64 100644
--- a/Catrack.c
+++ b/Catrack.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /********************************************************************************************
  *
  *  Concate in block order all "block tracks" <DB>.<track>.# into a single track
@@ -126,6 +89,7 @@ int main(int argc, char *argv[])
     int   nfiles;
     char  data[1024];
     void *anno;
+    FILE *lfile = NULL;
 
     anno     = NULL;
     trackoff = 0;
@@ -135,7 +99,7 @@ int main(int argc, char *argv[])
 
     nfiles = 0;
     while (1)
-      { FILE *afile, *dfile;
+      { FILE *dfile, *afile;
         int   i, size, tracklen;
 
         afile = fopen(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","anno")),"r");
@@ -143,6 +107,10 @@ int main(int argc, char *argv[])
           break;
         dfile = fopen(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","data")),"r");
 
+        if (nfiles > 0)
+          fclose(lfile);
+        lfile = afile;
+
         if (VERBOSE)
           { fprintf(stderr,"Concatenating %s%d.%s ...\n",prefix,nfiles+1,argv[2]);
             fflush(stderr);
@@ -247,7 +215,6 @@ int main(int argc, char *argv[])
         tracktot += tracklen;
         nfiles   += 1;
         if (dfile != NULL) fclose(dfile);
-        fclose(afile);
       }
   
     if (nfiles == 0)
@@ -256,7 +223,9 @@ int main(int argc, char *argv[])
         goto error;
       }
     else
-      { if (dout != NULL)
+      { char *byte;
+
+        if (dout != NULL)
           { if (tracksiz == 4)
               { int anno4 = trackoff;
                 fwrite(&anno4,sizeof(int),1,aout);
@@ -266,10 +235,10 @@ int main(int argc, char *argv[])
                 fwrite(&anno8,sizeof(int64),1,aout);
               }
           }
-        else
-          { fwrite(anno,tracksiz,1,aout);
-            free(anno);
-          }
+        while (fread(&byte,1,1,lfile) == 1)
+          fwrite(&byte,1,1,aout);
+        fclose(lfile);
+
         rewind(aout);
         fwrite(&tracktot,sizeof(int),1,aout);
         fwrite(&tracksiz,sizeof(int),1,aout);
diff --git a/DAM2fasta.c b/DAM2fasta.c
index 57ca2f7..9c6ffff 100644
--- a/DAM2fasta.c
+++ b/DAM2fasta.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /********************************************************************************************
  *
  *  Recreate all the .fasta files that are in a specified DAM.
@@ -160,12 +123,22 @@ int main(int argc, char *argv[])
         if (fscanf(dbfile,DB_FDATA,&last,fname,prolog) != 3)
           SYSTEM_ERROR
 
-        if ((ofile = Fopen(Catenate(".","/",fname,".fasta"),"w")) == NULL)
-          exit (1);
+        if (strcmp(fname,"stdout") == 0)
+          { ofile  = stdout;
+
+            if (VERBOSE)
+              { fprintf(stderr,"Sending %d contigs to stdout ...\n",last-first);
+                fflush(stdout);
+              }
+          }
+        else
+          { if ((ofile = Fopen(Catenate(".","/",fname,".fasta"),"w")) == NULL)
+              exit (1);
 
-        if (VERBOSE)
-          { fprintf(stderr,"Creating %s.fasta ...\n",fname);
-            fflush(stdout);
+            if (VERBOSE)
+              { fprintf(stderr,"Creating %s.fasta ...\n",fname);
+                fflush(stdout);
+              }
           }
 
         //   For the relevant range of reads, write each to the file
@@ -225,6 +198,9 @@ int main(int argc, char *argv[])
         if (wpos > 0)
           fprintf(ofile,"\n");
 
+        if (ofile != stdout)
+          fclose(ofile);
+
         first = last;
       }
   }
diff --git a/DB.c b/DB.c
index 27b202e..3764842 100644
--- a/DB.c
+++ b/DB.c
@@ -1,47 +1,9 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Compressed data base module.  Auxiliary routines to open and manipulate a data base for
  *    which the sequence and read information are separated into two separate files, and the
  *    sequence is compressed into 2-bits for each base.  Support for tracks of additional
- *    information, and trimming according to the current partition.  Eventually will also
- *    support compressed quality information.
+ *    information, and trimming according to the current partition.
  *
  *  Author :  Gene Myers
  *  Date   :  July 2013
@@ -92,7 +54,9 @@ void *Malloc(int64 size, char *mesg)
 }
 
 void *Realloc(void *p, int64 size, char *mesg)
-{ if ((p = realloc(p,size)) == NULL)
+{ if (size <= 0)
+    size = 1;
+  if ((p = realloc(p,size)) == NULL)
     { if (mesg == NULL)
         EPRINTF(EPLACE,"%s: Out of memory\n",Prog_Name);
       else
@@ -509,10 +473,12 @@ int Open_DB(char* path, HITS_DB *db)
   nreads = ulast-ufirst;
   if (part <= 0)
     { db->reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+      if (db->reads == NULL)
+        goto error2;
       db->reads += 1;
       if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
         { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
-          free(db->reads);
+          free(db->reads-1);
           goto error2;
         }
     }
@@ -521,13 +487,15 @@ int Open_DB(char* path, HITS_DB *db)
       int        i, r, maxlen;
       int64      totlen;
 
-      reads  = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+      reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+      if (reads == NULL)
+        goto error2;
       reads += 1;
 
       fseeko(index,sizeof(HITS_READ)*ufirst,SEEK_CUR);
       if (fread(reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
         { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
-          free(reads);
+          free(reads-1);
           goto error2;
         }
 
@@ -679,6 +647,111 @@ void Trim_DB(HITS_DB *db)
     }
 }
 
+// The DB has already been trimmed, but a track over the untrimmed DB needs to be loaded.
+//   Trim the track by rereading the untrimmed DB index from the file system.
+
+static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
+{ int         i, j, r;
+  int         allflag, cutoff;
+  int         ureads;
+  char       *root;
+  HITS_READ   read;
+  FILE       *indx;
+
+  if (!db->trimmed) return (0);
+
+  if (db->cutoff <= 0 && db->all) return (0);
+
+  cutoff = db->cutoff;
+  if (db->all)
+    allflag = 0;
+  else
+    allflag = DB_BEST;
+
+  root = rindex(db->path,'/') + 2;
+  indx = Fopen(Catenate(db->path,"","",".idx"),"r");
+  fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*db->ufirst,SEEK_SET);
+  if (ispart)
+    ureads = ((int *) (db->reads))[-1];
+  else
+    ureads = db->ureads;
+
+  if (strcmp(track->name,". at qvs") == 0)
+    { EPRINTF(EPLACE,"%s: Cannot load QV track after trimming\n",Prog_Name);
+      fclose(indx);
+      EXIT(1);
+    }
+
+  { int   *anno4, size;
+    int64 *anno8;
+    char  *anno, *data;
+
+    size = track->size;
+    data = (char *) track->data; 
+    if (data == NULL)
+      { anno = (char *) track->anno;
+        j = r = 0;
+        for (i = r = 0; i < ureads; i++, r += size)
+          { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+              { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
+                fclose(indx);
+                EXIT(1);
+              }
+            if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
+              { memmove(anno+j,anno+r,size);
+                j += size;
+              }
+            r += size;
+          }
+        memmove(anno+j,anno+r,size);
+      }
+    else if (size == 4)
+      { int ai;
+
+        anno4 = (int *) (track->anno);
+        j = anno4[0] = 0;
+        for (i = 0; i < ureads; i++)
+          { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+              { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
+                fclose(indx);
+                EXIT(1);
+              }
+            if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
+              { ai = anno4[i];
+                anno4[j+1] = anno4[j] + (anno4[i+1]-ai);
+                memmove(data+anno4[j],data+ai,anno4[i+1]-ai);
+                j += 1;
+              }
+          }
+        track->data = Realloc(track->data,anno4[j],NULL);
+      }
+    else // size == 8
+      { int64 ai;
+
+        anno8 = (int64 *) (track->anno);
+        j = anno8[0] = 0;
+        for (i = 0; i < ureads; i++)
+          { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+              { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
+                fclose(indx);
+                EXIT(1);
+              }
+            if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
+              { ai = anno8[i];
+                anno8[j+1] = anno8[j] + (anno8[i+1]-ai);
+                memmove(data+anno8[j],data+ai,anno8[i+1]-ai);
+                j += 1;
+              }
+          }
+        track->data = Realloc(track->data,anno8[j],NULL);
+      }
+    track->anno = Realloc(track->anno,track->size*(j+1),NULL);
+  }
+
+  fclose(indx);
+  return (0);
+}
+
 // Shut down an open 'db' by freeing all associated space, including tracks and QV structures, 
 //   and any open file pointers.  The record pointed at by db however remains (the user
 //   supplied it and so should free it).
@@ -690,7 +763,8 @@ void Close_DB(HITS_DB *db)
     free(((char *) (db->bases)) - 1);
   else if (db->bases != NULL)
     fclose((FILE *) db->bases);
-  free(db->reads-1);
+  if (db->reads != NULL)
+    free(db->reads-1);
   free(db->path);
 
   Close_QVs(db);
@@ -704,6 +778,43 @@ void Close_DB(HITS_DB *db)
 }
 
 
+// Return the size in bytes of the memory occupied by a given DB
+
+int64 sizeof_DB(HITS_DB *db)
+{ int64       s;
+  HITS_TRACK *t;
+
+  s = sizeof(HITS_DB)
+    + sizeof(HITS_READ)*(db->nreads+2)
+    + strlen(db->path)+1
+    + (db->totlen+db->nreads+4);
+
+  t = db->tracks;
+  if (t != NULL && strcmp(t->name,". at qvs") == 0)
+    { HITS_QV *q = (HITS_QV *) t;
+      s += sizeof(HITS_QV)
+         + sizeof(uint16) * db->nreads
+         + q->ncodes * sizeof(QVcoding)
+         + 6;
+      t = t->next;
+    }
+
+  for (; t != NULL; t = t->next)
+    { s += sizeof(HITS_TRACK)
+         + strlen(t->name)+1
+         + t->size * (db->nreads+1);
+      if (t->data != NULL)
+        { if (t->size == 8)
+            s += sizeof(int)*((int64 *) t->anno)[db->nreads];
+          else //  t->size == 4
+            s += sizeof(int)*((int *) t->anno)[db->nreads];
+        }
+    }
+
+  return (s);
+}
+
+
 /*******************************************************************************************
  *
  *  QV LOAD & CLOSE ROUTINES
@@ -719,7 +830,7 @@ int Load_QVs(HITS_DB *db)
   uint16      *table;
   HITS_QV     *qvtrk;
   QVcoding    *coding, *nx;
-  int          ncodes;
+  int          ncodes = 0;
 
   if (db->tracks != NULL && strcmp(db->tracks->name,". at qvs") == 0)
     return (0);
@@ -730,8 +841,14 @@ int Load_QVs(HITS_DB *db)
     }
 
   if (db->reads[db->nreads-1].coff < 0)
-    { EPRINTF(EPLACE,"%s: The requested QVs have not been added to the DB!\n",Prog_Name);
-      EXIT(1);
+    { if (db->part > 0)
+        { EPRINTF(EPLACE,"%s: All QVs for this block have not been added to the DB!\n",Prog_Name);
+          EXIT(1);
+        }
+      else
+        { EPRINTF(EPLACE,"%s: All QVs for this DB have not been added!\n",Prog_Name);
+          EXIT(1);
+        }
     }
 
   //  Open .qvs, .idx, and .db files
@@ -746,8 +863,14 @@ int Load_QVs(HITS_DB *db)
   coding = NULL;
   qvtrk  = NULL;
 
-  root = rindex(db->path,'/') + 2;
-  istub = Fopen(Catenate(db->path,"/",root,".db"),"r");
+  root = rindex(db->path,'/');
+  if (root[1] == '.')
+    { *root = '\0';
+      istub = Fopen(Catenate(db->path,"/",root+2,".db"),"r");
+      *root = '/';
+    }
+  else
+    istub = Fopen(Catenate(db->path,"","",".db"),"r");
   if (istub == NULL)
     goto error;
 
@@ -805,16 +928,16 @@ int Load_QVs(HITS_DB *db)
         //    assign the tables # for each read in the block in "tables".
 
         rewind(istub);
-        fscanf(istub,DB_NFILE,&nfiles);
+        (void) fscanf(istub,DB_NFILE,&nfiles);
 
         first = 0;
         for (n = 0; n < fbeg; n++)
-          { fscanf(istub,DB_FDATA,&last,fname,prolog);
+          { (void) fscanf(istub,DB_FDATA,&last,fname,prolog);
             first = last;
           }
 
         for (n = fbeg; n < fend; n++)
-          { fscanf(istub,DB_FDATA,&last,fname,prolog);
+          { (void) fscanf(istub,DB_FDATA,&last,fname,prolog);
 
             i = n-fbeg;
             if (first < pfirst)
@@ -969,9 +1092,9 @@ void Close_QVs(HITS_DB *db)
 //    -1: Track is not the right size of DB either trimmed or untrimmed
 //    -2: Could not find the track 
 
-int Check_Track(HITS_DB *db, char *track)
+int Check_Track(HITS_DB *db, char *track, int *kind)
 { FILE       *afile;
-  int         tracklen, ispart;
+  int         tracklen, size, ispart;
   int         ureads, treads;
 
   afile = NULL;
@@ -987,8 +1110,23 @@ int Check_Track(HITS_DB *db, char *track)
     return (-2);
 
   if (fread(&tracklen,sizeof(int),1,afile) != 1)
-    return (-1);
+    { fprintf(stderr,"%s: track files for %s are corrupted\n",Prog_Name,track);
+      exit (1);
+    }
+  if (fread(&size,sizeof(int),1,afile) != 1)
+    { fprintf(stderr,"%s: track files for %s are corrupted\n",Prog_Name,track);
+      exit (1);
+    }
 
+  if (size == 0)
+    *kind = MASK_TRACK;
+  else if (size > 0)
+    *kind = CUSTOM_TRACK;
+  else
+    { fprintf(stderr,"%s: track files for %s are corrupted\n",Prog_Name,track);
+      exit (1);
+    }
+  
   fclose(afile);
 
   if (ispart)
@@ -1000,10 +1138,10 @@ int Check_Track(HITS_DB *db, char *track)
       treads = db->treads;
     }
 
-  if (tracklen == treads)
-    return (1);
-  else if (tracklen == ureads)
+  if (tracklen == ureads)
     return (0);
+  else if (tracklen == treads)
+    return (1);
   else
     return (-1);
 }
@@ -1067,10 +1205,13 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
     { EPRINTF(EPLACE,"%s: Track '%s' annotation file is junk\n",Prog_Name,track);
       goto error;
     }
-  if (size <= 0)
+
+  if (size < 0)
     { EPRINTF(EPLACE,"%s: Track '%s' annotation file is junk\n",Prog_Name,track);
       goto error;
     }
+  if (size == 0)
+    size = 8;
 
   if (ispart)
     { ureads = ((int *) (db->reads))[-1];
@@ -1082,22 +1223,32 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
     }
 
   if (db->trimmed)
-    { if (tracklen != treads)
+    { if (tracklen != treads && tracklen != ureads)
         { EPRINTF(EPLACE,"%s: Track '%s' not same size as database !\n",Prog_Name,track);
           goto error;
         }
       if ( ! ispart && db->part > 0)
-        fseeko(afile,size*db->tfirst,SEEK_CUR);
+        { if (tracklen == treads)
+            fseeko(afile,size*db->tfirst,SEEK_CUR);
+          else
+            fseeko(afile,size*db->ufirst,SEEK_CUR);
+        }
     }
   else
     { if (tracklen != ureads)
-        { EPRINTF(EPLACE,"%s: Track '%s' not same size as database !\n",Prog_Name,track);
+        { if (tracklen == treads)
+            EPRINTF(EPLACE,"%s: Track '%s' is for a trimmed DB !\n",Prog_Name,track);
+          else
+            EPRINTF(EPLACE,"%s: Track '%s' not same size as database !\n",Prog_Name,track);
           goto error;
         }
       if ( ! ispart && db->part > 0)
         fseeko(afile,size*db->ufirst,SEEK_CUR);
     }
-  nreads = db->nreads;
+  if (tracklen == treads)
+    nreads = ((int *) (db->reads))[-2];
+  else
+    nreads = ((int *) (db->reads))[-1];
 
   anno = (void *) Malloc(size*(nreads+1),"Allocating Track Anno Vector");
   if (anno == NULL)
@@ -1166,6 +1317,11 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
   record->anno = anno;
   record->size = size;
 
+  if (db->trimmed && tracklen != treads)
+    { if (Late_Track_Trim(db,record,ispart))
+        goto error;
+    }
+
   if (db->tracks != NULL && strcmp(db->tracks->name,". at qvs") == 0)
     { record->next     = db->tracks->next;
       db->tracks->next = record;
@@ -1178,7 +1334,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
   return (record);
 
 error:
-  if (record == NULL)
+  if (record != NULL)
     free(record);
   if (data != NULL)
     free(data);
diff --git a/DB.h b/DB.h
index 567a91a..983bee7 100644
--- a/DB.h
+++ b/DB.h
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Compressed data base module.  Auxiliary routines to open and manipulate a data base for
@@ -71,8 +34,6 @@
 //    value.  For such routines that were previously void, they are now int, and
 //    return 1 if an error occured, 0 otherwise.
 
-#undef INTERACTIVE
-
 #ifdef INTERACTIVE
 
 #define EPRINTF sprintf
@@ -135,7 +96,8 @@ extern char Ebuffer[];
 #define ARG_POSITIVE(var,name)                                                          \
   var = strtol(argv[i]+2,&eptr,10);                                                     \
   if (*eptr != '\0' || argv[i][2] == '\0')                                              \
-    { fprintf(stderr,"%s: -%c argument is not an integer\n",Prog_Name,argv[i][1]);      \
+    { fprintf(stderr,"%s: -%c '%s' argument is not an integer\n",			\
+                     Prog_Name,argv[i][1],argv[i]+2);      				\
       exit (1);                                                                         \
     }                                                                                   \
   if (var <= 0)                                                                         \
@@ -146,7 +108,8 @@ extern char Ebuffer[];
 #define ARG_NON_NEGATIVE(var,name)                                                      \
   var = strtol(argv[i]+2,&eptr,10);                                                     \
   if (*eptr != '\0' || argv[i][2] == '\0')                                              \
-    { fprintf(stderr,"%s: -%c argument is not an integer\n",Prog_Name,argv[i][1]);      \
+    { fprintf(stderr,"%s: -%c '%s' argument is not an integer\n",			\
+                     Prog_Name,argv[i][1],argv[i]+2);      				\
       exit (1);                                                                         \
     }                                                                                   \
   if (var < 0)	                                                                        \
@@ -157,7 +120,8 @@ extern char Ebuffer[];
 #define ARG_REAL(var)                                                                   \
   var = strtod(argv[i]+2,&eptr);                                                        \
   if (*eptr != '\0' || argv[i][2] == '\0')                                              \
-    { fprintf(stderr,"%s: -%c argument is not a real number\n",Prog_Name,argv[i][1]);   \
+    { fprintf(stderr,"%s: -%c '%s' argument is not a real number\n",			\
+                     Prog_Name,argv[i][1],argv[i]+2);      				\
       exit (1);                                                                         \
     }
 
@@ -211,14 +175,17 @@ void Number_Read(char *s);    //  Convert read from letters to numbers
 #define DB_CSS  0x0400   //  This is the second or later of a group of reads from a given insert
 #define DB_BEST 0x0800   //  This is the longest read of a given insert (may be the only 1)
 
+//  Fields have different interpretations if a .db versus a .dam
+
 typedef struct
-  { int     origin; //  Well #
+  { int     origin; //  Well # (DB), Contig # (DAM)
     int     rlen;   //  Length of the sequence (Last pulse = fpulse + rlen)
-    int     fpulse; //  First pulse
+    int     fpulse; //  First pulse (DB), left index of contig in scaffold (DAM)
     int64   boff;   //  Offset (in bytes) of compressed read in 'bases' file, or offset of
                     //    uncompressed bases in memory block
-    int64   coff;   //  Offset (in bytes) of compressed quiva streams in 'quiva' file
-    int     flags;  //  QV of read + flags above
+    int64   coff;   //  Offset (in bytes) of compressed quiva streams in '.qvs' file (DB),
+                    //  Offset (in bytes) of scaffold header string in '.hdr' file (DAM)
+    int     flags;  //  QV of read + flags above (DB only)
   } HITS_READ;
 
 //  A track can be of 3 types:
@@ -299,7 +266,7 @@ typedef struct
 #define DB_NFILE  "files = %9d\n"   //  number of files
 #define DB_FDATA  "  %9d %s %s\n"   //  last read index + 1, fasta prolog, file name
 #define DB_NBLOCK "blocks = %9d\n"  //  number of blocks
-#define DB_PARAMS "size = %9lld cutoff = %9d all = %1d\n"  //  block size, len cutoff, all in well
+#define DB_PARAMS "size = %10lld cutoff = %9d all = %1d\n"  //  block size, len cutoff, all in well
 #define DB_BDATA  " %9d %9d\n"      //  First read index (untrimmed), first read index (trimmed)
 
 
@@ -342,6 +309,10 @@ void Trim_DB(HITS_DB *db);
 
 void Close_DB(HITS_DB *db);
 
+  // Return the size in bytes of the given DB
+
+int64 sizeof_DB(HITS_DB *db);
+
   // If QV pseudo track is not already in db's track list, then load it and set it up.
   //   The database must not have been trimmed yet.  -1 is returned if a .qvs file is not
   //   present, and 1 is returned if an error (reported to EPLACE) occured and INTERACTIVE
@@ -358,8 +329,15 @@ void Close_QVs(HITS_DB *db);
   //     0: Track is for untrimmed DB
   //    -1: Track is not the right size of DB either trimmed or untrimmed
   //    -2: Could not find the track
+  // In addition, if opened (0 or 1 returned), then kind points at an integer indicating
+  //   the type of track as follows:
+  //      CUSTOM  0 => a custom track
+  //      MASK    1 => a mask track
+
+#define CUSTOM_TRACK 0
+#define   MASK_TRACK 1
 
-int Check_Track(HITS_DB *db, char *track);
+int Check_Track(HITS_DB *db, char *track, int *kind);
 
   // If track is not already in the db's track list, then allocate all the storage for it,
   //   read it in from the appropriate file, add it to the track list, and return a pointer
@@ -427,11 +405,11 @@ int   Load_QVentry(HITS_DB *db, int i, char **entry, int ascii);
 
 int Read_All_Sequences(HITS_DB *db, int ascii);
 
-  // For the DB or DAM "path" = "prefix/root[.db|.dam]", find all the files for that DB, i.e. all
+  // For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
   //   those of the form "prefix/[.]root.part" and call actor with the complete path to each file
   //   pointed at by path, and the suffix of the path by extension.  The . proceeds the root
   //   name if the defined constant HIDE_FILES is set.  Always the first call is with the
-  //   path "prefix/root.db" and extension "db".  There will always be calls for
+  //   path "prefix/root.[db|dam]" and extension "db" or "dam".  There will always be calls for
   //   "prefix/[.]root.idx" and "prefix/[.]root.bps".  All other calls are for *tracks* and
   //   so this routine gives one a way to know all the tracks associated with a given DB.
   //   -1 is returned if the path could not be found, and 1 is returned if an error (reported
diff --git a/DB2fasta.c b/DB2fasta.c
index 5080f88..eb769be 100644
--- a/DB2fasta.c
+++ b/DB2fasta.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /********************************************************************************************
  *
  *  Recreate all the .fasta files that have been loaded into a specified database.
@@ -55,7 +18,6 @@ static char *Usage = "[-vU] [-w<int(80)>] <path:db>";
 int main(int argc, char *argv[])
 { HITS_DB    _db, *db = &_db;
   FILE       *dbfile;
-  int         nfiles;
   int         VERBOSE, UPPER, WIDTH;
 
   //  Process arguments
@@ -92,9 +54,10 @@ int main(int argc, char *argv[])
       }
   }
 
-  //  Open db
+  //  Open db, and db stub file
 
   { int   status;
+    char *pwd, *root;
 
     status = Open_DB(argv[1],db);
     if (status < 0)
@@ -107,9 +70,6 @@ int main(int argc, char *argv[])
       { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
         exit (1);
       }
-  }
-
-  { char *pwd, *root;
 
     pwd    = PathTo(argv[1]);
     root   = Root(argv[1],".db");
@@ -120,23 +80,22 @@ int main(int argc, char *argv[])
       exit (1);
   }
 
-  //  nfiles = # of files in data base
-
-  if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
-    SYSTEM_ERROR
-
-  //  For each file do:
+  //  For each cell do:
 
   { HITS_READ  *reads;
+    char        lname[MAX_NAME];
+    FILE       *ofile = NULL;
+    int         f, first, last, ofirst, nfiles;
     char       *read;
-    int         f, first;
+
+    if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
+      SYSTEM_ERROR
 
     reads = db->reads;
     read  = New_Read_Buffer(db);
-    first = 0;
+    first = ofirst = 0;
     for (f = 0; f < nfiles; f++)
-      { int   i, last;
-        FILE *ofile;
+      { int   i;
         char  prolog[MAX_NAME], fname[MAX_NAME];
 
         //  Scan db image file line, create .fasta file for writing
@@ -144,12 +103,36 @@ int main(int argc, char *argv[])
         if (fscanf(dbfile,DB_FDATA,&last,fname,prolog) != 3)
           SYSTEM_ERROR
 
-        if ((ofile = Fopen(Catenate(".","/",fname,".fasta"),"w")) == NULL)
-          exit (1);
-
-        if (VERBOSE)
-          { fprintf(stderr,"Creating %s.fasta ...\n",fname);
-            fflush(stdout);
+        if (f == 0 || strcmp(fname,lname) != 0)
+          { if (f > 0)
+              { if (ofile == stdout)
+                  { fprintf(stderr," %d reads\n",first-ofirst);
+                    fflush(stderr);
+                  }
+                else
+                  fclose(ofile);
+              }
+
+            if (strcmp(fname,"stdout") == 0)
+              { ofile  = stdout;
+                ofirst = first;
+
+                if (VERBOSE)
+                  { fprintf(stderr,"Sending to stdout ...");
+                    fflush(stdout);
+                  }
+              }
+            else
+              { if ((ofile = Fopen(Catenate(".","/",fname,".fasta"),"w")) == NULL)
+                  exit (1);
+
+                if (VERBOSE)
+                  { fprintf(stderr,"Creating %s.fasta ...\n",fname);
+                    fflush(stdout);
+                  }
+              }
+
+            strcpy(lname,fname);
           }
 
         //   For the relevant range of reads, write each to the file
@@ -179,6 +162,15 @@ int main(int argc, char *argv[])
 
         first = last;
       }
+
+    if (f > 0)
+      { if (ofile == stdout)
+          { fprintf(stderr," %d reads\n",first-ofirst);
+            fflush(stderr);
+          }
+        else
+          fclose(ofile);
+      }
   }
 
   fclose(dbfile);
diff --git a/DB2quiva.c b/DB2quiva.c
index 63c0b91..a7b93da 100644
--- a/DB2quiva.c
+++ b/DB2quiva.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /********************************************************************************************
  *
  *  Recreate all the .quiva files that have been loaded into a specified database.
@@ -115,22 +78,23 @@ int main(int argc, char *argv[])
       exit (1);
   }
 
-  //  For each file do:
+  //  For each cell do:
 
   { HITS_READ  *reads;
-    int         f, first, nfiles;
+    char        lname[MAX_NAME];
+    FILE       *ofile = NULL;
+    int         f, first, last, ofirst, nfiles;
     QVcoding   *coding;
     char      **entry;
 
     if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
       SYSTEM_ERROR
 
-    entry = New_QV_Buffer(db);
     reads = db->reads;
-    first = 0;
+    entry = New_QV_Buffer(db);
+    first = ofirst = 0;
     for (f = 0; f < nfiles; f++)
-      { int   i, last;
-        FILE *ofile;
+      { int   i;
         char  prolog[MAX_NAME], fname[MAX_NAME];
 
         //  Scan db image file line, create .quiva file for writing
@@ -140,19 +104,43 @@ int main(int argc, char *argv[])
         if (fscanf(dbfile,DB_FDATA,&last,fname,prolog) != 3)
           SYSTEM_ERROR
 
-        if ((ofile = Fopen(Catenate(".","/",fname,".quiva"),"w")) == NULL)
-          exit (1);
+        if (f == 0 || strcmp(fname,lname) != 0)
+          { if (f > 0)
+              { if (ofile == stdout)
+                  { fprintf(stderr," %d quivas\n",first-ofirst);
+                    fflush(stderr);
+                  }
+                else
+                  fclose(ofile);
+              }
 
-        if (VERBOSE)
-          { fprintf(stderr,"Creating %s.quiva ...\n",fname);
-            fflush(stderr);
-          }
+            if (strcmp(fname,"stdout") == 0)
+              { ofile  = stdout;
+                ofirst = first;
 
-        coding = Read_QVcoding(quiva);
+                if (VERBOSE)
+                  { fprintf(stderr,"Sending to stdout ...");
+                    fflush(stdout);
+                  }
+              }
+            else
+              { if ((ofile = Fopen(Catenate(".","/",fname,".quiva"),"w")) == NULL)
+                  exit (1);
+
+                if (VERBOSE)
+                  { fprintf(stderr,"Creating %s.quiva ...\n",fname);
+                    fflush(stderr);
+                  }
+              }
+
+            strcpy(lname,fname);
+          }
 
         //   For the relevant range of reads, write the header for each to the file
         //     and then uncompress and write the quiva entry for each
 
+        coding = Read_QVcoding(quiva);
+
         for (i = first; i < last; i++)
           { int        e, flags, qv, rlen;
             HITS_READ *r;
@@ -182,6 +170,15 @@ int main(int argc, char *argv[])
 
         first = last;
       }
+
+    if (f > 0)
+      { if (ofile == stdout)
+          { fprintf(stderr," %d quivas\n",first-ofirst);
+            fflush(stderr);
+          }
+        else
+          fclose(ofile);
+      }
   }
 
   fclose(quiva);
diff --git a/DBshow.c b/DBdump.c
similarity index 52%
copy from DBshow.c
copy to DBdump.c
index 703fb14..735e726 100644
--- a/DBshow.c
+++ b/DBdump.c
@@ -1,51 +1,9 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
- *  Display a specified set of reads of a database in fasta format.
+ *  Display a portion of the data-base and selected information in 1-code format.
  *
  *  Author:  Gene Myers
- *  Date  :  September 2013
- *  Mod   :  With DB overhaul, made this a routine strictly for printing a selected subset
- *             and created DB2fasta for recreating all the fasta files of a DB
- *  Date  :  April 2014
- *  Mod   :  Added options to display QV streams
- *  Date  :  July 2014
+ *  Date  :  November 2015
  *
  ********************************************************************************************/
 
@@ -63,8 +21,8 @@
 #endif
 
 static char *Usage[] =
-    { "[-unqUQ] [-w<int(80)>] [-m<track>]+",
-      "         <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
+    { "[-rhsiqp] [-uU] [-m<track>]+",
+      "          <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
     };
 
 #define LAST_READ_SYMBOL   '$'
@@ -116,35 +74,55 @@ int next_read(File_Iterator *it)
   return (0);
 }
 
+static int qv_map[51] =
+  { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
+    'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+    'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D',
+    'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+    'Y'
+  };
+
+static int prof_map[41] =
+  { '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
+    'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
+    't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C',
+    'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N',
+  };
+
 int main(int argc, char *argv[])
 { HITS_DB    _db, *db = &_db;
   FILE       *hdrs = NULL;
+  int64      *qv_idx = NULL;
+  uint8      *qv_val = NULL;
+  int64      *pf_idx = NULL;
+  uint8      *pf_val = NULL;
 
   int         nfiles;
   char      **flist = NULL;
   int        *findx = NULL;
 
-  int            reps, *pts;
   int            input_pts;
-  File_Iterator *iter;
-  FILE          *input;
+  int            reps = 0;
+  int           *pts  = NULL;
+  File_Iterator *iter = NULL;
+  FILE          *input = NULL;
 
   int         TRIM, UPPER;
-  int         DOSEQ, DOQVS, QUIVA, DAM;
-  int         WIDTH;
+  int         DORED, DOSEQ, DOQVS, DOHDR, DOIQV, DOPRF, DAM;
 
-  int         MMAX, MTOP;
-  char      **MASK;
+  int          MMAX, MTOP;
+  char       **MASK;
+  HITS_TRACK **MTRACK;
 
   //  Process arguments
 
   { int  i, j, k;
     int  flags[128];
-    char *eptr;
 
-    ARG_INIT("DBshow")
+    ARG_INIT("DBdump")
 
-    WIDTH = 80;
     MTOP  = 0;
     MMAX  = 10;
     MASK  = (char **) Malloc(MMAX*sizeof(char *),"Allocating mask track array");
@@ -156,10 +134,7 @@ int main(int argc, char *argv[])
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            ARG_FLAGS("unqUQ")
-            break;
-          case 'w':
-            ARG_NON_NEGATIVE(WIDTH,"Line width")
+            ARG_FLAGS("hpqrsiuU")
             break;
           case 'm':
             if (MTOP >= MMAX)
@@ -179,21 +154,26 @@ int main(int argc, char *argv[])
     TRIM  = 1-flags['u'];
     UPPER = 1+flags['U'];
     DOQVS = flags['q'];
-    DOSEQ = 1-flags['n'];
-    QUIVA = flags['Q'];
-    if (QUIVA && (!DOSEQ || MTOP > 0))
-      { fprintf(stderr,"%s: -Q (quiva) format request inconsistent with -n and -m options\n",
-                       Prog_Name);
-        exit (1);
-      }
-    if (QUIVA)
-      DOQVS = 1;
+    DORED = flags['r'];
+    DOSEQ = flags['s'];
+    DOHDR = flags['h'];
+    DOIQV = flags['i'];
+    DOPRF = flags['p'];
 
     if (argc <= 1)
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
         fprintf(stderr,"       %*s %s\n",(int) strlen(Prog_Name),"",Usage[1]);
         exit (1);
       }
+
+    if ( ! TRIM && DOIQV)
+      { fprintf(stderr,"%s: -i and -u are incompatible\n",Prog_Name);
+        exit (1);
+      }
+    if ( ! TRIM && DOPRF)
+      { fprintf(stderr,"%s: -p and -u are incompatible\n",Prog_Name);
+        exit (1);
+      }
   }
 
   //  Open DB or DAM, and if a DAM open also .hdr file
@@ -208,11 +188,13 @@ int main(int argc, char *argv[])
       { root   = Root(argv[1],".dam");
         pwd    = PathTo(argv[1]);
 
+        if (db->part > 0)
+          *rindex(root,'.') = '\0';
         hdrs = Fopen(Catenate(pwd,PATHSEP,root,".hdr"),"r");
         if (hdrs == NULL)
           exit (1);
-        DAM   = 1;
-        if (QUIVA || DOQVS)
+        DAM = 1;
+        if (DOQVS)
           { fprintf(stderr,"%s: -Q and -q options not compatible with a .dam DB\n",Prog_Name);
             exit (1);
           }
@@ -228,23 +210,39 @@ int main(int argc, char *argv[])
     { if (Load_QVs(db) < 0)
         { fprintf(stderr,"%s: QVs requested, but no .qvs for data base\n",Prog_Name);
           exit (1);
-        }
+	}
     }
 
   //  Check tracks and load tracks for untrimmed DB
 
-  { int i, status;
+  { int i, status, kind;
+
+    MTRACK = Malloc(sizeof(HITS_TRACK *)*MTOP,"Allocation of track pointer vector");
+    if (MTRACK == NULL)
+      exit (1);
 
     for (i = 0; i < MTOP; i++)
-      { status = Check_Track(db,MASK[i]);
+      { status = Check_Track(db,MASK[i],&kind);
         if (status == -2)
-          printf("%s: Warning: -m%s option given but no track found.\n",Prog_Name,MASK[i]);
+          { fprintf(stderr,"%s: Warning: -m%s option given but no track found.\n",
+                           Prog_Name,MASK[i]);
+            exit (1);
+          }
         else if (status == -1)
-          printf("%s: Warning: %s track not sync'd with db.\n",Prog_Name,MASK[i]);
+          { fprintf(stderr,"%s: Warning: %s track not sync'd with db.\n",Prog_Name,MASK[i]);
+            exit (1);
+          }
+        else if (kind != MASK_TRACK)
+          { fprintf(stderr,"%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[i]);
+            exit (1);
+          }
         else if (status == 0)
-          Load_Track(db,MASK[i]);
+          MTRACK[i] = Load_Track(db,MASK[i]);
         else if (status == 1 && !TRIM)
-          printf("%s: Warning: %s track is for a trimmed db but -u is set.\n",Prog_Name,MASK[i]);
+          { fprintf(stderr,"%s: Warning: %s track is for a trimmed db but -u is set.\n",
+                           Prog_Name,MASK[i]);
+            exit (1);
+          }
       }
   }
 
@@ -321,21 +319,57 @@ int main(int argc, char *argv[])
     }
 
   if (TRIM)
-    { int i, status;
+    { int i, status, kind;
 
       Trim_DB(db);
 
       //  Load tracks for trimmed DB
 
       for (i = 0; i < MTOP; i++)
-        { status = Check_Track(db,MASK[i]);
+        { status = Check_Track(db,MASK[i],&kind);
           if (status < 0)
             continue;
           else if (status == 1)
-            Load_Track(db,MASK[i]);
+            MTRACK[i] = Load_Track(db,MASK[i]);
         }
     }
 
+  if (DOIQV)
+    { int         status, kind;
+      HITS_TRACK *track;
+
+      status = Check_Track(db,"qual",&kind);
+      if (status == -2)
+        { fprintf(stderr,"%s: .qual-track does not exist for this db.\n",Prog_Name);
+          exit (1);
+        }
+      if (status == -1)
+        { fprintf(stderr,"%s: .qual-track not sync'd with db.\n",Prog_Name);
+          exit (1);
+        }
+      track = Load_Track(db,"qual");
+      qv_idx = (int64 *) track->anno;
+      qv_val = (uint8 *) track->data;
+    }
+
+  if (DOPRF)
+    { int         status, kind;
+      HITS_TRACK *track;
+
+      status = Check_Track(db,"prof",&kind);
+      if (status == -2)
+        { fprintf(stderr,"%s: .prof-track does not exist for this db.\n",Prog_Name);
+          exit (1);
+        }
+      if (status == -1)
+        { fprintf(stderr,"%s: .prof-track not sync'd with db.\n",Prog_Name);
+          exit (1);
+        }
+      track = Load_Track(db,"prof");
+      pf_idx = (int64 *) track->anno;
+      pf_val = (uint8 *) track->data;
+    }
+
   //  Process read index arguments into a list of read ranges
 
   input_pts = 0;
@@ -368,7 +402,7 @@ int main(int argc, char *argv[])
       iter = init_file_iterator(input);
     }
   else
-    { pts  = (int *) Malloc(sizeof(int)*2*(argc-1),"Allocating read parameters");
+    { pts = (int *) Malloc(sizeof(int)*2*(argc-1),"Allocating read parameters");
       if (pts == NULL)
         exit (1);
 
@@ -422,40 +456,186 @@ int main(int argc, char *argv[])
         }
     }
 
+  //  Scan to count the size of things
+
+  { HITS_READ  *reads;
+    int         c, b, e, i, m;
+    int         map, substr;
+    int64       noreads;
+    int64       seqmax, seqtot;
+    int64       iqvmax, iqvtot;
+    int64       prfmax, prftot;
+    int64       hdrmax, hdrtot;
+    int64       trkmax[MTOP], trktot[MTOP];
+
+    map    = 0;
+    reads  = db->reads;
+    substr = 0;
+
+    noreads = 0;
+    seqmax = 0;
+    seqtot = 0;
+    iqvmax = 0;
+    iqvtot = 0;
+    prfmax = 0;
+    prftot = 0;
+    hdrmax = 0;
+    hdrmax = 0;
+    hdrtot = 0;
+    for (m = 0; m < MTOP; m++)
+      { trkmax[m] = 0;
+        trktot[m] = 0;
+      }
+
+    c = 0;
+    while (1)
+      { if (input_pts)
+          { if (next_read(iter))
+              break;
+            e = iter->read;
+            b = e-1;
+            substr = (iter->beg >= 0);
+          }
+        else
+          { if (c >= reps)
+              break;
+            b = pts[c]-1;
+            e = pts[c+1];
+            if (e > db->nreads)
+              e = db->nreads;
+            c += 2;
+          }
+
+        for (i = b; i < e; i++)
+          { int         len, ten;
+            int         fst, lst;
+            HITS_READ  *r;
+
+            r   = reads + i;
+            len = r->rlen;
+
+            noreads += 1;
+
+            if (DOHDR)
+              { int ten;
+
+                if (DAM)
+                  { char header[MAX_NAME];
+
+                    fseeko(hdrs,r->coff,SEEK_SET);
+                    fgets(header,MAX_NAME,hdrs);
+                    header[strlen(header)-1] = '\0';
+                    ten = strlen(header);
+                  }
+                else
+                  { while (i < findx[map-1])
+                      map -= 1;
+                    while (i >= findx[map])
+                      map += 1;
+                    ten = strlen(flist[map]);
+                  }
+                if (hdrmax < ten)
+                  hdrmax = ten;
+                hdrtot += ten;
+              }
+
+            for (m = 0; m < MTOP; m++)
+              { int64 *anno;
+
+                anno = (int64 *) MTRACK[m]->anno;
+                ten = ((anno[i+1]-anno[i]) >> 3);
+                if (ten > trkmax[m])
+                  trkmax[m] = ten;
+                trktot[m] += ten;
+              }
+
+            if (substr)
+              { fst = iter->beg;
+                lst = iter->end;
+                if (DOIQV)
+                  { fprintf(stderr,"%s: Cannot select subreads when -i is requested\n",Prog_Name);
+                    exit (1);
+                  }
+                if (DOPRF)
+                  { fprintf(stderr,"%s: Cannot select subreads when -p is requested\n",Prog_Name);
+                    exit (1);
+                  }
+              }
+            else
+              { fst = 0;
+                lst = len;
+              }
+
+            if (DOSEQ | DOQVS)
+              { int ten = lst-fst;
+                if (ten > seqmax)
+                  seqmax = ten;
+                seqtot += ten;
+              }
+            if (DOIQV)
+              { int ten = qv_idx[i+1] - qv_idx[i];
+                if (ten > iqvmax)
+                  iqvmax = ten;
+                iqvtot += ten;
+              }
+            if (DOPRF)
+              { int ten = pf_idx[i+1] - pf_idx[i];
+                if (ten > prfmax)
+                  prfmax = ten;
+                prftot += ten;
+              }
+          }
+      }
+
+    printf("+ R %lld\n",noreads);
+    printf("+ M %d\n",MTOP);
+    if (DOHDR)
+      { printf("+ H %lld\n",hdrtot);
+        printf("@ H %lld\n",hdrmax);
+      }
+    for (m = 0; m < MTOP; m++)
+      { printf("+ T%d %lld\n",m,trktot[m]);
+        printf("@ T%d %lld\n",m,trkmax[m]);
+      }
+    if (DOSEQ | DOQVS)
+      { printf("+ S %lld\n",seqtot);
+        printf("@ S %lld\n",seqmax);
+      }
+    if (DOIQV)
+      { printf("+ I %lld\n",iqvtot);
+        printf("@ I %lld\n",iqvmax);
+      }
+    if (DOPRF)
+      { printf("+ P %lld\n",prftot);
+        printf("@ P %lld\n",prfmax);
+      }
+  }
+
   //  Display each read (and/or QV streams) in the active DB according to the
   //    range pairs in pts[0..reps) and according to the display options.
 
   { HITS_READ  *reads;
-    HITS_TRACK *first;
     char       *read, **entry;
-    int         c, b, e, i;
-    int         hilight, substr;
+    int         c, b, e, i, m;
+    int         substr;
     int         map;
-    int       (*iscase)(int);
+    char        qvname[5] = { 'd', 'c', 'i', 'm', 's' };
 
     read  = New_Read_Buffer(db);
     if (DOQVS)
-      { entry = New_QV_Buffer(db);
-        first = db->tracks->next;
-      }
+      entry = New_QV_Buffer(db);
     else
-      { entry = NULL;
-        first = db->tracks;
-      }
-
-    if (UPPER == 1)
-      { hilight = 'A'-'a';
-        iscase  = islower;
-      }
-    else
-      { hilight = 'a'-'A';
-        iscase  = isupper;
-      }
+      entry = NULL;
 
     map    = 0;
     reads  = db->reads;
     substr = 0;
 
+    if (input_pts)
+      iter = init_file_iterator(input);
+    else
+      iter = NULL;
+
     c = 0;
     while (1)
       { if (input_pts)
@@ -480,65 +660,57 @@ int main(int argc, char *argv[])
             int         fst, lst;
             int         flags, qv;
             HITS_READ  *r;
-            HITS_TRACK *track;
 
             r   = reads + i;
             len = r->rlen;
+            if (DORED)
+              printf("R %d\n",i+1);
 
             flags = r->flags;
             qv    = (flags & DB_QV);
-            if (DAM)
-              { char header[MAX_NAME];
-
-                fseeko(hdrs,r->coff,SEEK_SET);
-                fgets(header,MAX_NAME,hdrs);
-                header[strlen(header)-1] = '\0';
-                printf("%s :: Contig %d[%d,%d]",header,r->origin,r->fpulse,r->fpulse+len);
-              }
-            else
-              { while (i < findx[map-1])
-                  map -= 1;
-                while (i >= findx[map])
-                  map += 1;
-                if (QUIVA)
-                  printf("@%s/%d/%d_%d",flist[map],r->origin,r->fpulse,r->fpulse+len);
+            if (DOHDR)
+              { if (DAM)
+                  { char header[MAX_NAME];
+
+                    fseeko(hdrs,r->coff,SEEK_SET);
+                    fgets(header,MAX_NAME,hdrs);
+                    header[strlen(header)-1] = '\0';
+                    printf("H %ld %s\n",strlen(header),header);
+                    printf("L %d %d %d\n",r->origin,r->fpulse,r->fpulse+len);
+                  }
                 else
-                  printf(">%s/%d/%d_%d",flist[map],r->origin,r->fpulse,r->fpulse+len);
-                if (qv > 0)
-                  printf(" RQ=0.%3d",qv);
+                  { while (i < findx[map-1])
+                      map -= 1;
+                    while (i >= findx[map])
+                      map += 1;
+                    printf("H %ld %s\n",strlen(flist[map]),flist[map]);
+                    printf("L %d %d %d\n",r->origin,r->fpulse,r->fpulse+len);
+                    if (qv > 0)
+                      printf("Q: %d\n",qv);
+                  }
               }
-            printf("\n");
 
             if (DOQVS)
               Load_QVentry(db,i,entry,UPPER);
             if (DOSEQ)
               Load_Read(db,i,read,UPPER);
 
-            for (track = first; track != NULL; track = track->next)
+            for (m = 0; m < MTOP; m++)
               { int64 *anno;
                 int   *data;
                 int64  s, f, j;
-                int    bd, ed, m;
 
-                anno = (int64 *) track->anno;
-                data = (int *) track->data;
+                anno = (int64 *) MTRACK[m]->anno;
+                data = (int *) MTRACK[m]->data;
 
                 s = (anno[i] >> 2);
                 f = (anno[i+1] >> 2);
+                printf("T%d %lld ",m,(f-s)/2);
                 if (s < f)
                   { for (j = s; j < f; j += 2)
-                      { bd = data[j];
-                        ed = data[j+1];
-                        if (DOSEQ)
-                          for (m = bd; m < ed; m++)
-                            if (iscase(read[m]))
-                              read[m] = (char) (read[m] + hilight);
-                        if (j == s)
-                          printf("> %s:",track->name);
-                        printf(" [%d,%d]",bd,ed);
-                      }
-                    printf("\n");
+                      printf(" %d %d",data[j],data[j+1]);
                   }
+                printf("\n");
               }
 
             if (substr)
@@ -550,39 +722,39 @@ int main(int argc, char *argv[])
                 lst = len;
               }
 
-            if (QUIVA)
+            if (DOSEQ)
+              { printf("S %d ",lst-fst);
+                printf("%.*s\n",lst-fst,read+fst);
+              }
+
+            if (DOIQV)
+              { int64 k, e;
+
+                k = qv_idx[i];
+                e = qv_idx[i+1];
+                printf("I %lld ",e-k);
+                while (k < e)
+                  putchar(qv_map[qv_val[k++]]);
+                printf("\n");
+              }
+
+            if (DOPRF)
+              { int64 k, e;
+
+                k = pf_idx[i];
+                e = pf_idx[i+1];
+                printf("P %lld ",e-k);
+                while (k < e)
+                  putchar(prof_map[pf_val[k++]]);
+                printf("\n");
+              }
+
+            if (DOQVS)
               { int k;
 
                 for (k = 0; k < 5; k++)
-                  printf("%.*s\n",lst-fst,entry[k]+fst);
-              }
-            else
-              { if (DOQVS)
-                  { int j, k;
-
-                    printf("\n");
-                    for (j = fst; j+WIDTH < lst; j += WIDTH)
-                      { if (DOSEQ)
-                          printf("%.*s\n",WIDTH,read+j);
-                        for (k = 0; k < 5; k++)
-                          printf("%.*s\n",WIDTH,entry[k]+j);
-                        printf("\n");
-                      }
-                    if (j < lst)
-                      { if (DOSEQ)
-                          printf("%.*s\n",lst-j,read+j);
-                        for (k = 0; k < 5; k++)
-                          printf("%.*s\n",lst-j,entry[k]+j);
-                        printf("\n");
-                      }
-                  }
-                else if (DOSEQ)
-                  { int j;
-    
-                    for (j = fst; j+WIDTH < lst; j += WIDTH)
-                      printf("%.*s\n",WIDTH,read+j);
-                    if (j < lst)
-                      printf("%.*s\n",lst-j,read+j);
+                  { printf("%c %d ",qvname[k],lst-fst);
+                    printf("%.*s\n",lst-fst,entry[k]+fst);
                   }
               }
           }
diff --git a/DBdust.c b/DBdust.c
index a63ddd4..3b75400 100644
--- a/DBdust.c
+++ b/DBdust.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  My implementation of the SDUST algorithm (Morgulis et al., JCB 13, 5 (2006), 1028-1040)
@@ -155,7 +118,7 @@ int main(int argc, char *argv[])
 
     pwd   = PathTo(argv[1]);
     root  = Root(argv[1],".db");
-    size  = 8;
+    size  = 0;
 
     fname = Catenate(pwd,PATHSEP,root,".dust.anno");
     if ((afile = fopen(fname,"r+")) == NULL || db->part > 0)
diff --git a/DBrm.c b/DBrm.c
index 390d912..6bad19b 100644
--- a/DBrm.c
+++ b/DBrm.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /********************************************************************************************
  *
  *  Remove a list of .db databases
diff --git a/DBshow.c b/DBshow.c
index 703fb14..d4346a5 100644
--- a/DBshow.c
+++ b/DBshow.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Display a specified set of reads of a database in fasta format.
@@ -126,7 +89,7 @@ int main(int argc, char *argv[])
 
   int            reps, *pts;
   int            input_pts;
-  File_Iterator *iter;
+  File_Iterator *iter = NULL;
   FILE          *input;
 
   int         TRIM, UPPER;
@@ -208,10 +171,12 @@ int main(int argc, char *argv[])
       { root   = Root(argv[1],".dam");
         pwd    = PathTo(argv[1]);
 
+        if (db->part > 0)
+          *rindex(root,'.') = '\0';
         hdrs = Fopen(Catenate(pwd,PATHSEP,root,".hdr"),"r");
         if (hdrs == NULL)
           exit (1);
-        DAM   = 1;
+        DAM = 1;
         if (QUIVA || DOQVS)
           { fprintf(stderr,"%s: -Q and -q options not compatible with a .dam DB\n",Prog_Name);
             exit (1);
@@ -233,14 +198,16 @@ int main(int argc, char *argv[])
 
   //  Check tracks and load tracks for untrimmed DB
 
-  { int i, status;
+  { int i, status, kind;
 
     for (i = 0; i < MTOP; i++)
-      { status = Check_Track(db,MASK[i]);
+      { status = Check_Track(db,MASK[i],&kind);
         if (status == -2)
           printf("%s: Warning: -m%s option given but no track found.\n",Prog_Name,MASK[i]);
         else if (status == -1)
           printf("%s: Warning: %s track not sync'd with db.\n",Prog_Name,MASK[i]);
+        else if (kind != MASK_TRACK)
+          printf("%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[i]);
         else if (status == 0)
           Load_Track(db,MASK[i]);
         else if (status == 1 && !TRIM)
@@ -321,17 +288,17 @@ int main(int argc, char *argv[])
     }
 
   if (TRIM)
-    { int i, status;
+    { int i, status, kind;
 
       Trim_DB(db);
 
       //  Load tracks for trimmed DB
 
       for (i = 0; i < MTOP; i++)
-        { status = Check_Track(db,MASK[i]);
+        { status = Check_Track(db,MASK[i],&kind);
           if (status < 0)
             continue;
-          else if (status == 1)
+          else if (status == 1 && kind == MASK_TRACK)
             Load_Track(db,MASK[i]);
         }
     }
diff --git a/DBsplit.c b/DBsplit.c
index 6961228..6a218ec 100644
--- a/DBsplit.c
+++ b/DBsplit.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Split a .db into a set of sub-database blocks for use by the Dazzler:
@@ -69,7 +32,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-a] [-x<int>] [-s<int(200)>] <path:db|dam>";
+static char *Usage = "[-af] [-x<int>] [-s<float(200.)>] <path:db|dam>";
 
 int main(int argc, char *argv[])
 { HITS_DB    db, dbs;
@@ -77,38 +40,46 @@ int main(int argc, char *argv[])
   FILE      *dbfile, *ixfile;
   int        status;
 
+  int        FORCE;
   int        ALL;
   int        CUTOFF;
-  int        SIZE;
+  int64      SIZE;
 
   { int   i, j, k;
     int   flags[128];
     char *eptr;
+    float size;
 
     ARG_INIT("DBsplit")
 
     CUTOFF = 0;
-    SIZE   = 200;
+    size   = 200;
 
     j = 1;
     for (i = 1; i < argc; i++)
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            ARG_FLAGS("a")
+            ARG_FLAGS("af")
             break;
           case 'x':
             ARG_NON_NEGATIVE(CUTOFF,"Min read length cutoff")
             break;
           case 's':
-            ARG_POSITIVE(SIZE,"Block size")
+            ARG_REAL(size)
+            if (size <= 0.)
+              { fprintf(stderr,"%s: Block size must be a positive number\n",Prog_Name);
+                exit (1);
+              }
             break;
         }
       else
         argv[j++] = argv[i];
     argc = j;
 
-    ALL = flags['a'];
+    SIZE  = size*1000000ll;
+    ALL   = flags['a'];
+    FORCE = flags['f'];
 
     if (argc != 2)
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
@@ -155,7 +126,7 @@ int main(int argc, char *argv[])
     if (fread(&dbs,sizeof(HITS_DB),1,ixfile) != 1)
       SYSTEM_ERROR
 
-    if (dbs.cutoff >= 0)
+    if (dbs.cutoff >= 0 && !FORCE)
       { printf("You are about to overwrite the current partition settings.  This\n");
         printf("will invalidate any tracks, overlaps, and other derivative files.\n");
         printf("Are you sure you want to proceed? [Y/N] ");
@@ -173,16 +144,15 @@ int main(int argc, char *argv[])
     dbpos = ftello(dbfile);
     fseeko(dbfile,dbpos,SEEK_SET);
     fprintf(dbfile,DB_NBLOCK,0);
-    fprintf(dbfile,DB_PARAMS,(int64) SIZE,CUTOFF,ALL);
+    fprintf(dbfile,DB_PARAMS,SIZE,CUTOFF,ALL);
   }
 
   { HITS_READ *reads  = db.reads;
     int        nreads = db.ureads;
-    int64      size, totlen;
+    int64      totlen;
     int        nblock, ireads, treads, rlen, fno;
     int        i;
 
-    size = SIZE*1000000ll;
 
     nblock = 0;
     totlen = 0;
@@ -196,7 +166,7 @@ int main(int argc, char *argv[])
             { ireads += 1;
               treads += 1;
               totlen += rlen;
-              if (totlen >= size)
+              if (totlen >= SIZE)
                 { fprintf(dbfile,DB_BDATA,i+1,treads);
                   totlen = 0;
                   ireads = 0;
@@ -211,7 +181,7 @@ int main(int argc, char *argv[])
             { ireads += 1;
               treads += 1;
               totlen += rlen;
-              if (totlen >= size)
+              if (totlen >= SIZE)
                 { fprintf(dbfile,DB_BDATA,i+1,treads);
                   totlen = 0;
                   ireads = 0;
diff --git a/DBstats.c b/DBstats.c
index 542b77b..b0e46c2 100644
--- a/DBstats.c
+++ b/DBstats.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Display statistics about the contents of a .db and a histogram of its read lengths.
@@ -116,7 +79,7 @@ int main(int argc, char *argv[])
       }
   }
 
-  { int i, status;
+  { int i, status, kind;
 
     //  Open .db or .dam
 
@@ -128,11 +91,13 @@ int main(int argc, char *argv[])
     //  Check tracks and load tracks for untrimmed DB
 
     for (i = 0; i < MTOP; i++)
-      { status = Check_Track(db,MASK[i]);
+      { status = Check_Track(db,MASK[i],&kind);
         if (status == -2)
           fprintf(stderr,"%s: Warning: -m%s option given but no track found.\n",Prog_Name,MASK[i]);
         else if (status == -1)
           fprintf(stderr,"%s: Warning: %s track not sync'd with db.\n",Prog_Name,MASK[i]);
+        else if (kind != MASK_TRACK)
+          fprintf(stderr,"%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[i]);
         else if (status == 0)
           Load_Track(db,MASK[i]);
         else if (status == 1 && !TRIM)
@@ -149,7 +114,7 @@ int main(int argc, char *argv[])
         //  Load tracks for trimmed DB
 
         for (i = 0; i < MTOP; i++)
-          { status = Check_Track(db,MASK[i]);
+          { status = Check_Track(db,MASK[i],&kind);
             if (status < 0)
               continue;
             else if (status == 1)
@@ -161,7 +126,6 @@ int main(int argc, char *argv[])
   { int        i;
     int64      totlen;
     int        nreads, maxlen;
-    int64      ave, dev;
     HITS_READ *reads;
 
     nreads = db->nreads;
@@ -169,7 +133,7 @@ int main(int argc, char *argv[])
     maxlen = db->maxlen;
     reads  = db->reads;
 
-    nbin  = (maxlen-1)/BIN + 1;
+    nbin  = maxlen/BIN + 1;
     hist  = (int *) Malloc(sizeof(int)*nbin,"Allocating histograms");
     bsum  = (int64 *) Malloc(sizeof(int64)*nbin,"Allocating histograms");
     if (hist == NULL || bsum == NULL)
@@ -186,15 +150,6 @@ int main(int argc, char *argv[])
         bsum[rlen/BIN] += rlen;
       }
 
-    nbin = (maxlen-1)/BIN + 1;
-    ave  = totlen/nreads;
-    dev  = 0;
-    for (i = 0; i < nreads; i++)
-      { int rlen = reads[i].rlen;
-        dev += (rlen-ave)*(rlen-ave);
-      }
-    dev = (int64) sqrt((1.*dev)/nreads);
-
     if (dam)
       printf("\nStatistics for all contigs");
     else if (db->all || !TRIM)
@@ -219,7 +174,10 @@ int main(int argc, char *argv[])
     if (TRIM)
       { printf("      out of ");
         Print_Number((int64 ) oreads,15,stdout);
-        printf("  (%5.1f%%)",(100.*nreads)/oreads);
+        if (oreads <= 0)
+          printf("  (100.0%%)");
+        else
+          printf("  (%5.1f%%)",(100.*nreads)/oreads);
       }
     printf("\n");
 
@@ -228,17 +186,40 @@ int main(int argc, char *argv[])
     if (TRIM)
       { printf("   out of ");
         Print_Number(ototal,15,stdout);
-        printf("  (%5.1f%%)",(100.*totlen)/ototal);
+        if (ototal <= 0)
+          printf("  (100.0%%)");
+        else
+          printf("  (%5.1f%%)",(100.*totlen)/ototal);
       }
     printf("\n\n");
 
-    Print_Number(ave,15,stdout);
-    if (dam)
-      printf(" average contig length\n");
-    else
-      { printf(" average read length\n");
-        Print_Number(dev,15,stdout);
-        printf(" standard deviation\n");
+    if (nreads > 0)
+      { int64 ave, dev;
+
+        ave = totlen/nreads;
+        Print_Number(ave,15,stdout);
+        if (dam)
+          printf(" average contig length\n");
+        else
+          { printf(" average read length\n");
+
+            dev  = 0;
+            for (i = 0; i < nreads; i++)
+              { int rlen = reads[i].rlen;
+                dev += (rlen-ave)*(rlen-ave);
+              }
+            dev = (int64) sqrt((1.*dev)/nreads);
+
+            Print_Number(dev,15,stdout);
+            printf(" standard deviation\n");
+          }
+      }
+
+    if (totlen <= 0)
+      { free(hist);
+        free(bsum);
+        Close_DB(db);
+        exit (0);
       }
 
     printf("\n  Base composition: %.3f(A) %.3f(C) %.3f(G) %.3f(T)\n",
@@ -246,7 +227,7 @@ int main(int argc, char *argv[])
 
     if (!NONE)
       { int64 btot;
-        int   cum, skip;
+        int   cum, skip, avg;
 
         printf("\n  Distribution of Read Lengths (Bin size = ");
         Print_Number((int64) BIN,0,stdout);
@@ -264,8 +245,11 @@ int main(int argc, char *argv[])
               { Print_Number((int64) (i*BIN),11,stdout);
                 printf(":");
                 Print_Number((int64) hist[i],11,stdout);
-                printf("    %5.1f    %5.1f   %9lld\n",(100.*cum)/nreads,
-                                                      (100.*btot)/totlen,btot/cum);
+                if (cum > 0)
+                  avg = btot/cum;
+                else
+                  avg = 0;
+                printf("    %5.1f    %5.1f   %9d\n",(100.*cum)/nreads,(100.*btot)/totlen,avg);
               }
             if (cum == nreads) break;
           }
@@ -274,14 +258,14 @@ int main(int argc, char *argv[])
 
   { int64      totlen;
     int        numint, maxlen;
-    int64      ave, dev;
     HITS_TRACK *track;
 
     for (track = db->tracks; track != NULL; track = track->next)
       { char  *data = track->data;
         int64 *anno = (int64 *) track->anno;
-        int    k, rlen;
         int   *idata, *edata;
+        int64  ave, dev, btot;
+        int    k, rlen, cum;
 
         totlen = 0;
         numint = 0;
@@ -297,14 +281,23 @@ int main(int argc, char *argv[])
               }
           }
 
-        nbin = (maxlen-1)/BIN + 1;
+        printf("\n\nStatistics for %s-track\n",track->name);
+
+        printf("\n  There are ");
+        Print_Number(numint,0,stdout);
+        printf(" intervals totaling ");
+        Print_Number(totlen,0,stdout);
+        printf(" bases (%.1f%% of all data)\n",(100.*totlen)/db->totlen);
+
+        if (numint <= 0) continue;
 
+        nbin = maxlen/BIN + 1;
         for (k = 0; k < nbin; k++)
           { hist[k] = 0;
             bsum[k] = 0;
           }
 
-        ave  = totlen/numint;
+        ave = totlen/numint;
         dev  = 0;
         for (k = 0; k < db->nreads; k++)
           { edata = (int *) (data + anno[k+1]);
@@ -317,36 +310,31 @@ int main(int argc, char *argv[])
           }
         dev = (int64) sqrt((1.*dev)/numint);
 
-        printf("\n\nStatistics for %s-track\n",track->name);
+        printf("\n");
+        Print_Number(ave,15,stdout);
+        printf(" average interval length\n");
+        Print_Number(dev,15,stdout);
+        printf(" standard deviation\n");
 
-        printf("\n  There are ");
-        Print_Number(numint,0,stdout);
-        printf(" intervals totaling ");
-        Print_Number(totlen,0,stdout);
-        printf(" bases (%.1f%% of all data)\n",(100.*totlen)/db->totlen);
 
-        { int64 btot;
-          int   cum;
-
-          printf("\n  Distribution of %s intervals (Bin size = ",track->name);
-          Print_Number((int64) BIN,0,stdout);
-          printf(")\n\n        Bin:      Count  %% Intervals  %% Bases     Average\n");
-          cum  = 0;
-          btot = 0;
-          for (k = nbin-1; k >= 0; k--)
-            { cum  += hist[k];
-              btot += bsum[k];
-              if (hist[k] > 0)
-                { Print_Number((int64) (k*BIN),11,stdout);
-                  printf(":");
-                  Print_Number((int64) hist[k],11,stdout);
-                  printf("        %5.1f    %5.1f   %9lld\n",(100.*cum)/numint,
-                                                        (100.*btot)/totlen,btot/cum);
-                  if (cum == numint) break;
-                }
-            }
-          printf("\n");
-        }
+        printf("\n  Distribution of %s intervals (Bin size = ",track->name);
+        Print_Number((int64) BIN,0,stdout);
+        printf(")\n\n        Bin:      Count  %% Intervals  %% Bases     Average\n");
+        cum  = 0;
+        btot = 0;
+        for (k = nbin-1; k >= 0; k--)
+          { cum  += hist[k];
+            btot += bsum[k];
+            if (hist[k] > 0)
+              { Print_Number((int64) (k*BIN),11,stdout);
+                printf(":");
+                Print_Number((int64) hist[k],11,stdout);
+                printf("        %5.1f    %5.1f   %9lld\n",(100.*cum)/numint,
+                                                      (100.*btot)/totlen,btot/cum);
+                if (cum == numint) break;
+              }
+          }
+        printf("\n");
       }
   }
 
diff --git a/DBupgrade.Dec.31.2014.c b/DBupgrade.Dec.31.2014.c
deleted file mode 100644
index 05513a4..0000000
--- a/DBupgrade.Dec.31.2014.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
-/*******************************************************************************************
- *
- *  Interim code: upgrade previous db to have fpulse,rlen fields
- *
- *  Author:  Gene Myers
- *  Date  :  December 2014
- *
- ********************************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "DB.h"
-
-#ifdef HIDE_FILES
-#define PATHSEP "/."
-#else
-#define PATHSEP "/"
-#endif
-
-typedef struct
-  { int     origin; //  Well #
-    int     beg;    //  First pulse
-    int     end;    //  Last pulse
-    int64   boff;   //  Offset (in bytes) of compressed read in 'bases' file, or offset of
-                    //    uncompressed bases in memory block
-    int64   coff;   //  Offset (in bytes) of compressed quiva streams in 'quiva' file
-    int     flags;  //  QV of read + flags above
-  } HITS_OLD;
-
-int main(int argc, char *argv[])
-{ HITS_DB    db;
-  FILE      *nxfile, *ixfile;
-  char      *pwd, *root;
-  int        i;
-
-  if (argc != 2)
-    { fprintf(stderr,"Usage: %s <path:db>\n",argv[0]);
-      exit (1);
-    }
-
-  pwd    = PathTo(argv[1]);
-  root   = Root(argv[1],".db");
-  ixfile = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r");
-  nxfile = Fopen(Catenate(pwd,PATHSEP,root,".ndx"),"w");
-  if (ixfile == NULL || nxfile == NULL)
-    exit (1);
-  free(pwd);
-  free(root);
-
-  if (fread(&db,sizeof(HITS_DB),1,ixfile) != 1)
-    SYSTEM_ERROR
-  fwrite(&db,sizeof(HITS_DB),1,nxfile);
-
-  for (i = 0; i < db.oreads; i++)
-    { HITS_OLD  orec;
-      HITS_READ nrec;
-
-      if (fread(&orec,sizeof(HITS_OLD),1,ixfile) != 1)
-        SYSTEM_ERROR
-
-      nrec.origin = orec.origin;
-      nrec.fpulse = orec.beg;
-      nrec.rlen   = orec.end-orec.beg;
-      nrec.boff   = orec.boff;
-      nrec.coff   = orec.coff;
-      nrec.flags  = orec.flags;
-
-      fwrite(&nrec,sizeof(HITS_READ),1,nxfile);
-    }
-
-  fclose(ixfile);
-  fclose(nxfile);
-
-  exit (0);
-}
diff --git a/DBupgrade.Sep.25.2014.c b/DBupgrade.Sep.25.2014.c
deleted file mode 100644
index 70bbe16..0000000
--- a/DBupgrade.Sep.25.2014.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
-/*******************************************************************************************
- *
- *  Interim code: upgrade previous db to have int's for pulse positions.
- *
- *  Author:  Gene Myers
- *  Date  :  September 2014
- *
- ********************************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "DB.h"
-
-#ifdef HIDE_FILES
-#define PATHSEP "/."
-#else
-#define PATHSEP "/"
-#endif
-
-typedef struct
-  { int     origin; //  Well #
-    uint16  beg;    //  First pulse
-    uint16  end;    //  Last pulse
-    int64   boff;   //  Offset (in bytes) of compressed read in 'bases' file, or offset of
-                    //    uncompressed bases in memory block
-    int64   coff;   //  Offset (in bytes) of compressed quiva streams in 'quiva' file
-    int     flags;  //  QV of read + flags above
-  } HITS_OLD;
-
-typedef struct
-  { int     origin; //  Well #
-    int     beg;    //  First pulse
-    int     end;    //  Last pulse
-    int64   boff;   //  Offset (in bytes) of compressed read in 'bases' file, or offset of
-                    //    uncompressed bases in memory block
-    int64   coff;   //  Offset (in bytes) of compressed quiva streams in 'quiva' file
-    int     flags;  //  QV of read + flags above
-  } HITS_NEW;
-
-int main(int argc, char *argv[])
-{ HITS_DB    db;
-  FILE      *nxfile, *ixfile;
-  char      *pwd, *root;
-  int        i;
-
-  if (argc != 2)
-    { fprintf(stderr,"Usage: %s <path:db>\n",argv[0]);
-      exit (1);
-    }
-
-  pwd    = PathTo(argv[1]);
-  root   = Root(argv[1],".db");
-  ixfile = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r");
-  nxfile = Fopen(Catenate(pwd,PATHSEP,root,".ndx"),"w");
-  if (ixfile == NULL || nxfile == NULL)
-    exit (1);
-  free(pwd);
-  free(root);
-
-  if (fread(&db,sizeof(HITS_DB),1,ixfile) != 1)
-    SYSTEM_ERROR
-  fwrite(&db,sizeof(HITS_DB),1,nxfile);
-
-  for (i = 0; i < db.oreads; i++)
-    { HITS_OLD  orec;
-      HITS_NEW  nrec;
-
-      if (fread(&orec,sizeof(HITS_OLD),1,ixfile) != 1)
-        SYSTEM_ERROR
-
-      nrec.origin = orec.origin;
-      nrec.beg    = orec.beg;
-      nrec.end    = orec.end;
-      nrec.boff   = orec.boff;
-      nrec.coff   = orec.coff;
-      nrec.flags  = orec.flags;
-
-      fwrite(&nrec,sizeof(HITS_NEW),1,nxfile);
-    }
-
-  fclose(ixfile);
-  fclose(nxfile);
-
-  exit (0);
-}
diff --git a/DUSTupgrade.Jan.1.2015.c b/DUSTupgrade.Jan.1.2015.c
deleted file mode 100644
index c53a66f..0000000
--- a/DUSTupgrade.Jan.1.2015.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
-/*******************************************************************************************
- *
- *  Interim code: upgrade previous db to have fpulse,rlen fields
- *
- *  Author:  Gene Myers
- *  Date  :  December 2014
- *
- ********************************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "DB.h"
-
-#ifdef HIDE_FILES
-#define PATHSEP "/."
-#else
-#define PATHSEP "/"
-#endif
-
-int main(int argc, char *argv[])
-{ FILE      *afile, *dfile;
-  FILE      *nafile, *ndfile;
-  char      *pwd, *root;
-  int        size, tracklen;
-  int        i, vint, dint;
-  int64      vlong;
-
-  if (argc != 2)
-    { fprintf(stderr,"Usage: %s <path:db>\n",argv[0]);
-      exit (1);
-    }
-
-  pwd    = PathTo(argv[1]);
-  root   = Root(argv[1],".db");
-  afile  = Fopen(Catenate(pwd,PATHSEP,root,".dust.anno"),"r");
-  dfile  = Fopen(Catenate(pwd,PATHSEP,root,".dust.data"),"r");
-  nafile = Fopen(Catenate(pwd,PATHSEP,root,".next.anno"),"w");
-  ndfile = Fopen(Catenate(pwd,PATHSEP,root,".next.data"),"w");
-  if (afile == NULL || dfile == NULL || nafile == NULL || ndfile == NULL)
-    exit (1);
-  free(pwd);
-  free(root);
-
-  if (fread(&tracklen,sizeof(int),1,afile) != 1)
-    SYSTEM_ERROR
-  fwrite(&tracklen,sizeof(int),1,nafile);
-
-  if (fread(&size,sizeof(int),1,afile) != 1)
-    SYSTEM_ERROR
-  size = 8;
-  fwrite(&size,sizeof(int),1,nafile);
-
-  for (i = 0; i <= tracklen; i++)
-    { if (fread(&vint,sizeof(int),1,afile) != 1)
-        SYSTEM_ERROR
-      vlong = vint;
-      fwrite(&vlong,sizeof(int64),1,nafile);
-    }
-
-  vint >>= 2;
-  for (i = 0; i < vint; i += 2)
-    { if (fread(&dint,sizeof(int),1,dfile) != 1)
-        SYSTEM_ERROR
-      fwrite(&dint,sizeof(int),1,ndfile);
-      if (fread(&dint,sizeof(int),1,dfile) != 1)
-        SYSTEM_ERROR
-      dint += 1;
-      fwrite(&dint,sizeof(int),1,ndfile);
-    }
-
-  fclose(nafile);
-  fclose(ndfile);
-  fclose(afile);
-  fclose(dfile);
-
-  exit (0);
-}
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..9aa819c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,34 @@
+
+  Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                
+                                                                                     
+  Redistribution and use in source and binary forms, with or without modification,   
+  are permitted provided that the following conditions are met:                      
+                                                                                     
+   · Redistributions of source code must retain the above copyright notice, this     
+     list of conditions and the following disclaimer.                                
+                                                                                     
+   · Redistributions in binary form must reproduce the above copyright notice, this  
+     list of conditions and the following disclaimer in the documentation and/or     
+     other materials provided with the distribution.                                 
+                                                                                     
+   · The name of EWM may not be used to endorse or promote products derived from     
+     this software without specific prior written permission.                        
+                                                                                     
+  THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    
+  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       
+  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   
+  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  
+  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  
+  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      
+                                                                                     
+  For any issues regarding this software and its use, contact EWM at:                
+                                                                                     
+    Eugene W. Myers Jr.                                                              
+    Bautzner Str. 122e                                                               
+    01099 Dresden                                                                    
+    GERMANY                                                                          
+    Email: gene.myers at gmail.com                                                      
+
diff --git a/Makefile b/Makefile
index d42eeae..cf0afb8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,9 @@
-CFLAGS = -O3 -Wall -Wextra -fno-strict-aliasing
+DEST_DIR = ~/bin
+
+CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
 
 ALL = fasta2DB DB2fasta quiva2DB DB2quiva DBsplit DBdust Catrack DBshow DBstats DBrm simulator \
-      fasta2DAM DAM2fasta
+      fasta2DAM DAM2fasta DBdump rangen
 
 all: $(ALL)
 
@@ -12,7 +14,7 @@ DB2fasta: DB2fasta.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DB2fasta DB2fasta.c DB.c QV.c -lm
 
 quiva2DB: quiva2DB.c DB.c DB.h QV.c QV.h
-	gcc $(CFLAGS) -o quiva2DB quiva2DB.c DB.c QV.c -lm
+	gcc $(CFLAGS) -DINTERACTIVE -o quiva2DB quiva2DB.c DB.c QV.c -lm
 
 DB2quiva: DB2quiva.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DB2quiva DB2quiva.c DB.c QV.c -lm
@@ -29,6 +31,9 @@ Catrack: Catrack.c DB.c DB.h QV.c QV.h
 DBshow: DBshow.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DBshow DBshow.c DB.c QV.c -lm
 
+DBdump: DBdump.c DB.c DB.h QV.c QV.h
+	gcc $(CFLAGS) -o DBdump DBdump.c DB.c QV.c -lm
+
 DBstats: DBstats.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DBstats DBstats.c DB.c QV.c -lm
 
@@ -38,30 +43,23 @@ DBrm: DBrm.c DB.c DB.h QV.c QV.h
 simulator: simulator.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o simulator simulator.c DB.c QV.c -lm
 
+rangen: rangen.c
+	gcc $(CFLAGS) -o rangen rangen.c
+
 fasta2DAM: fasta2DAM.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o fasta2DAM fasta2DAM.c DB.c QV.c -lm
 
 DAM2fasta: DAM2fasta.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DAM2fasta DAM2fasta.c DB.c QV.c -lm
 
-DBupgrade.Sep.25.2014: DBupgrade.Sep.25.2014.c DB.c DB.h QV.c QV.h
-	gcc $(CFLAGS) -o DBupgrade.Sep.25.2014 DBupgrade.Sep.25.2014.c DB.c QV.c -lm
-
-DBupgrade.Dec.31.2014: DBupgrade.Dec.31.2014.c DB.c DB.h QV.c QV.h
-	gcc $(CFLAGS) -o DBupgrade.Dec.31.2014 DBupgrade.Dec.31.2014.c DB.c QV.c -lm
-
-DUSTupgrade.Jan.1.2015: DUSTupgrade.Jan.1.2015.c DB.c DB.h QV.c QV.h
-	gcc $(CFLAGS) -o DUSTupgrade.Jan.1.2015 DUSTupgrade.Jan.1.2015.c DB.c QV.c -lm
-
 clean:
 	rm -f $(ALL)
 	rm -fr *.dSYM
-	rm -f DBupgrade.Sep.25.2014 DBupgrade.Dec.31.2014 DUSTupgrade.Jan.1.2015
 	rm -f dazz.db.tar.gz
 
 install:
-	cp $(ALL) ~/bin
+	cp $(ALL) $(DEST_DIR)
 
 package:
 	make clean
-	tar -zcf dazz.db.tar.gz README Makefile *.h *.c
+	tar -zcf dazz.db.tar.gz README.md Makefile *.h *.c
diff --git a/QV.c b/QV.c
index 38f6db4..cdb6a63 100644
--- a/QV.c
+++ b/QV.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Compressor/decompressor for .quiv files: customized Huffman codes for each stream based on
@@ -775,6 +738,12 @@ static int    Nline;         //  Referred by:  QVcoding_Scan
 char *QVentry()
 { return (Read); }
 
+void Set_QV_Line(int line)
+{ Nline = line; }
+
+int Get_QV_Line()
+{ return (Nline); }
+
 //  If nlines == 1 trying to read a single header, nlines = 5 trying to read 5 QV/fasta lines
 //    for a sequence.  Place line j at Read+j*Rmax and the length of every line is returned
 //    unless eof occurs in which case return -1.  If any error occurs return -2.
@@ -884,8 +853,9 @@ static void Unpack_Tag(char *tags, int clen, char *qvs, int rlen, int rchar)
  *
  ********************************************************************************************/
 
-  // Read .quiva file from input, recording stats in the histograms.  If zero is set then
-  //   start the stats anew with this file.
+  // Read up to the next num entries or until eof from the .quiva file on input and record
+  //   frequency statistics.  Copy these entries to the temporary file temp if != NULL.
+  //   If there is an error then -1 is returned, otherwise the number of entries read.
 
 static uint64   delHist[256], insHist[256], mrgHist[256], subHist[256], delRun[256], subRun[256];
 static uint64   totChar;
@@ -893,9 +863,10 @@ static int      delChar, subChar;
 
   // Referred by:  QVcoding_Scan, Create_QVcoding
 
-int QVcoding_Scan(FILE *input)
+int QVcoding_Scan(FILE *input, int num, FILE *temp)
 { char *slash;
   int   rlen;
+  int   i, r;
 
   //  Zero histograms
 
@@ -904,11 +875,8 @@ int QVcoding_Scan(FILE *input)
   bzero(insHist,sizeof(uint64)*256);
   bzero(subHist,sizeof(uint64)*256);
 
-  { int i;
-
-    for (i = 0; i < 256; i++)
-      delRun[i] = subRun[i] = 1;
-  }
+  for (i = 0; i < 256; i++)
+    delRun[i] = subRun[i] = 1;
 
   totChar    = 0;
   delChar    = -1;
@@ -917,37 +885,48 @@ int QVcoding_Scan(FILE *input)
   //  Make a sweep through the .quiva entries, histogramming the relevant things
   //    and figuring out the run chars for the deletion and substition streams
 
-  Nline = 0;
-  while (1)
+  r = 0;
+  for (i = 0; i < num; i++)
     { int well, beg, end, qv;
 
       rlen = Read_Lines(input,1);
       if (rlen == -2)
-        EXIT(1);
+        EXIT(-1);
       if (rlen < 0)
         break;
 
       if (rlen == 0 || Read[0] != '@')
-        { EPRINTF(EPLACE,"Line %d: Header in quiv file is missing\n",Nline);
-          EXIT(1);
+        { EPRINTF(EPLACE,"Line %d: Header in quiva file is missing\n",Nline);
+          EXIT(-1);
         }
       slash = index(Read+1,'/');
       if (slash == NULL)
   	{ EPRINTF(EPLACE,"%s: Line %d: Header line incorrectly formatted ?\n",
                          Prog_Name,Nline);
-          EXIT(1);
+          EXIT(-1);
         }
       if (sscanf(slash+1,"%d/%d_%d RQ=0.%d\n",&well,&beg,&end,&qv) != 4)
         { EPRINTF(EPLACE,"%s: Line %d: Header line incorrectly formatted ?\n",
                          Prog_Name,Nline);
-          EXIT(1);
+          EXIT(-1);
         }
 
+      if (temp != NULL)
+        fputs(Read,temp);
+
       rlen = Read_Lines(input,5);
       if (rlen < 0)
         { if (rlen == -1)
             EPRINTF(EPLACE,"Line %d: incomplete last entry of .quiv file\n",Nline);
-          EXIT(1);
+          EXIT(-1);
+        }
+
+      if (temp != NULL)
+        { fputs(Read,temp);
+          fputs(Read+Rmax,temp);
+          fputs(Read+2*Rmax,temp);
+          fputs(Read+3*Rmax,temp);
+          fputs(Read+4*Rmax,temp);
         }
 
       Histogram_Seqs(delHist,(uint8 *) (Read),rlen);
@@ -980,9 +959,11 @@ int QVcoding_Scan(FILE *input)
         }
       if (subChar >= 0)
         Histogram_Runs( subRun,(uint8 *) (Read+4*Rmax),rlen,subChar);
+
+      r += 1;
     }
 
-  return (0);
+  return (r);
 }
 
   //   Using the statistics in the global stat tables, create the Huffman schemes and write
@@ -1312,7 +1293,7 @@ int Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy
   if (rlen < 0)
     { if (rlen == -1)
         EPRINTF(EPLACE,"Line %d: incomplete last entry of .quiv file\n",Nline);
-      EXIT (1);
+      EXIT (-1);
     }
 
   if (coding->delChar < 0)
@@ -1347,7 +1328,7 @@ int Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy
     Encode_Run(coding->subScheme, coding->sRunScheme, output,
                (uint8 *) (Read+4*Rmax), rlen, coding->subChar);
 
-  return (0);
+  return (rlen);
 }
 
 int Uncompress_Next_QVentry(FILE *input, char **entry, QVcoding *coding, int rlen)
diff --git a/QV.h b/QV.h
index 35fbadc..532b2f4 100644
--- a/QV.h
+++ b/QV.h
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Compressor/decompressor for .quiv files: customized Huffman codes for each stream based on
@@ -50,6 +13,8 @@
 
 #ifndef _QV_COMPRESSOR
 
+#include <stdio.h>
+
 #define _QV_COMPRESSOR
 
   //  The defined constant INTERACTIVE (set in DB.h) determines whether an interactive or
@@ -83,10 +48,16 @@ typedef struct
 int       Read_Lines(FILE *input, int nlines);
 char     *QVentry();
 
-  // Read the .quiva file on input and record frequency statistics.  If there is an error
-  //  then 1 is returned, otherwise 0.
+  // Get and set the line counter for error reporting
+
+void      Set_QV_Line(int line);
+int       Get_QV_Line();
 
-int       QVcoding_Scan(FILE *input);
+  // Read up to the next num entries or until eof from the .quiva file on input and record
+  //   frequency statistics.  Copy these entries to the temporary file temp if != NULL.
+  //   If there is an error then -1 is returned, otherwise the number of entries read.
+
+int       QVcoding_Scan(FILE *input, int num, FILE *temp);
 
   // Given QVcoding_Scan has been called at least once, create an encoding scheme based on
   //   the accumulated statistics and return a pointer to it.  The returned encoding object
@@ -108,8 +79,8 @@ void      Free_QVcoding(QVcoding *coding);
 
   //  Assuming the file pointer is positioned just beyond an entry header line, read the
   //    next set of 5 QV lines, compress them according to 'coding', and output.  If lossy
-  //    is set then the scheme is a lossy one.  A non-zero value is return only if an
-  //    error occured.
+  //    is set then the scheme is a lossy one.  A negative value is returned if an error
+  //    occurred, and the sequence length otherwise.
 
 int      Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy);
 
diff --git a/README b/README
deleted file mode 100644
index e6308e8..0000000
--- a/README
+++ /dev/null
@@ -1,442 +0,0 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
-/************************************************************************************\
-
-UPGRADE & DEVELOPER NOTES ! ! !
-
-  If you have already built a big database and don't want to rebuild it, but do want
-to use a more recent version of the software that entails a change to the data
-structures (currently the updates on Sept 25, 2014 and December 31, 2014), please note
-the routines DBupgrade.Sep.25.2014 and DBupgrade.Dec.31.2014.  These take a DB, say X,
-as an argument, and produce a file .X.ndx which you should then replace .X.idx with.
-To update a very old DB to today's version you will need to run both in sequence.
-
-  Both of the upgrade programs can be made with "make" but are not by default created
-when make is called without an argument.
-
-  For those interested in the details, on September 25, the "beg" and "end" fields went
-from shorts to ints, and on December 31, the "beg" and "end" fields became "fpulse" and
-"rlen", respectively where fpulse = beg and rlen = end-beg.
-
-  Unfortunately, the .dust track formats also changed on Dec.31.2014 and Jan.1.2015.  To
-upgrade said use DUSTupgrade.Jan.1.2015.  This program takes a DB, say X as an argument
-and produces .X.next.anno and .X.next.data which you should then replace .X.dust.* with.
-Of course, it may, if the DB is not too big, be easier and simply to just rerun DBdust.
-
-  Developers should also note carefully that the calling conventions to Open_DB have
-changed and there are new utility routines Number_Digits and Check_Track.
-
-\************************************************************************************/
-
-
-                The Dazzler Database Library
-
-                            Author:  Gene Myers
-                            First:   July 17, 2013
-                            Current: December 31, 2014
-
-  To facilitate the multiple phases of the dazzler assembler, we organize all the read
-data into what is effectively a "database" of the reads and their meta-information.
-The design goals for this data base are as follows:
-
-(1) The database stores the source Pacbio read information in such a way that it can
-       recreate the original input data, thus permitting a user to remove the
-       (effectively redundant) source files.  This avoids duplicating the same data,
-       once in the source file and once in the database.
-
-(2) The data base can be built up incrementally, that is new sequence data can be added
-       to the data base over time.
-
-(3) The data base flexibly allows one to store any meta-data desired for reads.  This
-       is accomplished with the concept of *tracks* that implementors can add as they
-       need them.
-
-(4) The data is held in a compressed form equivalent to the .dexta and .dexqv files of
-       the data extraction module.  Both the .fasta and .quiva information for each
-       read is held in the data base and can be recreated from it.  The .quiva
-       information can be added separately and later on if desired.
-
-(5) To facilitate job parallel, cluster operation of the phases of our assembler, the
-       data base has a concept of a *current partitioning* in which all the reads that
-       are over a given length and optionally unique to a well, are divided up into
-       *blocks* containing roughly a given number of bases, except possibly the last
-       block which may have a short count.  Often programs con be run on blocks or
-       pairs of blocks and each such job is reasonably well balanced as the blocks are
-       all the same size.  One must be careful about changing the partition during an
-       assembly as doing so can void the structural validity of any interim
-       block-based results.
-
-  A Dazzler DB consists of one named, *visible* file, e.g. FOO.db, and several
-*invisible* secondary files encoding various elements of the DB.  The secondary files
-are "invisible" to the UNIX OS in the sense that they begin with a "." and hence are
-not listed by "ls" unless one specifies the -a flag.  We chose to do this so that when
-a user lists the contents of a directory they just see a single name, e.g. FOO.db, that
-is the one used to refer to the DB in commands.  The files associated with a database
-named, say FOO,  are as follows:
-
-(a) "FOO.db": a text file containing
-                 (i)  the list of input files added to the database so far, and
-                 (ii) how to partition the database into blocks (if the partition
-                       parameters have been set).
-
-(b) ".FOO.idx": a binary "index" of all the meta-data about each read allowing, for
-                  example, one to randomly access a read's sequence (in the store
-                  ".FOO.bps").  It is 28N + 88 bytes in size where N is the number of
-                  reads in the database.
-
-(c) ".FOO.bps": a binary compressed "store" of all the DNA sequences.  It is M/4 bytes
-                  in size where M is the total number of base pairs in the database.
-
-(d) ".FOO.qvs": a binary compressed "store" of the 5 Pacbio quality value streams for
-                  the reads.  Its size is roughly 5/3M bytes depending on the
-                  compression acheived.  This file only exists if .quiva files have
-                  been added to the database.
-
-(e) ".FOO.<track>.anno": a *track* containing customized meta-data for each read.  For
-    ".FOO.<track>.data"  example, the DBdust command annotates low complexity intervals
-                         of reads and records the intervals for each read in two files
-                         .FOO.dust.anno & .FOO.dust.data.  Any kind of information
-                         about a read can be recorded, such as micro-sats, repeat
-                         intervals, corrected sequence, etc.  Specific tracks will be
-                         described as modules that produce them are released.
-
-If one does not like the convention of the secondary files being invisible, then
-un-defining the constant HIDE_FILES in DB.h before compiling the library, creates
-commands that do not place a prefixing "." before secondary file names, e.g. FOO.idx
-instead of .FOO.idx.  One then sees all the files realizing a DB when listing the
-contents of a directory with ls.
-
-  While a Dazzler DB holds a collection of Pacbio reads, a Dazzler map DB or DAM holds
-a collection of contigs from a reference genome assembly.  This special type of DB has
-been introduced in order to facilitate the mapping of reads to an assembly and has
-been given the suffix .dam to distinguish it from an ordinary DB.  It is structurally
-identical to a .db except:
-
-(a) there is no concept of quality values, and hence no .FOO.qvs file.
-
-(b) every .fasta scaffold (a sequence with runs of N's between contigs estimating the
-    length of the gap) is broken into a separate contig sequence in the DB and the
-    header for each scaffold is retained in a new .FOO.hdr file.
-
-(c) the original and first and last pulse fields in the meta-data records held in
-    .FOO.idx, hold instead the contig number and the interval of the contig within
-    its original scaffold sequence.
-
-A map DB can equally well be the argument of many of the commands below that operate
-on normal DBs.  In general, a .dam can be an argument anywhere a .db can, with the
-exception of routines or optioned calls to routines that involve quality values, or
-the special routines fasta2DAM and DAM2fasta that create a DAM and reverse said,
-just like the pair fasta2DB and DB2fasta do for a normal DB.  So in general when we
-refer to a database we are referring to either a DB or a DAM.
-
-  The command DBsplit sets or resets the current partition for a database which is
-determined by 3 parameters: (i) the total number of basepairs to place in each block,
-(ii) the minimum read length of reads to include within a block, and (iii) whether or
-not to only include the longest read from a given well or all reads from a well (NB:
-several reads of the same insert in a given well can be produced by the Pacbio
-instrument).  Note that the length and uniqueness parameters effectively select a
-subset of the reads that contribute to the size of a block.  We call this subset the
-*trimmed* data base.  Some commands operate on the entire database, others on the
-trimmed database, and yet others have an option flag that permits them to operate on
-either at the users discretion.  Therefore, one should note carefully to which version
-of the database a command refers to.  This is especially important for any command that
-identifies reads by their index (ordinal position) in the database.
-
-Once the database has been split into blocks, the commands DBshow, DBstats, and DBdust
-below and commands yet to come, such as the local alignment finder dalign, can take a
-block or blocks as arguments.  On the command line this is indicated by supplying the
-name of the DB followed by a period and then a block number, e.g. FOO.3.db or simply
-FOO.3, refers to the 3'rd block of DB FOO (assuming of course it has a current
-partition and said partition has a 3rd block).  One should note carefully that a block
-is a contiguous range of reads such that once it is trimmed has a given size in base
-pairs (as set by DBsplit).  Thus like an entire database, a block can be either
-untrimmed  or trimmed and one needs to again be careful when giving a read index to
-a command such as DBshow.
-
-All programs add suffixes (e.g. .db) as needed.  The commands of the database library
-are currently as follows:
-
-1. fasta2DB [-v] <path:db> ( -f<file> | <input:fasta> ... )
-
-Builds an initial data base, or adds to an existing database, the list of .fasta files
-following the database name argument, or if the -f option is used, the list of .fasta
-files in <file>.  A given .fasta file can only be added once to the DB (this is checked
-by the command).  The .fasta headers must be in the "Pacbio" format (i.e. the output
-of the Pacbio tools or our dextract program) and the well, pulse interval, and read
-quality are extracted from the header and kept with each read record.  If the files
-are being added to an existing database, and the partition settings of the DB have
-already been set (see DBsplit below), then the partitioning of the database is updated
-to include the new data.
-
-2. DB2fasta [-vU] [-w<int(80)>] <path:db>
-
-The set of .fasta files for the given DB are recreated from the DB exactly as they were
-input.  That is, this is a perfect inversion, including the reconstitution of the
-proper .fasta headers.  Because of this property, one can, if desired, delete the
-.fasta source files once they are in the DB as they can always be recreated from it.
-By default the output sequences are in lower case and 80 chars per line.  The -U option
-specifies upper case should be used, and the characters per line, or line width, can be
-set to any positive value with the -w option.
-
-3. quiva2DB [-vl] <path:db> ( -f<file> | <input:quiva> ... )
-
-Adds the given .quiva files on the command line or in the file specified by the
--f option to an existing DB "path".  The input files must be added in the same order
-as the .fasta files were and have the same root names, e.g. FOO.fasta and FOO.quiva.
-The files can be added incrementally but must be added in the same order as the .fasta
-files.  This is enforced by the program.  With the -l option set the compression
-scheme is a bit lossy to get more compression (see the description of dexqv in the
-DEXTRACTOR module).
-
-4. DB2quiva [-vU] <path:db>
-
-The set of .quiva files within the given DB are recreated from the DB exactly as they
-were input.  That is, this is a perfect inversion, including the reconstitution of the
-proper .quiva headers.  Because of this property, one can, if desired, delete the
-.quiva source files once they are in the DB as they can always be recreated from it.
-By .fastq convention each QV vector is output as a line without new-lines, and by
-default the Deletion Tag entry is in lower case letters.  The -U option specifies
-upper case letters should be used instead.
-
-5. fasta2DAM [-v] <path:dam> ( -f<file> | <input:fasta> ... )
-
-Builds a map DB or DAM from the list of .fasta files following the map database name
-argument, or if the -f option is used, the list of .fasta files in <file>.  Any .fasta
-entry that has a run of N's in it will be split into separate "contig" entries and
-the interval of the contig in the original entry recorded.  The header for each .fasta
-entry is saved with the contigs created from it.
-
-6. DAM2fasta [-vU] [-w<int(80)>] <path:dam>
-
-The set of .fasta files for the given map DB or DAM are recreated from the DAM
-exactly as they were input. That is, this is a perfect inversion, including the
-reconstitution of the proper .fasta headers and the concatenation of contigs with
-the proper number of N's between them. By default the output sequences are in lower
-case and 80 chars per line. The -U option specifies upper case should be used, and
-the characters per line, or line width, can be set to any positive value with
-the -w option.
-
-7. DBsplit [-a] [-x<int>] [-s<int(200)>] <path:db|dam>
-
-Divide the database <path>.db or <path>.dam conceptually into a series of blocks
-referable to on the command line as <path>.1, <path>.2, ...  If the -x option is set
-then all reads less than the given length are ignored, and if the -a option is not
-set then secondary reads from a given well are also ignored.  The remaining reads,
-constituting what we call the trimmed DB, are split amongst the blocks so that each
-block is of size -s * 1Mbp except for the last which necessarily contains a smaller
-residual.  The default value for -s is 200Mbp because blocks of this size can be
-compared by our "overlapper" dalign in roughly 16Gb of memory.  The blocks are very
-space efficient in that their sub-index of the master .idx is computed on the fly
-when loaded, and the .bps and .qvs files (if a .db) of base pairs and quality values,
-respectively, is shared with the master DB.  Any relevant portions of tracks
-associated with the DB are also computed on the fly when loading a database block.
-
-8. DBdust [-b] [-w<int(64)>] [-t<double(2.)>] [-m<int(10)>] <path:db|dam>
-
-Runs the symmetric DUST algorithm over the reads in the untrimmed DB <path>.db or
-<path>.dam producing a track .<path>.dust[.anno,.data] that marks all intervals of low
-complexity sequence, where the scan window is of size -w, the threshold for being a
-low-complexity interval is -t, and only perfect intervals of size greater than -m are
-recorded.  If the -b option is set then the definition of low complexity takes into
-account the frequency of a given base.  The command is incremental if given a DB to
-which new data has been added since it was last run on the DB, then it will extend
-the track to include the new reads.  It is important to set this flag for genomes with
-a strong AT/GC bias, albeit the code is a tad slower.  The dust track, if present,
-is understood and used by DBshow, DBstats, and dalign.
-
-DBdust can also be run over an untriimmed DB block in which case it outputs a track
-encoding where the trace file names contain the block number, e.g. .FOO.3.dust.anno
-and .FOO.3.dust.data, given FOO.3 on the command line.  We call this a *block track*.
-This permits job parallelism in block-sized chunks, and the resulting sequence of
-block tracks can then be merged into a track for the entire untrimmed DB with Catrack.
-
-9. Catrack [-v] <path:db|dam> <track:name>
-
-Find all block tracks of the form .<path>.#.<track>... and merge them into a single
-track, .<path>.<track>..., for the given DB or DAM.   The block track files must all
-encode the same kind of track data (this is checked), and the files must exist for
-block 1, 2, 3, ... up to the last block number.
-
-10. DBshow [-unqUQ] [-w<int(80)>] [-m<track>]+
-                    <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
-
-Displays the requested reads in the database <path>.db or <path>.dam.  By default the
-command applies to the trimmed database, but if -u is set then the entire DB is used.
-If no read arguments are given then every read in the database or database block is
-displayed.  Otherwise the input file or the list of supplied integer ranges give the
-ordinal positions in the actively loaded portion of the db.  In the case of a file, it
-should simply contain a read index, one per line.  In the other case, a read range is
-either a lone integer or the symbol $, in which case the read range consists of just
-that read (the last read in the database if $).  One may also give two positive
-integers separated by a dash to indicate a range of integers, where again a $
-represents the index of the last read in the actively loaded db.  For example,
-1 3-5 $ displays reads 1, 3, 4, 5, and the last read in the active db.  As another
-example, 1-$ displays every read in the active db (the default).
-
-By default a .fasta file of the read sequences is displayed.  If the -q option is
-set, then the QV streams are also displayed in a non-standard modification of the
-fasta format.  If the -n option is set then the DNA sequence is *not* displayed.
-If the -Q option is set then a .quiva file is displayed  and in this case the -n
-and -m options mayt not be set (and the -q and -w options have no effect).
-
-If one or more masks are set with the -m option then the track intervals are also
-displayed in an additional header line and the bases within an interval are displayed
-in the case opposite that used for all the other bases.  By default the output
-sequences are in lower case and 80 chars per line.  The -U option specifies upper
-case should be used, and the characters per line, or line width, can be set to any
-positive value with the -w option.
-
-The .fasta or .quiva files that are output can be converted into a DB by fasta2DB
-and quiva2DB (if the -q and -n options are not set and no -m options are set),
-giving one a simple way to make a DB of a subset of the reads for testing purposes.
-
-11. DBstats [-nu] [-b<int(1000)] [-m<track>]+ <path:db|dam>
-
-Show overview statistics for all the reads in the trimmed data base <path>.db or
-<path>.dam, including a histogram of read lengths where the bucket size is set
-with the -b option (default 1000).  If the -u option is given then the untrimmed
-database is summarized.  If the -n option is given then the histogran of read lengths
-is not displayed.  Any track such as a "dust" track that gives a seried of
-intervals along the read can be specified with the -m option in which case a summary
-and a histogram of the interval lengths is displayed.
-
-12. DBrm <path:db|dam> ...
-
-Delete all the files for the given data bases.  Do not use rm to remove a database, as
-there are at least two and often several secondary files for each DB including track
-files, and all of these are removed by DBrm.
-
-13. simulator <genlen:double> [-c<double(20.)>] [-b<double(.5)] [-r<int>]
-                              [-m<int(10000)>]  [-s<int(2000)>]
-                              [-x<int(4000)>]   [-e<double(.15)>]
-                              [-M<file>]
-
-In addition to the DB commands we include here, somewhat tangentially, a simple
-simulator that generates synthetic reads for a random genome.  simulator first
-generates a fake genome of size genlen*1Mb long, that has an AT-bias of -b.  It then
-generates sample reads of mean length -m from a log-normal length distribution with
-standard deviation -s, but ignores reads of length less than -x.  It collects enough
-reads to cover the genome -c times and introduces -e fraction errors into each read
-where the ratio of insertions, deletions, and substitutions are set by defined
-constants INS_RATE (default 73%) and DEL_RATE (default 20%) within generate.c.  One
-can also control the rate at which reads are picked from the forward and reverse
-strands by setting the defined constant FLIP_RATE (default 50/50).  The -r option seeds
-the random number generator for the generation of the genome so that one can
-reproducibly generate the same underlying genome to sample from.  If this parameter is
-missing, then the job id of the invocation seeds the random number generator.  The
-output is sent to the standard output (i.e. it is a UNIX pipe).  The output is in
-Pacbio .fasta format suitable as input to fasta2DB.  Finally, the -M option requests
-that the coordinates from which each read has been sampled are written to the indicated
-file, one line per read, ASCII encoded.  This "map" file essentially tells one where
-every read belongs in an assembly and is very useful for debugging and testing
-purposes.  If a read pair is say b,e then if b < e the read was sampled from [b,e] in
-the forward direction, and if b > e from [e,b] in the reverse direction.
-
-Example:
-
-     A small complete example of most of the commands above. 
-
-> simulator 1.0 -c20. >G.fasta  //  Generate a 20x data sets of a 1Mb genome
-> fasta2DB G G.fasta            //  Create a compressed data base of the reads, G.db
-> rm G.fasta                    //  Redundant, recreate any time with "DB2fasta G"
-> DBsplit -s11 G                //  Split G into 2 parts of size ~ 11MB each
-> DBdust G.1                    //  Produce a "dust" track on each part
-> DBdust G.2
-> Catrack G dust                //  Create one track for all of the DB
-> rm .G.*.dust.*                //  Clean up the sub-tracks
-> DBstats -mdust G              //  Take a look at the statistics for the database
-
-Statistics for all reads in the data set
-
-          1,836 reads        out of           1,836  (100.0%)
-     20,007,090 base pairs   out of      20,007,090  (100.0%)
-
-         10,897 average read length
-          2,192 standard deviation
-
-  Base composition: 0.250(A) 0.250(C) 0.250(G) 0.250(T)
-
-  Distribution of Read Lengths (Bin size = 1,000)
-
-        Bin:      Count  % Reads  % Bases     Average
-     22,000:          1      0.1      0.1       22654
-     21,000:          0      0.1      0.1       22654
-     20,000:          1      0.1      0.2       21355
-     19,000:          0      0.1      0.2       21355
-     18,000:          4      0.3      0.6       19489
-     17,000:          8      0.8      1.3       18374
-     16,000:         19      1.8      2.8       17231
-     15,000:         43      4.1      6.2       16253
-     14,000:         81      8.6     12.0       15341
-     13,000:        146     16.5     21.9       14428
-     12,000:        200     27.4     34.4       13664
-     11,000:        315     44.6     52.4       12824
-     10,000:        357     64.0     71.2       12126
-      9,000:        306     80.7     85.8       11586
-      8,000:        211     92.2     94.8       11208
-      7,000:         95     97.3     98.4       11017
-      6,000:         43     99.7     99.8       10914
-      5,000:          6    100.0    100.0       10897
-
-
-Statistics for dust-track
-
-  There are 158 intervals totaling 1,820 bases (0.0% of all data)
-
-  Distribution of dust intervals (Bin size = 1,000)
-
-        Bin:      Count  % Intervals  % Bases     Average
-          0:        158        100.0    100.0          11
-
-> ls -al
-total 66518744
-drwxr-xr-x+ 177 myersg  staff        6018 Mar  2 13:28 .
-drwxr-xr-x+  20 myersg  staff         680 Feb 26 19:52 ..
--rw-r--r--+   1 myersg  staff     5002464 Mar  2 13:28 .G.bps
--rw-r--r--+   1 myersg  staff       14704 Mar  2 13:28 .G.dust.anno
--rw-r--r--+   1 myersg  staff        1264 Mar  2 13:28 .G.dust.data
--rw-r--r--+   1 myersg  staff       73552 Mar  2 13:28 .G.idx
--rw-r--r--+   1 myersg  staff         162 Mar  2 13:28 G.db
-> cat G.db
-files =         1
-       1836 G Sim
-blocks =         2
-size =        11 cutoff =         0 all = 0
-         0         0
-      1011      1011
-      1836      1836
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..515de32
--- /dev/null
+++ b/README.md
@@ -0,0 +1,501 @@
+# The Dazzler Database Library
+
+## _Author:  Gene Myers_
+## _First:   July 17, 2013_
+
+For typeset documentation, examples of use, and design philosophy please go to
+my [blog](https://dazzlerblog.wordpress.com/command-guides/dazz_db-command-guide).
+
+To facilitate the multiple phases of the dazzler assembler, we organize all the read
+data into what is effectively a "database" of the reads and their meta-information.
+The design goals for this data base are as follows:
+
+1. The database stores the source Pacbio read information in such a way that it can
+recreate the original input data, thus permitting a user to remove the
+(effectively redundant) source files.  This avoids duplicating the same data,
+once in the source file and once in the database.
+
+2. The data base can be built up incrementally, that is new sequence data can be added
+to the data base over time.
+
+3. The data base flexibly allows one to store any meta-data desired for reads.  This
+is accomplished with the concept of *tracks* that implementors can add as they
+need them.
+
+4. The data is held in a compressed form equivalent to the .dexta and .dexqv files of
+the data extraction module.  Both the .fasta and .quiva information for each
+read is held in the data base and can be recreated from it.  The .quiva
+information can be added separately and later on if desired.
+
+5. To facilitate job parallel, cluster operation of the phases of our assembler, the
+data base has a concept of a *current partitioning* in which all the reads that
+are over a given length and optionally unique to a well, are divided up into
+*blocks* containing roughly a given number of bases, except possibly the last
+block which may have a short count.  Often programs con be run on blocks or
+pairs of blocks and each such job is reasonably well balanced as the blocks are
+all the same size.  One must be careful about changing the partition during an
+assembly as doing so can void the structural validity of any interim
+block-based results.
+
+A Dazzler DB consists of one named, *visible* file, e.g. FOO.db, and several
+*invisible* secondary files encoding various elements of the DB.  The secondary files
+are "invisible" to the UNIX OS in the sense that they begin with a "." and hence are
+not listed by "ls" unless one specifies the -a flag.  We chose to do this so that when
+a user lists the contents of a directory they just see a single name, e.g. FOO.db, that
+is the one used to refer to the DB in commands.  The files associated with a database
+named, say FOO,  are as follows:
+
+* "FOO.db": a text file containing
+  1. the list of input files added to the database so far, and
+  2. how to partition the database into blocks (if the partition
+     parameters have been set).
+
+* ".FOO.idx": a binary "index" of all the meta-data about each read allowing, for
+  example, one to randomly access a read's sequence (in the store
+  ".FOO.bps").  It is 28N + 88 bytes in size where N is the number of
+  reads in the database.
+
+* ".FOO.bps": a binary compressed "store" of all the DNA sequences.  It is M/4 bytes
+  in size where M is the total number of base pairs in the database.
+
+* ".FOO.qvs": a binary compressed "store" of the 5 Pacbio quality value streams for
+  the reads.  Its size is roughly 5/3M bytes depending on the
+  compression acheived.  This file only exists if .quiva files have
+  been added to the database.
+
+* ".FOO.\<track\>.[anno,data]": a *track* containing customized meta-data for each read.  For
+  example, the DBdust command annotates low complexity intervals
+  of reads and records the intervals for each read in two files
+  .FOO.dust.anno & .FOO.dust.data.  Any kind of information
+  about a read can be recorded, such as micro-sats, repeat
+  intervals, corrected sequence, etc.  Specific tracks will be
+  described as modules that produce them are released.
+
+If one does not like the convention of the secondary files being invisible, then
+un-defining the constant HIDE_FILES in DB.h before compiling the library, creates
+commands that do not place a prefixing "." before secondary file names, e.g. FOO.idx
+instead of .FOO.idx.  One then sees all the files realizing a DB when listing the
+contents of a directory with ls.
+
+While a Dazzler DB holds a collection of Pacbio reads, a Dazzler map DB or DAM holds
+a collection of contigs from a reference genome assembly.  This special type of DB has
+been introduced in order to facilitate the mapping of reads to an assembly and has
+been given the suffix .dam to distinguish it from an ordinary DB.  It is structurally
+identical to a .db except:
+
+* there is no concept of quality values, and hence no .FOO.qvs file.
+
+* every .fasta scaffold (a sequence with runs of N's between contigs estimating the
+  length of the gap) is broken into a separate contig sequence in the DB and the
+  header for each scaffold is retained in a new .FOO.hdr file.
+
+* the original and first and last pulse fields in the meta-data records held in
+  .FOO.idx, hold instead the contig number and the interval of the contig within
+  its original scaffold sequence.
+
+A map DB can equally well be the argument of many of the commands below that operate
+on normal DBs.  In general, a .dam can be an argument anywhere a .db can, with the
+exception of routines or optioned calls to routines that involve quality values, or
+the special routines fasta2DAM and DAM2fasta that create a DAM and reverse said,
+just like the pair fasta2DB and DB2fasta do for a normal DB.  So in general when we
+refer to a database we are referring to either a DB or a DAM.
+
+The command DBsplit sets or resets the current partition for a database which is
+determined by 3 parameters: (i) the total number of basepairs to place in each block,
+(ii) the minimum read length of reads to include within a block, and (iii) whether or
+not to only include the longest read from a given well or all reads from a well (NB:
+several reads of the same insert in a given well can be produced by the Pacbio
+instrument).  Note that the length and uniqueness parameters effectively select a
+subset of the reads that contribute to the size of a block.  We call this subset the
+*trimmed* data base.  Some commands operate on the entire database, others on the
+trimmed database, and yet others have an option flag that permits them to operate on
+either at the users discretion.  Therefore, one should note carefully to which version
+of the database a command refers to.  This is especially important for any command that
+identifies reads by their index (ordinal position) in the database.
+
+Once the database has been split into blocks, the commands DBshow, DBstats, and DBdust
+below and commands yet to come, such as the local alignment finder dalign, can take a
+block or blocks as arguments.  On the command line this is indicated by supplying the
+name of the DB followed by a period and then a block number, e.g. FOO.3.db or simply
+FOO.3, refers to the 3'rd block of DB FOO (assuming of course it has a current
+partition and said partition has a 3rd block).  One should note carefully that a block
+is a contiguous range of reads such that once it is trimmed has a given size in base
+pairs (as set by DBsplit).  Thus like an entire database, a block can be either
+untrimmed  or trimmed and one needs to again be careful when giving a read index to
+a command such as DBshow.
+
+All programs add suffixes (e.g. .db) as needed.  The commands of the database library
+are currently as follows:
+
+```
+1. fasta2DB [-v] <path:db> ( -f<file> | -i[<name>] | <input:fasta> ... )
+```
+
+Builds an initial data base, or adds to an existing database, either (a) the list of
+.fasta files following the database name argument, or (b) the list of .fasta files in
+\<file\> if the -f option is used, or (c) entries piped from the standard input if the
+-i option is used.  If a faux file name, \<name\>, follows the -i option then all the
+input received is considered to have come from a file by the name of \<name\>.fasta by
+DB2fasta, otherwise it will be sent to the standard output by DB2fasta.  The SMRT cells
+in a given named input (i.e. all sources other than -i without a name) can only be
+added consecutively to the DB (this is checked by the command). The .fasta headers must
+be in the "Pacbio" format (i.e. the output of the Pacbio tools or our dextract program)
+and the well, pulse interval, and read quality are extracted from the header and kept
+with each read record. If the files are being added to an existing database, and the
+partition settings of the DB have already been set (see DBsplit below), then the
+partitioning of the database is updated to include the new data.  A file may contain
+the data from multiple SMRT cells provided the reads for each SMRT cell are consecutive
+in the file.
+
+```
+2. DB2fasta [-vU] [-w<int(80)>] <path:db>
+```
+
+The set of .fasta files for the given DB are recreated from the DB exactly as they were
+input.  That is, this is a perfect inversion, including the reconstitution of the
+proper .fasta headers.  Because of this property, one can, if desired, delete the
+.fasta source files once they are in the DB as they can always be recreated from it.
+Entries imported from the standard input will be place in the faux file name given on
+import, or to the standard output if no name was given. 
+By default the output sequences are in lower case and 80 chars per line.  The -U option
+specifies upper case should be used, and the characters per line, or line width, can be
+set to any positive value with the -w option.
+
+```
+3. quiva2DB [-vl] <path:db> ( -f<file> | -i | <input:quiva> ... )
+```
+
+Adds .quiva streams to an existing DB "path".  The data comes from (a) the given .quiva
+files on the command line, or (b) those in the file specified by the -f option, or
+(c) the standard input if the -i option is given. The input files must be added in the
+same order as the .fasta files were and have the same root names, e.g. FOO.fasta and
+FOO.quiva. The files can be added incrementally but must be added in the same order as
+their corresponding .fasta files. This is enforced by the program. With the -l option
+set the compression scheme is a bit lossy to get more compression (see the description
+of dexqv in the DEXTRACTOR module here).
+
+```
+4. DB2quiva [-vU] <path:db>
+```
+
+The set of .quiva files within the given DB are recreated from the DB exactly as they
+were input.  That is, this is a perfect inversion, including the reconstitution of the
+proper .quiva headers.  Because of this property, one can, if desired, delete the
+.quiva source files once they are in the DB as they can always be recreated from it.
+Entries imported from the standard input will be place in the faux file name given on
+import, or to the standard output if no name was given. 
+By .fastq convention each QV vector is output as a line without new-lines, and by
+default the Deletion Tag entry is in lower case letters.  The -U option specifies
+upper case letters should be used instead.
+
+```
+5. fasta2DAM [-v] <path:dam> ( -f<file> | -i[<name>] | <input:fasta> ... )
+```
+
+Builds an initial map DB or DAM, or adds to an existing DAM, either (a) the list of
+.fasta files following the database name argument, or (b) the list of .fasta files in
+\<file\> if the -f option is used, or (c) entries piped from the standard input if the -i
+option is used.  If a faux file name, \<name\>, follows the -i option then all the input
+received is considered to have come from a file by the name of \<name\>.fasta by
+DB2fasta, otherwise it will be sent to the standard output by DB2fasta.  Any .fasta
+entry that has a run of N's in it will be split into separate "contig" entries and the
+interval of the contig in the original entry recorded. The header for each .fasta entry
+is saved with the contigs created from it.
+
+```
+6. DAM2fasta [-vU] [-w<int(80)>] <path:dam>
+```
+
+The set of .fasta files for the given map DB or DAM are recreated from the DAM
+exactly as they were input. That is, this is a perfect inversion, including the
+reconstitution of the proper .fasta headers and the concatenation of contigs with
+the proper number of N's between them to recreate scaffolds.
+Entries imported from the standard input will be place in the faux file name given on
+import, or to the standard output if no name was given.  By default the output
+sequences are in lower case and 80 chars per line. The -U option specifies upper case
+should be used, and the characters per line, or line width, can be set to any positive
+value with the -w option.
+
+```
+7. DBsplit [-a] [-x<int>] [-s<int(200)>] <path:db|dam>
+```
+
+Divide the database \<path\>.db or \<path\>.dam conceptually into a series of blocks
+referable to on the command line as \<path\>.1, \<path\>.2, ...  If the -x option is set
+then all reads less than the given length are ignored, and if the -a option is not
+set then secondary reads from a given well are also ignored.  The remaining reads,
+constituting what we call the trimmed DB, are split amongst the blocks so that each
+block is of size -s * 1Mbp except for the last which necessarily contains a smaller
+residual.  The default value for -s is 200Mbp because blocks of this size can be
+compared by our "overlapper" dalign in roughly 16Gb of memory.  The blocks are very
+space efficient in that their sub-index of the master .idx is computed on the fly
+when loaded, and the .bps and .qvs files (if a .db) of base pairs and quality values,
+respectively, is shared with the master DB.  Any relevant portions of tracks
+associated with the DB are also computed on the fly when loading a database block.
+If the -f option is set, the split is forced regardless of whether or not the DB in
+question has previously bin split, i.e. one is not interactively asked if they wish
+to proceed.
+
+```
+8. DBdust [-b] [-w<int(64)>] [-t<double(2.)>] [-m<int(10)>] <path:db|dam>
+```
+
+Runs the symmetric DUST algorithm over the reads in the untrimmed DB \<path\>.db or
+\<path\>.dam producing a track .\<path\>.dust[.anno,.data] that marks all intervals of low
+complexity sequence, where the scan window is of size -w, the threshold for being a
+low-complexity interval is -t, and only perfect intervals of size greater than -m are
+recorded.  If the -b option is set then the definition of low complexity takes into
+account the frequency of a given base.  The command is incremental if given a DB to
+which new data has been added since it was last run on the DB, then it will extend
+the track to include the new reads.  It is important to set this flag for genomes with
+a strong AT/GC bias, albeit the code is a tad slower.  The dust track, if present,
+is understood and used by DBshow, DBstats, and dalign.
+
+DBdust can also be run over an untriimmed DB block in which case it outputs a track
+encoding where the trace file names contain the block number, e.g. .FOO.3.dust.anno
+and .FOO.3.dust.data, given FOO.3 on the command line.  We call this a *block track*.
+This permits job parallelism in block-sized chunks, and the resulting sequence of
+block tracks can then be merged into a track for the entire untrimmed DB with Catrack.
+
+```
+9. Catrack [-v] <path:db|dam> <track:name>
+```
+
+Find all block tracks of the form .\<path\>.#.\<track\>... and merge them into a single
+track, .\<path\>.\<track\>..., for the given DB or DAM.   The block track files must all
+encode the same kind of track data (this is checked), and the files must exist for
+block 1, 2, 3, ... up to the last block number.
+
+```
+10. DBshow [-unqUQ] [-w<int(80)>] [-m<track>]+
+                    <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
+```
+
+Displays the requested reads in the database \<path\>.db or \<path\>.dam.  By default the
+command applies to the trimmed database, but if -u is set then the entire DB is used.
+If no read arguments are given then every read in the database or database block is
+displayed.  Otherwise the input file or the list of supplied integer ranges give the
+ordinal positions in the actively loaded portion of the db.  In the case of a file, it
+should simply contain a read index, one per line.  In the other case, a read range is
+either a lone integer or the symbol $, in which case the read range consists of just
+that read (the last read in the database if $).  One may also give two positive
+integers separated by a dash to indicate a range of integers, where again a $
+represents the index of the last read in the actively loaded db.  For example,
+1 3-5 $ displays reads 1, 3, 4, 5, and the last read in the active db.  As another
+example, 1-$ displays every read in the active db (the default).
+
+By default a .fasta file of the read sequences is displayed.  If the -q option is
+set, then the QV streams are also displayed in a non-standard modification of the
+fasta format.  If the -n option is set then the DNA sequence is *not* displayed.
+If the -Q option is set then a .quiva file is displayed  and in this case the -n
+and -m options mayt not be set (and the -q and -w options have no effect).
+
+If one or more masks are set with the -m option then the track intervals are also
+displayed in an additional header line and the bases within an interval are displayed
+in the case opposite that used for all the other bases.  By default the output
+sequences are in lower case and 80 chars per line.  The -U option specifies upper
+case should be used, and the characters per line, or line width, can be set to any
+positive value with the -w option.
+
+The .fasta or .quiva files that are output can be converted into a DB by fasta2DB
+and quiva2DB (if the -q and -n options are not set and no -m options are set),
+giving one a simple way to make a DB of a subset of the reads for testing purposes.
+
+```
+12. DBdump [-rhsiqp] [-uU] [-m<track>]+
+                     <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
+```
+
+Like DBshow, DBdump allows one to display a subset of the reads in the DB and select
+which information to show about them including any mask tracks.  The difference is
+that the information is written in a very simple "1-code" ASCII format that makes it
+easy for one to read and parse the information for further use.  -r requests that each
+read number be displayed (useful if only a subset of reads is requested).  -h prints
+the header information which is the source file name, well #, and pulse range.
+-s requests the sequence be output, -i requests that the intrinsic quality values be
+output, -q requests that the 5 quiva sequences be output, -p requests the repeat
+profile be output (if available), and -m\<track\> requests that mask \<track\> be output.
+Set -u if you want data from the untrimmed database (the default is trimmed) and
+set -U if you'd like upper-case letter used in the DNA sequence strings.
+
+The format is very simple.  Each requested piece of information occurs on a line.  The
+first character of every line is a "1-code" character that tells you what information
+to expect on the line.  The rest of the line contains information where each item is
+separated by a single blank space.  Strings are output as first an integer giving the
+length of the string, a blank space, and then the string terminated by a new-line.
+Intrinsic quality values are between 0 and 50, inclusive, and a vector of said are
+displayed as an alphabetic string where 'a' is 0, 'b' is '1', ... 'z' is 25, 'A' is
+26, 'B' is 27, ... and 'Y' is 50.  Repeat profiles are also displayed as string where
+'_' denotes 0 repetitions, and then 'a' through 'N' denote the values 1 through 40,
+respectively.  
+
+```
+    R #              - read number
+    H # string       - original file name string (header)
+    L # # #          - location: well, pulse start, pulse end
+    Tx #n (#b #e)^#n - x'th track on command line, #n intervals all on same line
+    S # string       - sequence string
+    I # string       - intrinsic quality vector (as an ASCII string)
+    P # string       - repeat profile vector (as an ASCII string)
+    d # string       - Quiva deletion values (as an ASCII string)
+    c # string       - Quiva deletion character string
+    i # string       - Quiva insertion value string
+    m # string       - Quiva merge value string
+    s # string       - Quiva substitution value string
+    + X #            - Total amount of X (X = H or S or I or P or R or M or T#)
+    @ X #            - Maximum amount of X (X = H or S or I or P or T#)
+```
+
+1-code lines that begin with + or @ are always the first lines in the output.  They
+give size information about what is contained in the output.  That is '+ X #' gives
+the number of reads (X=R), the number of masks (X=M), or the total number of
+characters in all headers (X=H), sequences (X=S), intrinsic quality vectors (X=I),
+read profile vector (X=P), or track (X=T#).  And '@ X #' gives the maximum number of
+characters in any header (X=H), sequence (X=S), intrincic quality vector (X=I), read
+profile vector (X=P), or track (X=T#).  The size numbers for the Quiva strings are
+identical to that for the sequence as they are all of the same length for any
+given entry.
+
+```
+12. DBstats [-nu] [-b<int(1000)] [-m<track>]+ <path:db|dam>
+```
+
+Show overview statistics for all the reads in the trimmed data base \<path\>.db or
+\<path\>.dam, including a histogram of read lengths where the bucket size is set
+with the -b option (default 1000).  If the -u option is given then the untrimmed
+database is summarized.  If the -n option is given then the histogran of read lengths
+is not displayed.  Any track such as a "dust" track that gives a series of
+intervals along the read can be specified with the -m option in which case a summary
+and a histogram of the interval lengths is displayed.
+
+```
+13. DBrm <path:db|dam> ...
+```
+
+Delete all the files for the given data bases.  Do not use rm to remove a database, as
+there are at least two and often several secondary files for each DB including track
+files, and all of these are removed by DBrm.
+
+```
+14.  simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] [-e<double(.15)]
+                                  [-c<double(50.)>] [-f<double(.5)>] [-x<int(4000)>]
+                                  [-w<int(80)>] [-r<int>] [-M<file>]
+```
+
+In addition to the DB commands we include here, somewhat tangentially, a simple
+simulator that generates synthetic reads over a given genome reference contained in a
+supplied .dam DB.  The simulator first reconstitutes the scaffolds of the reference
+genome and fills in their gaps (a run of N's in .fasta format indicating the estimate
+gap length) with a random sequence that follows the base distribution of the contigs.
+It will then sample reads from these scaffold sequences.
+
+The simulator generates sample reads of mean length -m from a log-normal length
+distribution with standard deviation -s, but ignores reads of length less than -x. It
+collects enough reads to cover the genome -c times and Introduces -e fraction errors
+into each read where the ratio of insertions, deletions, and substitutions are set by
+defined constants INS_RATE (default 73%) and DEL_RATE (default 20%) within generate.c.
+One can control the rate at which reads are picked from the forward and reverse
+strands with the -f option. The -r option seeds the random number generator for the
+generation process so that one can reproducibly generate the same dataset. If this
+parameter is missing, then the job id of the invocation seeds the random number
+generator effectively guaranteeing a different sampling with each invocation.
+
+The output is sent to the standard output (i.e. it is a UNIX pipe). The output is in
+Pacbio .fasta format suitable as input to fasta2DB. Uppercase letters are used if the
+-U option is given, and the width of each line can be controlled with the -w option.
+
+Finally, the -M option requests that the scaffold and coordinates within said scaffold
+from which each read has been sampled are written to the indicated file, one line per
+read, ASCII encoded. This "map" file essential tells one where every read belongs in
+an assembly and is very useful for debugging and testing purposes. If the map line for
+a read is say 's b e' then if b \< e the read is a perturbed copy of s[b,e] in the
+forward direction, and a perturbed copy s[e,b] in the reverse direction otherwise.
+
+```
+15. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
+```
+
+Generate a random DNA sequence of length genlen*1Mbp that has an AT-bias of -b.
+Output the sequence to the standard output in .fasta format.  Use uppercase letters if
+-U is set and -w base pairs per line (default 80).  The result can then be converted
+into a .dam DB and given to the simulator to create a read database over a random
+synthetic sequence.  The -r option seeds the random number generator for the
+generation process so that one can reproducibly generate the same sequence. If this
+parameter is missing, then the job id of the invocation seeds the random number
+generator effectively guaranteeing a different sequence with each invocation.
+
+Example: A small complete example of most of the commands above. 
+
+```
+> rangen 1.0 >R.fasta           //  Generate a randome 1Mbp sequence R.fasta
+> fasta2DAM R R.fasta           //  Load it into a .dam DB R.dam
+> simulator R -c20. >G.fasta    //  Sample a 20x data sets of the random geneome R
+> fasta2DB G G.fasta            //  Create a compressed data base of the reads, G.db
+> rm G.fasta                    //  Redundant, recreate any time with "DB2fasta G"
+> DBsplit -s11 G                //  Split G into 2 parts of size ~ 11MB each
+> DBdust G.1                    //  Produce a "dust" track on each part
+> DBdust G.2
+> Catrack G dust                //  Create one track for all of the DB
+> rm .G.*.dust.*                //  Clean up the sub-tracks
+> DBstats -mdust G              //  Take a look at the statistics for the database
+
+Statistics for all reads in the data set
+
+          1,836 reads        out of           1,836  (100.0%)
+     20,007,090 base pairs   out of      20,007,090  (100.0%)
+
+         10,897 average read length
+          2,192 standard deviation
+
+  Base composition: 0.250(A) 0.250(C) 0.250(G) 0.250(T)
+
+  Distribution of Read Lengths (Bin size = 1,000)
+
+        Bin:      Count  % Reads  % Bases     Average
+     22,000:          1      0.1      0.1       22654
+     21,000:          0      0.1      0.1       22654
+     20,000:          1      0.1      0.2       21355
+     19,000:          0      0.1      0.2       21355
+     18,000:          4      0.3      0.6       19489
+     17,000:          8      0.8      1.3       18374
+     16,000:         19      1.8      2.8       17231
+     15,000:         43      4.1      6.2       16253
+     14,000:         81      8.6     12.0       15341
+     13,000:        146     16.5     21.9       14428
+     12,000:        200     27.4     34.4       13664
+     11,000:        315     44.6     52.4       12824
+     10,000:        357     64.0     71.2       12126
+      9,000:        306     80.7     85.8       11586
+      8,000:        211     92.2     94.8       11208
+      7,000:         95     97.3     98.4       11017
+      6,000:         43     99.7     99.8       10914
+      5,000:          6    100.0    100.0       10897
+
+
+Statistics for dust-track
+
+  There are 158 intervals totaling 1,820 bases (0.0% of all data)
+
+  Distribution of dust intervals (Bin size = 1,000)
+
+        Bin:      Count  % Intervals  % Bases     Average
+          0:        158        100.0    100.0          11
+
+> ls -al
+total 66518744
+drwxr-xr-x+ 177 myersg  staff        6018 Mar  2 13:28 .
+drwxr-xr-x+  20 myersg  staff         680 Feb 26 19:52 ..
+-rw-r--r--+   1 myersg  staff     5002464 Mar  2 13:28 .G.bps
+-rw-r--r--+   1 myersg  staff       14704 Mar  2 13:28 .G.dust.anno
+-rw-r--r--+   1 myersg  staff        1264 Mar  2 13:28 .G.dust.data
+-rw-r--r--+   1 myersg  staff       73552 Mar  2 13:28 .G.idx
+-rw-r--r--+   1 myersg  staff         162 Mar  2 13:28 G.db
+> cat G.db
+files =         1
+       1836 G Sim
+blocks =         2
+size =        11 cutoff =         0 all = 0
+         0         0
+      1011      1011
+      1836      1836
+```
diff --git a/fasta2DAM.c b/fasta2DAM.c
index d7f75ec..15e074a 100644
--- a/fasta2DAM.c
+++ b/fasta2DAM.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Add .fasta files to a DB:
@@ -72,7 +35,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-v] <path:string> ( -f<file> | <input:fasta> ... )";
+static char *Usage = "[-v] <path:dam> ( -f<file> | -i[<name>] | <input:fasta> ... )";
 
 static char number[128] =
     { 0, 0, 0, 0, 0, 0, 0, 0,
@@ -105,6 +68,8 @@ File_Iterator *init_file_iterator(int argc, char **argv, FILE *input, int first)
 { File_Iterator *it;
 
   it = Malloc(sizeof(File_Iterator),"Allocating file iterator");
+  if (it == NULL)
+    return (NULL);
   it->argc  = argc;
   it->argv  = argv;
   it->input = input;
@@ -131,12 +96,15 @@ int next_file(File_Iterator *it)
       if (fgets(nbuffer,MAX_NAME+8,it->input) == NULL)
         { if (feof(it->input))
             return (0);
-          SYSTEM_ERROR;
+          fprintf(stderr,"%s: IO error reading line %d of -f file of names\n",Prog_Name,it->count);
+          it->name = NULL;
+          return (1);
         }
       if ((eol = index(nbuffer,'\n')) == NULL)
         { fprintf(stderr,"%s: Line %d in file list is longer than %d chars!\n",
                          Prog_Name,it->count,MAX_NAME+7);
           it->name = NULL;
+          return (1);
         }
       *eol = '\0';
       it->count += 1;
@@ -147,21 +115,23 @@ int next_file(File_Iterator *it)
 
 
 int main(int argc, char *argv[])
-{ FILE  *ostub;
+{ FILE  *istub, *ostub;
   char  *dbname;
   char  *root, *pwd;
 
   FILE  *bases, *indx, *hdrs;
-  int64  boff, hoff;
+  int64  boff, ioff, hoff, noff;
 
   int    ifiles, ofiles;
   char **flist;
 
   HITS_DB db;
   int     ureads;
+  int64   offset, hdrset;
 
-  int     VERBOSE;
+  char   *PIPE;
   FILE   *IFILE;
+  int     VERBOSE;
 
   //   Process command line
 
@@ -171,6 +141,7 @@ int main(int argc, char *argv[])
     ARG_INIT("fasta2DAM")
 
     IFILE = NULL;
+    PIPE  = NULL;
 
     j = 1;
     for (i = 1; i < argc; i++)
@@ -186,6 +157,20 @@ int main(int argc, char *argv[])
                 exit (1);
               }
             break;
+          case 'i':
+            PIPE = argv[i]+2;
+            if (PIPE[0] != '\0')
+              { FILE *temp;
+
+                temp = fopen(PIPE,"w");
+                if (temp == NULL)
+                  { fprintf(stderr,"%s: Cannot create -i name '%s'\n",Prog_Name,argv[i]+2);
+                    exit (1);
+                  }
+                fclose(temp);
+                unlink(PIPE);
+              }
+            break;
         }
       else
         argv[j++] = argv[i];
@@ -193,70 +178,148 @@ int main(int argc, char *argv[])
 
     VERBOSE = flags['v'];
 
-    if ((IFILE == NULL && argc <= 2) || (IFILE != NULL && argc != 2))
+    if (IFILE != NULL && PIPE != NULL)
+      { fprintf(stderr,"%s: Cannot use both -f and -i together\n",Prog_Name);
+        exit (1);
+      }
+
+    if ( (IFILE == NULL && PIPE == NULL && argc <= 2) ||
+        ((IFILE != NULL || PIPE != NULL) && argc != 2))
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
         exit (1);
       }
   }
 
-  //  Try to open DB file, if present then adding to DB, otherwise creating new DB.  Set up
+  //  Try to open DAM file, if present then adding to DAM, otherwise creating new DAM.  Set up
   //  variables as follows:
   //    dbname = full name of map index = <pwd>/<root>.dam
+  //    istub  = open db file (if adding) or NULL (if creating)
   //    ostub  = new image of db file (will overwrite old image at end)
   //    bases  = .bps file positioned for appending
   //    indx   = .idx file positioned for appending
+  //    hdrs   = .hdr file positioned for appending
   //    ureads = # of reads currently in db
-  //    boff   = offset in .bps at which to place next sequence
-  //    hoff   = offset in .hdr at which to place next header prefix
+  //    offset = offset in .bps at which to place next sequence
+  //    hdrset = offset in .hdr at which to place next header
+  //    ioff   = offset in .idx file to truncate to if command fails
+  //    boff   = offset in .bps file to truncate to if command fails
+  //    hoff   = offset in .hdr file to truncate to if command fails
   //    ifiles = # of .fasta files to add
   //    ofiles = # of .fasta files added so far
-  //    flist  = [0..ifiles] list of file names (root only) added to db so far
+  //    flist  = [0..ifiles+ofiles] list of file names (root only) added to dam so far
+
+  { int i;
+
+    root   = Root(argv[1],".dam");
+    pwd    = PathTo(argv[1]);
+    dbname = Strdup(Catenate(pwd,"/",root,".dam"),"Allocating map index name");
+    if (dbname == NULL)
+      exit (1);
+
+    if (PIPE != NULL)
+      ifiles = 1;
+    else if (IFILE == NULL)
+      ifiles = argc-2;
+    else
+      { File_Iterator *ng;
+
+        ifiles = 0;
+        ng = init_file_iterator(argc,argv,IFILE,2);
+        if (ng == NULL)
+          exit (1);
+        while (next_file(ng))
+          { if (ng->name == NULL)
+              exit (1);
+            ifiles += 1;
+          }
+        free(ng);
+      }
 
-  root   = Root(argv[1],".dam");
-  pwd    = PathTo(argv[1]);
-  dbname = Strdup(Catenate(pwd,"/",root,".dam"),"Allocating map index name");
-  if (dbname == NULL)
-    exit (1);
+    bases = NULL;
+    indx  = NULL;
+    hdrs  = NULL;
+    ostub = NULL;
+    ioff  = 0;
+    boff  = 0;
+    hoff  = 0;
+
+    istub = fopen(dbname,"r");
+    if (istub == NULL)
+      { ofiles = 0;
+
+        bases = Fopen(Catenate(pwd,PATHSEP,root,".bps"),"w+");
+        indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"w+");
+        hdrs  = Fopen(Catenate(pwd,PATHSEP,root,".hdr"),"w+");
+        if (bases == NULL || indx == NULL || hdrs == NULL)
+          goto error;
 
-  if (IFILE == NULL)
-    ifiles = argc-2;
-  else
-    { File_Iterator *ng;
+        fwrite(&db,sizeof(HITS_DB),1,indx);
 
-      ifiles = 0;
-      ng = init_file_iterator(argc,argv,IFILE,2);
-      while (next_file(ng))
-        ifiles += 1;
-      free(ng);
-    }
-  ofiles = 0;
+        ureads  = 0;
+        offset  = 0;
+        hdrset  = 0;
+        boff    = 0;
+        ioff    = 0;
+        hoff    = 0;
+      }
+    else
+      { if (fscanf(istub,DB_NFILE,&ofiles) != 1)
+          { fprintf(stderr,"%s: %s.dam is corrupted, read failed 1\n",Prog_Name,root);
+            goto error;
+          }
 
-  bases = Fopen(Catenate(pwd,PATHSEP,root,".bps"),"w");
-  indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"w");
-  hdrs  = Fopen(Catenate(pwd,PATHSEP,root,".hdr"),"w");
-  if (bases == NULL || indx == NULL || hdrs == NULL)
-    exit (1);
+        bases = Fopen(Catenate(pwd,PATHSEP,root,".bps"),"r+");
+        indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
+        hdrs  = Fopen(Catenate(pwd,PATHSEP,root,".hdr"),"r+");
+        if (bases == NULL || indx == NULL || hdrs == NULL)
+          goto error;
 
-  flist  = (char **) Malloc(sizeof(char *)*ifiles,"Allocating file list");
-  fwrite(&db,sizeof(HITS_DB),1,indx);
+        if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
+          { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
+            goto error;
+          }
+        fseeko(bases,0,SEEK_END);
+        fseeko(indx, 0,SEEK_END);
+        fseeko(hdrs, 0,SEEK_END);
+
+        ureads = db.ureads;
+        offset = ftello(bases);
+        hdrset = ftello(hdrs);
+        boff   = offset;
+        ioff   = ftello(indx);
+        hoff   = hdrset;
+      }
 
-  ureads  = 0;
-  boff    = 0;
-  hoff    = 0;
+    flist  = (char **) Malloc(sizeof(char *)*(ofiles+ifiles),"Allocating file list");
+    ostub  = Fopen(Catenate(pwd,"/",root,".dbx"),"w+");
+    if (ostub == NULL || flist == NULL)
+      goto error;
 
-  ostub  = Fopen(dbname,"w+");
-  if (ostub == NULL)
-    exit (1);
+    fprintf(ostub,DB_NFILE,ofiles+ifiles);
+    noff = 0;
+    for (i = 0; i < ofiles; i++)
+      { int  last;
+        char prolog[MAX_NAME], fname[MAX_NAME];
 
-  fprintf(ostub,DB_NFILE,argc-2);
+        if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
+          { fprintf(stderr,"%s: %s.dam is corrupted, read failed 5(%d)\n",Prog_Name,root,i);
+            goto error;
+          }
+        if ((flist[i] = Strdup(fname,"Adding to file list")) == NULL)
+          goto error;
+        noff = ftello(ostub);
+        fprintf(ostub,DB_FDATA,last,fname,prolog);
+      }
+  }
 
   { int            maxlen;
     int64          totlen, count[4];
     int            rmax;
     HITS_READ      prec;
     char          *read;
+    int            append;
     int            c;
-    File_Iterator *ng;
+    File_Iterator *ng = NULL;
 
     //  Buffer for accumulating .fasta sequence over multiple lines
 
@@ -272,31 +335,66 @@ int main(int argc, char *argv[])
 
     //  For each .fasta file do:
 
-    ng = init_file_iterator(argc,argv,IFILE,2);
-    while (next_file(ng))
+    if (PIPE == NULL)
+      { ng = init_file_iterator(argc,argv,IFILE,2);
+        if (ng == NULL)
+          goto error;
+      }
+
+    while (PIPE != NULL || next_file(ng))
       { FILE *input;
         char *path, *core;
         int   nline, eof, rlen;
 
-        if (ng->name == NULL) goto error;
+        //  Open it: <path>/<core>.fasta if file, stdin otherwise with core = PIPE or "stdout"
 
-        //  Open it: <path>/<core>.fasta, check that core is not too long,
-        //           and checking that it is not already in flist.
+        if (PIPE == NULL)
 
-        path  = PathTo(ng->name);
-        core  = Root(ng->name,".fasta");
-        if ((input = Fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
-          goto error;
-        free(path);
+          { if (ng->name == NULL) goto error;
+
+            path  = PathTo(ng->name);
+            core  = Root(ng->name,".fasta");
+            if ((input = Fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
+              goto error;
+            free(path);
+          }
+
+        else
+
+          { if (PIPE[0] == '\0')
+              core  = Strdup("stdout","Allocating file name");
+            else
+              core  = Strdup(PIPE,"Allocating file name");
+            if (core == NULL)
+              goto error;
+            input = stdin;
+          }
+
+        //  Check that core is not too long and name is unique or last source if PIPE'd
+        //    If PIPE'd and last source, then overwrite last file line of new stub file.
+
+        if (strlen(core) >= MAX_NAME)
+          { fprintf(stderr,"%s: File name over %d chars: '%.200s'\n",
+                           Prog_Name,MAX_NAME,core);
+            goto error;
+          }
 
         { int j;
 
-          for (j = 0; j < ofiles; j++)
-            if (strcmp(core,flist[j]) == 0)
-              { fprintf(stderr,"%s: File %s.fasta is already in database %s.db\n",
-                               Prog_Name,core,Root(argv[1],".db"));
-                goto error;
-              }
+          append = 0;
+          if (PIPE == NULL || (strcmp(core,"stdout") != 0 &&
+                 (ofiles == 0 || strcmp(core,flist[ofiles-1]) != 0)))
+            { for (j = 0; j < ofiles; j++)
+                if (strcmp(core,flist[j]) == 0)
+                  { fprintf(stderr,"%s: File %s.fasta is already in database %s.dam\n",
+                                   Prog_Name,core,Root(argv[1],".dam"));
+                    goto error;
+                  }
+            }
+	  else if (ofiles > 0 && strcmp(core,flist[ofiles-1]) == 0)
+            { fseeko(ostub,noff,SEEK_SET);
+              append = 1;
+            }
         }
 
         //  Get the header of the first line.  If the file is empty skip.
@@ -314,12 +412,17 @@ int main(int argc, char *argv[])
         //   Add the file name to flist
 
         if (VERBOSE)
-          { fprintf(stderr,"Adding '%s' ...\n",core);
+          { if (PIPE != NULL && PIPE[0] == '\0')
+              fprintf(stderr,"Adding scaffolds from stdio ...\n");
+            else
+              fprintf(stderr,"Adding '%s.fasta' ...\n",core);
             fflush(stderr);
           }
-        flist[ofiles++] = core;
+    
+        if (!append)
+          flist[ofiles++] = core;
 
-        // Check that the first line has PACBIO format, and record prolog in 'prolog'.
+        // Check that the first line is a header line
 
         if (read[strlen(read)-1] != '\n')
           { fprintf(stderr,"File %s.fasta, Line 1: Fasta line is too long (> %d chars)\n",
@@ -336,13 +439,12 @@ int main(int argc, char *argv[])
         { int i, x, n;
 
           while (!eof)
-            { int hlen, hline;
+            { int hlen;
 
               read[rlen] = '>';
               hlen = strlen(read+rlen);
               fwrite(read+rlen,1,hlen,hdrs);
 
-              hline = nline;
               rlen  = 0;
               while (1)
                 { eof = (fgets(read+rlen,MAX_NAME,input) == NULL);
@@ -382,8 +484,8 @@ int main(int argc, char *argv[])
                   pbeg = i;
                   prec.fpulse = pbeg;
                   prec.origin = n++;
-                  prec.boff   = boff;
-                  prec.coff   = hoff;
+                  prec.boff   = offset;
+                  prec.coff   = hdrset;
                   prec.flags  = DB_BEST;
                   while (i < rlen)
                     { x = number[(int) read[i]];
@@ -400,51 +502,186 @@ int main(int argc, char *argv[])
                   Compress_Read(plen,read+pbeg);
                   clen = COMPRESSED_LEN(plen);
                   fwrite(read+pbeg,1,clen,bases);
-                  boff += clen;
+                  offset += clen;
 
                   fwrite(&prec,sizeof(HITS_READ),1,indx);
                 }
-              hoff += hlen;
+              hdrset += hlen;
             }
+        }
 
-          fprintf(ostub,DB_FDATA,ureads,core,core);
+        fprintf(ostub,DB_FDATA,ureads,core,core);
 
+        if (PIPE == NULL)
           fclose(input);
-        }
+        else
+          break;
       }
 
     //  Update relevant fields in db record
 
     db.ureads = ureads;
-    db.treads = ureads;
-    for (c = 0; c < 4; c++)
-      db.freq[c] = (float) ((1.*count[c])/totlen);
-    db.totlen = totlen;
-    db.maxlen = maxlen;
-    db.cutoff = -1;
+    if (istub == NULL)
+      { for (c = 0; c < 4; c++)
+          db.freq[c] = (float) ((1.*count[c])/totlen);
+        db.totlen = totlen;
+        db.maxlen = maxlen;
+        db.cutoff = -1;
+      }
+    else
+      { for (c = 0; c < 4; c++)
+          db.freq[c] = (float) ((db.freq[c]*db.totlen + (1.*count[c]))/(db.totlen + totlen));
+        db.totlen += totlen;
+        if (maxlen > db.maxlen)
+          db.maxlen = maxlen;
+      }
   }
 
+  //  If db has been previously partitioned then calculate additional partition points and
+  //    write to new db file image
+
+  if (db.cutoff >= 0)
+    { int64      totlen, dbpos, size;
+      int        nblock, ireads, tfirst, rlen;
+      int        ufirst, cutoff, allflag;
+      HITS_READ  record;
+      int        i;
+
+      if (VERBOSE)
+        { fprintf(stderr,"Updating block partition ...\n");
+          fflush(stderr);
+        }
+
+      //  Read the block portion of the existing db image getting the indices of the first
+      //    read in the last block of the exisiting db as well as the partition parameters.
+      //    Copy the old image block information to the new block information (except for
+      //    the indices of the last partial block)
+
+      if (fscanf(istub,DB_NBLOCK,&nblock) != 1)
+        { fprintf(stderr,"%s: %s.dam is corrupted, read failed 2\n",Prog_Name,root);
+          goto error;
+        }
+      dbpos = ftello(ostub);
+      fprintf(ostub,DB_NBLOCK,0);
+      if (fscanf(istub,DB_PARAMS,&size,&cutoff,&allflag) != 3)
+        { fprintf(stderr,"%s: %s.dam is corrupted, read failed 3\n",Prog_Name,root);
+          goto error;
+        }
+      fprintf(ostub,DB_PARAMS,size,cutoff,allflag);
+      if (allflag)
+        allflag = 0;
+      else
+        allflag = DB_BEST;
+
+      nblock -= 1;
+      for (i = 0; i <= nblock; i++)
+        { if (fscanf(istub,DB_BDATA,&ufirst,&tfirst) != 2)
+            { fprintf(stderr,"%s: %s.dam is corrupted, read failed 4\n",Prog_Name,root);
+              goto error;
+            }
+          fprintf(ostub,DB_BDATA,ufirst,tfirst);
+        }
+
+      //  Seek the first record of the last block of the existing db in .idx, and then
+      //    compute and record partition indices for the rest of the db from this point
+      //    forward.
+
+      fseeko(indx,sizeof(HITS_DB)+sizeof(HITS_READ)*ufirst,SEEK_SET);
+      totlen = 0;
+      ireads = 0;
+      for (i = ufirst; i < ureads; i++)
+        { if (fread(&record,sizeof(HITS_READ),1,indx) != 1)
+            { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
+              goto error;
+            }
+          rlen = record.rlen;
+          if (rlen >= cutoff)
+            { ireads += 1;
+              tfirst += 1;
+              totlen += rlen;
+              if (totlen >= size)
+                { fprintf(ostub," %9d %9d\n",i+1,tfirst);
+                  totlen = 0;
+                  ireads = 0;
+                  nblock += 1;
+                }
+            }
+        }
+
+      if (ireads > 0)
+        { fprintf(ostub,DB_BDATA,ureads,tfirst);
+          nblock += 1;
+        }
+
+      db.treads = tfirst;
+
+      fseeko(ostub,dbpos,SEEK_SET);
+
+      fprintf(ostub,DB_NBLOCK,nblock);    //  Rewind and record the new number of blocks
+    }
+  else
+    db.treads = ureads;
+
+  rewind(ostub);
+  fprintf(ostub,DB_NFILE,ofiles);
+
   rewind(indx);
   fwrite(&db,sizeof(HITS_DB),1,indx);   //  Write the finalized db record into .idx
 
+  if (istub != NULL)
+    fclose(istub);
   fclose(ostub);
   fclose(indx);
   fclose(bases);
   fclose(hdrs);
 
+  rename(Catenate(pwd,"/",root,".dbx"),dbname);   //  New image replaces old image
+
   exit (0);
 
-  //  Error exit:  Remove the .idx, .bps, and .dam files
+  //  Error exit:  Either truncate or remove the .idx, .bps, and .hdr files as appropriate.
+  //               Remove the new image file <pwd>/<root>.dbx
 
 error:
-  fclose(ostub);
-  fclose(indx);
-  fclose(hdrs);
-  fclose(bases);
-  unlink(Catenate(pwd,PATHSEP,root,".idx"));
-  unlink(Catenate(pwd,PATHSEP,root,".bps"));
-  unlink(Catenate(pwd,PATHSEP,root,".hdr"));
-  unlink(Catenate(pwd,"/",root,".dam"));
+  if (ioff != 0)
+    { fseeko(indx,0,SEEK_SET);
+      if (ftruncate(fileno(indx),ioff) < 0)
+        fprintf(stderr,"%s: Fatal: could not restore %s.idx after error, truncate failed\n",
+                       Prog_Name,root);
+    }
+  if (boff != 0)
+    { fseeko(bases,0,SEEK_SET);
+      if (ftruncate(fileno(bases),boff) < 0)
+        fprintf(stderr,"%s: Fatal: could not restore %s.bps after error, truncate failed\n",
+                       Prog_Name,root);
+    }
+  if (hoff != 0)
+    { fseeko(hdrs,0,SEEK_SET);
+      if (ftruncate(fileno(hdrs),hoff) < 0)
+        fprintf(stderr,"%s: Fatal: could not restore %s.hdr after error, truncate failed\n",
+                       Prog_Name,root);
+    }
+  if (indx != NULL)
+    { fclose(indx);
+      if (ioff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".idx"));
+    }
+  if (bases != NULL)
+    { fclose(bases);
+      if (boff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".bps"));
+    }
+  if (hdrs != NULL)
+    { fclose(hdrs);
+      if (hoff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".hdr"));
+    }
+  if (ostub != NULL)
+    { fclose(ostub);
+      unlink(Catenate(pwd,"/",root,".dbx"));
+    }
+  if (istub != NULL)
+    fclose(istub);
 
   exit (1);
 }
diff --git a/fasta2DB.c b/fasta2DB.c
index 50061d5..071bbe0 100644
--- a/fasta2DB.c
+++ b/fasta2DB.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Add .fasta files to a DB:
@@ -72,7 +35,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-v] <path:string> ( -f<file> | <input:fasta> ... )";
+static char *Usage = "[-v] <path:string> ( -f<file> | -i[<name>] | <input:fasta> ... )";
 
 static char number[128] =
     { 0, 0, 0, 0, 0, 0, 0, 0,
@@ -105,6 +68,8 @@ File_Iterator *init_file_iterator(int argc, char **argv, FILE *input, int first)
 { File_Iterator *it;
 
   it = Malloc(sizeof(File_Iterator),"Allocating file iterator");
+  if (it == NULL)
+    return (NULL);
   it->argc  = argc;
   it->argv  = argv;
   it->input = input;
@@ -131,12 +96,15 @@ int next_file(File_Iterator *it)
       if (fgets(nbuffer,MAX_NAME+8,it->input) == NULL)
         { if (feof(it->input))
             return (0);
-          SYSTEM_ERROR;
+          fprintf(stderr,"%s: IO error reading line %d of -f file of names\n",Prog_Name,it->count);
+          it->name = NULL;
+          return (1);
         }
       if ((eol = index(nbuffer,'\n')) == NULL)
         { fprintf(stderr,"%s: Line %d in file list is longer than %d chars!\n",
                          Prog_Name,it->count,MAX_NAME+7);
           it->name = NULL;
+          return (1);
         }
       *eol = '\0';
       it->count += 1;
@@ -154,17 +122,18 @@ int main(int argc, char *argv[])
   FILE  *bases, *indx;
   int64  boff, ioff;
 
-  int    ifiles, ofiles;
+  int    ifiles, ofiles, ocells;
   char **flist;
 
   HITS_DB db;
   int     ureads;
   int64   offset;
 
+  char   *PIPE;
   FILE   *IFILE;
   int     VERBOSE;
 
-  //   Usage: [-v] <path:string> ( -f<file> | <input:fasta> ... )
+  //   Process command line
 
   { int   i, j, k;
     int   flags[128];
@@ -172,6 +141,7 @@ int main(int argc, char *argv[])
     ARG_INIT("fasta2DB")
 
     IFILE = NULL;
+    PIPE  = NULL;
 
     j = 1;
     for (i = 1; i < argc; i++)
@@ -187,6 +157,20 @@ int main(int argc, char *argv[])
                 exit (1);
               }
             break;
+          case 'i':
+            PIPE = argv[i]+2;
+            if (PIPE[0] != '\0')
+              { FILE *temp;
+
+                temp = fopen(PIPE,"w");
+                if (temp == NULL)
+                  { fprintf(stderr,"%s: Cannot create -i name '%s'\n",Prog_Name,argv[i]+2);
+                    exit (1);
+                  }
+                fclose(temp);
+                unlink(PIPE);
+              }
+            break;
         }
       else
         argv[j++] = argv[i];
@@ -194,7 +178,13 @@ int main(int argc, char *argv[])
 
     VERBOSE = flags['v'];
 
-    if ((IFILE == NULL && argc <= 2) || (IFILE != NULL && argc != 2))
+    if (IFILE != NULL && PIPE != NULL)
+      { fprintf(stderr,"%s: Cannot use both -f and -i together\n",Prog_Name);
+        exit (1);
+      }
+
+    if ( (IFILE == NULL && PIPE == NULL && argc <= 2) || 
+        ((IFILE != NULL || PIPE != NULL) && argc != 2))
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
         exit (1);
       }
@@ -212,10 +202,11 @@ int main(int argc, char *argv[])
   //    ioff   = offset in .idx file to truncate to if command fails
   //    boff   = offset in .bps file to truncate to if command fails
   //    ifiles = # of .fasta files to add
-  //    ofiles = # of .fasta files already in db
-  //    flist  = [0..ifiles+ofiles] list of file names (root only) added to db so far
+  //    ofiles = # of .fasta files added so far
+  //    ocells = # of SMRT cells already in db
+  //    flist  = [0..ifiles+ocells] list of file names (root only) added to db so far
 
-  { int     i;
+  { int i;
 
     root   = Root(argv[1],".db");
     pwd    = PathTo(argv[1]);
@@ -223,26 +214,39 @@ int main(int argc, char *argv[])
     if (dbname == NULL)
       exit (1);
 
-    if (IFILE == NULL)
+    if (PIPE != NULL)
+      ifiles = 1;
+    else if (IFILE == NULL)
       ifiles = argc-2;
     else
       { File_Iterator *ng;
 
         ifiles = 0;
         ng = init_file_iterator(argc,argv,IFILE,2);
+        if (ng == NULL)
+          exit (1);
         while (next_file(ng))
-          ifiles += 1;
+          { if (ng->name == NULL)
+              exit (1);
+            ifiles += 1;
+          }
         free(ng);
       }
 
+    bases = NULL;
+    indx  = NULL;
+    ostub = NULL;
+    ioff  = 0;
+    boff  = 0;
+
     istub = fopen(dbname,"r");
     if (istub == NULL)
-      { ofiles = 0;
+      { ocells = 0;
 
         bases = Fopen(Catenate(pwd,PATHSEP,root,".bps"),"w+");
         indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"w+");
         if (bases == NULL || indx == NULL)
-          exit (1);
+          goto error;
 
         fwrite(&db,sizeof(HITS_DB),1,indx);
 
@@ -252,16 +256,20 @@ int main(int argc, char *argv[])
         ioff    = 0;
       }
     else
-      { if (fscanf(istub,DB_NFILE,&ofiles) != 1)
-          SYSTEM_ERROR
+      { if (fscanf(istub,DB_NFILE,&ocells) != 1)
+          { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
+            goto error;
+          }
 
         bases = Fopen(Catenate(pwd,PATHSEP,root,".bps"),"r+");
         indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
         if (bases == NULL || indx == NULL)
-          exit (1);
+          goto error;
 
         if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
-          SYSTEM_ERROR
+          { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
+            goto error;
+          }
         fseeko(bases,0,SEEK_END);
         fseeko(indx, 0,SEEK_END);
 
@@ -271,20 +279,24 @@ int main(int argc, char *argv[])
         ioff   = ftello(indx);
       }
 
-    flist  = (char **) Malloc(sizeof(char *)*(ofiles+ifiles),"Allocating file list");
+    flist  = (char **) Malloc(sizeof(char *)*(ocells+ifiles),"Allocating file list");
     ostub  = Fopen(Catenate(pwd,"/",root,".dbx"),"w+");
     if (ostub == NULL || flist == NULL)
-      exit (1);
+      goto error;
 
-    fprintf(ostub,DB_NFILE,ofiles+ifiles);
-    for (i = 0; i < ofiles; i++)
+    fprintf(ostub,DB_NFILE,ocells+ifiles);   //  Will write again with correct value at end
+    ofiles = 0;
+    for (i = 0; i < ocells; i++)
       { int  last;
         char prolog[MAX_NAME], fname[MAX_NAME];
 
         if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
-          SYSTEM_ERROR
-        if ((flist[i] = Strdup(fname,"Adding to file list")) == NULL)
-          goto error;
+          { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
+            goto error;
+          }
+        if (ofiles == 0 || strcmp(flist[ofiles-1],fname) != 0)
+          if ((flist[ofiles++] = Strdup(fname,"Adding to file list")) == NULL)
+            goto error;
         fprintf(ostub,DB_FDATA,last,fname,prolog);
       }
   }
@@ -295,7 +307,7 @@ int main(int argc, char *argv[])
     HITS_READ     *prec;
     char          *read;
     int            c;
-    File_Iterator *ng;
+    File_Iterator *ng = NULL;
 
     //  Buffer for reads all in the same well
 
@@ -316,25 +328,47 @@ int main(int argc, char *argv[])
     for (c = 0; c < 4; c++)  //  count of acgt in new .fasta files
       count[c] = 0;
 
-    //  For each new .fasta file do:
+    //  For each new input source do
 
-    ng = init_file_iterator(argc,argv,IFILE,2);
-    while (next_file(ng))
+    if (PIPE == NULL)
+      { ng = init_file_iterator(argc,argv,IFILE,2);  //  Setup to read .fasta's
+        if (ng == NULL)                              //    from command line or file
+          goto error;
+      }
+
+    while (PIPE != NULL || next_file(ng))
       { FILE *input;
-        char *path, *core, *prolog;
+        char  prolog[MAX_NAME];
+        char *path, *core;
         int   nline, eof, rlen, pcnt;
         int   pwell;
 
-        if (ng->name == NULL) goto error;
+        //  Open it: <path>/<core>.fasta if file, stdin otherwise with core = PIPE or "stdout"
 
-        //  Open it: <path>/<core>.fasta, check that core is not too long,
-        //           and checking that it is not already in flist.
+        if (PIPE == NULL)
+
+          { if (ng->name == NULL) goto error;
+
+            path  = PathTo(ng->name);
+            core  = Root(ng->name,".fasta");
+            if ((input = Fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
+              goto error;
+            free(path);
+          }
+
+        else
+
+          { if (PIPE[0] == '\0')
+              core  = Strdup("stdout","Allocating file name");
+            else
+              core  = Strdup(PIPE,"Allocating file name");
+            if (core == NULL)
+              goto error;
+            input = stdin;
+          }
+
+        //  Check that core is not too long and name is unique or last source if PIPE'd
 
-        path  = PathTo(ng->name);
-        core  = Root(ng->name,".fasta");
-        if ((input = Fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
-          goto error;
-        free(path);
         if (strlen(core) >= MAX_NAME)
           { fprintf(stderr,"%s: File name over %d chars: '%.200s'\n",
                            Prog_Name,MAX_NAME,core);
@@ -343,12 +377,15 @@ int main(int argc, char *argv[])
 
         { int j;
 
-          for (j = 0; j < ofiles; j++)
-            if (strcmp(core,flist[j]) == 0)
-              { fprintf(stderr,"%s: File %s.fasta is already in database %s.db\n",
-                               Prog_Name,core,Root(argv[1],".db"));
-                goto error;
-              }
+ 
+          if (PIPE == NULL || (strcmp(core,"stdout") != 0 &&
+                 (ofiles == 0 || strcmp(core,flist[ofiles-1]) != 0)))
+            for (j = 0; j < ofiles; j++)
+              if (strcmp(core,flist[j]) == 0)
+                { fprintf(stderr,"%s: File %s.fasta is already in database %s.db\n",
+                                 Prog_Name,core,Root(argv[1],".db"));
+                  goto error;
+                }
         }
 
         //  Get the header of the first line.  If the file is empty skip.
@@ -359,6 +396,8 @@ int main(int argc, char *argv[])
         eof   = (fgets(read,MAX_NAME,input) == NULL);
         if (eof || strlen(read) < 1)
           { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
+            if (input == stdin)
+              break;
             fclose(input);
             free(core);
             continue;
@@ -367,12 +406,15 @@ int main(int argc, char *argv[])
         //   Add the file name to flist
 
         if (VERBOSE)
-          { fprintf(stderr,"Adding '%s' ...\n",core);
+          { if (PIPE != NULL && PIPE[0] == '\0')
+              fprintf(stderr,"Adding reads from stdio ...\n");
+            else
+              fprintf(stderr,"Adding '%s.fasta' ...\n",core);
             fflush(stderr);
           }
         flist[ofiles++] = core;
 
-        // Check that the first line has PACBIO format, and record prolog in 'prolog'.
+        // Check that the first line is a header and has PACBIO format.
 
         if (read[strlen(read)-1] != '\n')
           { fprintf(stderr,"File %s.fasta, Line 1: Fasta line is too long (> %d chars)\n",
@@ -390,9 +432,8 @@ int main(int argc, char *argv[])
           find = index(read+1,'/');
           if (find != NULL && sscanf(find+1,"%d/%d_%d RQ=0.%d\n",&well,&beg,&end,&qv) >= 3)
             { *find = '\0';
-              prolog = Strdup(read+1,"Extracting prolog");
+              strcpy(prolog,read+1);
               *find = '/';
-              if (prolog == NULL) goto error;
             }
           else
             { fprintf(stderr,"File %s.fasta, Line %d: Pacbio header line format error\n",
@@ -407,7 +448,7 @@ int main(int argc, char *argv[])
 
           pwell = -1;
           while (!eof)
-            { int   beg, end, clen, hline;
+            { int   beg, end, clen;
               int   well, qv;
               char *find;
 
@@ -419,9 +460,9 @@ int main(int argc, char *argv[])
                 }
               *find = '\0';
               if (strcmp(read+(rlen+1),prolog) != 0)
-                { fprintf(stderr,"File %s.fasta, Line %d: Pacbio header line name inconsisten\n",
-                                 core,nline);
-                  goto error;
+                { fprintf(ostub,DB_FDATA,ureads,core,prolog);
+                  ocells += 1;
+                  strcpy(prolog,read+(rlen+1));
                 }
               *find = '/';
               x = sscanf(find+1,"%d/%d_%d RQ=0.%d\n",&well,&beg,&end,&qv);
@@ -433,16 +474,20 @@ int main(int argc, char *argv[])
               else if (x == 3)
                 qv = 0;
 
-              hline = nline;
               rlen  = 0;
               while (1)
                 { eof = (fgets(read+rlen,MAX_NAME,input) == NULL);
                   nline += 1;
                   x = strlen(read+rlen)-1;
                   if (read[rlen+x] != '\n')
-                    { fprintf(stderr,"File %s.fasta, Line %d:",core,nline);
-                      fprintf(stderr," Fasta line is too long (> %d chars)\n",MAX_NAME-2);
-                      goto error;
+                    { if (read[rlen] == '>')
+                        { fprintf(stderr,"File %s.fasta, Line %d:",core,nline);
+                          fprintf(stderr," Fasta header line is too long (> %d chars)\n",
+                                         MAX_NAME-2);
+                          goto error;
+                        }
+                      else
+                        x += 1;
                     }
                   if (eof || read[rlen] == '>')
                     break;
@@ -510,7 +555,7 @@ int main(int argc, char *argv[])
             }
 
           //  Complete processing of .fasta file: flush last well group, write file line
-          //      in db image, free prolog, and close file
+          //      in db image, and close file
 
           x = 0;
           for (i = 1; i < pcnt; i++)
@@ -518,12 +563,15 @@ int main(int argc, char *argv[])
               x = i;
           prec[x].flags |= DB_BEST;
           fwrite(prec,sizeof(HITS_READ),pcnt,indx);
-
-          fprintf(ostub,DB_FDATA,ureads,core,prolog);
         }
 
-        free(prolog);
-        fclose(input);
+        fprintf(ostub,DB_FDATA,ureads,core,prolog);
+        ocells += 1;
+
+        if (input != stdin)
+          fclose(input);
+        else
+          break;
       }
 
     //  Finished loading all sequences: update relevant fields in db record
@@ -566,22 +614,27 @@ int main(int argc, char *argv[])
       //    the indices of the last partial block)
 
       if (fscanf(istub,DB_NBLOCK,&nblock) != 1)
-        SYSTEM_ERROR
+        { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
+          goto error;
+        }
       dbpos = ftello(ostub);
       fprintf(ostub,DB_NBLOCK,0);
       if (fscanf(istub,DB_PARAMS,&size,&cutoff,&allflag) != 3)
-        SYSTEM_ERROR
-      fprintf(ostub,DB_PARAMS,size,cutoff,allflag); 
+        { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
+          goto error;
+        }
+      fprintf(ostub,DB_PARAMS,size,cutoff,allflag);
       if (allflag)
         allflag = 0;
       else
         allflag = DB_BEST;
-      size *= 1000000ll;
 
       nblock -= 1;
       for (i = 0; i <= nblock; i++)
         { if (fscanf(istub,DB_BDATA,&ufirst,&tfirst) != 2)
-            SYSTEM_ERROR
+            { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
+              goto error;
+            }
           fprintf(ostub,DB_BDATA,ufirst,tfirst);
         }
 
@@ -594,7 +647,9 @@ int main(int argc, char *argv[])
       ireads = 0;
       for (i = ufirst; i < ureads; i++)
         { if (fread(&record,sizeof(HITS_READ),1,indx) != 1)
-            SYSTEM_ERROR
+            { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
+              goto error;
+            }
           rlen = record.rlen;
           if (rlen >= cutoff && (record.flags & DB_BEST) >= allflag)
             { ireads += 1;
@@ -626,7 +681,7 @@ int main(int argc, char *argv[])
   fwrite(&db,sizeof(HITS_DB),1,indx);   //  Write the finalized db record into .idx
 
   rewind(ostub);                        //  Rewrite the number of files actually added
-  fprintf(ostub,DB_NFILE,ofiles);
+  fprintf(ostub,DB_NFILE,ocells);
 
   if (istub != NULL)
     fclose(istub);
@@ -645,24 +700,31 @@ error:
   if (ioff != 0)
     { fseeko(indx,0,SEEK_SET);
       if (ftruncate(fileno(indx),ioff) < 0)
-        SYSTEM_ERROR
+        fprintf(stderr,"%s: Fatal: could not restore %s.idx after error, truncate failed\n",
+                       Prog_Name,root);
     }
   if (boff != 0)
     { fseeko(bases,0,SEEK_SET);
       if (ftruncate(fileno(bases),boff) < 0)
-        SYSTEM_ERROR
+        fprintf(stderr,"%s: Fatal: could not restore %s.bps after error, truncate failed\n",
+                       Prog_Name,root);
+    }
+  if (indx != NULL)
+    { fclose(indx);
+      if (ioff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".idx"));
+    }
+  if (bases != NULL)
+    { fclose(bases);
+      if (boff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".bps"));
+    }
+  if (ostub != NULL)
+    { fclose(ostub);
+      unlink(Catenate(pwd,"/",root,".dbx"));
     }
-  fclose(indx);
-  fclose(bases);
-  if (ioff == 0)
-    unlink(Catenate(pwd,PATHSEP,root,".idx"));
-  if (boff == 0)
-    unlink(Catenate(pwd,PATHSEP,root,".bps"));
-
   if (istub != NULL)
     fclose(istub);
-  fclose(ostub);
-  unlink(Catenate(pwd,"/",root,".dbx"));
 
   exit (1);
 }
diff --git a/quiva2DB.c b/quiva2DB.c
index 0ec2628..6d099f5 100644
--- a/quiva2DB.c
+++ b/quiva2DB.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Adds the given .quiva files to an existing DB "path".  The input files must be added in
@@ -59,13 +22,16 @@
 #include "DB.h"
 #include "QV.h"
 
+//  Compiled in INTERACTIVE mode as all routines must return with an error
+//    so that cleanup and restore is possible.
+
 #ifdef HIDE_FILES
 #define PATHSEP "/."
 #else
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-vl] <path:string> ( -f<file> | <input:quiva> ... )";
+static char *Usage = "[-vl] <path:string> ( -f<file> | -i | <input:quiva> ... )";
 
 typedef struct
   { int    argc;
@@ -79,6 +45,8 @@ File_Iterator *init_file_iterator(int argc, char **argv, FILE *input, int first)
 { File_Iterator *it;
 
   it = Malloc(sizeof(File_Iterator),"Allocating file iterator");
+  if (it == NULL)
+    return (NULL);
   it->argc  = argc;
   it->argv  = argv;
   it->input = input;
@@ -105,12 +73,15 @@ int next_file(File_Iterator *it)
       if (fgets(nbuffer,MAX_NAME+8,it->input) == NULL)
         { if (feof(it->input))
             return (0);
-          SYSTEM_ERROR;
+          fprintf(stderr,"%s: IO error reading line %d of -f file of names\n",Prog_Name,it->count);
+          it->name = NULL;
+          return (1);
         }
       if ((eol = index(nbuffer,'\n')) == NULL)
         { fprintf(stderr,"%s: Line %d in file list is longer than %d chars!\n",
                          Prog_Name,it->count,MAX_NAME+7);
           it->name = NULL;
+          return (1);
         }
       *eol = '\0';
       it->count += 1;
@@ -121,15 +92,23 @@ int next_file(File_Iterator *it)
 
 
 int main(int argc, char *argv[])
-{ FILE      *istub, *quiva, *indx;
+{ FILE      *istub;
+  char      *root, *pwd;
+
+  FILE      *quiva, *indx;
   int64      coff;
-  int        ofile;
+
   HITS_DB    db;
   HITS_READ *reads;
+  int        nfiles;
+
+  FILE      *temp;
+  char      *tname;
 
   int        VERBOSE;
   int        LOSSY;
-  FILE      *IFILE;
+  int        PIPE;
+  FILE      *INFILE;
 
   //  Process command line
 
@@ -138,18 +117,18 @@ int main(int argc, char *argv[])
 
     ARG_INIT("quiva2DB")
 
-    IFILE = NULL;
+    INFILE = NULL;
 
     j = 1;
     for (i = 1; i < argc; i++)
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            ARG_FLAGS("vl")
+            ARG_FLAGS("vli")
             break;
           case 'f':
-            IFILE = fopen(argv[i]+2,"r");
-            if (IFILE == NULL)
+            INFILE = fopen(argv[i]+2,"r");
+            if (INFILE == NULL)
               { fprintf(stderr,"%s: Cannot open file of inputs '%s'\n",Prog_Name,argv[i]+2);
                 exit (1);
               }
@@ -161,192 +140,335 @@ int main(int argc, char *argv[])
 
     VERBOSE = flags['v'];
     LOSSY   = flags['l'];
+    PIPE    = flags['i'];
+
+    if (INFILE != NULL && PIPE)
+      { fprintf(stderr,"%s: Cannot use both -f and -i together\n",Prog_Name);
+        exit (1);
+      }
 
-    if ((IFILE == NULL && argc <= 2) || (IFILE != NULL && argc != 2))
+    if ( (INFILE == NULL && ! PIPE && argc <= 2) || 
+        ((INFILE != NULL || PIPE) && argc != 2))
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
         exit (1);
       }
   }
 
-  //  Open DB stub file and index, load db and read records.  Confirm that the .fasta files
-  //    corresponding to the command line .quiva files are in the DB and in order where the
-  //    index of the first file is ofile and the index of the first read to be added is ofirst.
-  //    Record in coff the current size of the .qvs file in case an error occurs and it needs
-  //    to be truncated back to its size at the start.
-
-  { int            i;
-    char          *pwd, *root;
-    int            nfiles;
-    File_Iterator *ng;
-
-    root   = Root(argv[1],".db");
-    pwd    = PathTo(argv[1]);
-    istub  = Fopen(Catenate(pwd,"/",root,".db"),"r");
-    if (istub == NULL)
-      exit (1);
+  //  Open DB stub file, index, and .qvs file for appending.  Load db and read records,
+  //    get number of cells from stub file, and note current offset to end of .qvs
 
-    indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
-    if (indx == NULL)
+  root   = Root(argv[1],".db");
+  pwd    = PathTo(argv[1]);
+  istub  = Fopen(Catenate(pwd,"/",root,".db"),"r");
+  if (istub == NULL)
+    { fprintf(stderr,"%s",Ebuffer);
+      exit (1);
+    }
+  if (fscanf(istub,DB_NFILE,&nfiles) != 1)
+    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",root,Prog_Name);
       exit (1);
-    if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
-      SYSTEM_ERROR
+    }
 
-    reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*db.ureads,"Allocating DB index");
-    if (reads == NULL)
+  indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
+  if (indx == NULL)
+    { fprintf(stderr,"%s",Ebuffer);
+      exit (1);
+    }
+  if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
       exit (1);
-    if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
-      SYSTEM_ERROR
+    }
 
-    { int   first, last;
-      char  prolog[MAX_NAME], fname[MAX_NAME];
-      char *core;
+  reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*db.ureads,"Allocating DB index");
+  if (reads == NULL)
+    { fprintf(stderr,"%s",Ebuffer);
+      exit (1);
+    }
+  if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+      exit (1);
+    }
 
-      ng = init_file_iterator(argc,argv,IFILE,2);
-      if ( ! next_file(ng))
-        { fprintf(stderr,"%s: file list is empty!\n",Prog_Name);
-          exit (1);
-        }
-      if (ng->name == NULL) exit (1);
+  quiva = NULL;
+  temp  = NULL;
+  coff  = 0;
 
-      core = Root(ng->name,".quiva");
+  if (reads[0].coff < 0)
+    quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"w");
+  else
+    quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"r+");
 
-      if (fscanf(istub,DB_NFILE,&nfiles) != 1)
-        SYSTEM_ERROR
-      first = 0;
-      for (i = 0; i < nfiles; i++)
-        { if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
-            SYSTEM_ERROR
-          if (strcmp(core,fname) == 0)
-            break;
-          first = last;
-        }
-      if (i >= nfiles)
-        { fprintf(stderr,"%s: %s.fasta has never been added to DB\n",Prog_Name,core);
-          exit (1);
-        }
+  tname = Strdup(Catenate(".",PATHSEP,root,Numbered_Suffix("",getpid(),".tmp")),
+                 "Allocating temporary name");
+  temp = Fopen(tname,"w+");
 
-      ofile  = i;
-      if (first > 0 && reads[first-1].coff < 0)
-        { fprintf(stderr,"%s: Predecessor of %s.quiva has not been added yet\n",Prog_Name,core);
-          exit (1);
-        }
-      if (reads[first].coff >= 0)
-        { fprintf(stderr,"%s: %s.quiva has already been added\n",Prog_Name,core);
-          exit (1);
-        }
+  if (quiva == NULL || temp == NULL)
+    { fprintf(stderr,"%s",Ebuffer);
+      goto error;
+    }
+  fseeko(quiva,0,SEEK_END);
+  coff = ftello(quiva);
+
+  //  Do a merged traversal of cell lines in .db stub file and .quiva files to be
+  //    imported, driving the loop with the cell line #
+
+  { FILE          *input = NULL;
+    char          *path = NULL;
+    char          *core = NULL;
+    File_Iterator *ng = NULL;
+    char           lname[MAX_NAME];
+    int            first, last, cline;
+    int            cell;
+
+    if (!PIPE)
+      { ng = init_file_iterator(argc,argv,INFILE,2);
+        if (ng == NULL)
+          { fprintf(stderr,"%s",Ebuffer);
+            goto error;
+          }
+      }
 
-      while (next_file(ng))
-        { if (ng->name == NULL)
-            exit (1);
-          core = Root(ng->name,".quiva");
-          if (++i >= nfiles)
-            { fprintf(stderr,"%s: %s.fasta has never been added to DB\n",Prog_Name,core);
-              exit (1);
+    for (cell = 0; cell < nfiles; cell++)
+      { char  prolog[MAX_NAME], fname[MAX_NAME];
+
+        if (cell == 0)
+
+          //  First addition, a pipe: find the first cell that does not have .quiva's yet
+          //     (error if none) and set input source to stdin.
+
+          if (PIPE)
+            { first = 0;
+              while (cell < nfiles)
+                { if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
+                    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",core,Prog_Name);
+                      goto error;
+                    }
+                  if (reads[first].coff < 0)
+                    break;
+                  first = last;
+                  cell += 1;
+                }
+              if (cell >= nfiles)
+                { fprintf(stderr,"%s: All .quiva's have already been added !?\n",Prog_Name);
+                  goto error;
+                }
+
+              input = stdin;
+
+              if (VERBOSE)
+                { fprintf(stderr,"Adding quiva's from stdin ...\n");
+                  fflush(stderr);
+                }
+              cline = 0;
             }
-          if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
-            SYSTEM_ERROR
-          if (strcmp(core,fname) != 0)
-            { fprintf(stderr,"%s: Files not being added in order (expect %s, given %s)",
-                             Prog_Name,fname,core);
-              exit (1);
+
+          //  First addition, not a pipe: then get first .quiva file name (error if not one) to
+          //    add, find the first cell name whose file name matches (error if none), check that
+          //    the previous .quiva's have been added and this is the next slot.  Then open
+          //    the .quiva file for compression
+
+          else
+            { if (! next_file(ng))
+                { fprintf(stderr,"%s: file list is empty!\n",Prog_Name);
+                  goto error;
+                }
+              if (ng->name == NULL)
+                { fprintf(stderr,"%s",Ebuffer);
+                  goto error;
+                }
+  
+              core = Root(ng->name,".quiva");
+              path = PathTo(ng->name);
+              if ((input = Fopen(Catenate(path,"/",core,".quiva"),"r")) == NULL)
+                { fprintf(stderr,"%s",Ebuffer);
+                  goto error;
+                }
+  
+              first = 0;
+              while (cell < nfiles)
+                { if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
+                    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",core,Prog_Name);
+                      goto error;
+                    }
+                  if (strcmp(core,fname) == 0)
+                    break;
+                  first = last;
+                  cell += 1;
+                }
+              if (cell >= nfiles)
+                { fprintf(stderr,"%s: %s.fasta has never been added to DB\n",Prog_Name,core);
+                  goto error;
+                }
+        
+              if (first > 0 && reads[first-1].coff < 0)
+                { fprintf(stderr,"%s: Predecessor of %s.quiva has not been added yet\n",
+                                 Prog_Name,core);
+                  goto error;
+                }
+              if (reads[first].coff >= 0)
+                { fprintf(stderr,"%s: %s.quiva has already been added\n",Prog_Name,core);
+                  goto error;
+                }
+  
+              if (VERBOSE)
+                { fprintf(stderr,"Adding '%s.quiva' ...\n",core);
+                  fflush(stderr);
+                }
+              cline = 0;
             }
-        }
 
-      if (ofile == 0)
-        quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"w");
-      else
-        quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"r+");
-      if (quiva == NULL)
-        exit (1);
+        //  Not the first addition: get next cell line.  If not a pipe and the file name is new,
+        //    then close the current .quiva, open the next one and after ensuring the names
+        //    match, open it for compression
 
-      fseeko(quiva,0,SEEK_END);
-      coff = ftello(quiva);
+        else
+          { first = last;  
+            strcpy(lname,fname);
+            if (fscanf(istub,DB_FDATA,&last,fname,prolog) != 3)
+              { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",core,Prog_Name);
+                goto error;
+              }
+            if (PIPE)
+              { int c;
+                if ((c = fgetc(input)) == EOF)
+                  break;
+                ungetc(c,input);
+              }
+            else if (strcmp(lname,fname) != 0)
+              { if (fgetc(input) != EOF)
+                  { fprintf(stderr,"%s: Too many reads in %s.quiva while handling %s.fasta\n",
+                                   Prog_Name,core,fname);
+                    goto error;
+                  }
+
+                fclose(input);
+                free(path);
+                free(core);
+
+                if ( ! next_file(ng))
+                  break;
+                if (ng->name == NULL)
+                  { fprintf(stderr,"%s",Ebuffer);
+                    goto error;
+                  }
+
+                path = PathTo(ng->name);
+                core = Root(ng->name,".quiva");
+                if ((input = Fopen(Catenate(path,"/",core,".quiva"),"r")) == NULL)
+                  { fprintf(stderr,"%s",Ebuffer);
+                    goto error;
+                  }
+
+                if (strcmp(core,fname) != 0)
+                  { fprintf(stderr,"%s: Files not being added in order (expect %s, given %s)\n",
+                                   Prog_Name,fname,core);
+                    goto error;
+                  }
+
+                if (VERBOSE)
+                  { fprintf(stderr,"Adding '%s.quiva' ...\n",core);
+                    fflush(stderr);
+                  }
+                cline = 0;
+              }
+          }
 
-      free(core);
-      free(ng);
-    }
+        //  Compress reads [first..last) from open .quiva appending to .qvs and record
+        //    offset in .coff field of reads (offset of first in a cell is to the compression
+        //    table).
 
-    free(root);
-    free(pwd);
-  }
+        { int64     qpos;
+          QVcoding *coding;
+          int       i, s;
 
-  //  For each .quiva file, determine its compression scheme in a fast scan and append it to
-  //    the .qvs file  Then compress every .quiva entry in the file, appending its compressed
-  //    form to the .qvs file as you go and recording the offset in the .qvs in the .coff field
-  //    of each read record (*except* the first, that points at the compression scheme immediately
-  //    preceding it).  Ensure that the # of .quiva entries matches the # of .fasta entries
-  //    in each added file.
-
-  { int            i;
-    int            last, cur;
-    File_Iterator *ng;
-
-    //  For each .quiva file do:
-
-    rewind(istub);
-    if (fscanf(istub,"files = %*d\n") != 0)
-      SYSTEM_ERROR
-
-    last = 0;
-    for (i = 0; i < ofile; i++)
-      if (fscanf(istub,"  %9d %*s %*s\n",&last) != 1)
-        SYSTEM_ERROR
-
-    ng  = init_file_iterator(argc,argv,IFILE,2);
-    cur = last;
-    while (next_file(ng))
-      { FILE     *input;
-        int64     qpos;
-        char     *pwd, *root;
-        QVcoding *coding;
-
-        //  Open next .quiva file and create its compression scheme
-
-        pwd  = PathTo(ng->name);
-        root = Root(ng->name,".quiva");
-        if ((input = Fopen(Catenate(pwd,"/",root,".quiva"),"r")) == NULL)
-          goto error;
-
-        if (VERBOSE)
-          { fprintf(stderr,"Analyzing '%s' ...\n",root);
-            fflush(stderr);
-          }
+          rewind(temp);
+          if (ftruncate(fileno(temp),0) < 0)
+            { fprintf(stderr,"%s: System error: could not truncate temporary file\n",Prog_Name);
+              goto error;
+            }
+          Set_QV_Line(cline);
+          s = QVcoding_Scan(input,last-first,temp);
+          if (s < 0)
+            { fprintf(stderr,"%s",Ebuffer);
+              goto error;
+            }
+          if (s != last-first)
+            { if (PIPE)
+                fprintf(stderr,"%s: Insufficient # of reads on input while handling %s.fasta\n",
+                               Prog_Name,fname);
+              else
+                fprintf(stderr,"%s: Insufficient # of reads in %s.quiva while handling %s.fasta\n",
+                               Prog_Name,core,fname);
+              goto error;
+            }
 
-        QVcoding_Scan(input);
-        coding = Create_QVcoding(LOSSY);
-        coding->prefix = Strdup(".qvs","Allocating header prefix");
+          coding = Create_QVcoding(LOSSY);
+          if (coding == NULL)
+            { fprintf(stderr,"%s",Ebuffer);
+              goto error;
+            }
 
-        qpos = ftello(quiva);
-        Write_QVcoding(quiva,coding);
+          coding->prefix = Strdup(".qvs","Allocating header prefix");
+          if (coding->prefix == NULL)
+            { fprintf(stderr,"%s",Ebuffer);
+              goto error;
+            }
 
-        //  Then compress and append to the .qvs each compressed QV entry
+          qpos = ftello(quiva);
+          Write_QVcoding(quiva,coding);
+
+          //  Then compress and append to the .qvs each compressed QV entry
  
-        if (VERBOSE)
-          { fprintf(stderr,"Compressing '%s' ...\n",root);
-            fflush(stderr);
-          }
+          rewind(temp);
+          Set_QV_Line(cline);
+          for (i = first; i < last; i++)
+            { s = Read_Lines(temp,1);
+              if (s < -1)
+                { fprintf(stderr,"%s",Ebuffer);
+                  goto error;
+                }
+              reads[i].coff = qpos;
+              s = Compress_Next_QVentry(temp,quiva,coding,LOSSY);
+              if (s < 0)
+                { fprintf(stderr,"%s",Ebuffer);
+                  goto error;
+                }
+              if (s != reads[i].rlen)
+                { fprintf(stderr,"%s: Length of quiva %d is different than fasta in DB\n",
+                                 Prog_Name,i+1);
+                  goto error;
+                }
+              qpos = ftello(quiva);
+            }
+          cline = Get_QV_Line();
 
-        rewind(input);
-        while (Read_Lines(input,1) > 0)
-          { reads[cur++].coff = qpos;
-            Compress_Next_QVentry(input,quiva,coding,LOSSY);
-            qpos = ftello(quiva);
-          }
+          Free_QVcoding(coding);
+        }
+      }
 
-        if (fscanf(istub,"  %9d %*s %*s\n",&last) != 1)
-          SYSTEM_ERROR
-        if (last != cur)
-          { fprintf(stderr,"%s: Number of reads in %s.quiva doesn't match number in %s.fasta\n",
-                           Prog_Name,root,root);
+    if (fgetc(input) != EOF)
+      { if (PIPE)
+          fprintf(stderr,"%s: Too many reads on input while handling %s.fasta\n",
+                         Prog_Name,lname);
+        else
+          fprintf(stderr,"%s: Too many reads in %s.quiva while handling %s.fasta\n",
+                         Prog_Name,core,lname);
+        goto error;
+      }
+    if ( ! PIPE && cell >= nfiles)
+      { fclose(input);
+        free(core);
+        free(path);
+        if (next_file(ng))
+          { if (ng->name == NULL)
+              { fprintf(stderr,"%s",Ebuffer);
+                goto error;
+              }
+            core = Root(ng->name,".quiva");
+            fprintf(stderr,"%s: %s.fasta has never been added to DB\n",Prog_Name,core);
             goto error;
           }
-
-        Free_QVcoding(coding);
-        free(root);
-        free(pwd);
-    }
-
-    free(ng);
+      }
   }
 
   //  Write the db record and read index into .idx and clean up
@@ -358,6 +480,8 @@ int main(int argc, char *argv[])
   fclose(istub);
   fclose(indx);
   fclose(quiva);
+  fclose(temp);
+  unlink(tname);
 
   exit (0);
 
@@ -367,18 +491,20 @@ error:
   if (coff != 0)
     { fseeko(quiva,0,SEEK_SET);
       if (ftruncate(fileno(quiva),coff) < 0)
-        SYSTEM_ERROR
+        fprintf(stderr,"%s: Fatal: could not restore %s.qvs after error, truncate failed\n",
+                       Prog_Name,root);
+    }
+  if (quiva != NULL)
+    { fclose(quiva);
+      if (coff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".qvs"));
+    }
+  if (temp != NULL)
+    { fclose(temp);
+      unlink(tname);
     }
   fclose(istub);
   fclose(indx);
-  fclose(quiva);
-  if (coff == 0)
-    { char *root = Root(argv[1],".db");
-      char *pwd  = PathTo(argv[1]);
-      unlink(Catenate(pwd,PATHSEP,root,".qvs"));
-      free(pwd);
-      free(root);
-     }
 
   exit (1);
 }
diff --git a/rangen.c b/rangen.c
new file mode 100644
index 0000000..27d8354
--- /dev/null
+++ b/rangen.c
@@ -0,0 +1,182 @@
+/*******************************************************************************************
+ *
+ *  Synthetic DNA shotgun sequence generator
+ *     Generate a fake genome of size genlen*1Mb long, that has an AT-bias of -b.
+ *     The -r parameter seeds the random number generator for the generation of the genome
+ *     so that one can reproducbile produce the same underlying genome to sample from.  If
+ *     missing, then the job id of the invocation seeds the generator.  The sequence is
+ *     sent to the standard output in .fasta format.
+ *
+ *  Author:  Gene Myers
+ *  Date  :  April 2016
+ *
+ ********************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+
+static char *Usage = "<genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]";
+
+static int    GENOME;     // -g option * 1Mbp
+static double BIAS;       // -b option
+static int    HASR = 0;   // -r option is set?
+static int    SEED;       // -r option
+static int    WIDTH;      // -w option
+static int    UPPER;      // -U option
+
+static char *Prog_Name;
+
+//  Generate a random DNA sequence of length *len* with an AT-bias of BIAS.
+//    Uppercase letters if UPPER is set, lowercase otherwise.
+
+static char *random_genome(int len)
+{ static char  *seq = NULL;
+  static double x, PRA, PRC, PRG;
+  int    i;
+
+  if (seq == NULL)
+    { PRA = BIAS/2.;
+      PRC = (1.-BIAS)/2. + PRA;
+      PRG = (1.-BIAS)/2. + PRC;
+      if ((seq = (char *) malloc(WIDTH+1)) == NULL)
+        { fprintf(stderr,"%s: Allocating genome sequence\n",Prog_Name);
+          exit (1);
+        }
+    }
+
+  if (UPPER)
+    for (i = 0; i < len; i++)
+      { x = drand48();
+        if (x < PRC)
+          if (x < PRA)
+            seq[i] = 'A';
+          else
+            seq[i] = 'C';
+        else
+          if (x < PRG)
+            seq[i] = 'G';
+          else
+            seq[i] = 'T';
+      }
+  else
+    for (i = 0; i < len; i++)
+      { x = drand48();
+        if (x < PRC)
+          if (x < PRA)
+            seq[i] = 'a';
+          else
+            seq[i] = 'c';
+        else
+          if (x < PRG)
+            seq[i] = 'g';
+          else
+            seq[i] = 't';
+      }
+  seq[len] = '\0';
+
+  return (seq);
+}
+
+int main(int argc, char *argv[])
+{ int    i, j;
+  char  *eptr;
+  double glen;
+ 
+  //  Process command arguments
+  //
+  //  Usage: <GenomeLen:double> [-b<double(.5)>] [-r<int>]
+
+  Prog_Name = strdup("rangen");
+
+  WIDTH = 80;
+  BIAS  = .5;
+  HASR  = 0;
+  UPPER = 0;
+
+  j = 1;
+  for (i = 1; i < argc; i++)
+    if (argv[i][0] == '-')
+      switch (argv[i][1])
+      { default:
+          fprintf(stderr,"%s: %s is an illegal option\n",Prog_Name,argv[i]);
+          exit (1);
+        case 'U':
+          if (argv[i][2] != '\0')
+            { fprintf(stderr,"%s: %s is an illegal option\n",Prog_Name,argv[i]);
+              exit (1);
+            }
+          UPPER = 1;
+          break;
+        case 'b':
+          BIAS = strtod(argv[i]+2,&eptr);
+          if (*eptr != '\0' || argv[i][2] == '\0')
+            { fprintf(stderr,"%s: -%c '%s' argument is not a real number\n",
+                             Prog_Name,argv[i][1],argv[i]+2);
+              exit (1);
+            }
+          if (BIAS < 0. || BIAS > 1.)
+            { fprintf(stderr,"%s: AT-bias must be in [0,1] (%g)\n",Prog_Name,BIAS);
+              exit (1);
+            }
+          break;
+        case 'r':
+          SEED = strtol(argv[i]+2,&eptr,10);
+          HASR = 1;
+          if (*eptr != '\0' || argv[i][2] == '\0')
+            { fprintf(stderr,"%s: -r argument is not an integer\n",Prog_Name);
+              exit (1);
+            }
+          break;
+        case 'w':
+          WIDTH = strtol(argv[i]+2,&eptr,10);
+          if (*eptr != '\0' || argv[i][2] == '\0')
+            { fprintf(stderr,"%s: -w '%s' argument is not an integer\n",Prog_Name,argv[i]+2);
+              exit (1);
+            }
+          if (WIDTH < 0)
+            { fprintf(stderr,"%s: Line width must be non-negative (%d)\n",Prog_Name,WIDTH);
+              exit (1);
+            }
+          break;
+      }
+    else
+      argv[j++] = argv[i];
+  argc = j;
+
+  if (argc != 2)
+    { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+      exit (1);
+    }
+
+  glen = strtod(argv[1],&eptr);
+  if (*eptr != '\0')
+    { fprintf(stderr,"%s: genome length is not a real number\n",Prog_Name);
+      exit (1);
+    }
+  if (glen < 0.)
+    { fprintf(stderr,"%s: Genome length must be positive (%g)\n",Prog_Name,glen);
+      exit (1);
+    }
+  GENOME = (int) (glen*1000000.);
+
+  //  Set up random number generator
+
+  if (HASR)
+    srand48(SEED);
+  else
+    srand48(getpid());
+
+  //  Generate the sequence line at a time where all lines have width WDITH, save the last.
+
+
+  fprintf(stdout,">random len=%d bias=%g\n",GENOME,BIAS);
+  for (j = 0; j+WIDTH < GENOME; j += WIDTH)
+    fprintf(stdout,"%s\n",random_genome(WIDTH));
+  if (j < GENOME)
+    fprintf(stdout,"%s\n",random_genome(GENOME-j));
+
+  exit (0);
+}
diff --git a/simulator.c b/simulator.c
index b16fb02..cf1d694 100644
--- a/simulator.c
+++ b/simulator.c
@@ -1,51 +1,14 @@
-/************************************************************************************\
-*                                                                                    *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
-*                                                                                    *
-* Redistribution and use in source and binary forms, with or without modification,   *
-* are permitted provided that the following conditions are met:                      *
-*                                                                                    *
-*  · Redistributions of source code must retain the above copyright notice, this     *
-*    list of conditions and the following disclaimer.                                *
-*                                                                                    *
-*  · Redistributions in binary form must reproduce the above copyright notice, this  *
-*    list of conditions and the following disclaimer in the documentation and/or     *
-*    other materials provided with the distribution.                                 *
-*                                                                                    *
-*  · The name of EWM may not be used to endorse or promote products derived from     *
-*    this software without specific prior written permission.                        *
-*                                                                                    *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
-*                                                                                    *
-* For any issues regarding this software and its use, contact EWM at:                *
-*                                                                                    *
-*   Eugene W. Myers Jr.                                                              *
-*   Bautzner Str. 122e                                                               *
-*   01099 Dresden                                                                    *
-*   GERMANY                                                                          *
-*   Email: gene.myers at gmail.com                                                      *
-*                                                                                    *
-\************************************************************************************/
-
 /*******************************************************************************************
  *
  *  Synthetic DNA shotgun dataset simulator
- *     Generate a fake genome of size genlen*1Mb long, that has an AT-bias of -b.  Then
- *     sample reads of mean length -m from a log-normal length distribution with
- *     standard deviation -s, but ignore reads of length less than -x.  Collect enough
- *     reads to cover the genome -c times.   Introduce -e fraction errors into each
- *     read where the ratio of insertions, deletions, and substitutions are set by
- *     defined constants INS_RATE and DEL_RATE within generate.c.  One can also control
- *     the rate at which reads are picked from the forward and reverse strands by setting
- *     the defined constant FLIP_RATE.
+ *     From a supplied reference genome in the form of a Dazzler .dam, sample reads of
+ *     mean length -m from a log-normal length distribution with standard deviation -s,
+ *     but ignore reads of length less than -x.  Collect enough reads to cover the genome
+ *     -c times.   Introduce -e fraction errors into each read where the ratio of insertions,
+ *     deletions, and substitutions are set by defined constants INS_RATE and DEL_RATE
+ *     within generate.c.  The fraction -f controls the rate at which reads are picked from
+ *     the forward and reverse strands which defaults to 50%.  If -C is set then assume the
+ *     scaffolds are circular.
  *
  *     The -r parameter seeds the random number generator for the generation of the genome
  *     so that one can reproducbile produce the same underlying genome to sample from.  If
@@ -53,16 +16,22 @@
  *     to the standard output (i.e. it is a pipe).  The output is in fasta format (i.e. it is
  *     a UNIX pipe).  The output is in Pacbio .fasta format suitable as input to fasta2DB.
  *
- *     The -M option requests that the coordinates from which each read has been sampled are
- *     written to the indicated file, one line per read, ASCII encoded.  This "map" file
- *     essentially tells one where every read belongs in an assembly and is very useful for
- *     debugging and testing purposes.  If a read pair is say b,e then if b < e the read was
- *     sampled from [b,e] in the forward direction, and from [e,b] in the reverse direction
- *     otherwise.
+ *     The genome is considered a sequence of *scaffolds* (these are reconstituted from the
+ *     Dazzler's internal encoding of a .dam), where the gaps are filled with a random
+ *     sequence that follows the base distribution of the contigs of the genome.  The program
+ *     then samples these filled in scaffolds for reads.  If the -C optioin is set then the
+ *     program assumes each scaffold is a circular sequence.
+ *
+ *     The -M option requests that the scaffold and coordinates from which each read has
+ *     been sampled are written to the indicated file, one line per read, ASCII encoded.
+ *     This "map" file essentially tells one where every read belongs in an assembly and
+ *     is very useful for debugging and testing purposes.  If a read pair is say b,e then
+ *     if b < e the read was sampled from [b,e] in the forward direction, and from [e,b]
+ *     in the reverse direction otherwise.
  *
  *  Author:  Gene Myers
  *  Date  :  July 2013
- *  Mod   :  April 2014 (made independent of "mylib")
+ *  Mod   :  April 2016 (generates reads w.r.t. a reference genome)
  *
  ********************************************************************************************/
 
@@ -74,59 +43,27 @@
 
 #include "DB.h"
 
-static char *Usage[] = { "<genlen:double> [-c<double(20.)>] [-b<double(.5)>] [-r<int>]",
-                         "                [-m<int(10000)>]  [-s<int(2000)>]  [-x<int(4000)>]",
-                         "                [-e<double(.15)>] [-M<file>]"
+static char *Usage[] = { "<genome:dam> [-CU] [-m<int(10000)>]  [-s<int(2000)>] [-e<double(.15)>]",
+                         "                   [-c<double(50.)>] [-f<double(.5)>] [-x<int(4000)>]",
+                         "                   [-w<int(80)>] [-r<int>] [-M<file>]",
                        };
 
-static int    GENOME;     // -g option * 1Mbp
-static double COVERAGE;   // -c option
-static double BIAS;       // -b option
-static int    HASR = 0;   // -r option is set?
-static int    SEED;       // -r option
+static int    CIRCULAR;   // -C option
+static int    UPPER;      // -U option
 static int    RMEAN;      // -m option
 static int    RSDEV;      // -s option
-static int    RSHORT;     // -x option
 static double ERROR;      // -e option
+static double COVERAGE;   // -c option
+static double FLIP_RATE;  // -f option
+static int    RSHORT;     // -x option
+static int    WIDTH;      // -w option
+static int    HASR;       // -r option is set?
+static int    SEED;       // -r option
 static FILE  *MAP;        // -M option
 
 #define INS_RATE  .73333  // insert rate
 #define DEL_RATE  .20000  // deletion rate
 #define IDL_RATE  .93333  // insert + delete rate
-#define FLIP_RATE .5      // orientation rate (equal)
-
-//  Generate a random 4 letter string of length *len* with every letter having equal probability.
-
-static char *random_genome()
-{ char  *seq;
-  int    i;
-  double x, PRA, PRC, PRG;
-
-  PRA = BIAS/2.;
-  PRC = (1.-BIAS)/2. + PRA;
-  PRG = (1.-BIAS)/2. + PRC;
-
-  if (HASR)
-    srand48(SEED);
-  else
-    srand48(getpid());
-
-  if ((seq = (char *) Malloc(GENOME+1,"Allocating genome sequence")) == NULL)
-    exit (1);
-  for (i = 0; i < GENOME; i++)
-    { x = drand48();
-      if (x < PRA)
-        seq[i] = 0;
-      else if (x < PRC)
-        seq[i] = 1;
-      else if (x < PRG)
-        seq[i] = 2;
-      else
-        seq[i] = 3;
-    }
-  seq[GENOME] = 4;
-  return (seq);
-}
 
 //  Complement (in the DNA sense) string *s*.
 
@@ -144,6 +81,8 @@ static void complement(int elen, char *s)
     }
 }
 
+//  A unit normal distribution random number generator
+
 #define UNORM_LEN 60000
 #define UNORM_MAX   6.0
 
@@ -226,54 +165,227 @@ static double sample_unorm(double x)
   return (y);
 }
 
+//  Open and trim the reference genome *name*.  Determine the number of scaffolds and sizes
+//    of each scaffold (in nscaffs and the .coff field of the read records) in the dam.  Then
+//    create a sequence for each scaffold (index in the .boff field of the read records), that
+//    consists of its contigs with a random sequence filling the gaps (generated according to
+//    the bp frequency in db.freq[4]).
+
+HITS_DB *load_and_fill(char *name, int *pscaffs)
+{ static HITS_DB db;
+  HITS_READ *reads;
+  FILE      *bases;
+  char      *seq;
+  int        nreads, nscaffs;
+  int        i, c;
+  int64      ctot;
+  int64      o, u;
+  double     PRA, PRC, PRG;
+
+  if (Open_DB(name,&db) != 1)
+    { fprintf(stderr,"%s: %s is not a Dazzler .dam\n",Prog_Name,name);
+      exit (1);
+    }
+  Trim_DB(&db);
+
+  PRA = db.freq[0];
+  PRC = PRA + db.freq[1];
+  PRG = PRC + db.freq[2];
+
+  nreads  = db.nreads;  
+  reads   = db.reads;
+
+  nscaffs = 0;
+  for (i = 0; i < nreads; i++)
+    if (reads[i].origin == 0)
+      nscaffs += 1;
+
+  for (i = 0; i < nscaffs; i++)
+    reads[i].coff = 0;
+
+  c = -1;
+  for (i = 0; i < nreads; i++)
+    { if (reads[i].origin == 0)
+        c += 1;
+      reads[c].coff = reads[i].fpulse+reads[i].rlen;
+    }
+
+  ctot = 0;
+  for (i = 0; i < nscaffs; i++)
+    ctot += reads[i].coff+1;
+
+  bases = Fopen(Catenate(db.path,"","",".bps"),"r");
+  if (bases == NULL)
+    exit (1);
+
+  seq = (char *) Malloc(ctot+4,"Allocating space for genome");
+  if (seq == NULL)
+    exit (1);
+  *seq++ = 4;
+
+  c = -1;
+  o = u = 0;
+  for (i = 0; i < nreads; i++)
+    { int   len, clen;
+      int64 off;
+
+      if (reads[i].origin == 0)
+        { if (c >= 0)
+            o += reads[c].coff + 1;
+          c += 1;
+          u = o;
+        }
+      else
+        { int64  p;
+          double x;
+
+          p = u + reads[i-1].rlen;
+          u = o + reads[i].fpulse;
+          while (p < u)
+            { x = drand48();
+              if (x < PRC)
+                if (x < PRA)
+                  seq[p++] = 0;
+                else
+                  seq[p++] = 1;
+              else
+                if (x < PRG)
+                  seq[p++] = 2;
+                else
+                  seq[p++] = 3;
+            }
+        }
+
+      len = reads[i].rlen;
+      off = reads[i].boff;
+      if (ftello(bases) != off)
+        fseeko(bases,off,SEEK_SET);
+      clen = COMPRESSED_LEN(len);
+      if (clen > 0)
+        { if (fread(seq+u,clen,1,bases) != 1)
+            { EPRINTF(EPLACE,"%s: Read of .bps file failed\n",Prog_Name);
+              exit (1);
+            }
+        }
+      Uncompress_Read(len,seq+u);
+      if (reads[i].origin == 0)
+        reads[c].boff = o;
+    }
+  reads[nscaffs].boff = ctot;
+
+  db.bases  = (void *) seq;
+  db.loaded = 1;
+
+  *pscaffs = nscaffs;
+  return (&db);
+}
 
 //  Generate reads (a) whose lengths are exponentially distributed with mean *mean* and
-//    standard deviation *stdev*, (b) that are never shorter than *shortest* and never
-//    longer than the string *source*.  Each read is a randomly sampled interval of
-//    *source* (each interval is equally likely) that has insertion, deletion, and/or
-//    substitution errors introduced into it and which is oriented in either the forward
-//    or reverse strand direction with probability FLIP_RATE.  The number of errors
-//    introduced is the length of the string times *erate*, and the probability of an
-//    insertion, deletion, or substitution is controlled by the defined constants INS_RATE
-//    and DEL_RATE.  Generate reads until the sum of the lengths of the reads is greater
-//    than slen*coverage.  The reads are output as fasta entries with a specific header
-//    format that contains the sampling interval, read length, and a read id.
-
-static void shotgun(char *source)
-{ int       maxlen, nreads, qv;
-  int64     totlen, totbp;
-  char     *rbuffer;
-  double    nmean, nsdev;
+//    standard deviation *stdev*, and (b) that are never shorter than *shortest*.  Each
+//    read is a randomly sampled interval of one of the filled scaffolds of *source*
+//    (each interval is equally likely) that has insertion, deletion, and/or substitution
+//    errors introduced into it and which is oriented in either the forward or reverse
+//    strand direction with probability FLIP_RATE.  The number of errors introduced is the
+//    length of the string times *erate*, and the probability of an insertion, delection,
+//    or substitution is controlled by the defined constants INS_RATE and DEL_RATE.
+//    If the -C option is set then each scaffold is assumed to be circular and reads can
+//    be sampled that span the origin.   Reads are generated until the sum of the lengths of
+//    the reads is greater thant coverage times the sum of the lengths of the scaffolds in
+//    the reference (i.e. including filled scaffold gaps in the genome size).  The reads are
+//    output as fasta entries with the PacBio-specific header format that contains the
+//    sampling interval, read length, and a read id.
+
+static void shotgun(HITS_DB *source, int nscaffs)
+{ HITS_READ *reads;
+  int        gleng;
+  int        maxlen, nreads, qv;
+  int64      totlen, totbp;
+  char      *rbuffer, *bases;
+  double     nmean, nsdev;
+  double    *weights;
+  int        scf;
 
   nsdev = (1.*RSDEV)/RMEAN;
   nsdev = log(1.+nsdev*nsdev);
   nmean = log(1.*RMEAN) - .5*nsdev;
   nsdev = sqrt(nsdev);
 
-  if (GENOME < RSHORT)
+  bases = source->bases;
+  reads = source->reads;
+  gleng = reads[nscaffs].boff - nscaffs;
+  if (gleng <= RSHORT)
     { fprintf(stderr,"Genome length is less than shortest read length !\n");
       exit (1);
     }
 
   init_unorm();
 
+  weights = (double *) Malloc(sizeof(double)*(nscaffs+1),"Allocating contig weights");
+  if (weights == NULL)
+    exit (1);
+  
+  { double r;
+
+    r = 0.;
+    for (scf = 0; scf < nscaffs; scf++)
+      { weights[scf] = r/gleng;
+        r += reads[scf].coff;
+      }
+    weights[nscaffs] = 1.;
+  }
+
   qv = (int) (1000 * (1.-ERROR));
 
   rbuffer = NULL;
   maxlen  = 0;
   totlen  = 0;
-  totbp   = COVERAGE*GENOME;
+  totbp   = COVERAGE*gleng;
   nreads  = 0;
   while (totlen < totbp)
-    { int   len, sdl, ins, del, elen, rbeg, rend;
-      int   j;
-      char *s, *t;
+    { int    len, sdl, ins, del, elen, slen, rbeg, rend;
+      int    j;
+      double uni;
+      char  *s, *t;
+
+      scf = bin_search(nscaffs,weights,drand48()) - 1;   //  Pick a scaffold with probabilitye
+                                                         //    proportional to its length
 
-      len = (int) exp(nmean + nsdev*sample_unorm(drand48()));    //  Determine length of read.
-      if (len > GENOME) len = GENOME;
-      if (len < RSHORT)
+      uni = drand48();
+      len = (int) exp(nmean + nsdev*sample_unorm(uni));    //  Pick a read length
+      if (len <= RSHORT)
         continue;
 
+      // New sampler:
+
+      slen = reads[scf].coff;
+      rbeg = (int) (drand48()*slen);          //  Pick a spot for read start
+      if (CIRCULAR)
+        rend = (rbeg + len) % slen;           //  Wrap if circular
+      else
+        { if (drand48() < .5)                 //  Pick direction and trim if necessary
+            { rend = rbeg + len;              //    if not circular
+              if (rend > slen)
+                { rend = slen;
+                  len  = rend - rbeg;
+                }
+            }
+          else
+            { rend = rbeg;
+              rbeg = rbeg - len;
+              if (rbeg < 0)
+                { rbeg = 0;
+                  len  = rend;
+                }
+            }
+          if (len <= RSHORT)
+            continue;
+        }
+
+      // Old sampler:
+      //
+      // rbeg = (int) (drand48()*((reads[scf].coff-len)+.9999999));
+      // rend = rbeg + len;
+
       sdl = (int) (len*ERROR);      //  Determine number of inserts *ins*, deletions *del,
       ins = del = 0;                //    and substitions+deletions *sdl*.
       for (j = 0; j < sdl; j++)
@@ -285,8 +397,6 @@ static void shotgun(char *source)
         }
       sdl -= ins;
       elen = len + (ins-del);
-      rbeg = (int) (drand48()*((GENOME-len)+.9999999));
-      rend = rbeg + len;
 
       if (elen > maxlen)
         { maxlen  = ((int) (1.2*elen)) + 1000;
@@ -296,7 +406,7 @@ static void shotgun(char *source)
         }
 
       t = rbuffer;
-      s = source + rbeg;
+      s = bases + (reads[scf].boff + rbeg);
 
       //   Generate the string with errors.  NB that inserts occur randomly between source
       //     characters, while deletions and substitutions occur on source characters.
@@ -320,6 +430,8 @@ static void shotgun(char *source)
               sdl -= 1;
             }
           s += 1;
+          if (*s == 4)
+            s = bases + reads[scf].boff;
           while (len * drand48() < ins)
             { *t++ = (char) (4.*drand48());
               ins -= 1;
@@ -328,23 +440,24 @@ static void shotgun(char *source)
       *t = 4;
 
       if (drand48() >= FLIP_RATE)    //  Complement the string with probability FLIP_RATE.
-        { printf(">Sim/%d/%d_%d RQ=0.%d\n",nreads+1,0,elen,qv);
-          complement(elen,rbuffer);
+        { complement(elen,rbuffer);
           j = rend;
           rend = rbeg;
           rbeg = j;
         }
-      else
-        printf(">Sim/%d/%d_%d RQ=0.%d\n",nreads+1,0,elen,qv);
 
-      Lower_Read(rbuffer);
-      for (j = 0; j+80 < elen; j += 80)
-        printf("%.80s\n",rbuffer+j);
+      printf(">Sim/%d/%d_%d RQ=0.%d\n",nreads+1,0,elen,qv);
+      if (UPPER)
+        Upper_Read(rbuffer);
+      else
+        Lower_Read(rbuffer);
+      for (j = 0; j+WIDTH < elen; j += WIDTH)
+        printf("%.*s\n",WIDTH,rbuffer+j);
       if (j < elen)
         printf("%s\n",rbuffer+j);
 
        if (MAP != NULL)
-         fprintf(MAP," %9d %9d\n",rbeg,rend);
+         fprintf(MAP," %6d %9d %9d\n",scf,rbeg,rend);
 
        totlen += elen;
        nreads += 1;
@@ -352,34 +465,34 @@ static void shotgun(char *source)
 }
 
 int main(int argc, char *argv[])
-{ char  *source;
+{ HITS_DB *source;
+  int      nscaffs;
 
-//  Usage: <GenomeLen:double> [-c<double(20.)>] [-b<double(.5)>] [-r<int>]
-//                            [-m<int(10000)>]  [-s<int(2000)>]  [-x<int(4000)>]
-//                            [-e<double(.15)>] [-M<file]"
+  //  Process command line
 
-  { int    i, j;
+  { int    i, j, k;
+    int    flags[128];
     char  *eptr;
-    double glen;
 
-    Prog_Name = Strdup("simulator","");
+    ARG_INIT("simulator");
 
-    COVERAGE = 20.;
-    BIAS     = .5;
-    HASR     = 0;
-    RMEAN    = 10000;
-    RSDEV    = 2000;
-    RSHORT   = 4000;
-    ERROR    = .15;
-    MAP      = NULL;
+    RMEAN     = 10000;
+    RSDEV     = 2000;
+    ERROR     = .15;
+    COVERAGE  = 50.;
+    FLIP_RATE = .5;
+    RSHORT    = 4000;
+    HASR      = 0;
+    MAP       = NULL;
+    WIDTH     = 80;
 
     j = 1;
     for (i = 1; i < argc; i++)
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            fprintf(stderr,"%s: -%c is an illegal option\n",Prog_Name,argv[i][2]);
-            exit (1);
+            ARG_FLAGS("CU");
+            break;
           case 'c':
             ARG_REAL(COVERAGE)
             if (COVERAGE < 0.)
@@ -387,13 +500,23 @@ int main(int argc, char *argv[])
                 exit (1);
               }
             break;
-          case 'b':
-            ARG_REAL(BIAS)
-            if (BIAS < 0. || BIAS > 1.)
-              { fprintf(stderr,"%s: AT-bias must be in [0,1] (%g)\n",Prog_Name,BIAS);
+          case 'e':
+            ARG_REAL(ERROR)
+            if (ERROR < 0. || ERROR > .5)
+              { fprintf(stderr,"%s: Error rate must be in [0,.5] (%g)\n",Prog_Name,ERROR);
                 exit (1);
               }
             break;
+          case 'f':
+            ARG_REAL(FLIP_RATE)
+            if (FLIP_RATE < 0. || FLIP_RATE > 1.)
+              { fprintf(stderr,"%s: Error rate must be in [0,1] (%g)\n",Prog_Name,FLIP_RATE);
+                exit (1);
+              }
+            break;
+          case 'm':
+            ARG_POSITIVE(RMEAN,"Mean read length")
+            break;
           case 'r':
             SEED = strtol(argv[i]+2,&eptr,10);
             HASR = 1;
@@ -402,54 +525,46 @@ int main(int argc, char *argv[])
                 exit (1);
               }
             break;
-          case 'M':
-            MAP = Fopen(argv[i]+2,"w");
-            if (MAP == NULL)
-              exit (1);
-            break;
-          case 'm':
-            ARG_POSITIVE(RMEAN,"Mean read length")
-            break;
           case 's':
-            ARG_POSITIVE(RSDEV,"Read length standard deviation")
+            ARG_NON_NEGATIVE(RSDEV,"Read length standard deviation")
             break;
           case 'x':
             ARG_NON_NEGATIVE(RSHORT,"Read length minimum")
             break;
-          case 'e':
-            ARG_REAL(ERROR)
-            if (ERROR < 0. || ERROR > .5)
-              { fprintf(stderr,"%s: Error rate must be in [0,.5] (%g)\n",Prog_Name,ERROR);
-                exit (1);
-              }
+          case 'w':
+            ARG_NON_NEGATIVE(WIDTH,"Line width")
+            break;
+          case 'M':
+            MAP = Fopen(argv[i]+2,"w");
+            if (MAP == NULL)
+              exit (1);
             break;
         }
       else
         argv[j++] = argv[i];
     argc = j;
 
+    CIRCULAR = flags['C'];
+    UPPER    = flags['U'];
+
     if (argc != 2)
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
         fprintf(stderr,"       %*s %s\n",(int) strlen(Prog_Name),"",Usage[1]);
         fprintf(stderr,"       %*s %s\n",(int) strlen(Prog_Name),"",Usage[2]);
         exit (1);
       }
-
-    glen = strtod(argv[1],&eptr);
-    if (*eptr != '\0')
-      { fprintf(stderr,"%s: genome length is not a real number\n",Prog_Name);
-        exit (1);
-      }
-    if (glen < 0.)
-      { fprintf(stderr,"%s: Genome length must be positive (%g)\n",Prog_Name,glen);
-        exit (1);
-      }
-    GENOME = (int) (glen*1000000.);
   }
 
-  source = random_genome();
+  if (HASR)
+    srand48(SEED);
+  else
+    srand48(getpid());
+
+  //  Read and generate
+
+  source = load_and_fill(argv[1],&nscaffs);
 
-  shotgun(source);
+  shotgun(source,nscaffs);
 
   if (MAP != NULL)
     fclose(MAP);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/dazzdb.git



More information about the debian-med-commit mailing list