[med-svn] [dazzdb] 01/05: Imported Upstream version 1.0+20161112

Afif Elghraoui afif at moszumanska.debian.org
Thu Jan 19 07:01:25 UTC 2017


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository dazzdb.

commit 49a5b8b9525073504b07e6af19d52e65217de620
Author: Afif Elghraoui <afif at debian.org>
Date:   Wed Jan 18 22:14:15 2017 -0800

    Imported Upstream version 1.0+20161112
---
 DB.c                     |  95 +++++++++++++-
 DB.h                     |  14 ++-
 DB2quiva.c => DB2arrow.c | 105 ++++++++--------
 DB2quiva.c               |   4 +
 DBdump.c                 |  65 ++++++++--
 DBshow.c                 | 113 +++++++++++++----
 DBsplit.c                |   3 +-
 DBstats.c                |   2 +-
 DBwipe.c                 |  89 +++++++++++++
 Makefile                 |  12 +-
 QV.c                     |  94 ++++++++++++++
 QV.h                     |   3 +
 README.md                | 169 ++++++++++++++++++-------
 quiva2DB.c => arrow2DB.c | 319 +++++++++++++++++++++++++----------------------
 fasta2DAM.c              |   1 +
 fasta2DB.c               |  52 ++++----
 quiva2DB.c               |  12 +-
 17 files changed, 824 insertions(+), 328 deletions(-)

diff --git a/DB.c b/DB.c
index 3764842..3afff14 100644
--- a/DB.c
+++ b/DB.c
@@ -314,6 +314,14 @@ void Upper_Read(char *s)
   *s = '\0';
 }
 
+void Letter_Arrow(char *s)
+{ static char letter[4] = { '1', '2', '3', '4' };
+
+  for ( ; *s != 4; s++)
+    *s = letter[(int) *s];
+  *s = '\0';
+}
+
 //  Convert read in ascii representation to [0-3] representation (end with 4)
 
 void Number_Read(char *s)
@@ -341,6 +349,31 @@ void Number_Read(char *s)
   *s = 4;
 }
 
+void Number_Arrow(char *s)
+{ static char arrow[128] =
+    { 3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 0, 1, 2, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 2,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+    };
+
+  for ( ; *s != '\0'; s++)
+    *s = arrow[(int) *s];
+  *s = 4;
+}
+
 
 /*******************************************************************************************
  *
@@ -426,7 +459,7 @@ int Open_DB(char* path, HITS_DB *db)
     if (fscanf(dbvis,DB_NBLOCK,&nblocks) != 1)
       if (part == 0)
         { cutoff = 0;
-          all    = 1;
+          all    = DB_ALL;
         }
       else
         { EPRINTF(EPLACE,"%s: DB %s has not yet been partitioned, cannot request a block !\n",
@@ -466,7 +499,7 @@ int Open_DB(char* path, HITS_DB *db)
   db->tracks  = NULL;
   db->part    = part;
   db->cutoff  = cutoff;
-  db->all     = all;
+  db->allarr |= all;
   db->ufirst  = ufirst;
   db->tfirst  = tfirst;
 
@@ -557,10 +590,10 @@ void Trim_DB(HITS_DB *db)
 
   if (db->trimmed) return;
 
-  if (db->cutoff <= 0 && db->all) return;
+  if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return;
 
   cutoff = db->cutoff;
-  if (db->all)
+  if ((db->allarr & DB_ALL) != 0)
     allflag = 0;
   else
     allflag = DB_BEST;
@@ -660,10 +693,10 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
 
   if (!db->trimmed) return (0);
 
-  if (db->cutoff <= 0 && db->all) return (0);
+  if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return (0);
 
   cutoff = db->cutoff;
-  if (db->all)
+  if ((db->allarr & DB_ALL) != 0)
     allflag = 0;
   else
     allflag = DB_BEST;
@@ -1435,6 +1468,56 @@ int Load_Read(HITS_DB *db, int i, char *read, int ascii)
   return (0);
 }
 
+// Load into 'read' the i'th arrow in 'db'.  As an ASCII string if ascii is 1, 
+//   and as a numeric string otherwise.
+//
+
+HITS_DB *Arrow_DB = NULL;         //  Last db/arw used by "Load_Arrow"
+FILE    *Arrow_File = NULL;       //    Becomes invalid after closing
+
+int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
+{ FILE      *arrow;
+  int64      off;
+  int        len, clen;
+  HITS_READ *r = db->reads;
+
+  if (i >= db->nreads)
+    { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
+      EXIT(1);
+    }
+  if (Arrow_DB != db)
+    { fclose(Arrow_File);
+      arrow = Fopen(Catenate(db->path,"","",".arw"),"r");
+      if (arrow == NULL)
+        EXIT(1);
+      Arrow_File = arrow;
+      Arrow_DB   = db;
+    }
+  else
+    arrow = Arrow_File;
+
+  off = r[i].boff;
+  len = r[i].rlen;
+
+  if (ftello(arrow) != off)
+    fseeko(arrow,off,SEEK_SET);
+  clen = COMPRESSED_LEN(len);
+  if (clen > 0)
+    { if (fread(read,clen,1,arrow) != 1)
+        { EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Arrow)\n",Prog_Name);
+          EXIT(1);
+        }
+    }
+  Uncompress_Read(len,read);
+  if (ascii == 1)
+    { Letter_Arrow(read);
+      read[-1] = '\0';
+    }
+  else
+    read[-1] = 4;
+  return (0);
+}
+
 char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
 { FILE      *bases  = (FILE *) db->bases;
   int64      off;
diff --git a/DB.h b/DB.h
index 983bee7..dc281de 100644
--- a/DB.h
+++ b/DB.h
@@ -164,6 +164,9 @@ void Lower_Read(char *s);     //  Convert read from numbers to lowercase letters
 void Upper_Read(char *s);     //  Convert read from numbers to uppercase letters (0-3 to ACGT)
 void Number_Read(char *s);    //  Convert read from letters to numbers
 
+void Letter_Arrow(char *s);   //  Convert arrow pw's from numbers to uppercase letters (0-3 to 1234)
+void Number_Arrow(char *s);   //  Convert arrow pw string from letters to numbers
+
 
 /*******************************************************************************************
  *
@@ -175,6 +178,9 @@ void Number_Read(char *s);    //  Convert read from letters to numbers
 #define DB_CSS  0x0400   //  This is the second or later of a group of reads from a given insert
 #define DB_BEST 0x0800   //  This is the longest read of a given insert (may be the only 1)
 
+#define DB_ARROW 0x2     //  DB is an arrow DB
+#define DB_ALL   0x1     //  all wells are in the trimmed DB
+
 //  Fields have different interpretations if a .db versus a .dam
 
 typedef struct
@@ -185,6 +191,7 @@ typedef struct
                     //    uncompressed bases in memory block
     int64   coff;   //  Offset (in bytes) of compressed quiva streams in '.qvs' file (DB),
                     //  Offset (in bytes) of scaffold header string in '.hdr' file (DAM)
+                    //  4 compressed shorts containing snr info if an arrow DB.
     int     flags;  //  QV of read + flags above (DB only)
   } HITS_READ;
 
@@ -226,7 +233,7 @@ typedef struct
   { int         ureads;     //  Total number of reads in untrimmed DB
     int         treads;     //  Total number of reads in trimmed DB
     int         cutoff;     //  Minimum read length in block (-1 if not yet set)
-    int         all;        //  Consider multiple reads from a given well
+    int         allarr;     //  DB_ALL | DB_ARROW
     float       freq[4];    //  frequency of A, C, G, T, respectively
 
     //  Set with respect to "active" part of DB (all vs block, untrimmed vs trimmed)
@@ -368,6 +375,11 @@ char *New_Read_Buffer(HITS_DB *db);
 
 int  Load_Read(HITS_DB *db, int i, char *read, int ascii);
 
+  // Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence,
+  //   and there is only a choice between numeric (0) or ascii (1);
+
+int  Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
+
   // Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
   //   the start of the subinterval (not necessarily = to read !!! ).  As a lower case ascii
   //   string if ascii is 1, an upper case ascii string if ascii is 2, and a numeric string
diff --git a/DB2quiva.c b/DB2arrow.c
similarity index 59%
copy from DB2quiva.c
copy to DB2arrow.c
index a7b93da..9247ab4 100644
--- a/DB2quiva.c
+++ b/DB2arrow.c
@@ -1,9 +1,9 @@
 /********************************************************************************************
  *
- *  Recreate all the .quiva files that have been loaded into a specified database.
+ *  Recreate all the .arrow files that have been loaded into a specified database.
  *
  *  Author:  Gene Myers
- *  Date  :  May 2014
+ *  Date  :  October 2016
  *
  ********************************************************************************************/
 
@@ -12,38 +12,40 @@
 #include <string.h>
 
 #include "DB.h"
-#include "QV.h"
 
-#ifdef HIDE_FILES
-#define PATHSEP "/."
-#else
-#define PATHSEP "/"
-#endif
-
-static char *Usage = "[-vU] <path:db>";
+static char *Usage = "[-v] [-w<int(80)>] <path:db>";
 
 int main(int argc, char *argv[])
 { HITS_DB    _db, *db = &_db;
-  FILE       *dbfile, *quiva;
-  int         VERBOSE, UPPER;
+  FILE       *dbfile;
+  int         VERBOSE, WIDTH;
 
   //  Process arguments
 
   { int   i, j, k;
     int   flags[128];
+    char *eptr;
+
+    ARG_INIT("DB2arrow")
 
-    ARG_INIT("DB2quiva")
+    WIDTH = 80;
 
     j = 1;
     for (i = 1; i < argc; i++)
       if (argv[i][0] == '-')
-        { ARG_FLAGS("vU") }
+        switch (argv[i][1])
+        { default:
+            ARG_FLAGS("vU")
+            break;
+          case 'w':
+            ARG_NON_NEGATIVE(WIDTH,"Line width")
+            break;
+        }
       else
         argv[j++] = argv[i];
     argc = j;
 
     VERBOSE = flags['v'];
-    UPPER   = flags['U'];
 
     if (argc != 2)
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
@@ -51,10 +53,10 @@ int main(int argc, char *argv[])
       }
   }
 
-  //  Open db, db stub file, and .qvs file
+  //  Open db, and db stub file
 
-  { char *pwd, *root;
-    int   status;
+  { int   status;
+    char *pwd, *root;
 
     status = Open_DB(argv[1],db);
     if (status < 0)
@@ -67,14 +69,17 @@ int main(int argc, char *argv[])
       { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
         exit (1);
       }
+    if ((db->allarr & DB_ARROW) == 0)
+      { fprintf(stderr,"%s: There is no Arrow information in the DB: %s\n",Prog_Name,argv[1]);
+        exit (1);
+      }
 
     pwd    = PathTo(argv[1]);
     root   = Root(argv[1],".db");
     dbfile = Fopen(Catenate(pwd,"/",root,".db"),"r");
-    quiva  = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"r");
     free(pwd);
     free(root);
-    if (dbfile == NULL || quiva == NULL)
+    if (dbfile == NULL)
       exit (1);
   }
 
@@ -84,22 +89,19 @@ int main(int argc, char *argv[])
     char        lname[MAX_NAME];
     FILE       *ofile = NULL;
     int         f, first, last, ofirst, nfiles;
-    QVcoding   *coding;
-    char      **entry;
+    char       *read;
 
     if (fscanf(dbfile,DB_NFILE,&nfiles) != 1)
       SYSTEM_ERROR
 
     reads = db->reads;
-    entry = New_QV_Buffer(db);
+    read  = New_Read_Buffer(db);
     first = ofirst = 0;
     for (f = 0; f < nfiles; f++)
       { int   i;
         char  prolog[MAX_NAME], fname[MAX_NAME];
 
-        //  Scan db image file line, create .quiva file for writing
-
-        if (reads[first].coff < 0) break;
+        //  Scan db image file line, create .arrow file for writing
 
         if (fscanf(dbfile,DB_FDATA,&last,fname,prolog) != 3)
           SYSTEM_ERROR
@@ -107,7 +109,7 @@ int main(int argc, char *argv[])
         if (f == 0 || strcmp(fname,lname) != 0)
           { if (f > 0)
               { if (ofile == stdout)
-                  { fprintf(stderr," %d quivas\n",first-ofirst);
+                  { fprintf(stderr," %d reads\n",first-ofirst);
                     fflush(stderr);
                   }
                 else
@@ -124,48 +126,44 @@ int main(int argc, char *argv[])
                   }
               }
             else
-              { if ((ofile = Fopen(Catenate(".","/",fname,".quiva"),"w")) == NULL)
+              { if ((ofile = Fopen(Catenate(".","/",fname,".arrow"),"w")) == NULL)
                   exit (1);
 
                 if (VERBOSE)
-                  { fprintf(stderr,"Creating %s.quiva ...\n",fname);
-                    fflush(stderr);
+                  { fprintf(stderr,"Creating %s.arrow ...\n",fname);
+                    fflush(stdout);
                   }
               }
 
             strcpy(lname,fname);
           }
 
-        //   For the relevant range of reads, write the header for each to the file
-        //     and then uncompress and write the quiva entry for each
-
-        coding = Read_QVcoding(quiva);
+        //   For the relevant range of reads, write each to the file
+        //     recreating the original headers with the index meta-data about each read
 
         for (i = first; i < last; i++)
-          { int        e, flags, qv, rlen;
+          { int        j, len;
+            uint64     big;
+            float      snr[4];
             HITS_READ *r;
 
             r     = reads + i;
-            flags = r->flags;
-            rlen  = r->rlen;
-            qv    = (flags & DB_QV);
-            fprintf(ofile,"@%s/%d/%d_%d",prolog,r->origin,r->fpulse,r->fpulse+rlen);
-            if (qv > 0)
-              fprintf(ofile," RQ=0.%3d",qv);
+            len   = r->rlen;
+            big   = *((uint64 *) &(r->coff));
+            for (j = 0; j < 4; j++)
+              { snr[3-j] = (big & 0xffff) / 100.;
+                big    >>= 16;
+              }
+            fprintf(ofile,">%s",prolog);
+            fprintf(ofile," SN=%.2f,%.2f,%.2f,%.2f",snr[0],snr[1],snr[2],snr[3]);
             fprintf(ofile,"\n");
 
-            Uncompress_Next_QVentry(quiva,entry,coding,rlen);
-
-            if (UPPER)
-              { char *deltag = entry[1];
-                int   j;
-
-                for (j = 0; j < rlen; j++)
-                  deltag[j] -= 32;
-              }
+            Load_Arrow(db,i,read,1);
 
-            for (e = 0; e < 5; e++)
-              fprintf(ofile,"%.*s\n",rlen,entry[e]);
+            for (j = 0; j+WIDTH < len; j += WIDTH)
+              fprintf(ofile,"%.*s\n",WIDTH,read+j);
+            if (j < len)
+              fprintf(ofile,"%s\n",read+j);
           }
 
         first = last;
@@ -173,7 +171,7 @@ int main(int argc, char *argv[])
 
     if (f > 0)
       { if (ofile == stdout)
-          { fprintf(stderr," %d quivas\n",first-ofirst);
+          { fprintf(stderr," %d reads\n",first-ofirst);
             fflush(stderr);
           }
         else
@@ -181,7 +179,6 @@ int main(int argc, char *argv[])
       }
   }
 
-  fclose(quiva);
   fclose(dbfile);
   Close_DB(db);
 
diff --git a/DB2quiva.c b/DB2quiva.c
index a7b93da..45c5cff 100644
--- a/DB2quiva.c
+++ b/DB2quiva.c
@@ -67,6 +67,10 @@ int main(int argc, char *argv[])
       { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
         exit (1);
       }
+    if (db->reads[0].coff < 0 || (db->allarr & DB_ARROW) != 0)
+      { fprintf(stderr,"%s: There is no Quiver information in the DB: %s\n",Prog_Name,argv[1]);
+        exit (1);
+      }
 
     pwd    = PathTo(argv[1]);
     root   = Root(argv[1],".db");
diff --git a/DBdump.c b/DBdump.c
index 735e726..6227251 100644
--- a/DBdump.c
+++ b/DBdump.c
@@ -21,8 +21,8 @@
 #endif
 
 static char *Usage[] =
-    { "[-rhsiqp] [-uU] [-m<track>]+",
-      "          <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
+    { "[-rhsaiqp] [-uU] [-m<track>]+",
+      "           <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
     };
 
 #define LAST_READ_SYMBOL   '$'
@@ -93,6 +93,7 @@ static int prof_map[41] =
 
 int main(int argc, char *argv[])
 { HITS_DB    _db, *db = &_db;
+  int         Quiva_DB, Arrow_DB;
   FILE       *hdrs = NULL;
   int64      *qv_idx = NULL;
   uint8      *qv_val = NULL;
@@ -110,7 +111,7 @@ int main(int argc, char *argv[])
   FILE          *input = NULL;
 
   int         TRIM, UPPER;
-  int         DORED, DOSEQ, DOQVS, DOHDR, DOIQV, DOPRF, DAM;
+  int         DORED, DOSEQ, DOARW, DOQVS, DOHDR, DOIQV, DOPRF, DAM;
 
   int          MMAX, MTOP;
   char       **MASK;
@@ -134,7 +135,7 @@ int main(int argc, char *argv[])
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            ARG_FLAGS("hpqrsiuU")
+            ARG_FLAGS("hpqrsaiuU")
             break;
           case 'm':
             if (MTOP >= MMAX)
@@ -156,6 +157,7 @@ int main(int argc, char *argv[])
     DOQVS = flags['q'];
     DORED = flags['r'];
     DOSEQ = flags['s'];
+    DOARW = flags['a'];
     DOHDR = flags['h'];
     DOIQV = flags['i'];
     DOPRF = flags['p'];
@@ -195,13 +197,32 @@ int main(int argc, char *argv[])
           exit (1);
         DAM = 1;
         if (DOQVS)
-          { fprintf(stderr,"%s: -Q and -q options not compatible with a .dam DB\n",Prog_Name);
+          { fprintf(stderr,"%s: -q option is not compatible with a .dam DB\n",Prog_Name);
+            exit (1);
+          }
+        if (DOARW)
+          { fprintf(stderr,"%s: -a option is not compatible with a .dam DB\n",Prog_Name);
             exit (1);
           }
 
         free(root);
         free(pwd);
       }
+
+    Arrow_DB = ((db->allarr & DB_ARROW) != 0);
+    Quiva_DB = (db->reads[0].coff >= 0 && (db->allarr & DB_ARROW) == 0);
+    if (DOARW)
+      { if (!Arrow_DB)
+          { fprintf(stderr,"%s: -a option set but no Arrow data in DB\n",Prog_Name);
+            exit (1);
+          }
+      }
+    if (DOQVS)
+      { if (!Quiva_DB)
+          { fprintf(stderr,"%s: -q option set but no Quiver data in DB\n",Prog_Name);
+            exit (1);
+          }
+      }
   }
 
   //  Load QVs if requested
@@ -294,7 +315,7 @@ int main(int argc, char *argv[])
 
           reads  = db->reads - db->ufirst;
           cutoff = db->cutoff;
-          if (db->all)
+          if ((db->allarr & DB_ALL) != 0)
             allflag = 0;
           else
             allflag = DB_BEST;
@@ -566,7 +587,7 @@ int main(int argc, char *argv[])
                 lst = len;
               }
 
-            if (DOSEQ | DOQVS)
+            if (DOSEQ | DOQVS | DOARW)
               { int ten = lst-fst;
                 if (ten > seqmax)
                   seqmax = ten;
@@ -597,7 +618,7 @@ int main(int argc, char *argv[])
       { printf("+ T%d %lld\n",m,trktot[m]);
         printf("@ T%d %lld\n",m,trkmax[m]);
       }
-    if (DOSEQ | DOQVS)
+    if (DOSEQ | DOQVS | DOARW)
       { printf("+ S %lld\n",seqtot);
         printf("@ S %lld\n",seqmax);
       }
@@ -615,7 +636,7 @@ int main(int argc, char *argv[])
   //    range pairs in pts[0..reps) and according to the display options.
 
   { HITS_READ  *reads;
-    char       *read, **entry;
+    char       *read, *arrow, **entry;
     int         c, b, e, i, m;
     int         substr;
     int         map;
@@ -626,6 +647,10 @@ int main(int argc, char *argv[])
       entry = New_QV_Buffer(db);
     else
       entry = NULL;
+    if (DOARW)
+      arrow = New_Read_Buffer(db);
+    else
+      arrow = NULL;
 
     map    = 0;
     reads  = db->reads;
@@ -685,8 +710,19 @@ int main(int argc, char *argv[])
                       map += 1;
                     printf("H %ld %s\n",strlen(flist[map]),flist[map]);
                     printf("L %d %d %d\n",r->origin,r->fpulse,r->fpulse+len);
-                    if (qv > 0)
-                      printf("Q: %d\n",qv);
+                    if (Quiva_DB && qv > 0)
+                      printf("Q %d\n",qv);
+                    else if (Arrow_DB)
+                      { int   j, snr[4];
+                        int64 big;
+
+                        big   = *((uint64 *) &(r->coff));
+                        for (j = 0; j < 4; j++)
+                          { snr[3-j] = (big & 0xffff);
+                            big    >>= 16;
+                          }
+                        printf("N %d %d %d %d\n",snr[0],snr[1],snr[2],snr[3]);
+                      }
                   }
               }
 
@@ -694,6 +730,8 @@ int main(int argc, char *argv[])
               Load_QVentry(db,i,entry,UPPER);
             if (DOSEQ)
               Load_Read(db,i,read,UPPER);
+            if (DOARW)
+              Load_Arrow(db,i,arrow,1);
 
             for (m = 0; m < MTOP; m++)
               { int64 *anno;
@@ -727,6 +765,11 @@ int main(int argc, char *argv[])
                 printf("%.*s\n",lst-fst,read+fst);
               }
 
+            if (DOARW)
+              { printf("A %d ",lst-fst);
+                printf("%.*s\n",lst-fst,arrow+fst);
+              }
+
             if (DOIQV)
               { int64 k, e;
 
diff --git a/DBshow.c b/DBshow.c
index d4346a5..c376ed4 100644
--- a/DBshow.c
+++ b/DBshow.c
@@ -26,8 +26,8 @@
 #endif
 
 static char *Usage[] =
-    { "[-unqUQ] [-w<int(80)>] [-m<track>]+",
-      "         <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
+    { "[-unqaUQA] [-w<int(80)>] [-m<track>]+",
+      "           <path:db|dam> [ <reads:FILE> | <reads:range> ... ]"
     };
 
 #define LAST_READ_SYMBOL   '$'
@@ -93,7 +93,7 @@ int main(int argc, char *argv[])
   FILE          *input;
 
   int         TRIM, UPPER;
-  int         DOSEQ, DOQVS, QUIVA, DAM;
+  int         DOSEQ, DOQVS, DOARR, QUIVA, ARROW, DAM;
   int         WIDTH;
 
   int         MMAX, MTOP;
@@ -119,7 +119,7 @@ int main(int argc, char *argv[])
       if (argv[i][0] == '-')
         switch (argv[i][1])
         { default:
-            ARG_FLAGS("unqUQ")
+            ARG_FLAGS("unqaUQA")
             break;
           case 'w':
             ARG_NON_NEGATIVE(WIDTH,"Line width")
@@ -142,15 +142,26 @@ int main(int argc, char *argv[])
     TRIM  = 1-flags['u'];
     UPPER = 1+flags['U'];
     DOQVS = flags['q'];
+    DOARR = flags['a'];
     DOSEQ = 1-flags['n'];
     QUIVA = flags['Q'];
-    if (QUIVA && (!DOSEQ || MTOP > 0))
-      { fprintf(stderr,"%s: -Q (quiva) format request inconsistent with -n and -m options\n",
+    ARROW = flags['A'];
+    if ((QUIVA || DOQVS) && (ARROW || DOARR))
+      { fprintf(stderr,"%s: Cannot request both Quiver (-Q,-q) and Arrow (-A,a) information\n",
                        Prog_Name);
         exit (1);
       }
+
     if (QUIVA)
-      DOQVS = 1;
+      { DOQVS = 1;
+        DOSEQ = 0;
+        MTOP  = 0;
+      }
+    if (ARROW)
+      { DOARR = 1;
+        DOSEQ = 0;
+        MTOP  = 0;
+      }
 
     if (argc <= 1)
       { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
@@ -177,14 +188,27 @@ int main(int argc, char *argv[])
         if (hdrs == NULL)
           exit (1);
         DAM = 1;
-        if (QUIVA || DOQVS)
-          { fprintf(stderr,"%s: -Q and -q options not compatible with a .dam DB\n",Prog_Name);
+        if (DOQVS || DOARR)
+          { fprintf(stderr,"%s: -q, a, Q and A options not compatible with a .dam DB\n",Prog_Name);
             exit (1);
           }
 
         free(root);
         free(pwd);
       }
+
+    if (DOQVS)
+      { if (db->reads[0].coff < 0 || (db->allarr & DB_ARROW) != 0)
+          { fprintf(stderr,"%s: -q or Q option but no Quiver data in DB!\n",Prog_Name);
+            exit (1);
+          }
+      }
+    if (DOARR)
+      { if ((db->allarr & DB_ARROW) == 0)
+          { fprintf(stderr,"%s: -a or A option but no Arrow data in DB!\n",Prog_Name);
+            exit (1);
+          }
+      }
   }
 
   //  Load QVs if requested
@@ -263,11 +287,11 @@ int main(int argc, char *argv[])
 
           reads  = db->reads - db->ufirst;
           cutoff = db->cutoff;
-          if (db->all)
+          if ((db->allarr & DB_ALL) != 0)
             allflag = 0;
           else
             allflag = DB_BEST;
-          
+ 
           nid = 0;
           oid = db->ufirst;
           lid = oid + db->nreads;
@@ -394,7 +418,7 @@ int main(int argc, char *argv[])
 
   { HITS_READ  *reads;
     HITS_TRACK *first;
-    char       *read, **entry;
+    char       *read, *arrow, **entry;
     int         c, b, e, i;
     int         hilight, substr;
     int         map;
@@ -409,6 +433,8 @@ int main(int argc, char *argv[])
       { entry = NULL;
         first = db->tracks;
       }
+    if (DOARR)
+      arrow = New_Read_Buffer(db);
 
     if (UPPER == 1)
       { hilight = 'A'-'a';
@@ -446,21 +472,40 @@ int main(int argc, char *argv[])
           { int         len;
             int         fst, lst;
             int         flags, qv;
+            float       snr[4];
             HITS_READ  *r;
             HITS_TRACK *track;
 
             r   = reads + i;
             len = r->rlen;
+            if (substr)
+              { fst = iter->beg;
+                lst = iter->end;
+              }
+            else
+              { fst = 0;
+                lst = len;
+              }
 
             flags = r->flags;
             qv    = (flags & DB_QV);
+            if (DOARR)
+              { uint64 big;
+                int    j;
+
+                big   = *((uint64 *) &(r->coff));
+                for (j = 0; j < 4; j++)
+                  { snr[3-j] = (big & 0xffff) / 100.;
+                    big    >>= 16;
+                  }
+              }
             if (DAM)
               { char header[MAX_NAME];
 
                 fseeko(hdrs,r->coff,SEEK_SET);
                 fgets(header,MAX_NAME,hdrs);
                 header[strlen(header)-1] = '\0';
-                printf("%s :: Contig %d[%d,%d]",header,r->origin,r->fpulse,r->fpulse+len);
+                printf("%s :: Contig %d[%d,%d]",header,r->origin,r->fpulse+fst,r->fpulse+lst);
               }
             else
               { while (i < findx[map-1])
@@ -468,11 +513,15 @@ int main(int argc, char *argv[])
                 while (i >= findx[map])
                   map += 1;
                 if (QUIVA)
-                  printf("@%s/%d/%d_%d",flist[map],r->origin,r->fpulse,r->fpulse+len);
+                  printf("@%s/%d/%d_%d",flist[map],r->origin,r->fpulse+fst,r->fpulse+lst);
+                else if (ARROW)
+                  printf(">%s",flist[map]);
                 else
-                  printf(">%s/%d/%d_%d",flist[map],r->origin,r->fpulse,r->fpulse+len);
+                  printf(">%s/%d/%d_%d",flist[map],r->origin,r->fpulse+fst,r->fpulse+lst);
                 if (qv > 0)
                   printf(" RQ=0.%3d",qv);
+                if (DOARR)
+                  printf(" SN=%.2f,%.2f,%.2f,%.2f",snr[0],snr[1],snr[2],snr[3]);
               }
             printf("\n");
 
@@ -480,6 +529,8 @@ int main(int argc, char *argv[])
               Load_QVentry(db,i,entry,UPPER);
             if (DOSEQ)
               Load_Read(db,i,read,UPPER);
+            if (DOARR)
+              Load_Arrow(db,i,arrow,1);
 
             for (track = first; track != NULL; track = track->next)
               { int64 *anno;
@@ -508,21 +559,20 @@ int main(int argc, char *argv[])
                   }
               }
 
-            if (substr)
-              { fst = iter->beg;
-                lst = iter->end;
-              }
-            else
-              { fst = 0;
-                lst = len;
-              }
-
             if (QUIVA)
               { int k;
 
                 for (k = 0; k < 5; k++)
                   printf("%.*s\n",lst-fst,entry[k]+fst);
               }
+            else if (ARROW)
+              { int k;
+    
+                for (k = fst; k+WIDTH < lst; k += WIDTH)
+                  printf("%.*s\n",WIDTH,arrow+k);
+                if (k < lst)
+                  printf("%.*s\n",lst-k,arrow+k);
+              }
             else
               { if (DOQVS)
                   { int j, k;
@@ -543,6 +593,21 @@ int main(int argc, char *argv[])
                         printf("\n");
                       }
                   }
+                else if (DOARR)
+                  { int j;
+
+                    printf("\n");
+                    for (j = fst; j+WIDTH < lst; j += WIDTH)
+                      { if (DOSEQ)
+                          printf("%.*s\n",WIDTH,read+j);
+                        printf("%.*s\n\n",WIDTH,arrow+j);
+                      }
+                    if (j < lst)
+                      { if (DOSEQ)
+                          printf("%.*s\n",lst-j,read+j);
+                        printf("%.*s\n\n",lst-j,arrow+j);
+                      }
+                  }
                 else if (DOSEQ)
                   { int j;
     
diff --git a/DBsplit.c b/DBsplit.c
index 6a218ec..9ef8b77 100644
--- a/DBsplit.c
+++ b/DBsplit.c
@@ -202,7 +202,8 @@ int main(int argc, char *argv[])
     fprintf(dbfile,DB_NBLOCK,nblock);
 
     dbs.cutoff = CUTOFF;
-    dbs.all    = ALL;
+    if (ALL)
+      dbs.allarr |= DB_ALL;
     dbs.treads = treads;
     rewind(ixfile);
     fwrite(&dbs,sizeof(HITS_DB),1,ixfile);
diff --git a/DBstats.c b/DBstats.c
index b0e46c2..4a3badf 100644
--- a/DBstats.c
+++ b/DBstats.c
@@ -152,7 +152,7 @@ int main(int argc, char *argv[])
 
     if (dam)
       printf("\nStatistics for all contigs");
-    else if (db->all || !TRIM)
+    else if ((db->allarr & DB_ALL) != 0 || !TRIM)
       printf("\nStatistics for all wells");
     else
       printf("\nStatistics for all reads");
diff --git a/DBwipe.c b/DBwipe.c
new file mode 100644
index 0000000..9730062
--- /dev/null
+++ b/DBwipe.c
@@ -0,0 +1,89 @@
+/*******************************************************************************************
+ *
+ *  Split a .db into a set of sub-database blocks for use by the Dazzler:
+ *     Divide the database <path>.db conceptually into a series of blocks referable to on the
+ *     command line as <path>.1.db, <path>.2.db, ...  If the -x option is set then all reads
+ *     less than the given length are ignored, and if the -a option is not set then secondary
+ *     reads from a given well are also ignored.  The remaining reads are split amongst the
+ *     blocks so that each block is of size -s * 1Mbp except for the last which necessarily
+ *     contains a smaller residual.  The default value for -s is 400Mbp because blocks of this
+ *     size can be compared by our "overlapper" dalign in roughly 16Gb of memory.  The blocks
+ *     are very space efficient in that their sub-index of the master .idx is computed on the
+ *     fly when loaded, and the .bps file of base pairs is shared with the master DB.  Any
+ *     tracks associated with the DB are also computed on the fly when loading a database block.
+ *
+ *  Author:  Gene Myers
+ *  Date  :  September 2013
+ *  Mod   :  New splitting definition to support incrementality, and new stub file format
+ *  Date  :  April 2014
+ *
+ ********************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "DB.h"
+
+#ifdef HIDE_FILES
+#define PATHSEP "/."
+#else
+#define PATHSEP "/"
+#endif
+
+static char *Usage = "<path:db>";
+
+int main(int argc, char *argv[])
+{ HITS_DB    db;
+  int        status;
+
+  Prog_Name = Strdup("DBwipe","Allocating Program Name");
+
+  if (argc != 2)
+    { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+      exit (1);
+    }
+
+  //  Open db
+
+  status = Open_DB(argv[1],&db);
+  if (status < 0)
+    exit (1);
+  if (db.part > 0)
+    { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
+      exit (1);
+    }
+  if (status)
+    { fprintf(stderr,"%s: Cannot be called on a .dam: %s\n",Prog_Name,argv[1]);
+      exit (1);
+    }
+
+  { char    *pwd, *root;
+    FILE    *index;
+    int      i;
+
+    pwd    = PathTo(argv[1]);
+    root   = Root(argv[1],".db");
+    unlink(Catenate(pwd,PATHSEP,root,".arw"));
+    unlink(Catenate(pwd,PATHSEP,root,".qvs"));
+
+    for (i = 0; i < db.nreads; i++)
+      db.reads[i].coff = -1;
+    db.allarr &= ~DB_ARROW;
+
+    if ((index = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"w")) == NULL)
+      { fprintf(stderr,"%s: Cannot open %s%s%s.idx\n",Prog_Name,pwd,PATHSEP,root);
+        exit (1);
+      }
+
+    fwrite(&db,sizeof(HITS_DB),1,index);
+    fwrite(db.reads,sizeof(HITS_READ),db.nreads,index);
+
+    fclose(index);
+  }
+
+  Close_DB(&db);
+
+  exit (0);
+}
diff --git a/Makefile b/Makefile
index cf0afb8..873296f 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ DEST_DIR = ~/bin
 CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
 
 ALL = fasta2DB DB2fasta quiva2DB DB2quiva DBsplit DBdust Catrack DBshow DBstats DBrm simulator \
-      fasta2DAM DAM2fasta DBdump rangen
+      fasta2DAM DAM2fasta DBdump rangen arrow2DB DB2arrow DBwipe
 
 all: $(ALL)
 
@@ -19,6 +19,12 @@ quiva2DB: quiva2DB.c DB.c DB.h QV.c QV.h
 DB2quiva: DB2quiva.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DB2quiva DB2quiva.c DB.c QV.c -lm
 
+DB2arrow: DB2arrow.c DB.c QV.c DB.h QV.h
+	gcc $(CFLAGS) -o DB2arrow DB2arrow.c DB.c QV.c -lz
+
+arrow2DB: arrow2DB.c DB.c QV.c DB.h QV.h
+	gcc $(CFLAGS) -o arrow2DB arrow2DB.c DB.c QV.c -lz
+
 DBsplit: DBsplit.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DBsplit DBsplit.c DB.c QV.c -lm
 
@@ -52,6 +58,10 @@ fasta2DAM: fasta2DAM.c DB.c DB.h QV.c QV.h
 DAM2fasta: DAM2fasta.c DB.c DB.h QV.c QV.h
 	gcc $(CFLAGS) -o DAM2fasta DAM2fasta.c DB.c QV.c -lm
 
+DBwipe: DBwipe.c DB.c DB.h QV.c QV.h
+	gcc $(CFLAGS) -o DBwipe DBwipe.c DB.c QV.c -lm
+
+
 clean:
 	rm -f $(ALL)
 	rm -fr *.dSYM
diff --git a/QV.c b/QV.c
index cdb6a63..d7d7263 100644
--- a/QV.c
+++ b/QV.c
@@ -863,6 +863,62 @@ static int      delChar, subChar;
 
   // Referred by:  QVcoding_Scan, Create_QVcoding
 
+void QVcoding_Scan1(int rlen, char *delQV, char *delTag, char *insQV, char *mergeQV, char *subQV)
+{
+  if (rlen == 0)   //  Initialization call
+    { int   i;
+
+      //  Zero histograms
+
+      bzero(delHist,sizeof(uint64)*256);
+      bzero(mrgHist,sizeof(uint64)*256);
+      bzero(insHist,sizeof(uint64)*256);
+      bzero(subHist,sizeof(uint64)*256);
+
+      for (i = 0; i < 256; i++)
+        delRun[i] = subRun[i] = 1;
+
+      totChar    = 0;
+      delChar    = -1;
+      subChar    = -1;
+      return;
+    }
+
+  //  Add streams to accumulating histograms and figure out the run chars
+  //    for the deletion and substition streams
+
+  Histogram_Seqs(delHist,(uint8 *) delQV,rlen);
+  Histogram_Seqs(insHist,(uint8 *) insQV,rlen);
+  Histogram_Seqs(mrgHist,(uint8 *) mergeQV,rlen);
+  Histogram_Seqs(subHist,(uint8 *) subQV,rlen);
+
+  if (delChar < 0)
+    { int   k;
+
+      for (k = 0; k < rlen; k++)
+        if (delTag[k] == 'n' || delTag[k] == 'N')
+          { delChar = delQV[k];
+            break;
+          }
+    }
+  if (delChar >= 0)
+    Histogram_Runs( delRun,(uint8 *) delQV,rlen,delChar);
+  totChar += rlen;
+  if (subChar < 0)
+    { if (totChar >= 100000)
+        { int k;
+
+          subChar = 0;
+          for (k = 1; k < 256; k++)
+            if (subHist[k] > subHist[subChar])
+              subChar = k;
+        }
+    }
+  if (subChar >= 0)
+    Histogram_Runs( subRun,(uint8 *) subQV,rlen,subChar);
+  return;
+}
+
 int QVcoding_Scan(FILE *input, int num, FILE *temp)
 { char *slash;
   int   rlen;
@@ -1284,6 +1340,44 @@ void Free_QVcoding(QVcoding *coding)
  *
  ********************************************************************************************/
 
+void Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub,
+                            FILE *output, QVcoding *coding, int lossy)
+{ int clen;
+
+  if (coding->delChar < 0)
+    { Encode(coding->delScheme, output, (uint8 *) del, rlen);
+      clen = rlen;
+    }
+  else
+    { Encode_Run(coding->delScheme, coding->dRunScheme, output,
+                 (uint8 *) del, rlen, coding->delChar);
+      clen = Pack_Tag(tag,del,rlen,coding->delChar);
+    }
+  Number_Read(tag);
+  Compress_Read(clen,tag);
+  fwrite(tag,1,COMPRESSED_LEN(clen),output);
+
+  if (lossy)
+    { uint8 *insert = (uint8 *) ins;
+      uint8 *merge  = (uint8 *) mrg;
+      int    k;
+
+      for (k = 0; k < rlen; k++)
+        { insert[k] = (uint8) ((insert[k] >> 1) << 1);
+          merge[k]  = (uint8) (( merge[k] >> 2) << 2);
+        }
+    }
+
+  Encode(coding->insScheme, output, (uint8 *) ins, rlen);
+  Encode(coding->mrgScheme, output, (uint8 *) mrg, rlen);
+  if (coding->subChar < 0)
+    Encode(coding->subScheme, output, (uint8 *) sub, rlen);
+  else
+    Encode_Run(coding->subScheme, coding->sRunScheme, output,
+               (uint8 *) sub, rlen, coding->subChar);
+  return;
+}
+
 int Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy)
 { int rlen, clen;
 
diff --git a/QV.h b/QV.h
index 532b2f4..e5c9485 100644
--- a/QV.h
+++ b/QV.h
@@ -58,6 +58,7 @@ int       Get_QV_Line();
   //   If there is an error then -1 is returned, otherwise the number of entries read.
 
 int       QVcoding_Scan(FILE *input, int num, FILE *temp);
+void      QVcoding_Scan1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub);
 
   // Given QVcoding_Scan has been called at least once, create an encoding scheme based on
   //   the accumulated statistics and return a pointer to it.  The returned encoding object
@@ -83,6 +84,8 @@ void      Free_QVcoding(QVcoding *coding);
   //    occurred, and the sequence length otherwise.
 
 int      Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy);
+void     Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub,
+                                FILE *output, QVcoding *coding, int lossy);
 
   //  Assuming the input is position just beyond the compressed encoding of an entry header,
   //    read the set of compressed encodings for the ensuing 5 QV vectors, decompress them,
diff --git a/README.md b/README.md
index 515de32..ba37408 100644
--- a/README.md
+++ b/README.md
@@ -22,12 +22,15 @@ to the data base over time.
 is accomplished with the concept of *tracks* that implementors can add as they
 need them.
 
-4. The data is held in a compressed form equivalent to the .dexta and .dexqv files of
-the data extraction module.  Both the .fasta and .quiva information for each
-read is held in the data base and can be recreated from it.  The .quiva
-information can be added separately and later on if desired.
+4. The data is held in a compressed form equivalent to the .dexta and .dexqv/.dexar
+files of the data extraction module.
 
-5. To facilitate job parallel, cluster operation of the phases of our assembler, the
+5. Quiver or Arrow information can be added separately from the sequence information
+and later on if desired, but a database can only hold either Quiver or Arrow information,
+but not both.  The Arrow or Quiver information can be removed from the database at any
+time leaving a database just containing sequence information.
+
+6. To facilitate job parallel, cluster operation of the phases of our assembler, the
 data base has a concept of a *current partitioning* in which all the reads that
 are over a given length and optionally unique to a well, are divided up into
 *blocks* containing roughly a given number of bases, except possibly the last
@@ -37,12 +40,18 @@ all the same size.  One must be careful about changing the partition during an
 assembly as doing so can void the structural validity of any interim
 block-based results.
 
+A DB con contain the information needed by Quiver, or by Arrow, or neither, but
+not both.  A DB containing neither Quiver or Arrow information is termed a
+Sequence-DB (S-DB).  A DB with Quiver information is a Quiver-DB (Q-DB) and
+a DB with Arrow information is an Arrow-DB (A-DB). All commands are aware of
+the state of a DB and respond to options according to their type. 
+
 A Dazzler DB consists of one named, *visible* file, e.g. FOO.db, and several
 *invisible* secondary files encoding various elements of the DB.  The secondary files
 are "invisible" to the UNIX OS in the sense that they begin with a "." and hence are
 not listed by "ls" unless one specifies the -a flag.  We chose to do this so that when
 a user lists the contents of a directory they just see a single name, e.g. FOO.db, that
-is the one used to refer to the DB in commands.  The files associated with a database
+is used to refer to the DB in commands.  The files associated with a database
 named, say FOO,  are as follows:
 
 * "FOO.db": a text file containing
@@ -60,7 +69,11 @@ named, say FOO,  are as follows:
 
 * ".FOO.qvs": a binary compressed "store" of the 5 Pacbio quality value streams for
   the reads.  Its size is roughly 5/3M bytes depending on the
-  compression acheived.  This file only exists if .quiva files have
+  compression acheived.  This file only exists if Quiver information has
+  been added to the database.
+
+* ".FOO.arw": a binary compressed "store" of the clipped pulse width stream for
+  the reads.  Its size is roughly M/4 bytes.  This file only exists if Arrow information has
   been added to the database.
 
 * ".FOO.\<track\>.[anno,data]": a *track* containing customized meta-data for each read.  For
@@ -83,7 +96,7 @@ been introduced in order to facilitate the mapping of reads to an assembly and h
 been given the suffix .dam to distinguish it from an ordinary DB.  It is structurally
 identical to a .db except:
 
-* there is no concept of quality values, and hence no .FOO.qvs file.
+* there is no concept of quality values, and hence no .FOO.qvs or .FOO.arw file.
 
 * every .fasta scaffold (a sequence with runs of N's between contigs estimating the
   length of the gap) is broken into a separate contig sequence in the DB and the
@@ -134,7 +147,9 @@ are currently as follows:
 Builds an initial data base, or adds to an existing database, either (a) the list of
 .fasta files following the database name argument, or (b) the list of .fasta files in
 \<file\> if the -f option is used, or (c) entries piped from the standard input if the
--i option is used.  If a faux file name, \<name\>, follows the -i option then all the
+-i option is used.  If the DB is being created it is established as a Sequence-DB (S-DB)
+otherwise its type is unchanged.  If a faux file name, \<name\>, follows the -i option
+then all the
 input received is considered to have come from a file by the name of \<name\>.fasta by
 DB2fasta, otherwise it will be sent to the standard output by DB2fasta.  The SMRT cells
 in a given named input (i.e. all sources other than -i without a name) can only be
@@ -165,12 +180,13 @@ set to any positive value with the -w option.
 3. quiva2DB [-vl] <path:db> ( -f<file> | -i | <input:quiva> ... )
 ```
 
-Adds .quiva streams to an existing DB "path".  The data comes from (a) the given .quiva
+Adds .quiva streams to an existing DB "path".  The DB must either be an S-DB or a
+Q-DB and upon completion the DB is a Q-DB.  The data comes from (a) the given .quiva
 files on the command line, or (b) those in the file specified by the -f option, or
-(c) the standard input if the -i option is given. The input files must be added in the
+(c) the standard input if the -i option is given. The input files can be added incrementally
+but must be added in the
 same order as the .fasta files were and have the same root names, e.g. FOO.fasta and
-FOO.quiva. The files can be added incrementally but must be added in the same order as
-their corresponding .fasta files. This is enforced by the program. With the -l option
+FOO.quiva.  This is enforced by the program. With the -l option
 set the compression scheme is a bit lossy to get more compression (see the description
 of dexqv in the DEXTRACTOR module here).
 
@@ -178,18 +194,44 @@ of dexqv in the DEXTRACTOR module here).
 4. DB2quiva [-vU] <path:db>
 ```
 
-The set of .quiva files within the given DB are recreated from the DB exactly as they
+The set of .quiva files within the given Q-DB are recreated from the DB exactly as they
 were input.  That is, this is a perfect inversion, including the reconstitution of the
 proper .quiva headers.  Because of this property, one can, if desired, delete the
 .quiva source files once they are in the DB as they can always be recreated from it.
-Entries imported from the standard input will be place in the faux file name given on
+Entries imported from the standard input will be placed in the faux file name given on
 import, or to the standard output if no name was given. 
 By .fastq convention each QV vector is output as a line without new-lines, and by
 default the Deletion Tag entry is in lower case letters.  The -U option specifies
 upper case letters should be used instead.
 
 ```
-5. fasta2DAM [-v] <path:dam> ( -f<file> | -i[<name>] | <input:fasta> ... )
+5. arrow2DB [-v] <path:db> ( -f<file> | -i | <input:arrow> ... )
+```
+
+Adds .arrow streams to an existing DB "path".  The DB must either be an S-DB or an
+A-DB and upon completion the DB is an A-DB.  The data comes from (a) the given .arrow
+files on the command line, or (b) those in the file specified by the -f option, or
+(c) the standard input if the -i option is given. The input files can be added
+incrementally but must be added in the
+same order as the .fasta files were and have the same root names, e.g. FOO.fasta and
+FOO.quiva.  This is enforced by the program.
+
+```
+6. DB2arrow [-v] [-w<int(80)>] <path:db>
+```
+
+The set of .arrow files within the given A-DB are recreated from the DB exactly as they
+were input.  That is, this is a perfect inversion, including the reconstitution of the
+proper .arrow headers.  Because of this property, one can, if desired, delete the
+.arrow source files once they are in the DB as they can always be recreated from it.
+Entries imported from the standard input will be placed in the faux file name given on
+import, or to the standard output if no name was given. 
+By default the output sequences are formatted 80 chars per line,
+but the characters per line, or line width, can be
+set to any positive value with the -w option.
+
+```
+7. fasta2DAM [-v] <path:dam> ( -f<file> | -i[<name>] | <input:fasta> ... )
 ```
 
 Builds an initial map DB or DAM, or adds to an existing DAM, either (a) the list of
@@ -197,13 +239,13 @@ Builds an initial map DB or DAM, or adds to an existing DAM, either (a) the list
 \<file\> if the -f option is used, or (c) entries piped from the standard input if the -i
 option is used.  If a faux file name, \<name\>, follows the -i option then all the input
 received is considered to have come from a file by the name of \<name\>.fasta by
-DB2fasta, otherwise it will be sent to the standard output by DB2fasta.  Any .fasta
+DAM2fasta, otherwise it will be sent to the standard output by DAM2fasta.  Any .fasta
 entry that has a run of N's in it will be split into separate "contig" entries and the
 interval of the contig in the original entry recorded. The header for each .fasta entry
 is saved with the contigs created from it.
 
 ```
-6. DAM2fasta [-vU] [-w<int(80)>] <path:dam>
+8. DAM2fasta [-vU] [-w<int(80)>] <path:dam>
 ```
 
 The set of .fasta files for the given map DB or DAM are recreated from the DAM
@@ -217,7 +259,7 @@ should be used, and the characters per line, or line width, can be set to any po
 value with the -w option.
 
 ```
-7. DBsplit [-a] [-x<int>] [-s<int(200)>] <path:db|dam>
+9. DBsplit [-af] [-x<int>] [-s<int(200)>] <path:db|dam>
 ```
 
 Divide the database \<path\>.db or \<path\>.dam conceptually into a series of blocks
@@ -237,7 +279,7 @@ question has previously bin split, i.e. one is not interactively asked if they w
 to proceed.
 
 ```
-8. DBdust [-b] [-w<int(64)>] [-t<double(2.)>] [-m<int(10)>] <path:db|dam>
+10. DBdust [-b] [-w<int(64)>] [-t<double(2.)>] [-m<int(10)>] <path:db|dam>
 ```
 
 Runs the symmetric DUST algorithm over the reads in the untrimmed DB \<path\>.db or
@@ -258,7 +300,7 @@ This permits job parallelism in block-sized chunks, and the resulting sequence o
 block tracks can then be merged into a track for the entire untrimmed DB with Catrack.
 
 ```
-9. Catrack [-v] <path:db|dam> <track:name>
+11. Catrack [-v] <path:db|dam> <track:name>
 ```
 
 Find all block tracks of the form .\<path\>.#.\<track\>... and merge them into a single
@@ -267,8 +309,8 @@ encode the same kind of track data (this is checked), and the files must exist f
 block 1, 2, 3, ... up to the last block number.
 
 ```
-10. DBshow [-unqUQ] [-w<int(80)>] [-m<track>]+
-                    <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
+12. DBshow [-unqaUQA] [-w<int(80)>] [-m<track>]+
+                      <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
 ```
 
 Displays the requested reads in the database \<path\>.db or \<path\>.dam.  By default the
@@ -285,10 +327,14 @@ represents the index of the last read in the actively loaded db.  For example,
 example, 1-$ displays every read in the active db (the default).
 
 By default a .fasta file of the read sequences is displayed.  If the -q option is
-set, then the QV streams are also displayed in a non-standard modification of the
-fasta format.  If the -n option is set then the DNA sequence is *not* displayed.
-If the -Q option is set then a .quiva file is displayed  and in this case the -n
-and -m options mayt not be set (and the -q and -w options have no effect).
+set and the DB is a Q-DB, then the QV streams are also displayed in a non-standard
+modification of the fasta format.
+Similarly, if the -a option is set and the DB is an A-DB, then the pulse width stream is
+also displayed in a non-standard format.
+If the -n option is set then the DNA sequence is *not* displayed.
+If the -Q option is set then a .quiva file of the selected reads is displayed and
+all other options except -u and -U are ignored.  If the -A option is set then a .arrow
+file of the selected reads is displayed and all other options except -u and -w are ignored.
 
 If one or more masks are set with the -m option then the track intervals are also
 displayed in an additional header line and the bases within an interval are displayed
@@ -297,44 +343,63 @@ sequences are in lower case and 80 chars per line.  The -U option specifies uppe
 case should be used, and the characters per line, or line width, can be set to any
 positive value with the -w option.
 
-The .fasta or .quiva files that are output can be converted into a DB by fasta2DB
-and quiva2DB (if the -q and -n options are not set and no -m options are set),
-giving one a simple way to make a DB of a subset of the reads for testing purposes.
+The .fasta, .quiva, and .arrow files that are output can be used to build a new DB with
+fasta2DB, quiva2D, and arrow2DB, giving one a simple way to make a DB of a subset of
+the reads for testing purposes.
 
 ```
-12. DBdump [-rhsiqp] [-uU] [-m<track>]+
-                     <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
+13. DBdump [-rhsaqip] [-uU] [-m<track>]+
+                      <path:db|dam> [ <reads:FILE> | <reads:range> ... ]
 ```
 
 Like DBshow, DBdump allows one to display a subset of the reads in the DB and select
 which information to show about them including any mask tracks.  The difference is
 that the information is written in a very simple "1-code" ASCII format that makes it
-easy for one to read and parse the information for further use.  -r requests that each
-read number be displayed (useful if only a subset of reads is requested).  -h prints
-the header information which is the source file name, well #, and pulse range.
--s requests the sequence be output, -i requests that the intrinsic quality values be
-output, -q requests that the 5 quiva sequences be output, -p requests the repeat
-profile be output (if available), and -m\<track\> requests that mask \<track\> be output.
+easy for one to read and parse the information for further use.  The option flags determine
+which items of information are output as follows:
+
+* -r requests that each read number be displayed in an R-line (see below, useful if only a
+subset of reads is requested).
+
+* -h requests the header information be output as the source file name on an H-line, the
+well # and pulse range on an L-line, and optionally the quality of the read if given on a Q-line.
+
+* -s requests the sequence be output on an S-line.
+
+* -a requests the Arrow information be output as a pulse-width string on an A-line and
+the 4 SNR channel values on an N-line,
+
+* -q requests that the 5 Quiver quality streams be output on d-, c-, i-, m-, and s-lines.
+
+* -i requests that the intrinsic quality values be output on an I-line.
+
+* -p requests the repeat profile be output (if available) on a P-line, on a P-line
+
+* -m\<track\> requests that mask \<track\> be output on a T-line.
+
 Set -u if you want data from the untrimmed database (the default is trimmed) and
 set -U if you'd like upper-case letter used in the DNA sequence strings.
 
-The format is very simple.  Each requested piece of information occurs on a line.  The
+The format is very simple.  A requested unit of information occurs on a line.  The
 first character of every line is a "1-code" character that tells you what information
-to expect on the line.  The rest of the line contains information where each item is
+to expect on the line.  The rest of the line contains the information where each item is
 separated by a single blank space.  Strings are output as first an integer giving the
 length of the string, a blank space, and then the string terminated by a new-line.
 Intrinsic quality values are between 0 and 50, inclusive, and a vector of said are
 displayed as an alphabetic string where 'a' is 0, 'b' is '1', ... 'z' is 25, 'A' is
 26, 'B' is 27, ... and 'Y' is 50.  Repeat profiles are also displayed as string where
 '_' denotes 0 repetitions, and then 'a' through 'N' denote the values 1 through 40,
-respectively.  
+respectively.   The set of all possible lines is as follows:
 
 ```
     R #              - read number
     H # string       - original file name string (header)
     L # # #          - location: well, pulse start, pulse end
+    Q #              - quality of read (#/1000)
+    N # # # #        - SNR of ACGT channels (#/100)
     Tx #n (#b #e)^#n - x'th track on command line, #n intervals all on same line
     S # string       - sequence string
+    A # string       - arrow pulse-width string
     I # string       - intrinsic quality vector (as an ASCII string)
     P # string       - repeat profile vector (as an ASCII string)
     d # string       - Quiva deletion values (as an ASCII string)
@@ -352,12 +417,12 @@ the number of reads (X=R), the number of masks (X=M), or the total number of
 characters in all headers (X=H), sequences (X=S), intrinsic quality vectors (X=I),
 read profile vector (X=P), or track (X=T#).  And '@ X #' gives the maximum number of
 characters in any header (X=H), sequence (X=S), intrincic quality vector (X=I), read
-profile vector (X=P), or track (X=T#).  The size numbers for the Quiva strings are
-identical to that for the sequence as they are all of the same length for any
-given entry.
+profile vector (X=P), or track (X=T#).  The size numbers for the Quiva strings and
+Arrow pulse width strings are identical to that for the sequence as they are all of
+the same length for any given entry.
 
 ```
-12. DBstats [-nu] [-b<int(1000)] [-m<track>]+ <path:db|dam>
+14. DBstats [-nu] [-b<int(1000)] [-m<track>]+ <path:db|dam>
 ```
 
 Show overview statistics for all the reads in the trimmed data base \<path\>.db or
@@ -369,7 +434,7 @@ intervals along the read can be specified with the -m option in which case a sum
 and a histogram of the interval lengths is displayed.
 
 ```
-13. DBrm <path:db|dam> ...
+15. DBrm <path:db|dam> ...
 ```
 
 Delete all the files for the given data bases.  Do not use rm to remove a database, as
@@ -377,7 +442,15 @@ there are at least two and often several secondary files for each DB including t
 files, and all of these are removed by DBrm.
 
 ```
-14.  simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] [-e<double(.15)]
+16. DBwipe <path:db|dam> ...
+```
+
+Delete any Arrow or Quiver data from the given databases.  This removes the .arw or
+.qvs file and resets information in the .idx file containing information for Arrow
+or Quiver.  Basically, converts an A-DB or Q-DB back to a simple S-DB.
+
+```
+17.  simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] [-e<double(.15)]
                                   [-c<double(50.)>] [-f<double(.5)>] [-x<int(4000)>]
                                   [-w<int(80)>] [-r<int>] [-M<file>]
 ```
@@ -412,7 +485,7 @@ a read is say 's b e' then if b \< e the read is a perturbed copy of s[b,e] in t
 forward direction, and a perturbed copy s[e,b] in the reverse direction otherwise.
 
 ```
-15. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
+18. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
 ```
 
 Generate a random DNA sequence of length genlen*1Mbp that has an AT-bias of -b.
diff --git a/quiva2DB.c b/arrow2DB.c
similarity index 58%
copy from quiva2DB.c
copy to arrow2DB.c
index 6d099f5..19975aa 100644
--- a/quiva2DB.c
+++ b/arrow2DB.c
@@ -1,8 +1,8 @@
 /*******************************************************************************************
  *
- *  Adds the given .quiva files to an existing DB "path".  The input files must be added in
+ *  Adds the given .arrow files to an existing DB "path".  The input files must be added in
  *  the same order as the .fasta files were and have the same root names, e.g. FOO.fasta
- *  and FOO.quiva.  The files can be added incrementally but must be added in the same order  
+ *  and FOO.arrow.  The files can be added incrementally but must be added in the same order  
  *  as the .fasta files.  This is enforced by the program.  With the -l option set the
  *  compression scheme is a bit lossy to get more compression (see the description of dexqv
  *  in the DEXTRACTOR module).
@@ -31,7 +31,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-vl] <path:string> ( -f<file> | -i | <input:quiva> ... )";
+static char *Usage = "[-v] <path:db> ( -f<file> | -i | <input:arrow> ... )";
 
 typedef struct
   { int    argc;
@@ -95,18 +95,14 @@ int main(int argc, char *argv[])
 { FILE      *istub;
   char      *root, *pwd;
 
-  FILE      *quiva, *indx;
-  int64      coff;
+  FILE      *arrow, *indx;
+  int64      boff;
 
   HITS_DB    db;
   HITS_READ *reads;
   int        nfiles;
 
-  FILE      *temp;
-  char      *tname;
-
   int        VERBOSE;
-  int        LOSSY;
   int        PIPE;
   FILE      *INFILE;
 
@@ -115,7 +111,7 @@ int main(int argc, char *argv[])
   { int   i, j, k;
     int   flags[128];
 
-    ARG_INIT("quiva2DB")
+    ARG_INIT("arrow2DB")
 
     INFILE = NULL;
 
@@ -139,7 +135,6 @@ int main(int argc, char *argv[])
     argc = j;
 
     VERBOSE = flags['v'];
-    LOSSY   = flags['l'];
     PIPE    = flags['i'];
 
     if (INFILE != NULL && PIPE)
@@ -154,78 +149,77 @@ int main(int argc, char *argv[])
       }
   }
 
-  //  Open DB stub file, index, and .qvs file for appending.  Load db and read records,
-  //    get number of cells from stub file, and note current offset to end of .qvs
+  //  Open DB stub file, index, and .arw file for appending.  Load db and read records,
+  //    get number of cells from stub file, and note current offset to end of .arw
 
   root   = Root(argv[1],".db");
   pwd    = PathTo(argv[1]);
   istub  = Fopen(Catenate(pwd,"/",root,".db"),"r");
   if (istub == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
-      exit (1);
-    }
+    exit (1);
   if (fscanf(istub,DB_NFILE,&nfiles) != 1)
-    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 
   indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
   if (indx == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
-      exit (1);
-    }
+    exit (1);
   if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 
   reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*db.ureads,"Allocating DB index");
   if (reads == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
+    exit (1);
+  if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
-  if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+
+  if (reads[0].coff >= 0 && (db.allarr & DB_ARROW) == 0)
+    { fprintf(stderr,"%s: Database %s has Quiver data!\n",Prog_Name,root);
       exit (1);
     }
 
-  quiva = NULL;
-  temp  = NULL;
-  coff  = 0;
+  arrow = NULL;
+  boff  = 0;
 
   if (reads[0].coff < 0)
-    quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"w");
+    arrow = Fopen(Catenate(pwd,PATHSEP,root,".arw"),"w");
   else
-    quiva = Fopen(Catenate(pwd,PATHSEP,root,".qvs"),"r+");
+    arrow = Fopen(Catenate(pwd,PATHSEP,root,".arw"),"r+");
 
-  tname = Strdup(Catenate(".",PATHSEP,root,Numbered_Suffix("",getpid(),".tmp")),
-                 "Allocating temporary name");
-  temp = Fopen(tname,"w+");
+  if (arrow == NULL)
+    goto error;
+  fseeko(arrow,0,SEEK_END);
+  boff = ftello(arrow);
 
-  if (quiva == NULL || temp == NULL)
-    { fprintf(stderr,"%s",Ebuffer);
-      goto error;
-    }
-  fseeko(quiva,0,SEEK_END);
-  coff = ftello(quiva);
-
-  //  Do a merged traversal of cell lines in .db stub file and .quiva files to be
+  //  Do a merged traversal of cell lines in .db stub file and .arrow files to be
   //    imported, driving the loop with the cell line #
 
   { FILE          *input = NULL;
     char          *path = NULL;
     char          *core = NULL;
+    char          *read;
+    int            rmax, rlen, eof;
     File_Iterator *ng = NULL;
     char           lname[MAX_NAME];
     int            first, last, cline;
     int            cell;
 
+    //  Buffer for accumulating .arrow sequence over multiple lines
+
+    rmax  = MAX_NAME + 60000;
+    read  = (char *) Malloc(rmax+1,"Allocating line buffer");
+    if (read == NULL)
+      goto error;
+
     if (!PIPE)
       { ng = init_file_iterator(argc,argv,INFILE,2);
         if (ng == NULL)
-          { fprintf(stderr,"%s",Ebuffer);
-            goto error;
-          }
+          goto error;
       }
 
     for (cell = 0; cell < nfiles; cell++)
@@ -233,7 +227,7 @@ int main(int argc, char *argv[])
 
         if (cell == 0)
 
-          //  First addition, a pipe: find the first cell that does not have .quiva's yet
+          //  First addition, a pipe: find the first cell that does not have .arrow's yet
           //     (error if none) and set input source to stdin.
 
           if (PIPE)
@@ -249,23 +243,23 @@ int main(int argc, char *argv[])
                   cell += 1;
                 }
               if (cell >= nfiles)
-                { fprintf(stderr,"%s: All .quiva's have already been added !?\n",Prog_Name);
+                { fprintf(stderr,"%s: All .arrows's have already been added !?\n",Prog_Name);
                   goto error;
                 }
 
               input = stdin;
 
               if (VERBOSE)
-                { fprintf(stderr,"Adding quiva's from stdin ...\n");
+                { fprintf(stderr,"Adding arrows's from stdin ...\n");
                   fflush(stderr);
                 }
               cline = 0;
             }
 
-          //  First addition, not a pipe: then get first .quiva file name (error if not one) to
+          //  First addition, not a pipe: then get first .arrow file name (error if not one) to
           //    add, find the first cell name whose file name matches (error if none), check that
-          //    the previous .quiva's have been added and this is the next slot.  Then open
-          //    the .quiva file for compression
+          //    the previous .arrow's have been added and this is the next slot.  Then open
+          //    the .arrow file for compression
 
           else
             { if (! next_file(ng))
@@ -273,16 +267,12 @@ int main(int argc, char *argv[])
                   goto error;
                 }
               if (ng->name == NULL)
-                { fprintf(stderr,"%s",Ebuffer);
-                  goto error;
-                }
+                goto error;
   
-              core = Root(ng->name,".quiva");
+              core = Root(ng->name,".arrow");
               path = PathTo(ng->name);
-              if ((input = Fopen(Catenate(path,"/",core,".quiva"),"r")) == NULL)
-                { fprintf(stderr,"%s",Ebuffer);
-                  goto error;
-                }
+              if ((input = Fopen(Catenate(path,"/",core,".arrow"),"r")) == NULL)
+                goto error;
   
               first = 0;
               while (cell < nfiles)
@@ -301,25 +291,25 @@ int main(int argc, char *argv[])
                 }
         
               if (first > 0 && reads[first-1].coff < 0)
-                { fprintf(stderr,"%s: Predecessor of %s.quiva has not been added yet\n",
+                { fprintf(stderr,"%s: Predecessor of %s.arrow has not been added yet\n",
                                  Prog_Name,core);
                   goto error;
                 }
               if (reads[first].coff >= 0)
-                { fprintf(stderr,"%s: %s.quiva has already been added\n",Prog_Name,core);
+                { fprintf(stderr,"%s: %s.arrow has already been added\n",Prog_Name,core);
                   goto error;
                 }
   
               if (VERBOSE)
-                { fprintf(stderr,"Adding '%s.quiva' ...\n",core);
+                { fprintf(stderr,"Adding '%s.arrow' ...\n",core);
                   fflush(stderr);
                 }
               cline = 0;
             }
 
         //  Not the first addition: get next cell line.  If not a pipe and the file name is new,
-        //    then close the current .quiva, open the next one and after ensuring the names
-        //    match, open it for compression
+        //    then close the current .arrow, open the next one and after ensuring the names
+        //    match, open it for incorporation
 
         else
           { first = last;  
@@ -335,8 +325,8 @@ int main(int argc, char *argv[])
                 ungetc(c,input);
               }
             else if (strcmp(lname,fname) != 0)
-              { if (fgetc(input) != EOF)
-                  { fprintf(stderr,"%s: Too many reads in %s.quiva while handling %s.fasta\n",
+              { if ( ! eof)
+                  { fprintf(stderr,"%s: Too many reads in %s.arrow while handling %s.fasta\n",
                                    Prog_Name,core,fname);
                     goto error;
                   }
@@ -348,16 +338,12 @@ int main(int argc, char *argv[])
                 if ( ! next_file(ng))
                   break;
                 if (ng->name == NULL)
-                  { fprintf(stderr,"%s",Ebuffer);
-                    goto error;
-                  }
+                  goto error;
 
                 path = PathTo(ng->name);
-                core = Root(ng->name,".quiva");
-                if ((input = Fopen(Catenate(path,"/",core,".quiva"),"r")) == NULL)
-                  { fprintf(stderr,"%s",Ebuffer);
-                    goto error;
-                  }
+                core = Root(ng->name,".arrow");
+                if ((input = Fopen(Catenate(path,"/",core,".arrow"),"r")) == NULL)
+                  goto error;
 
                 if (strcmp(core,fname) != 0)
                   { fprintf(stderr,"%s: Files not being added in order (expect %s, given %s)\n",
@@ -366,92 +352,128 @@ int main(int argc, char *argv[])
                   }
 
                 if (VERBOSE)
-                  { fprintf(stderr,"Adding '%s.quiva' ...\n",core);
+                  { fprintf(stderr,"Adding '%s.arrow' ...\n",core);
                     fflush(stderr);
                   }
                 cline = 0;
               }
           }
 
-        //  Compress reads [first..last) from open .quiva appending to .qvs and record
-        //    offset in .coff field of reads (offset of first in a cell is to the compression
-        //    table).
+        //  If first cell or source is a new file, then start IO
 
-        { int64     qpos;
-          QVcoding *coding;
-          int       i, s;
+        if (cline == 0)
+          {
+            // Read in first line and make sure it is a header in PACBIO format.
 
-          rewind(temp);
-          if (ftruncate(fileno(temp),0) < 0)
-            { fprintf(stderr,"%s: System error: could not truncate temporary file\n",Prog_Name);
-              goto error;
-            }
-          Set_QV_Line(cline);
-          s = QVcoding_Scan(input,last-first,temp);
-          if (s < 0)
-            { fprintf(stderr,"%s",Ebuffer);
-              goto error;
-            }
-          if (s != last-first)
-            { if (PIPE)
-                fprintf(stderr,"%s: Insufficient # of reads on input while handling %s.fasta\n",
-                               Prog_Name,fname);
-              else
-                fprintf(stderr,"%s: Insufficient # of reads in %s.quiva while handling %s.fasta\n",
-                               Prog_Name,core,fname);
-              goto error;
-            }
-
-          coding = Create_QVcoding(LOSSY);
-          if (coding == NULL)
-            { fprintf(stderr,"%s",Ebuffer);
-              goto error;
-            }
+            rlen = 0;
+            eof  = (fgets(read,MAX_NAME,input) == NULL);
+            if (read[strlen(read)-1] != '\n')
+              { fprintf(stderr,"File %s.arrow, Line 1: Fasta line is too long (> %d chars)\n",
+                               core,MAX_NAME-2);
+                goto error;
+              }
+            if (!eof && read[0] != '>')
+              { fprintf(stderr,"File %s.arrow, Line 1: First header in arrow file is missing\n",
+                                core);
+                goto error;
+              }
+          }
 
-          coding->prefix = Strdup(".qvs","Allocating header prefix");
-          if (coding->prefix == NULL)
-            { fprintf(stderr,"%s",Ebuffer);
-              goto error;
-            }
+        //  Compress reads [first..last) from open .arrow appending to .arw and record
+        //    snr in .coff field of reads (offset is the same as for the DNA sequence, .boff)
 
-          qpos = ftello(quiva);
-          Write_QVcoding(quiva,coding);
+        { int i, x;
 
-          //  Then compress and append to the .qvs each compressed QV entry
- 
-          rewind(temp);
-          Set_QV_Line(cline);
           for (i = first; i < last; i++)
-            { s = Read_Lines(temp,1);
-              if (s < -1)
-                { fprintf(stderr,"%s",Ebuffer);
+            { char  *find;
+              int    clen;
+              float  snr[4];
+              uint16 cnr[4];
+
+              if (eof)
+                { if (PIPE)
+                    fprintf(stderr,"%s: Insufficient # of reads on input while handling %s.arrow\n",
+                                   Prog_Name,fname);
+                  else
+                    { fprintf(stderr,"%s: Insufficient # of reads in %s.arrow while handling",
+                                     Prog_Name,core);
+                      fprintf(stderr," %s.arrow\n",fname);
+                    }
+                  goto error;
+                }
+
+              find = index(read+(rlen+1),' ');
+              if (find == NULL)
+                { fprintf(stderr,"File %s.arrow, Line %d: Pacbio header line format error\n",
+                                 core,cline);
                   goto error;
                 }
-              reads[i].coff = qpos;
-              s = Compress_Next_QVentry(temp,quiva,coding,LOSSY);
-              if (s < 0)
-                { fprintf(stderr,"%s",Ebuffer);
+              *find = '\0';
+              if (strcmp(read+(rlen+1),prolog) != 0)
+                { fprintf(stderr,"File %s.arrow, Line %d: Pacbio prolog doesn't match DB entry\n",
+                                 core,cline);
                   goto error;
                 }
-              if (s != reads[i].rlen)
-                { fprintf(stderr,"%s: Length of quiva %d is different than fasta in DB\n",
-                                 Prog_Name,i+1);
+              *find = ' ';
+              x = sscanf(find+1," SN=%f,%f,%f,%f\n",snr,snr+1,snr+2,snr+3);
+              if (x != 4)
+                { fprintf(stderr,"File %s.arrow, Line %d: Pacbio header line format error\n",
+                                 core,cline);
                   goto error;
                 }
-              qpos = ftello(quiva);
-            }
-          cline = Get_QV_Line();
 
-          Free_QVcoding(coding);
+              rlen  = 0;
+              while (1)
+                { eof = (fgets(read+rlen,MAX_NAME,input) == NULL);
+                  cline += 1;
+                  x = strlen(read+rlen)-1;
+                  if (read[rlen+x] != '\n')
+                    { if (read[rlen] == '>')
+                        { fprintf(stderr,"File %s.arrow, Line %d:",core,cline);
+                          fprintf(stderr," Fasta header line is too long (> %d chars)\n",
+                                         MAX_NAME-2);
+                          goto error;
+                        }
+                      else
+                        x += 1;
+                    }
+                  if (eof || read[rlen] == '>')
+                    break;
+                  rlen += x;
+                  if (rlen + MAX_NAME > rmax)
+                    { rmax = ((int) (1.2 * rmax)) + 1000 + MAX_NAME;
+                      read = (char *) realloc(read,rmax+1);
+                      if (read == NULL)
+                        { fprintf(stderr,"File %s.arrow, Line %d:",core,cline);
+                          fprintf(stderr," Out of memory (Allocating line buffer)\n");
+                          goto error;
+                        }
+                    }
+                }
+              read[rlen] = '\0';
+
+              for (x = 0; x < 4; x++)
+                cnr[x] = (uint32) (snr[x] * 100.);
+
+              *((uint64  *) &(reads[i].coff)) = ((uint64) cnr[0]) << 48 |
+                                                ((uint64) cnr[1]) << 32 |
+                                                ((uint64) cnr[2]) << 16 |
+                                                ((uint64) cnr[3]);
+
+              Number_Arrow(read);
+              Compress_Read(rlen,read);
+              clen = COMPRESSED_LEN(rlen);
+              fwrite(read,1,clen,arrow);
+            }
         }
       }
 
-    if (fgetc(input) != EOF)
+    if (!eof)
       { if (PIPE)
           fprintf(stderr,"%s: Too many reads on input while handling %s.fasta\n",
                          Prog_Name,lname);
         else
-          fprintf(stderr,"%s: Too many reads in %s.quiva while handling %s.fasta\n",
+          fprintf(stderr,"%s: Too many reads in %s.arrow while handling %s.fasta\n",
                          Prog_Name,core,lname);
         goto error;
       }
@@ -461,10 +483,8 @@ int main(int argc, char *argv[])
         free(path);
         if (next_file(ng))
           { if (ng->name == NULL)
-              { fprintf(stderr,"%s",Ebuffer);
-                goto error;
-              }
-            core = Root(ng->name,".quiva");
+              goto error;
+            core = Root(ng->name,".arrow");
             fprintf(stderr,"%s: %s.fasta has never been added to DB\n",Prog_Name,core);
             goto error;
           }
@@ -473,35 +493,30 @@ int main(int argc, char *argv[])
 
   //  Write the db record and read index into .idx and clean up
 
+  db.allarr |= DB_ARROW;
   rewind(indx);
   fwrite(&db,sizeof(HITS_DB),1,indx);
   fwrite(reads,sizeof(HITS_READ),db.ureads,indx);
 
   fclose(istub);
   fclose(indx);
-  fclose(quiva);
-  fclose(temp);
-  unlink(tname);
+  fclose(arrow);
 
   exit (0);
 
-  //  Error exit:  Either truncate or remove the .qvs file as appropriate.
+  //  Error exit:  Either truncate or remove the .arw file as appropriate.
 
 error:
-  if (coff != 0)
-    { fseeko(quiva,0,SEEK_SET);
-      if (ftruncate(fileno(quiva),coff) < 0)
-        fprintf(stderr,"%s: Fatal: could not restore %s.qvs after error, truncate failed\n",
+  if (boff != 0)
+    { fseeko(arrow,0,SEEK_SET);
+      if (ftruncate(fileno(arrow),boff) < 0)
+        fprintf(stderr,"%s: Fatal: could not restore %s.arw after error, truncate failed\n",
                        Prog_Name,root);
     }
-  if (quiva != NULL)
-    { fclose(quiva);
-      if (coff == 0)
-        unlink(Catenate(pwd,PATHSEP,root,".qvs"));
-    }
-  if (temp != NULL)
-    { fclose(temp);
-      unlink(tname);
+  if (arrow != NULL)
+    { fclose(arrow);
+      if (boff == 0)
+        unlink(Catenate(pwd,PATHSEP,root,".arw"));
     }
   fclose(istub);
   fclose(indx);
diff --git a/fasta2DAM.c b/fasta2DAM.c
index 15e074a..af6722a 100644
--- a/fasta2DAM.c
+++ b/fasta2DAM.c
@@ -527,6 +527,7 @@ int main(int argc, char *argv[])
         db.totlen = totlen;
         db.maxlen = maxlen;
         db.cutoff = -1;
+        db.allarr = 0;
       }
     else
       { for (c = 0; c < 4; c++)
diff --git a/fasta2DB.c b/fasta2DB.c
index 071bbe0..98298e3 100644
--- a/fasta2DB.c
+++ b/fasta2DB.c
@@ -35,7 +35,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-v] <path:string> ( -f<file> | -i[<name>] | <input:fasta> ... )";
+static char *Usage = "[-v] <path:db> ( -f<file> | -i[<name>] | <input:fasta> ... )";
 
 static char number[128] =
     { 0, 0, 0, 0, 0, 0, 0, 0,
@@ -252,23 +252,21 @@ int main(int argc, char *argv[])
 
         ureads  = 0;
         offset  = 0;
-        boff    = 0;
-        ioff    = 0;
       }
     else
       { if (fscanf(istub,DB_NFILE,&ocells) != 1)
           { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
-            goto error;
+            exit (1);
           }
 
         bases = Fopen(Catenate(pwd,PATHSEP,root,".bps"),"r+");
         indx  = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r+");
         if (bases == NULL || indx == NULL)
-          goto error;
+          exit (1);
 
         if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
           { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
-            goto error;
+            exit (1);
           }
         fseeko(bases,0,SEEK_END);
         fseeko(indx, 0,SEEK_END);
@@ -340,8 +338,7 @@ int main(int argc, char *argv[])
       { FILE *input;
         char  prolog[MAX_NAME];
         char *path, *core;
-        int   nline, eof, rlen, pcnt;
-        int   pwell;
+        int   eof;
 
         //  Open it: <path>/<core>.fasta if file, stdin otherwise with core = PIPE or "stdout"
 
@@ -367,6 +364,22 @@ int main(int argc, char *argv[])
             input = stdin;
           }
 
+        //  Get the header of the first line.  If the file is empty skip.
+
+        eof   = (fgets(read,MAX_NAME,input) == NULL);
+        if (eof || strlen(read) < 1)
+          { free(core);
+            fclose(input);
+            if (PIPE != NULL)
+              { fprintf(stderr,"Standard input is empty, terminating!\n");
+                break;
+              }
+            else
+              { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
+                continue;
+              }
+          }
+
         //  Check that core is not too long and name is unique or last source if PIPE'd
 
         if (strlen(core) >= MAX_NAME)
@@ -388,21 +401,6 @@ int main(int argc, char *argv[])
                 }
         }
 
-        //  Get the header of the first line.  If the file is empty skip.
-
-        pcnt  = 0;
-        rlen  = 0;
-        nline = 1;
-        eof   = (fgets(read,MAX_NAME,input) == NULL);
-        if (eof || strlen(read) < 1)
-          { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
-            if (input == stdin)
-              break;
-            fclose(input);
-            free(core);
-            continue;
-          }
-
         //   Add the file name to flist
 
         if (VERBOSE)
@@ -436,8 +434,7 @@ int main(int argc, char *argv[])
               *find = '/';
             }
           else
-            { fprintf(stderr,"File %s.fasta, Line %d: Pacbio header line format error\n",
-                             core,nline);
+            { fprintf(stderr,"File %s.fasta, Line 1: Pacbio header line format error\n",core);
               goto error;
             }
         }
@@ -445,7 +442,11 @@ int main(int argc, char *argv[])
         //  Read in all the sequences until end-of-file
 
         { int i, x;
+          int nline, pwell, rlen, pcnt;
 
+          pcnt  = 0;
+          rlen  = 0;
+          nline = 1;
           pwell = -1;
           while (!eof)
             { int   beg, end, clen;
@@ -583,6 +584,7 @@ int main(int argc, char *argv[])
         db.totlen = totlen;
         db.maxlen = maxlen;
         db.cutoff = -1;
+        db.allarr = 0;
       }
     else
       { for (c = 0; c < 4; c++)
diff --git a/quiva2DB.c b/quiva2DB.c
index 6d099f5..55e54d6 100644
--- a/quiva2DB.c
+++ b/quiva2DB.c
@@ -31,7 +31,7 @@
 #define PATHSEP "/"
 #endif
 
-static char *Usage = "[-vl] <path:string> ( -f<file> | -i | <input:quiva> ... )";
+static char *Usage = "[-vl] <path:db> ( -f<file> | -i | <input:quiva> ... )";
 
 typedef struct
   { int    argc;
@@ -165,7 +165,7 @@ int main(int argc, char *argv[])
       exit (1);
     }
   if (fscanf(istub,DB_NFILE,&nfiles) != 1)
-    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.db is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 
@@ -175,7 +175,11 @@ int main(int argc, char *argv[])
       exit (1);
     }
   if (fread(&db,sizeof(HITS_DB),1,indx) != 1)
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
+      exit (1);
+    }
+  if ((db.allarr & DB_ARROW) != 0)
+    { fprintf(stderr,"%s: Database %s has Arrow data!\n",Prog_Name,root);
       exit (1);
     }
 
@@ -185,7 +189,7 @@ int main(int argc, char *argv[])
       exit (1);
     }
   if (fread(reads,sizeof(HITS_READ),db.ureads,indx) != (size_t) (db.ureads))
-    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",root,Prog_Name);
+    { fprintf(stderr,"%s: %s.idx is corrupted, read failed\n",Prog_Name,root);
       exit (1);
     }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/dazzdb.git



More information about the debian-med-commit mailing list