[med-svn] [daligner] 01/03: Imported Upstream version 1.0+20161119

Afif Elghraoui afif at moszumanska.debian.org
Thu Jan 19 06:20:36 UTC 2017


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository daligner.

commit 297c59fbf22871b2231b605692a7ed6b8af20951
Author: Afif Elghraoui <afif at debian.org>
Date:   Wed Jan 18 22:05:50 2017 -0800

    Imported Upstream version 1.0+20161119
---
 DB.c      | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 DB.h      | 14 ++++++++-
 LAdump.c  | 25 ++++++----------
 LAmerge.c |  2 +-
 QV.c      | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 QV.h      |  3 ++
 align.c   | 17 ++++++-----
 align.h   |  4 +--
 filter.c  | 10 +++++--
 9 files changed, 230 insertions(+), 38 deletions(-)

diff --git a/DB.c b/DB.c
index 46046b7..3afff14 100644
--- a/DB.c
+++ b/DB.c
@@ -314,6 +314,14 @@ void Upper_Read(char *s)
   *s = '\0';
 }
 
+void Letter_Arrow(char *s)
+{ static char letter[4] = { '1', '2', '3', '4' };
+
+  for ( ; *s != 4; s++)
+    *s = letter[(int) *s];
+  *s = '\0';
+}
+
 //  Convert read in ascii representation to [0-3] representation (end with 4)
 
 void Number_Read(char *s)
@@ -341,6 +349,31 @@ void Number_Read(char *s)
   *s = 4;
 }
 
+void Number_Arrow(char *s)
+{ static char arrow[128] =
+    { 3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 0, 1, 2, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 2,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3,
+    };
+
+  for ( ; *s != '\0'; s++)
+    *s = arrow[(int) *s];
+  *s = 4;
+}
+
 
 /*******************************************************************************************
  *
@@ -426,7 +459,7 @@ int Open_DB(char* path, HITS_DB *db)
     if (fscanf(dbvis,DB_NBLOCK,&nblocks) != 1)
       if (part == 0)
         { cutoff = 0;
-          all    = 1;
+          all    = DB_ALL;
         }
       else
         { EPRINTF(EPLACE,"%s: DB %s has not yet been partitioned, cannot request a block !\n",
@@ -466,7 +499,7 @@ int Open_DB(char* path, HITS_DB *db)
   db->tracks  = NULL;
   db->part    = part;
   db->cutoff  = cutoff;
-  db->all     = all;
+  db->allarr |= all;
   db->ufirst  = ufirst;
   db->tfirst  = tfirst;
 
@@ -478,7 +511,7 @@ int Open_DB(char* path, HITS_DB *db)
       db->reads += 1;
       if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
         { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
-          free(db->reads);
+          free(db->reads-1);
           goto error2;
         }
     }
@@ -495,7 +528,7 @@ int Open_DB(char* path, HITS_DB *db)
       fseeko(index,sizeof(HITS_READ)*ufirst,SEEK_CUR);
       if (fread(reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
         { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
-          free(reads);
+          free(reads-1);
           goto error2;
         }
 
@@ -557,10 +590,10 @@ void Trim_DB(HITS_DB *db)
 
   if (db->trimmed) return;
 
-  if (db->cutoff <= 0 && db->all) return;
+  if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return;
 
   cutoff = db->cutoff;
-  if (db->all)
+  if ((db->allarr & DB_ALL) != 0)
     allflag = 0;
   else
     allflag = DB_BEST;
@@ -660,10 +693,10 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
 
   if (!db->trimmed) return (0);
 
-  if (db->cutoff <= 0 && db->all) return (0);
+  if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return (0);
 
   cutoff = db->cutoff;
-  if (db->all)
+  if ((db->allarr & DB_ALL) != 0)
     allflag = 0;
   else
     allflag = DB_BEST;
@@ -1435,6 +1468,56 @@ int Load_Read(HITS_DB *db, int i, char *read, int ascii)
   return (0);
 }
 
+// Load into 'read' the i'th arrow in 'db'.  As an ASCII string if ascii is 1, 
+//   and as a numeric string otherwise.
+//
+
+HITS_DB *Arrow_DB = NULL;         //  Last db/arw used by "Load_Arrow"
+FILE    *Arrow_File = NULL;       //    Becomes invalid after closing
+
+int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
+{ FILE      *arrow;
+  int64      off;
+  int        len, clen;
+  HITS_READ *r = db->reads;
+
+  if (i >= db->nreads)
+    { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
+      EXIT(1);
+    }
+  if (Arrow_DB != db)
+    { fclose(Arrow_File);
+      arrow = Fopen(Catenate(db->path,"","",".arw"),"r");
+      if (arrow == NULL)
+        EXIT(1);
+      Arrow_File = arrow;
+      Arrow_DB   = db;
+    }
+  else
+    arrow = Arrow_File;
+
+  off = r[i].boff;
+  len = r[i].rlen;
+
+  if (ftello(arrow) != off)
+    fseeko(arrow,off,SEEK_SET);
+  clen = COMPRESSED_LEN(len);
+  if (clen > 0)
+    { if (fread(read,clen,1,arrow) != 1)
+        { EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Arrow)\n",Prog_Name);
+          EXIT(1);
+        }
+    }
+  Uncompress_Read(len,read);
+  if (ascii == 1)
+    { Letter_Arrow(read);
+      read[-1] = '\0';
+    }
+  else
+    read[-1] = 4;
+  return (0);
+}
+
 char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
 { FILE      *bases  = (FILE *) db->bases;
   int64      off;
diff --git a/DB.h b/DB.h
index 983bee7..dc281de 100644
--- a/DB.h
+++ b/DB.h
@@ -164,6 +164,9 @@ void Lower_Read(char *s);     //  Convert read from numbers to lowercase letters
 void Upper_Read(char *s);     //  Convert read from numbers to uppercase letters (0-3 to ACGT)
 void Number_Read(char *s);    //  Convert read from letters to numbers
 
+void Letter_Arrow(char *s);   //  Convert arrow pw's from numbers to uppercase letters (0-3 to 1234)
+void Number_Arrow(char *s);   //  Convert arrow pw string from letters to numbers
+
 
 /*******************************************************************************************
  *
@@ -175,6 +178,9 @@ void Number_Read(char *s);    //  Convert read from letters to numbers
 #define DB_CSS  0x0400   //  This is the second or later of a group of reads from a given insert
 #define DB_BEST 0x0800   //  This is the longest read of a given insert (may be the only 1)
 
+#define DB_ARROW 0x2     //  DB is an arrow DB
+#define DB_ALL   0x1     //  all wells are in the trimmed DB
+
 //  Fields have different interpretations if a .db versus a .dam
 
 typedef struct
@@ -185,6 +191,7 @@ typedef struct
                     //    uncompressed bases in memory block
     int64   coff;   //  Offset (in bytes) of compressed quiva streams in '.qvs' file (DB),
                     //  Offset (in bytes) of scaffold header string in '.hdr' file (DAM)
+                    //  4 compressed shorts containing snr info if an arrow DB.
     int     flags;  //  QV of read + flags above (DB only)
   } HITS_READ;
 
@@ -226,7 +233,7 @@ typedef struct
   { int         ureads;     //  Total number of reads in untrimmed DB
     int         treads;     //  Total number of reads in trimmed DB
     int         cutoff;     //  Minimum read length in block (-1 if not yet set)
-    int         all;        //  Consider multiple reads from a given well
+    int         allarr;     //  DB_ALL | DB_ARROW
     float       freq[4];    //  frequency of A, C, G, T, respectively
 
     //  Set with respect to "active" part of DB (all vs block, untrimmed vs trimmed)
@@ -368,6 +375,11 @@ char *New_Read_Buffer(HITS_DB *db);
 
 int  Load_Read(HITS_DB *db, int i, char *read, int ascii);
 
+  // Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence,
+  //   and there is only a choice between numeric (0) or ascii (1);
+
+int  Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
+
   // Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
   //   the start of the subinterval (not necessarily = to read !!! ).  As a lower case ascii
   //   string if ascii is 1, an upper case ascii string if ascii is 2, and a numeric string
diff --git a/LAdump.c b/LAdump.c
index 070c3c2..716e873 100644
--- a/LAdump.c
+++ b/LAdump.c
@@ -40,6 +40,7 @@ int main(int argc, char *argv[])
   FILE   *input;
   int64   novl;
   int     tspace, tbytes, small;
+  int     tmax;
   int     reps, *pts;
   int     input_pts;
 
@@ -279,7 +280,7 @@ int main(int argc, char *argv[])
 
   { int   j, al, tlen;
     int   in, npt, idx, ar;
-    int64 novls, odeg, omax, sdeg, smax, ttot, tmax;
+    int64 novls, odeg, omax, sdeg, smax, ttot;
 
     in  = 0;
     npt = pts[0];
@@ -359,28 +360,24 @@ int main(int argc, char *argv[])
 
     printf("+ P %lld\n",novls);
     printf("%% P %lld\n",omax);
-    printf("+ T %lld\n",ttot);
-    printf("%% T %lld\n",smax);
-    printf("@ T %lld\n",tmax);
+    if (DOTRACE)
+      { printf("+ T %lld\n",ttot);
+        printf("%% T %lld\n",smax);
+        printf("@ T %lld\n",tmax);
+      }
   }
 
   //  Read the file and display selected records
   
   { int        j;
     uint16    *trace;
-    int        tmax;
     int        in, npt, idx, ar;
     int64      verse;
 
     rewind(input);
-    fread(&verse,sizeof(int64),1,input);
+    fread(&novl,sizeof(int64),1,input);
     fread(&tspace,sizeof(int),1,input);
-    if (verse < 0)
-      { for (j = 0; j < 5; j++)
-          fread(&verse,sizeof(int64),1,input);
-      }
 
-    tmax  = 1000;
     trace = (uint16 *) Malloc(sizeof(uint16)*tmax,"Allocating trace vector");
     if (trace == NULL)
       exit (1);
@@ -396,12 +393,6 @@ int main(int argc, char *argv[])
        //  Read it in
 
       { Read_Overlap(input,ovl);
-        if (ovl->path.tlen > tmax)
-          { tmax = ((int) 1.2*ovl->path.tlen) + 100;
-            trace = (uint16 *) Realloc(trace,sizeof(uint16)*tmax,"Allocating trace vector");
-            if (trace == NULL)
-              exit (1);
-          }
         ovl->path.trace = (void *) trace;
         Read_Trace(input,ovl,tbytes);
 
diff --git a/LAmerge.c b/LAmerge.c
index 985a5f6..6a768fd 100644
--- a/LAmerge.c
+++ b/LAmerge.c
@@ -168,7 +168,7 @@ static void ovl_reload(IO_block *in, int64 bsize)
 
   remains = in->top - in->ptr;
   if (remains > 0)
-    memcpy(in->block, in->ptr, remains);
+    memmove(in->block, in->ptr, remains);
   in->ptr  = in->block;
   in->top  = in->block + remains;
   in->top += fread(in->top,1,bsize-remains,in->stream);
diff --git a/QV.c b/QV.c
index cdb6a63..d7d7263 100644
--- a/QV.c
+++ b/QV.c
@@ -863,6 +863,62 @@ static int      delChar, subChar;
 
   // Referred by:  QVcoding_Scan, Create_QVcoding
 
+void QVcoding_Scan1(int rlen, char *delQV, char *delTag, char *insQV, char *mergeQV, char *subQV)
+{
+  if (rlen == 0)   //  Initialization call
+    { int   i;
+
+      //  Zero histograms
+
+      bzero(delHist,sizeof(uint64)*256);
+      bzero(mrgHist,sizeof(uint64)*256);
+      bzero(insHist,sizeof(uint64)*256);
+      bzero(subHist,sizeof(uint64)*256);
+
+      for (i = 0; i < 256; i++)
+        delRun[i] = subRun[i] = 1;
+
+      totChar    = 0;
+      delChar    = -1;
+      subChar    = -1;
+      return;
+    }
+
+  //  Add streams to accumulating histograms and figure out the run chars
+  //    for the deletion and substition streams
+
+  Histogram_Seqs(delHist,(uint8 *) delQV,rlen);
+  Histogram_Seqs(insHist,(uint8 *) insQV,rlen);
+  Histogram_Seqs(mrgHist,(uint8 *) mergeQV,rlen);
+  Histogram_Seqs(subHist,(uint8 *) subQV,rlen);
+
+  if (delChar < 0)
+    { int   k;
+
+      for (k = 0; k < rlen; k++)
+        if (delTag[k] == 'n' || delTag[k] == 'N')
+          { delChar = delQV[k];
+            break;
+          }
+    }
+  if (delChar >= 0)
+    Histogram_Runs( delRun,(uint8 *) delQV,rlen,delChar);
+  totChar += rlen;
+  if (subChar < 0)
+    { if (totChar >= 100000)
+        { int k;
+
+          subChar = 0;
+          for (k = 1; k < 256; k++)
+            if (subHist[k] > subHist[subChar])
+              subChar = k;
+        }
+    }
+  if (subChar >= 0)
+    Histogram_Runs( subRun,(uint8 *) subQV,rlen,subChar);
+  return;
+}
+
 int QVcoding_Scan(FILE *input, int num, FILE *temp)
 { char *slash;
   int   rlen;
@@ -1284,6 +1340,44 @@ void Free_QVcoding(QVcoding *coding)
  *
  ********************************************************************************************/
 
+void Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub,
+                            FILE *output, QVcoding *coding, int lossy)
+{ int clen;
+
+  if (coding->delChar < 0)
+    { Encode(coding->delScheme, output, (uint8 *) del, rlen);
+      clen = rlen;
+    }
+  else
+    { Encode_Run(coding->delScheme, coding->dRunScheme, output,
+                 (uint8 *) del, rlen, coding->delChar);
+      clen = Pack_Tag(tag,del,rlen,coding->delChar);
+    }
+  Number_Read(tag);
+  Compress_Read(clen,tag);
+  fwrite(tag,1,COMPRESSED_LEN(clen),output);
+
+  if (lossy)
+    { uint8 *insert = (uint8 *) ins;
+      uint8 *merge  = (uint8 *) mrg;
+      int    k;
+
+      for (k = 0; k < rlen; k++)
+        { insert[k] = (uint8) ((insert[k] >> 1) << 1);
+          merge[k]  = (uint8) (( merge[k] >> 2) << 2);
+        }
+    }
+
+  Encode(coding->insScheme, output, (uint8 *) ins, rlen);
+  Encode(coding->mrgScheme, output, (uint8 *) mrg, rlen);
+  if (coding->subChar < 0)
+    Encode(coding->subScheme, output, (uint8 *) sub, rlen);
+  else
+    Encode_Run(coding->subScheme, coding->sRunScheme, output,
+               (uint8 *) sub, rlen, coding->subChar);
+  return;
+}
+
 int Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy)
 { int rlen, clen;
 
diff --git a/QV.h b/QV.h
index 532b2f4..e5c9485 100644
--- a/QV.h
+++ b/QV.h
@@ -58,6 +58,7 @@ int       Get_QV_Line();
   //   If there is an error then -1 is returned, otherwise the number of entries read.
 
 int       QVcoding_Scan(FILE *input, int num, FILE *temp);
+void      QVcoding_Scan1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub);
 
   // Given QVcoding_Scan has been called at least once, create an encoding scheme based on
   //   the accumulated statistics and return a pointer to it.  The returned encoding object
@@ -83,6 +84,8 @@ void      Free_QVcoding(QVcoding *coding);
   //    occurred, and the sequence length otherwise.
 
 int      Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy);
+void     Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub,
+                                FILE *output, QVcoding *coding, int lossy);
 
   //  Assuming the input is position just beyond the compressed encoding of an entry header,
   //    read the set of compressed encodings for the ensuing 5 QV vectors, decompress them,
diff --git a/align.c b/align.c
index eb877d4..449b833 100644
--- a/align.c
+++ b/align.c
@@ -2983,8 +2983,8 @@ int Find_Extension(Alignment *align, Work_Data *ework, Align_Spec *espec,
   if (prefix)
     { if (reverse_extend(work,spec,align,diag,anti,minp,maxp))
         EXIT(1);
-      apath->aepos = (anti-diag)/2;
-      apath->bepos = (anti+diag)/2;
+      apath->aepos = (anti+diag)/2;
+      apath->bepos = (anti-diag)/2;
 #ifdef DEBUG_PASSES
       printf("E1 (%d,%d) => (%d,%d) %d\n",
              (anti+diag)/2,(anti-diag)/2,apath->abpos,apath->bbpos,apath->diffs);
@@ -2993,8 +2993,8 @@ int Find_Extension(Alignment *align, Work_Data *ework, Align_Spec *espec,
   else
     { if (forward_extend(work,spec,align,diag,anti,minp,maxp))
         EXIT(1);
-      apath->abpos = (anti-diag)/2;
-      apath->bbpos = (anti+diag)/2;
+      apath->abpos = (anti+diag)/2;
+      apath->bbpos = (anti-diag)/2;
 #ifdef DEBUG_PASSES
       printf("F1 (%d,%d) => (%d,%d) %d\n",
              (anti+diag)/2,(anti-diag)/2,apath->aepos,apath->bepos,apath->diffs);
@@ -3044,10 +3044,13 @@ int Read_Trace(FILE *input, Overlap *ovl, int tbytes)
   return (0);
 }
 
-void Write_Overlap(FILE *output, Overlap *ovl, int tbytes)
-{ fwrite( ((char *) ovl) + PtrSize, OvlIOSize, 1, output);
+int Write_Overlap(FILE *output, Overlap *ovl, int tbytes)
+{ if (fwrite( ((char *) ovl) + PtrSize, OvlIOSize, 1, output) != 1)
+    return (1);
   if (ovl->path.trace != NULL)
-    fwrite(ovl->path.trace,tbytes,ovl->path.tlen,output);
+    if (fwrite(ovl->path.trace,tbytes,ovl->path.tlen,output) != (size_t) ovl->path.tlen)
+      return (1);
+  return (0);
 }
 
 void Compress_TraceTo8(Overlap *ovl)
diff --git a/align.h b/align.h
index c3b31ae..daa9151 100644
--- a/align.h
+++ b/align.h
@@ -325,7 +325,7 @@ typedef struct {
      accommodate the trace where each value take 'tbytes' bytes (1 if uint8 or 2 if uint16).
 
      Write_Overlap write 'ovl' to stream 'output' followed by its trace vector (if any) that
-     occupies 'tbytes' bytes per value.  
+     occupies 'tbytes' bytes per value.  It returns non-zero if there was an error writing.
 
      Print_Overlap prints an ASCII version of the contents of 'ovl' to stream 'output'
      where the trace occupes 'tbytes' per value and the print out is indented from the left
@@ -343,7 +343,7 @@ typedef struct {
   int Read_Overlap(FILE *input, Overlap *ovl);
   int Read_Trace(FILE *innput, Overlap *ovl, int tbytes);
 
-  void Write_Overlap(FILE *output, Overlap *ovl, int tbytes);
+  int  Write_Overlap(FILE *output, Overlap *ovl, int tbytes);
   void Print_Overlap(FILE *output, Overlap *ovl, int tbytes, int indent);
 
   void Compress_TraceTo8(Overlap *ovl);
diff --git a/filter.c b/filter.c
index a9c5083..90ca7c3 100644
--- a/filter.c
+++ b/filter.c
@@ -1842,14 +1842,20 @@ static void *report_thread(void *arg)
                ovla->path.trace = tbuf->trace + (uint64) (ovla->path.trace);
                if (small)
                  Compress_TraceTo8(ovla);
-               Write_Overlap(ofile1,ovla,tbytes);
+               if (Write_Overlap(ofile1,ovla,tbytes))
+                 { fprintf(stderr,"%s: Cannot write to /tmp, too small?\n",Prog_Name);
+                   exit (1);
+                 }
              }
            for (i = 0; i < novlb; i++)
              { ovlb->path = bmatch[i];
                ovlb->path.trace = tbuf->trace + (uint64) (ovlb->path.trace);
                if (small)
                  Compress_TraceTo8(ovlb);
-               Write_Overlap(ofile2,ovlb,tbytes);
+               if (Write_Overlap(ofile2,ovlb,tbytes))
+                 { fprintf(stderr,"%s: Cannot write to /tmp, too small?\n",Prog_Name);
+                   exit (1);
+                 }
              }
            ahits += novla;
            bhits += novlb;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/daligner.git



More information about the debian-med-commit mailing list