[med-svn] [Git][med-team/dazzdb][upstream] New upstream version 1.0+git20240115.be65e59

Tue May 28 21:14:11 BST 2024


Étienne Mollier pushed to branch upstream at Debian Med / dazzdb


Commits:
fdd9d20e by Étienne Mollier at 2024-05-28T22:12:43+02:00
New upstream version 1.0+git20240115.be65e59
- - - - -


6 changed files:

- DAM2fasta.c
- DB.c
- QV.c
- README.md
- fasta2DAM.c
- fasta2DB.c


Changes:

=====================================
DAM2fasta.c
=====================================
@@ -105,7 +105,11 @@ int main(int argc, char *argv[])
   { DAZZ_READ  *reads;
     char       *read;
     int         f, first;
-    char        nstring[WIDTH+1];
+    char       *nstring;
+
+    nstring = Malloc(WIDTH+1,"Allocating write buffer\n");
+    if (nstring == NULL)
+      exit (1);
 
     if (UPPER == 2)
       for (f = 0; f < WIDTH; f++)
@@ -205,6 +209,8 @@ int main(int argc, char *argv[])
           FCLOSE(ofile)
         first = last;
       }
+
+    free(nstring);
   }
 
   fclose(hdrs);


=====================================
DB.c
=====================================
@@ -2208,7 +2208,7 @@ int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra)
     }
   if (accum != extra->accum)
     { EPRINTF(EPLACE,
-           "%s: Reduction indicator of extra %s does not agree with previos .anno block files\n",
+           "%s: Reduction indicator of extra %s does not agree with previous .anno block files\n",
            Prog_Name,name);
       goto error;
     }
@@ -2829,7 +2829,7 @@ static Block_Looper *parse_block_arg(char *arg, int isDB)
     first = last = -1;
   else
     { if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
-        { EPRINTF(EPLACE,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+        { EPRINTF(EPLACE,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
                          Prog_Name,BLOCK_SYMBOL,root);
           goto error;
         }


=====================================
QV.c
=====================================
@@ -1319,7 +1319,7 @@ error:
   EXIT(NULL);
 }
 
-  //  Free all the auxilliary storage associated with the encoding argument
+  //  Free all the auxiliary storage associated with the encoding argument
 
 void Free_QVcoding(QVcoding *coding)
 { if (coding->subChar >= 0)


=====================================
README.md
=====================================
@@ -282,7 +282,7 @@ set then secondary reads from a given well are also ignored.  The remaining read
 constituting what we call the trimmed DB, are split amongst the blocks so that each
 block is of size -s * 1Mbp except for the last which necessarily contains a smaller
 residual.  The default value for -s is 200Mbp because blocks of this size can be
-compared by our "overlapper" dalign in roughly 16Gb of memory.  The blocks are very
+compared by our "overlapper" daligner in roughly 16Gb of memory.  The blocks are very
 space efficient in that their sub-index of the master .idx is computed on the fly
 when loaded, and the .bps and .qvs files (if a .db) of base pairs and quality values,
 respectively, is shared with the master DB.  Any relevant portions of tracks


=====================================
fasta2DAM.c
=====================================
@@ -327,7 +327,7 @@ int main(int argc, char *argv[])
 
     //  Buffer for accumulating .fasta sequence over multiple lines
 
-    rmax  = MAX_NAME + 60000;
+    rmax  = MAX_NAME + 10000000;
     read  = (char *) Malloc(rmax+1,"Allocating line buffer");
     if (read == NULL)
       goto error;
@@ -439,13 +439,13 @@ int main(int argc, char *argv[])
 
         // Check that the first line is a header line
 
-        if (read[strlen(read)-1] != '\n')
-          { fprintf(stderr,"File %s, Line 1: Fasta line is too long (> %d chars)\n",
-                           fname,MAX_NAME-2);
+        if (read[0] != '>')
+          { fprintf(stderr,"File %s, Line 1: First header in fasta file is missing\n",fname);
             goto error;
           }
-        if (!eof && read[0] != '>')
-          { fprintf(stderr,"File %s, Line 1: First header in fasta file is missing\n",fname);
+        if (read[strlen(read)-1] != '\n')
+          { fprintf(stderr,"File %s, Line 1: Fasta header line is too long (> %d chars)\n",
+                           fname,MAX_NAME-2);
             goto error;
           }
 
@@ -460,21 +460,28 @@ int main(int argc, char *argv[])
               hlen = strlen(read+rlen);
               fwrite(read+rlen,1,hlen,hdrs);
 
-              rlen  = 0;
+              rlen = 0;
               while (1)
                 { eof = (fgets(read+rlen,MAX_NAME,input) == NULL);
-                  nline += 1;
                   x = strlen(read+rlen)-1;
-                  if (read[rlen+x] != '\n')
-                    { fprintf(stderr,"File %s, Line %d:",fname,nline);
-                      fprintf(stderr," Fasta line is too long (> %d chars)\n",MAX_NAME-2);
-                      goto error;
+                  if (read[rlen] == '>')
+                    { if (read[rlen+x] != '\n')
+                        { fprintf(stderr,"File %s, Line %d: Fasta header line",fname,nline);
+                          fprintf(stderr," is too long (> %d chars)\n",MAX_NAME-2);
+                          goto error;
+                        }
+                      nline += 1;
+                      break;
                     }
-                  if (eof || read[rlen] == '>')
+                  if (eof)
                     break;
+                  if (read[rlen+x] == '\n')
+                    nline += 1;
+                  else
+                    x += 1;
                   rlen += x;
                   if (rlen + MAX_NAME > rmax)
-                    { rmax = ((int64) (1.2 * rmax)) + 1000 + MAX_NAME;
+                    { rmax = ((int64) (1.4 * rmax)) + 10000000 + MAX_NAME;
                       read = (char *) realloc(read,rmax+1);
                       if (read == NULL)
                         { fprintf(stderr,"File %s, Line %d:",fname,nline);
@@ -636,7 +643,14 @@ int main(int argc, char *argv[])
       fprintf(ostub,DB_NBLOCK,nblock);    //  Rewind and record the new number of blocks
     }
   else
-    db.treads = ureads;
+    { db.treads = ureads;     //  Start with split into a single huge block
+      db.cutoff = 0;
+      db.allarr = 1;
+      fprintf(ostub,DB_NBLOCK,1);
+      fprintf(ostub,DB_PARAMS,db.totlen,0,1);
+      fprintf(ostub," %9d %9d\n",0,0);
+      fprintf(ostub," %9d %9d\n",ureads,ureads);
+    }
 
   rewind(ostub);
   fprintf(ostub,DB_NFILE,ofiles);


=====================================
fasta2DB.c
=====================================
@@ -339,7 +339,7 @@ int main(int argc, char *argv[])
 
     //  Buffer for accumulating .fasta sequence over multiple lines
 
-    rmax  = MAX_NAME + 60000;
+    rmax  = MAX_NAME + 10000000;
     read  = (char *) Malloc(rmax+1,"Allocating line buffer");
     if (read == NULL)
       goto error;
@@ -442,13 +442,13 @@ int main(int argc, char *argv[])
 
         // Check that the first line is a header and has PACBIO format.
 
-        if (read[strlen(read)-1] != '\n')
-          { fprintf(stderr,"File %s, Line 1: Fasta line is too long (> %d chars)\n",
-                           fname,MAX_NAME-2);
+        if (read[0] != '>')
+          { fprintf(stderr,"File %s, Line 1: First header in fasta file is missing\n",fname);
             goto error;
           }
-        if (!eof && read[0] != '>')
-          { fprintf(stderr,"File %s, Line 1: First header in fasta file is missing\n",fname);
+        if (read[strlen(read)-1] != '\n')
+          { fprintf(stderr,"File %s, Line 1: Fasta header line is too long (> %d chars)\n",
+                           fname,MAX_NAME-2);
             goto error;
           }
 
@@ -514,26 +514,28 @@ int main(int argc, char *argv[])
               else if (x == 3)
                 qv = 0;
 
-              rlen  = 0;
+              rlen = 0;
               while (1)
                 { eof = (fgets(read+rlen,MAX_NAME,input) == NULL);
-                  nline += 1;
                   x = strlen(read+rlen)-1;
-                  if (read[rlen+x] != '\n')
-                    { if (read[rlen] == '>')
-                        { fprintf(stderr,"File %s, Line %d:",fname,nline);
-                          fprintf(stderr," Fasta header line is too long (> %d chars)\n",
-                                         MAX_NAME-2);
+                  if (read[rlen] == '>')
+                    { if (read[rlen+x] != '\n')
+                        { fprintf(stderr,"File %s, Line %d: Fasta header line",fname,nline);
+                          fprintf(stderr," is too long (> %d chars)\n",MAX_NAME-2);
                           goto error;
                         }
-                      else
-                        x += 1;
+                      nline += 1;
+                      break;
                     }
-                  if (eof || read[rlen] == '>')
+                  if (eof)
                     break;
+                  if (read[rlen+x] == '\n')
+                    nline += 1;
+                  else
+                    x += 1;
                   rlen += x;
                   if (rlen + MAX_NAME > rmax)
-                    { rmax = ((int64) (1.2 * rmax)) + 1000 + MAX_NAME;
+                    { rmax = ((int64) (1.4 * rmax)) + 10000000 + MAX_NAME;
                       read = (char *) realloc(read,rmax+1);
                       if (read == NULL)
                         { fprintf(stderr,"File %s, Line %d:",fname,nline);
@@ -716,7 +718,14 @@ int main(int argc, char *argv[])
       fprintf(ostub,DB_NBLOCK,nblock);    //  Rewind and record the new number of blocks
     }
   else
-    db.treads = ureads;
+    { db.treads = ureads;
+      db.cutoff = 0;
+      db.allarr = 1;
+      fprintf(ostub,DB_NBLOCK,1);
+      fprintf(ostub,DB_PARAMS,db.totlen,0,1);
+      fprintf(ostub," %9d %9d\n",0,0);
+      fprintf(ostub," %9d %9d\n",ureads,ureads);
+    }
 
   rewind(indx);
   fwrite(&db,sizeof(DAZZ_DB),1,indx);   //  Write the finalized db record into .idx



View it on GitLab: https://salsa.debian.org/med-team/dazzdb/-/commit/fdd9d20e6a0f16e93555ebb99f5848d7d878359a

-- 
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/dazzdb/-/commit/fdd9d20e6a0f16e93555ebb99f5848d7d878359a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240528/52aa9b20/attachment-0001.htm>