[med-svn] [Git][med-team/dazzdb][master] 4 commits: New upstream version 1.0+git20230919.414ad05
Andreas Tille (@tille)
gitlab at salsa.debian.org
Mon Nov 27 16:16:15 GMT 2023
Andreas Tille pushed to branch master at Debian Med / dazzdb
Commits:
ff1fc5e0 by Andreas Tille at 2023-11-27T17:12:22+01:00
New upstream version 1.0+git20230919.414ad05
- - - - -
359dc296 by Andreas Tille at 2023-11-27T17:12:22+01:00
routine-update: New upstream version
- - - - -
55ce82f0 by Andreas Tille at 2023-11-27T17:12:23+01:00
Update upstream source from tag 'upstream/1.0+git20230919.414ad05'
Update to upstream version '1.0+git20230919.414ad05'
with Debian dir 781eae0ce6260c4f19b0d67568fb5b3c688c4c5d
- - - - -
707a2f9a by Andreas Tille at 2023-11-27T17:14:44+01:00
routine-update: Ready to upload to unstable
- - - - -
6 changed files:
- DB2ONE.c
- ONElib.c
- README.md
- debian/changelog
- fasta2DAM.c
- fasta2DB.c
Changes:
=====================================
DB2ONE.c
=====================================
@@ -43,10 +43,12 @@ static char *One_Schema =
"D H 1 6 STRING Original fasta/q header\n"
- "D W 3 3 INT 3 INT 3 INT well, pulse start, pulse end\n"
+ "D W 3 3 INT 3 INT 3 INT well, pulse start, pulse end (for db's)\n"
"D N 4 3 INT 3 INT 3 INT 3 INT SNR of ACGT channels (if Arrow-DB)\n"
"D Q 1 3 INT read quality (if Quiva-DB)\n"
+ "D G 3 3 INT 3 INT 3 INT contig, firstbp, lastbp (for dam's)\n"
+
"D X 2 3 INT 6 STRING Prolog: name of track idx\n"
"D T 2 3 INT 8 INT_LIST Track idx, interval pairs list\n";
@@ -204,9 +206,10 @@ int main(int argc, char *argv[])
fprintf(stderr," -a: Output truncated arrow pulse-width string (A line)\n");
fprintf(stderr," -q: Quiver edit vectors (D, C, I, M, and S lines)\n");
fprintf(stderr," -h: Output fasta header prefix (H line)\n");
- fprintf(stderr," -w: Output well, pulse start and end (W line)\n");
+ fprintf(stderr," -w: Output well, pulse start and end (if .db, W line)\n");
fprintf(stderr," + SNR of ACGT channels (if Arrow DB, N line)\n");
fprintf(stderr," + quality value of read (if Quiver DB, Q line)\n");
+ fprintf(stderr," -w: Contig, firstbp, and lastbp (if .dam, G line)\n");
fprintf(stderr,"\n");
fprintf(stderr," -f: group by origin file (f line)\n");
fprintf(stderr,"\n");
@@ -616,7 +619,10 @@ int main(int argc, char *argv[])
{ oneInt(file1,0) = r->origin;
oneInt(file1,1) = r->fpulse;
oneInt(file1,2) = r->fpulse+len;
- oneWriteLine(file1,'W',0,NULL);
+ if (DAM)
+ oneWriteLine(file1,'G',0,NULL);
+ else
+ oneWriteLine(file1,'W',0,NULL);
if (Quiva_DB && qv > 0)
{ oneInt(file1,0) = qv;
oneWriteLine(file1,'Q',0,NULL);
=====================================
ONElib.c
=====================================
@@ -32,7 +32,7 @@
#ifdef DEBUG
#include <assert.h>
#else
-#define assert(x) 0
+#define assert(x)
#endif
#include "ONElib.h"
@@ -390,9 +390,10 @@ static OneSchema *oneSchemaCreateDynamic (char *fileType, char *subType)
assert (fileType && strlen(fileType) > 0) ;
assert (!subType || strlen(subType) > 0) ;
if (subType)
- sprintf (text, "P %ld %s\nS %ld %s\n", strlen(fileType),fileType, strlen(subType), subType) ;
+ sprintf (text, "P %d %s\nS %d %s\n", (int) strlen(fileType),fileType,
+ (int) strlen(subType), subType) ;
else
- sprintf (text, "P %ld %s\n", strlen(fileType), fileType) ;
+ sprintf (text, "P %d %s\n", (int) strlen(fileType), fileType) ;
OneSchema *vs = oneSchemaCreateFromText (text) ;
return vs ;
}
@@ -3485,7 +3486,7 @@ static inline int intGet (unsigned char *u, I64 *pval)
case 0:
switch (u[0] & 0x07)
{
- case 0: die ("int packing error") ;
+ case 0: die ("int packing error") ; break ;
case 1: *pval = *(I64*)(u+1) & 0x0000000000ffff ; return 3 ;
case 2: *pval = *(I64*)(u+1) & 0x00000000ffffff ; return 4 ;
case 3: *pval = *(I64*)(u+1) & 0x000000ffffffff ; return 5 ;
@@ -3497,7 +3498,7 @@ static inline int intGet (unsigned char *u, I64 *pval)
case 4:
switch (u[0] & 0x07)
{
- case 0: die ("int packing error") ;
+ case 0: die ("int packing error") ; break ;
case 1: *pval = *(I64*)(u+1) | 0xffffffffffff0000 ; return 3 ;
case 2: *pval = *(I64*)(u+1) | 0xffffffffff000000 ; return 4 ;
case 3: *pval = *(I64*)(u+1) | 0xffffffff00000000 ; return 5 ;
=====================================
README.md
=====================================
@@ -142,6 +142,7 @@ a command such as DBshow.
All programs add suffixes (e.g. .db) as needed. The commands of the database library
are currently as follows:
+<a name="fasta2DB"></a>
```
1. fasta2DB [-v] <path:db> ( -f<file> | -i[<name>] | <input:fasta> ... )
```
@@ -166,6 +167,7 @@ partitioning of the database is updated to include the new data. A file may con
the data from multiple SMRT cells provided the reads for each SMRT cell are consecutive
in the file.
+<a name="DB2fasta"></a>
```
2. DB2fasta [-vU] [-w<int(80)>] <path:db>
```
@@ -180,6 +182,7 @@ By default the output sequences are in lower case and 80 chars per line. The -U
specifies upper case should be used, and the characters per line, or line width, can be
set to any positive value with the -w option.
+<a name="quiva2DB"></a>
```
3. quiva2DB [-vl] <path:db> ( -f<file> | -i | <input:quiva> ... )
```
@@ -194,6 +197,7 @@ FOO.quiva. This is enforced by the program. With the -l option
set the compression scheme is a bit lossy to get more compression (see the description
of dexqv in the DEXTRACTOR module here).
+<a name="DB2quiva"></a>
```
4. DB2quiva [-vU] <path:db>
```
@@ -208,6 +212,7 @@ By .fastq convention each QV vector is output as a line without new-lines, and b
default the Deletion Tag entry is in lower case letters. The -U option specifies
upper case letters should be used instead.
+<a name="arrow2DB"></a>
```
5. arrow2DB [-v] <path:db> ( -f<file> | -i | <input:arrow> ... )
```
@@ -220,6 +225,7 @@ incrementally but must be added in the
same order as the .fasta files were and have the same root names, e.g. FOO.fasta and
FOO.quiva. This is enforced by the program.
+<a name="DB2arrow"></a>
```
6. DB2arrow [-v] [-w<int(80)>] <path:db>
```
@@ -234,6 +240,7 @@ By default the output sequences are formatted 80 chars per line,
but the characters per line, or line width, can be
set to any positive value with the -w option.
+<a name="fasta2DAM"></a>
```
7. fasta2DAM [-v] <path:dam> ( -f<file> | -i[<name>] | <input:fasta> ... )
```
@@ -248,6 +255,7 @@ entry that has a run of N's in it will be split into separate "contig" entries a
interval of the contig in the original entry recorded. The header for each .fasta entry
is saved with the contigs created from it.
+<a name="DAM2fasta"></a>
```
8. DAM2fasta [-vU] [-w<int(80)>] <path:dam>
```
@@ -262,6 +270,7 @@ sequences are in lower case and 80 chars per line. The -U option specifies upper
should be used, and the characters per line, or line width, can be set to any positive
value with the -w option.
+<a name="DBsplit"></a>
```
9. DBsplit [-aflm] [-x<int>] [-s<double(200.)>] <path:db|dam>
```
@@ -288,6 +297,7 @@ primary read instead. One can at any later time change this back to the default
by splitting again with the -l parameter set. The setting of the primary reads occurs
regardless of whether the -a parameter is set or not.
+<a name="DBtrim"></a>
```
10. DBtrim [-af] [-x<int>] <path:db|dam>
```
@@ -295,6 +305,7 @@ regardless of whether the -a parameter is set or not.
Exactly like DBsplit except that it only resets the trimming parameters (and not the split
partition itself).
+<a name="DBdust"></a>
```
11. DBdust [-b] [-w<int(64)>] [-t<double(2.)>] [-m<int(10)>] <path:db|dam>
```
@@ -316,6 +327,7 @@ and .FOO.3.dust.data, given FOO.3 on the command line. We call this a *block tr
This permits job parallelism in block-sized chunks, and the resulting sequence of
block tracks can then be merged into a track for the entire untrimmed DB with Catrack.
+<a name="Catrack"></a>
```
12. Catrack [-vfd] <path:db|dam> <track:name> ...
```
@@ -329,6 +341,7 @@ concatenation takes place regardless of whether or not the single, combined trac
already exists or not. If the -d option is set then every block track is removed after
the successful construction of the combined track.
+<a name="DBshow"></a>
```
13. DBshow [-unqaUQA] [-w<int(80)>] [-m<mask>]+
<path:db|dam> [ <reads:FILE> | <reads:range> ... ]
@@ -368,6 +381,7 @@ The .fasta, .quiva, and .arrow files that are output can be used to build a new
fasta2DB, quiva2DB, and arrow2DB, giving one a simple way to make a DB of a subset of
the reads for testing purposes.
+<a name="DB2ONE"></a>
```
14. DB2ONE [-u] [-aqhwf] [-m<mask>]+
<path:db|dam> [ <reads:FILE> | <reads:range> ... ]
@@ -416,9 +430,9 @@ outputs five 1-code lines containing the Quiver odds vectors as indicated in the
```
If the -h flag is set, then DB2ONE outputs an H line giving
the fasta header line that was associated with each read on input.
-If the -w flag is set, then DB2ONE outputs a W line giving the well number and pulse start and end. Furthermore, if the database is an A-DB then an N line containing the SNR for each channel for
-that well is output, and if the database is a Q-DB then a Q line is output giving an estimate of the
-error rate of the read based on the Quiver vectors.
+If the -w flag is set, then for a .db database DB2ONE outputs a W line giving the well number and pulse start and end. Furthermore, if the database is an A-DB then an N line containing the SNR for each channel for that well is output, and if the database is a Q-DB then a Q line is output giving an estimate of the error rate of the read based on the Quiver vectors.
+On the other hand if the database is a .dam, then DB2ONE outputs a G line in response to the
+-w flag, which gives the contig # within a .fasta scaffold and the first and last base positions in the fasta file entry from which the contig was extracted.
Finally, if the -f flag is set, then the output is grouped by original source files where
each group begins with an f line giving the name of the file and the number of reads from that
file.
@@ -429,7 +443,9 @@ file.
W <well: int> <pulse start: int> <pulse end: int>
N <SNR A-channel> <SNR C-channel> <SNR G-channel> <SNR T-channel>
Q <read quality value: int>
-
+
+ G <contig int> <first bp: int> <last bp: int>
+
f <count: int> <file name: string>
```
Lastly, for each -m option specifying a *mask* track name, a T-line is output that first indicates which mask it is for and then contains an integer list of interval begin-end pairs.
@@ -441,6 +457,7 @@ the mapping between the track index and its name as it appeared in the -m option
X <track idx: int> <name: string> // Once in prolog for each track
```
+<a name="DBstats"></a>
```
15. DBstats [-nu] [-b<int(1000)>] [-m<mask>]+ <path:db|dam>
```
@@ -453,6 +470,7 @@ is not displayed. Any track such as a "dust" track that gives a series of
intervals along the read can be specified with the -m option in which case a summary
and a histogram of the interval lengths is displayed.
+<a name="DBrm"></a>
```
16. DBrm [-vnf] <path:db|dam> ...
```
@@ -463,6 +481,7 @@ files, and all of these are removed by DBrm.
If the -v option is set then every file deleted is listed.
The -n, and -f options are as for the UNIX "rm" command.
+<a name="DBmv"></a>
```
17. DBmv [-vinf] <old:db|dam> <new:db|dam|dir>
```
to the diretory, otherwise, all the files for \<old> are renamed to the given target; "diretory" should read "directory" —
If the -v option is set then every file move is displayed.
The -i, -n, and -f options are as for the UNIX "mv" command.
+<a name="DBcp"></a>
```
18. DBcp [-vinf] <old:db|dam> <new:db|dam|dir>
```
to the directory, otherwise, a copy of all the files for \<old> are created with
If the -v option is set then every file move is displayed.
The -i, -n, and -f options are as for the UNIX "cp" command.
+<a name="DBwipe"></a>
```
19. DBwipe <path:db|dam>
```
@@ -489,6 +510,7 @@ Delete any Arrow or Quiver data from the given databases. This removes the .arw
.qvs file and resets information in the .idx file containing information for Arrow
or Quiver. Basically, converts an A-DB or Q-DB back to a simple S-DB.
+<a name="simulator"></a>
```
20. simulator <genome:dam> [-CU] [-m<int(10000)>] [-s<int(2000)>] [-e<double(.15)>]
[-c<double(50.)>] [-f<double(.5)>] [-x<int(4000)>]
@@ -524,6 +546,7 @@ an assembly and is very useful for debugging and testing purposes. If the map li
a read is say 's b e' then if b \< e the read is a perturbed copy of s[b,e] in the
forward direction, and a perturbed copy s[e,b] in the reverse direction otherwise.
+<a name="rangen"></a>
```
21. rangen <genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]
```
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+dazzdb (1.0+git20230919.414ad05-1) unstable; urgency=medium
+
+ * New upstream version
+
+ -- Andreas Tille <tille at debian.org> Mon, 27 Nov 2023 17:14:06 +0100
+
dazzdb (1.0+git20221215.aad3a46-1) unstable; urgency=medium
* New upstream version
=====================================
fasta2DAM.c
=====================================
@@ -347,7 +347,7 @@ int main(int argc, char *argv[])
while (PIPE != NULL || next_file(ng))
{ FILE *input;
- char *path, *core;
+ char *path, *core, *fname;
int nline, eof, rlen;
// Open it: <path>/<core>.fasta if file, stdin otherwise with core = PIPE or "stdout"
@@ -358,8 +358,15 @@ int main(int argc, char *argv[])
path = PathTo(ng->name);
core = Root(ng->name,".fasta");
- if ((input = Fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
- goto error;
+ fname = Strdup(Catenate(core,".fasta",NULL,NULL),"Allocating file name");
+ if ((input = fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
+ { free(fname);
+ free(core);
+ core = Root(ng->name,".fa");
+ fname = Strdup(Catenate(core,".fa",NULL,NULL),"Allocating file name");
+ if ((input = fopen(Catenate(path,"/",core,".fa"),"r")) == NULL)
+ goto error;
+ }
free(path);
}
@@ -390,8 +397,8 @@ int main(int argc, char *argv[])
(ofiles == 0 || strcmp(core,flist[ofiles-1]) != 0)))
{ for (j = 0; j < ofiles; j++)
if (strcmp(core,flist[j]) == 0)
- { fprintf(stderr,"%s: File %s.fasta is already in database %s.dam\n",
- Prog_Name,core,Root(argv[1],".dam"));
+ { fprintf(stderr,"%s: File %s is already in database %s.dam\n",
+ Prog_Name,fname,Root(argv[1],".dam"));
goto error;
}
}
@@ -423,7 +430,7 @@ int main(int argc, char *argv[])
{ if (PIPE != NULL && PIPE[0] == '\0')
fprintf(stderr,"Adding scaffolds from stdio ...\n");
else
- fprintf(stderr,"Adding '%s.fasta' ...\n",core);
+ fprintf(stderr,"Adding '%s' ...\n",fname);
fflush(stderr);
}
@@ -433,12 +440,12 @@ int main(int argc, char *argv[])
// Check that the first line is a header line
if (read[strlen(read)-1] != '\n')
- { fprintf(stderr,"File %s.fasta, Line 1: Fasta line is too long (> %d chars)\n",
- core,MAX_NAME-2);
+ { fprintf(stderr,"File %s, Line 1: Fasta line is too long (> %d chars)\n",
+ fname,MAX_NAME-2);
goto error;
}
if (!eof && read[0] != '>')
- { fprintf(stderr,"File %s.fasta, Line 1: First header in fasta file is missing\n",core);
+ { fprintf(stderr,"File %s, Line 1: First header in fasta file is missing\n",fname);
goto error;
}
@@ -459,7 +466,7 @@ int main(int argc, char *argv[])
nline += 1;
x = strlen(read+rlen)-1;
if (read[rlen+x] != '\n')
- { fprintf(stderr,"File %s.fasta, Line %d:",core,nline);
+ { fprintf(stderr,"File %s, Line %d:",fname,nline);
fprintf(stderr," Fasta line is too long (> %d chars)\n",MAX_NAME-2);
goto error;
}
@@ -470,7 +477,7 @@ int main(int argc, char *argv[])
{ rmax = ((int64) (1.2 * rmax)) + 1000 + MAX_NAME;
read = (char *) realloc(read,rmax+1);
if (read == NULL)
- { fprintf(stderr,"File %s.fasta, Line %d:",core,nline);
+ { fprintf(stderr,"File %s, Line %d:",fname,nline);
fprintf(stderr," Out of memory (Allocating line buffer)\n");
goto error;
}
=====================================
fasta2DB.c
=====================================
@@ -360,7 +360,7 @@ int main(int argc, char *argv[])
while (PIPE != NULL || next_file(ng))
{ FILE *input;
char prolog[MAX_NAME];
- char *path, *core;
+ char *path, *core, *fname;
int eof;
// Open it: <path>/<core>.fasta if file, stdin otherwise with core = PIPE or "stdout"
@@ -371,8 +371,15 @@ int main(int argc, char *argv[])
path = PathTo(ng->name);
core = Root(ng->name,".fasta");
- if ((input = Fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
- goto error;
+ fname = Strdup(Catenate(core,".fasta",NULL,NULL),"Allocating file name");
+ if ((input = fopen(Catenate(path,"/",core,".fasta"),"r")) == NULL)
+ { free(fname);
+ free(core);
+ core = Root(ng->name,".fa");
+ fname = Strdup(Catenate(core,".fa",NULL,NULL),"Allocating file name");
+ if ((input = fopen(Catenate(path,"/",core,".fa"),"r")) == NULL)
+ goto error;
+ }
free(path);
}
@@ -416,8 +423,8 @@ int main(int argc, char *argv[])
(ofiles == 0 || strcmp(core,flist[ofiles-1]) != 0)))
for (j = 0; j < ofiles; j++)
if (strcmp(core,flist[j]) == 0)
- { fprintf(stderr,"%s: File %s.fasta is already in database %s.db\n",
- Prog_Name,core,Root(argv[1],".db"));
+ { fprintf(stderr,"%s: File %s is already in database %s.db\n",
+ Prog_Name,fname,Root(argv[1],".db"));
goto error;
}
}
@@ -428,7 +435,7 @@ int main(int argc, char *argv[])
{ if (PIPE != NULL && PIPE[0] == '\0')
fprintf(stderr,"Adding reads from stdio ...\n");
else
- fprintf(stderr,"Adding '%s.fasta' ...\n",core);
+ fprintf(stderr,"Adding '%s' ...\n",fname);
fflush(stderr);
}
flist[ofiles++] = core;
@@ -436,12 +443,12 @@ int main(int argc, char *argv[])
// Check that the first line is a header and has PACBIO format.
if (read[strlen(read)-1] != '\n')
- { fprintf(stderr,"File %s.fasta, Line 1: Fasta line is too long (> %d chars)\n",
- core,MAX_NAME-2);
+ { fprintf(stderr,"File %s, Line 1: Fasta line is too long (> %d chars)\n",
+ fname,MAX_NAME-2);
goto error;
}
if (!eof && read[0] != '>')
- { fprintf(stderr,"File %s.fasta, Line 1: First header in fasta file is missing\n",core);
+ { fprintf(stderr,"File %s, Line 1: First header in fasta file is missing\n",fname);
goto error;
}
@@ -460,7 +467,7 @@ int main(int argc, char *argv[])
*find = '/';
}
else
- { fprintf(stderr,"File %s.fasta, Line 1: Pacbio header line format error\n",core);
+ { fprintf(stderr,"File %s, Line 1: Pacbio header line format error\n",fname);
goto error;
}
}
@@ -481,8 +488,8 @@ int main(int argc, char *argv[])
find = index(read+(rlen+1),'/');
if (find == NULL)
- { fprintf(stderr,"File %s.fasta, Line %d: Pacbio header line format error\n",
- core,nline);
+ { fprintf(stderr,"File %s, Line %d: Pacbio header line format error\n",
+ fname,nline);
goto error;
}
*find = '\0';
@@ -497,8 +504,8 @@ int main(int argc, char *argv[])
{ char *secn = index(find+1,'/');
x = sscanf(find+1,"%d/ccs\n",&well);
if (secn == NULL || strncmp(secn+1,"ccs",3) != 0 || x < 1)
- { fprintf(stderr,"File %s.fasta, Line %d: Pacbio header line format error\n",
- core,nline);
+ { fprintf(stderr,"File %s, Line %d: Pacbio header line format error\n",
+ fname,nline);
goto error;
}
beg = 0;
@@ -514,7 +521,7 @@ int main(int argc, char *argv[])
x = strlen(read+rlen)-1;
if (read[rlen+x] != '\n')
{ if (read[rlen] == '>')
- { fprintf(stderr,"File %s.fasta, Line %d:",core,nline);
+ { fprintf(stderr,"File %s, Line %d:",fname,nline);
fprintf(stderr," Fasta header line is too long (> %d chars)\n",
MAX_NAME-2);
goto error;
@@ -529,7 +536,7 @@ int main(int argc, char *argv[])
{ rmax = ((int64) (1.2 * rmax)) + 1000 + MAX_NAME;
read = (char *) realloc(read,rmax+1);
if (read == NULL)
- { fprintf(stderr,"File %s.fasta, Line %d:",core,nline);
+ { fprintf(stderr,"File %s, Line %d:",fname,nline);
fprintf(stderr," Out of memory (Allocating line buffer)\n");
goto error;
}
@@ -566,7 +573,7 @@ int main(int argc, char *argv[])
{ pmax = ((int) (pcnt*1.2)) + 100;
prec = (DAZZ_READ *) realloc(prec,sizeof(DAZZ_READ)*pmax);
if (prec == NULL)
- { fprintf(stderr,"File %s.fasta, Line %d: Out of memory",core,nline);
+ { fprintf(stderr,"File %s, Line %d: Out of memory",fname,nline);
fprintf(stderr," (Allocating read records)\n");
goto error;
}
View it on GitLab: https://salsa.debian.org/med-team/dazzdb/-/compare/4a9af83eb8f2576a0e766e5bdcfe0cce18fe89de...707a2f9ad090ff70a4a9dfa323617c3384503bfd
--
View it on GitLab: https://salsa.debian.org/med-team/dazzdb/-/compare/4a9af83eb8f2576a0e766e5bdcfe0cce18fe89de...707a2f9ad090ff70a4a9dfa323617c3384503bfd
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231127/ce02f167/attachment-0001.htm>
More information about the debian-med-commit
mailing list