[med-svn] [Git][med-team/dazzdb][master] 9 commits: Use git mode; New Git commit
Andreas Tille
gitlab at salsa.debian.org
Sun Oct 28 09:31:23 GMT 2018
Andreas Tille pushed to branch master at Debian Med / dazzdb
Commits:
e48d90c0 by Andreas Tille at 2018-10-28T09:25:32Z
Use git mode; New Git commit
- - - - -
d9350062 by Andreas Tille at 2018-10-28T09:26:00Z
New upstream version 1.0+git20180908.0bd5e07
- - - - -
30017004 by Andreas Tille at 2018-10-28T09:26:00Z
Update upstream source from tag 'upstream/1.0+git20180908.0bd5e07'
Update to upstream version '1.0+git20180908.0bd5e07'
with Debian dir 4fd5af46efb1079d943f4f0ecf46ecdda054e5cf
- - - - -
df468830 by Andreas Tille at 2018-10-28T09:26:21Z
debhelper 11
- - - - -
5a163c53 by Andreas Tille at 2018-10-28T09:26:23Z
Point Vcs fields to salsa.debian.org
- - - - -
33cde67b by Andreas Tille at 2018-10-28T09:26:23Z
Standards-Version: 4.2.1
- - - - -
5f47e8d7 by Andreas Tille at 2018-10-28T09:26:23Z
Remove trailing whitespace in debian/copyright
- - - - -
b8bb7977 by Andreas Tille at 2018-10-28T09:29:07Z
hardening=+all
- - - - -
f8e4f5bd by Andreas Tille at 2018-10-28T09:30:43Z
Upload to unstable
- - - - -
19 changed files:
- Catrack.c
- DB.c
- DB.h
- DBdump.c
- DBdust.c
- DBshow.c
- DBsplit.c
- DBtrim.c
- README.md
- debian/changelog
- debian/compat
- debian/control
- debian/copyright
- debian/rules
- debian/watch
- fasta2DAM.c
- fasta2DB.c
- rangen.c
- simulator.c
Changes:
=====================================
Catrack.c
=====================================
@@ -131,6 +131,7 @@ int main(int argc, char *argv[])
int nextra;
int64 extail;
+ extra = NULL;
anno = NULL;
trackoff = 0;
tracktot = tracksiz = 0;
@@ -144,7 +145,7 @@ int main(int argc, char *argv[])
while (1)
{ FILE *dfile, *afile;
char *dfile_name, *afile_name;
- int i, size, esize, tracklen;
+ int i, apos, size, esize, tracklen;
afile_name = Strdup(Numbered_Suffix(prefix,nfiles+1,Catenate(".",argv[2],".","anno")),
"Allocating .anno file name");
@@ -256,10 +257,11 @@ int main(int argc, char *argv[])
}
FSEEKO(afile,0,SEEK_END)
+ FTELLO(apos,afile)
if (dfile != NULL)
- extail = FTELLO(afile) - (esize*(tracklen+1) + 2*sizeof(int));
+ extail = apos - (esize*(tracklen+1) + 2*sizeof(int));
else
- extail = FTELLO(afile) - (esize*tracklen + 2*sizeof(int));
+ extail = apos - (esize*tracklen + 2*sizeof(int));
FSEEKO(afile,-extail,SEEK_END)
if (extail >= 20)
@@ -338,16 +340,16 @@ int main(int argc, char *argv[])
FWRITE(&tracksiz,sizeof(int),1,aout)
}
}
-
- FCLOSE(aout);
- if (dout != NULL)
- FCLOSE(dout);
if (nfiles != nblocks)
{ fprintf(stderr,"%s: Did not catenate all tracks of DB (nfiles %d != nblocks %d)\n",
Prog_Name, nfiles, nblocks);
goto error;
}
+
+ FCLOSE(aout);
+ if (dout != NULL)
+ FCLOSE(dout);
if (DELETE)
{ int i;
=====================================
DB.c
=====================================
@@ -17,6 +17,8 @@
#include <ctype.h>
#include <unistd.h>
#include <dirent.h>
+#include <limits.h>
+#include <sys/stat.h>
#include "DB.h"
@@ -599,8 +601,8 @@ error:
// for the retained reads.
void Trim_DB(DAZZ_DB *db)
-{ int i, j, r;
- int allflag, cutoff;
+{ int i, j, r, f;
+ int allflag, cutoff, css;
int64 totlen;
int maxlen, nreads;
DAZZ_TRACK *record;
@@ -676,14 +678,23 @@ void Trim_DB(DAZZ_DB *db)
record->anno = Realloc(record->anno,record->size*(j+1),NULL);
}
+ css = 0;
totlen = maxlen = 0;
for (j = i = 0; i < nreads; i++)
- { r = reads[i].rlen;
- if ((reads[i].flags & DB_BEST) >= allflag && r >= cutoff)
+ { f = reads[i].flags;
+ if ((f & DB_CSS) == 0)
+ css = 0;
+ r = reads[i].rlen;
+ if ((f & DB_BEST) >= allflag && r >= cutoff)
{ totlen += r;
if (r > maxlen)
maxlen = r;
- reads[j++] = reads[i];
+ reads[j] = reads[i];
+ if (css)
+ reads[j++].flags |= DB_CSS;
+ else
+ reads[j++].flags &= ~DB_CSS;
+ css = 1;
}
}
@@ -1989,3 +2000,196 @@ void Print_Read(char *s, int width)
printf("\n");
}
}
+
+
+/*******************************************************************************************
+ *
+ * COMMAND LINE BLOCK PARSER
+ * Take a command line argument and interpret the '@' block number ranges.
+ * Parse_Block_Arg produces an Block_Looper iterator object that can then
+ * be invoked multiple times to iterate through all the files implied by
+ * the @ pattern/range.
+ *
+ ********************************************************************************************/
+
+typedef struct
+ { int first, last, next;
+ char *root, *pwd, *ppnt;
+ char *slice;
+ } _Block_Looper;
+
+ // Advance the iterator e_parse to the next file, open it, and return the file pointer
+ // to it. Return NULL if at the end of the list of files.
+
+int Next_Block_Exists(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+ char *disp;
+ struct stat sts;
+
+ if (parse->next+1 > parse->last)
+ return (0);
+
+ if (parse->next < 0)
+ disp = parse->root;
+ else
+ disp = Numbered_Suffix(parse->root,parse->next+1,parse->ppnt);
+
+ if (stat(Catenate(parse->pwd,"/",disp,".las"),&sts))
+ return (0);
+ else
+ return (1);
+}
+
+
+FILE *Next_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+ char *disp;
+ FILE *input;
+
+ parse->next += 1;
+ if (parse->next > parse->last)
+ return (NULL);
+
+ if (parse->next < 0)
+ disp = parse->root;
+ else
+ disp = Numbered_Suffix(parse->root,parse->next,parse->ppnt);
+
+ if ((input = fopen(Catenate(parse->pwd,"/",disp,".las"),"r")) == NULL)
+ { if (parse->last != INT_MAX)
+ { fprintf(stderr,"%s: %s.las is not present\n",Prog_Name,disp);
+ exit (1);
+ }
+ return (NULL);
+ }
+ return (input);
+}
+
+ // Reset the iterator e_parse to the first file
+
+void Reset_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+ parse->next = parse->first - 1;
+}
+
+ // Return a pointer to the path for the current file
+
+char *Block_Arg_Path(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+ return (parse->pwd);
+}
+
+ // Return a pointer to the root name for the current file
+
+char *Block_Arg_Root(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+ if (parse->next < 0)
+ return (parse->root);
+ else
+ return (Numbered_Suffix(parse->root,parse->next,parse->ppnt));
+}
+
+ // Free the iterator
+
+void Free_Block_Arg(Block_Looper *e_parse)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+ free(parse->root);
+ free(parse->pwd);
+ free(parse->slice);
+ free(parse);
+}
+
+char *Next_Block_Slice(Block_Looper *e_parse, int slice)
+{ _Block_Looper *parse = (_Block_Looper *) e_parse;
+
+ if (parse->slice == NULL)
+ { int size = strlen(parse->pwd) + strlen(Block_Arg_Root(parse)) + 30;
+ parse->slice = (char *) Malloc(size,"Block argument slice");
+ if (parse->slice == NULL)
+ exit (1);
+ }
+
+ if (parse->first < 0)
+ sprintf(parse->slice,"%s/%s",parse->pwd,parse->root);
+ else
+ sprintf(parse->slice,"%s/%s%c%d-%d%s",parse->pwd,parse->root,BLOCK_SYMBOL,parse->next+1,
+ parse->next+slice,parse->ppnt);
+ parse->next += slice;
+ return (parse->slice);
+}
+
+ // Parse the command line argument and return an iterator to move through the
+ // file names, setting it up to report the first file.
+
+Block_Looper *Parse_Block_Arg(char *arg)
+{ _Block_Looper *parse;
+ char *pwd, *root;
+ char *ppnt, *cpnt;
+ int first, last;
+
+ parse = (_Block_Looper *) Malloc(sizeof(_Block_Looper),"Allocating parse node");
+ pwd = PathTo(arg);
+ root = Root(arg,".las");
+ if (parse == NULL || pwd == NULL || root == NULL)
+ exit (1);
+
+ ppnt = index(root,BLOCK_SYMBOL);
+ if (ppnt == NULL)
+ first = last = -1;
+ else
+ { if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
+ { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+ Prog_Name,BLOCK_SYMBOL,root);
+ exit (1);
+ }
+ *ppnt++ = '\0';
+ first = strtol(ppnt,&cpnt,10);
+ if (cpnt == ppnt)
+ { first = 1;
+ last = INT_MAX;
+ }
+ else
+ { if (first < 0)
+ { fprintf(stderr,
+ "%s: Integer following %c-sigan is less than 0 in source name '%s'\n",
+ Prog_Name,BLOCK_SYMBOL,root);
+ exit (1);
+ }
+ if (*cpnt == '-')
+ { ppnt = cpnt+1;
+ last = strtol(ppnt,&cpnt,10);
+ if (cpnt == ppnt)
+ { fprintf(stderr,"%s: Second integer must follow - in source name '%s'\n",
+ Prog_Name,root);
+ exit (1);
+ }
+ if (last < first)
+ { fprintf(stderr,
+ "%s: 2nd integer is less than 1st integer in source name '%s'\n",
+ Prog_Name,root);
+ exit (1);
+ }
+ ppnt = cpnt;
+ }
+ else
+ { last = INT_MAX;
+ ppnt = cpnt;
+ }
+ }
+ }
+
+ parse->pwd = pwd;
+ parse->root = root;
+ parse->ppnt = ppnt;
+ parse->first = first;
+ parse->last = last;
+ parse->next = first-1;
+ parse->slice = NULL;
+ return ((Block_Looper *) parse);
+}
=====================================
DB.h
=====================================
@@ -59,6 +59,8 @@ typedef signed long long int64;
typedef float float32;
typedef double float64;
+#define LAST_READ_SYMBOL '$'
+#define BLOCK_SYMBOL '@'
/*******************************************************************************************
*
@@ -215,12 +217,11 @@ int Count_Args(char *arg);
SYSTEM_READ_ERROR \
}
-#define FTELLO(file) \
- ( { int x = ftello(file); \
- if (x < 0) \
- SYSTEM_READ_ERROR \
- ; x; \
- } )
+#define FTELLO(val,file) \
+ { val = ftello(file); \
+ if (val < 0) \
+ SYSTEM_READ_ERROR \
+ }
/*******************************************************************************************
*
@@ -567,4 +568,25 @@ int Read_All_Sequences(DAZZ_DB *db, int ascii);
int List_DB_Files(char *path, void actor(char *path, char *extension));
+ // Take a command line argument and interpret the '@' block number ranges.
+ // Parse_Block_Arg produces a Block_Looper iterator object that can then
+ // be invoked multiple times to iterate through all the files implied by
+ // the @ pattern/range. Next_Block_Slice returns a string encoing the next
+ // slice files represented by an @-notation, and advances the iterator by
+ // that many files.
+
+typedef void Block_Looper;
+
+Block_Looper *Parse_Block_Arg(char *arg);
+
+FILE *Next_Block_Arg(Block_Looper *e_parse);
+int Next_Block_Exists(Block_Looper *e_parse);
+
+char *Next_Block_Slice(Block_Looper *e_parse,int slice);
+
+void Reset_Block_Arg(Block_Looper *e_parse); // Reset iterator to first file
+char *Block_Arg_Path(Block_Looper *e_parse); // Path of current file
+char *Block_Arg_Root(Block_Looper *e_parse); // Root name of current file
+void Free_Block_Arg(Block_Looper *e_parse); // Free the iterator
+
#endif // _DAZZ_DB
=====================================
DBdump.c
=====================================
@@ -94,6 +94,7 @@ static int prof_map[41] =
int main(int argc, char *argv[])
{ DAZZ_DB _db, *db = &_db;
int Quiva_DB, Arrow_DB;
+ int FirstRead;
FILE *hdrs = NULL;
char *hdrs_name = NULL;
int64 *qv_idx = NULL;
@@ -377,7 +378,10 @@ int main(int argc, char *argv[])
else if (status == 1)
MTRACK[i] = Load_Track(db,MASK[i]);
}
+ FirstRead = db->tfirst;
}
+ else
+ FirstRead = db->ufirst;
if (DOIQV)
{ int status, kind;
@@ -713,7 +717,7 @@ int main(int argc, char *argv[])
r = reads + i;
len = r->rlen;
if (DORED)
- printf("R %d\n",i+1);
+ printf("R %d\n",FirstRead + (i+1));
flags = r->flags;
qv = (flags & DB_QV);
=====================================
DBdust.c
=====================================
@@ -155,7 +155,7 @@ int main(int argc, char *argv[])
FWRITE(&size,sizeof(int),1,afile)
FSEEKO(afile,0,SEEK_END)
FSEEKO(dfile,0,SEEK_END)
- indx = FTELLO(dfile);
+ FTELLO(indx,dfile)
}
free(pwd);
=====================================
DBshow.c
=====================================
@@ -175,8 +175,8 @@ int main(int argc, char *argv[])
fprintf(stderr," -n: Do not show the default read DNA sequences.\n");
fprintf(stderr," -m: Show mask intervals and highlight in sequence.\n");
fprintf(stderr,"\n");
- fprintf(stderr," -Q: Produce a .quiva file (ignore all other options but -uU.\n");
- fprintf(stderr," -A: Produce a .arrow file (ignore all other options but -uw.\n");
+ fprintf(stderr," -Q: Produce a .quiva file (ignore all other options but -uU).\n");
+ fprintf(stderr," -A: Produce a .arrow file (ignore all other options but -uw).\n");
fprintf(stderr,"\n");
fprintf(stderr," -U: Use upper case for DNA (default is lower case).\n");
fprintf(stderr," -w: Print -w bp per line (default is 80).\n");
@@ -349,14 +349,13 @@ int main(int argc, char *argv[])
if (argc == 3)
{ if (argv[2][0] != LAST_READ_SYMBOL || argv[2][1] != '\0')
{ char *eptr, *fptr;
- int b, e;
- b = strtol(argv[2],&eptr,10);
- if (eptr > argv[2] && b > 0)
+ strtol(argv[2],&eptr,10);
+ if (eptr > argv[2])
{ if (*eptr == '-')
{ if (eptr[1] != LAST_READ_SYMBOL || eptr[2] != '\0')
- { e = strtol(eptr+1,&fptr,10);
- input_pts = (fptr <= eptr+1 || *fptr != '\0' || e <= 0);
+ { strtol(eptr+1,&fptr,10);
+ input_pts = (fptr <= eptr+1 || *fptr != '\0');
}
}
else
=====================================
DBsplit.c
=====================================
@@ -32,7 +32,15 @@
#define PATHSEP "/"
#endif
-static char *Usage = "[-af] [-x<int>] [-s<double(200.)>] <path:db|dam>";
+static char *Usage = "[-aflm] [-x<int>] [-s<double(200.)>] <path:db|dam>";
+
+static DAZZ_READ *reads;
+
+static int MSORT(const void *l, const void *r)
+{ int x = *((int *) l);
+ int y = *((int *) r);
+ return (reads[x].rlen - reads[y].rlen);
+}
int main(int argc, char *argv[])
{ DAZZ_DB db, dbs;
@@ -40,11 +48,16 @@ int main(int argc, char *argv[])
FILE *dbfile, *ixfile;
char *dbfile_name, *ixfile_name;
int status;
+ int nreads;
+
+ int medmax = 0;
+ int *msort = NULL;
int FORCE;
int ALL;
int CUTOFF;
int64 SIZE;
+ int SELECT;
{ int i, j, k;
int flags[128];
@@ -53,6 +66,7 @@ int main(int argc, char *argv[])
ARG_INIT("DBsplit")
+ SELECT = -1;
CUTOFF = 0;
size = 200;
@@ -61,7 +75,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
- ARG_FLAGS("af")
+ ARG_FLAGS("aflm")
break;
case 'x':
ARG_NON_NEGATIVE(CUTOFF,"Min read length cutoff")
@@ -82,6 +96,16 @@ int main(int argc, char *argv[])
ALL = flags['a'];
FORCE = flags['f'];
+ if (flags['l'])
+ { SELECT = 0;
+ if (flags['m'])
+ { fprintf(stderr,"%s: Cannot set both -l and -m, mutually exclusive\n",Prog_Name);
+ exit (1);
+ }
+ }
+ else if (flags['m'])
+ SELECT = 1;
+
if (argc != 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
@@ -89,6 +113,9 @@ int main(int argc, char *argv[])
fprintf(stderr," -x: Trimmed DB has reads >= this threshold.\n");
fprintf(stderr," -a: Trimmed DB contains all reads from a well (not just longest).\n");
fprintf(stderr," -f: Force the split to occur even if already split.\n");
+ fprintf(stderr,"\n");
+ fprintf(stderr," -l: Set primary read for a well to be the longest.\n");
+ fprintf(stderr," -m: Set primary read for a well to be the median.\n");
exit (1);
}
}
@@ -102,6 +129,8 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
exit (1);
}
+ reads = db.reads;
+ nreads = db.ureads;
{ char *pwd, *root;
char buffer[2*MAX_NAME+100];
@@ -146,18 +175,59 @@ int main(int argc, char *argv[])
}
}
- dbpos = FTELLO(dbfile);
+ FTELLO(dbpos,dbfile)
FSEEKO(dbfile,dbpos,SEEK_SET)
FPRINTF(dbfile,DB_NBLOCK,0)
FPRINTF(dbfile,DB_PARAMS,SIZE,CUTOFF,ALL)
}
- { DAZZ_READ *reads = db.reads;
- int nreads = db.ureads;
- int64 totlen;
+ if (SELECT >= 0)
+ { int i, j, k, h;
+ int off;
+
+ off = (~ DB_BEST);
+
+ for (i = 0; i < nreads; i = j)
+ { j = i+1;
+ reads[i].flags &= off;
+ while ((reads[j].flags & DB_CSS) != 0)
+ reads[j++].flags &= off;
+
+ if (j-i <= 1)
+ { reads[i].flags |= DB_BEST;
+ continue;
+ }
+
+ if (SELECT == 0)
+ { int mlen;
+
+ mlen = reads[h = i].rlen;
+ for (k = i+1; k < j; k++)
+ if (mlen < reads[k].rlen)
+ mlen = reads[h = k].rlen;
+ }
+ else
+ { if (j-i > medmax)
+ { medmax = 1.2*(j-i) + 1000;
+ msort = (int *) Realloc(msort,sizeof(int)*medmax,"Allocating Median Array");
+ if (msort == NULL)
+ exit (1);
+ }
+ for (k = i; k < j; k++)
+ msort[k-i] = k;
+ qsort(msort,j-i,sizeof(int),MSORT);
+ h = msort[(j-i)/2];
+ }
+ reads[h].flags |= DB_BEST;
+ }
+ }
+
+
+ { int64 totlen;
int nblock, ireads, treads, rlen, fno;
- int i;
+ int i, upos, css;
+ css = 0;
nblock = 0;
totlen = 0;
ireads = 0;
@@ -166,16 +236,19 @@ int main(int argc, char *argv[])
if (ALL)
for (i = 0; i < nreads; i++)
{ rlen = reads[i].rlen;
+ if ((reads[i].flags & DB_CSS) == 0)
+ css = 0;
if (rlen >= CUTOFF)
- { ireads += 1;
- treads += 1;
- totlen += rlen;
- if (totlen >= SIZE)
- { FPRINTF(dbfile,DB_BDATA,i+1,treads)
+ { if (css == 0 && totlen >= SIZE)
+ { FPRINTF(dbfile,DB_BDATA,i,treads)
totlen = 0;
ireads = 0;
nblock += 1;
}
+ ireads += 1;
+ treads += 1;
+ totlen += rlen;
+ css = 1;
}
}
else
@@ -199,7 +272,8 @@ int main(int argc, char *argv[])
nblock += 1;
}
fno = fileno(dbfile);
- if (ftruncate(fno,FTELLO(dbfile)) < 0)
+ FTELLO(upos,dbfile)
+ if (ftruncate(fno,upos) < 0)
SYSTEM_WRITE_ERROR
FSEEKO(dbfile,dbpos,SEEK_SET)
@@ -208,9 +282,13 @@ int main(int argc, char *argv[])
dbs.cutoff = CUTOFF;
if (ALL)
dbs.allarr |= DB_ALL;
+ else
+ dbs.allarr &= ~DB_ALL;
dbs.treads = treads;
FSEEKO(ixfile,0,SEEK_SET)
FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
+ if (SELECT >= 0)
+ FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
}
FCLOSE(ixfile)
=====================================
DBtrim.c
=====================================
@@ -22,7 +22,15 @@
#define PATHSEP "/"
#endif
-static char *Usage = "[-af] [-x<int>] <path:db|dam>";
+static char *Usage = "[-aflm] [-x<int>] <path:db|dam>";
+
+static DAZZ_READ *reads;
+
+static int MSORT(const void *l, const void *r)
+{ int x = *((int *) l);
+ int y = *((int *) r);
+ return (reads[x].rlen - reads[y].rlen);
+}
int main(int argc, char *argv[])
{ DAZZ_DB db, dbs;
@@ -31,10 +39,15 @@ int main(int argc, char *argv[])
char *dbfile_name, *ixfile_name;
int nblocks;
int status;
+ int nreads;
+
+ int medmax = 0;
+ int *msort = NULL;
int FORCE;
int ALL;
int CUTOFF;
+ int SELECT;
{ int i, j, k;
int flags[128];
@@ -42,6 +55,7 @@ int main(int argc, char *argv[])
ARG_INIT("DBtrim")
+ SELECT = -1;
CUTOFF = 0;
j = 1;
@@ -49,7 +63,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
- ARG_FLAGS("af")
+ ARG_FLAGS("aflm")
break;
case 'x':
ARG_NON_NEGATIVE(CUTOFF,"Min read length cutoff")
@@ -62,12 +76,25 @@ int main(int argc, char *argv[])
ALL = flags['a'];
FORCE = flags['f'];
+ if (flags['l'])
+ { SELECT = 0;
+ if (flags['m'])
+ { fprintf(stderr,"%s: Cannot set both -l and -m, mutually exclusive\n",Prog_Name);
+ exit (1);
+ }
+ }
+ else if (flags['m'])
+ SELECT = 1;
+
if (argc != 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
fprintf(stderr," -x: Trimmed DB has reads >= this threshold.\n");
fprintf(stderr," -a: Trimmed DB contains all reads from a well (not just longest).\n");
fprintf(stderr," -f: Force the new trim setting even if already set.\n");
+ fprintf(stderr,"\n");
+ fprintf(stderr," -l: Set primary read for a well to be the longest.\n");
+ fprintf(stderr," -m: Set primary read for a well to be the median.\n");
exit (1);
}
}
@@ -81,6 +108,8 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
exit (1);
}
+ reads = db.reads;
+ nreads = db.ureads;
{ char *pwd, *root;
char buffer[2*MAX_NAME+100];
@@ -135,12 +164,53 @@ int main(int argc, char *argv[])
FSCANF(dbfile,DB_NBLOCK,&nblocks)
- dbpos = FTELLO(dbfile);
+ FTELLO(dbpos,dbfile)
FSCANF(dbfile,DB_PARAMS,&size,&cutoff,&all)
FSEEKO(dbfile,dbpos,SEEK_SET)
FPRINTF(dbfile,DB_PARAMS,size,CUTOFF,ALL)
}
+ if (SELECT >= 0)
+ { int i, j, k, h;
+ int off;
+
+ off = (~ DB_BEST);
+
+ for (i = 0; i < nreads; i = j)
+ { j = i+1;
+ reads[i].flags &= off;
+ while ((reads[j].flags & DB_CSS) != 0)
+ reads[j++].flags &= off;
+
+ if (j-i <= 1)
+ { reads[i].flags |= DB_BEST;
+ continue;
+ }
+
+ if (SELECT == 0)
+ { int mlen;
+
+ mlen = reads[h = i].rlen;
+ for (k = i+1; k < j; k++)
+ if (mlen < reads[k].rlen)
+ mlen = reads[h = k].rlen;
+ }
+ else
+ { if (j-i > medmax)
+ { medmax = 1.2*(j-i) + 1000;
+ msort = (int *) Realloc(msort,sizeof(int)*medmax,"Allocating Median Array");
+ if (msort == NULL)
+ exit (1);
+ }
+ for (k = i; k < j; k++)
+ msort[k-i] = k;
+ qsort(msort,j-i,sizeof(int),MSORT);
+ h = msort[(j-i)/2];
+ }
+ reads[h].flags |= DB_BEST;
+ }
+ }
+
{ DAZZ_READ *reads = db.reads;
int uread, tread;
int rlen;
@@ -150,7 +220,7 @@ int main(int argc, char *argv[])
t = 0;
fprintf(dbfile,DB_BDATA,0,0);
for (b = 0; b < nblocks; b++)
- { dbpos = FTELLO(dbfile);
+ { FTELLO(dbpos,dbfile);
FSCANF(dbfile,DB_BDATA,&uread,&tread)
if (ALL)
@@ -174,9 +244,13 @@ int main(int argc, char *argv[])
dbs.cutoff = CUTOFF;
if (ALL)
dbs.allarr |= DB_ALL;
+ else
+ dbs.allarr &= ~DB_ALL;
dbs.treads = t;
FSEEKO(ixfile,0,SEEK_SET)
FWRITE(&dbs,sizeof(DAZZ_DB),1,ixfile)
+ if (SELECT >= 0)
+ FWRITE(reads,sizeof(DAZZ_READ),nreads,ixfile)
}
FCLOSE(ixfile)
=====================================
README.md
=====================================
@@ -116,8 +116,9 @@ refer to a database we are referring to either a DB or a DAM.
The command DBsplit sets or resets the current partition for a database which is
determined by 3 parameters: (i) the total number of basepairs to place in each block,
(ii) the minimum read length of reads to include within a block, and (iii) whether or
-not to only include the longest read from a given well or all reads from a well (NB:
-several reads of the same insert in a given well can be produced by the Pacbio
+not to only include the longest (-l) or median (-m) read from a given well or all
+reads from a well (-a) (NB: several reads of the same insert in a given well can be
+produced by the Pacbio
instrument). Note that the length and uniqueness parameters effectively select a
subset of the reads that contribute to the size of a block. We call this subset the
*trimmed* data base. Some commands operate on the entire database, others on the
@@ -259,7 +260,7 @@ should be used, and the characters per line, or line width, can be set to any po
value with the -w option.
```
-9. DBsplit [-af] [-x<int>] [-s<double(200.)>] <path:db|dam>
+9. DBsplit [-aflm] [-x<int>] [-s<double(200.)>] <path:db|dam>
```
Divide the database \<path\>.db or \<path\>.dam conceptually into a series of blocks
@@ -278,6 +279,12 @@ If the -f option is set, the split is forced regardless of whether or not the DB
question has previously bin split, i.e. one is not interactively asked if they wish
to proceed.
+By default, the primary read for each well consists of the longest subread of the insert
+from the well. By setting the -m parameter, the subread of median length becomes the
+primary read instead. One can at any later time change this back to the default longest
+by splitting again with the -l parameter set. The setting of the primary reads occurs
+regardless of whether the -a parameter is set or not.
+
```
10. DBtrim [-af] [-x<int>] <path:db|dam>
```
=====================================
debian/changelog
=====================================
@@ -1,8 +1,20 @@
-dazzdb (1.0+20180115-2) UNRELEASED; urgency=medium
+dazzdb (1.0+git20180908.0bd5e07-1) unstable; urgency=medium
+ * Team upload.
+
+ [ Jelmer Vernooij ]
* Use secure copyright file specification URI.
- -- Jelmer Vernooij <jelmer at debian.org> Mon, 08 Oct 2018 21:48:05 +0100
+ [ Andreas Tille ]
+ * d/watch: Use git mode
+ * New Git commit
+ * debhelper 11
+ * Point Vcs fields to salsa.debian.org
+ * Standards-Version: 4.2.1
+ * Remove trailing whitespace in debian/copyright
+ * hardening=+all
+
+ -- Andreas Tille <tille at debian.org> Sun, 28 Oct 2018 10:29:24 +0100
dazzdb (1.0+20180115-1) unstable; urgency=medium
=====================================
debian/compat
=====================================
@@ -1 +1 @@
-10
+11
=====================================
debian/control
=====================================
@@ -3,12 +3,11 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.
Uploaders: Afif Elghraoui <afif at debian.org>
Section: science
Priority: optional
-Build-Depends:
- debhelper (>= 10),
- zlib1g-dev,
-Standards-Version: 4.1.3
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/dazzdb.git
-Vcs-Git: https://anonscm.debian.org/git/debian-med/dazzdb.git
+Build-Depends: debhelper (>= 11~),
+ zlib1g-dev
+Standards-Version: 4.2.1
+Vcs-Browser: https://salsa.debian.org/med-team/dazzdb
+Vcs-Git: https://salsa.debian.org/med-team/dazzdb.git
Homepage: https://github.com/thegenemyers/DAZZ_DB
Package: dazzdb
=====================================
debian/copyright
=====================================
@@ -25,13 +25,13 @@ License: EWM-BSD-3-Clause
without specific prior written permission.
.
THIS SOFTWARE IS PROVIDED BY EWM
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM
- BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=====================================
debian/rules
=====================================
@@ -4,6 +4,8 @@
include /usr/share/dpkg/default.mk
+export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+
%:
dh $@
=====================================
debian/watch
=====================================
@@ -1,4 +1,7 @@
-version=3
+version=4
-opts="filenamemangle=s/(?:.*)?v?(\d[\d\.]*)\.tar\.gz/dazzdb-$1.tar.gz/" \
- https://github.com/thegenemyers/DAZZ_DB/tags (?:.*/)?[vV]?(\d[\d\.]*)\.tar\.gz
+opts="mode=git,pretty=1.0+git%cd.%h" \
+ https://github.com/thegenemyers/DAZZ_DB.git HEAD
+
+#opts="filenamemangle=s/(?:.*)?v?(\d[\d\.]*)\.tar\.gz/dazzdb-$1.tar.gz/" \
+# https://github.com/thegenemyers/DAZZ_DB/tags (?:.*/)?[vV]?(\d[\d\.]*)\.tar\.gz
=====================================
fasta2DAM.c
=====================================
@@ -407,8 +407,12 @@ int main(int argc, char *argv[])
nline = 1;
eof = (fgets(read,MAX_NAME,input) == NULL);
if (eof || strlen(read) < 1)
- { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
- fclose(input);
+ { fclose(input);
+ if (PIPE != NULL)
+ { fprintf(stderr,"Standard input is empty, terminating!\n");
+ break;
+ }
+ fprintf(stderr,"Skipping '%s', file is empty!\n",core);
free(core);
continue;
}
=====================================
fasta2DB.c
=====================================
@@ -390,16 +390,14 @@ int main(int argc, char *argv[])
eof = (fgets(read,MAX_NAME,input) == NULL);
if (eof || strlen(read) < 1)
- { free(core);
- fclose(input);
+ { fclose(input);
if (PIPE != NULL)
{ fprintf(stderr,"Standard input is empty, terminating!\n");
break;
}
- else
- { fprintf(stderr,"Skipping '%s', file is empty!\n",core);
- continue;
- }
+ fprintf(stderr,"Skipping '%s', file is empty!\n",core);
+ free(core);
+ continue;
}
// Check that core is not too long and name is unique or last source if PIPE'd
=====================================
rangen.c
=====================================
@@ -148,6 +148,11 @@ int main(int argc, char *argv[])
if (argc != 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+ fprintf(stderr,"\n");
+ fprintf(stderr," -r: Random number generator seed (default is process id).\n");
+ fprintf(stderr," -b: AT vs GC ratio or bias.\n");
+ fprintf(stderr," -w: Print -w bp per line (default is 80).\n");
+ fprintf(stderr," -U: Output sequence in upper case (default is lower case).\n");
exit (1);
}
=====================================
simulator.c
=====================================
@@ -567,6 +567,19 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
fprintf(stderr," %*s %s\n",(int) strlen(Prog_Name),"",Usage[1]);
fprintf(stderr," %*s %s\n",(int) strlen(Prog_Name),"",Usage[2]);
+ fprintf(stderr,"\n");
+ fprintf(stderr," -m: average read length (log normal distribution).\n");
+ fprintf(stderr," -s: standard deviation of read lengths (log normal)\n");
+ fprintf(stderr," -x: ignore reads below this length\n");
+ fprintf(stderr," -f: forward/reverse strand sampling fraction\n");
+ fprintf(stderr," -e: error rate\n");
+ fprintf(stderr," -c: coverage of genome\n");
+ fprintf(stderr," -C: assume genome is circular (default is linear)\n");
+ fprintf(stderr,"\n");
+ fprintf(stderr," -r: Random number generator seed (default is process id).\n");
+ fprintf(stderr," -w: Print -w bp per line (default is 80).\n");
+ fprintf(stderr," -U: Use upper case for DNA (default is lower case).\n");
+ fprintf(stderr," -M: create a map file that indicates where every read was sampled\n");
exit (1);
}
}
View it on GitLab: https://salsa.debian.org/med-team/dazzdb/compare/f5eea3bcd72d73fb4b4a1fe08c24207ad5c098ff...f8e4f5bdf8055470113e29b5b47902e886e495cf
--
View it on GitLab: https://salsa.debian.org/med-team/dazzdb/compare/f5eea3bcd72d73fb4b4a1fe08c24207ad5c098ff...f8e4f5bdf8055470113e29b5b47902e886e495cf
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181028/6b20e2bf/attachment-0001.html>
More information about the debian-med-commit
mailing list