[med-svn] [daligner] 01/01: New upstream version 1.0+20171010
Afif Elghraoui
afif at moszumanska.debian.org
Fri Oct 20 03:20:57 UTC 2017
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch upstream
in repository daligner.
commit 3c46bb06ad985e674b37a1141a271ae27ba5e47f
Author: Afif Elghraoui <afif at debian.org>
Date: Thu Oct 19 23:03:46 2017 -0400
New upstream version 1.0+20171010
---
DB.c | 3 ++-
HPC.daligner.c | 44 ++++++++++++++++++++++++-------------
LAcat.c | 23 ++++++++++++--------
LAcheck.c | 6 +++---
LAdump.c | 39 +++++++++++++++++++++++++--------
LAindex.c | 6 +++---
LAmerge.c | 19 ++++++++++------
LAshow.c | 43 +++++++++++++++++++++++++++----------
LAsort.c | 21 +++++++++++-------
LAsplit.c | 12 +++++------
README.md | 30 ++++++++++++++++----------
align.c | 55 +++++++++++++++++++++++++++++++----------------
daligner.c | 35 ++++++++++++++++++++----------
filter.c | 68 ++++++++++++++++++++++++++++++++++++++++++++--------------
filter.h | 1 +
15 files changed, 276 insertions(+), 129 deletions(-)
diff --git a/DB.c b/DB.c
index 3afff14..69060d0 100644
--- a/DB.c
+++ b/DB.c
@@ -1486,7 +1486,8 @@ int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
EXIT(1);
}
if (Arrow_DB != db)
- { fclose(Arrow_File);
+ { if (Arrow_File != NULL)
+ fclose(Arrow_File);
arrow = Fopen(Catenate(db->path,"","",".arw"),"r");
if (arrow == NULL)
EXIT(1);
diff --git a/HPC.daligner.c b/HPC.daligner.c
index 649a629..578a9da 100644
--- a/HPC.daligner.c
+++ b/HPC.daligner.c
@@ -16,6 +16,7 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <dirent.h>
#include "DB.h"
#include "filter.h"
@@ -23,7 +24,7 @@
#undef LSF // define if want a directly executable LSF script
static char *Usage[] =
- { "[-vbad] [-t<int>] [-w<int(6)>] [-l<int(1000)>] [-s<int(100)]",
+ { "[-vbad] [-t<int>] [-w<int(6)>] [-l<int(1000)>] [-s<int(100)] [-P<dir(/tmp)>]",
" [-M<int>] [-B<int(4)>] [-D<int( 250)>] [-T<int(4)>] [-f<name>]",
" ( [-k<int(14)>] [-h<int(35)>] [-e<double(.70)>] [-H<int>]",
" [-k<int(20)>] [-h<int(50)>] [-e<double(.85)>] <ref:db|dam> )",
@@ -40,6 +41,7 @@ static double EREL;
static int MMAX, MTOP;
static char **MASK;
static char *ONAME;
+static char *PDIR;
#define LSF_ALIGN "bsub -q medium -n 4 -o DALIGNER.out -e DALIGNER.err -R span[hosts=1] -J align#%d"
#define LSF_MERGE \
@@ -236,6 +238,8 @@ void daligner_script(int argc, char *argv[])
fprintf(out," -s%d",SINT);
if (MINT >= 0)
fprintf(out," -M%d",MINT);
+ if (PDIR != NULL)
+ fprintf(out," -P%s",PDIR);
if (NTHREADS != 4)
fprintf(out," -T%d",NTHREADS);
for (k = 0; k < MTOP; k++)
@@ -621,10 +625,10 @@ void daligner_script(int argc, char *argv[])
last = (dnt == 1 || i == level);
for (j = 1; j < fblock; j++)
{ low = 1;
- if (DON)
- fprintf(out,"cd work%d\n",j);
for (p = 1; p <= dits; p++)
{ hgh = (dnt*p)/dits;
+ if (DON)
+ fprintf(out,"cd work%d; ",j);
fprintf(out,"rm");
if (last)
fprintf(out," L%d.%d.0.las",i,j);
@@ -633,31 +637,31 @@ void daligner_script(int argc, char *argv[])
fprintf(out," %s.%d.%s.%d.las",root,j,root,k+(fblock-1));
else
fprintf(out," L%d.%d.%d.las",i,j,k);
+ if (DON)
+ fprintf(out,"; cd ..");
fprintf(out,"\n");
low = hgh+1;
}
- if (DON)
- fprintf(out,"cd ..\n");
}
}
for (j = fblock; j <= lblock; j++)
{ low = 1;
- if (DON)
- fprintf(out,"cd work%d\n",j);
for (p = 1; p <= bits; p++)
{ hgh = (cnt*p)/bits;
+ if (DON)
+ fprintf(out,"cd work%d; ",j);
fprintf(out,"rm");
for (k = low; k <= hgh; k++)
if (i == 1)
fprintf(out," %s.%d.%s.%d.las",root,j,root,k);
else
fprintf(out," L%d.%d.%d.las",i,j,k);
+ if (DON)
+ fprintf(out,"; cd ..");
fprintf(out,"\n");
low = hgh+1;
}
- if (DON)
- fprintf(out,"cd ..\n");
}
if (ONAME != NULL)
@@ -929,6 +933,8 @@ void mapper_script(int argc, char *argv[])
fprintf(out," -T%d",NTHREADS);
if (MINT >= 0)
fprintf(out," -M%d",MINT);
+ if (PDIR != NULL)
+ fprintf(out," -P%s",PDIR);
for (k = 0; k < MTOP; k++)
fprintf(out," -m%s",MASK[k]);
@@ -1201,26 +1207,26 @@ void mapper_script(int argc, char *argv[])
fprintf(out,"# Remove level %d .las files\n",i);
for (j = fblock; j <= lblock; j++)
- { if (DON)
- fprintf(out,"cd work%d\n",j);
- low = 1;
+ { low = 1;
for (p = 1; p <= bits; p++)
{ hgh = (cnt*p)/bits;
+ if (DON)
+ fprintf(out,"cd work%d; ",j);
fprintf(out,"rm");
for (k = low; k <= hgh; k++)
if (i == 1)
{ fprintf(out," %s",root2);
if (useblock2)
fprintf(out,".%d",j);
- fprintf(out,".%s.%d",root1,k);
+ fprintf(out,".%s.%d.las",root1,k);
}
else
fprintf(out," L%d.%d.%d.las",i,j,k);
+ if (DON)
+ fprintf(out,"; cd ..");
fprintf(out,"\n");
low = hgh+1;
}
- if (DON)
- fprintf(out,"cd ..\n");
}
if (ONAME != NULL)
@@ -1262,6 +1268,7 @@ int main(int argc, char *argv[])
LINT = 1000;
SINT = 100;
MINT = -1;
+ PDIR = NULL;
MTOP = 0;
MMAX = 10;
@@ -1294,6 +1301,10 @@ int main(int argc, char *argv[])
break;
case 'k':
ARG_POSITIVE(KINT,"K-mer length")
+ if (KINT > 32)
+ { fprintf(stderr,"%s: K-mer length must be 32 or less\n",Prog_Name);
+ exit (1);
+ }
break;
case 'l':
ARG_POSITIVE(LINT,"Minimum ovlerap length")
@@ -1333,6 +1344,9 @@ int main(int argc, char *argv[])
case 'M':
ARG_NON_NEGATIVE(MINT,"Memory allocation (in Gb)")
break;
+ case 'P':
+ PDIR = argv[i]+2;
+ break;
case 'T':
ARG_POSITIVE(NTHREADS,"Number of threads")
break;
diff --git a/LAcat.c b/LAcat.c
index 5b39340..c0f61f8 100644
--- a/LAcat.c
+++ b/LAcat.c
@@ -96,7 +96,7 @@ int main(int argc, char *argv[])
SYSTEM_ERROR
if (i == 0)
{ tspace = mspace;
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
tbytes = sizeof(uint16);
@@ -108,8 +108,10 @@ int main(int argc, char *argv[])
fclose(input);
}
- fwrite(&novl,sizeof(int64),1,stdout);
- fwrite(&tspace,sizeof(int),1,stdout);
+ if (fwrite(&novl,sizeof(int64),1,stdout) != 1)
+ SYSTEM_ERROR
+ if (fwrite(&tspace,sizeof(int),1,stdout) != 1)
+ SYSTEM_ERROR
}
{ int i, j;
@@ -141,7 +143,7 @@ int main(int argc, char *argv[])
{ if (iptr + ovlsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
@@ -151,24 +153,25 @@ int main(int argc, char *argv[])
tsize = w->path.tlen*tbytes;
if (optr + ovlsize + tsize > otop)
- { fwrite(oblock,1,optr-oblock,stdout);
+ { if (fwrite(oblock,1,optr-oblock,stdout) != (size_t) (optr-oblock))
+ SYSTEM_ERROR
optr = oblock;
}
- memcpy(optr,iptr,ovlsize);
+ memmove(optr,iptr,ovlsize);
optr += ovlsize;
iptr += ovlsize;
if (iptr + tsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
}
- memcpy(optr,iptr,tsize);
+ memmove(optr,iptr,tsize);
optr += tsize;
iptr += tsize;
}
@@ -177,7 +180,9 @@ int main(int argc, char *argv[])
}
if (optr > oblock)
- fwrite(oblock,1,optr-oblock,stdout);
+ { if (fwrite(oblock,1,optr-oblock,stdout) != (size_t) (optr-oblock))
+ SYSTEM_ERROR
+ }
}
if (VERBOSE)
diff --git a/LAcheck.c b/LAcheck.c
index 5de3efe..4c878ac 100644
--- a/LAcheck.c
+++ b/LAcheck.c
@@ -152,7 +152,7 @@ int main(int argc, char *argv[])
goto error;
}
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
tbytes = sizeof(uint16);
@@ -179,7 +179,7 @@ int main(int argc, char *argv[])
if (iptr + ovlsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
@@ -197,7 +197,7 @@ int main(int argc, char *argv[])
if (iptr + tsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
diff --git a/LAdump.c b/LAdump.c
index 716e873..8931e5c 100644
--- a/LAdump.c
+++ b/LAdump.c
@@ -22,7 +22,7 @@
#include "align.h"
static char *Usage =
- "[-cdt] [-o] <src1:db|dam> [ <src2:db|dam> ] <align:las> [ <reads:FILE> | <reads:range> ... ]";
+ "[-cdtlo] <src1:db|dam> [ <src2:db|dam> ] <align:las> [ <reads:FILE> | <reads:range> ... ]";
#define LAST_READ_SYMBOL '$'
@@ -45,7 +45,7 @@ int main(int argc, char *argv[])
int input_pts;
int OVERLAP;
- int DOCOORDS, DODIFFS, DOTRACE;
+ int DOCOORDS, DODIFFS, DOTRACE, DOLENS;
int ISTWO;
// Process options
@@ -60,7 +60,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
- ARG_FLAGS("ocdtUF")
+ ARG_FLAGS("ocdtl")
break;
}
else
@@ -71,12 +71,27 @@ int main(int argc, char *argv[])
DOCOORDS = flags['c'];
DODIFFS = flags['d'];
DOTRACE = flags['t'];
+ DOLENS = flags['l'];
if (DOTRACE)
DOCOORDS = 1;
if (argc <= 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+ fprintf(stderr,"\n");
+ fprintf(stderr," P #a #b #o #c - (#a,#b^#o) have an LA between them where #o is 'n' or 'c' and\n");
+ fprintf(stderr," #c is '>' (start of best chain), '+' (start of alternate chain),\n");
+ fprintf(stderr," '-' (continuation of chain), or '.' (no chains in file).\n");
+ fprintf(stderr,"\n");
+ fprintf(stderr," -c: C #ab #ae #bb #be - #a[#ab,#ae] aligns with #b^#o[#bb,#be]\n");
+ fprintf(stderr," -d: D # - there are # differences in the LA\n");
+ fprintf(stderr," -t: T #n - there are #n trace point intervals for the LA\n");
+ fprintf(stderr," (#d #y )^#n - there are #d difference aligning the #y bp's of B with the\n");
+ fprintf(stderr," next fixed-size interval of A\n");
+ fprintf(stderr," -l: L #la #lb - #la is the length of the a-read and #lb that of the b-read\n");
+ fprintf(stderr,"\n");
+ fprintf(stderr," -o: Output proper overlaps only\n");
+
exit (1);
}
}
@@ -263,7 +278,7 @@ int main(int argc, char *argv[])
if (fread(&tspace,sizeof(int),1,input) != 1)
SYSTEM_ERROR
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
{ small = 1;
tbytes = sizeof(uint8);
}
@@ -363,16 +378,16 @@ int main(int argc, char *argv[])
if (DOTRACE)
{ printf("+ T %lld\n",ttot);
printf("%% T %lld\n",smax);
- printf("@ T %lld\n",tmax);
+ printf("@ T %d\n",tmax);
}
}
// Read the file and display selected records
- { int j;
+ { int j, k;
uint16 *trace;
int in, npt, idx, ar;
- int64 verse;
+ HITS_READ *read1, *read2;
rewind(input);
fread(&novl,sizeof(int64),1,input);
@@ -382,6 +397,9 @@ int main(int argc, char *argv[])
if (trace == NULL)
exit (1);
+ read1 = db1->reads;
+ read2 = db2->reads;
+
in = 0;
npt = pts[0];
idx = 1;
@@ -449,6 +467,9 @@ int main(int argc, char *argv[])
printf(" .");
printf("\n");
+ if (DOLENS)
+ printf("L %d %d\n",read1[ovl->aread].rlen,read2[ovl->bread].rlen);
+
if (DOCOORDS)
printf("C %d %d %d %d\n",ovl->path.abpos,ovl->path.aepos,ovl->path.bbpos,ovl->path.bepos);
@@ -462,8 +483,8 @@ int main(int argc, char *argv[])
if (small)
Decompress_TraceTo16(ovl);
printf("T %d\n",tlen>>1);
- for (j = 0; j < tlen; j += 2)
- printf(" %3d %3d\n",trace[j],trace[j+1]);
+ for (k = 0; k < tlen; k += 2)
+ printf(" %3d %3d\n",trace[k],trace[k+1]);
}
}
diff --git a/LAindex.c b/LAindex.c
index 3f2445a..897e49d 100644
--- a/LAindex.c
+++ b/LAindex.c
@@ -83,7 +83,7 @@ int main(int argc, char *argv[])
SYSTEM_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
SYSTEM_ERROR
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
tbytes = sizeof(uint16);
@@ -126,7 +126,7 @@ int main(int argc, char *argv[])
{ if (iptr + ovlsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
@@ -161,7 +161,7 @@ int main(int argc, char *argv[])
if (iptr + tsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,input);
diff --git a/LAmerge.c b/LAmerge.c
index 6a768fd..70e42e7 100644
--- a/LAmerge.c
+++ b/LAmerge.c
@@ -258,7 +258,7 @@ int main(int argc, char *argv[])
SYSTEM_ERROR
if (i == 0)
{ tspace = mspace;
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
tbytes = sizeof(uint16);
@@ -287,8 +287,10 @@ int main(int argc, char *argv[])
free(pwd);
free(root);
- fwrite(&totl,sizeof(int64),1,output);
- fwrite(&tspace,sizeof(int),1,output);
+ if (fwrite(&totl,sizeof(int64),1,output) != 1)
+ SYSTEM_ERROR
+ if (fwrite(&tspace,sizeof(int),1,output) != 1)
+ SYSTEM_ERROR
oblock = block+fway*bsize;
optr = oblock;
@@ -350,13 +352,14 @@ int main(int argc, char *argv[])
if (src->ptr + span > src->top)
ovl_reload(src,bsize);
if (optr + span > otop)
- { fwrite(oblock,1,optr-oblock,output);
+ { if (fwrite(oblock,1,optr-oblock,output) != (size_t) (optr-oblock))
+ SYSTEM_ERROR
optr = oblock;
}
- memcpy(optr,((char *) ov) + psize,osize);
+ memmove(optr,((char *) ov) + psize,osize);
optr += osize;
- memcpy(optr,src->ptr,tsize);
+ memmove(optr,src->ptr,tsize);
optr += tsize;
src->ptr += tsize;
@@ -374,7 +377,9 @@ int main(int argc, char *argv[])
// Flush output buffer and wind up
if (optr > oblock)
- fwrite(oblock,1,optr-oblock,output);
+ { if (fwrite(oblock,1,optr-oblock,output) != (size_t) (optr-oblock))
+ SYSTEM_ERROR
+ }
fclose(output);
for (i = 0; i < fway; i++)
diff --git a/LAshow.c b/LAshow.c
index 4d6cf45..13c85fe 100644
--- a/LAshow.c
+++ b/LAshow.c
@@ -287,12 +287,12 @@ int main(int argc, char *argv[])
SYSTEM_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
SYSTEM_ERROR
- if (tspace <= 0)
- { fprintf(stderr,"%s: Garbage .las file, trace spacing <= 0 !\n",Prog_Name);
+ if (tspace < 0)
+ { fprintf(stderr,"%s: Garbage .las file, trace spacing < 0 !\n",Prog_Name);
exit (1);
}
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
{ small = 1;
tbytes = sizeof(uint8);
}
@@ -353,12 +353,18 @@ int main(int argc, char *argv[])
if (db1->maxlen < db2->maxlen)
{ mn_wide = ai_wide;
mx_wide = bi_wide;
- tp_wide = Number_Digits((int64) db1->maxlen/tspace+2);
+ if (tspace > 0)
+ tp_wide = Number_Digits((int64) db1->maxlen/tspace+2);
+ else
+ tp_wide = 0;
}
else
{ mn_wide = bi_wide;
mx_wide = ai_wide;
- tp_wide = Number_Digits((int64) db2->maxlen/tspace+2);
+ if (tspace > 0)
+ tp_wide = Number_Digits((int64) db2->maxlen/tspace+2);
+ else
+ tp_wide = 0;
}
ar_wide += (ar_wide-1)/3;
br_wide += (br_wide-1)/3;
@@ -514,15 +520,27 @@ int main(int argc, char *argv[])
else
printf("]");
+ if (!CARTOON)
+ printf(" ~ %5.2f%% ",(200.*ovl->path.diffs) /
+ ((ovl->path.aepos - ovl->path.abpos) + (ovl->path.bepos - ovl->path.bbpos)) );
+ printf(" (");
+ if (FLIP)
+ { Print_Number(aln->alen,ai_wide,stdout);
+ printf(" x ");
+ Print_Number(aln->blen,bi_wide,stdout);
+ }
+ else
+ { Print_Number(aln->blen,bi_wide,stdout);
+ printf(" x ");
+ Print_Number(aln->alen,ai_wide,stdout);
+ }
+ printf(" bps,");
if (CARTOON)
- { printf(" (");
- Print_Number(tps,tp_wide,stdout);
+ { Print_Number(tps,tp_wide,stdout);
printf(" trace pts)\n\n");
}
else
- { printf(" ~ %4.1f%% (",(200.*ovl->path.diffs) /
- ((ovl->path.aepos - ovl->path.abpos) + (ovl->path.bepos - ovl->path.bbpos)) );
- Print_Number((int64) ovl->path.diffs,mn_wide,stdout);
+ { Print_Number((int64) ovl->path.diffs,mn_wide,stdout);
printf(" diffs, ");
Print_Number(tps,tp_wide,stdout);
printf(" trace pts)\n");
@@ -581,7 +599,10 @@ int main(int argc, char *argv[])
else
aln->bseq = bseq - bmin;
- Compute_Trace_PTS(aln,work,tspace,GREEDIEST);
+ if (tspace == 0)
+ Compute_Trace_IRR(aln,work,GREEDIEST);
+ else
+ Compute_Trace_PTS(aln,work,tspace,GREEDIEST);
if (FLIP)
{ if (COMP(aln->flags))
diff --git a/LAsort.c b/LAsort.c
index ce0d8d1..72ca23e 100644
--- a/LAsort.c
+++ b/LAsort.c
@@ -49,7 +49,7 @@ static int SORT_OVL(const void *x, const void *y)
return (bl-br);
cl = COMP(ol->flags);
- cr = COMP(ol->flags);
+ cr = COMP(or->flags);
if (cl != cr)
return (cl-cr);
@@ -164,7 +164,7 @@ int main(int argc, char *argv[])
if (fread(&tspace,sizeof(int),1,input) != 1)
SYSTEM_ERROR
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
tbytes = sizeof(uint16);
@@ -182,8 +182,10 @@ int main(int argc, char *argv[])
if (foutput == NULL)
exit (1);
- fwrite(&novl,sizeof(int64),1,foutput);
- fwrite(&tspace,sizeof(int),1,foutput);
+ if (fwrite(&novl,sizeof(int64),1,foutput) != 1)
+ SYSTEM_ERROR
+ if (fwrite(&tspace,sizeof(int),1,foutput) != 1)
+ SYSTEM_ERROR
free(pwd);
free(root);
@@ -258,19 +260,22 @@ int main(int argc, char *argv[])
{ tsize = w->path.tlen*tbytes;
span = ovlsize + tsize;
if (fptr + span > ftop)
- { fwrite(fblock,1,fptr-fblock,foutput);
+ { if (fwrite(fblock,1,fptr-fblock,foutput) != (size_t) (fptr-fblock))
+ SYSTEM_ERROR
fptr = fblock;
}
- memcpy(fptr,((char *) w)+ptrsize,ovlsize);
+ memmove(fptr,((char *) w)+ptrsize,ovlsize);
fptr += ovlsize;
- memcpy(fptr,(char *) (w+1),tsize);
+ memmove(fptr,(char *) (w+1),tsize);
fptr += tsize;
w = (Overlap *) (wo += span);
}
while (wo < iend && CHAIN_NEXT(w->flags));
}
if (fptr > fblock)
- fwrite(fblock,1,fptr-fblock,foutput);
+ { if (fwrite(fblock,1,fptr-fblock,foutput) != (size_t) (fptr-fblock))
+ SYSTEM_ERROR
+ }
}
free(perm);
diff --git a/LAsplit.c b/LAsplit.c
index adf6cf5..aceb647 100644
--- a/LAsplit.c
+++ b/LAsplit.c
@@ -131,7 +131,7 @@ int main(int argc, char *argv[])
SYSTEM_ERROR
if (fread(&tspace,sizeof(int),1,stdin) != 1)
SYSTEM_ERROR
- if (tspace <= TRACE_XOVR)
+ if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
tbytes = sizeof(uint16);
@@ -141,7 +141,7 @@ int main(int argc, char *argv[])
{ int i, j;
Overlap *w;
- int low, hgh, last;
+ int64 low, hgh, last;
int64 tsize, povl;
char *iptr, *itop;
char *optr, *otop;
@@ -178,7 +178,7 @@ int main(int argc, char *argv[])
{ if (iptr + ovlsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,stdin);
@@ -201,19 +201,19 @@ int main(int argc, char *argv[])
optr = oblock;
}
- memcpy(optr,iptr,ovlsize);
+ memmove(optr,iptr,ovlsize);
optr += ovlsize;
iptr += ovlsize;
if (iptr + tsize > itop)
{ int64 remains = itop-iptr;
if (remains > 0)
- memcpy(iblock,iptr,remains);
+ memmove(iblock,iptr,remains);
iptr = iblock;
itop = iblock + remains;
itop += fread(itop,1,bsize-remains,stdin);
}
- memcpy(optr,iptr,tsize);
+ memmove(optr,iptr,tsize);
optr += tsize;
iptr += tsize;
}
diff --git a/README.md b/README.md
index 3af129a..da08fe7 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ efficient reconstruction of alignments on demand.
```
1. daligner [-vbAI]
- [-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>]
+ [-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>] [-P<dir(/tmp)>]
[-e<double(.70)] [-l<int(1000)] [-s<int(100)>] [-H<int>] [-T<int(4)>]
[-m<track>]+ <subject:db|dam> <target:db|dam> ...
```
@@ -78,6 +78,10 @@ between different portions of the same read will also be found and reported. In
the command "daligner -A X Y" produces a single file X.Y..las and "daligner X Y" produces
2 files X.Y..las and Y.X.las (unless X=Y in which case only a single file, X.X.las, is
produced). The overlap records in one of these files are sorted as described for LAsort.
+In order to produce the aforementioned .las file, several temporary .las files, two for
+each thread, are produce in the sub-directory /tmp by default. You can overide this
+location by specifying the directory you would like this activity to take place in with
+the -P option.
By default daligner compares all overlaps between reads in the database that are
greater than the minimum cutoff set when the DB or DBs were split, typically 1 or
@@ -168,7 +172,7 @@ and (part of an) alignment for which we will explain several additional
important points:
```
- 1 1,865 c [18,479..20,216] x [ 1,707.. 0> ( 19 trace pts)
+ 1 1,865 c [18,479..20,216] x [ 1,707..0> (24,451 x 7,283 bps, 19 trace pts)
18479 4235
A ========+----------+======> dif/(len1+len2) = 478/(1737+1707) = 27.76%
@@ -187,8 +191,10 @@ important points:
```
The display of an LA always begins with a line giving the A-read, then the B-read, then
-an indication of orientation (i.e. are A and B from the same strand (n) or the opposite
-strand (c)?) followed by the A-interval and B-interval that are aligned. In particular,
+an indication of orientation (i.e. 'n' for same strand, and 'c' for the opposite strand)
+followed by the A-interval and B-interval that are aligned and in parentheses
+the lengths of the two reads and the number of tracepoints in the alignment between them.
+In particular,
note carefully that when the B-read is in the complement orientation (c), then the
B-interval gives the higher coordinate first, the idea being that one will align from
the highest base down to the lowest base in the descending direction on B, complement
@@ -223,8 +229,8 @@ scoring chain and + indicates an alternate near optimal chain (controlled by the
-n parameter to damapper). Each additional LA of a chain is marked with a - character.
```
-5. LAdump [-cdt] [-o] <src1:db|dam> [ <src2:db|dam> ]
- <align:las> [ <reads:FILE> | <reads:range> ... ]
+5. LAdump [-cdtlo] <src1:db|dam> [ <src2:db|dam> ]
+ <align:las> [ <reads:FILE> | <reads:range> ... ]
```
Like LAshow, LAdump allows one to display the local alignments (LAs) of a subset of the
@@ -233,8 +239,9 @@ is that the information is written in a very simple "1-code" ASCII format that m
easy for one to read and parse the information for further use. For each LA the pair of
reads is output on a line. -c requests that one further output the coordinates of the
LA segments be output. The -d option requests that the number of difference in the LA
-be output, and -t requests that the tracepoint information be output. Finally, -o
-requests that only LAs that are proper overlaps be output.
+be output, -t requests that the tracepoint information be output, and -l requests the
+length of the two reads be output. Finally, -o requests that only LAs that are proper
+overlaps be output.
The format is very simple. Each requested piece of information occurs on a line. The
first character of every line is a "1-code" character that tells you what information
@@ -246,8 +253,9 @@ trace point interval.
```
P #a #b #o #c - (#a,#b^#o) have an LA between them where #o is 'n' or 'c' and
- #c is '>' (start of best chain), '+' (start of alternate chain),
- '-' (continuation of chain), or '.' (no chains in file)
+ #c is '>' (start of best chain), '+' (start of alternate chain),
+ '-' (continuation of chain), or '.' (no chains in file).
+ L #la #lb - #la is the length of the a-read and #lb that of the b-read
C #ab #ae #bb #be - #a[#ab,#ae] aligns with #b^#o[#bb,#be]
D # - there are # differences in the LA
T #n - there are #n trace point intervals for the LA
@@ -327,7 +335,7 @@ information, and if it does, then it checks the validity of chains and assumes t
the chains were sorted with the -a option to LAsort and LAmerge.
```
-10. HPC.daligner [-vbad] [-t<int>] [-w<int(6)>] [-l<int(1000)] [-s<int(100)]
+10. HPC.daligner [-vbad] [-t<int>] [-w<int(6)>] [-l<int(1000)] [-s<int(100)] [-P<dir(/tmp)>]
[-M<int>] [-B<int(4)>] [-D<int( 250)>] [-T<int(4)>] [-f<name>]
( [-k<int(14)>] [-h<int(35)>] [-e<double(.70)] [-H<int>]
[-k<int(20)>] [-h<int(50)>] [-e<double(.85)] <ref:db|dam> )
diff --git a/align.c b/align.c
index 449b833..d4fdfe8 100644
--- a/align.c
+++ b/align.c
@@ -3113,29 +3113,46 @@ void Print_Overlap(FILE *output, Overlap *ovl, int tbytes, int indent)
}
int Check_Trace_Points(Overlap *ovl, int tspace, int verbose, char *fname)
-{ int i, p;
-
- if (((ovl->path.aepos-1)/tspace - ovl->path.abpos/tspace)*2 != ovl->path.tlen-2)
- { if (verbose)
- EPRINTF(EPLACE," %s: Wrong number of trace points\n",fname);
- return (1);
- }
- p = ovl->path.bbpos;
- if (tspace <= TRACE_XOVR)
- { uint8 *trace8 = (uint8 *) ovl->path.trace;
- for (i = 1; i < ovl->path.tlen; i += 2)
- p += trace8[i];
+{ int i, p, q;
+
+ if (tspace != 0)
+ { if (((ovl->path.aepos-1)/tspace - ovl->path.abpos/tspace)*2 != ovl->path.tlen-2)
+ { if (verbose)
+ EPRINTF(EPLACE," %s: Wrong number of trace points\n",fname);
+ return (1);
+ }
+ p = ovl->path.bbpos;
+ if (tspace <= TRACE_XOVR)
+ { uint8 *trace8 = (uint8 *) ovl->path.trace;
+ for (i = 1; i < ovl->path.tlen; i += 2)
+ p += trace8[i];
+ }
+ else
+ { uint16 *trace16 = (uint16 *) ovl->path.trace;
+ for (i = 1; i < ovl->path.tlen; i += 2)
+ p += trace16[i];
+ }
+ if (p != ovl->path.bepos)
+ { if (verbose)
+ EPRINTF(EPLACE," %s: Trace point sum != aligned interval\n",fname);
+ return (1);
+ }
}
- else
+ else
{ uint16 *trace16 = (uint16 *) ovl->path.trace;
+
+ p = ovl->path.bbpos;
+ q = ovl->path.abpos;
for (i = 1; i < ovl->path.tlen; i += 2)
- p += trace16[i];
+ { p += trace16[i];
+ q += trace16[i-1];
+ }
+ if (p != ovl->path.bepos || q != ovl->path.aepos)
+ { if (verbose)
+ EPRINTF(EPLACE," %s: Trace point sum != aligned interval\n",fname);
+ return (1);
+ }
}
- if (p != ovl->path.bepos)
- { if (verbose)
- EPRINTF(EPLACE," %s: Trace point sum != aligned interval\n",fname);
- return (1);
- }
return (0);
}
diff --git a/daligner.c b/daligner.c
index a25de9d..ef47d6b 100644
--- a/daligner.c
+++ b/daligner.c
@@ -39,6 +39,7 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <dirent.h>
#include <sys/param.h>
#if defined(BSD)
@@ -49,12 +50,13 @@
#include "filter.h"
static char *Usage[] =
- { "[-vbAI] [-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>]",
+ { "[-vbAI] [-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>] [-P<dir(/tmp)>]",
" [-e<double(.70)] [-l<int(1000)>] [-s<int(100)>] [-H<int>] [-T<int(4)>]",
" [-m<track>]+ <subject:db|dam> <target:db|dam> ...",
};
int VERBOSE; // Globally visible to filter.c
+char *SORT_PATH;
int BIASED;
int MINOVER;
int HGAP_MIN;
@@ -440,7 +442,7 @@ static HITS_DB *complement_DB(HITS_DB *block, int inplace)
if (seq == NULL)
exit (1);
*seq++ = 4;
- memcpy(seq,block->bases,block->reads[nreads].boff);
+ memmove(seq,block->bases,block->reads[nreads].boff);
*cblock = *block;
cblock->bases = (void *) seq;
cblock->tracks = NULL;
@@ -556,6 +558,7 @@ int main(int argc, char *argv[])
{ int i, j, k;
int flags[128];
char *eptr;
+ DIR *dirp;
ARG_INIT("daligner")
@@ -568,6 +571,7 @@ int main(int argc, char *argv[])
SPACING = 100;
MINOVER = 1000; // Globally visible to filter.c
NTHREADS = 4;
+ SORT_PATH = "/tmp";
MEM_PHYSICAL = getMemorySize();
MEM_LIMIT = MEM_PHYSICAL;
@@ -640,6 +644,14 @@ int main(int argc, char *argv[])
}
MASK[MTOP++] = argv[i]+2;
break;
+ case 'P':
+ SORT_PATH = argv[i]+2;
+ if ((dirp = opendir(SORT_PATH)) == NULL)
+ { fprintf(stderr,"%s: -P option: cannot open directory %s\n",Prog_Name,SORT_PATH);
+ exit (1);
+ }
+ closedir(dirp);
+ break;
case 'T':
ARG_POSITIVE(NTHREADS,"Number of threads")
break;
@@ -703,11 +715,11 @@ int main(int argc, char *argv[])
if (i == 2)
{ for (j = 0; j < MTOP; j++)
{ if (MSTAT[j] == -2)
- printf("%s: Warning: -m%s option given but no track found.\n",Prog_Name,MASK[i]);
+ printf("%s: Warning: -m%s option given but no track found.\n",Prog_Name,MASK[j]);
else if (MSTAT[j] == -1)
- printf("%s: Warning: %s track not sync'd with relevant db.\n",Prog_Name,MASK[i]);
+ printf("%s: Warning: %s track not sync'd with relevant db.\n",Prog_Name,MASK[j]);
else if (MSTAT[j] == -3)
- printf("%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[i]);
+ printf("%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[j]);
}
if (VERBOSE)
@@ -744,28 +756,29 @@ int main(int argc, char *argv[])
command = CommandBuffer(aroot,broot);
- sprintf(command,"LAsort /tmp/%s.%s.[CN]*.las",aroot,broot);
+ sprintf(command,"LAsort %s/%s.%s.[CN]*.las",SORT_PATH,aroot,broot);
if (VERBOSE)
printf("\n%s\n",command);
system(command);
- sprintf(command,"LAmerge %s.%s.las /tmp/%s.%s.[CN]*.S.las",aroot,broot,aroot,broot);
+ sprintf(command,"LAmerge %s.%s.las %s/%s.%s.[CN]*.S.las",aroot,broot,SORT_PATH,aroot,broot);
if (VERBOSE)
printf("%s\n",command);
system(command);
- sprintf(command,"rm /tmp/%s.%s.[CN]*.las",aroot,broot);
+ sprintf(command,"rm %s/%s.%s.[CN]*.las",SORT_PATH,aroot,broot);
if (VERBOSE)
printf("%s\n",command);
system(command);
if (aroot != broot && SYMMETRIC)
- { sprintf(command,"LAsort /tmp/%s.%s.[CN]*.las",broot,aroot);
+ { sprintf(command,"LAsort %s/%s.%s.[CN]*.las",SORT_PATH,broot,aroot);
if (VERBOSE)
printf("%s\n",command);
system(command);
- sprintf(command,"LAmerge %s.%s.las /tmp/%s.%s.[CN]*.S.las",broot,aroot,broot,aroot);
+ sprintf(command,"LAmerge %s.%s.las %s/%s.%s.[CN]*.S.las",broot,aroot,
+ SORT_PATH,broot,aroot);
if (VERBOSE)
printf("%s\n",command);
system(command);
- sprintf(command,"rm /tmp/%s.%s.[CN]*.las",broot,aroot);
+ sprintf(command,"rm %s/%s.%s.[CN]*.las",SORT_PATH,broot,aroot);
if (VERBOSE)
printf("%s\n",command);
system(command);
diff --git a/filter.c b/filter.c
index 90ca7c3..97c4868 100644
--- a/filter.c
+++ b/filter.c
@@ -27,6 +27,8 @@
#include "filter.h"
#include "align.h"
+#undef FOR_PACBIO
+
#define THREAD pthread_t
#define MAX_BIAS 2 // In -b mode, don't consider tuples with specificity
@@ -45,6 +47,7 @@
#undef TEST_CSORT
#define HOW_MANY 3000 // Print first HOW_MANY items for each of the TEST options above
+#define DO_ALIGNMENT
#undef TEST_GATHER
#undef TEST_CONTAIN
#undef SHOW_OVERLAP // Show the cartoon
@@ -448,8 +451,8 @@ static void *tuple_thread(void *arg)
kptr[BMASK] += (data->fill = m-n);
while (n < m)
{ list[n].code = 0xffffffffffffffffllu;
- list[n].read = 0xffffffff;
- list[n].rpos = 0xffffffff;
+ list[n].read = -1;
+ list[n].rpos = -1;
n += 1;
}
}
@@ -590,8 +593,8 @@ static void *biased_tuple_thread(void *arg)
kptr[BMASK] += (data->fill = m-n);
while (n < m)
{ list[n].code = 0xffffffffffffffffllu;
- list[n].read = 0xffffffff;
- list[n].rpos = 0xffffffff;
+ list[n].read = -1;
+ list[n].rpos = -1;
n += 1;
}
@@ -745,8 +748,34 @@ void *Sort_Kmers(HITS_DB *block, int *len)
rez = (KmerPos *) lex_sort(mersort,(Double *) src,(Double *) trg,parmx);
if (BIASED || TA_track != NULL)
- for (i = 0; i < NTHREADS; i++)
- kmers -= parmt[i].fill;
+ { if (Kmer%4 == 0)
+ { int wedge[NTHREADS];
+
+ for (j = 0; j < NTHREADS; j++)
+ if (parmt[j].fill > 0)
+ break;
+ j += 1;
+ if (j < NTHREADS)
+ { x = kmers-1;
+ for (i = NTHREADS-1; i >= j; i--)
+ { x = x - parmt[i].fill;
+ z = x;
+ while (rez[x].read >= 0)
+ x -= 1;
+ wedge[i] = z-x;
+ }
+ x += 1;
+ z = x-parmt[j-1].fill;
+ for (i = j; i < NTHREADS; i++)
+ { memmove(rez+z,rez+x,wedge[i]*sizeof(KmerPos));
+ x += wedge[i] + parmt[i].fill;
+ z += wedge[i];
+ }
+ }
+ }
+ for (i = 0; i < NTHREADS; i++)
+ kmers -= parmt[i].fill;
+ }
if (TooFrequent < INT32_MAX && kmers > 0)
{ parmf[0].beg = 0;
@@ -808,7 +837,7 @@ void *Sort_Kmers(HITS_DB *block, int *len)
printf("\nKMER SORT:\n");
for (i = 0; i < HOW_MANY && i < kmers; i++)
{ KmerPos *c = rez+i;
- printf(" %5d / %5d / %10lld\n",c->read,c->rpos,c->code);
+ printf(" %9d: %6d / %6d / %16llx\n",i,c->read,c->rpos,c->code);
}
fflush(stdout);
}
@@ -1613,7 +1642,7 @@ static void *report_thread(void *arg)
hitc = hitd + (minhit-1);
eidx = data->end - minhit;
nidx = data->beg;
- for (cpair = hitd[nidx].p2; nidx < eidx; cpair = npair)
+ for (cpair = hitd[nidx].p2; nidx <= eidx; cpair = npair)
if (hitc[nidx].p2 != cpair)
{ nidx += 1;
while ((npair = hitd[nidx].p2) == cpair)
@@ -1691,9 +1720,14 @@ static void *report_thread(void *arg)
align->blen = blen;
ovlb->bread = ovla->aread = ar + afirst;
ovlb->aread = ovla->bread = br + bfirst;
+#ifdef FOR_PACBIO
+ doA = 1;
+ doB = (SYMMETRIC && (ar != br || !MG_self || !MG_comp));
+#else
doA = (alen >= HGAP_MIN);
doB = (SYMMETRIC && blen >= HGAP_MIN &&
(ar != br || !MG_self || !MG_comp));
+#endif
}
#ifdef TEST_GATHER
else
@@ -1708,6 +1742,7 @@ static void *report_thread(void *arg)
#endif
nfilt += 1;
+#ifdef DO_ALIGNMENT
bpath = Local_Alignment(align,work,MR_spec,apos-bpos,apos-bpos,apos+bpos,-1,-1);
{ int low, hgh, ae;
@@ -1744,7 +1779,7 @@ static void *report_thread(void *arg)
}
amatch[novla] = *apath;
amatch[novla].trace = (void *) (tbuf->top);
- memcpy(tbuf->trace+tbuf->top,apath->trace,sizeof(short)*apath->tlen);
+ memmove(tbuf->trace+tbuf->top,apath->trace,sizeof(short)*apath->tlen);
novla += 1;
tbuf->top += apath->tlen;
}
@@ -1765,7 +1800,7 @@ static void *report_thread(void *arg)
}
bmatch[novlb] = *bpath;
bmatch[novlb].trace = (void *) (tbuf->top);
- memcpy(tbuf->trace+tbuf->top,bpath->trace,sizeof(short)*bpath->tlen);
+ memmove(tbuf->trace+tbuf->top,bpath->trace,sizeof(short)*bpath->tlen);
novlb += 1;
tbuf->top += bpath->tlen;
}
@@ -1792,6 +1827,7 @@ static void *report_thread(void *arg)
printf(" No alignment %d",
((apath->aepos-apath->abpos) + (apath->bepos-apath->bbpos))/2);
#endif
+#endif // DO_ALIGNMENT
}
}
@@ -1843,7 +1879,7 @@ static void *report_thread(void *arg)
if (small)
Compress_TraceTo8(ovla);
if (Write_Overlap(ofile1,ovla,tbytes))
- { fprintf(stderr,"%s: Cannot write to /tmp, too small?\n",Prog_Name);
+ { fprintf(stderr,"%s: Cannot write to %s too small?\n",SORT_PATH,Prog_Name);
exit (1);
}
}
@@ -1853,7 +1889,7 @@ static void *report_thread(void *arg)
if (small)
Compress_TraceTo8(ovlb);
if (Write_Overlap(ofile2,ovlb,tbytes))
- { fprintf(stderr,"%s: Cannot write to /tmp, too small?\n",Prog_Name);
+ { fprintf(stderr,"%s: Cannot write to %s, too small?\n",SORT_PATH,Prog_Name);
exit (1);
}
}
@@ -2202,14 +2238,14 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
parmr[i].lasta = parmr[i].lastp + w;
parmr[i].work = New_Work_Data();
- sprintf(fname,"/tmp/%s.%s.%c%d.las",aname,bname,(comp?'C':'N'),i+1);
+ sprintf(fname,"%s/%s.%s.%c%d.las",SORT_PATH,aname,bname,(comp?'C':'N'),i+1);
parmr[i].ofile1 = Fopen(fname,"w");
if (parmr[i].ofile1 == NULL)
exit (1);
if (MG_self)
parmr[i].ofile2 = parmr[i].ofile1;
else if (SYMMETRIC)
- { sprintf(fname,"/tmp/%s.%s.%c%d.las",bname,aname,(comp?'C':'N'),i+1);
+ { sprintf(fname,"%s/%s.%s.%c%d.las",SORT_PATH,bname,aname,(comp?'C':'N'),i+1);
parmr[i].ofile2 = Fopen(fname,"w");
if (parmr[i].ofile2 == NULL)
exit (1);
@@ -2254,13 +2290,13 @@ zerowork:
nhits = 0;
for (i = 0; i < NTHREADS; i++)
- { sprintf(fname,"/tmp/%s.%s.%c%d.las",aname,bname,(comp?'C':'N'),i+1);
+ { sprintf(fname,"%s/%s.%s.%c%d.las",SORT_PATH,aname,bname,(comp?'C':'N'),i+1);
ofile = Fopen(fname,"w");
fwrite(&nhits,sizeof(int64),1,ofile);
fwrite(&MR_tspace,sizeof(int),1,ofile);
fclose(ofile);
if (! MG_self && SYMMETRIC)
- { sprintf(fname,"/tmp/%s.%s.%c%d.las",bname,aname,(comp?'C':'N'),i+1);
+ { sprintf(fname,"%s/%s.%s.%c%d.las",SORT_PATH,bname,aname,(comp?'C':'N'),i+1);
ofile = Fopen(fname,"w");
fwrite(&nhits,sizeof(int64),1,ofile);
fwrite(&MR_tspace,sizeof(int),1,ofile);
diff --git a/filter.h b/filter.h
index db565f1..48e3222 100644
--- a/filter.h
+++ b/filter.h
@@ -20,6 +20,7 @@ extern int MINOVER;
extern int HGAP_MIN;
extern int SYMMETRIC;
extern int IDENTITY;
+extern char *SORT_PATH;
extern uint64 MEM_LIMIT;
extern uint64 MEM_PHYSICAL;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/daligner.git
More information about the debian-med-commit
mailing list