[med-svn] [Git][med-team/daligner][master] 5 commits: New upstream version 1.0+git20240111.a9f62ac
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Sat Jan 13 14:25:16 GMT 2024
Étienne Mollier pushed to branch master at Debian Med / daligner
Commits:
57049af7 by Étienne Mollier at 2024-01-13T15:13:43+01:00
New upstream version 1.0+git20240111.a9f62ac
- - - - -
33d9153b by Étienne Mollier at 2024-01-13T15:13:43+01:00
routine-update: New upstream version
- - - - -
6022084a by Étienne Mollier at 2024-01-13T15:13:45+01:00
Update upstream source from tag 'upstream/1.0+git20240111.a9f62ac'
Update to upstream version '1.0+git20240111.a9f62ac'
with Debian dir e21db6befef107a9565d84cba91a9802c0ccaedb
- - - - -
8c10c64b by Étienne Mollier at 2024-01-13T15:20:59+01:00
typos.patch: remove: applied upstream.
- - - - -
025a52ba by Étienne Mollier at 2024-01-13T15:24:23+01:00
ready to upload to unstable.
- - - - -
12 changed files:
- DB.c
- LAshow.c
- LAsplit.c
- ONE2LA.c
- QV.c
- align.c
- align.h
- daligner.c
- debian/changelog
- debian/patches/series
- debian/patches/typos.patch (deleted)
- lsd.sort.c
Changes:
=====================================
DB.c
=====================================
@@ -2208,7 +2208,7 @@ int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra)
}
if (accum != extra->accum)
{ EPRINTF(EPLACE,
- "%s: Reduction indicator of extra %s does not agree with previos .anno block files\n",
+ "%s: Reduction indicator of extra %s does not agree with previous .anno block files\n",
Prog_Name,name);
goto error;
}
@@ -2829,7 +2829,7 @@ static Block_Looper *parse_block_arg(char *arg, int isDB)
first = last = -1;
else
{ if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
- { EPRINTF(EPLACE,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+ { EPRINTF(EPLACE,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
Prog_Name,BLOCK_SYMBOL,root);
goto error;
}
=====================================
LAshow.c
=====================================
@@ -18,6 +18,8 @@
#include <sys/types.h>
#include <sys/stat.h>
+#undef DEBUG_GAP_IMPROVER
+
#include "DB.h"
#include "align.h"
@@ -695,8 +697,10 @@ int main(int argc, char *argv[])
Compute_Trace_IRR(aln,work,GREEDIEST);
else
Compute_Trace_PTS(aln,work,tspace,GREEDIEST);
+ Gap_Improver(aln,work);
}
+#ifdef DEBUG_GAP_IMPROVER
aln->path->abpos += aoffs;
aln->path->aepos += aoffs;
aln->alen = alens;
@@ -716,6 +720,7 @@ int main(int argc, char *argv[])
else
trace[i] += boffs;
}
+#endif
if (CARTOON)
Alignment_Cartoon(stdout,aln,INDENT,mx_wide);
=====================================
LAsplit.c
=====================================
@@ -105,7 +105,7 @@ int main(int argc, char *argv[])
exit (1);
}
if (index(root2+1,BLOCK_SYMBOL) != NULL)
- { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+ { fprintf(stderr,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
Prog_Name,BLOCK_SYMBOL,root);
exit (1);
}
=====================================
ONE2LA.c
=====================================
@@ -152,7 +152,7 @@ int main(int argc, char *argv[])
{ t = oneReadLine(file1);
if (t == 0)
- { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxilliary lines\n");
+ { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxiliary lines\n");
exit (1);
}
if (has[t] > 0 && t != 'T' && t != 'Q')
=====================================
QV.c
=====================================
@@ -1319,7 +1319,7 @@ error:
EXIT(NULL);
}
- // Free all the auxilliary storage associated with the encoding argument
+ // Free all the auxiliary storage associated with the encoding argument
void Free_QVcoding(QVcoding *coding)
{ if (coding->subChar >= 0)
=====================================
align.c
=====================================
@@ -173,7 +173,7 @@ void Free_Work_Data(Work_Data *ework)
#define PATH_INT 0x0fffffffffffffffll // Must be PATH_TOP-1
#define TRIM_MASK 0x7fff // Must be (1 << TRIM_LEN) - 1
#define TRIM_MLAG 250 // How far can last trim point be behind best point
-#define WAVE_LAG 30 // How far can worst point be behind the best point
+#define WAVE_LAG 60 // How far can worst point be behind the best point
static double Bias_Factor[10] = { .690, .690, .690, .690, .780,
.850, .900, .933, .966, 1.000 };
@@ -3379,7 +3379,9 @@ int Print_Alignment(FILE *file, Alignment *align, Work_Data *ework,
blen = align->blen;
Abuf[width] = Bbuf[width] = Dbuf[width] = '\0';
- /* buffer/output next column */
+
+ // buffer/output next column
+
#define COLUMN(x,y) \
{ int u, v; \
if (o >= width) \
@@ -5494,3 +5496,397 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
return (0);
}
+
+#undef DEBUG_BOX
+#undef DEBUG_DP
+#undef DEBUG_BACK
+#undef BOX_STATS
+
+#define LONG_SNAKE 50
+
+#ifdef DEBUG
+
+static int ASCII[5] = { 'a', 'c', 'g', 't', '.' };
+
+static inline void print_string(char *a, int l)
+{ int i;
+
+ for (i = 0; i < l; i++)
+ printf("%c",ASCII[(int) a[i]]);
+}
+
+#endif
+
+static inline int hamming(char *a, char *b, int n)
+{ int h, i, x, y;
+
+ h = 0;
+ for (i = 0; i < n; i++)
+ { x = *a++;
+ if (x == 4)
+ break;
+ y = *b++;
+ if (x != y)
+ { if (y == 4)
+ break;
+ else
+ h += 1;
+ }
+ }
+ return (h);
+}
+
+static inline int snake(char *a, char *b)
+{ int i, x;
+
+ for (i = 0; 1; i++)
+ { x = *a++;
+ if (x == 4)
+ break;
+ if (x != *b++)
+ break;
+ }
+ return (i);
+}
+
+static inline int rsnake(char *a, char *b)
+{ int i, x;
+
+ for (i = 0; 1; i++)
+ { x = *--a;
+ if (x == 4)
+ break;
+ if (x != *--b)
+ break;
+ }
+ return (i);
+}
+
+#ifdef BOX_STATS
+
+static int MaxBxArea;
+static int MaxBxWidth;
+static int MaxBxHeight;
+static int64 SumBx;
+static int NumBx;
+static int BxHist[101];
+static int BxExtend;
+static int BxGaps;
+
+void BeginBoxStats()
+{ int i;
+
+ MaxBxArea = 0;
+ MaxBxWidth = 0;
+ MaxBxHeight = 0;
+ SumBx = 0;
+ NumBx = 0;
+ for (i = 0; i <= 100; i++)
+ BxHist[i] = 0;
+ BxExtend = 0;
+ BxGaps = 0;
+}
+
+void EndBoxStats()
+{ int i;
+
+ printf("\n# of Boxes = %d with average work %lld\n",NumBx,SumBx/NumBx);
+ printf("\nMax Work = %d\n",MaxBxArea);
+ printf("Max Diags = %d\n",MaxBxWidth);
+ printf("Max Waves = %d\n",MaxBxHeight);
+ printf("\nBox extended = %d\n",BxExtend);
+ printf("Gaps removed = %d\n",BxGaps);
+ printf("\nHistogram of box work:\n");
+ for (i = 0; i <= 100; i++)
+ if (BxHist[i] > 0)
+ printf(" %3d00: %10d\n",i,BxHist[i]);
+}
+
+#endif
+
+void Gap_Improver(Alignment *aln, Work_Data *ework)
+{ _Work_Data *work = (_Work_Data *) ework;
+ int *F, *H;
+ int *f, *h;
+
+ char *A, *B;
+ int x;
+ int p, q;
+ int d, m;
+ int *t, T;
+ int Fpos, Lpos, Fdag, Hamm, Gaps, Diag;
+ int passes;
+
+ A = aln->aseq-1;
+ B = aln->bseq-1;
+ t = (int *) aln->path->trace;
+ T = aln->path->tlen;
+ F = (int *) work->vector;
+
+ d = aln->path->abpos - aln->path->bbpos;
+ q = t[0];
+ x = 0;
+ while (x < T)
+ { p = q;
+ m = x;
+ Fdag = d;
+ Fpos = p;
+ Hamm = 0;
+ Gaps = 1;
+ while (1)
+ { x += 1;
+ q = 0;
+ if (x >= T || (q = t[x]) != p)
+ { m = x-m;
+ if (p < 0)
+ { d -= m;
+ if (q >= 0)
+ break;
+ if (p-q >= LONG_SNAKE)
+ break;
+ Hamm += hamming(A-p,B-(d+p),p-q);
+ }
+ else
+ { d += m;
+ if (q <= 0)
+ break;
+ if (q-p >= LONG_SNAKE)
+ break;
+ Hamm += hamming(A+(p+d),B+p,q-p);
+ }
+ Gaps += 1;
+ p = q;
+ m = x;
+ }
+ }
+ if (Gaps == 1)
+ continue;
+ Lpos = p;
+ Diag = abs(Fdag-d)+1;
+
+ // Process box
+
+ p = Diag*(Gaps+Hamm+1)*sizeof(int);
+ if (p > work->vecmax)
+ { if (enlarge_vector(work,p))
+ EXIT (1);
+ F = (int *) work->vector;
+ }
+ H = F + Diag;
+
+#ifdef BOX_STATS
+ { int hgt = Gaps+Hamm+1;
+ int area = Diag*hgt;
+ if (area > MaxBxArea)
+ MaxBxArea = area;
+ if (Diag > MaxBxWidth)
+ MaxBxWidth = Diag;
+ if (hgt > MaxBxHeight)
+ MaxBxHeight = hgt;
+ NumBx += 1;
+ SumBx += area;
+ if (area >= 10000)
+ BxHist[100] += 1;
+ else
+ BxHist[area/100] += 1;
+ }
+#endif
+#ifdef DEBUG_BOX
+ printf("Box: %5d :: %4d x %3d (%2d+%2d) :: %5d .. %5d %6d .. %6d\n",
+ (Gaps+Hamm+1)*Diag,abs(Fpos-Lpos),Diag,Hamm,Gaps,Fpos,Lpos,Fdag,d);
+ fflush(stdout);
+#endif
+
+ if (Fpos < 0)
+ { Fpos = -Fpos;
+ Lpos = -Lpos;
+
+ while (A[Fpos-1] != B[(Fpos-Fdag)-1] && A[Fpos-1] != 4 && B[(Fpos-Fdag)-1] != 4)
+ { Fpos -= 1;
+#ifdef BOX_STATS
+ BxExtend += 1;
+#endif
+ }
+ while (A[Lpos] != B[Lpos-d] && A[Lpos] != 4 && B[Lpos-d] != 4)
+ { Lpos += 1;
+#ifdef BOX_STATS
+ BxExtend += 1;
+#endif
+ }
+
+ f = F;
+ *f++ = p = Fpos + snake(A+Fpos,B+(Fpos-Fdag));
+ for (m = Fdag-1; m >= d; m--)
+ *f++ = Fpos-1;
+ passes = 0;
+
+#ifdef DEBUG_DP
+ printf(" %2d:",passes);
+ for (m = Fdag; m >= d; m--)
+ printf(" %d",F[Fdag-m]);
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ h = H;
+ p = Fpos;
+ while (p < Lpos)
+ { int b, c;
+
+ b = Fpos;
+ c = 0;
+ f = F;
+ for (m = Fdag; m >= d; m--)
+ { p = b;
+ if (*f >= b)
+ { b = *f;
+ c = 0;
+ p = b+1;
+ }
+ else
+ c += 1;
+ *h++ = c;
+ *f++ = p += snake(A+p,B+(p-m));
+ }
+ passes += 1;
+
+#ifdef DEBUG_DP
+ printf(" %2d:",passes);
+ for (m = Fdag; m >= d; m--)
+ printf(" %d(%2d)",F[Fdag-m],h[(d-m)-1]);
+ printf("\n");
+ fflush(stdout);
+#endif
+ }
+
+ if (passes < Gaps+Hamm)
+ { int y, k;
+
+ p = Lpos;
+ m = d;
+ y = x;
+#ifdef DEBUG_BACK
+ printf("Short cut %d\n",(Gaps+Hamm)-passes);
+ printf("Path (%d,%d)",p,m);
+#endif
+#ifdef BOX_STATS
+ BxGaps += (Gaps+Hamm)-passes;
+#endif
+ while (h > H)
+ { p -= rsnake(A+p,B+(p-m));
+ if (p < Fpos)
+ p = Fpos;
+ h -= Diag;
+ k = h[Fdag-m];
+ if (k == 0)
+ p -= 1;
+ else
+ { m += k;
+ for (; k > 0; k--)
+ t[--y] = -p;
+ }
+#ifdef DEBUG_BACK
+ printf(" (%d,%d)",p,m);
+#endif
+ }
+#ifdef DEBUG_BACK
+ printf("\n");
+#endif
+ }
+ }
+ else
+ { while (B[Fpos-1] != A[(Fpos+Fdag)-1] && B[Fpos-1] != 4 && A[(Fpos+Fdag)-1] != 4)
+ { Fpos -= 1;
+#ifdef BOX_STATS
+ BxExtend += 1;
+#endif
+ }
+ while (B[Lpos] != A[Lpos+d] && B[Lpos] != 4 && A[Lpos+d] != 4)
+ { Lpos += 1;
+#ifdef BOX_STATS
+ BxExtend += 1;
+#endif
+ }
+
+ f = F;
+ *f++ = p = Fpos + snake(A+(Fpos+Fdag),B+Fpos);
+ for (m = Fdag+1; m <= d; m++)
+ *f++ = Fpos-1;
+ passes = 0;
+
+#ifdef DEBUG_DP
+ printf(" %2d:",passes);
+ for (m = Fdag; m <= d; m++)
+ printf(" %d",F[m-Fdag]);
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ h = H;
+ p = Fpos;
+ while (p < Lpos)
+ { int b, c;
+
+ b = Fpos;
+ c = 0;
+ f = F;
+ for (m = Fdag; m <= d; m++)
+ { p = b;
+ if (*f >= b)
+ { b = *f;
+ c = 0;
+ p = b+1;
+ }
+ else
+ c += 1;
+ *h++ = c;
+ *f++ = p += snake(A+(m+p),B+p);
+ }
+ passes += 1;
+
+#ifdef DEBUG_DP
+ printf(" %2d:",passes);
+ for (m = Fdag; m <= d; m++)
+ printf(" %d(%2d)",F[m-Fdag],h[(m-d)-1]);
+ printf("\n");
+ fflush(stdout);
+#endif
+ }
+
+ if (passes < Gaps+Hamm)
+ { int y, k;
+
+ p = Lpos;
+ m = d;
+ y = x;
+#ifdef DEBUG_BACK
+ printf("Short cut %d\n",(Gaps+Hamm)-passes);
+ printf("Path (%d,%d)",p,m);
+#endif
+#ifdef BOX_STATS
+ BxGaps += (Gaps+Hamm)-passes;
+#endif
+ while (h > H)
+ { p -= rsnake(A+(p+m),B+p);
+ if (p < Fpos)
+ p = Fpos;
+ h -= Diag;
+ k = h[m-Fdag];
+ if (k == 0)
+ p -= 1;
+ else
+ { m -= k;
+ for (; k > 0; k--)
+ t[--y] = p;
+ }
+#ifdef DEBUG_BACK
+ printf(" (%d,%d)",p,m);
+#endif
+ }
+#ifdef DEBUG_BACK
+ printf("\n");
+#endif
+ }
+ }
+ }
+}
=====================================
align.h
=====================================
@@ -375,4 +375,12 @@ typedef struct {
int Check_Trace_Points(Overlap *ovl, int tspace, int verbose, char *fname);
+ /* Gap_Improver takes an alignment trace and improves it so the alignment has fewer, larger
+ gaps as if computed under an affine gap penalty. It should be called immediately after
+ Compute_Trace_(PTS|MID). The modified trace alignment is guaranteed to have the same
+ length as the input alignment.
+ */
+
+ void Gap_Improver(Alignment *align, Work_Data *work);
+
#endif // _A_MODULE
=====================================
daligner.c
=====================================
@@ -531,7 +531,7 @@ int main(int argc, char *argv[])
ARG_POSITIVE(HIT_MIN,"Hit threshold (in bp.s)")
break;
case 't':
- ARG_POSITIVE(MAX_REPS,"Tuple supression frequency")
+ ARG_POSITIVE(MAX_REPS,"Tuple suppression frequency")
break;
case 'H':
ARG_POSITIVE(HGAP_MIN,"HGAP threshold (in bp.s)")
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+daligner (1.0+git20240111.a9f62ac-1) unstable; urgency=medium
+
+ * New upstream version
+ * typos.patch: remove: applied upstream.
+
+ -- Étienne Mollier <emollier at debian.org> Sat, 13 Jan 2024 15:21:16 +0100
+
daligner (1.0+git20231217.518d4c2-1) unstable; urgency=medium
* New upstream version
=====================================
debian/patches/series
=====================================
@@ -3,4 +3,3 @@ lddflags.patch
destdir-install.patch
cross.patch
cppflags.patch
-typos.patch
=====================================
debian/patches/typos.patch deleted
=====================================
@@ -1,81 +0,0 @@
-Description: fix a couple of typos caught by lintian.
-Author: Étienne Mollier <emollier at debian.org>
-Forwarded: https://github.com/thegenemyers/DALIGNER/pull/95
-Last-Update: 2023-07-20
----
-This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
---- daligner.orig/DB.c
-+++ daligner/DB.c
-@@ -2208,7 +2208,7 @@
- }
- if (accum != extra->accum)
- { EPRINTF(EPLACE,
-- "%s: Reduction indicator of extra %s does not agree with previos .anno block files\n",
-+ "%s: Reduction indicator of extra %s does not agree with previous .anno block files\n",
- Prog_Name,name);
- goto error;
- }
-@@ -2829,7 +2829,7 @@
- first = last = -1;
- else
- { if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
-- { EPRINTF(EPLACE,"%s: Two or more occurences of %c-sign in source name '%s'\n",
-+ { EPRINTF(EPLACE,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
- Prog_Name,BLOCK_SYMBOL,root);
- goto error;
- }
---- daligner.orig/LAsplit.c
-+++ daligner/LAsplit.c
-@@ -105,7 +105,7 @@
- exit (1);
- }
- if (index(root2+1,BLOCK_SYMBOL) != NULL)
-- { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
-+ { fprintf(stderr,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
- Prog_Name,BLOCK_SYMBOL,root);
- exit (1);
- }
---- daligner.orig/lsd.sort.c
-+++ daligner/lsd.sort.c
-@@ -55,7 +55,7 @@
- int64 thresh[256]; // If check then multiple of LEX_zdiv to check for thread assignment
- int64 tptr[256]; // Finger for each 8-bit value
- int64 *sptr; // Conceptually [256][NTHREADS]. At end of sorting pass
-- } Lex_Arg; // sprtr[b][n] = # of occurences of value b in rangd of
-+ } Lex_Arg; // sprtr[b][n] = # of occurrences of value b in rangd of
- // thread n for the *next* pass
-
- // Threaded sorting pass
---- daligner.orig/daligner.c
-+++ daligner/daligner.c
-@@ -531,7 +531,7 @@
- ARG_POSITIVE(HIT_MIN,"Hit threshold (in bp.s)")
- break;
- case 't':
-- ARG_POSITIVE(MAX_REPS,"Tuple supression frequency")
-+ ARG_POSITIVE(MAX_REPS,"Tuple suppression frequency")
- break;
- case 'H':
- ARG_POSITIVE(HGAP_MIN,"HGAP threshold (in bp.s)")
---- daligner.orig/ONE2LA.c
-+++ daligner/ONE2LA.c
-@@ -152,7 +152,7 @@
- { t = oneReadLine(file1);
-
- if (t == 0)
-- { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxilliary lines\n");
-+ { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxiliary lines\n");
- exit (1);
- }
- if (has[t] > 0 && t != 'T' && t != 'Q')
---- daligner.orig/QV.c
-+++ daligner/QV.c
-@@ -1319,7 +1319,7 @@
- EXIT(NULL);
- }
-
-- // Free all the auxilliary storage associated with the encoding argument
-+ // Free all the auxiliary storage associated with the encoding argument
-
- void Free_QVcoding(QVcoding *coding)
- { if (coding->subChar >= 0)
=====================================
lsd.sort.c
=====================================
@@ -55,7 +55,7 @@ typedef struct
int64 thresh[256]; // If check then multiple of LEX_zdiv to check for thread assignment
int64 tptr[256]; // Finger for each 8-bit value
int64 *sptr; // Conceptually [256][NTHREADS]. At end of sorting pass
- } Lex_Arg; // sprtr[b][n] = # of occurences of value b in rangd of
+ } Lex_Arg; // sprtr[b][n] = # of occurrences of value b in rangd of
// thread n for the *next* pass
// Threaded sorting pass
View it on GitLab: https://salsa.debian.org/med-team/daligner/-/compare/518386eef4e6d13464270b59d0fc5839676e0860...025a52ba9f23d50613384c17cdc106d4e750c5a6
--
View it on GitLab: https://salsa.debian.org/med-team/daligner/-/compare/518386eef4e6d13464270b59d0fc5839676e0860...025a52ba9f23d50613384c17cdc106d4e750c5a6
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240113/96b98972/attachment-0001.htm>
More information about the debian-med-commit mailing list