[med-svn] [Git][med-team/daligner][master] 5 commits: New upstream version 1.0+git20240111.a9f62ac

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Sat Jan 13 14:25:16 GMT 2024



Étienne Mollier pushed to branch master at Debian Med / daligner


Commits:
57049af7 by Étienne Mollier at 2024-01-13T15:13:43+01:00
New upstream version 1.0+git20240111.a9f62ac
- - - - -
33d9153b by Étienne Mollier at 2024-01-13T15:13:43+01:00
routine-update: New upstream version

- - - - -
6022084a by Étienne Mollier at 2024-01-13T15:13:45+01:00
Update upstream source from tag 'upstream/1.0+git20240111.a9f62ac'

Update to upstream version '1.0+git20240111.a9f62ac'
with Debian dir e21db6befef107a9565d84cba91a9802c0ccaedb
- - - - -
8c10c64b by Étienne Mollier at 2024-01-13T15:20:59+01:00
typos.patch: remove: applied upstream.

- - - - -
025a52ba by Étienne Mollier at 2024-01-13T15:24:23+01:00
ready to upload to unstable.

- - - - -


12 changed files:

- DB.c
- LAshow.c
- LAsplit.c
- ONE2LA.c
- QV.c
- align.c
- align.h
- daligner.c
- debian/changelog
- debian/patches/series
- − debian/patches/typos.patch
- lsd.sort.c


Changes:

=====================================
DB.c
=====================================
@@ -2208,7 +2208,7 @@ int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra)
     }
   if (accum != extra->accum)
     { EPRINTF(EPLACE,
-           "%s: Reduction indicator of extra %s does not agree with previos .anno block files\n",
+           "%s: Reduction indicator of extra %s does not agree with previous .anno block files\n",
            Prog_Name,name);
       goto error;
     }
@@ -2829,7 +2829,7 @@ static Block_Looper *parse_block_arg(char *arg, int isDB)
     first = last = -1;
   else
     { if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
-        { EPRINTF(EPLACE,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+        { EPRINTF(EPLACE,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
                          Prog_Name,BLOCK_SYMBOL,root);
           goto error;
         }


=====================================
LAshow.c
=====================================
@@ -18,6 +18,8 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#undef DEBUG_GAP_IMPROVER
+
 #include "DB.h"
 #include "align.h"
 
@@ -695,8 +697,10 @@ int main(int argc, char *argv[])
                   Compute_Trace_IRR(aln,work,GREEDIEST);
                 else
                   Compute_Trace_PTS(aln,work,tspace,GREEDIEST);
+                Gap_Improver(aln,work);
               }
 
+#ifdef DEBUG_GAP_IMPROVER
             aln->path->abpos += aoffs;
             aln->path->aepos += aoffs;
             aln->alen = alens;
@@ -716,6 +720,7 @@ int main(int argc, char *argv[])
                   else
                     trace[i] += boffs;
               }
+#endif
 
             if (CARTOON)
               Alignment_Cartoon(stdout,aln,INDENT,mx_wide);


=====================================
LAsplit.c
=====================================
@@ -105,7 +105,7 @@ int main(int argc, char *argv[])
       exit (1);
     }
   if (index(root2+1,BLOCK_SYMBOL) != NULL)
-    { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
+    { fprintf(stderr,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
                      Prog_Name,BLOCK_SYMBOL,root);
       exit (1);
     }


=====================================
ONE2LA.c
=====================================
@@ -152,7 +152,7 @@ int main(int argc, char *argv[])
         { t = oneReadLine(file1);
 
           if (t == 0)
-            { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxilliary lines\n");
+            { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxiliary lines\n");
               exit (1);
             }
           if (has[t] > 0 && t != 'T' && t != 'Q')


=====================================
QV.c
=====================================
@@ -1319,7 +1319,7 @@ error:
   EXIT(NULL);
 }
 
-  //  Free all the auxilliary storage associated with the encoding argument
+  //  Free all the auxiliary storage associated with the encoding argument
 
 void Free_QVcoding(QVcoding *coding)
 { if (coding->subChar >= 0)


=====================================
align.c
=====================================
@@ -173,7 +173,7 @@ void Free_Work_Data(Work_Data *ework)
 #define PATH_INT  0x0fffffffffffffffll   //  Must be PATH_TOP-1
 #define TRIM_MASK 0x7fff                 //  Must be (1 << TRIM_LEN) - 1
 #define TRIM_MLAG 250                    //  How far can last trim point be behind best point
-#define WAVE_LAG   30                    //  How far can worst point be behind the best point
+#define WAVE_LAG   60                    //  How far can worst point be behind the best point
 
 static double Bias_Factor[10] = { .690, .690, .690, .690, .780,
                                   .850, .900, .933, .966, 1.000 };
@@ -3379,7 +3379,9 @@ int Print_Alignment(FILE *file, Alignment *align, Work_Data *ework,
   blen = align->blen;
 
   Abuf[width] = Bbuf[width] = Dbuf[width] = '\0';
-                                           /* buffer/output next column */
+
+  // buffer/output next column
+
 #define COLUMN(x,y)							\
 { int u, v;								\
   if (o >= width)							\
@@ -5494,3 +5496,397 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
 
   return (0);
 }
+
+#undef DEBUG_BOX
+#undef DEBUG_DP
+#undef DEBUG_BACK
+#undef BOX_STATS
+
+#define LONG_SNAKE 50
+
+#ifdef DEBUG
+
+static int ASCII[5] = { 'a', 'c', 'g', 't', '.' };
+
+static inline void print_string(char *a, int l)
+{ int i;
+
+  for (i = 0; i < l; i++)
+    printf("%c",ASCII[(int) a[i]]);
+}
+
+#endif
+
+static inline int hamming(char *a, char *b, int n)
+{ int h, i, x, y;
+
+  h = 0;
+  for (i = 0; i < n; i++)
+    { x = *a++;
+      if (x == 4)
+        break;
+      y = *b++;
+      if (x != y)
+        { if (y == 4)
+            break;
+          else
+            h += 1;
+        }
+    }
+  return (h);
+}
+
+static inline int snake(char *a, char *b)
+{ int i, x;
+
+  for (i = 0; 1; i++)
+    { x = *a++;
+      if (x == 4)
+        break;
+      if (x != *b++)
+        break;
+    }
+  return (i);
+}
+
+static inline int rsnake(char *a, char *b)
+{ int i, x;
+
+  for (i = 0; 1; i++)
+    { x = *--a;
+      if (x == 4)
+        break;
+      if (x != *--b)
+        break;
+    }
+  return (i);
+}
+
+#ifdef BOX_STATS
+
+static int   MaxBxArea;
+static int   MaxBxWidth;
+static int   MaxBxHeight;
+static int64 SumBx;
+static int   NumBx;
+static int   BxHist[101];
+static int   BxExtend;
+static int   BxGaps;
+
+void BeginBoxStats()
+{ int i;
+
+  MaxBxArea   = 0;
+  MaxBxWidth  = 0;
+  MaxBxHeight = 0;
+  SumBx       = 0;
+  NumBx       = 0;
+  for (i = 0; i <= 100; i++)
+    BxHist[i] = 0;
+  BxExtend = 0;
+  BxGaps   = 0;
+}
+
+void EndBoxStats()
+{ int i;
+
+  printf("\n# of Boxes = %d with average work %lld\n",NumBx,SumBx/NumBx);
+  printf("\nMax Work  = %d\n",MaxBxArea);
+  printf("Max Diags = %d\n",MaxBxWidth);
+  printf("Max Waves = %d\n",MaxBxHeight);
+  printf("\nBox extended = %d\n",BxExtend);
+  printf("Gaps removed = %d\n",BxGaps);
+  printf("\nHistogram of box work:\n");
+  for (i = 0; i <= 100; i++)
+    if (BxHist[i] > 0)
+      printf(" %3d00: %10d\n",i,BxHist[i]);
+}
+
+#endif
+
+void Gap_Improver(Alignment *aln, Work_Data *ework)
+{ _Work_Data *work = (_Work_Data *) ework;
+  int        *F, *H;
+  int        *f, *h;
+
+  char  *A, *B;
+  int    x;
+  int    p, q;
+  int    d, m;
+  int   *t, T;
+  int    Fpos, Lpos, Fdag, Hamm, Gaps, Diag;
+  int    passes;
+ 
+  A = aln->aseq-1;
+  B = aln->bseq-1;
+  t = (int *) aln->path->trace;
+  T = aln->path->tlen;
+  F = (int *) work->vector;
+
+  d = aln->path->abpos - aln->path->bbpos;
+  q = t[0];
+  x = 0;
+  while (x < T)
+    { p = q;
+      m = x;
+      Fdag = d;
+      Fpos = p;
+      Hamm = 0;
+      Gaps = 1;
+      while (1)
+        { x += 1;
+          q = 0;
+          if (x >= T || (q = t[x]) != p)
+            { m = x-m;
+              if (p < 0)
+                { d -= m;
+                  if (q >= 0)
+                    break;
+                  if (p-q >= LONG_SNAKE)
+                    break;
+                  Hamm += hamming(A-p,B-(d+p),p-q);
+                }
+              else
+                { d += m;
+                  if (q <= 0)
+                    break;
+                  if (q-p >= LONG_SNAKE)
+                    break;
+                  Hamm += hamming(A+(p+d),B+p,q-p);
+                }
+              Gaps += 1;
+              p = q;
+              m = x;
+            }
+        }
+      if (Gaps == 1)
+        continue;
+      Lpos = p;
+      Diag = abs(Fdag-d)+1;
+
+      // Process box
+
+      p = Diag*(Gaps+Hamm+1)*sizeof(int);
+      if (p > work->vecmax)
+        { if (enlarge_vector(work,p))
+            EXIT (1);
+          F = (int *) work->vector;
+        }
+      H = F + Diag;
+
+#ifdef BOX_STATS
+      { int hgt  = Gaps+Hamm+1;
+        int area = Diag*hgt;
+        if (area > MaxBxArea)
+          MaxBxArea = area;
+        if (Diag > MaxBxWidth)
+          MaxBxWidth = Diag;
+        if (hgt > MaxBxHeight)
+          MaxBxHeight = hgt;
+        NumBx += 1;
+        SumBx += area;
+        if (area >= 10000)
+          BxHist[100] += 1;
+        else
+          BxHist[area/100] += 1;
+      }
+#endif
+#ifdef DEBUG_BOX
+      printf("Box:  %5d :: %4d x %3d (%2d+%2d)  :: %5d .. %5d  %6d .. %6d\n",
+             (Gaps+Hamm+1)*Diag,abs(Fpos-Lpos),Diag,Hamm,Gaps,Fpos,Lpos,Fdag,d);
+      fflush(stdout);
+#endif
+
+      if (Fpos < 0)
+        { Fpos = -Fpos;
+          Lpos = -Lpos;
+
+          while (A[Fpos-1] != B[(Fpos-Fdag)-1] && A[Fpos-1] != 4 && B[(Fpos-Fdag)-1] != 4)
+            { Fpos -= 1;
+#ifdef BOX_STATS
+              BxExtend += 1;
+#endif
+            }
+          while (A[Lpos] != B[Lpos-d] && A[Lpos] != 4 && B[Lpos-d] != 4)
+            { Lpos += 1;
+#ifdef BOX_STATS
+              BxExtend += 1;
+#endif
+            }
+
+          f = F;
+          *f++ = p = Fpos + snake(A+Fpos,B+(Fpos-Fdag));
+          for (m = Fdag-1; m >= d; m--)
+            *f++ = Fpos-1;
+          passes = 0;
+
+#ifdef DEBUG_DP
+          printf(" %2d:",passes);
+          for (m = Fdag; m >= d; m--)
+            printf(" %d",F[Fdag-m]);
+          printf("\n");
+          fflush(stdout);
+#endif
+
+          h = H;
+          p = Fpos;
+          while (p < Lpos)
+            { int b, c;
+
+              b = Fpos;
+              c = 0;
+              f = F;
+              for (m = Fdag; m >= d; m--)
+                { p = b;
+                  if (*f >= b)
+                    { b = *f;
+                      c = 0;
+                      p = b+1;
+                    }
+                  else
+                    c += 1;
+                  *h++ = c;
+                  *f++ = p += snake(A+p,B+(p-m));
+                }
+              passes += 1;
+
+#ifdef DEBUG_DP
+              printf(" %2d:",passes);
+              for (m = Fdag; m >= d; m--)
+                printf(" %d(%2d)",F[Fdag-m],h[(d-m)-1]);
+              printf("\n");
+              fflush(stdout);
+#endif
+            }
+
+          if (passes < Gaps+Hamm)
+            { int y, k;
+
+              p = Lpos;
+              m = d;
+              y = x;
+#ifdef DEBUG_BACK
+              printf("Short cut %d\n",(Gaps+Hamm)-passes);
+              printf("Path (%d,%d)",p,m);
+#endif
+#ifdef BOX_STATS
+              BxGaps += (Gaps+Hamm)-passes;
+#endif
+              while (h > H)
+                { p -= rsnake(A+p,B+(p-m));
+                  if (p < Fpos)
+                    p = Fpos;
+                  h -= Diag;
+                  k = h[Fdag-m];
+                  if (k == 0)
+                    p -= 1;
+                  else
+                    { m += k;
+                      for (; k > 0; k--)
+                        t[--y] = -p;
+                    }
+#ifdef DEBUG_BACK
+                  printf(" (%d,%d)",p,m);
+#endif
+                }
+#ifdef DEBUG_BACK
+              printf("\n");
+#endif
+            }
+        }
+      else
+        { while (B[Fpos-1] != A[(Fpos+Fdag)-1] && B[Fpos-1] != 4 && A[(Fpos+Fdag)-1] != 4)
+            { Fpos -= 1;
+#ifdef BOX_STATS
+              BxExtend += 1;
+#endif
+            }
+          while (B[Lpos] != A[Lpos+d] && B[Lpos] != 4 && A[Lpos+d] != 4)
+            { Lpos += 1;
+#ifdef BOX_STATS
+              BxExtend += 1;
+#endif
+            }
+
+          f = F;
+          *f++ = p = Fpos + snake(A+(Fpos+Fdag),B+Fpos);
+          for (m = Fdag+1; m <= d; m++)
+            *f++ = Fpos-1;
+          passes = 0;
+
+#ifdef DEBUG_DP
+          printf(" %2d:",passes);
+          for (m = Fdag; m <= d; m++) 
+            printf(" %d",F[m-Fdag]);
+          printf("\n");
+          fflush(stdout);
+#endif
+
+          h = H;
+          p = Fpos;
+          while (p < Lpos)
+            { int b, c;
+
+              b = Fpos;
+              c = 0;
+              f = F;
+              for (m = Fdag; m <= d; m++) 
+                { p = b;
+                  if (*f >= b)
+                    { b = *f;
+                      c = 0;
+                      p = b+1;
+                    }
+                  else
+                    c += 1;
+                  *h++ = c; 
+                  *f++ = p += snake(A+(m+p),B+p);
+                }
+              passes += 1;
+
+#ifdef DEBUG_DP
+              printf(" %2d:",passes);
+              for (m = Fdag; m <= d; m++) 
+                printf(" %d(%2d)",F[m-Fdag],h[(m-d)-1]);
+              printf("\n");
+              fflush(stdout);
+#endif
+            }
+
+          if (passes < Gaps+Hamm)
+            { int y, k;
+
+              p = Lpos;
+              m = d;
+              y = x;
+#ifdef DEBUG_BACK
+              printf("Short cut %d\n",(Gaps+Hamm)-passes);
+              printf("Path (%d,%d)",p,m);
+#endif
+#ifdef BOX_STATS
+              BxGaps += (Gaps+Hamm)-passes;
+#endif
+              while (h > H)
+                { p -= rsnake(A+(p+m),B+p);
+                  if (p < Fpos)
+                    p = Fpos;
+                  h -= Diag;
+                  k = h[m-Fdag];
+                  if (k == 0)
+                    p -= 1;
+                  else
+                    { m -= k;
+                      for (; k > 0; k--)
+                        t[--y] = p;
+                    }
+#ifdef DEBUG_BACK
+                  printf(" (%d,%d)",p,m);
+#endif
+                }
+#ifdef DEBUG_BACK
+              printf("\n");
+#endif
+            }
+        }
+    }
+}


=====================================
align.h
=====================================
@@ -375,4 +375,12 @@ typedef struct {
 
   int  Check_Trace_Points(Overlap *ovl, int tspace, int verbose, char *fname);
 
+  /* Gap_Improver takes an alignment trace and improves it so the alignment has fewer, larger
+     gaps as if computed under an affine gap penalty.  It should be called immediately after
+     Compute_Trace_(PTS|MID).  The modified trace alignment is guaranteed to have the same
+     length as the input alignment.
+  */
+
+  void Gap_Improver(Alignment *align, Work_Data *work);
+
 #endif // _A_MODULE


=====================================
daligner.c
=====================================
@@ -531,7 +531,7 @@ int main(int argc, char *argv[])
             ARG_POSITIVE(HIT_MIN,"Hit threshold (in bp.s)")
             break;
           case 't':
-            ARG_POSITIVE(MAX_REPS,"Tuple supression frequency")
+            ARG_POSITIVE(MAX_REPS,"Tuple suppression frequency")
             break;
           case 'H':
             ARG_POSITIVE(HGAP_MIN,"HGAP threshold (in bp.s)")


=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+daligner (1.0+git20240111.a9f62ac-1) unstable; urgency=medium
+
+  * New upstream version
+  * typos.patch: remove: applied upstream.
+
+ -- Étienne Mollier <emollier at debian.org>  Sat, 13 Jan 2024 15:21:16 +0100
+
 daligner (1.0+git20231217.518d4c2-1) unstable; urgency=medium
 
   * New upstream version


=====================================
debian/patches/series
=====================================
@@ -3,4 +3,3 @@ lddflags.patch
 destdir-install.patch
 cross.patch
 cppflags.patch
-typos.patch


=====================================
debian/patches/typos.patch deleted
=====================================
@@ -1,81 +0,0 @@
-Description: fix a couple of typos caught by lintian.
-Author: Étienne Mollier <emollier at debian.org>
-Forwarded: https://github.com/thegenemyers/DALIGNER/pull/95
-Last-Update: 2023-07-20
----
-This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
---- daligner.orig/DB.c
-+++ daligner/DB.c
-@@ -2208,7 +2208,7 @@
-     }
-   if (accum != extra->accum)
-     { EPRINTF(EPLACE,
--           "%s: Reduction indicator of extra %s does not agree with previos .anno block files\n",
-+           "%s: Reduction indicator of extra %s does not agree with previous .anno block files\n",
-            Prog_Name,name);
-       goto error;
-     }
-@@ -2829,7 +2829,7 @@
-     first = last = -1;
-   else
-     { if (index(ppnt+1,BLOCK_SYMBOL) != NULL)
--        { EPRINTF(EPLACE,"%s: Two or more occurences of %c-sign in source name '%s'\n",
-+        { EPRINTF(EPLACE,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
-                          Prog_Name,BLOCK_SYMBOL,root);
-           goto error;
-         }
---- daligner.orig/LAsplit.c
-+++ daligner/LAsplit.c
-@@ -105,7 +105,7 @@
-       exit (1);
-     }
-   if (index(root2+1,BLOCK_SYMBOL) != NULL)
--    { fprintf(stderr,"%s: Two or more occurences of %c-sign in source name '%s'\n",
-+    { fprintf(stderr,"%s: Two or more occurrences of %c-sign in source name '%s'\n",
-                      Prog_Name,BLOCK_SYMBOL,root);
-       exit (1);
-     }
---- daligner.orig/lsd.sort.c
-+++ daligner/lsd.sort.c
-@@ -55,7 +55,7 @@
-     int64  thresh[256];   //  If check then multiple of LEX_zdiv to check for thread assignment
-     int64  tptr[256];     //  Finger for each 8-bit value
-     int64 *sptr;          //  Conceptually [256][NTHREADS].  At end of sorting pass
--  } Lex_Arg;              //    sprtr[b][n] = # of occurences of value b in rangd of
-+  } Lex_Arg;              //    sprtr[b][n] = # of occurrences of value b in rangd of
-                           //    thread n for the *next* pass
- 
- //  Threaded sorting pass
---- daligner.orig/daligner.c
-+++ daligner/daligner.c
-@@ -531,7 +531,7 @@
-             ARG_POSITIVE(HIT_MIN,"Hit threshold (in bp.s)")
-             break;
-           case 't':
--            ARG_POSITIVE(MAX_REPS,"Tuple supression frequency")
-+            ARG_POSITIVE(MAX_REPS,"Tuple suppression frequency")
-             break;
-           case 'H':
-             ARG_POSITIVE(HGAP_MIN,"HGAP threshold (in bp.s)")
---- daligner.orig/ONE2LA.c
-+++ daligner/ONE2LA.c
-@@ -152,7 +152,7 @@
-         { t = oneReadLine(file1);
- 
-           if (t == 0)
--            { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxilliary lines\n");
-+            { fprintf(stderr,"ONE2LA: Pile object not followed by sufficient auxiliary lines\n");
-               exit (1);
-             }
-           if (has[t] > 0 && t != 'T' && t != 'Q')
---- daligner.orig/QV.c
-+++ daligner/QV.c
-@@ -1319,7 +1319,7 @@
-   EXIT(NULL);
- }
- 
--  //  Free all the auxilliary storage associated with the encoding argument
-+  //  Free all the auxiliary storage associated with the encoding argument
- 
- void Free_QVcoding(QVcoding *coding)
- { if (coding->subChar >= 0)


=====================================
lsd.sort.c
=====================================
@@ -55,7 +55,7 @@ typedef struct
     int64  thresh[256];   //  If check then multiple of LEX_zdiv to check for thread assignment
     int64  tptr[256];     //  Finger for each 8-bit value
     int64 *sptr;          //  Conceptually [256][NTHREADS].  At end of sorting pass
-  } Lex_Arg;              //    sprtr[b][n] = # of occurences of value b in rangd of
+  } Lex_Arg;              //    sprtr[b][n] = # of occurrences of value b in rangd of
                           //    thread n for the *next* pass
 
 //  Threaded sorting pass



View it on GitLab: https://salsa.debian.org/med-team/daligner/-/compare/518386eef4e6d13464270b59d0fc5839676e0860...025a52ba9f23d50613384c17cdc106d4e750c5a6

-- 
View it on GitLab: https://salsa.debian.org/med-team/daligner/-/compare/518386eef4e6d13464270b59d0fc5839676e0860...025a52ba9f23d50613384c17cdc106d4e750c5a6
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240113/96b98972/attachment-0001.htm>


More information about the debian-med-commit mailing list