[med-svn] [Git][med-team/kalign][master] 5 commits: New upstream version 3.3.5

Andreas Tille (@tille) gitlab at salsa.debian.org
Mon Feb 6 06:45:59 GMT 2023



Andreas Tille pushed to branch master at Debian Med / kalign


Commits:
10995494 by Andreas Tille at 2023-02-06T07:34:54+01:00
New upstream version 3.3.5
- - - - -
723726b8 by Andreas Tille at 2023-02-06T07:34:54+01:00
routine-update: New upstream version

- - - - -
2347e808 by Andreas Tille at 2023-02-06T07:34:57+01:00
Update upstream source from tag 'upstream/3.3.5'

Update to upstream version '3.3.5'
with Debian dir ac031a5f1c68d30d4b189eeb50b6382365cca143
- - - - -
42277699 by Andreas Tille at 2023-02-06T07:34:57+01:00
routine-update: Standards-Version: 4.6.2

- - - - -
b732e619 by Andreas Tille at 2023-02-06T07:37:13+01:00
routine-update: Ready to upload to unstable

- - - - -


16 changed files:

- CMakeLists.txt
- ChangeLog
- README.md
- debian/changelog
- debian/control
- lib/CMakeLists.txt
- lib/src/aln_run.c
- lib/src/aln_wrap.c
- lib/src/bisectingKmeans.c
- lib/src/msa_alloc.c
- lib/src/msa_check.c
- lib/src/msa_check.h
- lib/src/msa_op.c
- lib/src/task.c
- lib/src/task.h
- src/run_kalign.c


Changes:

=====================================
CMakeLists.txt
=====================================
@@ -13,7 +13,7 @@ include(GenerateExportHeader)
 
 set(KALIGN_LIBRARY_VERSION_MAJOR 3)
 set(KALIGN_LIBRARY_VERSION_MINOR 3)
-set(KALIGN_LIBRARY_VERSION_PATCH 4)
+set(KALIGN_LIBRARY_VERSION_PATCH 5)
 set(KALIGN_LIBRARY_VERSION_STRING ${KALIGN_LIBRARY_VERSION_MAJOR}.${KALIGN_LIBRARY_VERSION_MINOR}.${KALIGN_LIBRARY_VERSION_PATCH})
 
 


=====================================
ChangeLog
=====================================
@@ -1,3 +1,8 @@
+2022-11-05  Timo Lassmann  <timo.lassmann at telethonkids.org.au>
+
+	* version 3.3.5
+	- Added a check to find and remove sequences of length 0. 
+
 2022-10-28  Timo Lassmann  <timo.lassmann at telethonkids.org.au>
 
 	* version 3.3.4 - Cmake and more


=====================================
README.md
=====================================
@@ -1,4 +1,4 @@
-![C/C++ CI](https://github.com/TimoLassmann/kalign/workflows/C/C++%20CI/badge.svg)
+<!-- ![C/C++ CI](https://github.com/TimoLassmann/kalign/workflows/C/C++%20CI/badge.svg) -->
 [![CMake](https://github.com/TimoLassmann/kalign/actions/workflows/cmake.yml/badge.svg)](https://github.com/TimoLassmann/kalign/actions/workflows/cmake.yml)
 ![CodeQL](https://github.com/TimoLassmann/kalign/workflows/CodeQL/badge.svg)
 
@@ -29,6 +29,7 @@ on macOS, install [brew](https://brew.sh/) then:
 brew install cmake 
 git clone https://github.com/TimoLassmann/kalign.git
 cd kalign
+mkdir build
 cd build 
 cmake ..
 make 
@@ -139,19 +140,11 @@ Here are some benchmark results. The code to reproduce these figures can be foun
 
 ## Balibase
 
-![Balibase_scores](https://user-images.githubusercontent.com/8110320/66697423-7ea3d000-eca3-11e9-919a-995ca8e9f7c1.jpeg)
+![Balibase_scores](https://user-images.githubusercontent.com/8110320/198513840-0e08a634-bb41-4826-bd58-7fc66eae1054.jpeg)
 
 ## Bralibase
 
-![Bralibase_scores](https://user-images.githubusercontent.com/8110320/66697424-86637480-eca3-11e9-90ea-238f82b0ac6b.jpeg)
-
-## Homfam
-
-![Homfam_scores](https://user-images.githubusercontent.com/8110320/66697425-895e6500-eca3-11e9-97e7-63f3a79133cf.jpeg)
-
-## Quantest2
-
-![Quantest2_scores](https://user-images.githubusercontent.com/8110320/66698153-6c2c9500-eca9-11e9-904c-3d6ea9a1c44d.jpeg)
+![Bralibase_scores](https://user-images.githubusercontent.com/8110320/198513850-00e5037f-355f-45ec-828f-ed8d47497272.jpeg)
 
 # Please cite:
 1. Lassmann, Timo. _Kalign 3: multiple sequence alignment of large data sets._ **Bioinformatics** (2019). [pdf](https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btz795/30314127/btz795.pdf)


=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+kalign (1:3.3.5-1) unstable; urgency=medium
+
+  * New upstream version
+  * Standards-Version: 4.6.2 (routine-update)
+
+ -- Andreas Tille <tille at debian.org>  Mon, 06 Feb 2023 07:35:19 +0100
+
 kalign (1:3.3.4-3) unstable; urgency=medium
 
   * Team Upload.


=====================================
debian/control
=====================================
@@ -4,7 +4,7 @@ Uploaders: Charles Plessy <plessy at debian.org>, Andreas Tille <tille at debian.org>
 Section: science
 Priority: optional
 Build-Depends: debhelper-compat (= 13), libsimde-dev, cmake
-Standards-Version: 4.6.1
+Standards-Version: 4.6.2
 Vcs-Browser: https://salsa.debian.org/med-team/kalign
 Vcs-Git: https://salsa.debian.org/med-team/kalign.git
 Homepage: https://msa.sbc.su.se/


=====================================
lib/CMakeLists.txt
=====================================
@@ -256,3 +256,19 @@ add_test(
   NAME edist_utest
   COMMAND edist_utest
   )
+
+
+add_executable(task_utest
+  src/tldevel.c
+  src/tlrng.c
+  src/task.c  
+  )
+
+
+target_link_libraries(task_utest PRIVATE m)
+set_target_properties(task_utest PROPERTIES COMPILE_FLAGS "-DTASKWRITETEST")
+add_test(
+  NAME task_utest
+  COMMAND task_utest
+  )
+ 


=====================================
lib/src/aln_run.c
=====================================
@@ -46,6 +46,9 @@ int create_msa_tree(struct msa* msa, struct aln_param* ap,struct aln_tasks* t)
         for(i = msa->numseq; i < msa->num_profiles;i++){
                 active[i] = 0;
         }
+        /* LOG_MSG("Setting threads to 1 for debugging!"); */
+        /* ap->nthreads = 1; */
+
 #ifdef HAVE_OPENMP
         if(ap->nthreads == 1){
                 recursive_aln_serial(msa, t, ap, active, t->n_tasks-1);
@@ -407,6 +410,8 @@ void recursive_aln_openMP(struct msa* msa, struct aln_tasks*t, struct aln_param*
 
         a = local_t->a - msa->numseq;
         b = local_t->b - msa->numseq;
+
+        /* LOG_MSG("Aligning %d %d", a,b); */
 /* #ifdef HAVE_OPENMP */
 /* #pragma omp parallel num_threads(2) */
 /*         { */
@@ -439,8 +444,19 @@ void recursive_aln_openMP(struct msa* msa, struct aln_tasks*t, struct aln_param*
         ml->mode = ALN_MODE_FULL;
 
         /* if(active[local_t->a] && active[local_t->b]){ */
-        /* fprintf(stdout,"THREAD: %d %3d %3d -> %3d (p: %d)\n",tid, t->list[c]->a, t->list[c]->b, t->list[c]->c, t->list[c]->p); */
+        /* fprintf(stdout,"THREAD:  %3d %3d -> %3d (p: %d)\n", t->list[c]->a, t->list[c]->b, t->list[c]->c, t->list[c]->p); */
+
+/* #ifdef HAVE_OPENMP */
+/* #pragma omp critical */
+/*         { */
+/*                 int thread_num = omp_get_thread_num(); */
+/*                 LOG_MSG("Thread %d working on %d %d",thread_num,local_t->a, local_t->b); */
+/*         } */
+/* #endif */
         do_align(msa,t,ml,c);
+/* #ifdef HAVE_OPENMP */
+/* #pragma omp critical */
+/* #endif */
         active[local_t->b] = 0;
 
         free_aln_mem(ml);
@@ -660,6 +676,20 @@ int do_align(struct msa* msa,struct aln_tasks* t,struct aln_mem* m, int task_id)
                 RUN(set_gap_penalties_n(t->profile[b],m->len_b,msa->nsip[a]));
         }
 
+
+        if(m->len_a == 0 || m->len_b == 0){
+                LOG_MSG("Doalign :  LEN: %d %d     Targets are: %d %d -> %d nsip: %d %d ",m->len_a,m->len_b,a,b,c,msa->nsip[a],msa->nsip[b]  );
+                if(msa->nsip[a] == 1){
+                        LOG_MSG("%s",msa->sequences[a]->name);
+                }
+                if(msa->nsip[b] == 1){
+                        LOG_MSG("%s",msa->sequences[b]->name);
+                }
+
+
+                ERROR_MSG("Oh no!");
+        }
+
         RUN(init_alnmem(m));
 
         m->mode = ALN_MODE_FULL;
@@ -882,7 +912,7 @@ int do_align_serial(struct msa* msa,struct aln_tasks* t,struct aln_mem* m, int t
         }
 
         RUN(add_gap_info_to_path_n(m)) ;
-
+        LOG_MSG("Aligned %d and %d (len %d %d) -> path is of length: %d",a,b, m->len_a,m->len_b, 64*(m->path[0]+2));
         MMALLOC(tmp,sizeof(float)*64*(m->path[0]+2));
 
         /* LOG_MSG("%d TASK ID", task_id); */


=====================================
lib/src/aln_wrap.c
=====================================
@@ -1,9 +1,11 @@
 #include "tldevel.h"
+#include "tlmisc.h"
 #include "esl_stopwatch.h"
 #include "task.h"
 #include "msa_struct.h"
 #include "msa_op.h"
 #include "msa_alloc.h"
+#include "msa_check.h"
 #include "alphabet.h"
 #include "bisectingKmeans.h"
 
@@ -44,6 +46,7 @@ int kalign_run(struct msa *msa, int n_threads, int type, float gpo, float gpe, f
         struct aln_tasks* tasks = NULL;
         struct aln_param* ap = NULL;
 
+        RUN(kalign_essential_input_check(msa, 0));
         /* If already aligned unalign ! */
         if(msa->aligned != ALN_STATUS_UNALIGNED){
                 RUN(dealign_msa(msa));
@@ -61,6 +64,12 @@ int kalign_run(struct msa *msa, int n_threads, int type, float gpo, float gpe, f
         }
         /* -LOG_MSG("L: %d  threads: %d",msa->L, n_threads); */
         /* Start the heavy lifting  */
+
+        /* if(my_file_exists("tasklist.txt")){ */
+        /*         LOG_MSG("Found task list"); */
+        /*         read_tasks(&tasks , "tasklist.txt"); */
+        /* }else{ */
+
         RUN(alloc_tasks(&tasks, msa->numseq));
 
 #ifdef HAVE_OPENMP
@@ -75,6 +84,8 @@ int kalign_run(struct msa *msa, int n_threads, int type, float gpo, float gpe, f
                 RUN(convert_msa_to_internal(msa, ALPHA_ambigiousPROTEIN));
         }
 
+/* write_tasks(tasks, "tasklist.txt"); */
+/*         } */
 
         /* LOG_MSG("L: %d",msa->L); */
         /* align  */


=====================================
lib/src/bisectingKmeans.c
=====================================
@@ -86,8 +86,6 @@ int build_tree_kmeans(struct msa* msa, int n_threads, struct aln_tasks** tasks)
         int i;
 
         ASSERT(msa != NULL, "No alignment.");
-        //ASSERT(param != NULL, "No input parameters.");
-        /* ASSERT(ap != NULL, "No alignment parameters."); */
 
         t = *tasks;
         if(!t){
@@ -109,19 +107,12 @@ int build_tree_kmeans(struct msa* msa, int n_threads, struct aln_tasks** tasks)
         if(!msa->quiet){
                 GET_TIMING(timer);
         }
-
-        //LOG_MSG("Done in %f sec.", GET_TIMING(timer));
-
         MFREE(anchors);
 
         MMALLOC(samples, sizeof(int)* numseq);
         for(i = 0; i < numseq;i++){
                 samples[i] = i;
         }
-        /* if(!msa->quiet){ */
-        /*         LOG_MSG("%d anchors ", num_anchors); */
-        /* } */
-        //RUNP(root = alloc_node());
 
         START_TIMER(timer);
         if(!msa->quiet){
@@ -146,13 +137,6 @@ int build_tree_kmeans(struct msa* msa, int n_threads, struct aln_tasks** tasks)
 
         create_tasks(root, t);
 
-
-        /* exit(0); */
-        /*ap->tree[0] = 1;
-          ap->tree = readbitree(root, ap->tree);
-          for (i = 0; i < (numseq*3);i++){
-          tree[i] = tree[i+1];
-          }*/
         MFREE(root);
         for(i =0 ; i < msa->numseq;i++){
 #ifdef HAVE_AVX2


=====================================
lib/src/msa_alloc.c
=====================================
@@ -66,7 +66,9 @@ void kalign_free_msa(struct msa* msa)
         int i;
         if(msa){
                 for(i = 0; i < msa->alloc_numseq;i++){
-                        free_msa_seq(msa->sequences[i]);
+                        if(msa->sequences[i]){
+                                free_msa_seq(msa->sequences[i]);
+                        }
                 }
 
                 for (i = msa->num_profiles;i--;){


=====================================
lib/src/msa_check.c
=====================================
@@ -20,6 +20,8 @@ static int sort_by_name(const void *a, const void *b);
 static int sort_by_chksum(const void *a, const void *b);
 static int sort_by_both(const void *a, const void *b);
 
+int sort_seq_by_len(const void *a, const void *b);
+
 int kalign_sort_msa(struct msa *msa)
 {
         struct sort_struct_name_chksum** a = NULL;
@@ -56,6 +58,83 @@ ERROR:
         return FAIL;
 }
 
+
+int kalign_essential_input_check(struct msa *msa, int exit_on_error)
+{
+        int problem_len0 = 0;
+        ASSERT(msa != NULL, "No alignment");
+
+        ASSERT(msa->numseq > 1,"only %d sequences found.", msa->numseq);
+        for(int i = 0; i < msa->numseq;i++){
+                if(msa->sequences[i]->len == 0){
+                        if(!msa->quiet){
+                                WARNING_MSG("No sequence found for sequence %s ",msa->sequences[i]->name);
+                        }
+                        problem_len0++;
+                }
+        }
+
+        if(!exit_on_error){
+                /* Here we attempt to fix the zero length problem  */
+                if(problem_len0){
+
+
+                        if(problem_len0 == 1){
+                                if(!msa->quiet){
+                                        LOG_MSG("Removing %d sequence with a length of 0.", problem_len0);
+                                }
+                        }else{
+                                if(!msa->quiet){
+                                        LOG_MSG("Removing %d sequences with a length of 0.",problem_len0);
+                                }
+                        }
+
+                        struct msa_seq** tmp = NULL;
+                        MMALLOC(tmp, sizeof(struct msa_seq* )  * msa->alloc_numseq);
+                        int c = 0;
+                        int e = msa->numseq-1;
+
+                        for(int i = 0 ; i < msa->numseq;i++){
+                                if(msa->sequences[i]->len){
+                                        tmp[c] = msa->sequences[i];
+                                        c++;
+                                }else{
+                                        tmp[e] = msa->sequences[i];
+                                        e--;
+                                }
+                        }
+                        for(int i = msa->numseq; i < msa->alloc_numseq;i++){
+                                 tmp[i] = NULL;
+                        }
+
+                        MFREE(msa->sequences);
+                        msa->sequences = tmp;
+                        /* for(int i = msa->numseq-500; i < msa->numseq;i++){ */
+                                  /* LOG_MSG("%d\t%s", msa->sequences[i]->len,msa->sequences[i]->name); */
+                        /* } */
+                        /* LOG_MSG("%d %d %d ", msa->numseq, msa->numseq -c , problem_len0); */
+                        /* qsort(msa->sequences, msa->numseq, sizeof(struct msa_seq*),sort_seq_by_len); */
+                        /* int c = 0; */
+                        /* for(int i = msa->numseq-1;i >= 0;i--){ */
+                        /*         if(msa->sequences[i]->len != 0){ */
+                        /*                 c = i; */
+                        /*                 break; */
+                        /*         } */
+                        /* } */
+                        /* c++; */
+                        msa->numseq = c;
+                        ASSERT(msa->numseq > 1,"only %d sequences found.", msa->numseq);
+                        /* exit(0); */
+                }
+        }else{
+                ERROR_MSG("%d sequences found with length 0.", problem_len0);
+        }
+
+        return OK;
+ERROR:
+        return FAIL;
+}
+
 int kalign_check_msa(struct msa* msa, int exit_on_error)
 {
         char* tmp_name = NULL;
@@ -189,6 +268,7 @@ int sort_by_name(const void *a, const void *b)
         }
 }
 
+
 int sort_by_chksum(const void *a, const void *b)
 {
         struct sort_struct_name_chksum* const *one = a;
@@ -201,6 +281,16 @@ int sort_by_chksum(const void *a, const void *b)
         }
 }
 
+int sort_seq_by_len(const void *a, const void *b)
+{
+        struct msa_seq* const *one = a;
+        struct msa_seq* const *two = b;
+        if((*one)->len > (*two)->len){
+                return -1;
+        }else{
+                return 1;
+        }
+}
 
 /* Taken from squid library by Sean Eddy  */
 int GCGchecksum(char *seq, int len)


=====================================
lib/src/msa_check.h
=====================================
@@ -12,6 +12,8 @@
 #endif
 
 struct msa;
+
+EXTERN int kalign_essential_input_check(struct msa *msa, int exit_on_error);
 EXTERN int kalign_check_msa(struct msa* msa, int exit_on_error);
 EXTERN int kalign_sort_msa(struct msa *msa);
 #undef MSA_CHECK_IMPORT


=====================================
lib/src/msa_op.c
=====================================
@@ -178,8 +178,10 @@ int detect_aligned(struct msa* msa)
                 l += msa->sequences[i]->len;
                 min_len = MACRO_MIN(min_len, l);
                 max_len = MACRO_MAX(max_len, l);
-                /* LOG_MSG("%d %d", max_len, min_len); */
+
         }
+        /* LOG_MSG("%d %d", max_len, min_len); */
+        /* exit(0); */
         if(gaps){
                 if(min_len == max_len){ /* sequences have gaps and total length is identical - clearly aligned  */
                         msa->aligned = ALN_STATUS_ALIGNED;


=====================================
lib/src/task.c
=====================================
@@ -6,6 +6,110 @@
 static int sort_tasks_by_priority(const void *a, const void *b);
 static int sort_tasks_by_c(const void *a, const void *b);
 
+#ifdef TASKWRITETEST
+#include "tlrng.h"
+int main(void)
+{
+        struct rng_state* rng = NULL;
+        struct aln_tasks *t = NULL;
+        rng = init_rng(0);
+
+        int n_tasks = 54;
+
+        alloc_tasks(&t, n_tasks);
+
+        for(int i = 0; i < n_tasks;i++){
+                t->list[i]->score = 0.0;
+                t->list[i]->a = tl_random_int(rng, 1000);
+                t->list[i]->b = tl_random_int(rng, 1000);
+                t->list[i]->c = tl_random_int(rng, 1000);
+                t->list[i]->p = tl_random_int(rng, 1000);
+                t->list[i]->n = tl_random_int(rng, 1000);
+                t->n_tasks++;
+        }
+
+
+        RUN(write_tasks(t, "task_write_test.txt"));
+
+
+        free_tasks(t);
+        t = 0;
+
+        RUN(read_tasks(&t, "task_write_test.txt" ));
+
+        for(int i = 0; i < t->n_tasks;i++){
+                struct task* a = t->list[i];
+                fprintf(stdout,"%d %d %d %d %d\n",a->a,a->b,a->c,a->p,a->n);
+        }
+
+
+        free_rng(rng);
+        return EXIT_SUCCESS;
+ERROR:
+        if(t){
+                free_tasks(t);
+        }
+        if(rng){
+                free_rng(rng);
+        }
+        return EXIT_FAILURE;
+}
+
+#endif
+
+int write_tasks(struct aln_tasks *t, char *filename)
+{
+
+        FILE* f_ptr = NULL;
+
+        RUNP(f_ptr = fopen(filename, "w"));
+
+        fprintf(f_ptr,"%d\n", t->n_tasks);
+
+        for(int i = 0; i < t->n_tasks;i++){
+                struct task* a = t->list[i];
+                fprintf(f_ptr,"%d,%d,%d,%d,%d\n",a->a,a->b,a->c,a->p,a->n);
+        }
+        fclose(f_ptr);
+        return OK;
+
+ERROR:
+        if(f_ptr){
+                fclose(f_ptr);
+        }
+        return FAIL;
+}
+
+int read_tasks(struct aln_tasks **tasks, char *filename)
+{
+        struct aln_tasks *t = NULL;
+
+        FILE* f_ptr = NULL;
+
+        RUNP(f_ptr = fopen(filename, "r"));
+        int n_tasks = 0;
+
+
+
+        fscanf( f_ptr, "%d", &n_tasks);
+
+        RUN(alloc_tasks(&t, n_tasks));
+        for(int i = 0; i < n_tasks;i++){
+                struct task* a = t->list[i];
+                fscanf(f_ptr,"%d,%d,%d,%d,%d\n",&a->a,&a->b,&a->c,&a->p,&a->n);
+                t->n_tasks++;
+        }
+        fclose(f_ptr);
+
+        *tasks = t;
+        return OK;
+ERROR:
+        if(f_ptr){
+                fclose(f_ptr);
+        }
+        return FAIL;
+}
+
 
 int sort_tasks(struct aln_tasks* t , int order)
 {
@@ -56,7 +160,6 @@ int sort_tasks_by_c(const void *a, const void *b)
         }
 }
 
-
 int alloc_tasks(struct aln_tasks** tasks,int numseq)
 {
         struct aln_tasks* t = NULL;
@@ -69,15 +172,11 @@ int alloc_tasks(struct aln_tasks** tasks,int numseq)
         t->n_alloc_tasks = numseq;
         t->list = NULL;
         t->profile = NULL;
-        /* t->map = NULL; */
+        np = (numseq << 1) - 1;
 
-        np =  (numseq << 1) - 1;
         MMALLOC(t->profile,sizeof(float*)*np);
-        /* MMALLOC(t->map,sizeof(int*)*np); */
-
         for(i = 0; i < np;i++){
                 t->profile[i] = NULL;
-                /* t->map[i] = NULL; */
         }
 
         MMALLOC(t->list, sizeof(struct task*) * t->n_alloc_tasks);


=====================================
lib/src/task.h
=====================================
@@ -36,6 +36,10 @@ EXTERN  int sort_tasks(struct aln_tasks* t , int order);
 EXTERN  int alloc_tasks(struct aln_tasks** tasks,int numseq);
 EXTERN void free_tasks(struct aln_tasks* tasks);
 
+EXTERN int write_tasks(struct aln_tasks *t, char *filename);
+EXTERN int read_tasks(struct aln_tasks **tasks,char* filename);
+
+
 #undef TASK_IMPORT
 #undef EXTERN
 


=====================================
src/run_kalign.c
=====================================
@@ -350,6 +350,8 @@ int run_kalign(struct parameters* param)
                 }
         }
 
+
+
         RUN(kalign_run(msa,
                        param->nthreads,
                        param->type,



View it on GitLab: https://salsa.debian.org/med-team/kalign/-/compare/0b7fbe376db77363bea9ac4635cc2d4cdc793b8a...b732e619a09bfee1a71f076839cf1f74d6504606

-- 
View it on GitLab: https://salsa.debian.org/med-team/kalign/-/compare/0b7fbe376db77363bea9ac4635cc2d4cdc793b8a...b732e619a09bfee1a71f076839cf1f74d6504606
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230206/55872c09/attachment-0001.htm>


More information about the debian-med-commit mailing list