[med-svn] [Git][med-team/kalign][master] 5 commits: New upstream version 3.3.5
Andreas Tille (@tille)
gitlab at salsa.debian.org
Mon Feb 6 06:45:59 GMT 2023
Andreas Tille pushed to branch master at Debian Med / kalign
Commits:
10995494 by Andreas Tille at 2023-02-06T07:34:54+01:00
New upstream version 3.3.5
- - - - -
723726b8 by Andreas Tille at 2023-02-06T07:34:54+01:00
routine-update: New upstream version
- - - - -
2347e808 by Andreas Tille at 2023-02-06T07:34:57+01:00
Update upstream source from tag 'upstream/3.3.5'
Update to upstream version '3.3.5'
with Debian dir ac031a5f1c68d30d4b189eeb50b6382365cca143
- - - - -
42277699 by Andreas Tille at 2023-02-06T07:34:57+01:00
routine-update: Standards-Version: 4.6.2
- - - - -
b732e619 by Andreas Tille at 2023-02-06T07:37:13+01:00
routine-update: Ready to upload to unstable
- - - - -
16 changed files:
- CMakeLists.txt
- ChangeLog
- README.md
- debian/changelog
- debian/control
- lib/CMakeLists.txt
- lib/src/aln_run.c
- lib/src/aln_wrap.c
- lib/src/bisectingKmeans.c
- lib/src/msa_alloc.c
- lib/src/msa_check.c
- lib/src/msa_check.h
- lib/src/msa_op.c
- lib/src/task.c
- lib/src/task.h
- src/run_kalign.c
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -13,7 +13,7 @@ include(GenerateExportHeader)
set(KALIGN_LIBRARY_VERSION_MAJOR 3)
set(KALIGN_LIBRARY_VERSION_MINOR 3)
-set(KALIGN_LIBRARY_VERSION_PATCH 4)
+set(KALIGN_LIBRARY_VERSION_PATCH 5)
set(KALIGN_LIBRARY_VERSION_STRING ${KALIGN_LIBRARY_VERSION_MAJOR}.${KALIGN_LIBRARY_VERSION_MINOR}.${KALIGN_LIBRARY_VERSION_PATCH})
=====================================
ChangeLog
=====================================
@@ -1,3 +1,8 @@
+2022-11-05 Timo Lassmann <timo.lassmann at telethonkids.org.au>
+
+ * version 3.3.5
+ - Added a check to find and remove sequences of length 0.
+
2022-10-28 Timo Lassmann <timo.lassmann at telethonkids.org.au>
* version 3.3.4 - Cmake and more
=====================================
README.md
=====================================
@@ -1,4 +1,4 @@
-![C/C++ CI](https://github.com/TimoLassmann/kalign/workflows/C/C++%20CI/badge.svg)
+<!-- ![C/C++ CI](https://github.com/TimoLassmann/kalign/workflows/C/C++%20CI/badge.svg) -->
[![CMake](https://github.com/TimoLassmann/kalign/actions/workflows/cmake.yml/badge.svg)](https://github.com/TimoLassmann/kalign/actions/workflows/cmake.yml)
![CodeQL](https://github.com/TimoLassmann/kalign/workflows/CodeQL/badge.svg)
@@ -29,6 +29,7 @@ on macOS, install [brew](https://brew.sh/) then:
brew install cmake
git clone https://github.com/TimoLassmann/kalign.git
cd kalign
+mkdir build
cd build
cmake ..
make
@@ -139,19 +140,11 @@ Here are some benchmark results. The code to reproduce these figures can be foun
## Balibase
-![Balibase_scores](https://user-images.githubusercontent.com/8110320/66697423-7ea3d000-eca3-11e9-919a-995ca8e9f7c1.jpeg)
+![Balibase_scores](https://user-images.githubusercontent.com/8110320/198513840-0e08a634-bb41-4826-bd58-7fc66eae1054.jpeg)
## Bralibase
-![Bralibase_scores](https://user-images.githubusercontent.com/8110320/66697424-86637480-eca3-11e9-90ea-238f82b0ac6b.jpeg)
-
-## Homfam
-
-![Homfam_scores](https://user-images.githubusercontent.com/8110320/66697425-895e6500-eca3-11e9-97e7-63f3a79133cf.jpeg)
-
-## Quantest2
-
-![Quantest2_scores](https://user-images.githubusercontent.com/8110320/66698153-6c2c9500-eca9-11e9-904c-3d6ea9a1c44d.jpeg)
+![Bralibase_scores](https://user-images.githubusercontent.com/8110320/198513850-00e5037f-355f-45ec-828f-ed8d47497272.jpeg)
# Please cite:
1. Lassmann, Timo. _Kalign 3: multiple sequence alignment of large data sets._ **Bioinformatics** (2019). [pdf](https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btz795/30314127/btz795.pdf)
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+kalign (1:3.3.5-1) unstable; urgency=medium
+
+ * New upstream version
+ * Standards-Version: 4.6.2 (routine-update)
+
+ -- Andreas Tille <tille at debian.org> Mon, 06 Feb 2023 07:35:19 +0100
+
kalign (1:3.3.4-3) unstable; urgency=medium
* Team Upload.
=====================================
debian/control
=====================================
@@ -4,7 +4,7 @@ Uploaders: Charles Plessy <plessy at debian.org>, Andreas Tille <tille at debian.org>
Section: science
Priority: optional
Build-Depends: debhelper-compat (= 13), libsimde-dev, cmake
-Standards-Version: 4.6.1
+Standards-Version: 4.6.2
Vcs-Browser: https://salsa.debian.org/med-team/kalign
Vcs-Git: https://salsa.debian.org/med-team/kalign.git
Homepage: https://msa.sbc.su.se/
=====================================
lib/CMakeLists.txt
=====================================
@@ -256,3 +256,19 @@ add_test(
NAME edist_utest
COMMAND edist_utest
)
+
+
+add_executable(task_utest
+ src/tldevel.c
+ src/tlrng.c
+ src/task.c
+ )
+
+
+target_link_libraries(task_utest PRIVATE m)
+set_target_properties(task_utest PROPERTIES COMPILE_FLAGS "-DTASKWRITETEST")
+add_test(
+ NAME task_utest
+ COMMAND task_utest
+ )
+
=====================================
lib/src/aln_run.c
=====================================
@@ -46,6 +46,9 @@ int create_msa_tree(struct msa* msa, struct aln_param* ap,struct aln_tasks* t)
for(i = msa->numseq; i < msa->num_profiles;i++){
active[i] = 0;
}
+ /* LOG_MSG("Setting threads to 1 for debugging!"); */
+ /* ap->nthreads = 1; */
+
#ifdef HAVE_OPENMP
if(ap->nthreads == 1){
recursive_aln_serial(msa, t, ap, active, t->n_tasks-1);
@@ -407,6 +410,8 @@ void recursive_aln_openMP(struct msa* msa, struct aln_tasks*t, struct aln_param*
a = local_t->a - msa->numseq;
b = local_t->b - msa->numseq;
+
+ /* LOG_MSG("Aligning %d %d", a,b); */
/* #ifdef HAVE_OPENMP */
/* #pragma omp parallel num_threads(2) */
/* { */
@@ -439,8 +444,19 @@ void recursive_aln_openMP(struct msa* msa, struct aln_tasks*t, struct aln_param*
ml->mode = ALN_MODE_FULL;
/* if(active[local_t->a] && active[local_t->b]){ */
- /* fprintf(stdout,"THREAD: %d %3d %3d -> %3d (p: %d)\n",tid, t->list[c]->a, t->list[c]->b, t->list[c]->c, t->list[c]->p); */
+ /* fprintf(stdout,"THREAD: %3d %3d -> %3d (p: %d)\n", t->list[c]->a, t->list[c]->b, t->list[c]->c, t->list[c]->p); */
+
+/* #ifdef HAVE_OPENMP */
+/* #pragma omp critical */
+/* { */
+/* int thread_num = omp_get_thread_num(); */
+/* LOG_MSG("Thread %d working on %d %d",thread_num,local_t->a, local_t->b); */
+/* } */
+/* #endif */
do_align(msa,t,ml,c);
+/* #ifdef HAVE_OPENMP */
+/* #pragma omp critical */
+/* #endif */
active[local_t->b] = 0;
free_aln_mem(ml);
@@ -660,6 +676,20 @@ int do_align(struct msa* msa,struct aln_tasks* t,struct aln_mem* m, int task_id)
RUN(set_gap_penalties_n(t->profile[b],m->len_b,msa->nsip[a]));
}
+
+ if(m->len_a == 0 || m->len_b == 0){
+ LOG_MSG("Doalign : LEN: %d %d Targets are: %d %d -> %d nsip: %d %d ",m->len_a,m->len_b,a,b,c,msa->nsip[a],msa->nsip[b] );
+ if(msa->nsip[a] == 1){
+ LOG_MSG("%s",msa->sequences[a]->name);
+ }
+ if(msa->nsip[b] == 1){
+ LOG_MSG("%s",msa->sequences[b]->name);
+ }
+
+
+ ERROR_MSG("Oh no!");
+ }
+
RUN(init_alnmem(m));
m->mode = ALN_MODE_FULL;
@@ -882,7 +912,7 @@ int do_align_serial(struct msa* msa,struct aln_tasks* t,struct aln_mem* m, int t
}
RUN(add_gap_info_to_path_n(m)) ;
-
+ LOG_MSG("Aligned %d and %d (len %d %d) -> path is of length: %d",a,b, m->len_a,m->len_b, 64*(m->path[0]+2));
MMALLOC(tmp,sizeof(float)*64*(m->path[0]+2));
/* LOG_MSG("%d TASK ID", task_id); */
=====================================
lib/src/aln_wrap.c
=====================================
@@ -1,9 +1,11 @@
#include "tldevel.h"
+#include "tlmisc.h"
#include "esl_stopwatch.h"
#include "task.h"
#include "msa_struct.h"
#include "msa_op.h"
#include "msa_alloc.h"
+#include "msa_check.h"
#include "alphabet.h"
#include "bisectingKmeans.h"
@@ -44,6 +46,7 @@ int kalign_run(struct msa *msa, int n_threads, int type, float gpo, float gpe, f
struct aln_tasks* tasks = NULL;
struct aln_param* ap = NULL;
+ RUN(kalign_essential_input_check(msa, 0));
/* If already aligned unalign ! */
if(msa->aligned != ALN_STATUS_UNALIGNED){
RUN(dealign_msa(msa));
@@ -61,6 +64,12 @@ int kalign_run(struct msa *msa, int n_threads, int type, float gpo, float gpe, f
}
/* -LOG_MSG("L: %d threads: %d",msa->L, n_threads); */
/* Start the heavy lifting */
+
+ /* if(my_file_exists("tasklist.txt")){ */
+ /* LOG_MSG("Found task list"); */
+ /* read_tasks(&tasks , "tasklist.txt"); */
+ /* }else{ */
+
RUN(alloc_tasks(&tasks, msa->numseq));
#ifdef HAVE_OPENMP
@@ -75,6 +84,8 @@ int kalign_run(struct msa *msa, int n_threads, int type, float gpo, float gpe, f
RUN(convert_msa_to_internal(msa, ALPHA_ambigiousPROTEIN));
}
+/* write_tasks(tasks, "tasklist.txt"); */
+/* } */
/* LOG_MSG("L: %d",msa->L); */
/* align */
=====================================
lib/src/bisectingKmeans.c
=====================================
@@ -86,8 +86,6 @@ int build_tree_kmeans(struct msa* msa, int n_threads, struct aln_tasks** tasks)
int i;
ASSERT(msa != NULL, "No alignment.");
- //ASSERT(param != NULL, "No input parameters.");
- /* ASSERT(ap != NULL, "No alignment parameters."); */
t = *tasks;
if(!t){
@@ -109,19 +107,12 @@ int build_tree_kmeans(struct msa* msa, int n_threads, struct aln_tasks** tasks)
if(!msa->quiet){
GET_TIMING(timer);
}
-
- //LOG_MSG("Done in %f sec.", GET_TIMING(timer));
-
MFREE(anchors);
MMALLOC(samples, sizeof(int)* numseq);
for(i = 0; i < numseq;i++){
samples[i] = i;
}
- /* if(!msa->quiet){ */
- /* LOG_MSG("%d anchors ", num_anchors); */
- /* } */
- //RUNP(root = alloc_node());
START_TIMER(timer);
if(!msa->quiet){
@@ -146,13 +137,6 @@ int build_tree_kmeans(struct msa* msa, int n_threads, struct aln_tasks** tasks)
create_tasks(root, t);
-
- /* exit(0); */
- /*ap->tree[0] = 1;
- ap->tree = readbitree(root, ap->tree);
- for (i = 0; i < (numseq*3);i++){
- tree[i] = tree[i+1];
- }*/
MFREE(root);
for(i =0 ; i < msa->numseq;i++){
#ifdef HAVE_AVX2
=====================================
lib/src/msa_alloc.c
=====================================
@@ -66,7 +66,9 @@ void kalign_free_msa(struct msa* msa)
int i;
if(msa){
for(i = 0; i < msa->alloc_numseq;i++){
- free_msa_seq(msa->sequences[i]);
+ if(msa->sequences[i]){
+ free_msa_seq(msa->sequences[i]);
+ }
}
for (i = msa->num_profiles;i--;){
=====================================
lib/src/msa_check.c
=====================================
@@ -20,6 +20,8 @@ static int sort_by_name(const void *a, const void *b);
static int sort_by_chksum(const void *a, const void *b);
static int sort_by_both(const void *a, const void *b);
+int sort_seq_by_len(const void *a, const void *b);
+
int kalign_sort_msa(struct msa *msa)
{
struct sort_struct_name_chksum** a = NULL;
@@ -56,6 +58,83 @@ ERROR:
return FAIL;
}
+
+int kalign_essential_input_check(struct msa *msa, int exit_on_error)
+{
+ int problem_len0 = 0;
+ ASSERT(msa != NULL, "No alignment");
+
+ ASSERT(msa->numseq > 1,"only %d sequences found.", msa->numseq);
+ for(int i = 0; i < msa->numseq;i++){
+ if(msa->sequences[i]->len == 0){
+ if(!msa->quiet){
+ WARNING_MSG("No sequence found for sequence %s ",msa->sequences[i]->name);
+ }
+ problem_len0++;
+ }
+ }
+
+ if(!exit_on_error){
+ /* Here we attempt to fix the zero length problem */
+ if(problem_len0){
+
+
+ if(problem_len0 == 1){
+ if(!msa->quiet){
+ LOG_MSG("Removing %d sequence with a length of 0.", problem_len0);
+ }
+ }else{
+ if(!msa->quiet){
+ LOG_MSG("Removing %d sequences with a length of 0.",problem_len0);
+ }
+ }
+
+ struct msa_seq** tmp = NULL;
+ MMALLOC(tmp, sizeof(struct msa_seq* ) * msa->alloc_numseq);
+ int c = 0;
+ int e = msa->numseq-1;
+
+ for(int i = 0 ; i < msa->numseq;i++){
+ if(msa->sequences[i]->len){
+ tmp[c] = msa->sequences[i];
+ c++;
+ }else{
+ tmp[e] = msa->sequences[i];
+ e--;
+ }
+ }
+ for(int i = msa->numseq; i < msa->alloc_numseq;i++){
+ tmp[i] = NULL;
+ }
+
+ MFREE(msa->sequences);
+ msa->sequences = tmp;
+ /* for(int i = msa->numseq-500; i < msa->numseq;i++){ */
+ /* LOG_MSG("%d\t%s", msa->sequences[i]->len,msa->sequences[i]->name); */
+ /* } */
+ /* LOG_MSG("%d %d %d ", msa->numseq, msa->numseq -c , problem_len0); */
+ /* qsort(msa->sequences, msa->numseq, sizeof(struct msa_seq*),sort_seq_by_len); */
+ /* int c = 0; */
+ /* for(int i = msa->numseq-1;i >= 0;i--){ */
+ /* if(msa->sequences[i]->len != 0){ */
+ /* c = i; */
+ /* break; */
+ /* } */
+ /* } */
+ /* c++; */
+ msa->numseq = c;
+ ASSERT(msa->numseq > 1,"only %d sequences found.", msa->numseq);
+ /* exit(0); */
+ }
+ }else{
+ ERROR_MSG("%d sequences found with length 0.", problem_len0);
+ }
+
+ return OK;
+ERROR:
+ return FAIL;
+}
+
int kalign_check_msa(struct msa* msa, int exit_on_error)
{
char* tmp_name = NULL;
@@ -189,6 +268,7 @@ int sort_by_name(const void *a, const void *b)
}
}
+
int sort_by_chksum(const void *a, const void *b)
{
struct sort_struct_name_chksum* const *one = a;
@@ -201,6 +281,16 @@ int sort_by_chksum(const void *a, const void *b)
}
}
+int sort_seq_by_len(const void *a, const void *b)
+{
+ struct msa_seq* const *one = a;
+ struct msa_seq* const *two = b;
+ if((*one)->len > (*two)->len){
+ return -1;
+ }else{
+ return 1;
+ }
+}
/* Taken from squid library by Sean Eddy */
int GCGchecksum(char *seq, int len)
=====================================
lib/src/msa_check.h
=====================================
@@ -12,6 +12,8 @@
#endif
struct msa;
+
+EXTERN int kalign_essential_input_check(struct msa *msa, int exit_on_error);
EXTERN int kalign_check_msa(struct msa* msa, int exit_on_error);
EXTERN int kalign_sort_msa(struct msa *msa);
#undef MSA_CHECK_IMPORT
=====================================
lib/src/msa_op.c
=====================================
@@ -178,8 +178,10 @@ int detect_aligned(struct msa* msa)
l += msa->sequences[i]->len;
min_len = MACRO_MIN(min_len, l);
max_len = MACRO_MAX(max_len, l);
- /* LOG_MSG("%d %d", max_len, min_len); */
+
}
+ /* LOG_MSG("%d %d", max_len, min_len); */
+ /* exit(0); */
if(gaps){
if(min_len == max_len){ /* sequences have gaps and total length is identical - clearly aligned */
msa->aligned = ALN_STATUS_ALIGNED;
=====================================
lib/src/task.c
=====================================
@@ -6,6 +6,110 @@
static int sort_tasks_by_priority(const void *a, const void *b);
static int sort_tasks_by_c(const void *a, const void *b);
+#ifdef TASKWRITETEST
+#include "tlrng.h"
+int main(void)
+{
+ struct rng_state* rng = NULL;
+ struct aln_tasks *t = NULL;
+ rng = init_rng(0);
+
+ int n_tasks = 54;
+
+ alloc_tasks(&t, n_tasks);
+
+ for(int i = 0; i < n_tasks;i++){
+ t->list[i]->score = 0.0;
+ t->list[i]->a = tl_random_int(rng, 1000);
+ t->list[i]->b = tl_random_int(rng, 1000);
+ t->list[i]->c = tl_random_int(rng, 1000);
+ t->list[i]->p = tl_random_int(rng, 1000);
+ t->list[i]->n = tl_random_int(rng, 1000);
+ t->n_tasks++;
+ }
+
+
+ RUN(write_tasks(t, "task_write_test.txt"));
+
+
+ free_tasks(t);
+ t = 0;
+
+ RUN(read_tasks(&t, "task_write_test.txt" ));
+
+ for(int i = 0; i < t->n_tasks;i++){
+ struct task* a = t->list[i];
+ fprintf(stdout,"%d %d %d %d %d\n",a->a,a->b,a->c,a->p,a->n);
+ }
+
+
+ free_rng(rng);
+ return EXIT_SUCCESS;
+ERROR:
+ if(t){
+ free_tasks(t);
+ }
+ if(rng){
+ free_rng(rng);
+ }
+ return EXIT_FAILURE;
+}
+
+#endif
+
+int write_tasks(struct aln_tasks *t, char *filename)
+{
+
+ FILE* f_ptr = NULL;
+
+ RUNP(f_ptr = fopen(filename, "w"));
+
+ fprintf(f_ptr,"%d\n", t->n_tasks);
+
+ for(int i = 0; i < t->n_tasks;i++){
+ struct task* a = t->list[i];
+ fprintf(f_ptr,"%d,%d,%d,%d,%d\n",a->a,a->b,a->c,a->p,a->n);
+ }
+ fclose(f_ptr);
+ return OK;
+
+ERROR:
+ if(f_ptr){
+ fclose(f_ptr);
+ }
+ return FAIL;
+}
+
+int read_tasks(struct aln_tasks **tasks, char *filename)
+{
+ struct aln_tasks *t = NULL;
+
+ FILE* f_ptr = NULL;
+
+ RUNP(f_ptr = fopen(filename, "r"));
+ int n_tasks = 0;
+
+
+
+ fscanf( f_ptr, "%d", &n_tasks);
+
+ RUN(alloc_tasks(&t, n_tasks));
+ for(int i = 0; i < n_tasks;i++){
+ struct task* a = t->list[i];
+ fscanf(f_ptr,"%d,%d,%d,%d,%d\n",&a->a,&a->b,&a->c,&a->p,&a->n);
+ t->n_tasks++;
+ }
+ fclose(f_ptr);
+
+ *tasks = t;
+ return OK;
+ERROR:
+ if(f_ptr){
+ fclose(f_ptr);
+ }
+ return FAIL;
+}
+
int sort_tasks(struct aln_tasks* t , int order)
{
@@ -56,7 +160,6 @@ int sort_tasks_by_c(const void *a, const void *b)
}
}
-
int alloc_tasks(struct aln_tasks** tasks,int numseq)
{
struct aln_tasks* t = NULL;
@@ -69,15 +172,11 @@ int alloc_tasks(struct aln_tasks** tasks,int numseq)
t->n_alloc_tasks = numseq;
t->list = NULL;
t->profile = NULL;
- /* t->map = NULL; */
+ np = (numseq << 1) - 1;
- np = (numseq << 1) - 1;
MMALLOC(t->profile,sizeof(float*)*np);
- /* MMALLOC(t->map,sizeof(int*)*np); */
-
for(i = 0; i < np;i++){
t->profile[i] = NULL;
- /* t->map[i] = NULL; */
}
MMALLOC(t->list, sizeof(struct task*) * t->n_alloc_tasks);
=====================================
lib/src/task.h
=====================================
@@ -36,6 +36,10 @@ EXTERN int sort_tasks(struct aln_tasks* t , int order);
EXTERN int alloc_tasks(struct aln_tasks** tasks,int numseq);
EXTERN void free_tasks(struct aln_tasks* tasks);
+EXTERN int write_tasks(struct aln_tasks *t, char *filename);
+EXTERN int read_tasks(struct aln_tasks **tasks,char* filename);
+
+
#undef TASK_IMPORT
#undef EXTERN
=====================================
src/run_kalign.c
=====================================
@@ -350,6 +350,8 @@ int run_kalign(struct parameters* param)
}
}
+
+
RUN(kalign_run(msa,
param->nthreads,
param->type,
View it on GitLab: https://salsa.debian.org/med-team/kalign/-/compare/0b7fbe376db77363bea9ac4635cc2d4cdc793b8a...b732e619a09bfee1a71f076839cf1f74d6504606
--
View it on GitLab: https://salsa.debian.org/med-team/kalign/-/compare/0b7fbe376db77363bea9ac4635cc2d4cdc793b8a...b732e619a09bfee1a71f076839cf1f74d6504606
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230206/55872c09/attachment-0001.htm>
More information about the debian-med-commit
mailing list