[med-svn] [Git][med-team/kma][master] 4 commits: New upstream version 1.2.11
Steffen Möller
gitlab at salsa.debian.org
Sat Aug 31 10:25:25 BST 2019
Steffen Möller pushed to branch master at Debian Med / kma
Commits:
c3ae9084 by Steffen Moeller at 2019-08-31T09:18:20Z
New upstream version 1.2.11
- - - - -
70b854e6 by Steffen Moeller at 2019-08-31T09:18:21Z
Update upstream source from tag 'upstream/1.2.11'
Update to upstream version '1.2.11'
with Debian dir b1c5dd6d23b724dbf686038a30f5a78763b8b62b
- - - - -
bc3bb1f8 by Steffen Moeller at 2019-08-31T09:18:21Z
New upstream version
- - - - -
70145206 by Steffen Moeller at 2019-08-31T09:24:26Z
Preparing upload to salsa
- - - - -
17 changed files:
- Makefile
- debian/changelog
- debian/patches/hardening.patch
- hashmapkmers.c
- kma.c
- + kmeranker.c
- + kmeranker.h
- qseqs.c
- qseqs.h
- sam.c
- savekmers.c
- savekmers.h
- + seqmenttree
- + seqmenttree.c
- + seqmenttree.h
- sparse.c
- version.h
Changes:
=====================================
Makefile
=====================================
@@ -1,5 +1,5 @@
CFLAGS = -Wall -O3 -std=c99
-LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
+LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
.c .o:
@@ -45,6 +45,7 @@ hashtable.o: hashtable.h hashmapkma.h hashmapkmers.h pherror.h
index.o: index.h compress.h decon.h hashmap.h hashmapkma.h loadupdate.h makeindex.h pherror.h stdstat.h version.h
kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h tmp.h version.h
kmapipe.o: kmapipe.h pherror.h
+kmeranker.o: kmeranker.h penalties.h
kmers.o: kmers.h ankers.h compdna.h hashmapkma.h kmapipe.h pherror.h qseqs.h savekmers.h spltdb.h
kmmap.o: kmmap.h hashmapkma.h
loadupdate.o: loadupdate.h pherror.h hashmap.h hashmapkma.h updateindex.h
@@ -58,8 +59,9 @@ qualcheck.o: qualcheck.h compdna.h hashmap.h pherror.h stdnuc.h stdstat.h
runinput.o: runinput.h compdna.h filebuff.h pherror.h qseqs.h seqparse.h
runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h tmp.h vcf.h
sam.o: sam.h nw.h pherror.h qseqs.h runkma.h
-savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
+savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h kmeranker.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
seq2fasta.o: seq2fasta.h pherror.h qseqs.h runkma.h stdnuc.h
+seqmenttree.o: seqmenttree.h pherror.h
seqparse.o: seqparse.h filebuff.h qseqs.h
shm.o: shm.h pherror.h hashmapkma.h version.h
sparse.o: sparse.h compkmers.h hashtable.h kmapipe.h pherror.h runinput.h savekmers.h stdnuc.h stdstat.h
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+kma (1.2.11-1) UNRELEASED; urgency=medium
+
+ * Team upload.
+ * New upstream version
+
+ -- Steffen Moeller <moeller at debian.org> Sat, 31 Aug 2019 11:18:21 +0200
+
kma (1.2.10a-1) unstable; urgency=medium
* New upstream version
=====================================
debian/patches/hardening.patch
=====================================
@@ -2,12 +2,14 @@ Author: Andreas Tille <tille at debian.org>
Last-Update: Thu, 07 Jun 2018 21:55:00 +0200
Description: Propagate hardening options
---- a/Makefile
-+++ b/Makefile
+Index: kma/Makefile
+===================================================================
+--- kma.orig/Makefile
++++ kma/Makefile
@@ -1,4 +1,4 @@
-CFLAGS = -Wall -O3 -std=c99
+CFLAGS += -Wall -O3 -std=c99
- LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
+ LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
@@ -8,16 +8,16 @@ PROGS = kma kma_index kma_shm kma_update
=====================================
hashmapkmers.c
=====================================
@@ -39,7 +39,7 @@ void reallocHashMap_kmers(HashMap_kmers *dest) {
/* save buckets */
table = 0;
- index = ++dest->size;
+ index = dest->size;
while(index--) {
for(node = dest->table[index]; node; node = node_next) {
node_next = node->next;
@@ -54,12 +54,11 @@ void reallocHashMap_kmers(HashMap_kmers *dest) {
if(!dest->table) {
ERROR();
}
- --dest->size;
/* refill table */
for(node = table; node; node = node_next) {
node_next = node->next;
- index = node->key & dest->size;
+ index = node->key % dest->size;
node->next = dest->table[index];
dest->table[index] = node;
}
@@ -96,7 +95,7 @@ int hashMap_CountKmer(HashMap_kmers *dest, long unsigned key) {
long unsigned index;
HashTable_kmers *node;
- index = key & dest->size;
+ index = key % dest->size;
for(node = dest->table[index]; node != 0; node = node->next) {
if(node->key == key) {
return 0;
=====================================
kma.c
=====================================
@@ -816,7 +816,7 @@ int kma_main(int argc, char *argv[]) {
}
preseed(0, 0, exhaustive);
- if(sam) {
+ if(sam && kmaPipe != &kmaPipeThread) {
fprintf(stderr, "\"-sam\" and \"-status\" cannot coincide.\n");
kmaPipe = &kmaPipeThread;
}
=====================================
kmeranker.c
=====================================
@@ -0,0 +1,213 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+#include <stdlib.h>
+#include "kmeranker.h"
+#include "penalties.h"
+
+KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int *bests, int *Score, int *extendScore, char *include) {
+
+ /* get set of best templates and silences the chain, except for the initial anker */
+ int template, score, bestScore, Wl, W1, U, M, MM, gaps;
+ unsigned i, j, start, end, SU;
+ unsigned *values;
+ short unsigned *values_s;
+ KmerAnker *node, *prev;
+
+ /* mark template candidates */
+ if(!src) {
+ return 0;
+ } else if((SU = *include)) {
+ values_s = (short unsigned *) src->values;
+ i = *values_s + 1;
+ *bests += i;
+ values_s += i;
+ bests += i;
+ while(--i) {
+ include[(*--bests = *--values_s)] ^= 1;
+ }
+ } else {
+ values = src->values;
+ i = *values + 1;
+ *bests += i;
+ values += i;
+ bests += i;
+ while(--i) {
+ include[(*--bests = *--values)] ^= 1;
+ }
+ }
+ M = rewards->M;
+ MM = rewards->MM;
+ U = rewards->U;
+ W1 = rewards->W1;
+ Wl = rewards->Wl;
+ bestScore = src->score;
+
+ /* get chaining scores */
+ for(node = src; node != 0; node = node->next) {
+ if(SU) {
+ values_s = (short unsigned *) node->values;
+ i = *values_s + 1;
+ values_s += i;
+ } else {
+ values = node->values;
+ i = *values + 1;
+ values += i;
+ }
+
+ start = node->start;
+ end = node->end;
+
+ while(--i) {
+ template = SU ? *--values_s : *--values;
+ if(!include[template]) {
+ score = Score[template];
+ gaps = start - extendScore[template];
+ if(gaps == 0) {
+ /* continued match */
+ score += M;
+ } else if(gaps < 0) {
+ /* deletion */
+ score += (W1 + abs(gaps + 1) * U);
+ } else if(gaps == 1) {
+ /* one unmatched between */
+ score += MM;
+ } else {
+ /* unmatched bases between */
+ if((gaps = (gaps - 2) * M) < 0) {
+ score += gaps;
+ }
+ score += (MM << 1);
+ }
+
+ /* check local chain is needed */
+ if(score < Wl) {
+ score = Wl + node->weight;
+ /* mark as terminating */
+ extendScore[template] = 0;
+ } else {
+ score += node->weight;
+ /* mark descendant */
+ extendScore[template] = end;
+ }
+
+ /* update Scores */
+ Score[template] = score;
+ }
+ }
+
+ /* mark as used */
+ node->score = 0;
+ prev = node;
+ }
+ src->score = bestScore;
+
+ /* get best templates */
+ j = 0;
+ for(i = 1; i <= *bests; ++i) {
+ if(Score[(template = bests[i])] == bestScore) {
+ bests[++j] = template;
+ }
+ /* clear Score arrays */
+ Score[template] = 0;
+ extendScore[template] = 0;
+ include[template] = 0;
+ }
+ *bests = j;
+
+ return prev;
+}
+
+KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize) {
+
+ KmerAnker *node, *prev;
+
+ while(V_score->score < kmersize) {
+ ++V_score;
+ }
+
+ prev = V_score;
+ node = V_score->descend;
+ while(node) {
+ if(kmersize <= node->score) {
+ prev->descend = node;
+ prev = node;
+ }
+ node = node->descend;
+ }
+ prev->descend = 0;
+
+ return V_score;
+}
+
+unsigned getStartAnker(KmerAnker *src) {
+
+ while(src->next) {
+ src = src->next;
+ }
+ return src->start;
+}
+
+KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties) {
+
+ KmerAnker *node, *prev, *best;
+
+ *ties = 0;
+ prev = *src;
+ while(prev && prev->score == 0) {
+ prev = prev->descend;
+ }
+ *src = prev;
+ best = prev;
+ node = prev->descend;
+
+ while(node) {
+ if(best->score < node->score) {
+ best = node;
+ *ties = 0;
+ } else if(best->score == node->score) {
+ if((node->end - node->start) < (best->end - best->start)) {
+ best = node;
+ *ties = 0;
+ } else if((node->end - node->start) == (best->end - best->start)) {
+ best = node;
+ ++*ties;
+ }
+ } else if(node->score != 0) {
+ prev->descend = node;
+ prev = node;
+ }
+ node = node->descend;
+ }
+ prev->descend = 0;
+
+ return best;
+}
+
+KmerAnker * getTieAnker(KmerAnker *stop, KmerAnker *src, int score, int len) {
+
+ /* search downwards */
+ while(--src != stop) {
+ if(src->score == score && (src->end - src->start) == len) {
+ return src;
+ }
+ }
+
+ return 0;
+}
=====================================
kmeranker.h
=====================================
@@ -0,0 +1,41 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+#include "penalties.h"
+
+#ifndef KMERANKER
+#define KMERANKER 1;
+typedef struct kmerAnker KmerAnker;
+struct kmerAnker {
+ int score;
+ int weight;
+ unsigned start;
+ unsigned end;
+ unsigned flag;
+ unsigned *values;
+ struct kmerAnker *descend; /* descending anker */
+ struct kmerAnker *next; /* next anker in chain */
+};
+#endif
+
+KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int *bests, int *Score, int *extendScore, char *include);
+KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize);
+unsigned getStartAnker(KmerAnker *src);
+KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties);
+KmerAnker * getTieAnker(KmerAnker *stop, KmerAnker *src, int score, int len);
=====================================
qseqs.c
=====================================
@@ -37,3 +37,22 @@ void destroyQseqs(Qseqs *dest) {
free(dest->seq);
free(dest);
}
+
+void insertKmerBound(Qseqs *header, int start, int end) {
+
+ int *seq;
+
+ if((header->len + 3 * sizeof(int)) < header->size) {
+ header->size = (header->len + 3 * sizeof(int)) << 1;
+ if(!(header->seq = realloc(header->seq, header->size))) {
+ ERROR();
+ }
+ }
+
+ seq = (int *) (header->seq + header->len + 1);
+ *seq = start;
+ *++seq = end;
+ *++seq = 0;
+ header->len += (2 * sizeof(int) + 1);
+
+}
=====================================
qseqs.h
=====================================
@@ -31,3 +31,4 @@ struct qseqs {
Qseqs * setQseqs(int size);
/* destroy Qseqs */
void destroyQseqs(Qseqs *dest);
+void insertKmerBound(Qseqs *header, int start, int end);
=====================================
sam.c
=====================================
@@ -32,8 +32,8 @@ char * makeCigar(Qseqs *Cigar, const Aln *aligned) {
char op, pop, *s, *cigar;
unsigned char *t, *q;
- if(Cigar->size < aligned->len << 1) {
- Cigar->size <<= 1;
+ if(Cigar->size < (aligned->len << 1)) {
+ Cigar->size = (aligned->len << 1);
free(Cigar->seq);
Cigar->seq = smalloc(Cigar->size);
} else if(aligned->len == 0) {
=====================================
savekmers.c
=====================================
@@ -25,11 +25,13 @@
#include "ankers.h"
#include "compdna.h"
#include "hashmapkma.h"
+#include "kmeranker.h"
#include "penalties.h"
#include "pherror.h"
#include "qseqs.h"
#include "sam.h"
#include "savekmers.h"
+#include "seqmenttree.h"
#include "stdnuc.h"
#include "stdstat.h"
#include "threader.h"
@@ -4760,17 +4762,19 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
/* return 0 for match, 1 otherwise */
static int *Sizes = 0;
+ static double mrs = 0.5, coverT = 0.5;
static KmerAnker **tVF_scores, **tVR_scores;
+ static SeqmentTree **tSeqments;
- int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, maxS;
- int *bests;
+ int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, *bests;
unsigned i, j, rc, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
- unsigned hitCounter, hitCounter_r;
- unsigned *values, *last;
+ unsigned hitCounter, hitCounter_r, ties, len, *values, *last;
short unsigned *values_s;
char *include;
CompDNA *qseqP;
KmerAnker *V_score, *V_scores, *VF_scores, *VR_scores, *tmp_score;
+ KmerAnker *best_score, *best_score_r;
+ SeqmentTree *chainSeqments;
if(qseq == 0) {
if(Sizes) {
@@ -4782,10 +4786,18 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
free(tVF_scores);
} else {
+ /* here */
+ /* set mrs and coverT */
+
+
i = *bestTemplates;
Sizes = smalloc(i * sizeof(int));
tVF_scores = smalloc(2 * i * sizeof(KmerAnker *));
tVR_scores = tVF_scores + i;
+ tSeqments = calloc(i, sizeof(SeqmentTree *));
+ if(!tSeqments) {
+ ERROR();
+ }
while(i--) {
Sizes[i] = 256;
tVF_scores[i] = calloc(512, sizeof(KmerAnker));
@@ -4828,6 +4840,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
VF_scores = tVF_scores[*Score];
VR_scores = tVR_scores[*Score];
+ chainSeqments = tSeqments[*Score];
V_scores = VF_scores;
qseqP = qseq;
@@ -4857,16 +4870,20 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
}
+ V_score = V_scores;
+ V_score->start = 0;
+ V_score->end = 0;
+ V_score->values = 0;
+ V_score->next = 0;
+ V_score->descend = 0;
if(HIT) {
- V_score = V_scores;
- V_score->start = (j = 0);
- V_score->values = (last = 0);
- V_score->next = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
gaps = 0;
+ last = 0;
+ j = 0;
for(i = 1; i <= *(qseqP->N); ++i) {
end = qseqP->N[i] - kmersize + 1;
while(j < end) {
@@ -4901,10 +4918,12 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
/* update and link between ankers */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
V_score->end = j - gaps + kmersize;
+ V_score->descend = V_score + 1;
++V_score;
}
V_score->start = j;
V_score->values = (last = values);
+ V_score->descend = 0;
Ms = 0;
MMs = 0;
Us = 0;
@@ -4927,34 +4946,27 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
--*(qseqP->N);
}
- /* here */
- /* chaining */
-
- /*
- while(chain is good) {
- 1. make chain. OK
- 2. choose best.
- 3. silence "used" ankers.
- 4. redo chaining if next best hits a silenced anker.
+ /* no matches */
+ if(!hitCounter && !hitCounter_r) {
+ *include = 0;
+ return 1;
}
- */
-
- /*
- for each anker
- mark last anker w.r.t. highest score on anker
- */
/* make chains */
- maxS = 0;
V_score = VF_scores;
+ best_score = 0;
+ best_score_r = V_score;
HIT = hitCounter + 1;
bests = bestTemplates;
+ ties = 0;
rc = 2;
while(rc--) {
if(rc == 0) {
V_score = VR_scores;
HIT = hitCounter_r + 1;
bests = bestTemplates_r;
+ best_score = best_score_r;
+ best_score_r = V_score;
}
*bests = 0;
while(--HIT) {
@@ -5006,48 +5018,556 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
if(score < Wl) {
score = Wl + V_score->weight;
/* mark as terminating */
- V_score->next = 0;
+ tmp_score = 0;
extendScore[template] = 0;
} else {
score += V_score->weight;
/* mark descendant */
tmp_score = V_score;
- while((--tmp_score)->end != pos);
- V_score->next = tmp_score;
extendScore[template] = end;
}
/* update Scores */
if(V_score->score < score) {
V_score->score = score;
- /* here */
- /* consider several bests */
- if(maxS < score) {
- maxS = score;
+ /* find link chain */
+ if(tmp_score) {
+ while((--tmp_score)->end != pos);
}
+ V_score->next = tmp_score;
}
Score[template] = score;
}
+
+ /* Mark (first) best hit */
+ if(best_score_r->score < V_score->score) {
+ best_score_r = V_score;
+ ties = 0;
+ } else if(best_score_r->score == V_score->score) {
+ /* lower uncertainty in shorter chains */
+ if((V_score->end - V_score->start) < (best_score_r->end - best_score_r->start)) {
+ best_score_r = V_score;
+ ties = 0;
+ } else if((V_score->end - V_score->start) == (best_score_r->end - best_score_r->start)) {
+ /* equals */
+ ++ties;
+ /* first hit on rc is likely last hit on forward */
+ if(rc) {
+ best_score_r = V_score;
+ }
+ }
+ }
+
+ V_score->flag = 0;
++V_score;
}
- /* clear Score */
+ /* clear Score arrays */
+ clearScore(bests, Score);
+ clearScore(bests, extendScore);
i = *bests + 1;
while(--i) {
include[*++bests] = 0;
}
- clearScore(bests, Score);
- clearScore(bests, extendScore);
}
+ /* no good hits */
+ if(best_score->score < kmersize && best_score_r->score < kmersize) {
+ *include = 0;
+ return 1;
+ }
+
+ /* get best chain,
+ start/end, score, template(s)
+ and zero out ankers */
+ *include = SU;
+ *bestTemplates = 0;
+ if(best_score_r->score < best_score->score ||
+ (best_score_r->score < best_score->score &&
+ (best_score->end - best_score->start) <= (best_score_r->end - best_score->start))) {
+ tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
+ score = best_score->score;
+ start = tmp_score->start;
+ len = best_score->end - start;
+ rc = 0;
+ } else {
+ tmp_score = getBestChainTemplates(best_score_r, rewards, bestTemplates, Score, extendScore, include);
+ score = best_score_r->score;
+ start = tmp_score->start;
+ len = best_score_r->end - start;
+ rc = 1;
+ }
+
+ if(score < mrs * len) {
+ *include = 0;
+ return 1;
+ }
+
+ /* prune hits */
+ VF_scores = pruneAnkers(VF_scores, kmersize);
+ VR_scores = pruneAnkers(VR_scores, kmersize);
+
+ /* get best chains */
+ while(best_score || best_score_r) {
+ /* get equal ankers inside chain,
+ that lies under the threshold */
+ /* here */
+ if(ties && best_score != tmp_score) {
+ V_score = best_score;
+ while(V_score && (V_score = getTieAnker(tmp_score, V_score, best_score->score, len))) {
+ /*
+ 1. tie anker -> tie_len == best_len
+ 2. Best match is last on seq -> tie_start < best_start
+ 1. && 2. -> cover = |tie_start - best_end| / len
+ */
+ if((V_score->end - start) < coverT * len ) {
+ /* not a co-match / overlap is insufficient ->
+ no more equal matches */
+ V_score = 0;
+ } else { /* Anker is equal, update with templates */
+ /* Mark current templates to avoid double hitting */
+ i = *bestTemplates + 1;
+ bests = bestTemplates;
+ while(--i) {
+ include[*++bests] = 1;
+ }
+ /* update best template candidates */
+ template = *bests;
+ *bests = 0;
+ getBestChainTemplates(V_score, rewards, bests, Score, extendScore, include);
+ *bestTemplates += *bests;
+ *bests = template;
+ start = V_score->end - len;
+ }
+ }
+
+
+ }
+
+
+
+ }
+
+
+
+
+
+
+ /*
+ Best template(s) must in values on best anker,
+ remove non-union of A and B, while backtracking.
+ Where A and B are templates on ankerA and ankerB.
+ For proxi scoring the score has to be imputed while tracking.
+ */
+
+ /* here */
+ /* what happens when two equal chains overlap */
+ /*
+ ------------- ------------- -------------
+ ------------- -------------
+
+ ------------- ------------- -------------
+ ------------- -------------
+ */
+
+
+
+
+ /* no rechaining */
+
+ /*
+ Q = set of best chains
+
+ while(score is good)
+ 1. if in Q.
+ if(<50%)
+ add to Q
+ else if(scores equal)
+ add with match
+ else
+ skip chain
+ else
+ add to Q.
+ 2. Silence it.
+ 3. find best chain.
+ */
+ *include = 0;
+
+ return 1;
+}
+
+int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
+
+ /* return 0 for match, 1 otherwise */
+ static int *Sizes = 0;
+ static double mrs = 0.5, coverT = 0.5;
+ static KmerAnker **tVF_scores;
+ static SeqmentTree **tSeqments;
+ int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, *bests;
+ unsigned i, j, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
+ unsigned len, cover, hitCounter, ties, *values, *last;
+ short unsigned *values_s;
+ char *include;
+ KmerAnker *V_score, *VF_scores, *tmp_score, *best_score;
+ SeqmentTree *chainSeqments;
+
+ if(qseq == 0) {
+ if(Sizes) {
+ free(Sizes);
+ Sizes = 0;
+ i = *bestTemplates;
+ while(i--) {
+ free(tVF_scores[i]);
+ }
+ free(tVF_scores);
+ } else {
+ /* here */
+ /* set mrs and coverT */
+
+
+ i = *bestTemplates;
+ Sizes = smalloc(i * sizeof(int));
+ tVF_scores = smalloc(i * sizeof(KmerAnker *));
+ tSeqments = calloc(i, sizeof(SeqmentTree *));
+ if(!tSeqments) {
+ ERROR();
+ }
+ while(i--) {
+ Sizes[i] = 256;
+ tVF_scores[i] = calloc(512, sizeof(KmerAnker));
+ if(!tVF_scores[i]) {
+ ERROR();
+ }
+ }
+ }
+ return 0;
+ } else if(qseq->seqlen < (kmersize = templates->kmersize)) {
+ return 1;
+ } else if((DB_size = templates->DB_size) < USHRT_MAX) {
+ SU = 1;
+ } else {
+ SU = 0;
+ }
+ shifter = sizeof(long unsigned) * sizeof(long unsigned) - (templates->kmersize << 1);
+ M = rewards->M;
+ MM = rewards->MM;
+ U = rewards->U;
+ W1 = rewards->W1;
+ Wl = rewards->Wl;
+ include = (char *) (extendScore + (templates->DB_size + 1));
+ hitCounter = 0;
+ values = 0;
+ values_s = 0;
+
+ if(Sizes[*Score] < qseq->size) {
+ Sizes[*Score] = qseq->size << 1;
+ free(tVF_scores[*Score]);
+ tVF_scores[*Score] = calloc(qseq->size, sizeof(KmerAnker));
+ if(!tVF_scores[*Score]) {
+ ERROR();
+ }
+ }
+ VF_scores = tVF_scores[*Score];
+ chainSeqments = tSeqments[*Score];
+ HIT = exhaustive;
+ j = 0;
+ ++*(qseq->N);
+ qseq->N[*(qseq->N)] = qseq->seqlen;
+ for(i = 1; i <= *(qseq->N) && !HIT; ++i) {
+ end = qseq->N[i] - kmersize + 1;
+ while(j < end && hashMap_get(templates, getKmer(qseq->seq, j, shifter)) == 0) {
+ j += kmersize;
+ }
+ if(j < end) {
+ HIT = 1;
+ } else {
+ j = qseq->N[i] + 1;
+ }
+ }
+ if(HIT) {
+ V_score = VF_scores;
+ V_score->start = (j = 0);
+ V_score->values = (last = 0);
+ V_score->next = 0;
+ Ms = 0;
+ MMs = 0;
+ Us = 0;
+ W1s = 0;
+ gaps = 0;
+ for(i = 1; i <= *(qseq->N); ++i) {
+ end = qseq->N[i] - kmersize + 1;
+ while(j < end) {
+ if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
+ if(values == last) {
+ if(kmersize < gaps) {
+ Ms += kmersize;
+ gaps -= kmersize;
+ if(gaps) {
+ /* go for best scenario */
+ if(gaps == 1) {
+ MMs += 2;
+ } else {
+ gaps -= 2;
+ if((MM << 1) + gaps * M < 0) {
+ Ms += gaps;
+ MMs += 2;
+ }
+ }
+ } else {
+ ++MMs;
+ }
+ } else if(gaps) {
+ --gaps;
+ ++W1s;
+ Us += gaps;
+ } else {
+ ++Ms;
+ }
+ } else {
+ if(last) {
+ /* update and link between ankers */
+ V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ V_score->end = j - gaps + kmersize;
+ V_score->descend = V_score + 1;
+ ++V_score;
+ }
+ V_score->start = j;
+ V_score->values = (last = values);
+ V_score->descend = 0;
+ Ms = 0;
+ MMs = 0;
+ Us = 0;
+ W1s = 0;
+ ++hitCounter;
+ }
+ gaps = 0;
+ } else {
+ ++gaps;
+ }
+ j += kmersize;
+ }
+ j = qseq->N[i] + 1;
+ }
+ if(last) {
+ /* update anker */
+ V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ }
+ }
+ --*(qseq->N);
+ /* no matches */
+ if(!hitCounter) {
+ return 1;
+ }
+ /* make chains */
+ V_score = VF_scores;
+ best_score = V_score;
+ HIT = hitCounter + 1;
+ bests = bestTemplates;
+ *bests = (ties = 0);
+ while(--HIT) {
+ /*
+ Save pos of best score, and chain it (0 if local).
+ Score is always current / w.r.t. the anker you are on.
+ extendScore contains endpos of last hit anker.
+ */
+ start = V_score->start;
+ end = V_score->end;
+
+ /* chain anker */
+ V_score->score = 0;
+ if(SU) {
+ values_s = (short unsigned *) V_score->values;
+ i = *values_s + 1;
+ values_s += i;
+ } else {
+ values = V_score->values;
+ i = *values + 1;
+ values += i;
+ }
+ while(--i) {
+ template = SU ? *--values_s : *--values;
+ if(!include[template]) {
+ include[template] = 1;
+ bests[++*bests] = template;
+ }
+ score = Score[template];
+ gaps = start - (pos = extendScore[template]);
+ if(gaps == 0) {
+ /* continued match */
+ score += M;
+ } else if(gaps < 0) {
+ /* deletion */
+ score += (W1 + abs(gaps + 1) * U);
+ } else if(gaps == 1) {
+ /* one unmatched between */
+ score += MM;
+ } else {
+ /* unmatched bases between */
+ if((gaps = (gaps - 2) * M) < 0) {
+ score += gaps;
+ }
+ score += (MM << 1);
+ }
+
+ /* check local chain is needed */
+ if(score < Wl) {
+ score = Wl + V_score->weight;
+ /* mark as terminating */
+ tmp_score = 0;
+ extendScore[template] = 0;
+ } else {
+ score += V_score->weight;
+ /* mark descendant */
+ tmp_score = V_score;
+ extendScore[template] = end;
+ }
+
+ /* update Scores */
+ if(V_score->score < score) {
+ V_score->score = score;
+ /* find link chain */
+ if(tmp_score) {
+ while((--tmp_score)->end != pos);
+ }
+ V_score->next = tmp_score;
+ }
+ Score[template] = score;
+ }
+
+ /* Mark (last) best hit */
+ if(best_score->score < V_score->score) {
+ best_score = V_score;
+ ties = 0;
+ } else if(best_score->score == V_score->score) {
+ if((V_score->end - V_score->start) < (best_score->end - best_score->start)) {
+ /* lower uncertainty in shorter chains */
+ best_score = V_score;
+ ties = 0;
+ } else if((V_score->end - V_score->start) == (best_score->end - best_score->start)) {
+ /* equals */
+ best_score = V_score;
+ ++ties;
+ }
+ }
+
+ V_score->flag = 0;
+ ++V_score;
+ }
+ /* clear Score arrays */
+ clearScore(bests, Score);
+ clearScore(bests, extendScore);
+ i = *bests + 1;
+ while(--i) {
+ include[*++bests] = 0;
+ }
+ /* no good hits */
+ if((score = best_score->score) < kmersize) {
+ *include = 0;
+ return 1;
+ }
+ /* get best chain,
+ start/end, score, template(s)
+ and zero out ankers */
+ *include = SU;
+ *bestTemplates = 0;
+ tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
+ start = tmp_score->start;
+ len = best_score->end - start;
+ if(score < mrs * len) {
+ *include = 0;
+ return 1;
+ }
+ /* prune hits */
+ VF_scores = pruneAnkers(VF_scores, kmersize);
+
+ /* get best chains */
+ while(best_score) {
+ /* get equal ankers inside chain,
+ that lies under the threshold */
+ if(ties && best_score != tmp_score) {
+ V_score = best_score;
+ while(V_score && (V_score = getTieAnker(tmp_score, V_score, best_score->score, len))) {
+ /*
+ 1. tie anker -> tie_len == best_len
+ 2. Best match is last on seq -> tie_start < best_start
+ 1. && 2. -> cover = |tie_start - best_end| / len
+ */
+ if((V_score->end - start) < coverT * len ) {
+ /* not a co-match / overlap is insufficient ->
+ no more equal matches */
+ V_score = 0;
+ } else { /* Anker is equal, update with templates */
+ /* Mark current templates to avoid double hitting */
+ i = *bestTemplates + 1;
+ bests = bestTemplates;
+ while(--i) {
+ include[*++bests] = 1;
+ }
+ /* update best template candidates */
+ template = *bests;
+ *bests = 0;
+ getBestChainTemplates(V_score, rewards, bests, Score, extendScore, include);
+ *bestTemplates += *bests;
+ *bests = template;
+ start = V_score->end - len;
+ }
+ }
+ }
+
+ /* use segment trees to mark "used" regions of query.
+ Allow for quick check of intersection with chosen chains. */
+ growSeqmentTree(chainSeqments, start, best_score->end);
+
+ /* insert anker-boundaries */
+ insertKmerBound((Qseqs *) header, start, best_score->end);
+
+ /* print match */
+ lock(excludeOut);
+ i = deConPrintPtr(bestTemplates, qseq, best_score->score, header, 0, out);
+ unlock(excludeOut);
+
+ /* get next match */
+ best_score->score = 0;
+ *bestTemplates = 0;
+ while(best_score->score == 0) {
+ /* find best anker */
+ if(!(best_score = getBestAnker(&VF_scores, &ties))) {
+ *include = 0;
+ return 0;
+ }
+
+ /* check chain */
+ start = getStartAnker(best_score);
+ cover = queSeqmentTree(chainSeqments->root, start, best_score->end);
+
+ /* verify chain */
+ if(cover < coverT * (best_score->end - start)) {
+ /* get chain */
+ tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
+ len = best_score->end - start;
+ if(best_score->score < mrs * len) {
+ /* silence anker */
+ best_score->score = 0;
+ }
+ } else {
+ /* silence anker */
+ best_score->score = 0;
+ }
+ }
+ }
+ *include = 0;
return 1;
}
+
+
+
+
=====================================
savekmers.h
=====================================
@@ -25,7 +25,6 @@
#ifndef SAVEKMERS
typedef struct kmerScan_thread KmerScan_thread;
-typedef struct kmerAnker KmerAnker;
struct kmerScan_thread {
pthread_t id;
@@ -43,15 +42,6 @@ struct kmerScan_thread {
struct kmerScan_thread *next;
};
-struct kmerAnker {
- int score;
- int weight;
- unsigned start;
- unsigned end;
- unsigned *values;
- struct kmerAnker *next;
-};
-
#define SAVEKMERS 1;
#endif
=====================================
seqmenttree
=====================================
Binary files /dev/null and b/seqmenttree differ
=====================================
seqmenttree.c
=====================================
@@ -0,0 +1,172 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+#include <stdlib.h>
+#include "pherror.h"
+#include "seqmenttree.h"
+
+SeqmentTree * initializeSeqmentTree(const long unsigned size) {
+
+ SeqmentTree *dest;
+
+ dest = smalloc(sizeof(SeqmentTree));
+ dest->n = 0;
+ dest->root = smalloc((dest->size = size) * sizeof(SeqmentTrees));
+
+ return dest;
+}
+
+SeqmentTree * initSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end) {
+
+ SeqmentTrees *node;
+
+ if(!src) {
+ src = initializeSeqmentTree(64);
+ }
+
+ src->n = 1;
+ node = src->root;
+ node->start = start;
+ node->end = end;
+ node->covered = end - start;
+ node->branch[0] = 0;
+ node->branch[1] = 0;
+
+ return src;
+}
+
+void reallocSeqmentTree(SeqmentTree *src) {
+
+ src->root = realloc(src->root, (src->size <<= 1) * sizeof(SeqmentTrees));
+ if(!src->root) {
+ ERROR();
+ }
+}
+
+unsigned addSeqmentTrees(SeqmentTrees *root, SeqmentTrees *node) {
+
+ unsigned pos, covered;
+ SeqmentTrees *bud;
+
+ if(*(root->branch)) { /* search */
+ /* adjust limits of root */
+ if(node->start < root->start && root->end < node->end) {
+ root->start = node->start;
+ root->end = node->end;
+ root->covered = node->covered;
+ node->covered = 0;
+ *(root->branch) = 0;
+ return root->covered;
+ } else if(root->end < node->end) {
+ root->end = node->end;
+ } else if(node->start < root->start) {
+ root->start = node->start;
+ }
+
+ /* search tree */
+ if(node->end < (pos = root->branch[1]->start)) { /* left */
+ root->covered = root->branch[1]->covered + addSeqmentTrees(root->branch[0], node);
+ } else if(pos <= node->start) { /* right */
+ root->covered = root->branch[0]->covered + addSeqmentTrees(root->branch[1], node);
+ } else { /* split */
+ /* calculate right side */
+ pos = node->start;
+ node->start = root->branch[0]->end + 1;
+ node->covered = node->end - node->start;
+ covered = addSeqmentTrees(root->branch[1], node);
+
+ /* here */
+ if(node->covered) {
+ fprintf(stderr, "I was wrong about split seqmenttrees.\n");
+ exit(1);
+ }
+
+ /* calculate left side */
+ node->start = pos;
+ node->end = root->branch[0]->end;
+ node->covered = node->end - node->start;
+ root->covered = covered + addSeqmentTrees(root->branch[0], node);
+
+ /* here */
+ if(node->covered) {
+ fprintf(stderr, "I was wrong about split seqmenttrees.\n");
+ exit(1);
+ }
+ }
+ } else if((pos = node->end < root->start) || (pos = root->end < node->start)) { /* new leaf */
+ /* create and grow bud */
+ bud = node + 1;
+ root->branch[pos] = node;
+ root->branch[pos] = bud;
+
+ /* form new leaf */
+ bud->start = root->start;
+ bud->end = root->end;
+ bud->covered = root->covered;
+ *(bud->branch) = 0;
+
+ /* update the root */
+ root->covered += node->covered;
+ } else { /* extend leaf */
+ if(node->start < root->start) { /* extend left */
+ root->start = node->start;
+ } else if(root->end < node->end) { /* extend right */
+ root->end = node->end;
+ }
+ node->covered = 0;
+ root->covered = root->end - root->start;
+ }
+
+
+ return root->covered;
+}
+
+void growSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end) {
+
+ SeqmentTrees *node;
+
+ /* make room for new anker */
+ if(src->size <= src->n + 2) {
+ reallocSeqmentTree(src);
+ }
+
+ /* make new leaf */
+ node = src->root + src->n;
+ node->start = start;
+ node->end = end;
+ node->covered = end - start;
+ *(node->branch) = 0;
+
+ src->root->covered = addSeqmentTrees(src->root, node);
+
+ if(node->covered) {
+ src->n += 2;
+ }
+}
+
+unsigned queSeqmentTree(SeqmentTrees *src, const unsigned start, const unsigned end) {
+
+ if(end < src->start || src->end < start) {
+ return 0;
+ } else if(src->start <= start && end <= src->end) {
+ return src->covered;
+ } else {
+ return queSeqmentTree(src->branch[0], start, end) + queSeqmentTree(src->branch[1], start, end);
+ }
+}
=====================================
seqmenttree.h
=====================================
@@ -0,0 +1,43 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+//cc -Wall -O3 -std=c99 -c -o seqmenttree seqmenttree.c
+#ifndef SEQMENTTREE
+#define SEQMENTTREE 1
+typedef struct seqmentTree SeqmentTree;
+typedef struct seqmentTrees SeqmentTrees;
+struct seqmentTree {
+ unsigned n;
+ unsigned size;
+ struct seqmentTrees *root;
+};
+struct seqmentTrees {
+ unsigned start;
+ unsigned end;
+ unsigned covered;
+ struct seqmentTrees *branch[2];
+};
+#endif
+
+SeqmentTree * initializeSeqmentTree(long unsigned size);
+SeqmentTree * initSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end);
+void reallocSeqmentTree(SeqmentTree *src);
+unsigned addSeqmentTrees(SeqmentTrees *root, SeqmentTrees *node);
+void growSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end);
+unsigned queSeqmentTree(SeqmentTrees *src, const unsigned start, const unsigned end);
=====================================
sparse.c
=====================================
@@ -400,14 +400,12 @@ int save_kmers_sparse_batch(char *templatefilename, char *outputfilename, char *
Kmers = smalloc(sizeof(CompKmers));
allocCompKmers(Kmers, 1024);
Ntot = 0;
-
/* count kmers */
while((Kmers->n = fread(Kmers->kmers, sizeof(long unsigned), Kmers->size, inputfile))) {
Ntot += Kmers->n;
save_kmers_sparse(templates, foundKmers, Kmers);
}
kmaPipe(0, 0, inputfile, &status);
-
if(kmaPipe == &kmaPipeFork) {
t1 = clock();
fprintf(stderr, "#\n# Total time used to identify k-mers in query: %.2f s.\n#\n", difftime(t1, t0) / 1000000);
=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
* limitations under the License.
*/
-#define KMA_VERSION "1.2.10a"
+#define KMA_VERSION "1.2.11"
View it on GitLab: https://salsa.debian.org/med-team/kma/compare/5bfc66d5f5811326eaed52c00ee0f7a38e2ba1e8...7014520656bf2d05a4d4e6163608c4684d750269
--
View it on GitLab: https://salsa.debian.org/med-team/kma/compare/5bfc66d5f5811326eaed52c00ee0f7a38e2ba1e8...7014520656bf2d05a4d4e6163608c4684d750269
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190831/5c941cf5/attachment-0001.html>
More information about the debian-med-commit
mailing list