[med-svn] [Git][med-team/kma][master] 4 commits: New upstream version 1.3.22
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Thu Jun 24 19:37:32 BST 2021
Nilesh Patra pushed to branch master at Debian Med / kma
Commits:
fbe9dfdc by Nilesh Patra at 2021-06-24T23:58:21+05:30
New upstream version 1.3.22
- - - - -
c6ca158d by Nilesh Patra at 2021-06-24T23:58:23+05:30
Update upstream source from tag 'upstream/1.3.22'
Update to upstream version '1.3.22'
with Debian dir a9ae491615bb04fe711e05050d0ad01aafb6419b
- - - - -
115e50ac by Nilesh Patra at 2021-06-24T18:34:09+00:00
Update manpage
- - - - -
049fadc4 by Nilesh Patra at 2021-06-24T18:34:19+00:00
Interim changelog entry
- - - - -
15 changed files:
- KMAspecification.pdf
- align.c
- assembly.c
- debian/changelog
- debian/kma.1
- debian/kma_index.1
- debian/kma_shm.1
- kma.c
- runkma.c
- savekmers.c
- spltdb.c
- updateindex.c
- version.h
- xml.c
- xml.h
Changes:
=====================================
KMAspecification.pdf
=====================================
Binary files a/KMAspecification.pdf and b/KMAspecification.pdf differ
=====================================
align.c
=====================================
@@ -634,7 +634,6 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
}
}
mapQ = 0;
-
if(mem_count) {
points->len = mem_count;
} else {
@@ -807,7 +806,7 @@ int anker_rc(const HashMapCCI *template_index, unsigned char *qseq, int q_len, i
i = q_len - q_start;
q_start = q_len - q_end;
q_end = i;
- i = q_start;
+ i = q_start ? q_start : preseed(template_index, qseq, q_end - q_start);
} else if(q_start) {
i = q_start;
} else {
@@ -957,7 +956,7 @@ int anker_rc(const HashMapCCI *template_index, unsigned char *qseq, int q_len, i
}
}
- if(one2one && bestScore * kmersize < (q_len - kmersize - bestScore)) {
+ if(one2one && bestScore < kmersize && bestScore * kmersize < (q_len - kmersize - bestScore)) {
bestScore = 0;
points->len = 0;
} else if(bestScore == score) {
@@ -1141,7 +1140,7 @@ int anker_rc_comp(const HashMapCCI *template_index, unsigned char *qseq, unsigne
}
}
- if(one2one && bestScore * kmersize < (q_len - kmersize - bestScore)) {
+ if(one2one && bestScore < kmersize && bestScore * kmersize < (q_len - kmersize - bestScore)) {
bestScore = 0;
points->len = 0;
} else if(bestScore == score) {
=====================================
assembly.c
=====================================
@@ -1055,7 +1055,7 @@ void * assemble_KMA_dense_threaded(void *arg) {
}
if(xml_out) {
- hitXML(xml_out, template, template_name, aligned, &alnStat, NWmatrices->rewards, stats[4]);
+ hitXML(xml_out, template, header->seq, aligned, &alnStat, NWmatrices->rewards, stats[4]);
}
} else if(sam && !(sam & 2096)) {
stats[1] = read_score;
@@ -1717,6 +1717,7 @@ void * assemble_KMA(void *arg) {
t_len = thread->t_len;
load = (template_index->len != 0) ? 0 : 1;
matrix->len = 0;
+ seq_in = thread->seq_in;
/* start threads */
aligned_assem->score = 0;
@@ -1931,7 +1932,7 @@ void * assemble_KMA(void *arg) {
}
if(xml_out) {
- hitXML(xml_out, template, template_name, aligned, &alnStat, NWmatrices->rewards, stats[4]);
+ hitXML(xml_out, template, header->seq, aligned, &alnStat, NWmatrices->rewards, stats[4]);
}
} else if(sam && !(sam & 2096)) {
stats[1] = read_score;
=====================================
debian/changelog
=====================================
@@ -1,10 +1,10 @@
-kma (1.3.19-1) UNRELEASED; urgency=medium
+kma (1.3.22-1) UNRELEASED; urgency=medium
- * New upstream version 1.3.19
+ * New upstream version 1.3.22
* d/*.1: Update Manpages
* d/p/blhc.patch: Propagate hardening flags to fix blhc
- -- Nilesh Patra <nilesh at debian.org> Sat, 01 May 2021 22:20:10 +0530
+ -- Nilesh Patra <nilesh at debian.org> Thu, 24 Jun 2021 23:59:48 +0530
kma (1.3.10-1) unstable; urgency=medium
=====================================
debian/kma.1
=====================================
@@ -1,10 +1,10 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
-.TH KMA "1" "May 2021" "kma 1.3.19" "User Commands"
+.TH KMA "1" "June 2021" "kma 1.3.22" "User Commands"
.SH NAME
kma \- maps and/or aligns raw reads to a template database
.SH DESCRIPTION
.IP
-KMA\-1.3.19 maps and/or aligns raw reads to a template database.
+KMA\-1.3.22 maps and/or aligns raw reads to a template database.
.TP
Options:
Desc: Default:
=====================================
debian/kma_index.1
=====================================
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
-.TH KMA_INDEX "1" "May 2021" "kma_index 1.3.19" "User Commands"
+.TH KMA_INDEX "1" "June 2021" "kma_index 1.3.22" "User Commands"
.SH NAME
kma_index \- creates the databases needed to run KMA, from a list of fasta files given
.SH DESCRIPTION
=====================================
debian/kma_shm.1
=====================================
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
-.TH KMA_SHM "1" "May 2021" "kma_shm 1.3.19" "User Commands"
+.TH KMA_SHM "1" "June 2021" "kma_shm 1.3.22" "User Commands"
.SH NAME
kma_shm \- sets up a shared database (sysV) for mapping with KMA
.SH DESCRIPTION
=====================================
kma.c
=====================================
@@ -1301,7 +1301,6 @@ int kma_main(int argc, char *argv[]) {
} else {
myTemplatefilename = 0;
}
- kmersize = 16;
totFrags = 0;
/* SE */
=====================================
runkma.c
=====================================
@@ -206,6 +206,9 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
ERROR();
}
alignLoadPtr = &alignLoad_fly;
+ if(!kmersize) {
+ kmersize = *template_lengths;
+ }
if(kmersize < 4 || 32 < kmersize) {
kmersize = 16;
}
@@ -1399,6 +1402,9 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
/* load databases */
alignLoadPtr = &alignLoad_fly_mem;
+ if(!kmersize) {
+ kmersize = *template_lengths;
+ }
if(kmersize < 4 || 32 < kmersize) {
kmersize = 16;
}
=====================================
savekmers.c
=====================================
@@ -408,12 +408,26 @@ static int clearScore(int *bestTemplates, int *Score) {
return 0;
}
+int testVal(short unsigned *values_s, int template) {
+
+ int i;
+
+ i = *values_s;
+ while(--i) {
+ if(*++values_s == template) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, int *extendScore, const int exhaustive) {
/* save_kmers find ankering k-mers the in query sequence,
and is the time determining step */
int i, j, l, rc, end, HIT, gaps, score, Ms, MMs, Us, W1s, template, SU;
- int hitCounter, bestSeqCount, kmersize, shifter, W1, U, M, MM;
+ int hitCounter, bestSeqCount, kmersize, shifter, W1, U, M, MM, m, mm;
int *bests, *Scores;
unsigned *values, *last, n;
short unsigned *values_s;
@@ -478,157 +492,132 @@ int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, in
for(;j < end; ++j) {
if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
if(values == last) {
- if(kmersize < gaps) {
+ /*
+ gaps == 0 -> Match
+ gaps == kmersize -> 1 MM
+ kmersize < gaps -> several mismatches or indel(s)
+ gaps < kmersize -> deletion
+ */
+ if(gaps == 0) {
+ /* match */
+ ++Ms;
+ } else if(gaps == kmersize) {
+ /* snp */
Ms += kmersize;
- gaps -= kmersize;
- if(gaps) {
- /* go for best scenario */
- if(gaps == 1) {
- MMs += 2;
- } else {
- gaps -= 2;
- if((MM << 1) + gaps * M < 0) {
- Ms += gaps;
- MMs += 2;
- }
- }
+ ++MMs;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ Ms += kmersize;
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- ++MMs;
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if (gaps) {
- --gaps;
- ++W1s;
- Us += gaps;
- } else {
- ++Ms;
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ MMs += mm;
+ Ms += m;
+ } else {
+ ++W1s;
+ Us += (gaps -1);
+ }
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+
HIT = j;
gaps = 0;
} else {
if(last) {
- if(HIT) {
- HIT += kmersize;
- } else {
- HIT = j + kmersize;
- }
score = Ms * M + MMs * MM + Us * U + W1s * W1;
- if(SU) {
- values_s = (short unsigned *) last;
- l = (*values_s) + 1;
- while(--l) {
- Scores[(template = values_s[l])] += score;
- extendScore[template] = HIT;
- }
-
- score = kmersize * M;
- MMs = MM << 1;
- values_s = (short unsigned *) values;
- n = *values_s;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values_s[l])]) {
- if(extendScore[template] == HIT) {
- Scores[template] += M;
- } else {
- gaps = extendScore[template] - j - 1;
- Scores[template] += (W1 + gaps * U);
- }
- } else if(Scores[template] != 0) {
- Scores[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Scores[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Scores[template] += Ms;
- }
- }
- } else {
- Scores[template] += MM;
- }
- } else {
- Scores[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bests[++*bests] = template;
- }
- }
- }
- } else {
- l = (*last) + 1;
- while(--l) {
- Scores[(template = last[l])] += score;
- extendScore[template] = HIT;
- }
-
- score = kmersize * M;
- MMs = MM << 1;
- n = *values;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values[l])]) {
- if(extendScore[template] == HIT) {
- Scores[template] += M;
+ values_s = (short unsigned *) last;
+ l = SU ? (*values_s + 1) : (*last + 1);
+ while(--l) {
+ template = SU ? *++values_s : *++last;
+ Scores[template] += score;
+ extendScore[template] = HIT;
+ }
+ HIT = j - 1;
+ last = values;
+ score = kmersize * M;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
+ l = n + 1;
+ while(--l) {
+ template = SU ? *++values_s : *++values;
+ if(Scores[template] != 0) {
+ gaps = HIT - extendScore[template];
+ if(gaps == 0) {
+ /* match */
+ Scores[template] += M;
+ } else if(gaps == kmersize) {
+ /* snp */
+ Scores[template] += score + MM;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- gaps = extendScore[template] - j - 1;
- Scores[template] += (W1 + gaps * U);
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if(Scores[template] != 0) {
- Scores[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Scores[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Scores[template] += Ms;
- }
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ Scores[template] += score + (mm * MM + m * M);
} else {
- Scores[template] += MM;
- }
- } else {
- Scores[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bests[++*bests] = template;
+ Scores[template] += score + (W1 + (gaps - 1) * U);
}
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+ } else {
+ Scores[template] = score;
+ if(include[template] == 0) {
+ include[template] = 1;
+ bests[++*bests] = template;
}
}
}
- } else if(SU) {
- values_s = (short unsigned *) values;
- n = *values_s;
- Ms = kmersize * M;
- for(l = 1; l <= n; ++l) {
- Scores[(template = values_s[l])] = Ms;
- include[template] = 1;
- bests[l] = template;
- }
- *bests = n;
} else {
- n = *values;
+ last = values;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
Ms = kmersize * M;
for(l = 1; l <= n; ++l) {
- Scores[(template = values[l])] = Ms;
+ template = SU ? *++values_s : *++values;
+ Scores[template] = Ms;
include[template] = 1;
bests[l] = template;
}
*bests = n;
}
- HIT = 0;
+ HIT = j;
gaps = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
- last = values;
}
++hitCounter;
} else {
++gaps;
}
}
+ gaps += (qseq->N[i] + 1 - j); /* gap over N's */
j = qseq->N[i] + 1;
}
@@ -638,12 +627,12 @@ int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, in
values_s = (short unsigned *) last;
l = (*values_s) + 1;
while(--l) {
- Scores[values_s[l]] += score;
+ Scores[*++values_s] += score;
}
} else {
l = (*last) + 1;
while(--l) {
- Scores[last[l]] += score;
+ Scores[*++last] += score;
}
}
for(l = *bests; l != 0; --l) {
@@ -940,7 +929,7 @@ int get_kmers_for_pair_Sparse(const HashMapKMA *templates, const Penalties *rewa
int get_kmers_for_pair_pseoudoSparse(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, int *extendScore, const int exhaustive) {
int i, j, l, n, end, template, hitCounter, gaps, Ms, MMs, Us, W1s;
- int W1, U, M, MM, HIT, SU, kmersize, score, *bests, *Scores;
+ int W1, U, M, MM, HIT, SU, kmersize, score, m, mm, *bests, *Scores;
unsigned shifter, *values, *last;
short unsigned *values_s;
char *include;
@@ -1001,159 +990,135 @@ int get_kmers_for_pair_pseoudoSparse(const HashMapKMA *templates, const Penaltie
for(;j < end; ++j) {
if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
if(values == last) {
- if(kmersize < gaps) {
+ /*
+ gaps == 0 -> Match
+ gaps == kmersize -> 1 MM
+ kmersize < gaps -> several mismatches or indel(s)
+ gaps < kmersize -> deletion
+ */
+ if(gaps == 0) {
+ /* match */
+ ++Ms;
+ } else if(gaps == kmersize) {
+ /* snp */
Ms += kmersize;
- gaps -= kmersize;
- if(gaps) {
- /* go for best scenario */
- if(gaps == 1) {
- MMs += 2;
- } else {
- gaps -= 2;
- if((MM << 1) + gaps * M < 0) {
- Ms += gaps;
- MMs += 2;
- }
- }
+ ++MMs;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ Ms += kmersize;
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- ++MMs;
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if (gaps) {
- --gaps;
- ++W1s;
- Us += gaps;
- } else {
- ++Ms;
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ MMs += mm;
+ Ms += m;
+ } else {
+ ++W1s;
+ Us += (gaps -1);
+ }
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+
HIT = j;
gaps = 0;
} else {
if(last) {
- if(HIT) {
- HIT += kmersize;
- } else {
- HIT = j + kmersize;
- }
score = Ms * M + MMs * MM + Us * U + W1s * W1;
- if(SU) {
- values_s = (short unsigned *) last;
- l = *values_s + 1;
- while(--l) {
- Scores[*++values_s] += score;
- extendScore[*values_s] = HIT;
- }
- } else {
- l = *last + 1;
- while(--l) {
- Scores[*++last] += score;
- extendScore[*last] = HIT;
- }
+ values_s = (short unsigned *) last;
+ l = SU ? (*values_s + 1) : (*last + 1);
+ while(--l) {
+ template = SU ? *++values_s : *++last;
+ Scores[template] += score;
+ extendScore[template] = HIT;
}
+ HIT = j - 1;
+ last = values;
score = kmersize * M;
- MMs = MM << 1;
- if(SU) {
- values_s = (short unsigned *) values;
- n = *values_s + 1;
- while(--n) {
- template = *++values_s;
- if(j < extendScore[template]) {
- if(extendScore[template] == HIT) {
- Scores[template] += M;
- } else {
- gaps = extendScore[template] - j - 1;
- Scores[template] += (W1 + gaps * U);
- }
- } else if(Scores[template] != 0) {
- Scores[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Scores[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Scores[template] += Ms;
- }
- }
- } else {
- Scores[template] += MM;
- }
- } else {
- Scores[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bests[++*bests] = template;
- }
- }
- }
- } else {
- n = *(last = values) + 1;
- while(--n) {
- template = *++values;
- if(j < extendScore[template]) {
- if(extendScore[template] == HIT) {
- Scores[template] += M;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
+ l = n + 1;
+ while(--l) {
+ template = SU ? *++values_s : *++values;
+ if(Scores[template] != 0) {
+ gaps = HIT - extendScore[template];
+ if(gaps == 0) {
+ /* match */
+ Scores[template] += M;
+ } else if(gaps == kmersize) {
+ /* snp */
+ Scores[template] += score + MM;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- gaps = extendScore[template] - j - 1;
- Scores[template] += (W1 + gaps * U);
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if(Scores[template] != 0) {
- Scores[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Scores[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Scores[template] += Ms;
- }
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ Scores[template] += score + (mm * MM + m * M);
} else {
- Scores[template] += MM;
- }
- } else {
- Scores[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bests[++*bests] = template;
+ Scores[template] += score + (W1 + (gaps - 1) * U);
}
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+ } else {
+ Scores[template] = score;
+ if(include[template] == 0) {
+ include[template] = 1;
+ bests[++*bests] = template;
}
}
}
- } else if(SU) {
- values_s = (short unsigned *) values;
- n = *values_s + 1;
- Ms = kmersize * M;
- *bests = 0;
- while(--n) {
- Scores[*++values_s] = Ms;
- include[*values_s] = 1;
- bests[++*bests] = *values_s;
- }
} else {
- n = *(last = values) + 1;
+ last = values;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
Ms = kmersize * M;
- *bests = 0;
- while(--n) {
- Scores[*++last] = Ms;
- include[*last] = 1;
- bests[++*bests] = *last;
+ for(l = 1; l <= n; ++l) {
+ template = SU ? *++values_s : *++values;
+ Scores[template] = Ms;
+ include[template] = 1;
+ bests[l] = template;
}
+ *bests = n;
}
- HIT = 0;
+
+ HIT = j;
gaps = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
- last = values;
}
++hitCounter;
} else {
++gaps;
}
}
+ gaps += (qseq->N[i] + 1 - j); /* gap over N's */
j = qseq->N[i] + 1;
}
+
if(last) {
score = Ms * M + MMs * MM + Us * U + W1s * W1;
if(SU) {
@@ -2019,7 +1984,7 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
}
i = 0;
- if(bestScore && bestScore * kmersize > end) {
+ if(kmersize <= bestScore || bestScore * kmersize > end) {
lock(excludeOut);
i = deConPrintPtr(bestTemplates, qseq, bestScore, header, flag, out);
unlock(excludeOut);
@@ -2031,8 +1996,7 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
int i, j, l, n, end, template, hitCounter, gaps, Ms, MMs, Us, W1s;
- int HIT, SU, score, bestScore, kmersize;
- int W1, U, M, MM;
+ int HIT, SU, score, bestScore, kmersize, W1, U, M, MM, m, mm;
unsigned shifter, *values, *last;
short unsigned *values_s;
char *include;
@@ -2083,156 +2047,132 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
for(;j < end; ++j) {
if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
if(values == last) {
- if(kmersize < gaps) {
+ /*
+ gaps == 0 -> Match
+ gaps == kmersize -> 1 MM
+ kmersize < gaps -> several mismatches or indel(s)
+ gaps < kmersize -> deletion
+ */
+ if(gaps == 0) {
+ /* match */
+ ++Ms;
+ } else if(gaps == kmersize) {
+ /* snp */
Ms += kmersize;
- gaps -= kmersize;
- if(gaps) {
- /* go for best scenario */
- if(gaps == 1) {
- MMs += 2;
- } else {
- gaps -= 2;
- if((MM << 1) + gaps * M < 0) {
- Ms += gaps;
- MMs += 2;
- }
- }
+ ++MMs;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ Ms += kmersize;
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- ++MMs;
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if (gaps) {
- --gaps;
- ++W1s;
- Us += gaps;
- } else {
- ++Ms;
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ MMs += mm;
+ Ms += m;
+ } else {
+ ++W1s;
+ Us += (gaps -1);
+ }
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+
HIT = j;
gaps = 0;
} else {
if(last) {
- if(HIT) {
- HIT += kmersize;
- } else {
- HIT = j + kmersize;
- }
score = Ms * M + MMs * MM + Us * U + W1s * W1;
- if(SU) {
- values_s = (short unsigned *) last;
- l = (*values_s) + 1;
- while(--l) {
- Score[(template = values_s[l])] += score;
- extendScore[template] = HIT;
- }
- } else {
- l = (*last) + 1;
- while(--l) {
- Score[(template = last[l])] += score;
- extendScore[template] = HIT;
- }
+ values_s = (short unsigned *) last;
+ l = SU ? (*values_s + 1) : (*last + 1);
+ while(--l) {
+ template = SU ? *++values_s : *++last;
+ Score[template] += score;
+ extendScore[template] = HIT;
}
-
+ HIT = j - 1;
+ last = values;
score = kmersize * M;
- MMs = MM << 1;
- if(SU) {
- values_s = (short unsigned *) values;
- n = *values_s;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values_s[l])]) {
- if(extendScore[template] == HIT) {
- Score[template] += M;
- } else {
- gaps = extendScore[template] - j - 1;
- Score[template] += (W1 + gaps * U);
- }
- } else if(Score[template] != 0) {
- Score[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Score[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Score[template] += Ms;
- }
- }
- } else {
- Score[template] += MM;
- }
- } else {
- Score[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bestTemplates[++*bestTemplates] = template;
- }
- }
- }
- } else {
- n = *values;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values[l])]) {
- if(extendScore[template] == HIT) {
- Score[template] += M;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
+ l = n + 1;
+ while(--l) {
+ template = SU ? *++values_s : *++values;
+ if(Score[template] != 0) {
+ gaps = HIT - extendScore[template];
+ if(gaps == 0) {
+ /* match */
+ Score[template] += M;
+ } else if(gaps == kmersize) {
+ /* snp */
+ Score[template] += score + MM;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- gaps = extendScore[template] - j - 1;
- Score[template] += (W1 + gaps * U);
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if(Score[template] != 0) {
- Score[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Score[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Score[template] += Ms;
- }
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ Score[template] += score + (mm * MM + m * M);
} else {
- Score[template] += MM;
- }
- } else {
- Score[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bestTemplates[++*bestTemplates] = template;
+ Score[template] += score + (W1 + (gaps - 1) * U);
}
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+ } else {
+ Score[template] = score;
+ if(include[template] == 0) {
+ include[template] = 1;
+ bestTemplates[++*bestTemplates] = template;
}
}
}
- } else if(SU) {
- values_s = (short unsigned *) values;
- n = *values_s;
- Ms = kmersize * M;
- for(l = 1; l <= n; ++l) {
- Score[(template = values_s[l])] = Ms;
- include[template] = 1;
- bestTemplates[l] = template;
- }
- *bestTemplates = n;
} else {
- n = *values;
+ last = values;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
Ms = kmersize * M;
for(l = 1; l <= n; ++l) {
- Score[(template = values[l])] = Ms;
+ template = SU ? *++values_s : *++values;
+ Score[template] = Ms;
include[template] = 1;
bestTemplates[l] = template;
}
*bestTemplates = n;
}
- HIT = 0;
+
+ HIT = j;
gaps = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
- last = values;
}
++hitCounter;
} else {
++gaps;
}
}
+ gaps += (qseq->N[i] + 1 - j); /* gap over N's */
j = qseq->N[i] + 1;
}
if(last) {
@@ -2294,7 +2234,7 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
end = qseq->seqlen + 1 - bestScore;
i = 0;
- if(bestScore && bestScore * kmersize > end) {
+ if(kmersize <= bestScore || bestScore * kmersize > end) {
lock(excludeOut);
i = deConPrintPtr(bestTemplates, qseq, bestScore, header, 0, out);
unlock(excludeOut);
@@ -2306,7 +2246,7 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
int i, j, l, end, HIT, gaps, score, Ms, MMs, Us, W1s, W1, U, M, MM;
- int template, hitCounter, bestScore, bestScore_r, kmersize;
+ int template, hitCounter, bestScore, bestScore_r, kmersize, m, mm;
unsigned *values, *last, n, SU, shifter;
short unsigned *values_s;
char *include;
@@ -2377,156 +2317,132 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
for(;j < end; ++j) {
if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
if(values == last) {
- if(kmersize < gaps) {
+ /*
+ gaps == 0 -> Match
+ gaps == kmersize -> 1 MM
+ kmersize < gaps -> several mismatches or indel(s)
+ gaps < kmersize -> deletion
+ */
+ if(gaps == 0) {
+ /* match */
+ ++Ms;
+ } else if(gaps == kmersize) {
+ /* snp */
Ms += kmersize;
- gaps -= kmersize;
- if(gaps) {
- /* go for best scenario */
- if(gaps == 1) {
- MMs += 2;
- } else {
- gaps -= 2;
- if((MM << 1) + gaps * M < 0) {
- Ms += gaps;
- MMs += 2;
- }
- }
+ ++MMs;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ Ms += kmersize;
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- ++MMs;
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if (gaps) {
- --gaps;
- ++W1s;
- Us += gaps;
- } else {
- ++Ms;
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ MMs += mm;
+ Ms += m;
+ } else {
+ ++W1s;
+ Us += (gaps -1);
+ }
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+
HIT = j;
gaps = 0;
} else {
if(last) {
- if(HIT) {
- HIT += kmersize;
- } else {
- HIT = j + kmersize;
- }
score = Ms * M + MMs * MM + Us * U + W1s * W1;
- if(SU) {
- values_s = (short unsigned *) last;
- l = (*values_s) + 1;
- while(--l) {
- Score[(template = values_s[l])] += score;
- extendScore[template] = HIT;
- }
-
- score = kmersize * M;
- MMs = MM << 1;
- values_s = (short unsigned *) values;
- n = *values_s;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values_s[l])]) {
- if(extendScore[template] == HIT) {
- Score[template] += M;
- } else {
- gaps = extendScore[template] - j - 1;
- Score[template] += (W1 + gaps * U);
- }
- } else if(Score[template] != 0) {
- Score[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Score[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Score[template] += Ms;
- }
- }
- } else {
- Score[template] += MM;
- }
- } else {
- Score[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bestTemplates[++*bestTemplates] = template;
- }
- }
- }
- } else {
- l = (*last) + 1;
- while(--l) {
- Score[(template = last[l])] += score;
- extendScore[template] = HIT;
- }
-
- score = kmersize * M;
- MMs = MM << 1;
- n = *values;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values[l])]) {
- if(extendScore[template] == HIT) {
- Score[template] += M;
+ values_s = (short unsigned *) last;
+ l = SU ? (*values_s + 1) : (*last + 1);
+ while(--l) {
+ template = SU ? *++values_s : *++last;
+ Score[template] += score;
+ extendScore[template] = HIT;
+ }
+ HIT = j - 1;
+ last = values;
+ score = kmersize * M;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
+ l = n + 1;
+ while(--l) {
+ template = SU ? *++values_s : *++values;
+ if(Score[template] != 0) {
+ gaps = HIT - extendScore[template];
+ if(gaps == 0) {
+ /* match */
+ Score[template] += M;
+ } else if(gaps == kmersize) {
+ /* snp */
+ Score[template] += score + MM;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- gaps = extendScore[template] - j - 1;
- Score[template] += (W1 + gaps * U);
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if(Score[template] != 0) {
- Score[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Score[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Score[template] += Ms;
- }
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ Score[template] += score + (mm * MM + m * M);
} else {
- Score[template] += MM;
- }
- } else {
- Score[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bestTemplates[++*bestTemplates] = template;
+ Score[template] += score + (W1 + (gaps - 1) * U);
}
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+ } else {
+ Score[template] = score;
+ if(include[template] == 0) {
+ include[template] = 1;
+ bestTemplates[++*bestTemplates] = template;
}
}
}
- } else if(SU) {
- values_s = (short unsigned *) values;
- n = *values_s;
- Ms = kmersize * M;
- for(l = 1; l <= n; ++l) {
- Score[(template = values_s[l])] = Ms;
- include[template] = 1;
- bestTemplates[l] = template;
- }
- *bestTemplates = n;
} else {
- n = *values;
+ last = values;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
Ms = kmersize * M;
for(l = 1; l <= n; ++l) {
- Score[(template = values[l])] = Ms;
+ template = SU ? *++values_s : *++values;
+ Score[template] = Ms;
include[template] = 1;
bestTemplates[l] = template;
}
*bestTemplates = n;
}
- HIT = 0;
+
+ HIT = j;
gaps = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
- last = values;
}
++hitCounter;
} else {
++gaps;
}
}
+ gaps += (qseq->N[i] + 1 - j); /* gap over N's */
j = qseq->N[i] + 1;
}
if(last) {
@@ -2620,156 +2536,132 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
for(;j < end; ++j) {
if((values = hashMap_get(templates, getKmer(qseq_r->seq, j, shifter)))) {
if(values == last) {
- if(kmersize < gaps) {
+ /*
+ gaps == 0 -> Match
+ gaps == kmersize -> 1 MM
+ kmersize < gaps -> several mismatches or indel(s)
+ gaps < kmersize -> deletion
+ */
+ if(gaps == 0) {
+ /* match */
+ ++Ms;
+ } else if(gaps == kmersize) {
+ /* snp */
Ms += kmersize;
- gaps -= kmersize;
- if(gaps) {
- /* go for best scenario */
- if(gaps == 1) {
- MMs += 2;
- } else {
- gaps -= 2;
- if((MM << 1) + gaps * M < 0) {
- Ms += gaps;
- MMs += 2;
- }
- }
+ ++MMs;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ Ms += kmersize;
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- ++MMs;
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if (gaps) {
- --gaps;
- ++W1s;
- Us += gaps;
- } else {
- ++Ms;
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ MMs += mm;
+ Ms += m;
+ } else {
+ ++W1s;
+ Us += (gaps -1);
+ }
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+
HIT = j;
gaps = 0;
} else {
if(last) {
- if(HIT) {
- HIT += kmersize;
- } else {
- HIT = j + kmersize;
- }
score = Ms * M + MMs * MM + Us * U + W1s * W1;
- if(SU) {
- values_s = (short unsigned *) last;
- l = (*values_s) + 1;
- while(--l) {
- Score_r[(template = values_s[l])] += score;
- extendScore[template] = HIT;
- }
-
- score = kmersize * M;
- MMs = MM << 1;
- values_s = (short unsigned *) values;
- n = *values_s;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values_s[l])]) {
- if(extendScore[template] == HIT) {
- Score_r[template] += M;
- } else {
- gaps = extendScore[template] - j - 1;
- Score_r[template] += (W1 + gaps * U);
- }
- } else if(Score_r[template] != 0) {
- Score_r[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Score_r[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Score_r[template] += Ms;
- }
- }
- } else {
- Score_r[template] += MM;
- }
- } else {
- Score_r[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bestTemplates_r[++*bestTemplates_r] = template;
- }
- }
- }
- } else {
- l = (*last) + 1;
- while(--l) {
- Score_r[(template = last[l])] += score;
- extendScore[template] = HIT;
- }
-
- score = kmersize * M;
- MMs = MM << 1;
- n = *values;
- for(l = 1; l <= n; ++l) {
- if(j < extendScore[(template = values[l])]) {
- if(extendScore[template] == HIT) {
- Score_r[template] += M;
+ values_s = (short unsigned *) last;
+ l = SU ? (*values_s + 1) : (*last + 1);
+ while(--l) {
+ template = SU ? *++values_s : *++last;
+ Score_r[template] += score;
+ extendScore[template] = HIT;
+ }
+ HIT = j - 1;
+ last = values;
+ score = kmersize * M;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
+ l = n + 1;
+ while(--l) {
+ template = SU ? *++values_s : *++values;
+ if(Score_r[template] != 0) {
+ gaps = HIT - extendScore[template];
+ if(gaps == 0) {
+ /* match */
+ Score_r[template] += M;
+ } else if(gaps == kmersize) {
+ /* snp */
+ Score_r[template] += score + MM;
+ } else if(kmersize < gaps) {
+ /* mismatch or insersion */
+ gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+ if(gaps <= 2) {
+ mm = gaps;
+ m = 0;
} else {
- gaps = extendScore[template] - j - 1;
- Score_r[template] += (W1 + gaps * U);
+ mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+ mm = MAX(2, mm);
+ m = MIN(gaps - mm, kmersize);
+ m = MIN(m, mm);
}
- } else if(Score_r[template] != 0) {
- Score_r[template] += score;
- if((gaps = extendScore[template] - j)) {
- if(gaps == 1) {
- Score_r[template] += MMs;
- } else {
- gaps -= 2;
- if((Ms = MMs + gaps * M) < 0) {
- Score_r[template] += Ms;
- }
- }
+
+ /* evaluate best option */
+ if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+ Score_r[template] += score + (mm * MM + m * M);
} else {
- Score_r[template] += MM;
- }
- } else {
- Score_r[template] = score;
- if(include[template] == 0) {
- include[template] = 1;
- bestTemplates_r[++*bestTemplates_r] = template;
+ Score_r[template] += score + (W1 + (gaps - 1) * U);
}
+ } /*else {
+ // unlikely deletion or random k-mer mismatch,
+ // assume better and go random zero score
+ }*/
+ } else {
+ Score_r[template] = score;
+ if(include[template] == 0) {
+ include[template] = 1;
+ bestTemplates_r[++*bestTemplates_r] = template;
}
}
}
- } else if(SU) {
- values_s = (short unsigned *) values;
- n = *values_s;
- Ms = kmersize * M;
- for(l = 1; l <= n; ++l) {
- Score_r[(template = values_s[l])] = Ms;
- include[template] = 1;
- bestTemplates_r[l] = template;
- }
- *bestTemplates_r = n;
} else {
- n = *values;
+ last = values;
+ values_s = (short unsigned *) values;
+ n = SU ? *values_s : *values;
Ms = kmersize * M;
for(l = 1; l <= n; ++l) {
- Score_r[(template = values[l])] = Ms;
+ template = SU ? *++values_s : *++values;
+ Score_r[template] = Ms;
include[template] = 1;
bestTemplates_r[l] = template;
}
*bestTemplates_r = n;
}
- HIT = 0;
+
+ HIT = j;
gaps = 0;
Ms = 0;
MMs = 0;
Us = 0;
W1s = 0;
- last = values;
}
++hitCounter;
} else {
++gaps;
}
}
+ gaps += (qseq->N[i] + 1 - j); /* gap over N's */
j = qseq_r->N[i] + 1;
}
if(last) {
@@ -2833,7 +2725,8 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
i = 0;
if(bestScore > 0 || bestScore_r > 0) {
end = qseq->seqlen + 1;
- if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+ //if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+ if(kmersize <= bestScore || kmersize <= bestScore_r) {
if(bestScore > bestScore_r) {
lock(excludeOut);
i = deConPrintPtr(bestTemplates, qseq, bestScore, header, 0, out);
@@ -3105,7 +2998,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
i = 0;
if(bestScore > 0 || bestScore_r > 0) {
end = qseq->seqlen + 1;
- if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+ //if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+ if(kmersize <= bestScore || kmersize <= bestScore_r) {
if(bestScore > bestScore_r) {
lock(excludeOut);
i = deConPrintPtr(bestTemplates, qseq, bestScore, header, 0, out);
@@ -3148,7 +3042,7 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
/* got hits */
bestScore = getF(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
- if(bestScore * kmersize < (qseq->seqlen - bestScore)) {
+ if(kmersize < bestScore && bestScore * kmersize < (qseq->seqlen - bestScore)) {
bestScore = 0;
}
} else {
@@ -3176,7 +3070,7 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
} else {
bestScore_r = getF(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
}
- if(bestScore_r * kmersize < (qseq_r->seqlen - bestScore_r)) {
+ if(kmersize < bestScore_r && bestScore_r * kmersize < (qseq_r->seqlen - bestScore_r)) {
bestScore_r = 0;
*regionTemplates = abs(*regionTemplates);
}
@@ -3379,7 +3273,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
flag |= 2;
flag_r |= 2;
compScore = MIN((hitCounter + hitCounter_r), (bestScore + bestScore_r));
- if((qseq->seqlen + qseq_r->seqlen - compScore - (kmersize << 1)) < compScore * kmersize) {
+ if(kmersize <= compScore || (qseq->seqlen + qseq_r->seqlen - compScore - (kmersize << 1)) < compScore * kmersize) {
*regionTemplates = -(*regionTemplates);
if(0 < regionTemplates[1]) {
if(rev) {
@@ -3425,7 +3319,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
}
} else {
hitCounter = MIN(hitCounter, bestScore);
- hitCounter = (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize;
+ hitCounter = kmersize <= hitCounter || (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize;
if(hitCounter) {
if(0 < regionTemplates[1]) {
if(rev) {
@@ -3445,7 +3339,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
}
}
hitCounter_r = MIN(hitCounter_r, bestScore_r);
- hitCounter_r = (qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize;
+ hitCounter_r = kmersize <= hitCounter_r || (qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize;
if(hitCounter_r) {
if(0 < bestTemplates[1]) {
if(rev) {
@@ -3485,7 +3379,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
return i;
} else if(0 < bestScore) {
hitCounter = MIN(hitCounter, bestScore);
- if((qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+ if(kmersize <= hitCounter || (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
if(rev) {
flag |= 8;
flag |= 32;
@@ -3516,7 +3410,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
}
} else if(0 < bestScore_r) {
hitCounter_r = MIN(hitCounter_r, bestScore_r);
- if((qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize) {
+ if(kmersize <= hitCounter_r || (qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize) {
if(rev) {
flag_r |= 8;
flag_r |= 32;
@@ -3578,7 +3472,7 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) &&
(bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {
- if((qseq->seqlen + qseq_r->seqlen - bestScore) < bestScore * kmersize) {
+ if(kmersize <= bestScore || (qseq->seqlen + qseq_r->seqlen - bestScore) < bestScore * kmersize) {
flag = 67;
flag_r = 131;
=====================================
spltdb.c
=====================================
@@ -379,10 +379,10 @@ unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *he
cfread(header->seq, 1, header->len, inputfile);
} else {
/* in the case of equally well scoring DBs */
- fseek(inputfile, infoSize[1] * sizeof(long unsigned) + infoSize[2] * sizeof(int), SEEK_CUR);
+ sfseek(inputfile, infoSize[1] * sizeof(long unsigned) + infoSize[2] * sizeof(int), SEEK_CUR);
*out_Tem = infoSize[4];
cfread(out_Tem + 1, sizeof(int), *out_Tem, inputfile);
- fseek(inputfile, infoSize[5], SEEK_CUR);
+ sfseek(inputfile, infoSize[5], SEEK_CUR);
}
/* get info for next read */
@@ -430,6 +430,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
AlnPoints *points;
NWmat *NWmatrices;
Assemble_thread *threads, *thread;
+ HashMapCCI *template_index;
if(!outputfilename) {
fprintf(stderr, " No output file specified!\n");
@@ -471,6 +472,9 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
}
}
dbBiases[i] = DB_size;
+ if(!kmersize) {
+ kmersize = *template_lengths;
+ }
if(kmersize < 4 || 32 < kmersize) {
kmersize = 16;
}
@@ -599,7 +603,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
/* open input streams */
file_len = strlen(outputfilename);
for(i = 0; i < targetNum; ++i) {
- sprintf(outputfilename + file_len, ".%d", i);
+ j = sprintf(outputfilename + file_len, ".%d", i);
while(!(inputfile = fopen(outputfilename, "rb"))) {
usleep(100);
}
@@ -1401,6 +1405,10 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
aligned_assem->t = smalloc(aligned_assem->size);
aligned_assem->s = smalloc(aligned_assem->size);
aligned_assem->q = smalloc(aligned_assem->size);
+ seq_in_no = 0;
+ template_index = smalloc(sizeof(HashMapCCI));
+ template_index->size = 0;
+ hashMapCCI_initialize(template_index, matrix->size, kmersize);
/* allocate matrcies for NW */
i = 1;
@@ -1455,7 +1463,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
thread->points = seedPoint_init(delta, rewards);
thread->points->len = 0;
thread->spin = (sparse < 0) ? 10 : 100;
-
+ thread->template_index = template_index;
thread->next = threads;
threads = thread;
@@ -1521,6 +1529,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
thread->points->len = 0;
thread->next = 0;
thread->spin = (sparse < 0) ? 10 : 100;
+ thread->template_index = template_index;
/* Do local assemblies of fragments mapping to the same template */
depth = 0;
@@ -1550,6 +1559,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
ERROR();
}
+ thread->seq_in = seq_in_no;
templatefilename[file_len] = 0;
strcat(templatefilename, ".name");
name_file = sfopen(templatefilename, "rb");
@@ -1593,7 +1603,6 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
printExtendedFeatures(templatefilename, 0, 0, 0, extendedFeatures_out);
}
} else if(w_scores[template] > 0) {
-
if(progress) {
counter += w_scores[template];
fprintf(stderr, "# Progress:\t%3lu%%\r", 100 * counter / Nhits);
@@ -1628,10 +1637,11 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
if(xml) {
newIterXML(xml_out, template, t_len, thread->template_name);
}
+
/* Do assembly */
//status |= assemblyPtr(aligned_assem, template, template_fragments, fileCount, frag_out, aligned, gap_align, qseq, header, matrix, points, NWmatrices);
thread->template = template;
- thread->t_len = 0;
+ thread->t_len = t_len;
assembly_KMA_Ptr(thread);
/* Depth, ID and coverage */
@@ -1718,9 +1728,6 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
}
}
- /* destroy index */
- hashMapCCI_destroy(thread->template_index);
-
/* Close files */
close(seq_in_no);
fclose(res_out);
=====================================
updateindex.c
=====================================
@@ -75,7 +75,7 @@ int updateDBs_sparse(HashMap *templates, CompDNA *qseq, unsigned template, int M
template_slengths[template] = 0;
template_ulengths[template] = 0;
for(rc = 0; rc < 2; ++rc) {
- /* revers complement */
+ /* reverse complement */
if(rc) {
comp_rc(qseq);
}
@@ -114,6 +114,9 @@ int updateDBs_sparse(HashMap *templates, CompDNA *qseq, unsigned template, int M
qseq->N[0]--;
}
}
+ if(prefix_len == 0) {
+ comp_rc(qseq);
+ }
return 1;
}
=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
* limitations under the License.
*/
-#define KMA_VERSION "1.3.19"
+#define KMA_VERSION "1.3.22"
=====================================
xml.c
=====================================
@@ -144,11 +144,11 @@ void capIterXML(FILE *out, const int DB_size, const long unsigned seqsize, const
fprintf(out, "</Iteration>\n");
}
-void hitXML(FILE *out, const int template, const char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag) {
+void hitXML(FILE *out, const int template, const unsigned char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag) {
static volatile int Lock = 0;
- volatile int *lock = &Lock;
static int num = 0;
+ volatile int *lock = &Lock;
int i, Ms, MMs, W1s, Us, gap, pos, **d;
unsigned char *t, *q;
char *s, bases[6] = "ACGTN-";
@@ -227,4 +227,17 @@ void hitXML(FILE *out, const int template, const char *template_name, const Aln
fprintf(out, "\t</Hit_hsps>\n");
fprintf(out, "</Hit>\n");
unlock(lock);
+
+ /* here */
+ /*
+ fprintf(stdout, "%s\n", template_name);
+ i = 0;
+ for(i = 0; i < aligned->len; i += 60) {
+ fprintf(stdout, "t:\t%.60s\n", aligned->t + i);
+ fprintf(stdout, "s:\t%.60s\n", aligned->s + i);
+ fprintf(stdout, "q:\t%.60s\n\n", aligned->q + i);
+ }
+ fprintf(stdout, "\n\n");
+ */
+
}
=====================================
xml.h
=====================================
@@ -26,4 +26,4 @@ void closeCapXML(FILE *out);
void newIterXML(FILE *out, const int template, const int t_len, const char *template_name);
double getEntropy(const unsigned char *aligned_assem_q, const int len);
void capIterXML(FILE *out, const int DB_size, const long unsigned seqsize, const int t_len, const int readCounts, const double p_value, const long read_score, const unsigned char *aligned_assem_q, const int len);
-void hitXML(FILE *out, const int template, const char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag);
+void hitXML(FILE *out, const int template, const unsigned char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag);
View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/24c4ad423ca629e3f4c13de63d5e99e2c31a1ff8...049fadc4b9b12b6ff575bd4bdfb0d7e6551dc3ef
--
View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/24c4ad423ca629e3f4c13de63d5e99e2c31a1ff8...049fadc4b9b12b6ff575bd4bdfb0d7e6551dc3ef
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210624/63d3c3fc/attachment-0001.htm>
More information about the debian-med-commit
mailing list