[med-svn] [Git][med-team/kma][master] 7 commits: New upstream version 1.3.11
Nilesh Patra
gitlab at salsa.debian.org
Sat May 1 17:55:01 BST 2021
Nilesh Patra pushed to branch master at Debian Med / kma
Commits:
4ac3c304 by Nilesh Patra at 2021-02-14T20:50:41+05:30
New upstream version 1.3.11
- - - - -
45823d7f by Nilesh Patra at 2021-03-21T00:23:19+05:30
New upstream version 1.3.13
- - - - -
8e430e2c by Nilesh Patra at 2021-04-14T15:27:03+05:30
New upstream version 1.3.14
- - - - -
205feb04 by Nilesh Patra at 2021-05-01T22:19:55+05:30
New upstream version 1.3.15
- - - - -
576585d8 by Nilesh Patra at 2021-05-01T22:19:57+05:30
Update upstream source from tag 'upstream/1.3.15'
Update to upstream version '1.3.15'
with Debian dir 659799a31efb554176f0bb1d47d2f524bd1004e5
- - - - -
9228ebbf by Nilesh Patra at 2021-05-01T22:20:00+05:30
d/p/blhc.patch: Propagate hardening flags to fix blhc
- - - - -
3a0c2e08 by Nilesh Patra at 2021-05-01T22:21:07+05:30
Interim changelog entry
- - - - -
30 changed files:
- align.c
- alnfrags.c
- assembly.c
- chain.c
- chain.h
- debian/changelog
- + debian/patches/blhc.patch
- + debian/patches/series
- dist.c
- frags.c
- index.c
- kma.c
- matrix.c
- mt1.c
- mt1.h
- nw.c
- nw.h
- runkma.c
- runkma.h
- sam.c
- savekmers.c
- sparse.c
- spltdb.c
- spltdb.h
- stdstat.c
- stdstat.h
- vcf.c
- vcf.h
- version.h
- xml.c
Changes:
=====================================
align.c
=====================================
@@ -201,7 +201,8 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
Stat.score = 0;
Stat.len = 1;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
Stat.pos = 0;
aligned->s[0] = 0;
aligned->len = 0;
@@ -216,7 +217,8 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
Stat.score = 0;
Stat.len = 1;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
Stat.pos = 0;
aligned->s[0] = 0;
aligned->len = 0;
@@ -224,11 +226,15 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
return Stat;
}
+ /* trim seeds */
+ trimSeeds(points, start);
+
/* initialize */
Stat.len = 0;
Stat.score = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
value = points->tStart[start] - 1;
Stat.pos = value;
i = points->qStart[start];
@@ -263,8 +269,10 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
if(t_s == 0) {
while(bias < NWstat.len && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
if(Frag_align->t[bias] == 5) {
- --NWstat.gaps;
+ --NWstat.tGaps;
++(Frag_align->start);
+ } else {
+ --NWstat.qGaps;
}
++bias;
}
@@ -278,11 +286,12 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
memcpy(aligned->s, Frag_align->s + bias, NWstat.len);
memcpy(aligned->q, Frag_align->q + bias, NWstat.len);
aligned->start = q_s + Frag_align->start;
- Stat.pos -= (NWstat.len - NWstat.gaps);
+ Stat.pos -= (NWstat.len - NWstat.tGaps);
Stat.score = NWstat.score;
Stat.len = NWstat.len;
Stat.match = NWstat.match;
- Stat.gaps = NWstat.gaps;
+ Stat.tGaps = NWstat.tGaps;
+ Stat.qGaps = NWstat.qGaps;
} else {
aligned->start = q_s;
}
@@ -340,7 +349,8 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
Stat.score = 0;
Stat.len = 1;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
aligned->s[0] = 0;
aligned->len = 0;
points->len = 0;
@@ -361,7 +371,8 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
Stat.score += NWstat.score;
Stat.len += NWstat.len;
Stat.match += NWstat.match;
- Stat.gaps += NWstat.gaps;
+ Stat.tGaps += NWstat.tGaps;
+ Stat.qGaps += NWstat.qGaps;
}
} else {
stop = 0;
@@ -398,8 +409,10 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
bias = NWstat.len - 1;
while(bias && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
if(Frag_align->t[bias] == 5) {
- --NWstat.gaps;
+ --NWstat.tGaps;
++(Frag_align->end);
+ } else {
+ --NWstat.qGaps;
}
--bias;
}
@@ -418,7 +431,8 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
Stat.score += NWstat.score;
Stat.len += NWstat.len;
Stat.match += NWstat.match;
- Stat.gaps += NWstat.gaps;
+ Stat.tGaps += NWstat.tGaps;
+ Stat.qGaps += NWstat.qGaps;
} else {
Frag_align->end = 0;
}
@@ -569,7 +583,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
Stat.score = 0;
Stat.len = 1;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
Stat.pos = 0;
points->len = 0;
return Stat;
@@ -584,7 +599,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
Stat.score = 0;
Stat.len = 1;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
Stat.pos = 0;
points->len = 0;
return Stat;
@@ -594,7 +610,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
Stat.len = 0;
Stat.score = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
value = points->tStart[start] - 1;
Stat.pos = value;
i = points->qStart[start];
@@ -623,11 +640,12 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
NWstat = NW_band_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, band, matrices, t_len);
//NWstat = NW_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
}
- Stat.pos -= (NWstat.len - NWstat.gaps);
+ Stat.pos -= (NWstat.len - NWstat.tGaps);
Stat.score = NWstat.score;
Stat.len = NWstat.len;
Stat.match = NWstat.match;
- Stat.gaps = NWstat.gaps;
+ Stat.tGaps = NWstat.tGaps;
+ Stat.qGaps = NWstat.qGaps;
}
}
@@ -679,7 +697,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
Stat.score = 0;
Stat.len = 1;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
points->len = 0;
return Stat;
}
@@ -693,7 +712,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
Stat.score += NWstat.score;
Stat.len += NWstat.len;
Stat.match += NWstat.match;
- Stat.gaps += NWstat.gaps;
+ Stat.tGaps += NWstat.tGaps;
+ Stat.qGaps += NWstat.qGaps;
}
} else {
stop = 0;
@@ -727,7 +747,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
Stat.score += NWstat.score;
Stat.len += NWstat.len;
Stat.match += NWstat.match;
- Stat.gaps += NWstat.gaps;
+ Stat.tGaps += NWstat.tGaps;
+ Stat.qGaps += NWstat.qGaps;
}
points->len = 0;
=====================================
alnfrags.c
=====================================
@@ -34,6 +34,7 @@
#include "stdstat.h"
#include "threader.h"
#include "updatescores.h"
+#define mrcheck(mrc, Stat, q_len, t_len) ((mrc * q_len <= Stat.len - Stat.qGaps) || (mrc * t_len <= Stat.len - Stat.tGaps))
int (*alnFragsPE)(HashMapCCI**, int*, int*, int, double, double, int, CompDNA*, CompDNA*, CompDNA*, CompDNA*, unsigned char*, unsigned char*, unsigned char*, unsigned char*, Qseqs*, Qseqs*, int, int*, int*, long unsigned*, long unsigned*, int*, int*, int*, int*, int*, int*, int, long*, FILE*, AlnPoints*, NWmat*, volatile int*, volatile int*) = alnFragsUnionPE;
@@ -103,7 +104,8 @@ int alnFragsSE(HashMapCCI **templates_index, int *matched_templates, int *templa
alnStat.score = 0;
alnStat.pos = 0;
alnStat.len = 0;
- alnStat.gaps = 0;
+ alnStat.tGaps = 0;
+ alnStat.qGaps = 0;
points->len = 0;
}
} else {
@@ -117,9 +119,9 @@ int alnFragsSE(HashMapCCI **templates_index, int *matched_templates, int *templa
/* get read score */
aln_len = alnStat.len;
start = alnStat.pos;
- end = start + aln_len - alnStat.gaps;
+ end = start + aln_len - alnStat.tGaps;
t_len = template_lengths[abs(template)];
- if(template_lengths[abs(template)] < end && ((t_len < q_len) ? mrc * t_len : mrc * q_len) <= alnStat.match) {
+ if(template_lengths[abs(template)] < end && mrcheck(mrc, alnStat, q_len, t_len)) {
end -= template_lengths[abs(template)];
}
@@ -247,7 +249,8 @@ int alnFragsUnionPE(HashMapCCI **templates_index, int *matched_templates, int *t
alnStat.score = 0;
alnStat.pos = 0;
alnStat.len = 0;
- alnStat.gaps = 0;
+ alnStat.tGaps = 0;
+ alnStat.qGaps = 0;
points->len = 0;
}
} else {
@@ -258,9 +261,9 @@ int alnFragsUnionPE(HashMapCCI **templates_index, int *matched_templates, int *t
aln_len = alnStat.len;
read_score = alnStat.score;
t_len = template_lengths[abs(template)];
- if(minlen <= aln_len && 0 < read_score && ((t_len < qseq_comp->seqlen) ? mrc * t_len : mrc * qseq_comp->seqlen) <= alnStat.match) {
+ if(minlen <= aln_len && 0 < read_score && mrcheck(mrc, alnStat, qseq_comp->seqlen, t_len)) {
start = alnStat.pos;
- end = alnStat.pos + alnStat.len - alnStat.gaps;
+ end = alnStat.pos + alnStat.len - alnStat.tGaps;
if(start == 0 && end == t_len) {
read_score += abs(W1);
}
@@ -294,7 +297,8 @@ int alnFragsUnionPE(HashMapCCI **templates_index, int *matched_templates, int *t
alnStat.score = 0;
alnStat.pos = 0;
alnStat.len = 0;
- alnStat.gaps = 0;
+ alnStat.tGaps = 0;
+ alnStat.qGaps = 0;
}
rc = 1;
} else {
@@ -304,9 +308,9 @@ int alnFragsUnionPE(HashMapCCI **templates_index, int *matched_templates, int *t
/* get read score */
aln_len = alnStat.len;
read_score = alnStat.score;
- if(minlen <= aln_len && 0 < read_score && ((t_len < qseq_r_comp->seqlen) ? mrc * t_len : mrc * qseq_r_comp->seqlen) <= alnStat.match) {
+ if(minlen <= aln_len && 0 < read_score && mrcheck(mrc, alnStat, qseq_r_comp->seqlen, t_len)) {
start = alnStat.pos;
- end = alnStat.pos + alnStat.len - alnStat.gaps;
+ end = alnStat.pos + alnStat.len - alnStat.tGaps;
if(start == 0 && end == t_len) {
read_score += abs(W1);
@@ -576,7 +580,8 @@ int alnFragsPenaltyPE(HashMapCCI **templates_index, int *matched_templates, int
alnStat.score = 0;
alnStat.pos = 0;
alnStat.len = 0;
- alnStat.gaps = 0;
+ alnStat.tGaps = 0;
+ alnStat.qGaps = 0;
points->len = 0;
}
} else {
@@ -587,9 +592,9 @@ int alnFragsPenaltyPE(HashMapCCI **templates_index, int *matched_templates, int
aln_len = alnStat.len;
read_score = alnStat.score;
t_len = template_lengths[abs(template)];
- if(minlen <= aln_len && 0 < read_score && ((t_len < qseq_comp->seqlen) ? mrc * t_len : mrc * qseq_comp->seqlen) <= alnStat.match) {
+ if(minlen <= aln_len && 0 < read_score && mrcheck(mrc, alnStat, qseq_comp->seqlen, t_len)) {
start = alnStat.pos;
- end = alnStat.pos + alnStat.len - alnStat.gaps;
+ end = alnStat.pos + alnStat.len - alnStat.tGaps;
if(start == 0 && end == t_len) {
read_score += abs(W1);
@@ -622,7 +627,8 @@ int alnFragsPenaltyPE(HashMapCCI **templates_index, int *matched_templates, int
alnStat.score = 0;
alnStat.pos = 0;
alnStat.len = 0;
- alnStat.gaps = 0;
+ alnStat.tGaps = 0;
+ alnStat.qGaps = 0;
}
rc = 1;
} else {
@@ -632,9 +638,9 @@ int alnFragsPenaltyPE(HashMapCCI **templates_index, int *matched_templates, int
/* get read score */
aln_len = alnStat.len;
read_score = alnStat.score;
- if(minlen <= aln_len && 0 < read_score && ((t_len < qseq_r_comp->seqlen) ? mrc * t_len : mrc * qseq_r_comp->seqlen) <= alnStat.match) {
+ if(minlen <= aln_len && 0 < read_score && mrcheck(mrc, alnStat, qseq_r_comp->seqlen, t_len)) {
start = alnStat.pos;
- end = alnStat.pos + alnStat.len - alnStat.gaps;
+ end = alnStat.pos + alnStat.len - alnStat.tGaps;
if(start == 0 && end == t_len) {
read_score += abs(W1);
@@ -899,7 +905,8 @@ int alnFragsForcePE(HashMapCCI **templates_index, int *matched_templates, int *t
alnStat.score = 0;
alnStat.pos = 0;
alnStat.len = 0;
- alnStat.gaps = 0;
+ alnStat.tGaps = 0;
+ alnStat.qGaps = 0;
points->len = 0;
}
} else {
@@ -907,7 +914,7 @@ int alnFragsForcePE(HashMapCCI **templates_index, int *matched_templates, int *t
}
t_len = template_lengths[abs(template)];
- if(0 < alnStat.score && minlen <= alnStat.len && ((t_len < qseq_comp->seqlen) ? mrc * t_len : mrc * qseq_comp->seqlen) <= alnStat.match) {
+ if(0 < alnStat.score && minlen <= alnStat.len && mrcheck(mrc, alnStat, qseq_comp->seqlen, t_len)) {
if(arc) {
if(rc < 0) {
/* rc */
@@ -919,7 +926,8 @@ int alnFragsForcePE(HashMapCCI **templates_index, int *matched_templates, int *t
alnStat_r.score = 0;
alnStat_r.pos = 0;
alnStat_r.len = 0;
- alnStat_r.gaps = 0;
+ alnStat_r.tGaps = 0;
+ alnStat_r.qGaps = 0;
}
rc = 1;
} else {
@@ -927,17 +935,17 @@ int alnFragsForcePE(HashMapCCI **templates_index, int *matched_templates, int *t
}
/* get read score */
- if(0 < alnStat_r.score && minlen <= alnStat_r.len) {
+ if(0 < alnStat_r.score && minlen <= alnStat_r.len && mrcheck(mrc, alnStat_r, qseq_r_comp->seqlen, t_len)) {
aln_len = alnStat.len + alnStat_r.len;
/* Handle negative insertsizes caused by trimming,
user stupidity or sample error. */
if(alnStat.pos < alnStat_r.pos) {
start = alnStat.pos;
- end = alnStat_r.pos + alnStat_r.len - alnStat_r.gaps;
+ end = alnStat_r.pos + alnStat_r.len - alnStat_r.tGaps;
} else {
start = alnStat_r.pos;
- end = alnStat.pos + alnStat.len - alnStat.gaps;
+ end = alnStat.pos + alnStat.len - alnStat.tGaps;
}
read_score = alnStat.score + alnStat_r.score;
@@ -953,7 +961,7 @@ int alnFragsForcePE(HashMapCCI **templates_index, int *matched_templates, int *t
}
/* save best match(es) */
- if(read_score > kmersize && score >= scoreT && ((t_len < qseq_r_comp->seqlen) ? mrc * t_len : mrc * qseq_r_comp->seqlen) <= alnStat.match) {
+ if(read_score > kmersize && score >= scoreT) {
if(score > bestScore) { // save as best match
bestScore = score;
*best_read_score = read_score;
@@ -1117,7 +1125,7 @@ void * alnFrags_threaded(void * arg) {
unmapped = 0;
}
- if(sam && unmapped) {
+ if(sam && !(sam & 2096) && unmapped) {
if(unmapped & 1) {
stats[1] = flag;
nibble2base(qseq->seq, qseq->len);
=====================================
assembly.c
=====================================
@@ -39,6 +39,7 @@
#include "stdstat.h"
#include "threader.h"
#include "xml.h"
+#define mrcheck(mrc, Stat, q_len, t_len) ((mrc * q_len <= Stat.len - Stat.qGaps) || (mrc * t_len <= Stat.len - Stat.tGaps))
void * (*assembly_KMA_Ptr)(void *) = &assemble_KMA;
int (*significantBase)(int, int, double) = &significantNuc;
@@ -432,11 +433,11 @@ void * assemble_KMA_threaded(void *arg) {
/* get read score */
aln_len = alnStat.len;
start = alnStat.pos;
- end = start + aln_len - alnStat.gaps;
+ end = start + aln_len - alnStat.tGaps;
/* Get normed score check read coverage */
read_score = alnStat.score;
- if(minlen <= aln_len && ((t_len < qseq->len) ? mrc * t_len : mrc * qseq->len) <= alnStat.match) {
+ if(minlen <= aln_len && mrcheck(mrc, alnStat, qseq->len, t_len)) {
score = 1.0 * read_score / aln_len;
} else {
read_score = 0;
@@ -684,16 +685,16 @@ void * assemble_KMA_threaded(void *arg) {
while(i < asm_len) {
/* call template */
if(pos < t_len) {
- aligned_assem->t[i] = bases[getNuc(template_index->seq, pos)];
+ bestNuc = getNuc(template_index->seq, pos);
} else {
- aligned_assem->t[i] = '-';
+ bestNuc = 5;
}
+ aligned_assem->t[i] = bases[bestNuc];
/* call query */
- bestNuc = 0;
- bestScore = assembly[pos].counts[0];
- depthUpdate = bestScore;
- for(j = 1; j < 6; ++j) {
+ bestScore = assembly[pos].counts[bestNuc];
+ depthUpdate = 0;
+ for(j = 0; j < 6; ++j) {
if(bestScore < assembly[pos].counts[j]) {
bestScore = assembly[pos].counts[j];
bestNuc = j;
@@ -703,7 +704,9 @@ void * assemble_KMA_threaded(void *arg) {
bestNuc = bases[bestNuc];
/* check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = assembly[pos].counts[4];
bestNuc = 4;
@@ -718,14 +721,20 @@ void * assemble_KMA_threaded(void *arg) {
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[pos].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
/* determine base at current position */
+ /*
if(bcd <= depthUpdate) {
bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
} else {
bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[pos]);
}
+ */
+ bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
aligned_assem->q[i] = bestNuc;
if(bestNuc != '-') {
@@ -986,11 +995,11 @@ void * assemble_KMA_dense_threaded(void *arg) {
/* get read score */
aln_len = alnStat.len;
start = alnStat.pos;
- end = start + aln_len - alnStat.gaps;
+ end = start + aln_len - alnStat.tGaps;
/* Get normed score check read coverage */
read_score = alnStat.score;
- if(minlen <= aln_len && ((t_len < qseq->len) ? mrc * t_len : mrc * qseq->len) <= alnStat.match) {
+ if(minlen <= aln_len && mrcheck(mrc, alnStat, qseq->len, t_len)) {
score = 1.0 * read_score / aln_len;
} else {
read_score = 0;
@@ -1127,8 +1136,7 @@ void * assemble_KMA_dense_threaded(void *arg) {
aligned_assem->t[i] = bases[bestNuc];
/* call query */
- bestNuc = 5;
- bestScore = 0;
+ bestScore = assembly[i].counts[bestNuc];
depthUpdate = 0;
for(j = 0; j < 6; ++j) {
if(bestScore < assembly[i].counts[j]) {
@@ -1140,7 +1148,9 @@ void * assemble_KMA_dense_threaded(void *arg) {
bestNuc = bases[bestNuc];
/* Check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = 0;
bestNuc = 4;
@@ -1155,14 +1165,20 @@ void * assemble_KMA_dense_threaded(void *arg) {
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[i].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
/* determine base at current position */
+ /*
if(bcd <= depthUpdate) {
bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[i]);
} else {
bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[i]);
}
+ */
+ bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[i]);
aligned_assem->q[i] = bestNuc;
if(bestNuc != '-') {
@@ -1293,7 +1309,7 @@ void * skip_assemble_KMA(void *arg) {
}
aligned_assem->cover = 0;
- aligned_assem->aln_len = (1 - exp((-1.0) * aligned_assem->depth / t_len)) * t_len; // expected coverage from depth
+ aligned_assem->aln_len = 0;//(1 - exp((-1.0) * aligned_assem->depth / t_len)) * t_len; // expected coverage from depth
return NULL;
}
@@ -1475,16 +1491,16 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
while(i < end) {
/* call template */
if(pos < t_len) {
- aligned_assem->t[i] = bases[getNuc(seq, pos)];
+ bestNuc = getNuc(seq, pos);
} else {
- aligned_assem->t[i] = '-';
+ bestNuc = 5;
}
+ aligned_assem->t[i] = bases[bestNuc];
/* call query */
- bestNuc = 0;
- bestScore = assembly[pos].counts[0];
- depthUpdate = bestScore;
- for(j = 1; j < 6; ++j) {
+ bestScore = assembly[pos].counts[bestNuc];
+ depthUpdate = 0;
+ for(j = 0; j < 6; ++j) {
if(bestScore < assembly[pos].counts[j]) {
bestScore = assembly[pos].counts[j];
bestNuc = j;
@@ -1494,7 +1510,9 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
bestNuc = bases[bestNuc];
/* check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = assembly[pos].counts[4];
bestNuc = 4;
@@ -1509,14 +1527,20 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[pos].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
/* determine base at current position */
+ /*
if(bcd <= depthUpdate) {
bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
} else {
bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[pos]);
}
+ */
+ bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
aligned_assem->q[i] = bestNuc;
if(bestNuc != '-') {
@@ -1615,11 +1639,11 @@ void * assemble_KMA(void *arg) {
static volatile int excludeIn[1] = {0}, excludeOut[1] = {0}, excludeMatrix[1] = {0};
static volatile int thread_wait = 0, thread_init = 0, thread_begin = 0;
static volatile int mainTemplate = -2, next;
- static int t_len, load;
+ static int t_len, load, seq_in;
static char *template_name;
static HashMapCCI *template_index;
Assemble_thread *thread = arg;
- int i, minlen, aln_len, kmersize, sam, chunk, seq_in, ef, template;
+ int i, minlen, aln_len, kmersize, sam, chunk, ef, template;
int read_score, asm_len, nextTemplate, file_i, file_count, delta, status;
int thread_num, mq, bcd, start, end, q_start, q_end;
int stats[5], buffer[8], *qBoundPtr;
@@ -1858,11 +1882,11 @@ void * assemble_KMA(void *arg) {
/* get read score */
aln_len = alnStat.len;
start = alnStat.pos;
- end = start + aln_len - alnStat.gaps;
+ end = start + aln_len - alnStat.tGaps;
/* Get normed score check read coverage */
read_score = alnStat.score;
- if(minlen <= aln_len && ((t_len < qseq->len) ? mrc * t_len : mrc * qseq->len) <= alnStat.match) {
+ if(minlen <= aln_len && mrcheck(mrc, alnStat, qseq->len, t_len)) {
score = 1.0 * read_score / aln_len;
} else {
read_score = 0;
=====================================
chain.c
=====================================
@@ -137,12 +137,13 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
MMs = MAX(2, MMs);
Ms = MIN(Ms - MMs, kmersize);
+ Ms = MIN(Ms, MMs);
}
gap += weight + points->score[j] + Ms * M + MMs * MM;
/* check if score is max */
- if(score < gap) {
+ if(score <= gap) {
score = gap;
points->next[i] = j;
}
@@ -194,14 +195,12 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
points->score[i] = score;
/* update bestScore */
- if(bestScore < score) {
+ if(bestScore <= score) {
if(points->next[i] != bestPos) {
secondScore = bestScore;
}
bestScore = score;
bestPos = i;
- } else if(bestScore == score && points->next[i] != bestPos) {
- secondScore = bestScore;
}
}
/* calculate mapping quality */
@@ -284,12 +283,13 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
MMs = MAX(2, MMs);
Ms = MIN(Ms - MMs, kmersize);
+ Ms = MIN(Ms, MMs);
}
gap += weight + points->score[j] + Ms * M + MMs * MM;
/* check if score is max */
- if(score < gap) {
+ if(score <= gap) {
score = gap;
points->next[i] = j;
}
@@ -327,6 +327,7 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
MMs = MAX(2, MMs);
Ms = MIN(Ms - MMs, kmersize);
+ Ms = MIN(Ms, MMs);
}
gap += weight + points->score[j] + Ms * M + MMs * MM;
@@ -353,7 +354,7 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
gap += (weight + points->score[j] - (tStart - tEnd) * M);
/* check if score is max */
- if(score < gap) {
+ if(score <= gap) {
score = gap;
points->next[i] = j;
}
@@ -390,14 +391,12 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
points->score[i] = score;
/* update bestScore */
- if(bestScore < score) {
+ if(bestScore <= score) {
if(points->next[i] != bestPos) {
secondScore = bestScore;
}
bestScore = score;
bestPos = i;
- } else if(bestScore == score && points->next[i] != bestPos) {
- secondScore = bestScore;
}
}
/* calculate mapping quality */
@@ -417,3 +416,31 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
return bestPos;
}
+
+void trimSeeds(AlnPoints *points, int start) {
+
+ /* trim the start of each seed */
+ static int ts = 0;
+ int len;
+
+ if(!points) {
+ ts = start;
+ return;
+ } else if(!ts) {
+ return;
+ }
+
+ /* iterate seeds on best chain */
+ do {
+ /* trim seed */
+ len = points->qEnd[start] - points->qStart[start];
+ if(len < ts) {
+ /* ensure at least one nucleotide remains in seed */
+ points->tStart[start] += --len;
+ points->qStart[start] += len;
+ } else {
+ points->tStart[start] += ts;
+ points->qStart[start] += ts;
+ }
+ } while((start = points->next[start]));
+}
=====================================
chain.h
=====================================
@@ -45,3 +45,4 @@ void seedPoint_realloc(AlnPoints *dest, int size);
void seedPoint_free(AlnPoints *src);
int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
+void trimSeeds(AlnPoints *points, int start);
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+kma (1.3.15-1) UNRELEASED; urgency=medium
+
+ * New upstream version 1.3.15
+ * d/p/blhc.patch: Propagate hardening flags to fix blhc
+
+ -- Nilesh Patra <nilesh at debian.org> Sat, 01 May 2021 22:20:10 +0530
+
kma (1.3.10-1) unstable; urgency=medium
* New upstream version
=====================================
debian/patches/blhc.patch
=====================================
@@ -0,0 +1,32 @@
+Description: Propagate CPPFLAGS into Makefile
+Author: Nilesh Patra <nilesh at debian.org>
+Last-Update: 2021-04-14
+--- a/Makefile
++++ b/Makefile
+@@ -4,21 +4,21 @@
+ PROGS = kma kma_index kma_shm kma_update
+
+ .c .o:
+- $(CC) $(CFLAGS) -c -o $@ $<
++ $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+
+ all: $(PROGS)
+
+ kma: main.c libkma.a
+- $(CC) $(CFLAGS) -o $@ main.c libkma.a -lm -lpthread -lz $(LDFLAGS)
++ $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ main.c libkma.a -lm -lpthread -lz $(LDFLAGS)
+
+ kma_index: kma_index.c libkma.a
+- $(CC) $(CFLAGS) -o $@ kma_index.c libkma.a -lm -lz $(LDFLAGS)
++ $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ kma_index.c libkma.a -lm -lz $(LDFLAGS)
+
+ kma_shm: kma_shm.c libkma.a
+- $(CC) $(CFLAGS) -o $@ kma_shm.c libkma.a $(LDFLAGS)
++ $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ kma_shm.c libkma.a $(LDFLAGS)
+
+ kma_update: kma_update.c libkma.a
+- $(CC) $(CFLAGS) -o $@ kma_update.c libkma.a $(LDFLAGS)
++ $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ kma_update.c libkma.a $(LDFLAGS)
+
+ libkma.a: $(LIBS)
+ $(AR) -csru $@ $(LIBS)
=====================================
debian/patches/series
=====================================
@@ -0,0 +1 @@
+blhc.patch
=====================================
dist.c
=====================================
@@ -150,7 +150,7 @@ HashMapKMA * loadValues(const char *filename) {
fclose(file);
return 0;
}
- dest->exist_l = (long unsigned *)(dest->value_index);
+ dest->exist_l = (long unsigned *)(dest->exist);
fclose(file);
return dest;
=====================================
frags.c
=====================================
@@ -85,13 +85,15 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
/* copy seq */
update = (char *) dest->next;
++q_len;
+ --qseq;
while(--q_len) {
- *update++ = bases[*qseq++];
+ *update++ = bases[*++qseq];
}
avail -= check;
check = 33;
if(avail < check) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -102,6 +104,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
for(i = 1; i < bestHits; ++i) {
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -112,6 +115,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
}
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -121,6 +125,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
update += check;
for(i = 1; i < bestHits; ++i) {
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -131,6 +136,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
}
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -139,7 +145,8 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
avail -= check;
update += check;
for(i = 1; i < bestHits; ++i) {
- if(avail < 11) {
+ if(avail < 12) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -151,6 +158,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
check = header->len + 1;
if(avail < check) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
=====================================
index.c
=====================================
@@ -231,8 +231,9 @@ int index_main(int argc, char *argv[]) {
} else if(kmersize == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmersize = 16;
- } else if(kmersize > 32) {
- kmersize = 32;
+ } else if(kmersize > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
}
kmerindex = kmersize;
}
@@ -246,8 +247,9 @@ int index_main(int argc, char *argv[]) {
} else if(kmersize == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmersize = 16;
- } else if(kmersize > 32) {
- kmersize = 32;
+ } else if(kmersize > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
}
}
} else if(strcmp(argv[args], "-k_i") == 0) {
@@ -260,8 +262,9 @@ int index_main(int argc, char *argv[]) {
} else if(kmerindex == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmerindex = 16;
- } else if(kmerindex > 32) {
- kmerindex = 32;
+ } else if(kmerindex > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
}
}
} else if(strcmp(argv[args], "-CS") == 0) {
=====================================
kma.c
=====================================
@@ -113,6 +113,7 @@ static void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-ipe\t\tInput paired end file name(s)\n");
fprintf(helpOut, "#\t-int\t\tInput interleaved file name(s)\n");
fprintf(helpOut, "#\t-k\t\tKmersize\t\t\t%s\n", "DB defined");
+ fprintf(helpOut, "#\t-ts\t\tTrim front of seeds with ts\t%d\n", 0);
fprintf(helpOut, "#\t-ml\t\tMinimum alignment length\t%d\n", 16);
fprintf(helpOut, "#\t-p\t\tp-value\t\t\t\t0.05\n");
fprintf(helpOut, "#\t-ConClave\tConClave version\t\t1\n");
@@ -156,6 +157,7 @@ static void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-bcd\t\tMinimum depth at base\t\t1\n");
fprintf(helpOut, "#\t-bcg\t\tMaintain insignificant gaps\n");
fprintf(helpOut, "#\t-and\t\tBoth mrs and p_value thresholds\n#\t\t\thas to reached to in order to\n#\t\t\treport a template hit.\t\tor\n");
+ fprintf(helpOut, "#\t-oa\t\tOutput all, disregard mrs and p-value\n#\t\t\twhen reporting a template hit\tFalse\n");
fprintf(helpOut, "#\t-mq\t\tMinimum mapping quality\t\t0\n");
fprintf(helpOut, "#\t-mrs\t\tMinimum alignment score,\n#\t\t\tnormalized to alignment length\t0.50\n");
fprintf(helpOut, "#\t-mrc\t\tMinimum read coverage\t\t0.10\n");
@@ -203,16 +205,15 @@ int kma_main(int argc, char *argv[]) {
static int fileCounter, fileCounter_PE, fileCounter_INT, Ts, Tv, minlen;
static int extendedFeatures, spltDB, thread_num, kmersize, targetNum, mq;
static int ref_fsa, print_matrix, print_all, sam, vcf, Mt1, bcd, one2one;
- static int sparse_run, **d, status = 0;
+ static int sparse_run, ts, **d, status = 0;
static unsigned xml, nc, nf, shm, exhaustive, verbose;
static char *outputfilename, *templatefilename, **templatefilenames;
static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
- static double ID_t, scoreT, coverT, mrc, evalue;
+ static double ID_t, scoreT, coverT, mrc, evalue, minFrac, support;
static Penalties *rewards;
int i, j, args, exe_len, fileCount, size, escape, tmp, step1, step2;
unsigned totFrags;
char *to2Bit, *exeBasic, *myTemplatefilename;
- double support;
FILE *templatefile, *ioStream;
time_t t0, t1;
Qseqs qseq;
@@ -252,8 +253,11 @@ int kma_main(int argc, char *argv[]) {
print_all = 0;
ref_fsa = 0;
kmersize = 0;
+ ts = 0;
minlen = 16;
evalue = 0.05;
+ support = 0.0;
+ minFrac = 1.0;
exhaustive = 0;
shm = 0;
mq = 0;
@@ -498,8 +502,18 @@ int kma_main(int argc, char *argv[]) {
} else if(kmersize == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmersize = 16;
- } else if(kmersize > 32) {
- kmersize = 32;
+ } else if(kmersize > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
+ }
+ }
+ } else if(strcmp(argv[args], "-ts") == 0) {
+ ++args;
+ if(args < argc) {
+ ts = strtoul(argv[args], &exeBasic, 10);
+ if(*exeBasic != 0 || ts < 0 || ts > 30) {
+ fprintf(stderr, "# Invalid seed trim parsed\n");
+ exit(1);
}
}
} else if(strcmp(argv[args], "-ml") == 0) {
@@ -590,8 +604,8 @@ int kma_main(int argc, char *argv[]) {
one2one = 0;
} else if(strcmp(argv[args], "-proxi") == 0) {
if(++args < argc) {
- support = strtod(argv[args], &exeBasic);
- if(*exeBasic != 0 || support < 0 || 1 < support) {
+ minFrac = strtod(argv[args], &exeBasic);
+ if(*exeBasic != 0 || minFrac < 0 || 1 < minFrac) {
fprintf(stderr, "Invalid argument at \"-proxi\".\n");
exit(1);
} else {
@@ -603,13 +617,13 @@ int kma_main(int argc, char *argv[]) {
getF = &getF_Proxi;
getR = &getR_Proxi;
getChainTemplates = &getProxiChainTemplates;
- getMatch((int *)(&support), 0);
- getMatchSparse((int *)(&support), 0, 0, 0, 0, 0);
- getSecondPen((int *)(&support), 0, 0, 0, 0, 0, 0, 0);
- getF((int *)(&support), 0, 0, 0, 0);
- ankerAndClean((int *)(&support), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- ankerAndClean_MEM((int *)(&support), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- getProxiChainTemplates(0, (const Penalties *)(&support), 0, 0, 0, 0, 0);
+ getMatch((int *)(&minFrac), 0);
+ getMatchSparse((int *)(&minFrac), 0, 0, 0, 0, 0);
+ getSecondPen((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0);
+ getF((int *)(&minFrac), 0, 0, 0, 0);
+ ankerAndClean((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ ankerAndClean_MEM((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ getProxiChainTemplates(0, (const Penalties *)(&minFrac), 0, 0, 0, 0, 0);
}
} else {
fprintf(stderr, "Need argument at: \"-proxi\".\n");
@@ -633,7 +647,7 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-p") == 0 || strcmp(argv[args], "-e") == 0) {
if(++args < argc) {
evalue = strtod(argv[args], &exeBasic);
- if(*exeBasic != 0) {
+ if(*exeBasic != 0 || evalue < 0 || 1.0 < evalue) {
fprintf(stderr, "Invalid argument at \"%s\".\n", argv[--args]);
exit(1);
}
@@ -642,7 +656,7 @@ int kma_main(int argc, char *argv[]) {
if(++args < argc && argv[args][0] != '-') {
significantBase = &significantAndSupport;
support = strtod(argv[args], &exeBasic);
- if(*exeBasic != 0 || 1 < support) {
+ if(*exeBasic != 0 || support < 0 || 1 < support) {
fprintf(stderr, "Invalid argument at \"-bc\".\n");
exit(1);
} else {
@@ -802,6 +816,9 @@ int kma_main(int argc, char *argv[]) {
}
} else if(strcmp(argv[args], "-and") == 0) {
cmp = &cmp_and;
+ } else if(strcmp(argv[args], "-oa") == 0) {
+ cmp = &cmp_true;
+ ID_t = 0.0;
} else if(strcmp(argv[args], "-boot") == 0) {
printFsa_ptr = &bootFsa;
} else if(strcmp(argv[args], "-Mt1") == 0) {
@@ -967,6 +984,7 @@ int kma_main(int argc, char *argv[]) {
++args;
}
preseed(0, 0, exhaustive);
+ trimSeeds(0, ts);
if(sam && kmaPipe != &kmaPipeThread) {
fprintf(stderr, "\"-sam\" and \"-status\" cannot coincide.\n");
@@ -1285,7 +1303,7 @@ int kma_main(int argc, char *argv[]) {
} else if(Mt1) {
myTemplatefilename = smalloc(strlen(templatefilename) + 64);
strcpy(myTemplatefilename, templatefilename);
- runKMA_Mt1(myTemplatefilename, outputfilename, strjoin(argv, argc), kmersize, minlen, rewards, ID_t, mq, scoreT, mrc, evalue, bcd, Mt1, ref_fsa, print_matrix, vcf, xml, sam, nc, nf, thread_num);
+ runKMA_Mt1(myTemplatefilename, outputfilename, strjoin(argv, argc), kmersize, minlen, rewards, ID_t, mq, scoreT, mrc, evalue, support, bcd, Mt1, ref_fsa, print_matrix, vcf, xml, sam, nc, nf, thread_num);
free(myTemplatefilename);
fprintf(stderr, "# Closing files\n");
} else if(step2) {
@@ -1304,11 +1322,11 @@ int kma_main(int argc, char *argv[]) {
myTemplatefilename = smalloc(strlen(templatefilename) + 64);
strcpy(myTemplatefilename, templatefilename);
if(spltDB == 0 && targetNum != 1) {
- status |= runKMA_spltDB(templatefilenames, targetNum, outputfilename, argc, argv, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+ status |= runKMA_spltDB(templatefilenames, targetNum, outputfilename, argc, argv, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
} else if(mem_mode) {
- status |= runKMA_MEM(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+ status |= runKMA_MEM(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
} else {
- status |= runKMA(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+ status |= runKMA(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
}
free(myTemplatefilename);
fprintf(stderr, "# Closing files\n");
=====================================
matrix.c
=====================================
@@ -30,13 +30,17 @@
Matrix * matrix_init(unsigned size) {
int i, **ptr, *src;
+ long unsigned Size;
Matrix *dest;
dest = smalloc(sizeof(Matrix));
dest->n = 0;
dest->size = size;
dest->mat = smalloc(size * sizeof(int *));
- *(dest->mat) = smalloc(size * size * sizeof(int));
+ Size = size;
+ Size *= size;
+ Size *= sizeof(int);
+ *(dest->mat) = smalloc(Size);
/* set matrix rows */
ptr = dest->mat;
@@ -54,13 +58,17 @@ Matrix * matrix_init(unsigned size) {
Matrix * ltdMatrix_init(unsigned size) {
int i, **ptr, *src;
+ long unsigned Size;
Matrix *dest;
dest = smalloc(sizeof(Matrix));
dest->n = 0;
dest->size = size;
dest->mat = smalloc(size * sizeof(int *));
- *(dest->mat) = calloc(size * (size - 1) / 2, sizeof(int));
+ Size = size;
+ Size *= (size - 1);
+ Size *= (sizeof(int) / 2);
+ *(dest->mat) = calloc(Size, 1);
if(!*(dest->mat)) {
ERROR();
}
@@ -120,8 +128,12 @@ Matrix * ltdMatrix_minit(long unsigned size) {
void ltdMatrix_realloc(Matrix *src, unsigned size) {
int i, **ptr, *mat;
+ long unsigned Size;
- *(src->mat) = realloc(*(src->mat), size * (size - 1) * sizeof(int) / 2);
+ Size = size;
+ Size *= (size - 1);
+ Size *= (sizeof(int) / 2);
+ *(src->mat) = realloc(*(src->mat), Size);
src->mat = realloc(src->mat, size * sizeof(int *));
if(!src->mat || !*(src->mat)) {
ERROR();
@@ -150,7 +162,9 @@ void Matrix_mdestroy(Matrix *src) {
long unsigned size;
- size = src->size * (src->size - 1) * sizeof(int) / 2;
+ size = src->size;
+ size *= (src->size - 1);
+ size *= sizeof(int) / 2;
msync(*(src->mat), size, MS_SYNC);
munmap(*(src->mat), size);
free(src->mat);
=====================================
mt1.c
=====================================
@@ -17,6 +17,7 @@
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -80,9 +81,9 @@ void printFsa_pairMt1(Qseqs *header, Qseqs *qseq, Qseqs *header_r, Qseqs *qseq_r
}
-void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num) {
+void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num) {
- int i, j, aln_len, t_len, coverScore, file_len, DB_size, delta;
+ int i, j, aln_len, t_len, coverScore, file_len, DB_size, delta, seq_in;
int *template_lengths;
long unsigned read_score, seeker;
double p_value, id, q_id, cover, q_cover;
@@ -213,10 +214,13 @@ void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int
}
strcat(templatefilename, ".seq.b");
- DB_file = sfopen(templatefilename, "rb");
+ seq_in = open(templatefilename, O_RDONLY);
+ if(seq_in == -1) {
+ ERROR();
+ }
templatefilename[file_len] = 0;
- template_index = alignLoad_fly(0, fileno(DB_file), *template_lengths, kmersize, seeker);
- fclose(DB_file);
+ template_index = alignLoad_fly(0, seq_in, *template_lengths, kmersize, seeker);
+ close(seq_in);
/* get name */
strcat(templatefilename, ".name");
@@ -427,7 +431,7 @@ void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int
updateMatrix(matrix_out, thread->template_name, template_index->seq, matrix, t_len);
}
if(vcf) {
- updateVcf(thread->template_name, template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
/* destroy this DB index */
=====================================
mt1.h
=====================================
@@ -24,4 +24,4 @@
void printFsaMt1(Qseqs *header, Qseqs *qseq, CompDNA *compressor, FILE *out);
void printFsa_pairMt1(Qseqs *header, Qseqs *qseq, Qseqs *header_r, Qseqs *qseq_r, CompDNA *compressor, FILE *out);
-void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num);
+void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num);
=====================================
nw.c
=====================================
@@ -45,18 +45,21 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
t_len += aligned->pos;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
Stat.score = 0;
aligned->s[0] = 0;
} else if(t_len == 0) {
Stat.len = q_len;
Stat.match = 0;
- Stat.gaps = q_len;
+ Stat.tGaps = q_len;
+ Stat.qGaps = 0;
Stat.score = W1 + (q_len - 1) * U;
memset(aligned->s, '_', q_len);
aligned->s[q_len] = 0;
@@ -65,7 +68,8 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
} else {
Stat.len = t_len;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = t_len;
Stat.score = W1 + (t_len - 1) * U;
memset(aligned->s, '_', t_len);
aligned->s[t_len] = 0;
@@ -106,7 +110,6 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
if(0 < k) {
E_ptr = E;
for(m = 0; m < t_len; ++m) {
@@ -180,8 +183,8 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
thisScore = Q_prev + U;
if(Q < thisScore) {
Q = thisScore;
- if(e == 2) {
- D_ptr[n] = Q;
+ if(D_ptr[n] <= thisScore) {
+ D_ptr[n] = thisScore;
e = 3;
}
} else {
@@ -190,7 +193,7 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
thisScore = P_prev[n] + U;
if(P_ptr[n] < thisScore) {
P_ptr[n] = thisScore;
- if(D_ptr[n] < thisScore) {
+ if(D_ptr[n] <= thisScore) {
D_ptr[n] = thisScore;
e = 5;
}
@@ -252,7 +255,8 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
nuc_pos = m + t_s;
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
while(E_ptr[n] != 0) {
if(nuc_pos == template_length) {
nuc_pos = 0;
@@ -273,26 +277,28 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
++nuc_pos;
E_ptr += (q_len + 1);
++Stat.len;
+ ++Stat.qGaps;
}
aligned->t[Stat.len] = getNuc(template, nuc_pos);
aligned->q[Stat.len] = 5;
aligned->s[Stat.len] = '_';
++nuc_pos;
E_ptr += (q_len + 1);
+ ++Stat.qGaps;
} else {
while(!(E_ptr[n] >> 3)) {
aligned->t[Stat.len] = 5;
aligned->q[Stat.len] = query[n];
aligned->s[Stat.len] = '_';
- ++Stat.gaps;
++n;
++Stat.len;
+ ++Stat.tGaps;
}
aligned->t[Stat.len] = 5;
aligned->q[Stat.len] = query[n];
aligned->s[Stat.len] = '_';
- ++Stat.gaps;
++n;
+ ++Stat.tGaps;
}
++Stat.len;
}
@@ -325,18 +331,21 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
t_len += template_length;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
+ Stat.score = 0;
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
- Stat.score = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
aligned->s[0] = 0;
} else if(t_len == 0) {
Stat.len = q_len;
Stat.match = 0;
- Stat.gaps = q_len;
+ Stat.tGaps = q_len;
+ Stat.qGaps = 0;
Stat.score = W1 + (q_len - 1) * U;
memset(aligned->s, '_', q_len);
aligned->s[q_len] = 0;
@@ -345,7 +354,8 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
} else {
Stat.len = t_len;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = t_len;
Stat.score = W1 + (t_len - 1) * U;
memset(aligned->s, '_', t_len);
aligned->s[t_len] = 0;
@@ -393,7 +403,6 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
E_ptr = E + (t_len * (bq_len + 1));
c_pos = (t_len + q_len) >> 1;
@@ -474,8 +483,8 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
thisScore = Q_prev + U;
if(Q < thisScore) {
Q = thisScore;
- if(e == 2) {
- D_ptr[n] = Q;
+ if(D_ptr[n] <= thisScore) {
+ D_ptr[n] = thisScore;
e = 3;
}
} else {
@@ -484,7 +493,7 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
thisScore = P_prev[n - 1] + U;
if(P_ptr[n] < thisScore) {
P_ptr[n] = thisScore;
- if(D_ptr[n] < thisScore) {
+ if(D_ptr[n] <= thisScore) {
D_ptr[n] = thisScore;
e = 5;
}
@@ -576,7 +585,8 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
nuc_pos = m + t_s;
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
while(E_ptr[n] != 0) {
if(nuc_pos == template_length) {
nuc_pos = 0;
@@ -598,6 +608,7 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
E_ptr += (bq_len + 1);
--n;
++Stat.len;
+ ++Stat.qGaps;
}
aligned->t[Stat.len] = getNuc(template, nuc_pos);
aligned->q[Stat.len] = 5;
@@ -605,22 +616,23 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
++nuc_pos;
E_ptr += (bq_len + 1);
--n;
+ ++Stat.qGaps;
} else {
while(!(E_ptr[n] >> 3)) {
aligned->t[Stat.len] = 5;
aligned->q[Stat.len] = query[q_pos];
aligned->s[Stat.len] = '_';
- ++Stat.gaps;
++n;
++q_pos;
++Stat.len;
+ ++Stat.tGaps;
}
aligned->t[Stat.len] = 5;
aligned->q[Stat.len] = query[q_pos];
aligned->s[Stat.len] = '_';
- ++Stat.gaps;
++n;
++q_pos;
+ ++Stat.tGaps;
}
++Stat.len;
}
@@ -649,22 +661,26 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
t_len += template_length;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
Stat.score = 0;
} else if(t_len == 0) {
Stat.len = q_len;
Stat.match = 0;
- Stat.gaps = q_len;
+ Stat.tGaps = q_len;
+ Stat.qGaps = 0;
Stat.score = W1 + (q_len - 1) * U;
} else {
Stat.len = t_len;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = t_len;
Stat.score = W1 + (t_len - 1) * U;
}
return Stat;
@@ -694,7 +710,6 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
if(0 < k) {
E_ptr = E;
for(m = 0; m < t_len; ++m) {
@@ -769,8 +784,8 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
thisScore = Q_prev + U;
if(Q < thisScore) {
Q = thisScore;
- if(e == 2) {
- D_ptr[n] = Q;
+ if(D_ptr[n] <= thisScore) {
+ D_ptr[n] = thisScore;
e = 3;
}
} else {
@@ -779,7 +794,7 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
thisScore = P_prev[n] + U;
if(P_ptr[n] < thisScore) {
P_ptr[n] = thisScore;
- if(D_ptr[n] < thisScore) {
+ if(D_ptr[n] <= thisScore) {
D_ptr[n] = thisScore;
e = 5;
}
@@ -841,7 +856,8 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
nuc_pos = m + t_s;
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
while(E_ptr[n] != 0) {
if(nuc_pos == template_length) {
nuc_pos = 0;
@@ -856,16 +872,18 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
++nuc_pos;
E_ptr += (q_len + 1);
++Stat.len;
+ ++Stat.qGaps;
}
+ ++Stat.qGaps;
++nuc_pos;
E_ptr += (q_len + 1);
} else {
while(!(E_ptr[n] >> 3)) {
- ++Stat.gaps;
++n;
++Stat.len;
+ ++Stat.tGaps;
}
- ++Stat.gaps;
+ ++Stat.tGaps;
++n;
}
++Stat.len;
@@ -894,22 +912,26 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
t_len += template_length;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
Stat.score = 0;
} else if(t_len == 0) {
Stat.len = q_len;
Stat.match = 0;
- Stat.gaps = q_len;
+ Stat.tGaps = q_len;
+ Stat.qGaps = 0;
Stat.score = W1 + (q_len - 1) * U;
} else {
Stat.len = t_len;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = t_len;
Stat.score = W1 + (t_len - 1) * U;
}
return Stat;
@@ -946,7 +968,6 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
E_ptr = E + (t_len * (bq_len + 1));
c_pos = (t_len + q_len) >> 1;
@@ -1028,8 +1049,8 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
thisScore = Q_prev + U;
if(Q < thisScore) {
Q = thisScore;
- if(e == 2) {
- D_ptr[n] = Q;
+ if(D_ptr[n] <= thisScore) {
+ D_ptr[n] = thisScore;
e = 3;
}
} else {
@@ -1038,7 +1059,7 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
thisScore = P_prev[n - 1] + U;
if(P_ptr[n] < thisScore) {
P_ptr[n] = thisScore;
- if(D_ptr[n] < thisScore) {
+ if(D_ptr[n] <= thisScore) {
D_ptr[n] = thisScore;
e = 5;
}
@@ -1129,7 +1150,8 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
nuc_pos = m + t_s;
Stat.len = 0;
Stat.match = 0;
- Stat.gaps = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
while(E_ptr[n] != 0) {
if(nuc_pos == template_length) {
nuc_pos = 0;
@@ -1145,18 +1167,20 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
E_ptr += (bq_len + 1);
--n;
++Stat.len;
+ ++Stat.qGaps;
}
+ ++Stat.qGaps;
++nuc_pos;
E_ptr += (bq_len + 1);
--n;
} else {
while(!(E_ptr[n] >> 3)) {
- ++Stat.gaps;
++n;
++q_pos;
++Stat.len;
+ ++Stat.tGaps;
}
- ++Stat.gaps;
+ ++Stat.tGaps;
++n;
++q_pos;
}
=====================================
nw.h
=====================================
@@ -38,7 +38,8 @@ struct alnScore {
int len;
int pos;
int match;
- int gaps;
+ int tGaps;
+ int qGaps;
};
struct aln {
=====================================
runkma.c
=====================================
@@ -17,6 +17,7 @@
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
+#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
@@ -135,7 +136,7 @@ char * nameLoad(Qseqs *name, FILE *infile) {
return (char *) name->seq;
}
-int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
int i, j, tmp_template, tmp_tmp_template, file_len, bestTemplate, tot;
int template, bestHits, t_len, start, end, aln_len, status, rand, sparse;
@@ -149,7 +150,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
long unsigned *w_scores, *uniq_alignment_scores, *alignment_scores;
double tmp_score, bestScore, id, q_id, cover, q_cover, p_value;
long double depth, expected, q_value;
- FILE *inputfile, *frag_in_raw, *seq_in, *res_out, *name_file;
+ FILE *inputfile, *frag_in_raw, *res_out, *name_file;
FILE *alignment_out, *consensus_out, *frag_out_raw, **template_fragments;
FILE *extendedFeatures_out, *xml_out;
time_t t0, t1;
@@ -191,11 +192,14 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
/* load databases */
strcat(templatefilename, ".seq.b");
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
+ if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
+ ERROR();
+ }
templatefilename[file_len] = 0;
templates_index = calloc(DB_size, sizeof(HashMapCCI*));
if(!templates_index) {
@@ -335,7 +339,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
alnThread->template_lengths = template_lengths;
alnThread->templates_index = templates_index;
alnThread->mq = mq;
- alnThread->sam = (sam == 1) ? 1 : 0;
+ alnThread->sam = sam;//(sam == 1) ? 1 : 0;
alnThread->scoreT = scoreT;
alnThread->next = alnThreads;
alnThreads = alnThread;
@@ -405,7 +409,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
alnThread->kmersize = kmersize;
alnThread->minlen = minlen;
alnThread->mq = mq;
- alnThread->sam = (sam == 1) ? 1 : 0;
+ alnThread->sam = sam;//(sam == 1) ? 1 : 0;
alnThread->scoreT = scoreT;
alnThread->template_lengths = template_lengths;
alnThread->templates_index = templates_index;
@@ -801,7 +805,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
t_len = template_lengths[template];
//expected = (Nhits - read_score) * (1.0 * t_len) / (template_tot_ulen - t_len + etta);
expected = t_len;
- expected /= (template_tot_ulen - t_len);
+ expected /= MAX(1,(template_tot_ulen - t_len));
expected *= (Nhits - read_score);
//q_value = pow(read_score - expected, 2) / (expected + read_score + etta);
q_value = read_score - expected;
@@ -1198,7 +1202,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
read_score = w_scores[template];
t_len = template_lengths[template];
expected = t_len;
- expected /= (template_tot_ulen - t_len);
+ expected /= MAX(1, (template_tot_ulen - t_len));
expected *= (Nhits - read_score);
if(0 < expected) {
q_value = read_score - expected;
@@ -1254,15 +1258,15 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
}
if(vcf) {
- updateVcf(thread->template_name, templates_index[template]->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
} else {
- if(sam || ID_t == 0.0) {
+ if((sam && !(sam & 2096)) || ID_t == 0.0) {
thread->template_index = templates_index[template];
thread->template_name = nameLoad(template_name, name_file);
thread->template = template;
- assembly_KMA_Ptr(thread);
+ skip_assemble_KMA(thread);
//skip_assemble_KMA(template, sam, t_len, thread->template_name, fileCount, template_fragments, aligned_assem, qseq, header);
if(ID_t == 0.0) {
depth = aligned_assem->depth;
@@ -1326,7 +1330,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
return status;
}
-int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
/* runKMA_MEM is a memory saving version of runKMA,
at the cost it chooses best templates based on kmers
@@ -1344,7 +1348,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
long unsigned *w_scores, *uniq_alignment_scores, *alignment_scores;
double tmp_score, bestScore, id, cover, q_id, q_cover, p_value;
long double depth, q_value, expected;
- FILE *inputfile, *frag_in_raw, *seq_in, *res_out, *name_file;
+ FILE *inputfile, *frag_in_raw, *res_out, *name_file;
FILE *alignment_out, *consensus_out, *frag_out_raw, **template_fragments;
FILE *extendedFeatures_out, *xml_out;
time_t t0, t1;
@@ -1395,11 +1399,11 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
kmersize = 16;
}
strcat(templatefilename, ".seq.b");
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
ERROR();
}
@@ -1502,6 +1506,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
while((rc_flag = get_ankers(matched_templates, qseq_comp, header, &flag, inputfile)) != 0) {
if(*matched_templates) { // SE
read_score = 0;
+ qseq_r->len = 0;
} else { // PE
read_score = get_ankers(matched_templates, qseq_r_comp, header_r, &flag_r, inputfile);
read_score = labs(read_score);
@@ -1536,7 +1541,6 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
if(rc_flag < 0 && 0 < matched_templates[*matched_templates]) {
bestHits = -bestHits;
}
-
if(read_score && kmersize <= qseq_r->len) {
unCompDNA(qseq_r_comp, qseq_r->seq);
update_Scores_pe(qseq->seq, qseq->len, qseq_r->seq, qseq_r->len, bestHits, best_read_score + read_score, best_start_pos, best_end_pos, bestTemplates, header, header_r, flag, flag_r, alignment_scores, uniq_alignment_scores, frag_out_raw);
@@ -1892,7 +1896,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
t_len = template_lengths[template];
//expected = (Nhits - read_score) * (t_len / (template_tot_ulen - t_len + etta));
expected = t_len;
- expected /= (template_tot_ulen - t_len);
+ expected /= MAX(1, (template_tot_ulen - t_len));
expected *= (Nhits - read_score);
//q_value = pow(read_score - expected, 2) / (expected + read_score + etta);
q_value = read_score - expected;
@@ -2353,7 +2357,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
read_score = w_scores[template];
t_len = template_lengths[template];
expected = t_len;
- expected /= (template_tot_ulen - t_len);
+ expected /= MAX(1, (template_tot_ulen - t_len));
expected *= (Nhits - read_score);
if(0 < expected) {
q_value = read_score - expected;
@@ -2420,11 +2424,11 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
}
if(vcf) {
- updateVcf(thread->template_name, thread->template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
} else {
- if(sam || ID_t == 0.0) {
+ if((sam && !(sam & 2096)) || ID_t == 0.0) {
/* load DB */
seq_seeker *= sizeof(long unsigned);
lseek(seq_in_no, seq_seeker, SEEK_CUR);
@@ -2475,7 +2479,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
}
/* Close files */
- fclose(seq_in);
+ close(seq_in_no);
fclose(res_out);
if(alignment_out) {
fclose(alignment_out);
=====================================
runkma.h
=====================================
@@ -26,5 +26,5 @@
unsigned char * ustrdup(unsigned char *src, size_t n);
int load_DBs_KMA(char *templatefilename, long unsigned **alignment_scores, long unsigned **uniq_alignment_scores, int **template_lengths, unsigned shm);
char * nameLoad(Qseqs *name, FILE *infile);
-int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
-int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
=====================================
sam.c
=====================================
@@ -142,7 +142,6 @@ int samwrite(const Qseqs *qseq, const Qseqs *header, const Qseqs *Qual, char *rn
2048 supplementary alignment
*/
-
qname = (char *) header->seq;
seq = qseq->seq;
if(Qual) {
@@ -150,6 +149,7 @@ int samwrite(const Qseqs *qseq, const Qseqs *header, const Qseqs *Qual, char *rn
} else {
qual = "*";
}
+
if(aligned) {
mapQ = 254 < aligned->mapQ ? 254 : aligned->mapQ;
et = *stats;
@@ -172,11 +172,11 @@ int samwrite(const Qseqs *qseq, const Qseqs *header, const Qseqs *Qual, char *rn
}
rnext = "*";
pnext = 0;
-
tab = 0;
if(qname) {
while(*qname) {
if(*qname == '\t') {
+ tab = -tab;
*qname = 0;
} else {
++qname;
@@ -192,15 +192,16 @@ int samwrite(const Qseqs *qseq, const Qseqs *header, const Qseqs *Qual, char *rn
}
if(aligned) {
cigar = makeCigar(Cigar, aligned);
- } else if(2 * sizeof(int) + 1 < header->len && header->seq[header->len - 2 * sizeof(int) - 1] == 0) {
- cigar = (char *) Cigar->seq;
- sprintf(cigar, "%dS", *((int*) (header->seq + (header->len - 2 * sizeof(int)))));
+ if(2 * sizeof(int) + 1 < header->len && header->seq[header->len - 2 * sizeof(int) - 1] == 0) {
+ cigar = (char *) Cigar->seq;
+ sprintf(cigar, "%dS", *((int*) (header->seq + (header->len - 2 * sizeof(int)))));
+ }
}
size = fprintf(stdout, "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\tET:i:%d\tAS:i:%d\n", qname, flag, rname, pos, mapQ, cigar, rnext, pnext, tlen, (char *) seq, qual, et, score);
unlock(lock);
- if(tab) {
- qname[tab] = '\t';
+ if(tab < 0) {
+ qname[-tab] = '\t';
}
return size;
=====================================
savekmers.c
=====================================
@@ -1885,7 +1885,8 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
}
/* get best match(es) */
- if(hitCounter * kmersize > (n_kmers - hitCounter)) {
+ /*if(hitCounter * kmersize > (n_kmers - hitCounter)) {*/
+ if(hitCounter) {
bestScore = getMatchSparse(bestTemplates, Score, kmersize, n_kmers, M, MM);
/*
@@ -2007,7 +2008,8 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
qseq->N[0]--;
/* get best match(es) */
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
} else {
*bestTemplates = clearScore(bestTemplates, Score);
@@ -2258,8 +2260,9 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
qseq->N[0]--;
/* get best match(es) */
- //clearScore(bestTemplates, extendScore);
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*clearScore(bestTemplates, extendScore);*/
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
/*
bestHits = 0;
@@ -2549,8 +2552,9 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
}
/* get best match(es) */
- //clearScore(bestTemplates, extendScore);
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /* clearScore(bestTemplates, extendScore); */
+ /* if(hitCounter * kmersize > (end - hitCounter - kmersize)) { */
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
/*
bestHits = 0;
@@ -2791,8 +2795,9 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
}
/* get best match(es) */
- //clearScore(bestTemplates_r, extendScore);
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*clearScore(bestTemplates_r, extendScore);*/
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore_r = getMatch(bestTemplates_r, Score_r);
/*
@@ -2953,7 +2958,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
reps = 0;
/* get best match(es) */
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
@@ -3064,7 +3070,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
reps = 0;
/* get best match(es) */
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore_r = getMatch(bestTemplates_r, Score_r);
/*
@@ -3135,7 +3142,8 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
}
/* get forward */
- if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive)) && (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+ /*if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive)) && (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {*/
+ if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive))) {
/* got hits */
bestScore = getF(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
@@ -3160,7 +3168,8 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
*extendScore = 1;
/* get reverse */
- if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive)) && (qseq_r->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+ /*if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive)) && (qseq_r->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {*/
+ if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive))) {
if(bestScore) {
bestScore_r = getR(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
} else {
@@ -3562,8 +3571,10 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
*extendScore = 1;
/* get reverse */
- if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) &&
+ /*if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) &&
(qseq->seqlen + qseq_r->seqlen - hitCounter - hitCounter_r - (kmersize << 1)) < (hitCounter + hitCounter_r) * kmersize &&
+ (bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {*/
+ if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) &&
(bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {
if((qseq->seqlen + qseq_r->seqlen - bestScore) < bestScore * kmersize) {
@@ -3952,8 +3963,9 @@ int save_kmers_HMM(const HashMapKMA *templates, const Penalties *rewards, int *b
stop = j + kmersize - 1;
/* evaluate hit */
- if(hitCounter > 0 && (hitCounter * kmersize > (stop - start - hitCounter + kmersize))
- && ((stop - start) > minLen || start == 0 || stop == seqlen)) {
+ /*if(hitCounter > 0 && (hitCounter * kmersize > (stop - start - hitCounter + kmersize))
+ && ((stop - start) > minLen || start == 0 || stop == seqlen)) {*/
+ if(hitCounter > 0 && ((stop - start) > minLen || start == 0 || stop == seqlen)) {
if(deCon) {
if(SU) {
for(k = start; k < j; ++k) {
=====================================
sparse.c
=====================================
@@ -153,8 +153,12 @@ char ** load_DBs_Sparse(char *templatefilename, unsigned **template_lengths, uns
/* load lengths */
fseek(DB_file, DB_size * sizeof(int), SEEK_CUR);
- fread(*template_lengths, sizeof(int), DB_size, DB_file);
- fread(*template_ulengths, sizeof(int), DB_size, DB_file);
+ i = fread(*template_lengths, sizeof(int), DB_size, DB_file);
+ i += fread(*template_ulengths, sizeof(int), DB_size, DB_file);
+ if(!i) {
+ fprintf(stderr, "DB needs to sparse indexed, to run a sparse mapping.\n");
+ exit(1);
+ }
}
templatefilename[file_len] = 0;
fclose(DB_file);
=====================================
spltdb.c
=====================================
@@ -17,6 +17,7 @@
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
+#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
@@ -395,7 +396,7 @@ unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *he
return num;
}
-int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
/* https://www.youtube.com/watch?v=LtXEMwSG5-8 */
@@ -414,7 +415,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
double tmp_score, bestScore, id, cover, q_id, q_cover, p_value;
long double depth, q_value, expected;
char *templatefilename, Date[11];
- FILE **inputfiles, *inputfile, *frag_in_raw, *seq_in;
+ FILE **inputfiles, *inputfile, *frag_in_raw;
FILE *res_out, *alignment_out, *consensus_out, *frag_out_raw, *xml_out;
FILE *extendedFeatures_out, *name_file, **template_fragments;
time_t t0, t1;
@@ -1376,7 +1377,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
templatefilename = 0;
initialiseVcf(vcf_out, templatefilename);
}
- seq_in = 0;
+ seq_in_no = 0;
/* preallocate assembly matrices */
matrix = smalloc(sizeof(AssemInfo));
@@ -1434,7 +1435,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
thread->bcd = bcd;
thread->sam = sam;
thread->ef = extendedFeatures;
- thread->seq_in = fileno(seq_in);
+ thread->seq_in = seq_in_no;
thread->kmersize = kmersize;
thread->template = -2;
thread->file_count = fileCount;
@@ -1499,7 +1500,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
thread->bcd = bcd;
thread->sam = sam;
thread->ef = extendedFeatures;
- thread->seq_in = fileno(seq_in);
+ thread->seq_in = seq_in_no;
thread->kmersize = kmersize;
thread->template = 0;
thread->file_count = fileCount;
@@ -1538,11 +1539,11 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
templatefilename = *templatefilenames++;
file_len = strlen(templatefilename);
strcat(templatefilename, ".seq.b");
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
ERROR();
}
@@ -1573,12 +1574,16 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
name_file = sfopen(templatefilename, "rb");
templatefilename[file_len] = 0;
strcat(templatefilename, ".seq.b");
- fclose(seq_in);
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ close(seq_in_no);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
+ if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
+ ERROR();
+ }
+ thread->seq_in = seq_in_no;
templatefilename[file_len] = 0;
seq_seeker = 0;
if(extendedFeatures == 2) {
@@ -1665,7 +1670,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
}
if(vcf) {
- updateVcf(thread->template_name, thread->template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
} else {
@@ -1714,7 +1719,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
hashMapCCI_destroy(thread->template_index);
/* Close files */
- fclose(seq_in);
+ close(seq_in_no);
fclose(res_out);
if(alignment_out) {
fclose(alignment_out);
=====================================
spltdb.h
=====================================
@@ -35,4 +35,4 @@ struct spltDBbuff {
int print_ankers_spltDB(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
int print_ankers_Sparse_spltDB(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *header, int *flag, FILE *inputfile);
-int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
=====================================
stdstat.c
=====================================
@@ -30,6 +30,10 @@ int cmp_and(int t, int q) {
return (t && q);
}
+int cmp_true(int t, int q) {
+ return 1;
+}
+
double fastp(long double q) {
/* P-value from quantile in a chi-square distribution */
double p = 1.0;
=====================================
stdstat.h
=====================================
@@ -23,6 +23,7 @@
extern int (*cmp)(int, int);
int cmp_or(int t, int q);
int cmp_and(int t, int q);
+int cmp_true(int t, int q);
double fastp(long double q);
double p_chisqr(long double q);
double power(double x, unsigned n);
=====================================
vcf.c
=====================================
@@ -94,10 +94,19 @@ void initialiseVcf(FileBuff *fileP, char *templateFilename) {
}
-void updateVcf(char *template_name, long unsigned *template_seq, double evalue, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP) {
+void updateVcf(char *template_name, unsigned char *template_seq, double evalue, double support, int bcd, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP) {
- static const char *PASS = "PASS", *FAIL = "FAIL", *LowQual = "LowQual", *UNKNOWN = ".";
- int i, j, pos, bestScore, depthUpdate, bestBaseScore, nucNum;
+ static const char nuc2num[256] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 0, 8, 1, 8, 8, 8, 2, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 0, 8, 1, 8, 8, 8, 2, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
+ const char *PASS = "PASS", *FAIL = "FAIL", *LowQual = "LowQual", *UNKNOWN = ".";
+ int i, pos, nextPos, bestScore, depthUpdate, bestBaseScore, nucNum;
int template_name_length, check, avail, DP, AD, DEL, QUAL;
double AF, RAF, Q, P;
const double lnConst = -10 / log(10);
@@ -115,79 +124,84 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
update = (char *) fileP->next;
avail = fileP->bytes;
assembly = matrix->assmb;
- pos = 0;
- i = 0;
+ nextPos = 0;
do {
- /* does not handle insertions yet */
+ pos = nextPos;
+ nextPos = assembly[pos].next;
+
+ /* handle insertions now */
+ nuc = *template_seq++;
if(pos < t_len) {
- nuc = bases[getNuc(template_seq, pos)];
++i;
- } else {
+ } else if(nuc != '-') {
+ --template_seq;
nuc = '-';
}
+
/* call base */
- nucNum = 0;
- bestNuc = 5;
- bestScore = 0;
+ bestNuc = nuc2num[nuc];
+ bestScore = assembly[pos].counts[bestNuc];
depthUpdate = 0;
- for(j = 0; j < 5; ++j) {
- if(bestScore < assembly[pos].counts[j]) {
- bestScore = assembly[pos].counts[j];
- bestNuc = j;
+ for(i = 0; i < 6; ++i) {
+ if(bestScore < assembly[pos].counts[i]) {
+ bestScore = assembly[pos].counts[i];
+ bestNuc = i;
}
- depthUpdate += assembly[pos].counts[j];
- }
- if(bestScore < assembly[pos].counts[j]) {
- bestScore = assembly[pos].counts[j];
- bestNuc = j;
+ depthUpdate += assembly[pos].counts[i];
}
- depthUpdate += assembly[pos].counts[j];
nucNum = bestNuc;
bestNuc = bases[bestNuc];
/* Check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ nucNum = 5;
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = 0;
bestNuc = 4;
- for(j = 0; j < 5; ++j) {
- if(bestBaseScore < assembly[pos].counts[j]) {
- bestBaseScore = assembly[pos].counts[j];
- bestNuc = j;
+ for(i = 0; i < 5; ++i) {
+ if(bestBaseScore < assembly[pos].counts[i]) {
+ bestBaseScore = assembly[pos].counts[i];
+ bestNuc = i;
}
}
+ nucNum = bestNuc;
bestNuc = tolower(bases[bestNuc]);
} else {
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[pos].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
if(bestScore) {
/* determine base at current position */
bestNuc = baseCall(bestNuc, nuc, bestScore, depthUpdate, evalue, &assembly[pos]);
+ nucNum = nuc2num[bestNuc];
+
+ /* INFO */
+ DP = depthUpdate;
+ AD = assembly[pos].counts[nucNum];
+ AF = (double) AD / DP;
+ RAF = (double) bestScore / DP;
+ DEL = assembly[pos].counts[5];
+ Q = pow(depthUpdate - (bestScore << 1), 2) / depthUpdate;
+ P = p_chisqr(Q);
/* discard unimportant changes */
- if(nuc != toupper(bestNuc)) {
- /* INFO */
- DP = depthUpdate;
- AD = assembly[pos].counts[nucNum];
- AF = (double) AD / DP;
- RAF = (double) bestScore / DP;
- DEL = assembly[pos].counts[5];
-
- /* FORMAT */
- Q = pow(depthUpdate - (bestScore << 1), 2) / depthUpdate;
- P = p_chisqr(Q);
+ if(nuc != bestNuc || (t_len <= nextPos && *template_seq == '-') || DP < bcd || evalue < P || AD < support * DP) {
/* QUAL */
//QUAL = lnConst * log(P);
QUAL = lnConst * log(binP(DP, AD, 0.25));
QUAL = (QUAL < 0 || 3079 < QUAL) ? 3079 : QUAL;
/* FILTER */
- if(isupper(bestNuc)) {
+ if(bcd <= DP && P <= evalue && support * DP <= AD) {
FILTER = (char *) PASS;
- } else if(P <= evalue || (9 * depthUpdate) <= (10 * bestScore)) {
+ } else if(bcd <= DP || P <= evalue || support * DP <= AD) {
FILTER = (char *) LowQual;
} else {
FILTER = (char *) FAIL;
@@ -204,8 +218,8 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
update += template_name_length; avail -= template_name_length;
*update++ = '\t'; --avail;
- if(nuc != '-') {
- check = sprintf(update, "%d", i);
+ if(pos < t_len) {
+ check = sprintf(update, "%d", pos + 1);
update += check; avail -= check;
} else {
*update++ = '0';
@@ -227,7 +241,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
}
*update++ = '\t';
--avail;
- if(nuc != '-' && bestNuc == '-') {
+ if(bestNuc == '-') {
*update++ = '<';
*update++ = bestNuc;
*update++ = '>';
@@ -244,7 +258,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
check = sprintf(update, "Q:P:FT\t%.2f:%4.1e:%s\n", Q, P, FILTER);
update += check; avail -= check;
}
- } else if(nuc != '-') {
+ } else if(pos < t_len) {
FILTER = (char *) FAIL;
if(avail < template_name_length + 105) {
fileP->bytes = avail;
@@ -252,7 +266,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
avail = fileP->bytes;
update = (char *) fileP->next;
}
- check = sprintf(update, "%s\t%d\t.\t%c\t%c\t%d\t%s\t", template_name, i, nuc, '.', 0, *FILTER_ptr);
+ check = sprintf(update, "%s\t%d\t.\t%c\t%c\t%d\t%s\t", template_name, pos + 1, nuc, '.', 0, *FILTER_ptr);
update += check; avail -= check;
check = sprintf(update, "DP=%d;AD=%d;AF=%.2f;RAF=%.2f;DEL=%d;AD6=%d,%d,%d,%d,%d,%d\t", 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0, 0);
update += check; avail -= check;
@@ -260,7 +274,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
update += check; avail -= check;
}
- } while((pos = assembly[pos].next) != 0);
+ } while(nextPos != 0);
fileP->next = (unsigned char *) update;
fileP->bytes = avail;
=====================================
vcf.h
=====================================
@@ -22,4 +22,4 @@
char * noFolder(const char *src);
void initialiseVcf(FileBuff *fileP, char *templateFilename);
-void updateVcf(char *template_name, long unsigned *template_seq, double evalue, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP);
+void updateVcf(char *template_name, unsigned char *template_seq, double evalue, double support, int bcd, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP);
=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
* limitations under the License.
*/
-#define KMA_VERSION "1.3.10"
+#define KMA_VERSION "1.3.15"
=====================================
xml.c
=====================================
@@ -212,7 +212,7 @@ void hitXML(FILE *out, const int template, const char *template_name, const Aln
fprintf(out, "\t\t\t<Hsp_query-from>%d</Hsp_query-from>\n", ((flag & 16) ? (aligned->end) : (aligned->start)) + 1);
fprintf(out, "\t\t\t<Hsp_query-to>%d</Hsp_query-to>\n", ((flag & 16) ? (aligned->start) : (aligned->end)) + 1);
fprintf(out, "\t\t\t<Hsp_hit-from>%d</Hsp_hit-from>\n", alnStat->pos + 1);
- fprintf(out, "\t\t\t<Hsp_hit-to>%d</Hsp_hit-to>\n", alnStat->pos + alnStat->len - alnStat->gaps + 1);
+ fprintf(out, "\t\t\t<Hsp_hit-to>%d</Hsp_hit-to>\n", alnStat->pos + alnStat->len - alnStat->tGaps + 1);
fprintf(out, "\t\t\t<Hsp_query-frame>%d</Hsp_query-frame>\n", aligned->start % 3);
fprintf(out, "\t\t\t<Hsp_hit-frame>%d</Hsp_hit-frame>\n", alnStat->pos % 3);
fprintf(out, "\t\t\t<Hsp_identity>%d</Hsp_identity>\n", Ms);
View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/0bfe9dc8bcea018ed75423430e51dc83a8626983...3a0c2e0868ae672f84c60fafa3bcea1aa87ca059
--
View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/0bfe9dc8bcea018ed75423430e51dc83a8626983...3a0c2e0868ae672f84c60fafa3bcea1aa87ca059
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210501/584d13b3/attachment-0001.htm>
More information about the debian-med-commit
mailing list