[med-svn] [Git][med-team/kma][master] 3 commits: New upstream version 1.3.17
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Sun May 23 17:15:48 BST 2021
Nilesh Patra pushed to branch master at Debian Med / kma
Commits:
d0cca96d by Nilesh Patra at 2021-05-23T21:39:01+05:30
New upstream version 1.3.17
- - - - -
0abd8ffd by Nilesh Patra at 2021-05-23T21:39:01+05:30
Update upstream source from tag 'upstream/1.3.17'
Update to upstream version '1.3.17'
with Debian dir 373d1b637b885f9952fd82cad3569a63d0e25b4b
- - - - -
97b5faf0 by Nilesh Patra at 2021-05-23T21:40:05+05:30
Interim changelog entry
- - - - -
12 changed files:
- align.c
- align.h
- chain.c
- chain.h
- debian/changelog
- kma.c
- nw.c
- nw.h
- runinput.c
- runinput.h
- stdnuc.h
- version.h
Changes:
=====================================
align.c
=====================================
@@ -28,6 +28,180 @@
#include "stdnuc.h"
#include "stdstat.h"
+AlnScore (*leadTailAlnPtr)(Aln *, Aln *, const long unsigned*, const unsigned char*, int, int, int, const int, NWmat *) = &leadTailAln;
+void (*trailTailAlnPtr)(Aln *, Aln *, AlnScore *, const long unsigned *, const unsigned char *, int, int, int, int, const int, NWmat *) = &trailTailAln;
+
+AlnScore skipLeadAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices) {
+
+ AlnScore Stat;
+
+ /* initialize */
+ Stat.len = 0;
+ Stat.score = 0;
+ Stat.match = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
+ Stat.pos = t_e;
+
+ return Stat;
+}
+
+AlnScore leadTailAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices) {
+
+ int bias, band, t_s, q_s;
+ AlnScore Stat, NWstat;
+
+ /* initialize */
+ Stat.len = 0;
+ Stat.score = 0;
+ Stat.match = 0;
+ Stat.tGaps = 0;
+ Stat.qGaps = 0;
+ Stat.pos = t_e;
+
+ if(q_e) {
+ /* get boundaries */
+ t_s = 0;
+ q_s = 0;
+ if((q_e << 1) < t_e || (q_e + bandwidth) < t_e) { // big leading template gap, cut down
+ //t_s = t_e - MIN(bandwidth, (q_e << 1));
+ t_s = t_e - (q_e + (q_e < bandwidth ? q_e : bandwidth));
+ } else if((t_e << 1) < q_e || (t_e + bandwidth) < q_e) { // big leading query gap, cut down
+ //q_s = q_e - MIN(bandwidth, (t_e << 1));
+ q_s = q_e - (t_e + (t_e < bandwidth ? t_e : bandwidth));
+ }
+
+ /* align */
+ if(t_e - t_s > 0 && q_e - q_s > 0) {
+ band = abs(t_e - t_s - q_e + q_s) + bandwidth;
+ if(q_e - q_s <= band || t_e - t_s <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
+ if(Frag_align) {
+ NWstat = NW(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+ } else {
+ NWstat = NW_score(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
+ }
+ } else if(Frag_align) {
+ NWstat = NW_band(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, band, matrices, t_len);
+ //NWstat = NW(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+ } else {
+ NWstat = NW_band_score(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, band, matrices, t_len);
+ //NWstat = NW_score(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
+ }
+
+ if(Frag_align) {
+ /* trim leading gaps */
+ bias = 0;
+ if(t_s == 0) {
+ while(bias < NWstat.len && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
+ if(Frag_align->t[bias] == 5) {
+ --NWstat.tGaps;
+ ++(Frag_align->start);
+ } else {
+ --NWstat.qGaps;
+ }
+ ++bias;
+ }
+ NWstat.len -= bias;
+ /*if(bias) {
+ NWstat.score -= (W1 + (bias - 1) * U);
+ }*/
+ }
+
+ memcpy(aligned->t, Frag_align->t + bias, NWstat.len);
+ memcpy(aligned->s, Frag_align->s + bias, NWstat.len);
+ memcpy(aligned->q, Frag_align->q + bias, NWstat.len);
+ aligned->start = q_s + Frag_align->start;
+ }
+ Stat.pos -= (NWstat.len - NWstat.tGaps);
+ Stat.score = NWstat.score;
+ Stat.len = NWstat.len;
+ Stat.match = NWstat.match;
+ Stat.tGaps = NWstat.tGaps;
+ Stat.qGaps = NWstat.qGaps;
+ } else if(aligned) {
+ aligned->start = q_s;
+ }
+ }
+
+ return Stat;
+}
+
+void skipTrailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices) {
+ if(Frag_align) {
+ Frag_align->end = 0;
+ }
+}
+
+void trailTailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices) {
+
+ int band, bias, q_e, t_e;
+ AlnScore NWstat;
+
+ /* Get intervals in query and template to align */
+ q_e = q_len;
+ t_e = t_len;
+ if(((q_len - q_s) << 1) < (t_len - t_s) || (q_len - q_s + bandwidth) < (t_len - t_s)) { // big trailing template gap, cut down
+ //t_e = t_s + MIN(bandwidth, ((q_len - q_s) << 1));
+ t_e = q_len - q_s;
+ t_e = t_s + (t_e + (t_e < bandwidth ? t_e : bandwidth));
+ } else if(((t_len - t_s) << 1) < (q_len - q_s) || (t_len - t_s + bandwidth) < (q_len - q_s)) { // big leading query gap, cut down
+ //q_e = q_s + MIN(bandwidth, ((t_len - t_s) << 1));
+ q_e = t_len - t_s;
+ q_e = q_s + (q_e + (q_e < bandwidth ? q_e : bandwidth));
+ }
+
+ /* align trailing gap */
+ if(t_e - t_s > 0 && q_e - q_s > 0) {
+ band = abs(t_e - t_s - q_e + q_s) + bandwidth;
+ if(q_e - q_s <= band || t_e - t_s <= band) {//|| abs(t_e - t_s - q_e - q_s) >= 32) {
+ if(Frag_align) {
+ NWstat = NW(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+ } else {
+ NWstat = NW_score(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, matrices, t_len);
+ }
+ } else if(Frag_align) {
+ NWstat = NW_band(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, band, matrices, t_len);
+ //NWstat = NW(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+ } else {
+ NWstat = NW_band_score(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, band, matrices, t_len);
+ }
+
+ if(Frag_align) {
+ /* trim trailing gaps */
+ if(t_e == t_len) {
+ bias = NWstat.len - 1;
+ while(bias && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
+ if(Frag_align->t[bias] == 5) {
+ --NWstat.tGaps;
+ ++(Frag_align->end);
+ } else {
+ --NWstat.qGaps;
+ }
+ --bias;
+ }
+ ++bias;
+
+ if(bias != NWstat.len) {
+ //NWstat.score -= (W1 + (NWstat.len - bias) * U);
+ NWstat.len = bias;
+ }
+ }
+
+ memcpy(aligned->t + Stat->len, Frag_align->t, NWstat.len);
+ memcpy(aligned->s + Stat->len, Frag_align->s, NWstat.len);
+ memcpy(aligned->q + Stat->len, Frag_align->q, NWstat.len);
+ }
+
+ Stat->score += NWstat.score;
+ Stat->len += NWstat.len;
+ Stat->match += NWstat.match;
+ Stat->tGaps += NWstat.tGaps;
+ Stat->qGaps += NWstat.qGaps;
+ } else if(Frag_align) {
+ Frag_align->end = 0;
+ }
+}
+
AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_len, int q_start, int q_end, Aln *aligned, Aln *Frag_align, int min, int max, int mq, double scoreT, AlnPoints *points, NWmat *matrices) {
const int bandwidth = 64;
@@ -227,75 +401,10 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
}
/* trim seeds */
- trimSeeds(points, start);
-
- /* initialize */
- Stat.len = 0;
- Stat.score = 0;
- Stat.match = 0;
- Stat.tGaps = 0;
- Stat.qGaps = 0;
- value = points->tStart[start] - 1;
- Stat.pos = value;
- i = points->qStart[start];
+ trimSeedsPtr(points, start);
/* align leading tail */
- if(i != 0) {
- /* get boundaries */
- t_s = 0;
- t_e = value;
- q_s = 0;
- q_e = i;
- if((q_e << 1) < t_e || (q_e + bandwidth) < t_e) { // big leading template gap, cut down
- //t_s = t_e - MIN(bandwidth, (q_e << 1));
- t_s = t_e - (q_e + (q_e < bandwidth ? q_e : bandwidth));
- } else if((t_e << 1) < q_e || (t_e + bandwidth) < q_e) { // big leading query gap, cut down
- //q_s = q_e - MIN(bandwidth, (t_e << 1));
- q_s = q_e - (t_e + (t_e < bandwidth ? t_e : bandwidth));
- }
-
- /* align */
- if(t_e - t_s > 0 && q_e - q_s > 0) {
- band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
- if(q_e - q_s <= band || t_e - t_s <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
- NWstat = NW(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices);
- } else {
- NWstat = NW_band(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, band, matrices);
- //NWstat = NW(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices);
- }
-
- /* trim leading gaps */
- bias = 0;
- if(t_s == 0) {
- while(bias < NWstat.len && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
- if(Frag_align->t[bias] == 5) {
- --NWstat.tGaps;
- ++(Frag_align->start);
- } else {
- --NWstat.qGaps;
- }
- ++bias;
- }
- NWstat.len -= bias;
- /*if(bias) {
- NWstat.score -= (W1 + (bias - 1) * U);
- }*/
- }
-
- memcpy(aligned->t, Frag_align->t + bias, NWstat.len);
- memcpy(aligned->s, Frag_align->s + bias, NWstat.len);
- memcpy(aligned->q, Frag_align->q + bias, NWstat.len);
- aligned->start = q_s + Frag_align->start;
- Stat.pos -= (NWstat.len - NWstat.tGaps);
- Stat.score = NWstat.score;
- Stat.len = NWstat.len;
- Stat.match = NWstat.match;
- Stat.tGaps = NWstat.tGaps;
- Stat.qGaps = NWstat.qGaps;
- } else {
- aligned->start = q_s;
- }
- }
+ Stat = leadTailAlnPtr(aligned, Frag_align, template_index->seq, qseq, points->tStart[start] - 1, t_len, points->qStart[start], bandwidth, matrices);
/* piece seeds together */
stop = 1;
@@ -357,11 +466,11 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
return Stat;
}
if((t_l > 0 || q_e - q_s > 0)) {
- band = 4 * abs(t_l - q_e + q_s) + bandwidth;
+ band = abs(t_l - q_e + q_s) + bandwidth;
if(q_e - q_s <= band || t_l <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
- NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices);
+ NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
} else {
- NWstat = NW_band(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, band, matrices);
+ NWstat = NW_band(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, band, matrices, t_len);
//NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices);
}
@@ -380,63 +489,7 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
}
/* align trailing tail */
- /* Get intervals in query and template to align */
- q_s = points->qEnd[start];
- t_s = points->tEnd[start] - 1;
- q_e = q_len;
- t_e = t_len;
- if(((q_len - q_s) << 1) < (t_len - t_s) || (q_len - q_s + bandwidth) < (t_len - t_s)) { // big trailing template gap, cut down
- //t_e = t_s + MIN(bandwidth, ((q_len - q_s) << 1));
- t_e = q_len - q_s;
- t_e = t_s + (t_e + (t_e < bandwidth ? t_e : bandwidth));
- } else if(((t_len - t_s) << 1) < (q_len - q_s) || (t_len - t_s + bandwidth) < (q_len - q_s)) { // big leading query gap, cut down
- //q_e = q_s + MIN(bandwidth, ((t_len - t_s) << 1));
- q_e = t_len - t_s;
- q_e = q_s + (q_e + (q_e < bandwidth ? q_e : bandwidth));
- }
-
- /* align trailing gap */
- if(t_e - t_s > 0 && q_e - q_s > 0) {
- band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
- if(q_e - q_s <= band || t_e - t_s <= band) {//|| abs(t_e - t_s - q_e - q_s) >= 32) {
- NWstat = NW(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices);
- } else {
- NWstat = NW_band(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, band, matrices);
- //NWstat = NW(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices);
- }
- /* trim trailing gaps */
- if(t_e == t_len) {
- bias = NWstat.len - 1;
- while(bias && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
- if(Frag_align->t[bias] == 5) {
- --NWstat.tGaps;
- ++(Frag_align->end);
- } else {
- --NWstat.qGaps;
- }
- --bias;
- }
- ++bias;
-
-
- if(bias != NWstat.len) {
- //NWstat.score -= (W1 + (NWstat.len - bias) * U);
- NWstat.len = bias;
- }
- }
-
- memcpy(aligned->t + Stat.len, Frag_align->t, NWstat.len);
- memcpy(aligned->s + Stat.len, Frag_align->s, NWstat.len);
- memcpy(aligned->q + Stat.len, Frag_align->q, NWstat.len);
- Stat.score += NWstat.score;
- Stat.len += NWstat.len;
- Stat.match += NWstat.match;
- Stat.tGaps += NWstat.tGaps;
- Stat.qGaps += NWstat.qGaps;
- } else {
- Frag_align->end = 0;
- }
-
+ trailTailAlnPtr(aligned, Frag_align, &Stat, template_index->seq, qseq, points->tEnd[start] - 1, t_len, points->qEnd[start], q_len, bandwidth, matrices);
aligned->s[Stat.len] = 0;
aligned->len = Stat.len;
aligned->end = q_len - q_e + Frag_align->end;
@@ -450,7 +503,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
const int bandwidth = 64;
int i, j, k, l, bias, prev, start, stop, t_len, value, end, band, U, M;
int t_l, t_s, t_e, q_s, q_e, mem_count, score, kmersize, *seeds, **d;
- unsigned mapQ, shifter;
+ unsigned mapQ, shifter, cPos, iPos;
long unsigned key;
unsigned char nuc;
AlnScore Stat, NWstat;
@@ -478,7 +531,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
end = q_end - kmersize + 1;
}
while(j < end) {
- key = getKmer(qseq_comp->seq, j, shifter);
+ getKmer_macro(key, qseq_comp->seq, j, cPos, iPos, shifter);
value = hashMapCCI_get(template_index, key, shifter);
if(value == 0) {
@@ -576,6 +629,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
j = qseq_comp->N[i] + 1;
}
}
+ mapQ = 0;
if(mem_count) {
points->len = mem_count;
@@ -593,8 +647,6 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
/* get best seed chain, returns best starting point */
start = chainSeedsPtr(points, q_len, t_len, kmersize, &mapQ);
score = points->score[start];
-
- //if(score < (q_len >> 5)) {
if(mapQ < mq || score < kmersize) {
Stat.score = 0;
Stat.len = 1;
@@ -606,48 +658,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
return Stat;
}
- /* initialize */
- Stat.len = 0;
- Stat.score = 0;
- Stat.match = 0;
- Stat.tGaps = 0;
- Stat.qGaps = 0;
- value = points->tStart[start] - 1;
- Stat.pos = value;
- i = points->qStart[start];
-
/* align leading tail */
- if(i != 0) {
- /* get boundaries */
- t_s = 0;
- t_e = value;
- q_s = 0;
- q_e = i;
- if((q_e << 1) < t_e || (q_e + bandwidth) < t_e) { // big leading template gap, cut down
- //t_s = t_e - MIN(bandwidth, (q_e << 1));
- t_s = t_e - (q_e + (q_e < bandwidth ? q_e : bandwidth));
- } else if((t_e << 1) < q_e || (t_e + bandwidth) < q_e) { // big leading query gap, cut down
- //q_s = q_e - MIN(bandwidth, (t_e << 1));
- q_s = q_e - (t_e + (t_e < bandwidth ? t_e : bandwidth));
- }
-
- /* align */
- if(t_e - t_s > 0 && q_e - q_s > 0) {
- band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
- if(q_e - q_s <= band || t_e - t_s <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
- NWstat = NW_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
- } else {
- NWstat = NW_band_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, band, matrices, t_len);
- //NWstat = NW_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
- }
- Stat.pos -= (NWstat.len - NWstat.tGaps);
- Stat.score = NWstat.score;
- Stat.len = NWstat.len;
- Stat.match = NWstat.match;
- Stat.tGaps = NWstat.tGaps;
- Stat.qGaps = NWstat.qGaps;
- }
- }
+ Stat = leadTailAlnPtr(0, 0, template_index->seq, qseq, points->tStart[start] - 1, t_len, points->qStart[start], bandwidth, matrices);
/* piece seeds together */
stop = 1;
@@ -703,7 +715,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
return Stat;
}
if((t_l > 0 || q_e - q_s > 0)) {
- band = 4 * abs(t_l - q_e + q_s) + bandwidth;
+ band = abs(t_l - q_e + q_s) + bandwidth;
if(q_e - q_s <= band || t_l <= band) {
NWstat = NW_score(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, matrices, t_len);
} else {
@@ -721,35 +733,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
}
/* align trailing tail */
- /* Get intervals in query and template to align */
- q_s = points->qEnd[start];
- t_s = points->tEnd[start] - 1;
- q_e = q_len;
- t_e = t_len;
- if((t_len - t_s) > (q_len - q_s + bandwidth) || (t_len - t_s) > ((q_len - q_s) << 1)) { // big trailing template gap, cut down
- //t_e = t_s + MIN(bandwidth, ((q_len - q_s) << 1));
- t_e = q_len - q_s;
- t_e = t_s + (t_e + (t_e < bandwidth ? t_e : bandwidth));
- } else if ((q_len - q_s) > (t_len - t_s + bandwidth) || (q_len - q_s) > ((t_len - t_s) << 1)) { // big leading query gap, cut down
- //q_e = q_s + MIN(bandwidth, ((t_len - t_s) << 1));
- q_e = t_len - t_s;
- q_e = q_s + (q_e + (q_e < bandwidth ? q_e : bandwidth));
- }
-
- /* align trailing gap */
- if(t_e - t_s > 0 && q_e - q_s > 0) {
- band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
- if(q_e - q_s <= band || t_e - t_s <= band) {//|| abs(t_e - t_s - q_e - q_s) >= 32) {
- NWstat = NW_score(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, matrices, t_len);
- } else {
- NWstat = NW_band_score(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, band, matrices, t_len);
- }
- Stat.score += NWstat.score;
- Stat.len += NWstat.len;
- Stat.match += NWstat.match;
- Stat.tGaps += NWstat.tGaps;
- Stat.qGaps += NWstat.qGaps;
- }
+ trailTailAlnPtr(0, 0, &Stat, template_index->seq, qseq, points->tEnd[start] - 1, t_len, points->qEnd[start], q_len, bandwidth, matrices);
points->len = 0;
return Stat;
@@ -819,14 +803,15 @@ int anker_rc(const HashMapCCI *template_index, unsigned char *qseq, int q_len, i
i = q_len - q_start;
q_start = q_len - q_end;
q_end = i;
- }
- score_r = 0;
- mem_count = 0;
- if(q_start) {
+ i = q_start;
+ } else if(q_start) {
i = q_start;
} else {
i = preseed(template_index, qseq, q_end - q_start);
}
+ score_r = 0;
+ mem_count = 0;
+
while(i < q_end) {
end = charpos(qseq, 4, i, q_len);
if(end == -1) {
@@ -993,6 +978,7 @@ int anker_rc_comp(const HashMapCCI *template_index, unsigned char *qseq, unsigne
static int one2one = 0;
int i, j, k, rc, end, score, score_r, value, t_len, q_len, prev;
int bestScore, mem_count, totMems, shifter, kmersize, bias, *Ns, *seeds;
+ unsigned cPos, iPos;
long unsigned key, mask, *seq;
if(!template_index) {
@@ -1026,14 +1012,22 @@ int anker_rc_comp(const HashMapCCI *template_index, unsigned char *qseq, unsigne
points->len = mem_count;
Ns = qseq_r_comp->N;
Ns[*Ns] = q_len;
+ i = q_len - q_start;
+ q_start = q_len - q_end;
+ q_end = i;
+ i = q_start;
+ } else if(q_start) {
+ i = q_start;
+ } else {
+ i = preseed(template_index, qseq, q_end - q_start);
}
score_r = 0;
mem_count = 0;
- i = 0;
- while(i < q_len) {
+
+ while(i < q_end) {
end = *++Ns - kmersize + 1;
while(i < end) {
- key = getKmer(seq, i, shifter);
+ getKmer_macro(key, seq, i, cPos, iPos, shifter);
value = hashMapCCI_get(template_index, key, shifter);
if(value == 0) {
=====================================
align.h
=====================================
@@ -22,6 +22,13 @@
#include "hashmapcci.h"
#include "nw.h"
+extern AlnScore (*leadTailAlnPtr)(Aln *, Aln *, const long unsigned*, const unsigned char*, int, int, int, const int, NWmat *);
+extern void (*trailTailAlnPtr)(Aln *, Aln *, AlnScore *, const long unsigned *, const unsigned char *, int, int, int, int, const int, NWmat *);
+
+AlnScore skipLeadAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices);
+AlnScore leadTailAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices);
+void skipTrailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices);
+void trailTailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices);
AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_len, int q_start, int q_end, Aln *aligned, Aln *Frag_align, int min, int max, int mq, double scoreT, AlnPoints *points, NWmat *matrices);
AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq, int q_len, int q_start, int q_end, const CompDNA *qseq_comp, int mq, double scoreT, AlnPoints *points, NWmat *matrices);
int preseed(const HashMapCCI *template_index, unsigned char *qseq, int q_len);
=====================================
chain.c
=====================================
@@ -25,6 +25,7 @@
#include "stdstat.h"
int (*chainSeedsPtr)(AlnPoints *, int, int, int, unsigned *) = &chainSeeds;
+void (*trimSeedsPtr)(AlnPoints *points, int start) = &trimSeeds;
AlnPoints * seedPoint_init(int size, Penalties *rewards) {
@@ -78,7 +79,7 @@ void seedPoint_free(AlnPoints *src) {
int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ) {
int i, j, nMems, weight, gap, score, bestScore, secondScore, bestPos;
- int tStart, tEnd, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
+ int tStart, tEnd, qStart, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
Penalties *rewards;
rewards = points->rewards;
@@ -87,29 +88,45 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
M = rewards->M;
MM = rewards->MM;
nMems = points->len;
- i = nMems - 1;
- bestPos = i;
- bestScore = points->weight[i] * M;
+ i = nMems;
+ bestPos = i - 1;
+ bestScore = 0;
secondScore = 0;
- points->score[i] = bestScore;
+ points->score[i] = 0;
points->next[i] = 0;
- points->weight[i] -= (kmersize - 1);
while(i--) {
weight = points->weight[i] * M;
points->next[i] = 0;
tEnd = points->tEnd[i];
qEnd = points->qEnd[i];
- score = MIN(t_len - tEnd, q_len - qEnd);
- if(0 < --score) {
- score *= U;
- score += W1;
- } else if(score == 0) {
- score = W1;
+
+ /* get stop score */
+ gap = MIN(t_len - tEnd, q_len - qEnd);
+ Ms = gap;
+
+ /* gap */
+ if(--gap) {
+ gap *= U;
+ gap += W1;
+ } else if(gap == 0) {
+ gap = W1;
} else {
- score = 0;
+ gap = 0;
}
- score += weight;
+
+ /* match */
+ if(Ms == 2) {
+ MMs = 2;
+ Ms = 0;
+ } else {
+ MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+ MMs = MAX(2, MMs);
+ Ms = MIN(Ms - MMs, kmersize);
+ Ms = MIN(Ms, MMs);
+ }
+ Ms = Ms * M + MMs * MM;
+ score = weight + (Ms < gap ? gap : Ms);
/* 128 is the bandwidth */
nMin = MIN(nMems, i + 128);
@@ -194,6 +211,35 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
}
points->score[i] = score;
+ /* penalize start */
+ tStart = points->tStart[i];
+ qStart = points->qStart[i];
+ gap = MIN(tStart, qStart);
+ Ms = gap;
+
+ /* gap */
+ if(0 < --gap) {
+ gap *= U;
+ gap += W1;
+ } else if(gap == 0) {
+ gap = W1;
+ } else {
+ gap = 0;
+ }
+
+ /* match */
+ if(Ms == 2) {
+ MMs = 2;
+ Ms = 0;
+ } else {
+ MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+ MMs = MAX(2, MMs);
+ Ms = MIN(Ms - MMs, kmersize);
+ Ms = MIN(Ms, MMs);
+ }
+ Ms = Ms * M + MMs * MM;
+ score += Ms < gap ? gap : Ms;
+
/* update bestScore */
if(bestScore <= score) {
if(points->next[i] != bestPos) {
@@ -201,29 +247,22 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
}
bestScore = score;
bestPos = i;
+ } else if(secondScore <= score && points->next[i] != bestPos) {
+ secondScore = bestScore;
}
}
+
/* calculate mapping quality */
*mapQ = 0 < bestScore ? (ceil(40 * (1 - 1.0 * secondScore / bestScore) * MIN(1, points->weight[bestPos] / 10.0) * log(bestScore))) : 0;
+ points->score[bestPos] = bestScore;
- /* penalize start */
- /*
- if(bestPos != 0) {
- score = (MIN(points->qStart[bestPos], points->tStart[bestPos])) * pM;
- bestScore += MAX(W1, score);
- if(points->qStart[0] == 0 && bestScore < points->score[0]) {
- bestScore = points->score[0];
- bestPos = 0;
- }
- }
- */
return bestPos;
}
int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ) {
int i, j, nMems, weight, gap, score, bestScore, secondScore, bestPos;
- int tStart, tEnd, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
+ int tStart, tEnd, qStart, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
Penalties *rewards;
rewards = points->rewards;
@@ -232,29 +271,45 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
M = rewards->M;
MM = rewards->MM;
nMems = points->len;
- i = nMems - 1;
- bestPos = i;
- bestScore = points->weight[i] * M;
+ i = nMems;
+ bestPos = i - 1;
+ bestScore = 0;
secondScore = 0;
- points->score[i] = bestScore;
+ points->score[i] = 0;
points->next[i] = 0;
- points->weight[i] -= (kmersize - 1);
while(i--) {
weight = points->weight[i] * M;
points->next[i] = 0;
tEnd = points->tEnd[i];
qEnd = points->qEnd[i];
- score = MIN(t_len - tEnd, q_len - qEnd);
- if(0 < --score) {
- score *= U;
- score += W1;
- } else if(score == 0) {
- score = W1;
+
+ /* get stop score */
+ gap = MIN(t_len - tEnd, q_len - qEnd);
+ Ms = gap;
+
+ /* gap */
+ if(--gap) {
+ gap *= U;
+ gap += W1;
+ } else if(gap == 0) {
+ gap = W1;
+ } else {
+ gap = 0;
+ }
+
+ /* match */
+ if(Ms == 2) {
+ MMs = 2;
+ Ms = 0;
} else {
- score = 0;
+ MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+ MMs = MAX(2, MMs);
+ Ms = MIN(Ms - MMs, kmersize);
+ Ms = MIN(Ms, MMs);
}
- score += weight;
+ Ms = Ms * M + MMs * MM;
+ score = weight + (Ms < gap ? gap : Ms);
/* 128 is the bandwidth */
nMin = MIN(nMems, i + 128);
@@ -390,6 +445,35 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
}
points->score[i] = score;
+ /* penalize start */
+ tStart = points->tStart[i];
+ qStart = points->qStart[i];
+ gap = MIN(tStart, qStart);
+ Ms = gap;
+
+ /* gap */
+ if(0 < --gap) {
+ gap *= U;
+ gap += W1;
+ } else if(gap == 0) {
+ gap = W1;
+ } else {
+ gap = 0;
+ }
+
+ /* match */
+ if(Ms == 2) {
+ MMs = 2;
+ Ms = 0;
+ } else {
+ MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+ MMs = MAX(2, MMs);
+ Ms = MIN(Ms - MMs, kmersize);
+ Ms = MIN(Ms, MMs);
+ }
+ Ms = Ms * M + MMs * MM;
+ score += Ms < gap ? gap : Ms;
+
/* update bestScore */
if(bestScore <= score) {
if(points->next[i] != bestPos) {
@@ -397,22 +481,14 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
}
bestScore = score;
bestPos = i;
+ } else if(secondScore <= score && points->next[i] != bestPos) {
+ secondScore = bestScore;
}
}
- /* calculate mapping quality */
- *mapQ = ceil(40 * (1 - 1.0 * secondScore / bestScore) * MIN(1, points->weight[bestPos] / 10.0) * log(bestScore));
- /* penalize start */
- /*
- if(bestPos != 0) {
- score = (MIN(points->qStart[bestPos], points->tStart[bestPos])) * pM;
- bestScore += MAX(W1, score);
- if(points->qStart[0] == 0 && bestScore < points->score[0]) {
- bestScore = points->score[0];
- bestPos = 0;
- }
- }
- */
+ /* calculate mapping quality */
+ *mapQ = 0 < bestScore ? ceil(40 * (1 - 1.0 * secondScore / bestScore) * MIN(1, points->weight[bestPos] / 10.0) * log(bestScore)) : 0;
+ points->score[bestPos] = bestScore;
return bestPos;
}
@@ -430,8 +506,52 @@ void trimSeeds(AlnPoints *points, int start) {
return;
}
+ if(points->qStart[start]) {
+ /* iterate seeds on best chain */
+ do {
+ /* trim seed */
+ len = points->qEnd[start] - points->qStart[start];
+ if(len < ts) {
+ /* ensure at least one nucleotide remains in seed */
+ points->tStart[start] += --len;
+ points->qStart[start] += len;
+ } else {
+ points->tStart[start] += ts;
+ points->qStart[start] += ts;
+ }
+ } while((start = points->next[start]));
+ } else {
+ /* iterate seeds on best chain */
+ while((start = points->next[start])) {
+ /* trim seed */
+ len = points->qEnd[start] - points->qStart[start];
+ if(len < ts) {
+ /* ensure at least one nucleotide remains in seed */
+ points->tStart[start] += --len;
+ points->qStart[start] += len;
+ } else {
+ points->tStart[start] += ts;
+ points->qStart[start] += ts;
+ }
+ }
+ }
+}
+
+void trimSeedsNoLead(AlnPoints *points, int start) {
+
+ /* trim the start of each seed */
+ static int ts = 0;
+ int len;
+
+ if(!points) {
+ ts = start;
+ return;
+ } else if(!ts) {
+ return;
+ }
+
/* iterate seeds on best chain */
- do {
+ while((start = points->next[start])) {
/* trim seed */
len = points->qEnd[start] - points->qStart[start];
if(len < ts) {
@@ -442,5 +562,5 @@ void trimSeeds(AlnPoints *points, int start) {
points->tStart[start] += ts;
points->qStart[start] += ts;
}
- } while((start = points->next[start]));
+ }
}
=====================================
chain.h
=====================================
@@ -38,6 +38,7 @@ struct alnPoints {
/* pointer to chaining method */
extern int (*chainSeedsPtr)(AlnPoints *, int, int, int, unsigned *);
+extern void (*trimSeedsPtr)(AlnPoints *points, int start);
/* FUNCTIONS */
AlnPoints * seedPoint_init(int size, Penalties *rewards);
@@ -46,3 +47,4 @@ void seedPoint_free(AlnPoints *src);
int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
void trimSeeds(AlnPoints *points, int start);
+void trimSeedsNoLead(AlnPoints *points, int start);
=====================================
debian/changelog
=====================================
@@ -1,6 +1,6 @@
-kma (1.3.15-1) UNRELEASED; urgency=medium
+kma (1.3.17-1) UNRELEASED; urgency=medium
- * New upstream version 1.3.15
+ * New upstream version 1.3.17
* d/p/blhc.patch: Propagate hardening flags to fix blhc
-- Nilesh Patra <nilesh at debian.org> Sat, 01 May 2021 22:20:10 +0530
=====================================
kma.c
=====================================
@@ -97,89 +97,109 @@ char * strjoin(char **strings, int len) {
return newStr;
}
-static void helpMessage(int exeStatus) {
- FILE *helpOut;
- if(exeStatus == 0) {
- helpOut = stdout;
- } else {
- helpOut = stderr;
- }
- fprintf(helpOut, "# KMA-%s mapps raw reads to a template database.\n", KMA_VERSION);
- fprintf(helpOut, "# Options are:\t\tDesc:\t\t\t\tDefault:\tRequirements:\n");
- fprintf(helpOut, "#\n");
- fprintf(helpOut, "#\t-o\t\tOutput file\t\t\tNone\t\tREQUIRED\n");
- fprintf(helpOut, "#\t-t_db\t\tTemplate DB\t\t\tNone\t\tREQUIRED\n");
- fprintf(helpOut, "#\t-i\t\tInput file name(s)\t\tSTDIN\n");
- fprintf(helpOut, "#\t-ipe\t\tInput paired end file name(s)\n");
- fprintf(helpOut, "#\t-int\t\tInput interleaved file name(s)\n");
- fprintf(helpOut, "#\t-k\t\tKmersize\t\t\t%s\n", "DB defined");
- fprintf(helpOut, "#\t-ts\t\tTrim front of seeds with ts\t%d\n", 0);
- fprintf(helpOut, "#\t-ml\t\tMinimum alignment length\t%d\n", 16);
- fprintf(helpOut, "#\t-p\t\tp-value\t\t\t\t0.05\n");
- fprintf(helpOut, "#\t-ConClave\tConClave version\t\t1\n");
- fprintf(helpOut, "#\t-mem_mode\tUse kmers to choose best\n#\t\t\ttemplate, and save memory\tFalse\n");
- fprintf(helpOut, "#\t-proxi\t\tUse proximity scoring under\n#\t\t\ttemplate mapping\t\tFalse/1.0\n");
- fprintf(helpOut, "#\t-ex_mode\tSearh kmers exhaustively\tFalse\n");
- fprintf(helpOut, "#\t-ef\t\tPrint additional features\tFalse\n");
- fprintf(helpOut, "#\t-vcf\t\tMake vcf file, 2 to apply FT\tFalse/0\n");
- fprintf(helpOut, "#\t-sam\t\tOutput sam to stdout, 4 to \n#\t\t\tonly output mapped reads, \n#\t\t\t2096 for aligned\t\tFalse/0\n");
- fprintf(helpOut, "#\t-nc\t\tNo consensus file\t\tFalse\n");
- fprintf(helpOut, "#\t-na\t\tNo aln file\t\tFalse\n");
- fprintf(helpOut, "#\t-nf\t\tNo frag file\t\t\tFalse\n");
- fprintf(helpOut, "#\t-deCon\t\tRemove contamination\t\tFalse\n");
- fprintf(helpOut, "#\t-dense\t\tDo not allow insertions\n#\t\t\tin assembly\t\t\tFalse\n");
- fprintf(helpOut, "#\t-sasm\t\tSkip alignment and assembly\tFalse\n");
- fprintf(helpOut, "#\t-ref_fsa\tConsensus sequnce will\n#\t\t\thave \"n\" instead of gaps\tFalse / 0\n");
- fprintf(helpOut, "#\t-matrix\t\tPrint assembly matrix\t\tFalse\n");
- fprintf(helpOut, "#\t-a\t\tPrint all best mappings\t\tFalse\n");
- fprintf(helpOut, "#\t-mp\t\tMinimum phred score\t\t20\n");
- fprintf(helpOut, "#\t-eq\t\tMinimum avg. quality score\t0\n");
- fprintf(helpOut, "#\t-5p\t\tCut a constant number of\n#\t\t\tnucleotides from the 5 prime.\t0\n");
- fprintf(helpOut, "#\t-3p\t\tCut a constant number of\n#\t\t\tnucleotides from the 3 prime.\t0\n");
- fprintf(helpOut, "#\t-Sparse\t\tOnly count kmers\t\tFalse\n");
- fprintf(helpOut, "#\t-Mt1\t\tMap only to \"num\" template.\t0 / False\n");
- fprintf(helpOut, "#\t-ID\t\tMinimum ID\t\t\t1.0%%\n");
- fprintf(helpOut, "#\t-ss\t\tSparse sorting (q,c,d)\t\tq\n");
- fprintf(helpOut, "#\t-pm\t\tPairing method (p,u,f)\t\tu\n");
- fprintf(helpOut, "#\t-fpm\t\tFine Pairing method (p,u,f)\tu\n");
- fprintf(helpOut, "#\t-apm\t\tSets both pm and fpm\t\tu\n");
- fprintf(helpOut, "#\t-shm\t\tUse shared DB made by kma_shm\t0 (lvl)\n");
- fprintf(helpOut, "#\t-mmap\t\tMemory map *.comp.by\n");
- fprintf(helpOut, "#\t-tmp\t\tSet directory for temporary files.\n");
- fprintf(helpOut, "#\t-1t1\t\tForce end to end mapping\tFalse\n");
- fprintf(helpOut, "#\t-hmm\t\tUse a HMM to assign template(s)\n#\t\t\tto query sequences\t\tTrue\n");
- fprintf(helpOut, "#\t-ck\t\tCount kmers instead of\n#\t\t\tpseudo alignment\t\tFalse\n");
- fprintf(helpOut, "#\t-ca\t\tMake circular alignments\tFalse\n");
- fprintf(helpOut, "#\t-boot\t\tBootstrap sequence\t\tFalse\n");
- fprintf(helpOut, "#\t-bc\t\tBase calls should be\n#\t\t\tsignificantly overrepresented.\t[True]\n");
- fprintf(helpOut, "#\t-bc90\t\tBase calls should be both\n#\t\t\tsignificantly overrepresented,\n#\t\t\tand have 90%% agreement.\t\tFalse\n");
- fprintf(helpOut, "#\t-bcNano\t\tCall bases at suspicious\n#\t\t\tdeletions, made for nanopore.\tFalse\n");
- fprintf(helpOut, "#\t-bcd\t\tMinimum depth at base\t\t1\n");
- fprintf(helpOut, "#\t-bcg\t\tMaintain insignificant gaps\n");
- fprintf(helpOut, "#\t-and\t\tBoth mrs and p_value thresholds\n#\t\t\thas to reached to in order to\n#\t\t\treport a template hit.\t\tor\n");
- fprintf(helpOut, "#\t-oa\t\tOutput all, disregard mrs and p-value\n#\t\t\twhen reporting a template hit\tFalse\n");
- fprintf(helpOut, "#\t-mq\t\tMinimum mapping quality\t\t0\n");
- fprintf(helpOut, "#\t-mrs\t\tMinimum alignment score,\n#\t\t\tnormalized to alignment length\t0.50\n");
- fprintf(helpOut, "#\t-mrc\t\tMinimum read coverage\t\t0.10\n");
- fprintf(helpOut, "#\t-mct\t\tMax overlap between templates\t0.10\n");
- fprintf(helpOut, "#\t-reward\t\tScore for match\t\t\t1\n");
- fprintf(helpOut, "#\t-penalty\tPenalty for mismatch\t\t-2\n");
- fprintf(helpOut, "#\t-gapopen\tPenalty for gap opening\t\t-3\n");
- fprintf(helpOut, "#\t-gapextend\tPenalty for gap extension\t-1\n");
- fprintf(helpOut, "#\t-per\t\tReward for pairing reads\t7\n");
- fprintf(helpOut, "#\t-localopen\tPenalty for openning a local chain\t-6\n");
- fprintf(helpOut, "#\t-Npenalty\tPenalty matching N\t\t0\n");
- fprintf(helpOut, "#\t-transition\tPenalty for transition\t\t-2\n");
- fprintf(helpOut, "#\t-transversion\tPenalty for transversion\t-2\n");
- fprintf(helpOut, "#\t-cge\t\tSet CGE penalties and rewards\tFalse\n");
- fprintf(helpOut, "#\t-t\t\tNumber of threads\t\t1\n");
- fprintf(helpOut, "#\t-status\t\tExtra status\n");
- fprintf(helpOut, "#\t-verbose\tExtra verbose\n");
- fprintf(helpOut, "#\t-c\t\tCitation\n");
- fprintf(helpOut, "#\t-v\t\tVersion\n");
- fprintf(helpOut, "#\t-h\t\tShows this help message\n");
- fprintf(helpOut, "#\n");
- exit(exeStatus);
+static void helpMessage(int exitStatus) {
+
+ FILE *out = exitStatus ? stderr : stdout;
+
+ fprintf(out, "# KMA-%s maps and/or aligns raw reads to a template database.\n", KMA_VERSION);
+
+ fprintf(out, "# %16s\t%-32s\t%s\n", "Options:", "Desc:", "Default:");
+
+ fprintf(out, "#\n# Input:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-i", "Single end input(s)", "stdin");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ipe", "Paired end input(s)", "");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-int", "Interleaved input(s)", "");
+
+ fprintf(out, "#\n# Output:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-o", "Output prefix", "");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ef", "Output additional features", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-vcf", "Output vcf file, 2 to apply FT", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-sam", "Output sam, 4/2096 for mapped/aligned", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-nc", "No consensus file", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-nc", "No aln file", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-nf", "No frag file", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-matrix", "Output assembly matrix", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-a", "Output all template mappings", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-and", "Use both mrs and p-value on consensus", "or");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-oa", "Use neither mrs or p-value on consensus", "False");
+
+ fprintf(out, "#\n# Consensus:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-bc", "Minimum support to call bases", "0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-bcNano", "Altered indel calling for ONT data", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-bcd", "Minimum depth to cal bases", "1");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-bcg", "Maintain insignificant gaps", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ID", "Minimum consensus ID", "1.0%");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-dense", "Skip insertion in consensus", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ref_fsa", "Use n's on indels", "False");
+
+ fprintf(out, "#\n# General:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-t_db", "Template DB", "");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-p", "P-value", "0.05");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-shm", "Use DB in shared memory", "0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mmap", "Memory map *.comp.b", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-tmp", "Set directory for temporary files", "");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-t", "Number of threads", "1");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-status", "Extra status", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-verbose", "Extra verbose", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-c", "Citation", "");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-v", "Version", "");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-h", "Shows this help message", "");
+
+ fprintf(out, "#\n# Template mapping:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ConClave", "ConClave version", "1");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mem_mode", "Base ConClave on template mappings", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-proxi", "Proximity scoring (negative for soft)", "False/1.0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ex_mode", "Searh kmers exhaustively", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-deCon", "Remove contamination", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-Sparse", "Only count kmers", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ss", "Sparse sorting (q,c,d)", "q");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-Mt1", "Map everything to one template", "False/0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-pm", "Pairing method (p,u,f)", "u");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-1t1", "One query to one template", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-hmm", "Use a HMM to assign template(s)", "True");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ck", "Count k-mers over pseudo alignment", "False");
+
+ fprintf(out, "#\n# Chaining:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-k", "K-mersize", "DB defined");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ts", "Trim front of seeds", "0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ssa", "Seeds soround alignments", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ex_mode", "Searh kmers exhaustively", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-fpm", "Pairing method (p,u,f)", "u");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mq", "Minimum mapping quality", "0");
+
+ fprintf(out, "#\n# Alignment:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ca", "Circular alignments", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mrs", "Minimum relative alignment score", "0.5");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mrc", "Minimum query coverage", "0.0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ml", "Minimum alignment length", "16");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-reward", "Score for match", "1");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-penalty", "Penalty for mismatch", "2");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-gapopen", "Penalty for gap opening", "3");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-gapextend", "Penalty for gap extension", "1");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-per", "Reward for pairing reads", "7");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-Npenalty", "Penalty matching N", "0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-transition", "Penalty for transition", "2");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-transversion", "Penalty for transversion", "2");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-sasm", "Skip alignment", "False");
+
+ fprintf(out, "#\n# Trimming:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mp", "Minimum phred score", "20");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-eq", "Minimum avg. quality score", "0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-5p", "Trim 5 prime", "0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-3p", "Trim 3 prime", "0");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-ml", "Minimum length", "16");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-xl", "Maximum length on se", "2147483647");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-boot", "Bootstrap sub-sequence", "False");
+
+ fprintf(out, "#\n# Presets:\n");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-apm", "Sets both pm and fpm", "u");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-cge", "Set CGE penalties and rewards", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mint2", "Set 2ng gen Mintyper preset", "False");
+ fprintf(out, "# %16s\t%-32s\t%s\n", "-mint3", "Set 3rd gen Mintyper preset", "False");
+
+ fprintf(out, "#\n");
+
+ exit(exitStatus);
}
int kma_main(int argc, char *argv[]) {
@@ -205,7 +225,7 @@ int kma_main(int argc, char *argv[]) {
static int fileCounter, fileCounter_PE, fileCounter_INT, Ts, Tv, minlen;
static int extendedFeatures, spltDB, thread_num, kmersize, targetNum, mq;
static int ref_fsa, print_matrix, print_all, sam, vcf, Mt1, bcd, one2one;
- static int sparse_run, ts, **d, status = 0;
+ static int sparse_run, ts, maxlen, **d, status = 0;
static unsigned xml, nc, nf, shm, exhaustive, verbose;
static char *outputfilename, *templatefilename, **templatefilenames;
static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
@@ -255,6 +275,7 @@ int kma_main(int argc, char *argv[]) {
kmersize = 0;
ts = 0;
minlen = 16;
+ maxlen = 2147483647;
evalue = 0.05;
support = 0.0;
minFrac = 1.0;
@@ -516,6 +537,10 @@ int kma_main(int argc, char *argv[]) {
exit(1);
}
}
+ } else if(strcmp(argv[args], "-ssa") == 0) {
+ trimSeedsPtr = &trimSeedsNoLead;
+ leadTailAlnPtr = &skipLeadAln;
+ trailTailAlnPtr = &skipTrailAln;
} else if(strcmp(argv[args], "-ml") == 0) {
++args;
if(args < argc) {
@@ -525,6 +550,15 @@ int kma_main(int argc, char *argv[]) {
exit(1);
}
}
+ } else if(strcmp(argv[args], "-xl") == 0) {
+ ++args;
+ if(args < argc) {
+ maxlen = strtoul(argv[args], &exeBasic, 10);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "# Invalid minimum length parsed\n");
+ exit(1);
+ }
+ }
} else if(strcmp(argv[args], "-mp") == 0) {
++args;
if(args < argc) {
@@ -984,7 +1018,7 @@ int kma_main(int argc, char *argv[]) {
++args;
}
preseed(0, 0, exhaustive);
- trimSeeds(0, ts);
+ trimSeedsPtr(0, ts);
if(sam && kmaPipe != &kmaPipeThread) {
fprintf(stderr, "\"-sam\" and \"-status\" cannot coincide.\n");
@@ -1272,7 +1306,7 @@ int kma_main(int argc, char *argv[]) {
/* SE */
if(fileCounter > 0) {
- totFrags += run_input(inputfiles, fileCounter, minPhred, minQ, fiveClip, threeClip, minlen, to2Bit, prob, ioStream);
+ totFrags += run_input(inputfiles, fileCounter, minPhred, minQ, fiveClip, threeClip, minlen, maxlen, to2Bit, prob, ioStream);
}
/* PE */
=====================================
nw.c
=====================================
@@ -23,9 +23,9 @@
#include "pherror.h"
#include "stdnuc.h"
-AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices) {
+AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices, int template_length) {
- int m, n, t_len, q_len, thisScore, nuc_pos, template_length, W1, U, MM;
+ int m, n, t_len, q_len, thisScore, nuc_pos, W1, U, MM;
int pos[2], *D_ptr, *D_prev, Q, Q_prev, *P_ptr, *P_prev, *tmp, **d;
unsigned char *query, t_nuc, *E, *E_ptr, e;
AlnScore Stat;
@@ -40,9 +40,8 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
t_len = t_e - t_s;
aligned->start = 0;
aligned->end = 0;
- template_length = aligned->pos;
if(t_len < 0) {
- t_len += aligned->pos;
+ t_len += template_length;
}
query = (unsigned char*)(queryOrg + q_s);
Stat.pos = 0;
@@ -308,9 +307,9 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
return Stat;
}
-AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices) {
+AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices, int template_length) {
- int m, n, t_len, q_len, thisScore, nuc_pos, template_length, pos[2];
+ int m, n, t_len, q_len, thisScore, nuc_pos, pos[2];
int bq_len, halfBand, sn, en, sq, eq, q_pos, c_pos, W1, U, MM;
int *D_ptr, *D_prev, Q, Q_prev, *P_ptr, *P_prev, *tmp, **d;
unsigned char *query, t_nuc, *E, *E_ptr, e;
@@ -326,7 +325,6 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
t_len = t_e - t_s;
aligned->start = 0;
aligned->end = 0;
- template_length = aligned->pos;
if(t_len < 0) {
t_len += template_length;
}
=====================================
nw.h
=====================================
@@ -56,7 +56,7 @@ struct aln {
#define NWLOAD 1
#endif
-AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices);
-AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices);
+AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices, int template_length);
+AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices, int template_length);
AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, NWmat *matrices, int template_length);
AlnScore NW_band_score(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, int band, NWmat *matrices, int template_length);
=====================================
runinput.c
=====================================
@@ -29,7 +29,7 @@
void (*printFsa_ptr)(Qseqs*, Qseqs*, CompDNA*, FILE*) = &printFsa;
void (*printFsa_pair_ptr)(Qseqs*, Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*) = &printFsa_pair;
-long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out) {
+long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out) {
int fileCounter, phredScale, phredCut, start, end;
unsigned FASTQ;
@@ -65,55 +65,59 @@ long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ
/* parse reads */
while(FileBuffgetFq(inputfile, header, qseq, qual, trans)) {
- /* trim */
- seq = qual->seq;
- start = fiveClip;
- end = qseq->len - 1 - threeClip;
- end = end < 0 ? 0 : end;
- while(end >= 0 && seq[end] < phredCut) {
- --end;
- }
- ++end;
- while(start < end && seq[start] < phredCut) {
- ++start;
- }
- /*
- for(i = start; i < end; ++i) {
- if(seq[i] < phredCut) {
- seq[i] = 4;
+ if(qseq->len <= maxlen) {
+ /* trim */
+ seq = qual->seq;
+ start = fiveClip;
+ end = qseq->len - 1 - threeClip;
+ end = end < 0 ? 0 : end;
+ while(end >= 0 && seq[end] < phredCut) {
+ --end;
+ }
+ ++end;
+ while(start < end && seq[start] < phredCut) {
+ ++start;
+ }
+ /*
+ for(i = start; i < end; ++i) {
+ if(seq[i] < phredCut) {
+ seq[i] = 4;
+ }
+ }
+ */
+ qseq->len = end - start;
+ /* print */
+ if(qseq->len > minlen && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
+ /* dump seq */
+ qseq->seq += start;
+ printFsa_ptr(header, qseq, compressor, out);
+ qseq->seq -= start;
+ ++count;
}
- }
- */
- qseq->len = end - start;
- /* print */
- if(qseq->len > minlen && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
- /* dump seq */
- qseq->seq += start;
- printFsa_ptr(header, qseq, compressor, out);
- qseq->seq -= start;
- ++count;
}
}
} else if(FASTQ & 2) {
while(FileBuffgetFsa(inputfile, header, qseq, trans)) {
- /* remove leading and trailing N's */
- start = 0;
- end = qseq->len - 1;
- seq = qseq->seq;
- while(end >= 0 && seq[end] == 4) {
- --end;
- }
- ++end;
- while(start < end && seq[start] == 4) {
- ++start;
- }
- qseq->len = end - start;
- if(qseq->len > minlen) {
- /* dump seq */
- qseq->seq += start;
- printFsa_ptr(header, qseq, compressor, out);
- qseq->seq -= start;
- ++count;
+ if(qseq->len <= maxlen) {
+ /* remove leading and trailing N's */
+ start = 0;
+ end = qseq->len - 1;
+ seq = qseq->seq;
+ while(end >= 0 && seq[end] == 4) {
+ --end;
+ }
+ ++end;
+ while(start < end && seq[start] == 4) {
+ ++start;
+ }
+ qseq->len = end - start;
+ if(qseq->len > minlen) {
+ /* dump seq */
+ qseq->seq += start;
+ printFsa_ptr(header, qseq, compressor, out);
+ qseq->seq -= start;
+ ++count;
+ }
}
}
}
=====================================
runinput.h
=====================================
@@ -23,7 +23,7 @@
/* pointers determining how to deliver the input */
extern void (*printFsa_ptr)(Qseqs*, Qseqs*, CompDNA*, FILE*);
extern void (*printFsa_pair_ptr)(Qseqs*, Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*);
-long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
+long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out);
long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
void bootFsa(Qseqs *header, Qseqs *qseq, CompDNA *compressor, FILE *out);
=====================================
stdnuc.h
=====================================
@@ -21,6 +21,10 @@
#define setEx(src, pos)(src[pos >> 3] |= (1 << (pos & 7)))
#define unsetEx(src, pos)(src[pos >> 3] ^= (1 << (pos & 7)))
#define getEx(src, pos)((src[pos >> 3] >> (pos & 7)) & 1)
+#define getKmer_macro(kmer, Comp, pos, cPos, iPos, shifter) \
+ iPos = (pos & 31) << 1;\
+ cPos = pos >> 5;\
+ kmer = (iPos <= shifter) ? ((Comp[cPos] << iPos) >> shifter) : (((Comp[cPos] << iPos) | (Comp[cPos + 1] >> (64-iPos))) >> shifter);
long unsigned getKmer(long unsigned *compressor, unsigned cPos, const unsigned shifter);
long unsigned makeKmer(const unsigned char *qseq, unsigned pos, unsigned size);
=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
* limitations under the License.
*/
-#define KMA_VERSION "1.3.15"
+#define KMA_VERSION "1.3.17"
View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/3a0c2e0868ae672f84c60fafa3bcea1aa87ca059...97b5faf0bd92f7f493f9007ec81ec23752e9734e
--
View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/3a0c2e0868ae672f84c60fafa3bcea1aa87ca059...97b5faf0bd92f7f493f9007ec81ec23752e9734e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210523/5957297a/attachment-0001.htm>
More information about the debian-med-commit
mailing list