[med-svn] [Git][med-team/kma][upstream] New upstream version 1.3.17

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Sun May 23 17:16:01 BST 2021



Nilesh Patra pushed to branch upstream at Debian Med / kma


Commits:
d0cca96d by Nilesh Patra at 2021-05-23T21:39:01+05:30
New upstream version 1.3.17
- - - - -


11 changed files:

- align.c
- align.h
- chain.c
- chain.h
- kma.c
- nw.c
- nw.h
- runinput.c
- runinput.h
- stdnuc.h
- version.h


Changes:

=====================================
align.c
=====================================
@@ -28,6 +28,180 @@
 #include "stdnuc.h"
 #include "stdstat.h"
 
+AlnScore (*leadTailAlnPtr)(Aln *, Aln *, const long unsigned*, const unsigned char*, int, int, int, const int, NWmat *) = &leadTailAln;
+void (*trailTailAlnPtr)(Aln *, Aln *, AlnScore *, const long unsigned *, const unsigned char *, int, int, int, int, const int, NWmat *) = &trailTailAln;
+
+AlnScore skipLeadAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices) {
+	
+	AlnScore Stat;
+	
+	/* initialize */
+	Stat.len = 0;
+	Stat.score = 0;
+	Stat.match = 0;
+	Stat.tGaps = 0;
+	Stat.qGaps = 0;
+	Stat.pos = t_e;
+	
+	return Stat;
+}
+
+AlnScore leadTailAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices) {
+	
+	int bias, band, t_s, q_s;
+	AlnScore Stat, NWstat;
+	
+	/* initialize */
+	Stat.len = 0;
+	Stat.score = 0;
+	Stat.match = 0;
+	Stat.tGaps = 0;
+	Stat.qGaps = 0;
+	Stat.pos = t_e;
+	
+	if(q_e) {
+		/* get boundaries */
+		t_s = 0;
+		q_s = 0;
+		if((q_e << 1) < t_e || (q_e + bandwidth) < t_e) { // big leading template gap, cut down
+			//t_s = t_e - MIN(bandwidth, (q_e << 1));
+			t_s = t_e - (q_e + (q_e < bandwidth ? q_e : bandwidth));
+		} else if((t_e << 1) < q_e || (t_e + bandwidth) < q_e) { // big leading query gap, cut down
+			//q_s = q_e - MIN(bandwidth, (t_e << 1));
+			q_s = q_e - (t_e + (t_e < bandwidth ? t_e : bandwidth));
+		}
+		
+		/* align */
+		if(t_e - t_s > 0 && q_e - q_s > 0) {
+			band = abs(t_e - t_s - q_e + q_s) + bandwidth;
+			if(q_e - q_s <= band || t_e - t_s <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
+				if(Frag_align) {
+					NWstat = NW(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+				} else {
+					NWstat = NW_score(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
+				}
+			} else if(Frag_align) {
+				NWstat = NW_band(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, band, matrices, t_len);
+				//NWstat = NW(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+			} else {
+				NWstat = NW_band_score(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, band, matrices, t_len);
+				//NWstat = NW_score(tseq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
+			}
+			
+			if(Frag_align) {
+				/* trim leading gaps */
+				bias = 0;
+				if(t_s == 0) {
+					while(bias < NWstat.len && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
+						if(Frag_align->t[bias] == 5) {
+							--NWstat.tGaps;
+							++(Frag_align->start);
+						} else {
+							--NWstat.qGaps;
+						}
+						++bias;
+					}
+					NWstat.len -= bias;
+					/*if(bias) {
+						NWstat.score -= (W1 + (bias - 1) * U);
+					}*/
+				}
+				
+				memcpy(aligned->t, Frag_align->t + bias, NWstat.len);
+				memcpy(aligned->s, Frag_align->s + bias, NWstat.len);
+				memcpy(aligned->q, Frag_align->q + bias, NWstat.len);
+				aligned->start = q_s + Frag_align->start;
+			}	
+			Stat.pos -= (NWstat.len - NWstat.tGaps);
+			Stat.score = NWstat.score;
+			Stat.len = NWstat.len;
+			Stat.match = NWstat.match;
+			Stat.tGaps = NWstat.tGaps;
+			Stat.qGaps = NWstat.qGaps;
+		} else if(aligned) {
+			aligned->start = q_s;
+		}
+	}
+	
+	return Stat;
+}
+
+void skipTrailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices) {
+	if(Frag_align) {
+		Frag_align->end = 0;
+	}
+}
+
+void trailTailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices) {
+	
+	int band, bias, q_e, t_e;
+	AlnScore NWstat;
+	
+	/* Get intervals in query and template to align */
+	q_e = q_len;
+	t_e = t_len;
+	if(((q_len - q_s) << 1) < (t_len - t_s) || (q_len - q_s + bandwidth) < (t_len - t_s)) { // big trailing template gap, cut down
+		//t_e = t_s + MIN(bandwidth, ((q_len - q_s) << 1));
+		t_e = q_len - q_s;
+		t_e = t_s + (t_e + (t_e < bandwidth ? t_e : bandwidth));
+	} else if(((t_len - t_s) << 1) < (q_len - q_s) || (t_len - t_s + bandwidth) < (q_len - q_s)) { // big leading query gap, cut down
+		//q_e = q_s + MIN(bandwidth, ((t_len - t_s) << 1));
+		q_e = t_len - t_s;
+		q_e = q_s + (q_e + (q_e < bandwidth ? q_e : bandwidth));
+	}
+	
+	/* align trailing gap */
+	if(t_e - t_s > 0 && q_e - q_s > 0) {
+		band = abs(t_e - t_s - q_e + q_s) + bandwidth;
+		if(q_e - q_s <= band || t_e - t_s <= band) {//|| abs(t_e - t_s - q_e - q_s) >= 32) {
+			if(Frag_align) {
+				NWstat = NW(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+			} else {
+				NWstat = NW_score(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, matrices, t_len);
+			}
+		} else if(Frag_align) {
+			NWstat = NW_band(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, band, matrices, t_len);
+			//NWstat = NW(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
+		} else {
+			NWstat = NW_band_score(tseq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, band, matrices, t_len);
+		}
+		
+		if(Frag_align) {
+			/* trim trailing gaps */
+			if(t_e == t_len) {
+				bias = NWstat.len - 1;
+				while(bias && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
+					if(Frag_align->t[bias] == 5) {
+						--NWstat.tGaps;
+						++(Frag_align->end);
+					} else {
+						--NWstat.qGaps;
+					}
+					--bias;
+				}
+				++bias;
+				
+				if(bias != NWstat.len) {
+					//NWstat.score -= (W1 + (NWstat.len - bias) * U);
+					NWstat.len = bias;
+				}
+			}
+			
+			memcpy(aligned->t + Stat->len, Frag_align->t, NWstat.len);
+			memcpy(aligned->s + Stat->len, Frag_align->s, NWstat.len);
+			memcpy(aligned->q + Stat->len, Frag_align->q, NWstat.len);
+		}
+		
+		Stat->score += NWstat.score;
+		Stat->len += NWstat.len;
+		Stat->match += NWstat.match;
+		Stat->tGaps += NWstat.tGaps;
+		Stat->qGaps += NWstat.qGaps;
+	} else if(Frag_align) {
+		Frag_align->end = 0;
+	}
+}
+
 AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_len, int q_start, int q_end, Aln *aligned, Aln *Frag_align, int min, int max, int mq, double scoreT, AlnPoints *points, NWmat *matrices) {
 	
 	const int bandwidth = 64;
@@ -227,75 +401,10 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
 	}
 	
 	/* trim seeds */
-	trimSeeds(points, start);
-	
-	/* initialize */
-	Stat.len = 0;
-	Stat.score = 0;
-	Stat.match = 0;
-	Stat.tGaps = 0;
-	Stat.qGaps = 0;
-	value = points->tStart[start] - 1;
-	Stat.pos = value;
-	i = points->qStart[start];
+	trimSeedsPtr(points, start);
 	
 	/* align leading tail */
-	if(i != 0) {
-		/* get boundaries */
-		t_s = 0;
-		t_e = value;
-		q_s = 0;
-		q_e = i;
-		if((q_e << 1) < t_e || (q_e + bandwidth) < t_e) { // big leading template gap, cut down
-			//t_s = t_e - MIN(bandwidth, (q_e << 1));
-			t_s = t_e - (q_e + (q_e < bandwidth ? q_e : bandwidth));
-		} else if((t_e << 1) < q_e || (t_e + bandwidth) < q_e) { // big leading query gap, cut down
-			//q_s = q_e - MIN(bandwidth, (t_e << 1));
-			q_s = q_e - (t_e + (t_e < bandwidth ? t_e : bandwidth));
-		}
-		
-		/* align */
-		if(t_e - t_s > 0 && q_e - q_s > 0) {
-			band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
-			if(q_e - q_s <= band || t_e - t_s <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
-				NWstat = NW(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices);
-			} else {
-				NWstat = NW_band(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, band, matrices);
-				//NWstat = NW(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices);
-			}
-			
-			/* trim leading gaps */
-			bias = 0;
-			if(t_s == 0) {
-				while(bias < NWstat.len && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
-					if(Frag_align->t[bias] == 5) {
-						--NWstat.tGaps;
-						++(Frag_align->start);
-					} else {
-						--NWstat.qGaps;
-					}
-					++bias;
-				}
-				NWstat.len -= bias;
-				/*if(bias) {
-					NWstat.score -= (W1 + (bias - 1) * U);
-				}*/
-			}
-			
-			memcpy(aligned->t, Frag_align->t + bias, NWstat.len);
-			memcpy(aligned->s, Frag_align->s + bias, NWstat.len);
-			memcpy(aligned->q, Frag_align->q + bias, NWstat.len);
-			aligned->start = q_s + Frag_align->start;
-			Stat.pos -= (NWstat.len - NWstat.tGaps);
-			Stat.score = NWstat.score;
-			Stat.len = NWstat.len;
-			Stat.match = NWstat.match;
-			Stat.tGaps = NWstat.tGaps;
-			Stat.qGaps = NWstat.qGaps;
-		} else {
-			aligned->start = q_s;
-		}
-	}
+	Stat = leadTailAlnPtr(aligned, Frag_align, template_index->seq, qseq, points->tStart[start] - 1, t_len, points->qStart[start], bandwidth, matrices);
 	
 	/* piece seeds together */
 	stop = 1;
@@ -357,11 +466,11 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
 				return Stat;
 			}
 			if((t_l > 0 || q_e - q_s > 0)) {
-				band = 4 * abs(t_l - q_e + q_s) + bandwidth;
+				band = abs(t_l - q_e + q_s) + bandwidth;
 				if(q_e - q_s <= band || t_l <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
-					NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices);
+					NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices, t_len);
 				} else {
-					NWstat = NW_band(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, band, matrices);
+					NWstat = NW_band(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, band, matrices, t_len);
 					//NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices);
 				}
 				
@@ -380,63 +489,7 @@ AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_
 	}
 	
 	/* align trailing tail */
-	/* Get intervals in query and template to align */
-	q_s = points->qEnd[start];
-	t_s = points->tEnd[start] - 1;
-	q_e = q_len;
-	t_e = t_len;
-	if(((q_len - q_s) << 1) < (t_len - t_s) || (q_len - q_s + bandwidth) < (t_len - t_s)) { // big trailing template gap, cut down
-		//t_e = t_s + MIN(bandwidth, ((q_len - q_s) << 1));
-		t_e = q_len - q_s;
-		t_e = t_s + (t_e + (t_e < bandwidth ? t_e : bandwidth));
-	} else if(((t_len - t_s) << 1) < (q_len - q_s) || (t_len - t_s + bandwidth) < (q_len - q_s)) { // big leading query gap, cut down
-		//q_e = q_s + MIN(bandwidth, ((t_len - t_s) << 1));
-		q_e = t_len - t_s;
-		q_e = q_s + (q_e + (q_e < bandwidth ? q_e : bandwidth));
-	}
-	
-	/* align trailing gap */
-	if(t_e - t_s > 0 && q_e - q_s > 0) {
-		band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
-		if(q_e - q_s <= band || t_e - t_s <= band) {//|| abs(t_e - t_s - q_e - q_s) >= 32) {
-			NWstat = NW(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices);
-		} else {
-			NWstat = NW_band(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, band, matrices);
-			//NWstat = NW(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices);
-		}
-		/* trim trailing gaps */
-		if(t_e == t_len) {
-			bias = NWstat.len - 1;
-			while(bias && (Frag_align->t[bias] == 5 || Frag_align->q[bias] == 5)) {
-				if(Frag_align->t[bias] == 5) {
-					--NWstat.tGaps;
-					++(Frag_align->end);
-				} else {
-					--NWstat.qGaps;
-				}
-				--bias;
-			}
-			++bias;
-			
-			
-			if(bias != NWstat.len) {
-				//NWstat.score -= (W1 + (NWstat.len - bias) * U);
-				NWstat.len = bias;
-			}
-		}
-		
-		memcpy(aligned->t + Stat.len, Frag_align->t, NWstat.len);
-		memcpy(aligned->s + Stat.len, Frag_align->s, NWstat.len);
-		memcpy(aligned->q + Stat.len, Frag_align->q, NWstat.len);
-		Stat.score += NWstat.score;
-		Stat.len += NWstat.len;
-		Stat.match += NWstat.match;
-		Stat.tGaps += NWstat.tGaps;
-		Stat.qGaps += NWstat.qGaps;
-	} else {
-		Frag_align->end = 0;
-	}
-	
+	trailTailAlnPtr(aligned, Frag_align, &Stat, template_index->seq, qseq, points->tEnd[start] - 1, t_len, points->qEnd[start], q_len, bandwidth, matrices);
 	aligned->s[Stat.len] = 0;
 	aligned->len = Stat.len;
 	aligned->end = q_len - q_e + Frag_align->end;
@@ -450,7 +503,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 	const int bandwidth = 64;
 	int i, j, k, l, bias, prev, start, stop, t_len, value, end, band, U, M;
 	int t_l, t_s, t_e, q_s, q_e, mem_count, score, kmersize, *seeds, **d;
-	unsigned mapQ, shifter;
+	unsigned mapQ, shifter, cPos, iPos;
 	long unsigned key;
 	unsigned char nuc;
 	AlnScore Stat, NWstat;
@@ -478,7 +531,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 				end = q_end - kmersize + 1;
 			}
 			while(j < end) {
-				key = getKmer(qseq_comp->seq, j, shifter);
+				getKmer_macro(key, qseq_comp->seq, j, cPos, iPos, shifter);
 				value = hashMapCCI_get(template_index, key, shifter);
 				
 				if(value == 0) {
@@ -576,6 +629,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 			j = qseq_comp->N[i] + 1;
 		}
 	}
+	mapQ = 0;
 	
 	if(mem_count) {
 		points->len = mem_count;
@@ -593,8 +647,6 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 	/* get best seed chain, returns best starting point */
 	start = chainSeedsPtr(points, q_len, t_len, kmersize, &mapQ);
 	score = points->score[start];
-	
-	//if(score < (q_len >> 5)) {
 	if(mapQ < mq || score < kmersize) {
 		Stat.score = 0;
 		Stat.len = 1;
@@ -606,48 +658,8 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 		return Stat;
 	}
 	
-	/* initialize */
-	Stat.len = 0;
-	Stat.score = 0;
-	Stat.match = 0;
-	Stat.tGaps = 0;
-	Stat.qGaps = 0;
-	value = points->tStart[start] - 1;
-	Stat.pos = value;
-	i = points->qStart[start];
-	
 	/* align leading tail */
-	if(i != 0) {
-		/* get boundaries */
-		t_s = 0;
-		t_e = value;
-		q_s = 0;
-		q_e = i;
-		if((q_e << 1) < t_e || (q_e + bandwidth) < t_e) { // big leading template gap, cut down
-			//t_s = t_e - MIN(bandwidth, (q_e << 1));
-			t_s = t_e - (q_e + (q_e < bandwidth ? q_e : bandwidth));
-		} else if((t_e << 1) < q_e || (t_e + bandwidth) < q_e) { // big leading query gap, cut down
-			//q_s = q_e - MIN(bandwidth, (t_e << 1));
-			q_s = q_e - (t_e + (t_e < bandwidth ? t_e : bandwidth));
-		}
-		
-		/* align */
-		if(t_e - t_s > 0 && q_e - q_s > 0) {
-			band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
-			if(q_e - q_s <= band || t_e - t_s <= band) {// || abs(t_e - t_s - q_e - q_s) >= 32) {
-				NWstat = NW_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
-			} else {
-				NWstat = NW_band_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, band, matrices, t_len);
-				//NWstat = NW_score(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, matrices, t_len);
-			}
-			Stat.pos -= (NWstat.len - NWstat.tGaps);
-			Stat.score = NWstat.score;
-			Stat.len = NWstat.len;
-			Stat.match = NWstat.match;
-			Stat.tGaps = NWstat.tGaps;
-			Stat.qGaps = NWstat.qGaps;
-		}
-	}
+	Stat = leadTailAlnPtr(0, 0, template_index->seq, qseq, points->tStart[start] - 1, t_len, points->qStart[start], bandwidth, matrices);
 	
 	/* piece seeds together */
 	stop = 1;
@@ -703,7 +715,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 				return Stat;
 			}
 			if((t_l > 0 || q_e - q_s > 0)) {
-				band = 4 * abs(t_l - q_e + q_s) + bandwidth;
+				band = abs(t_l - q_e + q_s) + bandwidth;
 				if(q_e - q_s <= band || t_l <= band) {
 					NWstat = NW_score(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, matrices, t_len);
 				} else {
@@ -721,35 +733,7 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 	}
 	
 	/* align trailing tail */
-	/* Get intervals in query and template to align */
-	q_s = points->qEnd[start];
-	t_s = points->tEnd[start] - 1;
-	q_e = q_len;
-	t_e = t_len;
-	if((t_len - t_s) > (q_len - q_s + bandwidth) || (t_len - t_s) > ((q_len - q_s) << 1)) { // big trailing template gap, cut down
-		//t_e = t_s + MIN(bandwidth, ((q_len - q_s) << 1));
-		t_e = q_len - q_s;
-		t_e = t_s + (t_e + (t_e < bandwidth ? t_e : bandwidth));
-	} else if ((q_len - q_s) > (t_len - t_s + bandwidth) || (q_len - q_s) > ((t_len - t_s) << 1)) { // big leading query gap, cut down
-		//q_e = q_s + MIN(bandwidth, ((t_len - t_s) << 1));
-		q_e = t_len - t_s;
-		q_e = q_s + (q_e + (q_e < bandwidth ? q_e : bandwidth));
-	}
-	
-	/* align trailing gap */
-	if(t_e - t_s > 0 && q_e - q_s > 0) {
-		band = 4 * abs(t_e - t_s - q_e + q_s) + bandwidth;
-		if(q_e - q_s <= band || t_e - t_s <= band) {//|| abs(t_e - t_s - q_e - q_s) >= 32) {
-			NWstat = NW_score(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, matrices, t_len);
-		} else {
-			NWstat = NW_band_score(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, band, matrices, t_len);
-		}
-		Stat.score += NWstat.score;
-		Stat.len += NWstat.len;
-		Stat.match += NWstat.match;
-		Stat.tGaps += NWstat.tGaps;
-		Stat.qGaps += NWstat.qGaps;
-	}
+	trailTailAlnPtr(0, 0, &Stat, template_index->seq, qseq, points->tEnd[start] - 1, t_len, points->qEnd[start], q_len, bandwidth, matrices);
 	points->len = 0;
 	
 	return Stat;
@@ -819,14 +803,15 @@ int anker_rc(const HashMapCCI *template_index, unsigned char *qseq, int q_len, i
 			i = q_len - q_start;
 			q_start = q_len - q_end;
 			q_end = i;
-		}
-		score_r = 0;
-		mem_count = 0;
-		if(q_start) {
+			i = q_start;
+		} else if(q_start) {
 			i = q_start;
 		} else {
 			i = preseed(template_index, qseq, q_end - q_start);
 		}
+		score_r = 0;
+		mem_count = 0;
+		
 		while(i < q_end) {
 			end = charpos(qseq, 4, i, q_len);
 			if(end == -1) {
@@ -993,6 +978,7 @@ int anker_rc_comp(const HashMapCCI *template_index, unsigned char *qseq, unsigne
 	static int one2one = 0;
 	int i, j, k, rc, end, score, score_r, value, t_len, q_len, prev;
 	int bestScore, mem_count, totMems, shifter, kmersize, bias, *Ns, *seeds;
+	unsigned cPos, iPos;
 	long unsigned key, mask, *seq;
 	
 	if(!template_index) {
@@ -1026,14 +1012,22 @@ int anker_rc_comp(const HashMapCCI *template_index, unsigned char *qseq, unsigne
 			points->len = mem_count;
 			Ns = qseq_r_comp->N;
 			Ns[*Ns] = q_len;
+			i = q_len - q_start;
+			q_start = q_len - q_end;
+			q_end = i;
+			i = q_start;
+		} else if(q_start) {
+			i = q_start;
+		} else {
+			i = preseed(template_index, qseq, q_end - q_start);
 		}
 		score_r = 0;
 		mem_count = 0;
-		i = 0;
-		while(i < q_len) {
+		
+		while(i < q_end) {
 			end = *++Ns - kmersize + 1;
 			while(i < end) {
-				key = getKmer(seq, i, shifter);
+				getKmer_macro(key, seq, i, cPos, iPos, shifter);
 				value = hashMapCCI_get(template_index, key, shifter);
 				
 				if(value == 0) {


=====================================
align.h
=====================================
@@ -22,6 +22,13 @@
 #include "hashmapcci.h"
 #include "nw.h"
 
+extern AlnScore (*leadTailAlnPtr)(Aln *, Aln *, const long unsigned*, const unsigned char*, int, int, int, const int, NWmat *);
+extern void (*trailTailAlnPtr)(Aln *, Aln *, AlnScore *, const long unsigned *, const unsigned char *, int, int, int, int, const int, NWmat *);
+
+AlnScore skipLeadAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices);
+AlnScore leadTailAln(Aln *aligned, Aln *Frag_align, const long unsigned *tseq, const unsigned char *qseq, int t_e, int t_len, int q_e, const int bandwidth, NWmat *matrices);
+void skipTrailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices);
+void trailTailAln(Aln *aligned, Aln *Frag_align, AlnScore *Stat, const long unsigned *tseq, const unsigned char *qseq, int t_s, int t_len, int q_s, int q_len, const int bandwidth, NWmat *matrices);
 AlnScore KMA(const HashMapCCI *template_index, const unsigned char *qseq, int q_len, int q_start, int q_end, Aln *aligned, Aln *Frag_align, int min, int max, int mq, double scoreT, AlnPoints *points, NWmat *matrices);
 AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq, int q_len, int q_start, int q_end, const CompDNA *qseq_comp, int mq, double scoreT, AlnPoints *points, NWmat *matrices);
 int preseed(const HashMapCCI *template_index, unsigned char *qseq, int q_len);


=====================================
chain.c
=====================================
@@ -25,6 +25,7 @@
 #include "stdstat.h"
 
 int (*chainSeedsPtr)(AlnPoints *, int, int, int, unsigned *) = &chainSeeds;
+void (*trimSeedsPtr)(AlnPoints *points, int start) = &trimSeeds;
 
 AlnPoints * seedPoint_init(int size, Penalties *rewards) {
 	
@@ -78,7 +79,7 @@ void seedPoint_free(AlnPoints *src) {
 int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ) {
 	
 	int i, j, nMems, weight, gap, score, bestScore, secondScore, bestPos;
-	int tStart, tEnd, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
+	int tStart, tEnd, qStart, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
 	Penalties *rewards;
 	
 	rewards = points->rewards;
@@ -87,29 +88,45 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
 	M = rewards->M;
 	MM = rewards->MM;
 	nMems = points->len;
-	i = nMems - 1;
-	bestPos = i;
-	bestScore = points->weight[i] * M;
+	i = nMems;
+	bestPos = i - 1;
+	bestScore = 0;
 	secondScore = 0;
-	points->score[i] = bestScore;
+	points->score[i] = 0;
 	points->next[i] = 0;
-	points->weight[i] -= (kmersize - 1);
 	
 	while(i--) {
 		weight = points->weight[i] * M;
 		points->next[i] = 0;
 		tEnd = points->tEnd[i];
 		qEnd = points->qEnd[i];
-		score = MIN(t_len - tEnd, q_len - qEnd);
-		if(0 < --score) {
-			score *= U;
-			score += W1;
-		} else if(score == 0) {
-			score = W1;
+		
+		/* get stop score */
+		gap = MIN(t_len - tEnd, q_len - qEnd);
+		Ms = gap;
+		
+		/* gap */
+		if(--gap) {
+			gap *= U;
+			gap += W1;
+		} else if(gap == 0) {
+			gap = W1;
 		} else {
-			score = 0;
+			gap = 0;
 		}
-		score += weight;
+		
+		/* match */
+		if(Ms == 2) {
+			MMs = 2;
+			Ms = 0;
+		} else {
+			MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+			MMs = MAX(2, MMs);
+			Ms = MIN(Ms - MMs, kmersize);
+			Ms = MIN(Ms, MMs);
+		}
+		Ms = Ms * M + MMs * MM;
+		score = weight + (Ms < gap ? gap : Ms);
 		
 		/* 128 is the bandwidth */
 		nMin = MIN(nMems, i + 128);
@@ -194,6 +211,35 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
 		}
 		points->score[i] = score;
 		
+		/* penalize start */
+		tStart = points->tStart[i];
+		qStart = points->qStart[i];
+		gap = MIN(tStart, qStart);
+		Ms = gap;
+		
+		/* gap */
+		if(0 < --gap) {
+			gap *= U;
+			gap += W1;
+		} else if(gap == 0) {
+			gap = W1;
+		} else {
+			gap = 0;
+		}
+		
+		/* match */
+		if(Ms == 2) {
+			MMs = 2;
+			Ms = 0;
+		} else {
+			MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+			MMs = MAX(2, MMs);
+			Ms = MIN(Ms - MMs, kmersize);
+			Ms = MIN(Ms, MMs);
+		}
+		Ms = Ms * M + MMs * MM;
+		score += Ms < gap ? gap : Ms;
+		
 		/* update bestScore */
 		if(bestScore <= score) {
 			if(points->next[i] != bestPos) {
@@ -201,29 +247,22 @@ int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *
 			}
 			bestScore = score;
 			bestPos = i;
+		} else if(secondScore <= score && points->next[i] != bestPos) {
+			secondScore = bestScore;
 		}
 	}
+	
 	/* calculate mapping quality */
 	*mapQ = 0 < bestScore ? (ceil(40 * (1 - 1.0 * secondScore / bestScore) * MIN(1, points->weight[bestPos] / 10.0) * log(bestScore))) : 0;
+	points->score[bestPos] = bestScore;
 	
-	/* penalize start */
-	/*
-	if(bestPos != 0) {
-		score = (MIN(points->qStart[bestPos], points->tStart[bestPos])) * pM;
-		bestScore += MAX(W1, score);
-		if(points->qStart[0] == 0 && bestScore < points->score[0]) {
-			bestScore = points->score[0];
-			bestPos = 0;
-		}
-	}
-	*/
 	return bestPos;
 }
 
 int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ) {
 	
 	int i, j, nMems, weight, gap, score, bestScore, secondScore, bestPos;
-	int tStart, tEnd, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
+	int tStart, tEnd, qStart, qEnd, tGap, qGap, nMin, W1, U, M, MM, Ms, MMs;
 	Penalties *rewards;
 	
 	rewards = points->rewards;
@@ -232,29 +271,45 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
 	M = rewards->M;
 	MM = rewards->MM;
 	nMems = points->len;
-	i = nMems - 1;
-	bestPos = i;
-	bestScore = points->weight[i] * M;
+	i = nMems;
+	bestPos = i - 1;
+	bestScore = 0;
 	secondScore = 0;
-	points->score[i] = bestScore;
+	points->score[i] = 0;
 	points->next[i] = 0;
-	points->weight[i] -= (kmersize - 1);
 	
 	while(i--) {
 		weight = points->weight[i] * M;
 		points->next[i] = 0;
 		tEnd = points->tEnd[i];
 		qEnd = points->qEnd[i];
-		score = MIN(t_len - tEnd, q_len - qEnd);
-		if(0 < --score) {
-			score *= U;
-			score += W1;
-		} else if(score == 0) {
-			score = W1;
+		
+		/* get stop score */
+		gap = MIN(t_len - tEnd, q_len - qEnd);
+		Ms = gap;
+		
+		/* gap */
+		if(--gap) {
+			gap *= U;
+			gap += W1;
+		} else if(gap == 0) {
+			gap = W1;
+		} else {
+			gap = 0;
+		}
+		
+		/* match */
+		if(Ms == 2) {
+			MMs = 2;
+			Ms = 0;
 		} else {
-			score = 0;
+			MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+			MMs = MAX(2, MMs);
+			Ms = MIN(Ms - MMs, kmersize);
+			Ms = MIN(Ms, MMs);
 		}
-		score += weight;
+		Ms = Ms * M + MMs * MM;
+		score = weight + (Ms < gap ? gap : Ms);
 		
 		/* 128 is the bandwidth */
 		nMin = MIN(nMems, i + 128);
@@ -390,6 +445,35 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
 		}
 		points->score[i] = score;
 		
+		/* penalize start */
+		tStart = points->tStart[i];
+		qStart = points->qStart[i];
+		gap = MIN(tStart, qStart);
+		Ms = gap;
+		
+		/* gap */
+		if(0 < --gap) {
+			gap *= U;
+			gap += W1;
+		} else if(gap == 0) {
+			gap = W1;
+		} else {
+			gap = 0;
+		}
+		
+		/* match */
+		if(Ms == 2) {
+			MMs = 2;
+			Ms = 0;
+		} else {
+			MMs = Ms / kmersize + (Ms % kmersize ? 1 : 0);
+			MMs = MAX(2, MMs);
+			Ms = MIN(Ms - MMs, kmersize);
+			Ms = MIN(Ms, MMs);
+		}
+		Ms = Ms * M + MMs * MM;
+		score += Ms < gap ? gap : Ms;
+		
 		/* update bestScore */
 		if(bestScore <= score) {
 			if(points->next[i] != bestPos) {
@@ -397,22 +481,14 @@ int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, u
 			}
 			bestScore = score;
 			bestPos = i;
+		} else if(secondScore <= score && points->next[i] != bestPos) {
+			secondScore = bestScore;
 		}
 	}
-	/* calculate mapping quality */
-	*mapQ = ceil(40 * (1 - 1.0 * secondScore / bestScore) * MIN(1, points->weight[bestPos] / 10.0) * log(bestScore));
 	
-	/* penalize start */
-	/*
-	if(bestPos != 0) {
-		score = (MIN(points->qStart[bestPos], points->tStart[bestPos])) * pM;
-		bestScore += MAX(W1, score);
-		if(points->qStart[0] == 0 && bestScore < points->score[0]) {
-			bestScore = points->score[0];
-			bestPos = 0;
-		}
-	}
-	*/
+	/* calculate mapping quality */
+	*mapQ = 0 < bestScore ? ceil(40 * (1 - 1.0 * secondScore / bestScore) * MIN(1, points->weight[bestPos] / 10.0) * log(bestScore)) : 0;
+	points->score[bestPos] = bestScore;
 	
 	return bestPos;
 }
@@ -430,8 +506,52 @@ void trimSeeds(AlnPoints *points, int start) {
 		return;
 	}
 	
+	if(points->qStart[start]) {
+		/* iterate seeds on best chain */
+		do {
+			/* trim seed */
+			len = points->qEnd[start] - points->qStart[start];
+			if(len < ts) {
+				/* ensure at least one nucleotide remains in seed */
+				points->tStart[start] += --len;
+				points->qStart[start] += len;
+			} else {
+				points->tStart[start] += ts;
+				points->qStart[start] += ts;
+			}
+		} while((start = points->next[start]));
+	} else {
+		/* iterate seeds on best chain */
+		while((start = points->next[start])) {
+			/* trim seed */
+			len = points->qEnd[start] - points->qStart[start];
+			if(len < ts) {
+				/* ensure at least one nucleotide remains in seed */
+				points->tStart[start] += --len;
+				points->qStart[start] += len;
+			} else {
+				points->tStart[start] += ts;
+				points->qStart[start] += ts;
+			}
+		}
+	}
+}
+
+void trimSeedsNoLead(AlnPoints *points, int start) {
+	
+	/* trim the start of each seed */
+	static int ts = 0;
+	int len;
+	
+	if(!points) {
+		ts = start;
+		return;
+	} else if(!ts) {
+		return;
+	}
+	
 	/* iterate seeds on best chain */
-	do {
+	while((start = points->next[start])) {
 		/* trim seed */
 		len = points->qEnd[start] - points->qStart[start];
 		if(len < ts) {
@@ -442,5 +562,5 @@ void trimSeeds(AlnPoints *points, int start) {
 			points->tStart[start] += ts;
 			points->qStart[start] += ts;
 		}
-	} while((start = points->next[start]));
+	}
 }


=====================================
chain.h
=====================================
@@ -38,6 +38,7 @@ struct alnPoints {
 
 /* pointer to chaining method */
 extern int (*chainSeedsPtr)(AlnPoints *, int, int, int, unsigned *);
+extern void (*trimSeedsPtr)(AlnPoints *points, int start);
 
 /* FUNCTIONS */
 AlnPoints * seedPoint_init(int size, Penalties *rewards);
@@ -46,3 +47,4 @@ void seedPoint_free(AlnPoints *src);
 int chainSeeds(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
 int chainSeeds_circular(AlnPoints *points, int q_len, int t_len, int kmersize, unsigned *mapQ);
 void trimSeeds(AlnPoints *points, int start);
+void trimSeedsNoLead(AlnPoints *points, int start);


=====================================
kma.c
=====================================
@@ -97,89 +97,109 @@ char * strjoin(char **strings, int len) {
 	return newStr;
 }
 
-static void helpMessage(int exeStatus) {
-	FILE *helpOut;
-	if(exeStatus == 0) {
-		helpOut = stdout;
-	} else {
-		helpOut = stderr;
-	}
-	fprintf(helpOut, "# KMA-%s mapps raw reads to a template database.\n", KMA_VERSION);
-	fprintf(helpOut, "# Options are:\t\tDesc:\t\t\t\tDefault:\tRequirements:\n");
-	fprintf(helpOut, "#\n");
-	fprintf(helpOut, "#\t-o\t\tOutput file\t\t\tNone\t\tREQUIRED\n");
-	fprintf(helpOut, "#\t-t_db\t\tTemplate DB\t\t\tNone\t\tREQUIRED\n");
-	fprintf(helpOut, "#\t-i\t\tInput file name(s)\t\tSTDIN\n");
-	fprintf(helpOut, "#\t-ipe\t\tInput paired end file name(s)\n");
-	fprintf(helpOut, "#\t-int\t\tInput interleaved file name(s)\n");
-	fprintf(helpOut, "#\t-k\t\tKmersize\t\t\t%s\n", "DB defined");
-	fprintf(helpOut, "#\t-ts\t\tTrim front of seeds with ts\t%d\n", 0);
-	fprintf(helpOut, "#\t-ml\t\tMinimum alignment length\t%d\n", 16);
-	fprintf(helpOut, "#\t-p\t\tp-value\t\t\t\t0.05\n");
-	fprintf(helpOut, "#\t-ConClave\tConClave version\t\t1\n");
-	fprintf(helpOut, "#\t-mem_mode\tUse kmers to choose best\n#\t\t\ttemplate, and save memory\tFalse\n");
-	fprintf(helpOut, "#\t-proxi\t\tUse proximity scoring under\n#\t\t\ttemplate mapping\t\tFalse/1.0\n");
-	fprintf(helpOut, "#\t-ex_mode\tSearh kmers exhaustively\tFalse\n");
-	fprintf(helpOut, "#\t-ef\t\tPrint additional features\tFalse\n");
-	fprintf(helpOut, "#\t-vcf\t\tMake vcf file, 2 to apply FT\tFalse/0\n");
-	fprintf(helpOut, "#\t-sam\t\tOutput sam to stdout, 4 to \n#\t\t\tonly output mapped reads, \n#\t\t\t2096 for aligned\t\tFalse/0\n");
-	fprintf(helpOut, "#\t-nc\t\tNo consensus file\t\tFalse\n");
-	fprintf(helpOut, "#\t-na\t\tNo aln file\t\tFalse\n");
-	fprintf(helpOut, "#\t-nf\t\tNo frag file\t\t\tFalse\n");
-	fprintf(helpOut, "#\t-deCon\t\tRemove contamination\t\tFalse\n");
-	fprintf(helpOut, "#\t-dense\t\tDo not allow insertions\n#\t\t\tin assembly\t\t\tFalse\n");
-	fprintf(helpOut, "#\t-sasm\t\tSkip alignment and assembly\tFalse\n");
-	fprintf(helpOut, "#\t-ref_fsa\tConsensus sequnce will\n#\t\t\thave \"n\" instead of gaps\tFalse / 0\n");
-	fprintf(helpOut, "#\t-matrix\t\tPrint assembly matrix\t\tFalse\n");
-	fprintf(helpOut, "#\t-a\t\tPrint all best mappings\t\tFalse\n");
-	fprintf(helpOut, "#\t-mp\t\tMinimum phred score\t\t20\n");
-	fprintf(helpOut, "#\t-eq\t\tMinimum avg. quality score\t0\n");
-	fprintf(helpOut, "#\t-5p\t\tCut a constant number of\n#\t\t\tnucleotides from the 5 prime.\t0\n");
-	fprintf(helpOut, "#\t-3p\t\tCut a constant number of\n#\t\t\tnucleotides from the 3 prime.\t0\n");
-	fprintf(helpOut, "#\t-Sparse\t\tOnly count kmers\t\tFalse\n");
-	fprintf(helpOut, "#\t-Mt1\t\tMap only to \"num\" template.\t0 / False\n");
-	fprintf(helpOut, "#\t-ID\t\tMinimum ID\t\t\t1.0%%\n");
-	fprintf(helpOut, "#\t-ss\t\tSparse sorting (q,c,d)\t\tq\n");
-	fprintf(helpOut, "#\t-pm\t\tPairing method (p,u,f)\t\tu\n");
-	fprintf(helpOut, "#\t-fpm\t\tFine Pairing method (p,u,f)\tu\n");
-	fprintf(helpOut, "#\t-apm\t\tSets both pm and fpm\t\tu\n");
-	fprintf(helpOut, "#\t-shm\t\tUse shared DB made by kma_shm\t0 (lvl)\n");
-	fprintf(helpOut, "#\t-mmap\t\tMemory map *.comp.by\n");
-	fprintf(helpOut, "#\t-tmp\t\tSet directory for temporary files.\n");
-	fprintf(helpOut, "#\t-1t1\t\tForce end to end mapping\tFalse\n");
-	fprintf(helpOut, "#\t-hmm\t\tUse a HMM to assign template(s)\n#\t\t\tto query sequences\t\tTrue\n");
-	fprintf(helpOut, "#\t-ck\t\tCount kmers instead of\n#\t\t\tpseudo alignment\t\tFalse\n");
-	fprintf(helpOut, "#\t-ca\t\tMake circular alignments\tFalse\n");
-	fprintf(helpOut, "#\t-boot\t\tBootstrap sequence\t\tFalse\n");
-	fprintf(helpOut, "#\t-bc\t\tBase calls should be\n#\t\t\tsignificantly overrepresented.\t[True]\n");
-	fprintf(helpOut, "#\t-bc90\t\tBase calls should be both\n#\t\t\tsignificantly overrepresented,\n#\t\t\tand have 90%% agreement.\t\tFalse\n");
-	fprintf(helpOut, "#\t-bcNano\t\tCall bases at suspicious\n#\t\t\tdeletions, made for nanopore.\tFalse\n");
-	fprintf(helpOut, "#\t-bcd\t\tMinimum depth at base\t\t1\n");
-	fprintf(helpOut, "#\t-bcg\t\tMaintain insignificant gaps\n");
-	fprintf(helpOut, "#\t-and\t\tBoth mrs and p_value thresholds\n#\t\t\thas to reached to in order to\n#\t\t\treport a template hit.\t\tor\n");
-	fprintf(helpOut, "#\t-oa\t\tOutput all, disregard mrs and p-value\n#\t\t\twhen reporting a template hit\tFalse\n");
-	fprintf(helpOut, "#\t-mq\t\tMinimum mapping quality\t\t0\n");
-	fprintf(helpOut, "#\t-mrs\t\tMinimum alignment score,\n#\t\t\tnormalized to alignment length\t0.50\n");
-	fprintf(helpOut, "#\t-mrc\t\tMinimum read coverage\t\t0.10\n");
-	fprintf(helpOut, "#\t-mct\t\tMax overlap between templates\t0.10\n");
-	fprintf(helpOut, "#\t-reward\t\tScore for match\t\t\t1\n");
-	fprintf(helpOut, "#\t-penalty\tPenalty for mismatch\t\t-2\n");
-	fprintf(helpOut, "#\t-gapopen\tPenalty for gap opening\t\t-3\n");
-	fprintf(helpOut, "#\t-gapextend\tPenalty for gap extension\t-1\n");
-	fprintf(helpOut, "#\t-per\t\tReward for pairing reads\t7\n");
-	fprintf(helpOut, "#\t-localopen\tPenalty for openning a local chain\t-6\n");
-	fprintf(helpOut, "#\t-Npenalty\tPenalty matching N\t\t0\n");
-	fprintf(helpOut, "#\t-transition\tPenalty for transition\t\t-2\n");
-	fprintf(helpOut, "#\t-transversion\tPenalty for transversion\t-2\n");
-	fprintf(helpOut, "#\t-cge\t\tSet CGE penalties and rewards\tFalse\n");
-	fprintf(helpOut, "#\t-t\t\tNumber of threads\t\t1\n");
-	fprintf(helpOut, "#\t-status\t\tExtra status\n");
-	fprintf(helpOut, "#\t-verbose\tExtra verbose\n");
-	fprintf(helpOut, "#\t-c\t\tCitation\n");
-	fprintf(helpOut, "#\t-v\t\tVersion\n");
-	fprintf(helpOut, "#\t-h\t\tShows this help message\n");
-	fprintf(helpOut, "#\n");
-	exit(exeStatus);
+static void helpMessage(int exitStatus) {
+	
+	FILE *out = exitStatus ? stderr : stdout;
+	
+	fprintf(out, "# KMA-%s maps and/or aligns raw reads to a template database.\n", KMA_VERSION);
+	
+	fprintf(out, "# %16s\t%-32s\t%s\n", "Options:", "Desc:", "Default:");
+	
+	fprintf(out, "#\n# Input:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-i", "Single end input(s)", "stdin");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ipe", "Paired end input(s)", "");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-int", "Interleaved input(s)", "");
+	
+	fprintf(out, "#\n# Output:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-o", "Output prefix", "");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ef", "Output additional features", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-vcf", "Output vcf file, 2 to apply FT", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-sam", "Output sam, 4/2096 for mapped/aligned", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-nc", "No consensus file", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-nc", "No aln file", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-nf", "No frag file", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-matrix", "Output assembly matrix", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-a", "Output all template mappings", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-and", "Use both mrs and p-value on consensus", "or");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-oa", "Use neither mrs or p-value on consensus", "False");
+	
+	fprintf(out, "#\n# Consensus:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-bc", "Minimum support to call bases", "0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-bcNano", "Altered indel calling for ONT data", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-bcd", "Minimum depth to cal bases", "1");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-bcg", "Maintain insignificant gaps", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ID", "Minimum consensus ID", "1.0%");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-dense", "Skip insertion in consensus", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ref_fsa", "Use n's on indels", "False");
+	
+	fprintf(out, "#\n# General:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-t_db", "Template DB", "");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-p", "P-value", "0.05");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-shm", "Use DB in shared memory", "0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mmap", "Memory map *.comp.b", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-tmp", "Set directory for temporary files", "");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-t", "Number of threads", "1");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-status", "Extra status", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-verbose", "Extra verbose", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-c", "Citation", "");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-v", "Version", "");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-h", "Shows this help message", "");
+	
+	fprintf(out, "#\n# Template mapping:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ConClave", "ConClave version", "1");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mem_mode", "Base ConClave on template mappings", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-proxi", "Proximity scoring (negative for soft)", "False/1.0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ex_mode", "Searh kmers exhaustively", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-deCon", "Remove contamination", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-Sparse", "Only count kmers", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ss", "Sparse sorting (q,c,d)", "q");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-Mt1", "Map everything to one template", "False/0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-pm", "Pairing method (p,u,f)", "u");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-1t1", "One query to one template", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-hmm", "Use a HMM to assign template(s)", "True");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ck", "Count k-mers over pseudo alignment", "False");
+	
+	fprintf(out, "#\n# Chaining:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-k", "K-mersize", "DB defined");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ts", "Trim front of seeds", "0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ssa", "Seeds soround alignments", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ex_mode", "Searh kmers exhaustively", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-fpm", "Pairing method (p,u,f)", "u");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mq", "Minimum mapping quality", "0");
+	
+	fprintf(out, "#\n# Alignment:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ca", "Circular alignments", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mrs", "Minimum relative alignment score", "0.5");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mrc", "Minimum query coverage", "0.0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ml", "Minimum alignment length", "16");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-reward", "Score for match", "1");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-penalty", "Penalty for mismatch", "2");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-gapopen", "Penalty for gap opening", "3");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-gapextend", "Penalty for gap extension", "1");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-per", "Reward for pairing reads", "7");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-Npenalty", "Penalty matching N", "0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-transition", "Penalty for transition", "2");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-transversion", "Penalty for transversion", "2");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-sasm", "Skip alignment", "False");
+	
+	fprintf(out, "#\n# Trimming:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mp", "Minimum phred score", "20");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-eq", "Minimum avg. quality score", "0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-5p", "Trim 5 prime", "0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-3p", "Trim 3 prime", "0");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-ml", "Minimum length", "16");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-xl", "Maximum length on se", "2147483647");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-boot", "Bootstrap sub-sequence", "False");
+	
+	fprintf(out, "#\n# Presets:\n");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-apm", "Sets both pm and fpm", "u");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-cge", "Set CGE penalties and rewards", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mint2", "Set 2ng gen Mintyper preset", "False");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mint3", "Set 3rd gen Mintyper preset", "False");
+	
+	fprintf(out, "#\n");
+	
+	exit(exitStatus);
 }
 
 int kma_main(int argc, char *argv[]) {
@@ -205,7 +225,7 @@ int kma_main(int argc, char *argv[]) {
 	static int fileCounter, fileCounter_PE, fileCounter_INT, Ts, Tv, minlen;
 	static int extendedFeatures, spltDB, thread_num, kmersize, targetNum, mq;
 	static int ref_fsa, print_matrix, print_all, sam, vcf, Mt1, bcd, one2one;
-	static int sparse_run, ts, **d, status = 0;
+	static int sparse_run, ts, maxlen, **d, status = 0;
 	static unsigned xml, nc, nf, shm, exhaustive, verbose;
 	static char *outputfilename, *templatefilename, **templatefilenames;
 	static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
@@ -255,6 +275,7 @@ int kma_main(int argc, char *argv[]) {
 		kmersize = 0;
 		ts = 0;
 		minlen = 16;
+		maxlen = 2147483647;
 		evalue = 0.05;
 		support = 0.0;
 		minFrac = 1.0;
@@ -516,6 +537,10 @@ int kma_main(int argc, char *argv[]) {
 						exit(1);
 					}
 				}
+			} else if(strcmp(argv[args], "-ssa") == 0) {
+				trimSeedsPtr = &trimSeedsNoLead;
+				leadTailAlnPtr = &skipLeadAln;
+				trailTailAlnPtr = &skipTrailAln;
 			} else if(strcmp(argv[args], "-ml") == 0) {
 				++args;
 				if(args < argc) {
@@ -525,6 +550,15 @@ int kma_main(int argc, char *argv[]) {
 						exit(1);
 					}
 				}
+			} else if(strcmp(argv[args], "-xl") == 0) {
+				++args;
+				if(args < argc) {
+					maxlen = strtoul(argv[args], &exeBasic, 10);
+					if(*exeBasic != 0) {
+						fprintf(stderr, "# Invalid minimum length parsed\n");
+						exit(1);
+					}
+				}
 			} else if(strcmp(argv[args], "-mp") == 0) {
 				++args;
 				if(args < argc) {
@@ -984,7 +1018,7 @@ int kma_main(int argc, char *argv[]) {
 			++args;
 		}
 		preseed(0, 0, exhaustive);
-		trimSeeds(0, ts);
+		trimSeedsPtr(0, ts);
 		
 		if(sam && kmaPipe != &kmaPipeThread) {
 			fprintf(stderr, "\"-sam\" and \"-status\" cannot coincide.\n");
@@ -1272,7 +1306,7 @@ int kma_main(int argc, char *argv[]) {
 			
 			/* SE */
 			if(fileCounter > 0) {
-				totFrags += run_input(inputfiles, fileCounter, minPhred, minQ, fiveClip, threeClip, minlen, to2Bit, prob, ioStream);
+				totFrags += run_input(inputfiles, fileCounter, minPhred, minQ, fiveClip, threeClip, minlen, maxlen, to2Bit, prob, ioStream);
 			}
 			
 			/* PE */


=====================================
nw.c
=====================================
@@ -23,9 +23,9 @@
 #include "pherror.h"
 #include "stdnuc.h"
 
-AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices) {
+AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices, int template_length) {
 	
-	int m, n, t_len, q_len, thisScore, nuc_pos, template_length, W1, U, MM;
+	int m, n, t_len, q_len, thisScore, nuc_pos, W1, U, MM;
 	int pos[2], *D_ptr, *D_prev, Q, Q_prev, *P_ptr, *P_prev, *tmp, **d;
 	unsigned char *query, t_nuc, *E, *E_ptr, e;
 	AlnScore Stat;
@@ -40,9 +40,8 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
 	t_len = t_e - t_s;
 	aligned->start = 0;
 	aligned->end = 0;
-	template_length = aligned->pos;
 	if(t_len < 0) {
-		t_len += aligned->pos;
+		t_len += template_length;
 	}
 	query = (unsigned char*)(queryOrg + q_s);
 	Stat.pos = 0;
@@ -308,9 +307,9 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
 	return Stat;
 }
 
-AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices) {
+AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices, int template_length) {
 	
-	int m, n, t_len, q_len, thisScore, nuc_pos, template_length, pos[2];
+	int m, n, t_len, q_len, thisScore, nuc_pos, pos[2];
 	int bq_len, halfBand, sn, en, sq, eq, q_pos, c_pos, W1, U, MM;
 	int *D_ptr, *D_prev, Q, Q_prev, *P_ptr, *P_prev, *tmp, **d;
 	unsigned char *query, t_nuc, *E, *E_ptr, e;
@@ -326,7 +325,6 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
 	t_len = t_e - t_s;
 	aligned->start = 0;
 	aligned->end = 0;
-	template_length = aligned->pos;
 	if(t_len < 0) {
 		t_len += template_length;
 	}


=====================================
nw.h
=====================================
@@ -56,7 +56,7 @@ struct aln {
 #define NWLOAD 1
 #endif
 
-AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices);
-AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices);
+AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, NWmat *matrices, int template_length);
+AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, Aln *aligned, int band, NWmat *matrices, int template_length);
 AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, NWmat *matrices, int template_length);
 AlnScore NW_band_score(const long unsigned *template, const unsigned char *queryOrg, int k, int t_s, int t_e, int q_s, int q_e, int band, NWmat *matrices, int template_length);


=====================================
runinput.c
=====================================
@@ -29,7 +29,7 @@
 void (*printFsa_ptr)(Qseqs*, Qseqs*, CompDNA*, FILE*) = &printFsa;
 void (*printFsa_pair_ptr)(Qseqs*, Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*) = &printFsa_pair;
 
-long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out) {
+long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out) {
 	
 	int fileCounter, phredScale, phredCut, start, end;
 	unsigned FASTQ;
@@ -65,55 +65,59 @@ long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ
 			
 			/* parse reads */
 			while(FileBuffgetFq(inputfile, header, qseq, qual, trans)) {
-				/* trim */
-				seq = qual->seq;
-				start = fiveClip;
-				end = qseq->len - 1 - threeClip;
-				end = end < 0 ? 0 : end;
-				while(end >= 0 && seq[end] < phredCut) {
-					--end;
-				}
-				++end;
-				while(start < end && seq[start] < phredCut) {
-					++start;
-				}
-				/*
-				for(i = start; i < end; ++i) {
-					if(seq[i] < phredCut) {
-						seq[i] = 4;
+				if(qseq->len <= maxlen) {
+					/* trim */
+					seq = qual->seq;
+					start = fiveClip;
+					end = qseq->len - 1 - threeClip;
+					end = end < 0 ? 0 : end;
+					while(end >= 0 && seq[end] < phredCut) {
+						--end;
+					}
+					++end;
+					while(start < end && seq[start] < phredCut) {
+						++start;
+					}
+					/*
+					for(i = start; i < end; ++i) {
+						if(seq[i] < phredCut) {
+							seq[i] = 4;
+						}
+					}
+					*/
+					qseq->len = end - start;
+					/* print */
+					if(qseq->len > minlen && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
+						/* dump seq */
+						qseq->seq += start;
+						printFsa_ptr(header, qseq, compressor, out);
+						qseq->seq -= start;
+						++count;
 					}
-				}
-				*/
-				qseq->len = end - start;
-				/* print */
-				if(qseq->len > minlen && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
-					/* dump seq */
-					qseq->seq += start;
-					printFsa_ptr(header, qseq, compressor, out);
-					qseq->seq -= start;
-					++count;
 				}
 			}
 		} else if(FASTQ & 2) {
 			while(FileBuffgetFsa(inputfile, header, qseq, trans)) {
-				/* remove leading and trailing N's */
-				start = 0;
-				end = qseq->len - 1;
-				seq = qseq->seq;
-				while(end >= 0 && seq[end] == 4) {
-					--end;
-				}
-				++end;
-				while(start < end && seq[start] == 4) {
-					++start;
-				}
-				qseq->len = end - start;
-				if(qseq->len > minlen) {
-					/* dump seq */
-					qseq->seq += start;
-					printFsa_ptr(header, qseq, compressor, out);
-					qseq->seq -= start;
-					++count;
+				if(qseq->len <= maxlen) {
+					/* remove leading and trailing N's */
+					start = 0;
+					end = qseq->len - 1;
+					seq = qseq->seq;
+					while(end >= 0 && seq[end] == 4) {
+						--end;
+					}
+					++end;
+					while(start < end && seq[start] == 4) {
+						++start;
+					}
+					qseq->len = end - start;
+					if(qseq->len > minlen) {
+						/* dump seq */
+						qseq->seq += start;
+						printFsa_ptr(header, qseq, compressor, out);
+						qseq->seq -= start;
+						++count;
+					}
 				}
 			}
 		}


=====================================
runinput.h
=====================================
@@ -23,7 +23,7 @@
 /* pointers determining how to deliver the input */
 extern void (*printFsa_ptr)(Qseqs*, Qseqs*, CompDNA*, FILE*);
 extern void (*printFsa_pair_ptr)(Qseqs*, Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*);
-long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
+long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out);
 long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
 long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
 void bootFsa(Qseqs *header, Qseqs *qseq, CompDNA *compressor, FILE *out);


=====================================
stdnuc.h
=====================================
@@ -21,6 +21,10 @@
 #define setEx(src, pos)(src[pos >> 3] |= (1 << (pos & 7)))
 #define unsetEx(src, pos)(src[pos >> 3] ^= (1 << (pos & 7)))
 #define getEx(src, pos)((src[pos >> 3] >> (pos & 7)) & 1)
+#define getKmer_macro(kmer, Comp, pos, cPos, iPos, shifter) \
+		iPos = (pos & 31) << 1;\
+		cPos = pos >> 5;\
+		kmer = (iPos <= shifter) ? ((Comp[cPos] << iPos) >> shifter) : (((Comp[cPos] << iPos) | (Comp[cPos + 1] >> (64-iPos))) >> shifter);
 
 long unsigned getKmer(long unsigned *compressor, unsigned cPos, const unsigned shifter);
 long unsigned makeKmer(const unsigned char *qseq, unsigned pos, unsigned size);


=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
  * limitations under the License.
 */
 
-#define KMA_VERSION "1.3.15"
+#define KMA_VERSION "1.3.17"



View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/d0cca96d8591568669d8b395a1da9b7aa5d7c2f4

-- 
View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/d0cca96d8591568669d8b395a1da9b7aa5d7c2f4
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210523/fdaf79f7/attachment-0001.htm>


More information about the debian-med-commit mailing list