[med-svn] [Git][med-team/kma][upstream] New upstream version 1.3.22

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Thu Jun 24 19:37:47 BST 2021



Nilesh Patra pushed to branch upstream at Debian Med / kma


Commits:
fbe9dfdc by Nilesh Patra at 2021-06-24T23:58:21+05:30
New upstream version 1.3.22
- - - - -


11 changed files:

- KMAspecification.pdf
- align.c
- assembly.c
- kma.c
- runkma.c
- savekmers.c
- spltdb.c
- updateindex.c
- version.h
- xml.c
- xml.h


Changes:

=====================================
KMAspecification.pdf
=====================================
Binary files a/KMAspecification.pdf and b/KMAspecification.pdf differ


=====================================
align.c
=====================================
@@ -634,7 +634,6 @@ AlnScore KMA_score(const HashMapCCI *template_index, const unsigned char *qseq,
 		}
 	}
 	mapQ = 0;
-	
 	if(mem_count) {
 		points->len = mem_count;
 	} else {
@@ -807,7 +806,7 @@ int anker_rc(const HashMapCCI *template_index, unsigned char *qseq, int q_len, i
 			i = q_len - q_start;
 			q_start = q_len - q_end;
 			q_end = i;
-			i = q_start;
+			i = q_start ? q_start : preseed(template_index, qseq, q_end - q_start);
 		} else if(q_start) {
 			i = q_start;
 		} else {
@@ -957,7 +956,7 @@ int anker_rc(const HashMapCCI *template_index, unsigned char *qseq, int q_len, i
 		}
 	}
 	
-	if(one2one && bestScore * kmersize < (q_len - kmersize - bestScore)) {
+	if(one2one && bestScore < kmersize && bestScore * kmersize < (q_len - kmersize - bestScore)) {
 		bestScore = 0;
 		points->len = 0;
 	} else if(bestScore == score) {
@@ -1141,7 +1140,7 @@ int anker_rc_comp(const HashMapCCI *template_index, unsigned char *qseq, unsigne
 		}
 	}
 	
-	if(one2one && bestScore * kmersize < (q_len - kmersize - bestScore)) {
+	if(one2one && bestScore < kmersize && bestScore * kmersize < (q_len - kmersize - bestScore)) {
 		bestScore = 0;
 		points->len = 0;
 	} else if(bestScore == score) {


=====================================
assembly.c
=====================================
@@ -1055,7 +1055,7 @@ void * assemble_KMA_dense_threaded(void *arg) {
 							}
 							
 							if(xml_out) {
-								hitXML(xml_out, template, template_name, aligned, &alnStat, NWmatrices->rewards, stats[4]);
+								hitXML(xml_out, template, header->seq, aligned, &alnStat, NWmatrices->rewards, stats[4]);
 							}
 						} else if(sam && !(sam & 2096)) {
 							stats[1] = read_score;
@@ -1717,6 +1717,7 @@ void * assemble_KMA(void *arg) {
 		t_len = thread->t_len;
 		load = (template_index->len != 0) ? 0 : 1;
 		matrix->len = 0;
+		seq_in = thread->seq_in;
 		
 		/* start threads */
 		aligned_assem->score = 0;
@@ -1931,7 +1932,7 @@ void * assemble_KMA(void *arg) {
 							}
 							
 							if(xml_out) {
-								hitXML(xml_out, template, template_name, aligned, &alnStat, NWmatrices->rewards, stats[4]);
+								hitXML(xml_out, template, header->seq, aligned, &alnStat, NWmatrices->rewards, stats[4]);
 							}
 						} else if(sam && !(sam & 2096)) {
 							stats[1] = read_score;


=====================================
kma.c
=====================================
@@ -1301,7 +1301,6 @@ int kma_main(int argc, char *argv[]) {
 			} else {
 				myTemplatefilename = 0;
 			}
-			kmersize = 16;
 			totFrags = 0;
 			
 			/* SE */


=====================================
runkma.c
=====================================
@@ -206,6 +206,9 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 		ERROR();
 	}
 	alignLoadPtr = &alignLoad_fly;
+	if(!kmersize) {
+		kmersize = *template_lengths;
+	}
 	if(kmersize < 4 || 32 < kmersize) {
 		kmersize = 16;
 	}
@@ -1399,6 +1402,9 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 	
 	/* load databases */
 	alignLoadPtr = &alignLoad_fly_mem;
+	if(!kmersize) {
+		kmersize = *template_lengths;
+	}
 	if(kmersize < 4 || 32 < kmersize) {
 		kmersize = 16;
 	}


=====================================
savekmers.c
=====================================
@@ -408,12 +408,26 @@ static int clearScore(int *bestTemplates, int *Score) {
 	return 0;
 }
 
+int testVal(short unsigned *values_s, int template) {
+	
+	int i;
+	
+	i = *values_s;
+	while(--i) {
+		if(*++values_s == template) {
+			return 1;
+		}
+	}
+	
+	return 0;
+}
+
 int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, int *extendScore, const int exhaustive) {
 	
 	/* save_kmers find ankering k-mers the in query sequence,
 	   and is the time determining step */
 	int i, j, l, rc, end, HIT, gaps, score, Ms, MMs, Us, W1s, template, SU;
-	int hitCounter, bestSeqCount, kmersize, shifter, W1, U, M, MM;
+	int hitCounter, bestSeqCount, kmersize, shifter, W1, U, M, MM, m, mm;
 	int *bests, *Scores;
 	unsigned *values, *last, n;
 	short unsigned *values_s;
@@ -478,157 +492,132 @@ int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, in
 				for(;j < end; ++j) {
 					if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
 						if(values == last) {
-							if(kmersize < gaps) {
+							/*
+							gaps == 0 -> Match
+							gaps == kmersize -> 1 MM
+							kmersize < gaps -> several mismatches or indel(s)
+							gaps < kmersize -> deletion
+							*/
+							if(gaps == 0) {
+								/* match */
+								++Ms;
+							} else if(gaps == kmersize) {
+								/* snp */
 								Ms += kmersize;
-								gaps -= kmersize;
-								if(gaps) {
-									/* go for best scenario */
-									if(gaps == 1) {
-										MMs += 2;
-									} else {
-										gaps -= 2;
-										if((MM << 1) + gaps * M < 0) {
-											Ms += gaps;
-											MMs += 2;
-										}
-									}
+								++MMs;
+							} else if(kmersize < gaps) {
+								/* mismatch or insersion */
+								Ms += kmersize;
+								gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+								if(gaps <= 2) {
+									mm = gaps;
+									m = 0;
 								} else {
-									++MMs;
+									mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+									mm = MAX(2, mm);
+									m = MIN(gaps - mm, kmersize);
+									m = MIN(m, mm);
 								}
-							} else if (gaps) {
-								--gaps;
-								++W1s;
-								Us += gaps;
-							} else {
-								++Ms;
-							}
+								
+								/* evaluate best option */
+								if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+									MMs += mm;
+									Ms += m;
+								} else {
+									++W1s;
+									Us += (gaps -1);
+								}
+							} /*else {
+								// unlikely deletion or random k-mer mismatch, 
+								// assume better and go random zero score
+							}*/
+							
 							HIT = j;
 							gaps = 0;
 						} else {
 							if(last) {
-								if(HIT) {
-									HIT += kmersize;
-								} else {
-									HIT = j + kmersize;
-								}
 								score = Ms * M + MMs * MM + Us * U + W1s * W1;
-								if(SU) {
-									values_s = (short unsigned *) last;
-									l = (*values_s) + 1;
-									while(--l) {
-										Scores[(template = values_s[l])] += score;
-										extendScore[template] = HIT;
-									}
-									
-									score = kmersize * M;
-									MMs = MM << 1;
-									values_s = (short unsigned *) values;
-									n = *values_s;
-									for(l = 1; l <= n; ++l) {
-										if(j < extendScore[(template = values_s[l])]) {
-											if(extendScore[template] == HIT) {
-												Scores[template] += M;
-											} else {
-												gaps = extendScore[template] - j - 1;
-												Scores[template] += (W1 + gaps * U);
-											}
-										} else if(Scores[template] != 0) {
-											Scores[template] += score;
-											if((gaps = extendScore[template] - j)) {
-												if(gaps == 1) {
-													Scores[template] += MMs;
-												} else {
-													gaps -= 2;
-													if((Ms = MMs + gaps * M) < 0) {
-														Scores[template] += Ms;
-													}
-												}
-											} else {
-												Scores[template] += MM;
-											}
-										} else {
-											Scores[template] = score;
-											if(include[template] == 0) {
-												include[template] = 1;
-												bests[++*bests] = template;
-											}
-										}
-									}
-								} else {
-									l = (*last) + 1;
-									while(--l) {
-										Scores[(template = last[l])] += score;
-										extendScore[template] = HIT;
-									}
-									
-									score = kmersize * M;
-									MMs = MM << 1;
-									n = *values;
-									for(l = 1; l <= n; ++l) {
-										if(j < extendScore[(template = values[l])]) {
-											if(extendScore[template] == HIT) {
-												Scores[template] += M;
+								values_s = (short unsigned *) last;
+								l = SU ? (*values_s + 1) : (*last + 1);
+								while(--l) {
+									template = SU ? *++values_s : *++last;
+									Scores[template] += score;
+									extendScore[template] = HIT;
+								}
+								HIT = j - 1;
+								last = values;
+								score = kmersize * M;
+								values_s = (short unsigned *) values;
+								n = SU ? *values_s : *values;
+								l = n + 1;
+								while(--l) {
+									template = SU ? *++values_s : *++values;
+									if(Scores[template] != 0) {
+										gaps = HIT - extendScore[template];
+										if(gaps == 0) {
+											/* match */
+											Scores[template] += M;
+										} else if(gaps == kmersize) {
+											/* snp */
+											Scores[template] += score + MM;
+										} else if(kmersize < gaps) {
+											/* mismatch or insersion */
+											gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+											if(gaps <= 2) {
+												mm = gaps;
+												m = 0;
 											} else {
-												gaps = extendScore[template] - j - 1;
-												Scores[template] += (W1 + gaps * U);
+												mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+												mm = MAX(2, mm);
+												m = MIN(gaps - mm, kmersize);
+												m = MIN(m, mm);
 											}
-										} else if(Scores[template] != 0) {
-											Scores[template] += score;
-											if((gaps = extendScore[template] - j)) {
-												if(gaps == 1) {
-													Scores[template] += MMs;
-												} else {
-													gaps -= 2;
-													if((Ms = MMs + gaps * M) < 0) {
-														Scores[template] += Ms;
-													}
-												}
+											
+											/* evaluate best option */
+											if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+												Scores[template] += score + (mm * MM + m * M);
 											} else {
-												Scores[template] += MM;
-											}
-										} else {
-											Scores[template] = score;
-											if(include[template] == 0) {
-												include[template] = 1;
-												bests[++*bests] = template;
+												Scores[template] += score + (W1 + (gaps - 1) * U);
 											}
+										} /*else {
+											// unlikely deletion or random k-mer mismatch, 
+											// assume better and go random zero score
+										}*/
+									} else {
+										Scores[template] = score;
+										if(include[template] == 0) {
+											include[template] = 1;
+											bests[++*bests] = template;
 										}
 									}
 								}
-							} else if(SU) {
-								values_s = (short unsigned *) values;
-								n = *values_s;
-								Ms = kmersize * M;
-								for(l = 1; l <= n; ++l) {
-									Scores[(template = values_s[l])] = Ms;
-									include[template] = 1;
-									bests[l] = template;
-								}
-								*bests = n;
 							} else {
-								n = *values;
+								last = values;
+								values_s = (short unsigned *) values;
+								n = SU ? *values_s : *values;
 								Ms = kmersize * M;
 								for(l = 1; l <= n; ++l) {
-									Scores[(template = values[l])] = Ms;
+									template = SU ? *++values_s : *++values;
+									Scores[template] = Ms;
 									include[template] = 1;
 									bests[l] = template;
 								}
 								*bests = n;
 							}
 							
-							HIT = 0;
+							HIT = j;
 							gaps = 0;
 							Ms = 0;
 							MMs = 0;
 							Us = 0;
 							W1s = 0;
-							last = values;
 						}
 						++hitCounter;
 					} else {
 						++gaps;
 					}
 				}
+				gaps += (qseq->N[i] + 1 - j); /* gap over N's */
 				j = qseq->N[i] + 1;
 			}
 			
@@ -638,12 +627,12 @@ int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, in
 					values_s = (short unsigned *) last;
 					l = (*values_s) + 1;
 					while(--l) {
-						Scores[values_s[l]] += score;
+						Scores[*++values_s] += score;
 					}
 				} else {
 					l = (*last) + 1;
 					while(--l) {
-						Scores[last[l]] += score;
+						Scores[*++last] += score;
 					}
 				}
 				for(l = *bests; l != 0; --l) {
@@ -940,7 +929,7 @@ int get_kmers_for_pair_Sparse(const HashMapKMA *templates, const Penalties *rewa
 int get_kmers_for_pair_pseoudoSparse(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, int *extendScore, const int exhaustive) {
 	
 	int i, j, l, n, end, template, hitCounter, gaps, Ms, MMs, Us, W1s;
-	int W1, U, M, MM, HIT, SU, kmersize, score, *bests, *Scores;
+	int W1, U, M, MM, HIT, SU, kmersize, score, m, mm, *bests, *Scores;
 	unsigned shifter, *values, *last;
 	short unsigned *values_s;
 	char *include;
@@ -1001,159 +990,135 @@ int get_kmers_for_pair_pseoudoSparse(const HashMapKMA *templates, const Penaltie
 			for(;j < end; ++j) {
 				if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
 					if(values == last) {
-						if(kmersize < gaps) {
+						/*
+						gaps == 0 -> Match
+						gaps == kmersize -> 1 MM
+						kmersize < gaps -> several mismatches or indel(s)
+						gaps < kmersize -> deletion
+						*/
+						if(gaps == 0) {
+							/* match */
+							++Ms;
+						} else if(gaps == kmersize) {
+							/* snp */
 							Ms += kmersize;
-							gaps -= kmersize;
-							if(gaps) {
-								/* go for best scenario */
-								if(gaps == 1) {
-									MMs += 2;
-								} else {
-									gaps -= 2;
-									if((MM << 1) + gaps * M < 0) {
-										Ms += gaps;
-										MMs += 2;
-									}
-								}
+							++MMs;
+						} else if(kmersize < gaps) {
+							/* mismatch or insersion */
+							Ms += kmersize;
+							gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+							if(gaps <= 2) {
+								mm = gaps;
+								m = 0;
 							} else {
-								++MMs;
+								mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+								mm = MAX(2, mm);
+								m = MIN(gaps - mm, kmersize);
+								m = MIN(m, mm);
 							}
-						} else if (gaps) {
-							--gaps;
-							++W1s;
-							Us += gaps;
-						} else {
-							++Ms;
-						}
+							
+							/* evaluate best option */
+							if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+								MMs += mm;
+								Ms += m;
+							} else {
+								++W1s;
+								Us += (gaps -1);
+							}
+						} /*else {
+							// unlikely deletion or random k-mer mismatch, 
+							// assume better and go random zero score
+						}*/
+						
 						HIT = j;
 						gaps = 0;
 					} else {
 						if(last) {
-							if(HIT) {
-								HIT += kmersize;
-							} else {
-								HIT = j + kmersize;
-							}
 							score = Ms * M + MMs * MM + Us * U + W1s * W1;
-							if(SU) {
-								values_s = (short unsigned *) last;
-								l = *values_s + 1;
-								while(--l) {
-									Scores[*++values_s] += score;
-									extendScore[*values_s] = HIT;
-								}
-							} else {
-								l = *last + 1;
-								while(--l) {
-									Scores[*++last] += score;
-									extendScore[*last] = HIT;
-								}
+							values_s = (short unsigned *) last;
+							l = SU ? (*values_s + 1) : (*last + 1);
+							while(--l) {
+								template = SU ? *++values_s : *++last;
+								Scores[template] += score;
+								extendScore[template] = HIT;
 							}
+							HIT = j - 1;
+							last = values;
 							score = kmersize * M;
-							MMs = MM << 1;
-							if(SU) {
-								values_s = (short unsigned *) values;
-								n = *values_s + 1;
-								while(--n) {
-									template = *++values_s;
-									if(j < extendScore[template]) {
-										if(extendScore[template] == HIT) {
-											Scores[template] += M;
-										} else {
-											gaps = extendScore[template] - j - 1;
-											Scores[template] += (W1 + gaps * U);
-										}
-									} else if(Scores[template] != 0) {
-										Scores[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Scores[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Scores[template] += Ms;
-												}
-											}
-										} else {
-											Scores[template] += MM;
-										}
-									} else {
-										Scores[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bests[++*bests] = template;
-										}
-									}
-								}
-							} else {
-								n = *(last = values) + 1;
-								while(--n) {
-									template = *++values;
-									if(j < extendScore[template]) {
-										if(extendScore[template] == HIT) {
-											Scores[template] += M;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
+							l = n + 1;
+							while(--l) {
+								template = SU ? *++values_s : *++values;
+								if(Scores[template] != 0) {
+									gaps = HIT - extendScore[template];
+									if(gaps == 0) {
+										/* match */
+										Scores[template] += M;
+									} else if(gaps == kmersize) {
+										/* snp */
+										Scores[template] += score + MM;
+									} else if(kmersize < gaps) {
+										/* mismatch or insersion */
+										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+										if(gaps <= 2) {
+											mm = gaps;
+											m = 0;
 										} else {
-											gaps = extendScore[template] - j - 1;
-											Scores[template] += (W1 + gaps * U);
+											mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+											mm = MAX(2, mm);
+											m = MIN(gaps - mm, kmersize);
+											m = MIN(m, mm);
 										}
-									} else if(Scores[template] != 0) {
-										Scores[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Scores[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Scores[template] += Ms;
-												}
-											}
+										
+										/* evaluate best option */
+										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+											Scores[template] += score + (mm * MM + m * M);
 										} else {
-											Scores[template] += MM;
-										}
-									} else {
-										Scores[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bests[++*bests] = template;
+											Scores[template] += score + (W1 + (gaps - 1) * U);
 										}
+									} /*else {
+										// unlikely deletion or random k-mer mismatch, 
+										// assume better and go random zero score
+									}*/
+								} else {
+									Scores[template] = score;
+									if(include[template] == 0) {
+										include[template] = 1;
+										bests[++*bests] = template;
 									}
 								}
 							}
-						} else if(SU) {
-							values_s = (short unsigned *) values;
-							n = *values_s + 1;
-							Ms = kmersize * M;
-							*bests = 0;
-							while(--n) {
-								Scores[*++values_s] = Ms;
-								include[*values_s] = 1;
-								bests[++*bests] = *values_s;
-							}
 						} else {
-							n = *(last = values) + 1;
+							last = values;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
 							Ms = kmersize * M;
-							*bests = 0;
-							while(--n) {
-								Scores[*++last] = Ms;
-								include[*last] = 1;
-								bests[++*bests] = *last;
+							for(l = 1; l <= n; ++l) {
+								template = SU ? *++values_s : *++values;
+								Scores[template] = Ms;
+								include[template] = 1;
+								bests[l] = template;
 							}
+							*bests = n;
 						}
-						HIT = 0;
+						
+						HIT = j;
 						gaps = 0;
 						Ms = 0;
 						MMs = 0;
 						Us = 0;
 						W1s = 0;
-						last = values;
 					}
 					++hitCounter;
 				} else {
 					++gaps;
 				}
 			}
+			gaps += (qseq->N[i] + 1 - j); /* gap over N's */
 			j = qseq->N[i] + 1;
 		}
+		
 		if(last) {
 			score = Ms * M + MMs * MM + Us * U + W1s * W1;
 			if(SU) {
@@ -2019,7 +1984,7 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
 	}
 	
 	i = 0;
-	if(bestScore && bestScore * kmersize > end) {
+	if(kmersize <= bestScore || bestScore * kmersize > end) {
 		lock(excludeOut);
 		i = deConPrintPtr(bestTemplates, qseq, bestScore, header, flag, out);
 		unlock(excludeOut);
@@ -2031,8 +1996,7 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
 int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
 	
 	int i, j, l, n, end, template, hitCounter, gaps, Ms, MMs, Us, W1s;
-	int HIT, SU, score, bestScore, kmersize;
-	int W1, U, M, MM;
+	int HIT, SU, score, bestScore, kmersize, W1, U, M, MM, m, mm;
 	unsigned shifter, *values, *last;
 	short unsigned *values_s;
 	char *include;
@@ -2083,156 +2047,132 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
 			for(;j < end; ++j) {
 				if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
 					if(values == last) {
-						if(kmersize < gaps) {
+						/*
+						gaps == 0 -> Match
+						gaps == kmersize -> 1 MM
+						kmersize < gaps -> several mismatches or indel(s)
+						gaps < kmersize -> deletion
+						*/
+						if(gaps == 0) {
+							/* match */
+							++Ms;
+						} else if(gaps == kmersize) {
+							/* snp */
 							Ms += kmersize;
-							gaps -= kmersize;
-							if(gaps) {
-								/* go for best scenario */
-								if(gaps == 1) {
-									MMs += 2;
-								} else {
-									gaps -= 2;
-									if((MM << 1) + gaps * M < 0) {
-										Ms += gaps;
-										MMs += 2;
-									}
-								}
+							++MMs;
+						} else if(kmersize < gaps) {
+							/* mismatch or insersion */
+							Ms += kmersize;
+							gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+							if(gaps <= 2) {
+								mm = gaps;
+								m = 0;
 							} else {
-								++MMs;
+								mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+								mm = MAX(2, mm);
+								m = MIN(gaps - mm, kmersize);
+								m = MIN(m, mm);
 							}
-						} else if (gaps) {
-							--gaps;
-							++W1s;
-							Us += gaps;
-						} else {
-							++Ms;
-						}
+							
+							/* evaluate best option */
+							if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+								MMs += mm;
+								Ms += m;
+							} else {
+								++W1s;
+								Us += (gaps -1);
+							}
+						} /*else {
+							// unlikely deletion or random k-mer mismatch, 
+							// assume better and go random zero score
+						}*/
+						
 						HIT = j;
 						gaps = 0;
 					} else {
 						if(last) {
-							if(HIT) {
-								HIT += kmersize;
-							} else {
-								HIT = j + kmersize;
-							}
 							score = Ms * M + MMs * MM + Us * U + W1s * W1;
-							if(SU) {
-								values_s = (short unsigned *) last;
-								l = (*values_s) + 1;
-								while(--l) {
-									Score[(template = values_s[l])] += score;
-									extendScore[template] = HIT;
-								}
-							} else {
-								l = (*last) + 1;
-								while(--l) {
-									Score[(template = last[l])] += score;
-									extendScore[template] = HIT;
-								}
+							values_s = (short unsigned *) last;
+							l = SU ? (*values_s + 1) : (*last + 1);
+							while(--l) {
+								template = SU ? *++values_s : *++last;
+								Score[template] += score;
+								extendScore[template] = HIT;
 							}
-							
+							HIT = j - 1;
+							last = values;
 							score = kmersize * M;
-							MMs = MM << 1;
-							if(SU) {
-								values_s = (short unsigned *) values;
-								n = *values_s;
-								for(l = 1; l <= n; ++l) {
-									if(j < extendScore[(template = values_s[l])]) {
-										if(extendScore[template] == HIT) {
-											Score[template] += M;
-										} else {
-											gaps = extendScore[template] - j - 1;
-											Score[template] += (W1 + gaps * U);
-										}
-									} else if(Score[template] != 0) {
-										Score[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Score[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Score[template] += Ms;
-												}
-											}
-										} else {
-											Score[template] += MM;
-										}
-									} else {
-										Score[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bestTemplates[++*bestTemplates] = template;
-										}
-									}
-								}
-							} else {
-								n = *values;
-								for(l = 1; l <= n; ++l) {
-									if(j < extendScore[(template = values[l])]) {
-										if(extendScore[template] == HIT) {
-											Score[template] += M;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
+							l = n + 1;
+							while(--l) {
+								template = SU ? *++values_s : *++values;
+								if(Score[template] != 0) {
+									gaps = HIT - extendScore[template];
+									if(gaps == 0) {
+										/* match */
+										Score[template] += M;
+									} else if(gaps == kmersize) {
+										/* snp */
+										Score[template] += score + MM;
+									} else if(kmersize < gaps) {
+										/* mismatch or insersion */
+										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+										if(gaps <= 2) {
+											mm = gaps;
+											m = 0;
 										} else {
-											gaps = extendScore[template] - j - 1;
-											Score[template] += (W1 + gaps * U);
+											mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+											mm = MAX(2, mm);
+											m = MIN(gaps - mm, kmersize);
+											m = MIN(m, mm);
 										}
-									} else if(Score[template] != 0) {
-										Score[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Score[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Score[template] += Ms;
-												}
-											}
+										
+										/* evaluate best option */
+										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+											Score[template] += score + (mm * MM + m * M);
 										} else {
-											Score[template] += MM;
-										}
-									} else {
-										Score[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bestTemplates[++*bestTemplates] = template;
+											Score[template] += score + (W1 + (gaps - 1) * U);
 										}
+									} /*else {
+										// unlikely deletion or random k-mer mismatch, 
+										// assume better and go random zero score
+									}*/
+								} else {
+									Score[template] = score;
+									if(include[template] == 0) {
+										include[template] = 1;
+										bestTemplates[++*bestTemplates] = template;
 									}
 								}
 							}
-						} else if(SU) {
-							values_s = (short unsigned *) values;
-							n = *values_s;
-							Ms = kmersize * M;
-							for(l = 1; l <= n; ++l) {
-								Score[(template = values_s[l])] = Ms;
-								include[template] = 1;
-								bestTemplates[l] = template;
-							}
-							*bestTemplates = n;
 						} else {
-							n = *values;
+							last = values;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
 							Ms = kmersize * M;
 							for(l = 1; l <= n; ++l) {
-								Score[(template = values[l])] = Ms;
+								template = SU ? *++values_s : *++values;
+								Score[template] = Ms;
 								include[template] = 1;
 								bestTemplates[l] = template;
 							}
 							*bestTemplates = n;
 						}
-						HIT = 0;
+						
+						HIT = j;
 						gaps = 0;
 						Ms = 0;
 						MMs = 0;
 						Us = 0;
 						W1s = 0;
-						last = values;
 					}
 					++hitCounter;
 				} else {
 					++gaps;
 				}
 			}
+			gaps += (qseq->N[i] + 1 - j); /* gap over N's */
 			j = qseq->N[i] + 1;
 		}
 		if(last) {
@@ -2294,7 +2234,7 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
 	end = qseq->seqlen + 1 - bestScore;
 	
 	i = 0;
-	if(bestScore && bestScore * kmersize > end) {
+	if(kmersize <= bestScore || bestScore * kmersize > end) {
 		lock(excludeOut);
 		i = deConPrintPtr(bestTemplates, qseq, bestScore, header, 0, out);
 		unlock(excludeOut);
@@ -2306,7 +2246,7 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
 int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
 	
 	int i, j, l, end, HIT, gaps, score, Ms, MMs, Us, W1s, W1, U, M, MM;
-	int template, hitCounter, bestScore, bestScore_r, kmersize;
+	int template, hitCounter, bestScore, bestScore_r, kmersize, m, mm;
 	unsigned *values, *last, n, SU, shifter;
 	short unsigned *values_s;
 	char *include;
@@ -2377,156 +2317,132 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 			for(;j < end; ++j) {
 				if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
 					if(values == last) {
-						if(kmersize < gaps) {
+						/*
+						gaps == 0 -> Match
+						gaps == kmersize -> 1 MM
+						kmersize < gaps -> several mismatches or indel(s)
+						gaps < kmersize -> deletion
+						*/
+						if(gaps == 0) {
+							/* match */
+							++Ms;
+						} else if(gaps == kmersize) {
+							/* snp */
 							Ms += kmersize;
-							gaps -= kmersize;
-							if(gaps) {
-								/* go for best scenario */
-								if(gaps == 1) {
-									MMs += 2;
-								} else {
-									gaps -= 2;
-									if((MM << 1) + gaps * M < 0) {
-										Ms += gaps;
-										MMs += 2;
-									}
-								}
+							++MMs;
+						} else if(kmersize < gaps) {
+							/* mismatch or insersion */
+							Ms += kmersize;
+							gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+							if(gaps <= 2) {
+								mm = gaps;
+								m = 0;
 							} else {
-								++MMs;
+								mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+								mm = MAX(2, mm);
+								m = MIN(gaps - mm, kmersize);
+								m = MIN(m, mm);
 							}
-						} else if (gaps) {
-							--gaps;
-							++W1s;
-							Us += gaps;
-						} else {
-							++Ms;
-						}
+							
+							/* evaluate best option */
+							if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+								MMs += mm;
+								Ms += m;
+							} else {
+								++W1s;
+								Us += (gaps -1);
+							}
+						} /*else {
+							// unlikely deletion or random k-mer mismatch, 
+							// assume better and go random zero score
+						}*/
+						
 						HIT = j;
 						gaps = 0;
 					} else {
 						if(last) {
-							if(HIT) {
-								HIT += kmersize;
-							} else {
-								HIT = j + kmersize;
-							}
 							score = Ms * M + MMs * MM + Us * U + W1s * W1;
-							if(SU) {
-								values_s = (short unsigned *) last;
-								l = (*values_s) + 1;
-								while(--l) {
-									Score[(template = values_s[l])] += score;
-									extendScore[template] = HIT;
-								}
-								
-								score = kmersize * M;
-								MMs = MM << 1;
-								values_s = (short unsigned *) values;
-								n = *values_s;
-								for(l = 1; l <= n; ++l) {
-									if(j < extendScore[(template = values_s[l])]) {
-										if(extendScore[template] == HIT) {
-											Score[template] += M;
-										} else {
-											gaps = extendScore[template] - j - 1;
-											Score[template] += (W1 + gaps * U);
-										}
-									} else if(Score[template] != 0) {
-										Score[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Score[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Score[template] += Ms;
-												}
-											}
-										} else {
-											Score[template] += MM;
-										}
-									} else {
-										Score[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bestTemplates[++*bestTemplates] = template;
-										}
-									}
-								}
-							} else {
-								l = (*last) + 1;
-								while(--l) {
-									Score[(template = last[l])] += score;
-									extendScore[template] = HIT;
-								}
-								
-								score = kmersize * M;
-								MMs = MM << 1;
-								n = *values;
-								for(l = 1; l <= n; ++l) {
-									if(j < extendScore[(template = values[l])]) {
-										if(extendScore[template] == HIT) {
-											Score[template] += M;
+							values_s = (short unsigned *) last;
+							l = SU ? (*values_s + 1) : (*last + 1);
+							while(--l) {
+								template = SU ? *++values_s : *++last;
+								Score[template] += score;
+								extendScore[template] = HIT;
+							}
+							HIT = j - 1;
+							last = values;
+							score = kmersize * M;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
+							l = n + 1;
+							while(--l) {
+								template = SU ? *++values_s : *++values;
+								if(Score[template] != 0) {
+									gaps = HIT - extendScore[template];
+									if(gaps == 0) {
+										/* match */
+										Score[template] += M;
+									} else if(gaps == kmersize) {
+										/* snp */
+										Score[template] += score + MM;
+									} else if(kmersize < gaps) {
+										/* mismatch or insersion */
+										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+										if(gaps <= 2) {
+											mm = gaps;
+											m = 0;
 										} else {
-											gaps = extendScore[template] - j - 1;
-											Score[template] += (W1 + gaps * U);
+											mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+											mm = MAX(2, mm);
+											m = MIN(gaps - mm, kmersize);
+											m = MIN(m, mm);
 										}
-									} else if(Score[template] != 0) {
-										Score[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Score[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Score[template] += Ms;
-												}
-											}
+										
+										/* evaluate best option */
+										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+											Score[template] += score + (mm * MM + m * M);
 										} else {
-											Score[template] += MM;
-										}
-									} else {
-										Score[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bestTemplates[++*bestTemplates] = template;
+											Score[template] += score + (W1 + (gaps - 1) * U);
 										}
+									} /*else {
+										// unlikely deletion or random k-mer mismatch, 
+										// assume better and go random zero score
+									}*/
+								} else {
+									Score[template] = score;
+									if(include[template] == 0) {
+										include[template] = 1;
+										bestTemplates[++*bestTemplates] = template;
 									}
 								}
 							}
-						} else if(SU) {
-							values_s = (short unsigned *) values;
-							n = *values_s;
-							Ms = kmersize * M;
-							for(l = 1; l <= n; ++l) {
-								Score[(template = values_s[l])] = Ms;
-								include[template] = 1;
-								bestTemplates[l] = template;
-							}
-							*bestTemplates = n;
 						} else {
-							n = *values;
+							last = values;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
 							Ms = kmersize * M;
 							for(l = 1; l <= n; ++l) {
-								Score[(template = values[l])] = Ms;
+								template = SU ? *++values_s : *++values;
+								Score[template] = Ms;
 								include[template] = 1;
 								bestTemplates[l] = template;
 							}
 							*bestTemplates = n;
 						}
-						HIT = 0;
+						
+						HIT = j;
 						gaps = 0;
 						Ms = 0;
 						MMs = 0;
 						Us = 0;
 						W1s = 0;
-						last = values;
 					}
 					++hitCounter;
 				} else {
 					++gaps;
 				}
 			}
+			gaps += (qseq->N[i] + 1 - j); /* gap over N's */
 			j = qseq->N[i] + 1;
 		}
 		if(last) {
@@ -2620,156 +2536,132 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 			for(;j < end; ++j) {
 				if((values = hashMap_get(templates, getKmer(qseq_r->seq, j, shifter)))) {
 					if(values == last) {
-						if(kmersize < gaps) {
+						/*
+						gaps == 0 -> Match
+						gaps == kmersize -> 1 MM
+						kmersize < gaps -> several mismatches or indel(s)
+						gaps < kmersize -> deletion
+						*/
+						if(gaps == 0) {
+							/* match */
+							++Ms;
+						} else if(gaps == kmersize) {
+							/* snp */
 							Ms += kmersize;
-							gaps -= kmersize;
-							if(gaps) {
-								/* go for best scenario */
-								if(gaps == 1) {
-									MMs += 2;
-								} else {
-									gaps -= 2;
-									if((MM << 1) + gaps * M < 0) {
-										Ms += gaps;
-										MMs += 2;
-									}
-								}
+							++MMs;
+						} else if(kmersize < gaps) {
+							/* mismatch or insersion */
+							Ms += kmersize;
+							gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+							if(gaps <= 2) {
+								mm = gaps;
+								m = 0;
 							} else {
-								++MMs;
+								mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+								mm = MAX(2, mm);
+								m = MIN(gaps - mm, kmersize);
+								m = MIN(m, mm);
 							}
-						} else if (gaps) {
-							--gaps;
-							++W1s;
-							Us += gaps;
-						} else {
-							++Ms;
-						}
+							
+							/* evaluate best option */
+							if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+								MMs += mm;
+								Ms += m;
+							} else {
+								++W1s;
+								Us += (gaps -1);
+							}
+						} /*else {
+							// unlikely deletion or random k-mer mismatch, 
+							// assume better and go random zero score
+						}*/
+						
 						HIT = j;
 						gaps = 0;
 					} else {
 						if(last) {
-							if(HIT) {
-								HIT += kmersize;
-							} else {
-								HIT = j + kmersize;
-							}
 							score = Ms * M + MMs * MM + Us * U + W1s * W1;
-							if(SU) {
-								values_s = (short unsigned *) last;
-								l = (*values_s) + 1;
-								while(--l) {
-									Score_r[(template = values_s[l])] += score;
-									extendScore[template] = HIT;
-								}
-								
-								score = kmersize * M;
-								MMs = MM << 1;
-								values_s = (short unsigned *) values;
-								n = *values_s;
-								for(l = 1; l <= n; ++l) {
-									if(j < extendScore[(template = values_s[l])]) {
-										if(extendScore[template] == HIT) {
-											Score_r[template] += M;
-										} else {
-											gaps = extendScore[template] - j - 1;
-											Score_r[template] += (W1 + gaps * U);
-										}
-									} else if(Score_r[template] != 0) {
-										Score_r[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Score_r[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Score_r[template] += Ms;
-												}
-											}
-										} else {
-											Score_r[template] += MM;
-										}
-									} else {
-										Score_r[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bestTemplates_r[++*bestTemplates_r] = template;
-										}
-									}
-								}
-							} else {
-								l = (*last) + 1;
-								while(--l) {
-									Score_r[(template = last[l])] += score;
-									extendScore[template] = HIT;
-								}
-								
-								score = kmersize * M;
-								MMs = MM << 1;
-								n = *values;
-								for(l = 1; l <= n; ++l) {
-									if(j < extendScore[(template = values[l])]) {
-										if(extendScore[template] == HIT) {
-											Score_r[template] += M;
+							values_s = (short unsigned *) last;
+							l = SU ? (*values_s + 1) : (*last + 1);
+							while(--l) {
+								template = SU ? *++values_s : *++last;
+								Score_r[template] += score;
+								extendScore[template] = HIT;
+							}
+							HIT = j - 1;
+							last = values;
+							score = kmersize * M;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
+							l = n + 1;
+							while(--l) {
+								template = SU ? *++values_s : *++values;
+								if(Score_r[template] != 0) {
+									gaps = HIT - extendScore[template];
+									if(gaps == 0) {
+										/* match */
+										Score_r[template] += M;
+									} else if(gaps == kmersize) {
+										/* snp */
+										Score_r[template] += score + MM;
+									} else if(kmersize < gaps) {
+										/* mismatch or insersion */
+										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
+										if(gaps <= 2) {
+											mm = gaps;
+											m = 0;
 										} else {
-											gaps = extendScore[template] - j - 1;
-											Score_r[template] += (W1 + gaps * U);
+											mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+											mm = MAX(2, mm);
+											m = MIN(gaps - mm, kmersize);
+											m = MIN(m, mm);
 										}
-									} else if(Score_r[template] != 0) {
-										Score_r[template] += score;
-										if((gaps = extendScore[template] - j)) {
-											if(gaps == 1) {
-												Score_r[template] += MMs;
-											} else {
-												gaps -= 2;
-												if((Ms = MMs + gaps * M) < 0) {
-													Score_r[template] += Ms;
-												}
-											}
+										
+										/* evaluate best option */
+										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
+											Score_r[template] += score + (mm * MM + m * M);
 										} else {
-											Score_r[template] += MM;
-										}
-									} else {
-										Score_r[template] = score;
-										if(include[template] == 0) {
-											include[template] = 1;
-											bestTemplates_r[++*bestTemplates_r] = template;
+											Score_r[template] += score + (W1 + (gaps - 1) * U);
 										}
+									} /*else {
+										// unlikely deletion or random k-mer mismatch, 
+										// assume better and go random zero score
+									}*/
+								} else {
+									Score_r[template] = score;
+									if(include[template] == 0) {
+										include[template] = 1;
+										bestTemplates_r[++*bestTemplates_r] = template;
 									}
 								}
 							}
-						} else if(SU) {
-							values_s = (short unsigned *) values;
-							n = *values_s;
-							Ms = kmersize * M;
-							for(l = 1; l <= n; ++l) {
-								Score_r[(template = values_s[l])] = Ms;
-								include[template] = 1;
-								bestTemplates_r[l] = template;
-							}
-							*bestTemplates_r = n;
 						} else {
-							n = *values;
+							last = values;
+							values_s = (short unsigned *) values;
+							n = SU ? *values_s : *values;
 							Ms = kmersize * M;
 							for(l = 1; l <= n; ++l) {
-								Score_r[(template = values[l])] = Ms;
+								template = SU ? *++values_s : *++values;
+								Score_r[template] = Ms;
 								include[template] = 1;
 								bestTemplates_r[l] = template;
 							}
 							*bestTemplates_r = n;
 						}
-						HIT = 0;
+						
+						HIT = j;
 						gaps = 0;
 						Ms = 0;
 						MMs = 0;
 						Us = 0;
 						W1s = 0;
-						last = values;
 					}
 					++hitCounter;
 				} else {
 					++gaps;
 				}
 			}
+			gaps += (qseq->N[i] + 1 - j); /* gap over N's */
 			j = qseq_r->N[i] + 1;
 		}
 		if(last) {
@@ -2833,7 +2725,8 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 	i = 0;
 	if(bestScore > 0 || bestScore_r > 0) {
 		end = qseq->seqlen + 1;
-		if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+		//if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+		if(kmersize <= bestScore || kmersize <= bestScore_r) {
 			if(bestScore > bestScore_r) {
 				lock(excludeOut);
 				i = deConPrintPtr(bestTemplates, qseq, bestScore, header, 0, out);
@@ -3105,7 +2998,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
 	i = 0;
 	if(bestScore > 0 || bestScore_r > 0) {
 		end = qseq->seqlen + 1;
-		if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+		//if((bestScore >= bestScore_r && bestScore * kmersize > (end - bestScore)) || (bestScore < bestScore_r && bestScore_r * kmersize > (end - bestScore_r))) {
+		if(kmersize <= bestScore || kmersize <= bestScore_r) {
 			if(bestScore > bestScore_r) {
 				lock(excludeOut);
 				i = deConPrintPtr(bestTemplates, qseq, bestScore, header, 0, out);
@@ -3148,7 +3042,7 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
 		/* got hits */
 		bestScore = getF(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
 		
-		if(bestScore * kmersize < (qseq->seqlen - bestScore)) {
+		if(kmersize < bestScore && bestScore * kmersize < (qseq->seqlen - bestScore)) {
 			bestScore = 0;
 		}
 	} else {
@@ -3176,7 +3070,7 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
 		} else {
 			bestScore_r = getF(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
 		}
-		if(bestScore_r * kmersize < (qseq_r->seqlen - bestScore_r)) {
+		if(kmersize < bestScore_r && bestScore_r * kmersize < (qseq_r->seqlen - bestScore_r)) {
 			bestScore_r = 0;
 			*regionTemplates = abs(*regionTemplates);
 		}
@@ -3379,7 +3273,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
 			flag |= 2;
 			flag_r |= 2;
 			compScore = MIN((hitCounter + hitCounter_r), (bestScore + bestScore_r));
-			if((qseq->seqlen + qseq_r->seqlen - compScore - (kmersize << 1)) < compScore * kmersize) {
+			if(kmersize <= compScore || (qseq->seqlen + qseq_r->seqlen - compScore - (kmersize << 1)) < compScore * kmersize) {
 				*regionTemplates = -(*regionTemplates);
 				if(0 < regionTemplates[1]) {
 					if(rev) {
@@ -3425,7 +3319,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
 			}
 		} else {
 			hitCounter = MIN(hitCounter, bestScore);
-			hitCounter = (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize;
+			hitCounter = kmersize <= hitCounter || (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize;
 			if(hitCounter) {
 				if(0 < regionTemplates[1]) {
 					if(rev) {
@@ -3445,7 +3339,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
 				}
 			}
 			hitCounter_r = MIN(hitCounter_r, bestScore_r);
-			hitCounter_r = (qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize;
+			hitCounter_r = kmersize <= hitCounter_r || (qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize;
 			if(hitCounter_r) {
 				if(0 < bestTemplates[1]) {
 					if(rev) {
@@ -3485,7 +3379,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
 		return i;
 	} else if(0 < bestScore) {
 		hitCounter = MIN(hitCounter, bestScore);
-		if((qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+		if(kmersize <= hitCounter || (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
 			if(rev) {
 				flag |= 8;
 				flag |= 32;
@@ -3516,7 +3410,7 @@ int save_kmers_penaltyPair(const HashMapKMA *templates, const Penalties *rewards
 		}
 	} else if(0 < bestScore_r) {
 		hitCounter_r = MIN(hitCounter_r, bestScore_r);
-		if((qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize) {
+		if(kmersize <= hitCounter_r || (qseq_r->seqlen - hitCounter_r - kmersize) < hitCounter_r * kmersize) {
 			if(rev) {
 				flag_r |= 8;
 				flag_r |= 32;
@@ -3578,7 +3472,7 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
 	if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) && 
 	(bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {
 		
-		if((qseq->seqlen + qseq_r->seqlen - bestScore) < bestScore * kmersize) {
+		if(kmersize <= bestScore || (qseq->seqlen + qseq_r->seqlen - bestScore) < bestScore * kmersize) {
 			flag = 67;
 			flag_r = 131;
 			


=====================================
spltdb.c
=====================================
@@ -379,10 +379,10 @@ unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *he
 		cfread(header->seq, 1, header->len, inputfile);
 	} else {
 		/* in the case of equally well scoring DBs */
-		fseek(inputfile, infoSize[1] * sizeof(long unsigned) + infoSize[2] * sizeof(int), SEEK_CUR);
+		sfseek(inputfile, infoSize[1] * sizeof(long unsigned) + infoSize[2] * sizeof(int), SEEK_CUR);
 		*out_Tem = infoSize[4];
 		cfread(out_Tem + 1, sizeof(int), *out_Tem, inputfile);
-		fseek(inputfile, infoSize[5], SEEK_CUR);
+		sfseek(inputfile, infoSize[5], SEEK_CUR);
 	}
 	
 	/* get info for next read */
@@ -430,6 +430,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	AlnPoints *points;
 	NWmat *NWmatrices;
 	Assemble_thread *threads, *thread;
+	HashMapCCI *template_index;
 	
 	if(!outputfilename) {
 		fprintf(stderr, " No output file specified!\n");
@@ -471,6 +472,9 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 		}
 	}
 	dbBiases[i] = DB_size;
+	if(!kmersize) {
+		kmersize = *template_lengths;
+	}
 	if(kmersize < 4 || 32 < kmersize) {
 		kmersize = 16;
 	}
@@ -599,7 +603,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	/* open input streams */
 	file_len = strlen(outputfilename);
 	for(i = 0; i < targetNum; ++i) {
-		sprintf(outputfilename + file_len, ".%d", i);
+		j = sprintf(outputfilename + file_len, ".%d", i);
 		while(!(inputfile = fopen(outputfilename, "rb"))) {
 			usleep(100);
 		}
@@ -1401,6 +1405,10 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	aligned_assem->t = smalloc(aligned_assem->size);
 	aligned_assem->s = smalloc(aligned_assem->size);
 	aligned_assem->q = smalloc(aligned_assem->size);
+	seq_in_no = 0;
+	template_index = smalloc(sizeof(HashMapCCI));
+	template_index->size = 0;
+	hashMapCCI_initialize(template_index, matrix->size, kmersize);
 	
 	/* allocate matrcies for NW */
 	i = 1;
@@ -1455,7 +1463,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 		thread->points = seedPoint_init(delta, rewards);
 		thread->points->len = 0;
 		thread->spin = (sparse < 0) ? 10 : 100;
-		
+		thread->template_index = template_index;
 		thread->next = threads;
 		threads = thread;
 		
@@ -1521,6 +1529,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	thread->points->len = 0;
 	thread->next = 0;
 	thread->spin = (sparse < 0) ? 10 : 100;
+	thread->template_index = template_index;
 	
 	/* Do local assemblies of fragments mapping to the same template */
 	depth = 0;
@@ -1550,6 +1559,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
 		ERROR();
 	}
+	thread->seq_in = seq_in_no;
 	templatefilename[file_len] = 0;
 	strcat(templatefilename, ".name");
 	name_file = sfopen(templatefilename, "rb");
@@ -1593,7 +1603,6 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 				printExtendedFeatures(templatefilename, 0, 0, 0, extendedFeatures_out);
 			}
 		} else if(w_scores[template] > 0) {
-			
 			if(progress) {
 				counter += w_scores[template];
 				fprintf(stderr, "# Progress:\t%3lu%%\r", 100 * counter / Nhits);
@@ -1628,10 +1637,11 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 				if(xml) {
 					newIterXML(xml_out, template, t_len, thread->template_name);
 				}
+				
 				/* Do assembly */
 				//status |= assemblyPtr(aligned_assem, template, template_fragments, fileCount, frag_out, aligned, gap_align, qseq, header, matrix, points, NWmatrices);
 				thread->template = template;
-				thread->t_len = 0;
+				thread->t_len = t_len;
 				assembly_KMA_Ptr(thread);
 				
 				/* Depth, ID and coverage */
@@ -1718,9 +1728,6 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 		}
 	}
 	
-	/* destroy index */
-	hashMapCCI_destroy(thread->template_index);
-	
 	/* Close files */
 	close(seq_in_no);
 	fclose(res_out);


=====================================
updateindex.c
=====================================
@@ -75,7 +75,7 @@ int updateDBs_sparse(HashMap *templates, CompDNA *qseq, unsigned template, int M
 		template_slengths[template] = 0;
 		template_ulengths[template] = 0;
 		for(rc = 0; rc < 2; ++rc) {
-			/* revers complement */
+			/* reverse complement */
 			if(rc) {
 				comp_rc(qseq);
 			}
@@ -114,6 +114,9 @@ int updateDBs_sparse(HashMap *templates, CompDNA *qseq, unsigned template, int M
 				qseq->N[0]--;
 			}
 		}
+		if(prefix_len == 0) {
+			comp_rc(qseq);
+		}
 		return 1;
 	}
 	


=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
  * limitations under the License.
 */
 
-#define KMA_VERSION "1.3.19"
+#define KMA_VERSION "1.3.22"


=====================================
xml.c
=====================================
@@ -144,11 +144,11 @@ void capIterXML(FILE *out, const int DB_size, const long unsigned seqsize, const
 	fprintf(out, "</Iteration>\n");
 }
 
-void hitXML(FILE *out, const int template, const char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag) {
+void hitXML(FILE *out, const int template, const unsigned char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag) {
 	
 	static volatile int Lock = 0;
-	volatile int *lock = &Lock;
 	static int num = 0;
+	volatile int *lock = &Lock;
 	int i, Ms, MMs, W1s, Us, gap, pos, **d;
 	unsigned char *t, *q;
 	char *s, bases[6] = "ACGTN-";
@@ -227,4 +227,17 @@ void hitXML(FILE *out, const int template, const char *template_name, const Aln
 	fprintf(out, "\t</Hit_hsps>\n");
 	fprintf(out, "</Hit>\n");
 	unlock(lock);
+	
+	/* here */
+	/*
+	fprintf(stdout, "%s\n", template_name);
+	i = 0;
+	for(i = 0; i < aligned->len; i += 60) {
+		fprintf(stdout, "t:\t%.60s\n", aligned->t + i);
+		fprintf(stdout, "s:\t%.60s\n", aligned->s + i);
+		fprintf(stdout, "q:\t%.60s\n\n", aligned->q + i);
+	}
+	fprintf(stdout, "\n\n");
+	*/
+	
 }


=====================================
xml.h
=====================================
@@ -26,4 +26,4 @@ void closeCapXML(FILE *out);
 void newIterXML(FILE *out, const int template, const int t_len, const char *template_name);
 double getEntropy(const unsigned char *aligned_assem_q, const int len);
 void capIterXML(FILE *out, const int DB_size, const long unsigned seqsize, const int t_len, const int readCounts, const double p_value, const long read_score, const unsigned char *aligned_assem_q, const int len);
-void hitXML(FILE *out, const int template, const char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag);
+void hitXML(FILE *out, const int template, const unsigned char *template_name, const Aln *aligned, const AlnScore *alnStat, const Penalties *rewards, const int flag);



View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/fbe9dfdc5b4c63157d8e24f78bdb0fd124f573bd

-- 
View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/fbe9dfdc5b4c63157d8e24f78bdb0fd124f573bd
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210624/36c37a66/attachment-0001.htm>


More information about the debian-med-commit mailing list