[med-svn] [Git][med-team/kma][debian/experimental] 4 commits: [skip ci] Update email

Nilesh Patra gitlab at salsa.debian.org
Sat Mar 20 18:57:43 GMT 2021



Nilesh Patra pushed to branch debian/experimental at Debian Med / kma


Commits:
b0af6e01 by Nilesh Patra at 2021-03-21T00:22:40+05:30
[skip ci] Update email

- - - - -
45823d7f by Nilesh Patra at 2021-03-21T00:23:19+05:30
New upstream version 1.3.13
- - - - -
049034e8 by Nilesh Patra at 2021-03-21T00:23:20+05:30
Update upstream source from tag 'upstream/1.3.13'

Update to upstream version '1.3.13'
with Debian dir 67722e805c4f0fc3ce70cf9e752243f54f103ca7
- - - - -
6a009189 by Nilesh Patra at 2021-03-21T00:23:57+05:30
Update changelog

- - - - -


17 changed files:

- assembly.c
- debian/changelog
- debian/control
- frags.c
- index.c
- kma.c
- mt1.c
- mt1.h
- nw.c
- runkma.c
- runkma.h
- savekmers.c
- spltdb.c
- spltdb.h
- vcf.c
- vcf.h
- version.h


Changes:

=====================================
assembly.c
=====================================
@@ -685,16 +685,16 @@ void * assemble_KMA_threaded(void *arg) {
 	while(i < asm_len) {
 		/* call template */
 		if(pos < t_len) {
-			aligned_assem->t[i] = bases[getNuc(template_index->seq, pos)]; 
+			bestNuc = getNuc(template_index->seq, pos);
 		} else {
-			aligned_assem->t[i] = '-';
+			bestNuc = 5;
 		}
+		aligned_assem->t[i] = bases[bestNuc];
 		
 		/* call query */
-		bestNuc = 0;
-		bestScore = assembly[pos].counts[0];
-		depthUpdate = bestScore;
-		for(j = 1; j < 6; ++j) {
+		bestScore = assembly[pos].counts[bestNuc];
+		depthUpdate = 0;
+		for(j = 0; j < 6; ++j) {
 			if(bestScore < assembly[pos].counts[j]) {
 				bestScore = assembly[pos].counts[j];
 				bestNuc = j;
@@ -704,7 +704,9 @@ void * assemble_KMA_threaded(void *arg) {
 		bestNuc = bases[bestNuc];
 		
 		/* check for minor base call */
-		if((bestScore << 1) < depthUpdate) {
+		if(!depthUpdate) {
+			bestNuc = '-';
+		} else if((bestScore << 1) < depthUpdate) {
 			if(bestNuc == '-') {
 				bestBaseScore = assembly[pos].counts[4];
 				bestNuc = 4;
@@ -719,14 +721,20 @@ void * assemble_KMA_threaded(void *arg) {
 				bestNuc = tolower(bestNuc);
 			}
 			bestScore = depthUpdate - assembly[pos].counts[5];
+		} else if(depthUpdate < bcd) {
+			/* too low depth */
+			bestNuc = tolower(bestNuc);
 		}
 		
 		/* determine base at current position */
+		/*
 		if(bcd <= depthUpdate) {
 			bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
 		} else {
 			bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[pos]);
 		}
+		*/
+		bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
 		aligned_assem->q[i] = bestNuc;
 		
 		if(bestNuc != '-') {
@@ -1128,8 +1136,7 @@ void * assemble_KMA_dense_threaded(void *arg) {
 		aligned_assem->t[i] = bases[bestNuc];
 		
 		/* call query */
-		bestNuc = 5;
-		bestScore = 0;
+		bestScore = assembly[i].counts[bestNuc];
 		depthUpdate = 0;
 		for(j = 0; j < 6; ++j) {
 			if(bestScore < assembly[i].counts[j]) {
@@ -1141,7 +1148,9 @@ void * assemble_KMA_dense_threaded(void *arg) {
 		bestNuc = bases[bestNuc];
 		
 		/* Check for minor base call */
-		if((bestScore << 1) < depthUpdate) {
+		if(!depthUpdate) {
+			bestNuc = '-';
+		} else if((bestScore << 1) < depthUpdate) {
 			if(bestNuc == '-') {
 				bestBaseScore = 0;
 				bestNuc = 4;
@@ -1156,14 +1165,20 @@ void * assemble_KMA_dense_threaded(void *arg) {
 				bestNuc = tolower(bestNuc);
 			}
 			bestScore = depthUpdate - assembly[i].counts[5];
+		} else if(depthUpdate < bcd) {
+			/* too low depth */
+			bestNuc = tolower(bestNuc);
 		}
 		
 		/* determine base at current position */
+		/*
 		if(bcd <= depthUpdate) {
 			bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[i]);
 		} else {
 			bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[i]);
 		}
+		*/
+		bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[i]);
 		aligned_assem->q[i] = bestNuc;
 		
 		if(bestNuc != '-') {
@@ -1476,16 +1491,16 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
 			while(i < end) {
 				/* call template */
 				if(pos < t_len) {
-					aligned_assem->t[i] = bases[getNuc(seq, pos)]; 
+					bestNuc = getNuc(seq, pos);
 				} else {
-					aligned_assem->t[i] = '-';
+					bestNuc = 5;
 				}
+				aligned_assem->t[i] = bases[bestNuc];
 				
 				/* call query */
-				bestNuc = 0;
-				bestScore = assembly[pos].counts[0];
-				depthUpdate = bestScore;
-				for(j = 1; j < 6; ++j) {
+				bestScore = assembly[pos].counts[bestNuc];
+				depthUpdate = 0;
+				for(j = 0; j < 6; ++j) {
 					if(bestScore < assembly[pos].counts[j]) {
 						bestScore = assembly[pos].counts[j];
 						bestNuc = j;
@@ -1495,7 +1510,9 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
 				bestNuc = bases[bestNuc];
 				
 				/* check for minor base call */
-				if((bestScore << 1) < depthUpdate) {
+				if(!depthUpdate) {
+					bestNuc = '-';
+				} else if((bestScore << 1) < depthUpdate) {
 					if(bestNuc == '-') {
 						bestBaseScore = assembly[pos].counts[4];
 						bestNuc = 4;
@@ -1510,14 +1527,20 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
 						bestNuc = tolower(bestNuc);
 					}
 					bestScore = depthUpdate - assembly[pos].counts[5];
+				} else if(depthUpdate < bcd) {
+					/* too low depth */
+					bestNuc = tolower(bestNuc);
 				}
 				
 				/* determine base at current position */
+				/*
 				if(bcd <= depthUpdate) {
 					bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
 				} else {
 					bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[pos]);
 				}
+				*/
+				bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
 				aligned_assem->q[i] = bestNuc;
 				
 				if(bestNuc != '-') {
@@ -1616,11 +1639,11 @@ void * assemble_KMA(void *arg) {
 	static volatile int excludeIn[1] = {0}, excludeOut[1] = {0}, excludeMatrix[1] = {0};
 	static volatile int thread_wait = 0, thread_init = 0, thread_begin = 0;
 	static volatile int mainTemplate = -2, next;
-	static int t_len, load;
+	static int t_len, load, seq_in;
 	static char *template_name;
 	static HashMapCCI *template_index;
 	Assemble_thread *thread = arg;
-	int i, minlen, aln_len, kmersize, sam, chunk, seq_in, ef, template;
+	int i, minlen, aln_len, kmersize, sam, chunk, ef, template;
 	int read_score, asm_len, nextTemplate, file_i, file_count, delta, status;
 	int thread_num, mq, bcd, start, end, q_start, q_end;
 	int stats[5], buffer[8], *qBoundPtr;


=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+kma (1.3.13-1~0exp0) experimental; urgency=medium
+
+  * New upstream version 1.3.13
+
+ -- Nilesh Patra <nilesh at debian.org>  Sun, 21 Mar 2021 00:23:28 +0530
+
 kma (1.3.11-1~0exp0) experimental; urgency=medium
 
   * New upstream version 1.3.11


=====================================
debian/control
=====================================
@@ -1,6 +1,6 @@
 Source: kma
 Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>, Nilesh Patra <npatra974 at gmail.com>
+Uploaders: Andreas Tille <tille at debian.org>, Nilesh Patra <nilesh at debian.org>
 Section: science
 Priority: optional
 Build-Depends: debhelper-compat (= 13),


=====================================
frags.c
=====================================
@@ -85,13 +85,15 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
 	/* copy seq */
 	update = (char *) dest->next;
 	++q_len;
+	--qseq;
 	while(--q_len) {
-		*update++ = bases[*qseq++];
+		*update++ = bases[*++qseq];
 	}
 	avail -= check;
 	
 	check = 33;
 	if(avail < check) {
+		dest->bytes = avail;
 		writeGzFileBuff(dest);
 		avail = dest->bytes;
 		update = (char *) dest->next;
@@ -102,6 +104,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
 	
 	for(i = 1; i < bestHits; ++i) {
 		if(avail < 11) {
+			dest->bytes = avail;
 			writeGzFileBuff(dest);
 			avail = dest->bytes;
 			update = (char *) dest->next;
@@ -112,6 +115,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
 	}
 	
 	if(avail < 11) {
+		dest->bytes = avail;
 		writeGzFileBuff(dest);
 		avail = dest->bytes;
 		update = (char *) dest->next;
@@ -121,6 +125,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
 	update += check;
 	for(i = 1; i < bestHits; ++i) {
 		if(avail < 11) {
+			dest->bytes = avail;
 			writeGzFileBuff(dest);
 			avail = dest->bytes;
 			update = (char *) dest->next;
@@ -131,6 +136,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
 	}
 	
 	if(avail < 11) {
+		dest->bytes = avail;
 		writeGzFileBuff(dest);
 		avail = dest->bytes;
 		update = (char *) dest->next;
@@ -139,7 +145,8 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
 	avail -= check;
 	update += check;
 	for(i = 1; i < bestHits; ++i) {
-		if(avail < 11) {
+		if(avail < 12) {
+			dest->bytes = avail;
 			writeGzFileBuff(dest);
 			avail = dest->bytes;
 			update = (char *) dest->next;
@@ -151,6 +158,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
 	
 	check = header->len + 1;
 	if(avail < check) {
+		dest->bytes = avail;
 		writeGzFileBuff(dest);
 		avail = dest->bytes;
 		update = (char *) dest->next;


=====================================
index.c
=====================================
@@ -231,8 +231,9 @@ int index_main(int argc, char *argv[]) {
 				} else if(kmersize == 0) {
 					fprintf(stderr, "# Invalid kmersize parsed, using default\n");
 					kmersize = 16;
-				} else if(kmersize > 32) {
-					kmersize = 32;
+				} else if(kmersize > 31) {
+					fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+					exit(1);
 				}
 				kmerindex = kmersize;
 			}
@@ -246,8 +247,9 @@ int index_main(int argc, char *argv[]) {
 				} else if(kmersize == 0) {
 					fprintf(stderr, "# Invalid kmersize parsed, using default\n");
 					kmersize = 16;
-				} else if(kmersize > 32) {
-					kmersize = 32;
+				} else if(kmersize > 31) {
+					fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+					exit(1);
 				}
 			}
 		} else if(strcmp(argv[args], "-k_i") == 0) {
@@ -260,8 +262,9 @@ int index_main(int argc, char *argv[]) {
 				} else if(kmerindex == 0) {
 					fprintf(stderr, "# Invalid kmersize parsed, using default\n");
 					kmerindex = 16;
-				} else if(kmerindex > 32) {
-					kmerindex = 32;
+				} else if(kmerindex > 31) {
+					fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+					exit(1);
 				}
 			}
 		} else if(strcmp(argv[args], "-CS") == 0) {


=====================================
kma.c
=====================================
@@ -207,12 +207,11 @@ int kma_main(int argc, char *argv[]) {
 	static unsigned xml, nc, nf, shm, exhaustive, verbose;
 	static char *outputfilename, *templatefilename, **templatefilenames;
 	static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
-	static double ID_t, scoreT, coverT, mrc, evalue;
+	static double ID_t, scoreT, coverT, mrc, evalue, minFrac, support;
 	static Penalties *rewards;
 	int i, j, args, exe_len, fileCount, size, escape, tmp, step1, step2;
 	unsigned totFrags;
 	char *to2Bit, *exeBasic, *myTemplatefilename;
-	double support;
 	FILE *templatefile, *ioStream;
 	time_t t0, t1;
 	Qseqs qseq;
@@ -254,6 +253,8 @@ int kma_main(int argc, char *argv[]) {
 		kmersize = 0;
 		minlen = 16;
 		evalue = 0.05;
+		support = 0.0;
+		minFrac = 1.0;
 		exhaustive = 0;
 		shm = 0;
 		mq = 0;
@@ -498,8 +499,9 @@ int kma_main(int argc, char *argv[]) {
 					} else if(kmersize == 0) {
 						fprintf(stderr, "# Invalid kmersize parsed, using default\n");
 						kmersize = 16;
-					} else if(kmersize > 32) {
-						kmersize = 32;
+					} else if(kmersize > 31) {
+						fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+						exit(1);
 					}
 				}
 			} else if(strcmp(argv[args], "-ml") == 0) {
@@ -590,8 +592,8 @@ int kma_main(int argc, char *argv[]) {
 				one2one = 0;
 			} else if(strcmp(argv[args], "-proxi") == 0) {
 				if(++args < argc) {
-					support = strtod(argv[args], &exeBasic);
-					if(*exeBasic != 0 || support < 0 || 1 < support) {
+					minFrac = strtod(argv[args], &exeBasic);
+					if(*exeBasic != 0 || minFrac < 0 || 1 < minFrac) {
 						fprintf(stderr, "Invalid argument at \"-proxi\".\n");
 						exit(1);
 					} else {
@@ -603,13 +605,13 @@ int kma_main(int argc, char *argv[]) {
 						getF = &getF_Proxi;
 						getR = &getR_Proxi;
 						getChainTemplates = &getProxiChainTemplates;
-						getMatch((int *)(&support), 0);
-						getMatchSparse((int *)(&support), 0, 0, 0, 0, 0);
-						getSecondPen((int *)(&support), 0, 0, 0, 0, 0, 0, 0);
-						getF((int *)(&support), 0, 0, 0, 0);
-						ankerAndClean((int *)(&support), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-						ankerAndClean_MEM((int *)(&support), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-						getProxiChainTemplates(0, (const Penalties *)(&support), 0, 0, 0, 0, 0);
+						getMatch((int *)(&minFrac), 0);
+						getMatchSparse((int *)(&minFrac), 0, 0, 0, 0, 0);
+						getSecondPen((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0);
+						getF((int *)(&minFrac), 0, 0, 0, 0);
+						ankerAndClean((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+						ankerAndClean_MEM((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+						getProxiChainTemplates(0, (const Penalties *)(&minFrac), 0, 0, 0, 0, 0);
 					}
 				} else {
 					fprintf(stderr, "Need argument at: \"-proxi\".\n");
@@ -633,7 +635,7 @@ int kma_main(int argc, char *argv[]) {
 			} else if(strcmp(argv[args], "-p") == 0 || strcmp(argv[args], "-e") == 0) {
 				if(++args < argc) {
 					evalue = strtod(argv[args], &exeBasic);
-					if(*exeBasic != 0) {
+					if(*exeBasic != 0 || evalue < 0 || 1.0 < evalue) {
 						fprintf(stderr, "Invalid argument at \"%s\".\n", argv[--args]);
 						exit(1);
 					}
@@ -642,7 +644,7 @@ int kma_main(int argc, char *argv[]) {
 				if(++args < argc && argv[args][0] != '-') {
 					significantBase = &significantAndSupport;
 					support = strtod(argv[args], &exeBasic);
-					if(*exeBasic != 0 || 1 < support) {
+					if(*exeBasic != 0 || support < 0 || 1 < support) {
 						fprintf(stderr, "Invalid argument at \"-bc\".\n");
 						exit(1);
 					} else {
@@ -1285,7 +1287,7 @@ int kma_main(int argc, char *argv[]) {
 	} else if(Mt1) {
 		myTemplatefilename = smalloc(strlen(templatefilename) + 64);
 		strcpy(myTemplatefilename, templatefilename);
-		runKMA_Mt1(myTemplatefilename, outputfilename, strjoin(argv, argc), kmersize, minlen, rewards, ID_t, mq, scoreT, mrc, evalue, bcd, Mt1, ref_fsa, print_matrix, vcf, xml, sam, nc, nf, thread_num);
+		runKMA_Mt1(myTemplatefilename, outputfilename, strjoin(argv, argc), kmersize, minlen, rewards, ID_t, mq, scoreT, mrc, evalue, support, bcd, Mt1, ref_fsa, print_matrix, vcf, xml, sam, nc, nf, thread_num);
 		free(myTemplatefilename);
 		fprintf(stderr, "# Closing files\n");
 	} else if(step2) {
@@ -1304,11 +1306,11 @@ int kma_main(int argc, char *argv[]) {
 		myTemplatefilename = smalloc(strlen(templatefilename) + 64);
 		strcpy(myTemplatefilename, templatefilename);
 		if(spltDB == 0 && targetNum != 1) {
-			status |= runKMA_spltDB(templatefilenames, targetNum, outputfilename, argc, argv, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+			status |= runKMA_spltDB(templatefilenames, targetNum, outputfilename, argc, argv, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
 		} else if(mem_mode) {
-			status |= runKMA_MEM(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+			status |= runKMA_MEM(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
 		} else {
-			status |= runKMA(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+			status |= runKMA(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
 		}
 		free(myTemplatefilename);
 		fprintf(stderr, "# Closing files\n");


=====================================
mt1.c
=====================================
@@ -17,6 +17,7 @@
  * limitations under the License.
 */
 #define _XOPEN_SOURCE 600
+#include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -80,9 +81,9 @@ void printFsa_pairMt1(Qseqs *header, Qseqs *qseq, Qseqs *header_r, Qseqs *qseq_r
 	
 }
 
-void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num) {
+void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num) {
 	
-	int i, j, aln_len, t_len, coverScore, file_len, DB_size, delta;
+	int i, j, aln_len, t_len, coverScore, file_len, DB_size, delta, seq_in;
 	int *template_lengths;
 	long unsigned read_score, seeker;
 	double p_value, id, q_id, cover, q_cover;
@@ -213,10 +214,13 @@ void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int
 	}
 	
 	strcat(templatefilename, ".seq.b");
-	DB_file = sfopen(templatefilename, "rb");
+	seq_in = open(templatefilename, O_RDONLY);
+	if(seq_in == -1) {
+		ERROR();
+	}
 	templatefilename[file_len] = 0;
-	template_index = alignLoad_fly(0, fileno(DB_file), *template_lengths, kmersize, seeker);
-	fclose(DB_file);
+	template_index = alignLoad_fly(0, seq_in, *template_lengths, kmersize, seeker);
+	close(seq_in);
 	
 	/* get name */
 	strcat(templatefilename, ".name");
@@ -427,7 +431,7 @@ void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int
 				updateMatrix(matrix_out, thread->template_name, template_index->seq, matrix, t_len);
 			}
 			if(vcf) {
-				updateVcf(thread->template_name, template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+				updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
 			}
 		}
 		/* destroy this DB index */


=====================================
mt1.h
=====================================
@@ -24,4 +24,4 @@
 
 void printFsaMt1(Qseqs *header, Qseqs *qseq, CompDNA *compressor, FILE *out);
 void printFsa_pairMt1(Qseqs *header, Qseqs *qseq, Qseqs *header_r, Qseqs *qseq_r, CompDNA *compressor, FILE *out);
-void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num);
+void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num);


=====================================
nw.c
=====================================
@@ -45,6 +45,7 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
 		t_len += aligned->pos;
 	}
 	query = (unsigned char*)(queryOrg + q_s);
+	Stat.pos = 0;
 	
 	if(t_len == 0 || q_len == 0) {
 		if(t_len == q_len) {
@@ -109,7 +110,6 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
 	E = matrices->E;
 	thisScore = (t_len + q_len) * (MM + U + W1);
 	Stat.score = thisScore;
-	Stat.pos = 0;
 	if(0 < k) {
 		E_ptr = E;
 		for(m = 0; m < t_len; ++m) {
@@ -331,6 +331,7 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
 		t_len += template_length;
 	}
 	query = (unsigned char*)(queryOrg + q_s);
+	Stat.pos = 0;
 	
 	if(t_len == 0 || q_len == 0) {
 		if(t_len == q_len) {
@@ -402,7 +403,6 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
 	E = matrices->E;
 	thisScore = (t_len + q_len) * (MM + U + W1);
 	Stat.score = thisScore;
-	Stat.pos = 0;
 	E_ptr = E + (t_len * (bq_len + 1));
 	c_pos = (t_len + q_len) >> 1;
 	
@@ -661,6 +661,7 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
 		t_len += template_length;
 	}
 	query = (unsigned char*)(queryOrg + q_s);
+	Stat.pos = 0;
 	
 	if(t_len == 0 || q_len == 0) {
 		if(t_len == q_len) {
@@ -709,7 +710,6 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
 	E = matrices->E;
 	thisScore = (t_len + q_len) * (MM + U + W1);
 	Stat.score = thisScore;
-	Stat.pos = 0;
 	if(0 < k) {
 		E_ptr = E;
 		for(m = 0; m < t_len; ++m) {
@@ -912,6 +912,7 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
 		t_len += template_length;
 	}
 	query = (unsigned char*)(queryOrg + q_s);
+	Stat.pos = 0;
 	
 	if(t_len == 0 || q_len == 0) {
 		if(t_len == q_len) {
@@ -967,7 +968,6 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
 	E = matrices->E;
 	thisScore = (t_len + q_len) * (MM + U + W1);
 	Stat.score = thisScore;
-	Stat.pos = 0;
 	E_ptr = E + (t_len * (bq_len + 1));
 	c_pos = (t_len + q_len) >> 1;
 	


=====================================
runkma.c
=====================================
@@ -17,6 +17,7 @@
  * limitations under the License.
 */
 #define _XOPEN_SOURCE 600
+#include <fcntl.h>
 #include <limits.h>
 #include <pthread.h>
 #include <stdio.h>
@@ -135,7 +136,7 @@ char * nameLoad(Qseqs *name, FILE *infile) {
 	return (char *) name->seq;
 }
 
-int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
 	
 	int i, j, tmp_template, tmp_tmp_template, file_len, bestTemplate, tot;
 	int template, bestHits, t_len, start, end, aln_len, status, rand, sparse;
@@ -149,7 +150,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 	long unsigned *w_scores, *uniq_alignment_scores, *alignment_scores;
 	double tmp_score, bestScore, id, q_id, cover, q_cover, p_value;
 	long double depth, expected, q_value;
-	FILE *inputfile, *frag_in_raw, *seq_in, *res_out, *name_file;
+	FILE *inputfile, *frag_in_raw, *res_out, *name_file;
 	FILE *alignment_out, *consensus_out, *frag_out_raw, **template_fragments;
 	FILE *extendedFeatures_out, *xml_out;
 	time_t t0, t1;
@@ -191,11 +192,14 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 	
 	/* load databases */
 	strcat(templatefilename, ".seq.b");
-	seq_in = sfopen(templatefilename, "rb");
-	fseek(seq_in, 0, SEEK_END);
-	seqin_size = 4 * ftell(seq_in);
-	fseek(seq_in, 0, SEEK_SET);
-	seq_in_no = fileno(seq_in);
+	seq_in_no = open(templatefilename, O_RDONLY);
+	if(seq_in_no == -1) {
+		ERROR();
+	}
+	seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
+	if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
+		ERROR();
+	}
 	templatefilename[file_len] = 0;
 	templates_index = calloc(DB_size, sizeof(HashMapCCI*));
 	if(!templates_index) {
@@ -1254,7 +1258,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 						printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
 					}
 					if(vcf) {
-						updateVcf(thread->template_name, templates_index[template]->seq, evalue, t_len, matrix, vcf, vcf_out);
+						updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
 					}
 				}
 			} else {
@@ -1326,7 +1330,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 	return status;
 }
 
-int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
 	
 	/* runKMA_MEM is a memory saving version of runKMA,
 	   at the cost it chooses best templates based on kmers
@@ -1344,7 +1348,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 	long unsigned *w_scores, *uniq_alignment_scores, *alignment_scores;
 	double tmp_score, bestScore, id, cover, q_id, q_cover, p_value;
 	long double depth, q_value, expected;
-	FILE *inputfile, *frag_in_raw, *seq_in, *res_out, *name_file;
+	FILE *inputfile, *frag_in_raw, *res_out, *name_file;
 	FILE *alignment_out, *consensus_out, *frag_out_raw, **template_fragments;
 	FILE *extendedFeatures_out, *xml_out;
 	time_t t0, t1;
@@ -1395,11 +1399,11 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 		kmersize = 16;
 	}
 	strcat(templatefilename, ".seq.b");
-	seq_in = sfopen(templatefilename, "rb");
-	fseek(seq_in, 0, SEEK_END);
-	seqin_size = 4 * ftell(seq_in);
-	fseek(seq_in, 0, SEEK_SET);
-	seq_in_no = fileno(seq_in);
+	seq_in_no = open(templatefilename, O_RDONLY);
+	if(seq_in_no == -1) {
+		ERROR();
+	}
+	seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
 	if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
 		ERROR();
 	}
@@ -1502,6 +1506,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 	while((rc_flag = get_ankers(matched_templates, qseq_comp, header, &flag, inputfile)) != 0) {
 		if(*matched_templates) { // SE
 			read_score = 0;
+			qseq_r->len = 0;
 		} else { // PE
 			read_score = get_ankers(matched_templates, qseq_r_comp, header_r, &flag_r, inputfile);
 			read_score = labs(read_score);
@@ -1536,7 +1541,6 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 			if(rc_flag < 0 && 0 < matched_templates[*matched_templates]) {
 				bestHits = -bestHits;
 			}
-			
 			if(read_score && kmersize <= qseq_r->len) {
 				unCompDNA(qseq_r_comp, qseq_r->seq);
 				update_Scores_pe(qseq->seq, qseq->len, qseq_r->seq, qseq_r->len, bestHits, best_read_score + read_score, best_start_pos, best_end_pos, bestTemplates, header, header_r, flag, flag_r, alignment_scores, uniq_alignment_scores, frag_out_raw);
@@ -2420,7 +2424,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 						printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
 					}
 					if(vcf) {
-						updateVcf(thread->template_name, thread->template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+						updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
 					}
 				}
 			} else {
@@ -2475,7 +2479,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 	}
 	
 	/* Close files */
-	fclose(seq_in);
+	close(seq_in_no);
 	fclose(res_out);
 	if(alignment_out) {
 		fclose(alignment_out);


=====================================
runkma.h
=====================================
@@ -26,5 +26,5 @@
 unsigned char * ustrdup(unsigned char *src, size_t n);
 int load_DBs_KMA(char *templatefilename, long unsigned **alignment_scores, long unsigned **uniq_alignment_scores, int **template_lengths, unsigned shm);
 char * nameLoad(Qseqs *name, FILE *infile);
-int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
-int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);


=====================================
savekmers.c
=====================================
@@ -1885,7 +1885,8 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
 		}
 		
 		/* get best match(es) */
-		if(hitCounter * kmersize > (n_kmers - hitCounter)) {
+		/*if(hitCounter * kmersize > (n_kmers - hitCounter)) {*/
+		if(hitCounter) {
 			bestScore = getMatchSparse(bestTemplates, Score, kmersize, n_kmers, M, MM);
 			
 			/*
@@ -2007,7 +2008,8 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
 		qseq->N[0]--;
 		
 		/* get best match(es) */
-		if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+		/*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+		if(hitCounter) {
 			bestScore = getMatch(bestTemplates, Score);
 		} else {
 			*bestTemplates = clearScore(bestTemplates, Score);
@@ -2258,8 +2260,9 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
 	qseq->N[0]--;
 	
 	/* get best match(es) */
-	//clearScore(bestTemplates, extendScore);
-	if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+	/*clearScore(bestTemplates, extendScore);*/
+	/*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+	if(hitCounter) {
 		bestScore = getMatch(bestTemplates, Score);
 		/*
 		bestHits = 0;
@@ -2549,8 +2552,9 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 		}
 		
 		/* get best match(es) */
-		//clearScore(bestTemplates, extendScore);
-		if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+		/* clearScore(bestTemplates, extendScore); */
+		/* if(hitCounter * kmersize > (end - hitCounter - kmersize)) { */
+		if(hitCounter) {
 			bestScore = getMatch(bestTemplates, Score);
 			/*
 			bestHits = 0;
@@ -2791,8 +2795,9 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 		}
 		
 		/* get best match(es) */
-		//clearScore(bestTemplates_r, extendScore);
-		if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+		/*clearScore(bestTemplates_r, extendScore);*/
+		/*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+		if(hitCounter) {
 			bestScore_r = getMatch(bestTemplates_r, Score_r);
 			
 			/*
@@ -2953,7 +2958,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
 		reps = 0;
 		
 		/* get best match(es) */
-		if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+		/*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+		if(hitCounter) {
 			
 			bestScore = getMatch(bestTemplates, Score);
 			
@@ -3064,7 +3070,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
 		reps = 0;
 		
 		/* get best match(es) */
-		if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+		/*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+		if(hitCounter) {
 			bestScore_r = getMatch(bestTemplates_r, Score_r);
 			
 			/*
@@ -3135,7 +3142,8 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
 	}
 	
 	/* get forward */
-	if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive)) && (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+	/*if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive)) && (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {*/
+	if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive))) {
 		/* got hits */
 		bestScore = getF(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
 		
@@ -3160,7 +3168,8 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
 	*extendScore = 1;
 	
 	/* get reverse */
-	if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive)) && (qseq_r->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+	/*if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive)) && (qseq_r->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {*/
+	if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive))) {
 		if(bestScore) {
 			bestScore_r = getR(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
 		} else {
@@ -3562,8 +3571,10 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
 	*extendScore = 1;
 	
 	/* get reverse */
-	if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) && 
+	/*if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) && 
 		(qseq->seqlen + qseq_r->seqlen - hitCounter - hitCounter_r - (kmersize << 1)) < (hitCounter + hitCounter_r) * kmersize && 
+	(bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {*/
+	if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) && 
 	(bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {
 		
 		if((qseq->seqlen + qseq_r->seqlen - bestScore) < bestScore * kmersize) {
@@ -3952,8 +3963,9 @@ int save_kmers_HMM(const HashMapKMA *templates, const Penalties *rewards, int *b
 			stop = j + kmersize - 1;
 			
 			/* evaluate hit */
-			if(hitCounter > 0 && (hitCounter * kmersize > (stop - start - hitCounter + kmersize)) 
-				&& ((stop - start) > minLen || start == 0 || stop == seqlen)) {
+			/*if(hitCounter > 0 && (hitCounter * kmersize > (stop - start - hitCounter + kmersize)) 
+				&& ((stop - start) > minLen || start == 0 || stop == seqlen)) {*/
+			if(hitCounter > 0 && ((stop - start) > minLen || start == 0 || stop == seqlen)) {
 				if(deCon) {
 					if(SU) {
 						for(k = start; k < j; ++k) {


=====================================
spltdb.c
=====================================
@@ -17,6 +17,7 @@
  * limitations under the License.
 */
 #define _XOPEN_SOURCE 600
+#include <fcntl.h>
 #include <limits.h>
 #include <pthread.h>
 #include <stdio.h>
@@ -395,7 +396,7 @@ unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *he
 	return num;
 }
 
-int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
 	
 	/* https://www.youtube.com/watch?v=LtXEMwSG5-8 */
 	
@@ -414,7 +415,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	double tmp_score, bestScore, id, cover, q_id, q_cover, p_value;
 	long double depth, q_value, expected;
 	char *templatefilename, Date[11];
-	FILE **inputfiles, *inputfile, *frag_in_raw, *seq_in;
+	FILE **inputfiles, *inputfile, *frag_in_raw;
 	FILE *res_out, *alignment_out, *consensus_out, *frag_out_raw, *xml_out;
 	FILE *extendedFeatures_out, *name_file, **template_fragments;
 	time_t t0, t1;
@@ -1376,7 +1377,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 		templatefilename = 0;
 		initialiseVcf(vcf_out, templatefilename);
 	}
-	seq_in = 0;
+	seq_in_no = 0;
 	
 	/* preallocate assembly matrices */
 	matrix = smalloc(sizeof(AssemInfo));
@@ -1434,7 +1435,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 		thread->bcd = bcd;
 		thread->sam = sam;
 		thread->ef = extendedFeatures;
-		thread->seq_in = fileno(seq_in);
+		thread->seq_in = seq_in_no;
 		thread->kmersize = kmersize;
 		thread->template = -2;
 		thread->file_count = fileCount;
@@ -1499,7 +1500,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	thread->bcd = bcd;
 	thread->sam = sam;
 	thread->ef = extendedFeatures;
-	thread->seq_in = fileno(seq_in);
+	thread->seq_in = seq_in_no;
 	thread->kmersize = kmersize;
 	thread->template = 0;
 	thread->file_count = fileCount;
@@ -1538,11 +1539,11 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	templatefilename = *templatefilenames++;
 	file_len = strlen(templatefilename);
 	strcat(templatefilename, ".seq.b");
-	seq_in = sfopen(templatefilename, "rb");
-	fseek(seq_in, 0, SEEK_END);
-	seqin_size = 4 * ftell(seq_in);
-	fseek(seq_in, 0, SEEK_SET);
-	seq_in_no = fileno(seq_in);
+	seq_in_no = open(templatefilename, O_RDONLY);
+	if(seq_in_no == -1) {
+		ERROR();
+	}
+	seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
 	if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
 		ERROR();
 	}
@@ -1573,12 +1574,16 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 			name_file = sfopen(templatefilename, "rb");
 			templatefilename[file_len] = 0;
 			strcat(templatefilename, ".seq.b");
-			fclose(seq_in);
-			seq_in = sfopen(templatefilename, "rb");
-			fseek(seq_in, 0, SEEK_END);
-			seqin_size = 4 * ftell(seq_in);
-			fseek(seq_in, 0, SEEK_SET);
-			seq_in_no = fileno(seq_in);
+			close(seq_in_no);
+			seq_in_no = open(templatefilename, O_RDONLY);
+			if(seq_in_no == -1) {
+				ERROR();
+			}
+			seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
+			if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
+				ERROR();
+			}
+			thread->seq_in = seq_in_no;
 			templatefilename[file_len] = 0;
 			seq_seeker = 0;
 			if(extendedFeatures == 2) {
@@ -1665,7 +1670,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 						printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
 					}
 					if(vcf) {
-						updateVcf(thread->template_name, thread->template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+						updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
 					}
 				}
 			} else {
@@ -1714,7 +1719,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 	hashMapCCI_destroy(thread->template_index);
 	
 	/* Close files */
-	fclose(seq_in);
+	close(seq_in_no);
 	fclose(res_out);
 	if(alignment_out) {
 		fclose(alignment_out);


=====================================
spltdb.h
=====================================
@@ -35,4 +35,4 @@ struct spltDBbuff {
 int print_ankers_spltDB(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
 int print_ankers_Sparse_spltDB(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
 unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *header, int *flag, FILE *inputfile);
-int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);


=====================================
vcf.c
=====================================
@@ -94,10 +94,19 @@ void initialiseVcf(FileBuff *fileP, char *templateFilename) {
 	
 }
 
-void updateVcf(char *template_name, long unsigned *template_seq, double evalue, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP) {
+void updateVcf(char *template_name, unsigned char *template_seq, double evalue, double support, int bcd, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP) {
 	
-	static const char *PASS = "PASS", *FAIL = "FAIL", *LowQual = "LowQual", *UNKNOWN = ".";
-	int i, j, pos, bestScore, depthUpdate, bestBaseScore, nucNum;
+	static const char nuc2num[256] = {
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 0, 8, 1, 8, 8, 8, 2, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 0, 8, 1, 8, 8, 8, 2, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
+	const char *PASS = "PASS", *FAIL = "FAIL", *LowQual = "LowQual", *UNKNOWN = ".";
+	int i, pos, nextPos, bestScore, depthUpdate, bestBaseScore, nucNum;
 	int template_name_length, check, avail, DP, AD, DEL, QUAL;
 	double AF, RAF, Q, P;
 	const double lnConst = -10 / log(10);
@@ -115,79 +124,84 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
 	update = (char *) fileP->next;
 	avail = fileP->bytes;
 	assembly = matrix->assmb;
-	pos = 0;
-	i = 0;
+	nextPos = 0;
 	do {
-		/* does not handle insertions yet */
+		pos = nextPos;
+		nextPos = assembly[pos].next;
+		
+		/* handle insertions now */
+		nuc = *template_seq++;
 		if(pos < t_len) {
-			nuc = bases[getNuc(template_seq, pos)];
 			++i;
-		} else {
+		} else if(nuc != '-') {
+			--template_seq;
 			nuc = '-';
 		}
+		
 		/* call base */
-		nucNum = 0;
-		bestNuc = 5;
-		bestScore = 0;
+		bestNuc = nuc2num[nuc];
+		bestScore = assembly[pos].counts[bestNuc];
 		depthUpdate = 0;
-		for(j = 0; j < 5; ++j) {
-			if(bestScore < assembly[pos].counts[j]) {
-				bestScore = assembly[pos].counts[j];
-				bestNuc = j;
+		for(i = 0; i < 6; ++i) {
+			if(bestScore < assembly[pos].counts[i]) {
+				bestScore = assembly[pos].counts[i];
+				bestNuc = i;
 			}
-			depthUpdate += assembly[pos].counts[j];
-		}
-		if(bestScore < assembly[pos].counts[j]) {
-			bestScore = assembly[pos].counts[j];
-			bestNuc = j;
+			depthUpdate += assembly[pos].counts[i];
 		}
-		depthUpdate += assembly[pos].counts[j];
 		nucNum = bestNuc;
 		bestNuc = bases[bestNuc];
 		
 		/* Check for minor base call */
-		if((bestScore << 1) < depthUpdate) {
+		if(!depthUpdate) {
+			nucNum = 5;
+			bestNuc = '-';
+		} else if((bestScore << 1) < depthUpdate) {
 			if(bestNuc == '-') {
 				bestBaseScore = 0;
 				bestNuc = 4;
-				for(j = 0; j < 5; ++j) {
-					if(bestBaseScore < assembly[pos].counts[j]) {
-						bestBaseScore = assembly[pos].counts[j];
-						bestNuc = j;
+				for(i = 0; i < 5; ++i) {
+					if(bestBaseScore < assembly[pos].counts[i]) {
+						bestBaseScore = assembly[pos].counts[i];
+						bestNuc = i;
 					}
 				}
+				nucNum = bestNuc;
 				bestNuc = tolower(bases[bestNuc]);
 			} else {
 				bestNuc = tolower(bestNuc);
 			}
 			bestScore = depthUpdate - assembly[pos].counts[5];
+		} else if(depthUpdate < bcd) {
+			/* too low depth */
+			bestNuc = tolower(bestNuc);
 		}
 		
 		if(bestScore) {
 			/* determine base at current position */
 			bestNuc = baseCall(bestNuc, nuc, bestScore, depthUpdate, evalue, &assembly[pos]);
+			nucNum = nuc2num[bestNuc];
+			
+			/* INFO */
+			DP = depthUpdate;
+			AD = assembly[pos].counts[nucNum];
+			AF = (double) AD / DP;
+			RAF = (double) bestScore / DP;
+			DEL = assembly[pos].counts[5];
+			Q = pow(depthUpdate - (bestScore << 1), 2) / depthUpdate;
+			P = p_chisqr(Q);
 			
 			/* discard unimportant changes */
-			if(nuc != toupper(bestNuc)) {
-				/* INFO */
-				DP = depthUpdate;
-				AD = assembly[pos].counts[nucNum];
-				AF = (double) AD / DP;
-				RAF = (double) bestScore / DP;
-				DEL = assembly[pos].counts[5];
-				
-				/* FORMAT */
-				Q = pow(depthUpdate - (bestScore << 1), 2) / depthUpdate;
-				P = p_chisqr(Q);
+			if(nuc != bestNuc || (t_len <= nextPos && *template_seq == '-') || DP < bcd || evalue < P || AD < support * DP) {
 				/* QUAL */
 				//QUAL = lnConst * log(P);
 				QUAL = lnConst * log(binP(DP, AD, 0.25));
 				QUAL = (QUAL < 0 || 3079 < QUAL) ? 3079 : QUAL;
 				
 				/* FILTER */
-				if(isupper(bestNuc)) {
+				if(bcd <= DP && P <= evalue && support * DP <= AD) {
 					FILTER = (char *) PASS;
-				} else if(P <= evalue || (9 * depthUpdate) <= (10 * bestScore)) {
+				} else if(bcd <= DP || P <= evalue || support * DP <= AD) {
 					FILTER = (char *) LowQual;
 				} else {
 					FILTER = (char *) FAIL;
@@ -204,8 +218,8 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
 				update += template_name_length; avail -= template_name_length;
 				*update++ = '\t'; --avail;
 				
-				if(nuc != '-') {
-					check = sprintf(update, "%d", i);
+				if(pos < t_len) {
+					check = sprintf(update, "%d", pos + 1);
 					update += check; avail -= check;
 				} else {
 					*update++ = '0';
@@ -227,7 +241,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
 				}
 				*update++ = '\t';
 				--avail;
-				if(nuc != '-' && bestNuc == '-') {
+				if(bestNuc == '-') {
 					*update++ = '<';
 					*update++ = bestNuc;
 					*update++ = '>';
@@ -244,7 +258,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
 				check = sprintf(update, "Q:P:FT\t%.2f:%4.1e:%s\n", Q, P, FILTER);
 				update += check; avail -= check;
 			}
-		} else if(nuc != '-') {
+		} else if(pos < t_len) {
 			FILTER = (char *) FAIL;
 			if(avail < template_name_length + 105) {
 				fileP->bytes = avail;
@@ -252,7 +266,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
 				avail = fileP->bytes;
 				update = (char *) fileP->next;
 			}
-			check = sprintf(update, "%s\t%d\t.\t%c\t%c\t%d\t%s\t", template_name, i, nuc, '.', 0, *FILTER_ptr);
+			check = sprintf(update, "%s\t%d\t.\t%c\t%c\t%d\t%s\t", template_name, pos + 1, nuc, '.', 0, *FILTER_ptr);
 			update += check; avail -= check;
 			check = sprintf(update, "DP=%d;AD=%d;AF=%.2f;RAF=%.2f;DEL=%d;AD6=%d,%d,%d,%d,%d,%d\t", 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0, 0);
 			update += check; avail -= check;
@@ -260,7 +274,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
 			update += check; avail -= check;
 		
 		}
-	} while((pos = assembly[pos].next) != 0);
+	} while(nextPos != 0);
 	
 	fileP->next = (unsigned char *) update;
 	fileP->bytes = avail;


=====================================
vcf.h
=====================================
@@ -22,4 +22,4 @@
 
 char * noFolder(const char *src);
 void initialiseVcf(FileBuff *fileP, char *templateFilename);
-void updateVcf(char *template_name, long unsigned *template_seq, double evalue, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP);
+void updateVcf(char *template_name, unsigned char *template_seq, double evalue, double support, int bcd, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP);


=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
  * limitations under the License.
 */
 
-#define KMA_VERSION "1.3.11"
+#define KMA_VERSION "1.3.13"



View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/7c11cdb113ec37ebc8246f0214ab4d4e7b7d757b...6a0091892c30be3345014ce639fa793c408942ea

-- 
View it on GitLab: https://salsa.debian.org/med-team/kma/-/compare/7c11cdb113ec37ebc8246f0214ab4d4e7b7d757b...6a0091892c30be3345014ce639fa793c408942ea
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210320/a15a2e24/attachment-0001.htm>


More information about the debian-med-commit mailing list