[med-svn] [Git][med-team/kma][upstream] New upstream version 1.2.10a

Andreas Tille gitlab at salsa.debian.org
Thu Aug 1 13:08:42 BST 2019



Andreas Tille pushed to branch upstream at Debian Med / kma


Commits:
f5137c3d by Andreas Tille at 2019-08-01T11:50:45Z
New upstream version 1.2.10a
- - - - -


19 changed files:

- KMAspecification.pdf
- Makefile
- align.c
- compress.c
- frags.c
- index.c
- kma.c
- loadupdate.c
- loadupdate.h
- makeindex.c
- penalties.h
- runkma.c
- savekmers.c
- savekmers.h
- seq2fasta.c
- spltdb.c
- + tmp.c
- + tmp.h
- version.h


Changes:

=====================================
KMAspecification.pdf
=====================================
Binary files a/KMAspecification.pdf and b/KMAspecification.pdf differ


=====================================
Makefile
=====================================
@@ -1,5 +1,5 @@
 CFLAGS = -Wall -O3 -std=c99
-LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o update.o updateindex.o updatescores.o valueshash.o vcf.o
+LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
 PROGS = kma kma_index kma_shm kma_update
 
 .c .o:
@@ -43,7 +43,7 @@ hashmapkma.o: hashmapkma.h pherror.h
 hashmapkmers.o: hashmapkmers.h pherror.h
 hashtable.o: hashtable.h hashmapkma.h hashmapkmers.h pherror.h
 index.o: index.h compress.h decon.h hashmap.h hashmapkma.h loadupdate.h makeindex.h pherror.h stdstat.h version.h
-kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h version.h
+kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h tmp.h version.h
 kmapipe.o: kmapipe.h pherror.h
 kmers.o: kmers.h ankers.h compdna.h hashmapkma.h kmapipe.h pherror.h qseqs.h savekmers.h spltdb.h
 kmmap.o: kmmap.h hashmapkma.h
@@ -56,16 +56,17 @@ printconsensus.o: printconsensus.h assembly.h
 qseqs.o: qseqs.h pherror.h
 qualcheck.o: qualcheck.h compdna.h hashmap.h pherror.h stdnuc.h stdstat.h
 runinput.o: runinput.h compdna.h filebuff.h pherror.h qseqs.h seqparse.h
-runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h vcf.h
+runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h tmp.h vcf.h
 sam.o: sam.h nw.h pherror.h qseqs.h runkma.h
 savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
 seq2fasta.o: seq2fasta.h pherror.h qseqs.h runkma.h stdnuc.h
 seqparse.o: seqparse.h filebuff.h qseqs.h
 shm.o: shm.h pherror.h hashmapkma.h version.h
 sparse.o: sparse.h compkmers.h hashtable.h kmapipe.h pherror.h runinput.h savekmers.h stdnuc.h stdstat.h
-spltdb.o: spltdb.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h runkma.h stdnuc.h stdstat.h vcf.h
+spltdb.o: spltdb.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h runkma.h stdnuc.h stdstat.h tmp.h vcf.h
 stdnuc.o: stdnuc.h
 stdstat.o: stdstat.h
+tmp.o: tmp.h pherror.h threader.h
 update.o: update.h hashmapkma.h pherror.h stdnuc.h
 updateindex.o: updateindex.h compdna.h hashmap.h hashmapindex.h pherror.h qualcheck.h stdnuc.h pherror.h
 updatescores.o: updatescores.h qseqs.h


=====================================
align.c
=====================================
@@ -48,6 +48,7 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
 	mask = 0;
 	mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
 	key = 0;
+	
 	/* circular, skip boundaries */
 	if(min < max) {
 		min = 0;
@@ -757,6 +758,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
 			if(end == -1) {
 				end = q_len;
 			}
+			
 			if(i < end - kmersize) {
 				key = makeKmer(qseq, i, kmersize - 1);
 				i += (kmersize - 1);
@@ -767,6 +769,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
 			while(i < end) {
 				key = ((key << 2) | qseq[i]) & mask;
 				value = hashMap_index_get_bound(template_index, key, 0, t_len, shifter);
+				
 				if(value == 0) {
 					++i;
 				} else if(0 < value) {
@@ -870,6 +873,8 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
 						
 						stop = hashMap_index_getNextDubPos(template_index, key, 0, t_len, stop, shifter);
 					}
+					/* add best anker score */
+					score_r += (bias - i);
 					i = bias + 1;
 					
 					/* update position */
@@ -883,6 +888,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
 			}
 			i = end + 1;
 		}
+		
 		if(bestScore < score_r) {
 			bestScore = score_r;
 		}


=====================================
compress.c
=====================================
@@ -702,6 +702,7 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
 		fprintf(stderr, "# Overflow bypassed.\n");
 	}
 	free(templates->values);
+	
 	/* convert valuesHash to a linked list */
 	table = 0;
 	i = shmValues->size;


=====================================
frags.c
=====================================
@@ -24,6 +24,7 @@
 #include "pherror.h"
 #include "qseqs.h"
 #include "threader.h"
+#include "tmp.h"
 
 FILE * printFrags(Frag **alignFrags, int DB_size) {
 	
@@ -31,7 +32,7 @@ FILE * printFrags(Frag **alignFrags, int DB_size) {
 	FILE *OUT;
 	Frag *alignFrag, *next;
 	
-	if(!(OUT = tmpfile())) {
+	if(!(OUT = tmpF(0))) {
 		fprintf(stderr, "Could not create tmp files.\n");
 		ERROR();
 	}


=====================================
index.c
=====================================
@@ -497,9 +497,13 @@ int index_main(int argc, char *argv[]) {
 	if(templatefilename != 0) {
 		/* load */
 		fprintf(stderr, "# Loading database: %s\n", outputfilename);
-		finalDB = load_DBs(templatefilename, outputfilename, &template_lengths, &template_ulengths, &template_slengths);
+		finalDB = smalloc(sizeof(HashMapKMA));
+		kmerindex = load_DBs(templatefilename, outputfilename, &template_lengths, &template_ulengths, &template_slengths, finalDB);
 		kmersize = finalDB->kmersize;
-		kmerindex = *template_lengths;
+		mask = 0;
+		mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
+		prefix = finalDB->prefix;
+		prefix_len = finalDB->prefix_len;
 		
 		/* determine params based on loaded DB */
 		if(prefix_len == 0 && prefix == 0) {
@@ -598,7 +602,6 @@ int index_main(int argc, char *argv[]) {
 				*template_lengths = 1024;
 			}
 		}
-		
 		fprintf(stderr, "# Indexing databases.\n");
 		t0 = clock();
 		makeDB(templates, kmerindex, inputfiles, filecount, outputfilename, appender, to2Bit, MinLen, MinKlen, homQ, homT, &template_lengths, &template_ulengths, &template_slengths);


=====================================
kma.c
=====================================
@@ -41,6 +41,7 @@
 #include "sparse.h"
 #include "spltdb.h"
 #include "stdstat.h"
+#include "tmp.h"
 #include "vcf.h"
 #include "version.h"
 
@@ -137,6 +138,7 @@ static void helpMessage(int exeStatus) {
 	fprintf(helpOut, "#\t-apm\t\tSets both pm and fpm\t\tu\n");
 	fprintf(helpOut, "#\t-shm\t\tUse shared DB made by kma_shm\t0 (lvl)\n");
 	fprintf(helpOut, "#\t-mmap\t\tMemory map *.comp.by\n");
+	fprintf(helpOut, "#\t-tmp\t\tSet directory for temporary files.\n");
 	fprintf(helpOut, "#\t-1t1\t\tForce end to end mapping\tFalse\n");
 	fprintf(helpOut, "#\t-ck\t\tCount kmers instead of\n#\t\t\tpseudo alignment\t\tFalse\n");
 	fprintf(helpOut, "#\t-ca\t\tMake circular alignments\tFalse\n");
@@ -167,13 +169,13 @@ int kma_main(int argc, char *argv[]) {
 	static int minPhred, fiveClip, sparse_run, mem_mode, Mt1, ConClave, bcd;
 	static int fileCounter, fileCounter_PE, fileCounter_INT, targetNum, vcf;
 	static int extendedFeatures, spltDB, mq, thread_num, kmersize, one2one;
-	static int ref_fsa, print_matrix, print_all, sam, **d, W1, U, M, MM, PE;
+	static int ref_fsa, print_matrix, print_all, sam, Ts, Tv, **d;
 	static unsigned nc, nf, shm, exhaustive;
 	static char *outputfilename, *templatefilename, **templatefilenames;
 	static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
 	static double ID_t, scoreT, evalue;
 	static Penalties *rewards;
-	int i, j, args, exe_len, status, size, escape, step1, step2;
+	int i, j, args, exe_len, status, size, escape, tmp, step1, step2;
 	unsigned totFrags;
 	char *to2Bit, *exeBasic, *myTemplatefilename;
 	double support;
@@ -223,16 +225,22 @@ int kma_main(int argc, char *argv[]) {
 		one2one = 0;
 		ss = 'q';
 		mem_mode = 0;
-		M = 1;
-		MM = -2;
-		W1 = -3;
-		U = -1;
-		PE = 7;
+		rewards = smalloc(sizeof(Penalties));
+		rewards->M = 1;
+		rewards->MM = -2;
+		rewards->U = -1;
+		rewards->W1 = -3;
+		rewards->Wl = -6;
+		rewards->Mn = -1;
+		rewards->PE = 7;
+		Tv = -2;
+		Ts = -2;
 		thread_num = 1;
 		inputfiles_PE = 0;
 		inputfiles_INT = 0;
 		inputfiles = 0;
 		templatefilenames = 0;
+		tmp = 0;
 		Mt1 = 0;
 		inputfiles = 0;
 		deConPrintPtr = printPtr;
@@ -618,8 +626,8 @@ int kma_main(int argc, char *argv[]) {
 			} else if(strcmp(argv[args], "-reward") == 0) {
 				++args;
 				if(args < argc) {
-					M = strtol(argv[args], &exeBasic, 10);
-					M = abs(M);
+					rewards->M = strtol(argv[args], &exeBasic, 10);
+					rewards->M = abs(rewards->M);
 					if(*exeBasic != 0) {
 						fprintf(stderr, "Invalid argument at \"-reward\".\n");
 						exit(4);
@@ -628,8 +636,8 @@ int kma_main(int argc, char *argv[]) {
 			} else if(strcmp(argv[args], "-penalty") == 0) {
 				++args;
 				if(args < argc) {
-					MM = strtol(argv[args], &exeBasic, 10);
-					MM = MIN(-MM, MM);
+					rewards->MM = strtol(argv[args], &exeBasic, 10);
+					rewards->MM = MIN(-rewards->MM, rewards->MM);
 					if(*exeBasic != 0) {
 						fprintf(stderr, "Invalid argument at \"-penalty\".\n");
 						exit(4);
@@ -638,8 +646,8 @@ int kma_main(int argc, char *argv[]) {
 			} else if(strcmp(argv[args], "-gapopen") == 0) {
 				++args;
 				if(args < argc) {
-					W1 = strtol(argv[args], &exeBasic, 10);
-					W1 = MIN(-W1, W1);
+					rewards->W1 = strtol(argv[args], &exeBasic, 10);
+					rewards->W1 = MIN(-rewards->W1, rewards->W1);
 					if(*exeBasic != 0) {
 						fprintf(stderr, "Invalid argument at \"-gapopen\".\n");
 						exit(4);
@@ -648,23 +656,71 @@ int kma_main(int argc, char *argv[]) {
 			} else if(strcmp(argv[args], "-gapextend") == 0) {
 				++args;
 				if(args < argc) {
-					U = strtol(argv[args], &exeBasic, 10);
-					U = MIN(-U, U);
+					rewards->U = strtol(argv[args], &exeBasic, 10);
+					rewards->U = MIN(-rewards->U, rewards->U);
 					if(*exeBasic != 0) {
 						fprintf(stderr, "Invalid argument at \"-gapextend\".\n");
 						exit(4);
 					}
 				}
+			} else if(strcmp(argv[args], "-localopen") == 0) {
+				/* here */
+				/* add to help */
+				++args;
+				if(args < argc) {
+					rewards->Wl = strtol(argv[args], &exeBasic, 10);
+					rewards->Wl = MIN(-rewards->Wl, rewards->Wl);
+					if(*exeBasic != 0) {
+						fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+						exit(4);
+					}
+				}
+			} else if(strcmp(argv[args], "-Npenalty") == 0) {
+				/* here */
+				/* add to help */
+				++args;
+				if(args < argc) {
+					rewards->Mn = strtol(argv[args], &exeBasic, 10);
+					rewards->Mn = MIN(-rewards->Mn, rewards->Mn);
+					if(*exeBasic != 0) {
+						fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+						exit(4);
+					}
+				}
 			} else if(strcmp(argv[args], "-per") == 0) {
 				++args;
 				if(args < argc) {
-					PE = strtol(argv[args], &exeBasic, 10);
-					PE = abs(PE);
+					rewards->PE = strtol(argv[args], &exeBasic, 10);
+					rewards->PE = abs(rewards->PE);
 					if(*exeBasic != 0) {
 						fprintf(stderr, "Invalid argument at \"-per\".\n");
 						exit(4);
 					}
 				}
+			} else if(strcmp(argv[args], "-transition") == 0) {
+				/* here */
+				/* add to help */
+				++args;
+				if(args < argc) {
+					Ts = strtol(argv[args], &exeBasic, 10);
+					Ts = MIN(-Ts, Ts);
+					if(*exeBasic != 0) {
+						fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+						exit(4);
+					}
+				}
+			} else if(strcmp(argv[args], "-transversion") == 0) {
+				/* here */
+				/* add to help */
+				++args;
+				if(args < argc) {
+					Tv = strtol(argv[args], &exeBasic, 10);
+					Tv = MIN(-Tv, Tv);
+					if(*exeBasic != 0) {
+						fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+						exit(4);
+					}
+				}
 			} else if(strcmp(argv[args], "-and") == 0) {
 				cmp = &cmp_and;
 			} else if(strcmp(argv[args], "-boot") == 0) {
@@ -727,11 +783,21 @@ int kma_main(int argc, char *argv[]) {
 				nf = 1;
 			} else if(strcmp(argv[args], "-cge") == 0) {
 				scoreT = 0.75;
-				M = 1;
-				MM = -3;
-				W1 = -5;
-				U = -1;
-				PE = 17;
+				rewards->M = 1;
+				rewards->MM = -3;
+				rewards->W1 = -5;
+				rewards->U = -1;
+				rewards->PE = 17;
+			} else if(strcmp(argv[args], "-tmp") == 0) {
+				tmp = 1;
+				if(++args < argc) {
+					if(argv[args][0] != '-') {
+						tmpF(argv[args]);
+						tmp = 0;
+					} else {
+						--args;
+					}
+				}
 			} else if(strcmp(argv[args], "-spltDB") == 0) {
 				spltDB = 1;
 			} else if(strcmp(argv[args], "-status") == 0) {
@@ -782,40 +848,40 @@ int kma_main(int argc, char *argv[]) {
 			fprintf(stderr, " Too few arguments handed\n");
 			fprintf(stderr, " Printing help message:\n");
 			helpMessage(1);
+		} else if(tmp) {
+			/* set tmp files */
+			tmpF(outputfilename);
 		}
 		
 		if(fileCounter == 0 && fileCounter_PE == 0 && fileCounter_INT == 0) {
-			inputfiles = malloc(sizeof(char*));
-			if(!inputfiles) {
-				ERROR();
-			}
+			inputfiles = smalloc(sizeof(char*));
 			inputfiles[0] = "--";
 			fileCounter = 1;
 		}
 		status = 0;
 		
 		/* set scoring matrix */
-		d = smalloc(5 * sizeof(int *));
-		for(i = 0; i < 4; ++i) {
-			d[i] = smalloc(5 * sizeof(int));
-			for(j = 0; j < 4; ++j) {
-				d[i][j] = MM;
+		rewards->MM = (Ts + Tv - 1) / 2; /* avg. of transition and transversion, rounded down */
+		d = smalloc(5 * sizeof(int *) + 25 * sizeof(int));
+		*d = (int *) (d + 5);
+		i = 0;
+		while(i < 4) {
+			j = 4;
+			d[i][j] = rewards->Mn;
+			while(j--) {
+				d[i][j] = Tv;
 			}
-			d[i][i] = M;
+			d[i][(i - 2) < 0 ? (i + 2) : (i - 2)] = Ts;
+			d[i][i] = rewards->M;
+			j = i++;
+			d[i] = d[j] + 5;
 		}
-		d[4] = smalloc(5 * sizeof(int));
-		for(i = 0; i < 5; ++i) {
-			d[4][i] = U;
-			d[i][4] = U;
+		i = 5;
+		while(i--) {
+			d[4][i] = rewards->Mn;
 		}
 		d[4][4] = 0;
-		rewards = smalloc(sizeof(Penalties));
 		rewards->d = (int **) d;
-		rewards->W1 = W1;
-		rewards->U = U;
-		rewards->M = M;
-		rewards->MM = MM;
-		rewards->PE = PE;
 		
 		if(spltDB && targetNum != 1) {
 			/* allocate space for commands */
@@ -827,8 +893,8 @@ int kma_main(int argc, char *argv[]) {
 				} else if(escape) {
 					size += 2;
 				}
-				size += strlen(argv[i]);
-				if(strncmp(argv[i], "-i", 2) == 0) {
+				size += strlen(argv[args]);
+				if(strncmp(argv[args], "-i", 2) == 0) {
 					escape = 1;
 				}
 			}


=====================================
loadupdate.c
=====================================
@@ -148,17 +148,15 @@ unsigned ** hashMapKMA_openValues(HashMapKMA *src) {
 	return Values;
 }
 
-HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths) {
+unsigned load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths, HashMapKMA *finalDB) {
 	
-	int file_len, out_len, DB_size;
+	int file_len, out_len, DB_size, kmerindex;
 	FILE *infile;
-	HashMapKMA *finalDB;
 	
 	file_len = strlen(templatefilename);
 	out_len = strlen(outputfilename);
 	
 	/* load hash */
-	finalDB = smalloc(sizeof(HashMapKMA));
 	strcat(templatefilename, ".comp.b");
 	infile = sfopen(templatefilename, "rb");
 	if(hashMapKMAload(finalDB, infile)) {
@@ -178,13 +176,14 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
 	}
 	templatefilename[file_len] = 0;
 	fread(&DB_size, sizeof(unsigned), 1, infile);
-	if(finalDB->prefix_len) {
+	if(finalDB->prefix) {
 		*template_lengths = smalloc((DB_size << 1) * sizeof(unsigned));
 		*template_slengths = smalloc((DB_size << 1) * sizeof(unsigned));
 		*template_ulengths = smalloc((DB_size << 1) * sizeof(unsigned));
+		fread(*template_lengths, sizeof(unsigned), DB_size, infile);
 		fread(*template_slengths, sizeof(unsigned), DB_size, infile);
 		fread(*template_ulengths, sizeof(unsigned), DB_size, infile);
-		fread(*template_lengths, sizeof(unsigned), DB_size, infile);
+		kmerindex = **template_slengths;
 		**template_ulengths = DB_size << 1;
 		**template_slengths = DB_size << 1;
 	} else {
@@ -192,6 +191,7 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
 		*template_slengths = 0;
 		*template_ulengths = 0;
 		fread(*template_lengths, sizeof(unsigned), DB_size, infile);
+		kmerindex = **template_lengths;
 		**template_lengths = DB_size << 1;
 	}
 	fclose(infile);
@@ -222,5 +222,5 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
 	}
 	templatefilename[file_len] = 0;
 	
-	return finalDB;
+	return kmerindex;
 }


=====================================
loadupdate.h
=====================================
@@ -24,4 +24,4 @@ void * memdup(const void * src, size_t size);
 int CP(char *templatefilename, char *outputfilename);
 HashMap * hashMapKMA_openChains(HashMapKMA *src);
 unsigned ** hashMapKMA_openValues(HashMapKMA *src);
-HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths);
+unsigned load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths, HashMapKMA *finalDB);


=====================================
makeindex.c
=====================================
@@ -71,7 +71,6 @@ void makeDB(HashMap *templates, int kmerindex, char **inputfiles, int fileCount,
 		seq_out = sfopen(outputfilename, "wb");
 		outputfilename[file_len] = 0;
 	}
-	
 	if(dumpIndex == &makeIndexing) {
 		if(appender) {
 			strcat(outputfilename, ".index.b");
@@ -109,16 +108,16 @@ void makeDB(HashMap *templates, int kmerindex, char **inputfiles, int fileCount,
 					while(isspace(*--seq)) {
 						*seq = 0;
 					}
-					
 					if(bias > 0) {
 						fprintf(name_out, "%s B%d\n", header->seq + 1, bias);
 					} else {
 						fprintf(name_out, "%s\n", header->seq + 1);
 					}
 					updateAnnotsPtr(compressor, templates->DB_size, kmerindex, seq_out, index_out, template_lengths, template_ulengths, template_slengths);
+					/* here */
 					fprintf(stderr, "# Added:\t%s\n", header->seq + 1);
 					
-					if(++templates->DB_size == USHRT_MAX) {
+					if(++(templates->DB_size) == USHRT_MAX) {
 						/* convert values to unsigned */
 						convertToU(templates);
 					}


=====================================
penalties.h
=====================================
@@ -21,10 +21,12 @@
 typedef struct penalties Penalties;
 struct penalties {
 	int **d;
-	int W1;
-	int U;
 	int M;
 	int MM;
+	int U;
+	int W1;
+	int Wl;
+	int Mn;
 	int PE;
 };
 #endif


=====================================
runkma.c
=====================================
@@ -43,6 +43,7 @@
 #include "sam.h"
 #include "stdnuc.h"
 #include "stdstat.h"
+#include "tmp.h"
 #include "updatescores.h"
 #include "vcf.h"
 #ifndef _WIN32
@@ -254,7 +255,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 			alignment_out = 0;
 			consensus_out = 0;
 		}
-		frag_out_raw = tmpfile();
+		frag_out_raw = tmpF(0);
 		if(!frag_out_raw) {
 			ERROR();
 		}
@@ -1394,7 +1395,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
 			alignment_out = 0;
 			consensus_out = 0;
 		}
-		frag_out_raw = tmpfile();
+		frag_out_raw = tmpF(0);
 		if(!frag_out_raw) {
 			ERROR();
 		}


=====================================
savekmers.c
=====================================
@@ -393,17 +393,13 @@ int getProxiMatchSparse(int *bestTemplates, int *Score, int kmersize, int n_kmer
 	return bestScore;
 }
 
-int clearScore(int *bestTemplates, int *Score) {
+static int clearScore(int *bestTemplates, int *Score) {
 	
 	int i;
 	
 	i = *bestTemplates + 1;
 	while(--i) {
 		Score[*++bestTemplates] = 0;
-		if(*bestTemplates < 0) {
-			fprintf(stderr, "Moher fucker.\n");
-			exit(1);
-		}
 	}
 	
 	return 0;
@@ -4759,3 +4755,299 @@ void ankerAndClean_MEM(int *regionTemplates, int *Score, int *Score_r, char *inc
 	header->seq[l] = 0;
 	header->len = l;
 }
+
+int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
+	
+	/* return 0 for match, 1 otherwise */
+	static int *Sizes = 0;
+	static KmerAnker **tVF_scores, **tVR_scores;
+	
+	int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, maxS;
+	int *bests;
+	unsigned i, j, rc, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
+	unsigned hitCounter, hitCounter_r;
+	unsigned *values, *last;
+	short unsigned *values_s;
+	char *include;
+	CompDNA *qseqP;
+	KmerAnker *V_score, *V_scores, *VF_scores, *VR_scores, *tmp_score;
+	
+	if(qseq == 0) {
+		if(Sizes) {
+			free(Sizes);
+			Sizes = 0;
+			i = *bestTemplates;
+			while(i--) {
+				free(tVF_scores[i]);
+			}
+			free(tVF_scores);
+		} else {
+			i = *bestTemplates;
+			Sizes = smalloc(i * sizeof(int));
+			tVF_scores = smalloc(2 * i * sizeof(KmerAnker *));
+			tVR_scores = tVF_scores + i;
+			while(i--) {
+				Sizes[i] = 256;
+				tVF_scores[i] = calloc(512, sizeof(KmerAnker));
+				if(!tVF_scores[i]) {
+					ERROR();
+				} else {
+					tVR_scores[i] = tVF_scores[i] + 256;
+				}
+			}
+		}
+		return 0;
+	} else if(qseq->seqlen < (kmersize = templates->kmersize)) {
+		return 1;
+	} else if((DB_size = templates->DB_size) < USHRT_MAX) {
+		SU = 1;
+	} else {
+		SU = 0;
+	}
+	shifter = sizeof(long unsigned) * sizeof(long unsigned) - (templates->kmersize << 1);
+	M = rewards->M;
+	MM = rewards->MM;
+	U = rewards->U;
+	W1 = rewards->W1;
+	Wl = rewards->Wl;
+	include = (char *) (extendScore + (templates->DB_size + 1));
+	hitCounter = 0;
+	hitCounter_r = 0;
+	values = 0;
+	values_s = 0;
+	
+	if(Sizes[*Score] < qseq->size) {
+		Sizes[*Score] = qseq->size << 1;
+		free(tVF_scores[*Score]);
+		tVF_scores[*Score] = calloc(qseq->size, sizeof(KmerAnker));
+		if(!tVF_scores[*Score]) {
+			ERROR();
+		} else {
+			tVR_scores[*Score] = tVF_scores[*Score] + qseq->size;
+		}
+	}
+	VF_scores = tVF_scores[*Score];
+	VR_scores = tVR_scores[*Score];
+	
+	V_scores = VF_scores;
+	qseqP = qseq;
+	rc = 2;
+	while(rc--) {
+		if(rc == 0) {
+			V_scores = VR_scores;
+			qseqP = qseq_r;
+			rc_comp(qseq, qseq_r);
+			hitCounter = hitCounter_r;
+			hitCounter_r = 0;
+		}
+		
+		HIT = exhaustive;
+		j = 0;
+		++*(qseqP->N);
+		qseqP->N[*(qseqP->N)] = qseqP->seqlen;
+		for(i = 1; i <= *(qseqP->N) && !HIT; ++i) {
+			end = qseqP->N[i] - kmersize + 1;
+			while(j < end && hashMap_get(templates, getKmer(qseqP->seq, j, shifter)) == 0) {
+				j += kmersize;
+			}
+			if(j < end) {
+				HIT = 1;
+			} else {
+				j = qseqP->N[i] + 1;
+			}
+		}
+		
+		if(HIT) {
+			V_score = V_scores;
+			V_score->start = (j = 0);
+			V_score->values = (last = 0);
+			V_score->next = 0;
+			Ms = 0;
+			MMs = 0;
+			Us = 0;
+			W1s = 0;
+			gaps = 0;
+			for(i = 1; i <= *(qseqP->N); ++i) {
+				end = qseqP->N[i] - kmersize + 1;
+				while(j < end) {
+					if((values = hashMap_get(templates, getKmer(qseqP->seq, j, shifter)))) {
+						if(values == last) {
+							if(kmersize < gaps) {
+								Ms += kmersize;
+								gaps -= kmersize;
+								if(gaps) {
+									/* go for best scenario */
+									if(gaps == 1) {
+										MMs += 2;
+									} else {
+										gaps -= 2;
+										if((MM << 1) + gaps * M < 0) {
+											Ms += gaps;
+											MMs += 2;
+										}
+									}
+								} else {
+									++MMs;
+								}
+							} else if(gaps) {
+								--gaps;
+								++W1s;
+								Us += gaps;
+							} else {
+								++Ms;
+							}
+						} else {
+							if(last) {
+								/* update and link between ankers */
+								V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+								V_score->end = j - gaps + kmersize;
+								++V_score;
+							}
+							V_score->start = j;
+							V_score->values = (last = values);
+							Ms = 0;
+							MMs = 0;
+							Us = 0;
+							W1s = 0;
+							++hitCounter_r;
+						}
+						gaps = 0;
+					} else {
+						++gaps;
+					}
+					j += kmersize;
+				}
+				j = qseqP->N[i] + 1;
+			}
+			if(last) {
+				/* update anker */
+				V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+			}
+		}
+		--*(qseqP->N);
+	}
+	
+	/* here */
+	/* chaining */
+	
+	/*
+	while(chain is good) {
+		1. make chain. OK
+		2. choose best.
+		3. silence "used" ankers.
+		4. redo chaining if next best hits a silenced anker.
+	}
+	*/
+	
+	/*
+	for each anker
+		mark last anker w.r.t. highest score on anker
+	*/
+	
+	/* make chains */
+	maxS = 0;
+	V_score = VF_scores;
+	HIT = hitCounter + 1;
+	bests = bestTemplates;
+	rc = 2;
+	while(rc--) {
+		if(rc == 0) {
+			V_score = VR_scores;
+			HIT = hitCounter_r + 1;
+			bests = bestTemplates_r;
+		}
+		*bests = 0;
+		while(--HIT) {
+			/*
+			Save pos of best score, and chain it (0 if local).
+			Score is always current / w.r.t. the anker you are on.
+			extendScore contains endpos of last hit anker.
+			*/
+			start = V_score->start;
+			end = V_score->end;
+			
+			/* chain anker */
+			V_score->score = 0;
+			if(SU) {
+				values_s = (short unsigned *) V_score->values;
+				i = *values_s + 1;
+				values_s += i;
+			} else {
+				values = V_score->values;
+				i = *values + 1;
+				values += i;
+			}
+			while(--i) {
+				template = SU ? *--values_s : *--values;
+				if(!include[template]) {
+					include[template] = 1;
+					bests[++*bests] = template;
+				}
+				score = Score[template];
+				gaps = start - (pos = extendScore[template]);
+				if(gaps == 0) {
+					/* continued match */
+					score += M;
+				} else if(gaps < 0) {
+					/* deletion */
+					score += (W1 + abs(gaps + 1) * U);
+				} else if(gaps == 1) {
+					/* one unmatched between */
+					score += MM;
+				} else {
+					/* unmatched bases between */
+					if((gaps = (gaps - 2) * M) < 0) {
+						score += gaps;
+					}
+					score += (MM << 1);
+				}
+				
+				/* check local chain is needed */
+				if(score < Wl) {
+					score = Wl + V_score->weight;
+					/* mark as terminating */
+					V_score->next = 0;
+					extendScore[template] = 0;
+				} else {
+					score += V_score->weight;
+					/* mark descendant */
+					tmp_score = V_score;
+					while((--tmp_score)->end != pos);
+					V_score->next = tmp_score;
+					extendScore[template] = end;
+				}
+				
+				/* update Scores */
+				if(V_score->score < score) {
+					V_score->score = score;
+					/* here */
+					/* consider several bests */
+					if(maxS < score) {
+						maxS = score;
+					}
+				}
+				Score[template] = score;
+			}
+			++V_score;
+		}
+		
+		/* clear Score */
+		i = *bests + 1;
+		while(--i) {
+			include[*++bests] = 0;
+		}
+		clearScore(bests, Score);
+		clearScore(bests, extendScore);
+	}
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	
+	return 1;
+}


=====================================
savekmers.h
=====================================
@@ -25,6 +25,8 @@
 
 #ifndef SAVEKMERS
 typedef struct kmerScan_thread KmerScan_thread;
+typedef struct kmerAnker KmerAnker;
+
 struct kmerScan_thread {
 	pthread_t id;
 	int num;
@@ -40,6 +42,16 @@ struct kmerScan_thread {
 	Penalties *rewards;
 	struct kmerScan_thread *next;
 };
+
+struct kmerAnker {
+	int score;
+	int weight;
+	unsigned start;
+	unsigned end;
+	unsigned *values;
+	struct kmerAnker *next;
+};
+
 #define SAVEKMERS 1;
 #endif
 
@@ -84,3 +96,4 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
 int save_kmers_HMM(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);
 void ankerAndClean(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
 void ankerAndClean_MEM(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
+int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);


=====================================
seq2fasta.c
=====================================
@@ -80,8 +80,7 @@ void printFastas(char *filename, int *template_lengths) {
 	if(!seq || !compseq) {
 		ERROR();
 	}
-	
-	for(i = 1; i < DB_size; i++) {
+	for(i = 1; i < DB_size; ++i) {
 		fread(compseq, sizeof(long unsigned), (template_lengths[i] >> 5) + 1, seqfile);
 		
 		j = template_lengths[i];
@@ -103,7 +102,7 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
 	
 	const char bases[6] = "ACGTN-";
 	int i, j, n, max, DB_size, file_len;
-	long unsigned *compseq;
+	long unsigned skip, *compseq;
 	char *seq;
 	FILE *seqfile, *namefile;
 	Qseqs *template_name;
@@ -139,11 +138,13 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
 	if(!seq || !compseq) {
 		ERROR();
 	}
-	
-	for(i = 1; n && i < DB_size; i++) {
+	skip = 0;
+	for(i = 1; n && i < DB_size; ++i) {
 		if(i == *seqlist) {
 			/* get seq */
+			fseek(seqfile, skip, SEEK_CUR);
 			fread(compseq, sizeof(long unsigned), (template_lengths[i] >> 5) + 1, seqfile);
+			
 			j = template_lengths[i];
 			*(seq += j) = '\n';
 			while(j--) {
@@ -156,8 +157,10 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
 			
 			/* get next target */
 			while(--n && i == *++seqlist);
+			skip = 0;
 		} else {
 			nameSkip(namefile, max);
+			skip += (((template_lengths[i] >> 5) + 1) * sizeof(long unsigned));
 		}
 	}
 }


=====================================
spltdb.c
=====================================
@@ -42,6 +42,7 @@
 #include "spltdb.h"
 #include "stdnuc.h"
 #include "stdstat.h"
+#include "tmp.h"
 #include "updatescores.h"
 #include "version.h"
 #include "vcf.h"
@@ -522,7 +523,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 		alignment_out = 0;
 		consensus_out = 0;
 	}
-	frag_out_raw = tmpfile();
+	frag_out_raw = tmpF(0);
 	if(!frag_out_raw) {
 		ERROR();
 	}


=====================================
tmp.c
=====================================
@@ -0,0 +1,91 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *		http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "pherror.h"
+#include "threader.h"
+#include "tmp.h"
+
+FILE * tmpF(const char *location) {
+	
+	static int tmpNum = 0;
+	static char *tmpname, *dirname = 0, *filename = 0;
+	static volatile int lock[1] = {0};
+	int fd;
+	FILE *file;
+	
+	lock(lock);
+	if(location) {
+		/* set location for tmpfiles */
+		if(filename) {
+			free(filename);
+			filename = 0;
+		} else if(dirname) {
+			free(dirname);
+			dirname = 0;
+		}
+		if(*location) {
+			if(location[strlen(location) - 1] != '/') {
+				filename = smalloc(strlen(location) + 15);
+				tmpname = filename + sprintf(filename, "%s.tmp", location);
+				dirname = 0;
+			} else {
+				dirname = smalloc(strlen(location) + 12);
+				tmpname = dirname + sprintf(dirname, "%s.kma-", location);
+				strcpy(tmpname, "XXXXXX");
+				filename = 0;
+			}
+		}
+		file = 0;
+	} else if(dirname) {
+		if((fd = mkstemp(dirname)) == -1) {
+			ERROR();
+		}
+		if((file = fdopen(fd, "wb+"))) {
+			unlink(dirname);
+		}
+		strcpy(tmpname, "XXXXXX");
+	} else if(filename) {
+		/* open tmpfile on previous location */
+		sprintf(tmpname, "%d", tmpNum++);
+		if((file = fopen(filename, "wb+"))) {
+			unlink(filename);
+		}
+		*tmpname = 0;
+	} else {
+		/* open a "normal" tmp file */
+		file = tmpfile();
+	}
+	unlock(lock);
+	
+	return file;
+}
+
+
+
+
+
+
+
+
+
+


=====================================
tmp.h
=====================================
@@ -0,0 +1,22 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *		http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <stdio.h>
+
+FILE * tmpF(const char *location);


=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
  * limitations under the License.
 */
 
-#define KMA_VERSION "1.2.6"
+#define KMA_VERSION "1.2.10a"



View it on GitLab: https://salsa.debian.org/med-team/kma/commit/f5137c3d39949889292033da254f861b4981ebbf

-- 
View it on GitLab: https://salsa.debian.org/med-team/kma/commit/f5137c3d39949889292033da254f861b4981ebbf
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190801/ca540f83/attachment-0001.html>


More information about the debian-med-commit mailing list