[med-svn] [Git][med-team/kma][upstream] New upstream version 1.2.10a
Andreas Tille
gitlab at salsa.debian.org
Thu Aug 1 13:08:42 BST 2019
Andreas Tille pushed to branch upstream at Debian Med / kma
Commits:
f5137c3d by Andreas Tille at 2019-08-01T11:50:45Z
New upstream version 1.2.10a
- - - - -
19 changed files:
- KMAspecification.pdf
- Makefile
- align.c
- compress.c
- frags.c
- index.c
- kma.c
- loadupdate.c
- loadupdate.h
- makeindex.c
- penalties.h
- runkma.c
- savekmers.c
- savekmers.h
- seq2fasta.c
- spltdb.c
- + tmp.c
- + tmp.h
- version.h
Changes:
=====================================
KMAspecification.pdf
=====================================
Binary files a/KMAspecification.pdf and b/KMAspecification.pdf differ
=====================================
Makefile
=====================================
@@ -1,5 +1,5 @@
CFLAGS = -Wall -O3 -std=c99
-LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o update.o updateindex.o updatescores.o valueshash.o vcf.o
+LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
.c .o:
@@ -43,7 +43,7 @@ hashmapkma.o: hashmapkma.h pherror.h
hashmapkmers.o: hashmapkmers.h pherror.h
hashtable.o: hashtable.h hashmapkma.h hashmapkmers.h pherror.h
index.o: index.h compress.h decon.h hashmap.h hashmapkma.h loadupdate.h makeindex.h pherror.h stdstat.h version.h
-kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h version.h
+kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h tmp.h version.h
kmapipe.o: kmapipe.h pherror.h
kmers.o: kmers.h ankers.h compdna.h hashmapkma.h kmapipe.h pherror.h qseqs.h savekmers.h spltdb.h
kmmap.o: kmmap.h hashmapkma.h
@@ -56,16 +56,17 @@ printconsensus.o: printconsensus.h assembly.h
qseqs.o: qseqs.h pherror.h
qualcheck.o: qualcheck.h compdna.h hashmap.h pherror.h stdnuc.h stdstat.h
runinput.o: runinput.h compdna.h filebuff.h pherror.h qseqs.h seqparse.h
-runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h vcf.h
+runkma.o: runkma.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h stdnuc.h stdstat.h tmp.h vcf.h
sam.o: sam.h nw.h pherror.h qseqs.h runkma.h
savekmers.o: savekmers.h ankers.h compdna.h hashmapkma.h penalties.h pherror.h qseqs.h stdnuc.h stdstat.h threader.h
seq2fasta.o: seq2fasta.h pherror.h qseqs.h runkma.h stdnuc.h
seqparse.o: seqparse.h filebuff.h qseqs.h
shm.o: shm.h pherror.h hashmapkma.h version.h
sparse.o: sparse.h compkmers.h hashtable.h kmapipe.h pherror.h runinput.h savekmers.h stdnuc.h stdstat.h
-spltdb.o: spltdb.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h runkma.h stdnuc.h stdstat.h vcf.h
+spltdb.o: spltdb.h align.h alnfrags.h assembly.h chain.h compdna.h ef.h filebuff.h frags.h hashmapindex.h kmapipe.h nw.h pherror.h printconsensus.h qseqs.h runkma.h stdnuc.h stdstat.h tmp.h vcf.h
stdnuc.o: stdnuc.h
stdstat.o: stdstat.h
+tmp.o: tmp.h pherror.h threader.h
update.o: update.h hashmapkma.h pherror.h stdnuc.h
updateindex.o: updateindex.h compdna.h hashmap.h hashmapindex.h pherror.h qualcheck.h stdnuc.h pherror.h
updatescores.o: updatescores.h qseqs.h
=====================================
align.c
=====================================
@@ -48,6 +48,7 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
mask = 0;
mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
key = 0;
+
/* circular, skip boundaries */
if(min < max) {
min = 0;
@@ -757,6 +758,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
if(end == -1) {
end = q_len;
}
+
if(i < end - kmersize) {
key = makeKmer(qseq, i, kmersize - 1);
i += (kmersize - 1);
@@ -767,6 +769,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
while(i < end) {
key = ((key << 2) | qseq[i]) & mask;
value = hashMap_index_get_bound(template_index, key, 0, t_len, shifter);
+
if(value == 0) {
++i;
} else if(0 < value) {
@@ -870,6 +873,8 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
stop = hashMap_index_getNextDubPos(template_index, key, 0, t_len, stop, shifter);
}
+ /* add best anker score */
+ score_r += (bias - i);
i = bias + 1;
/* update position */
@@ -883,6 +888,7 @@ int anker_rc(const HashMap_index *template_index, unsigned char *qseq, int q_len
}
i = end + 1;
}
+
if(bestScore < score_r) {
bestScore = score_r;
}
=====================================
compress.c
=====================================
@@ -702,6 +702,7 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
fprintf(stderr, "# Overflow bypassed.\n");
}
free(templates->values);
+
/* convert valuesHash to a linked list */
table = 0;
i = shmValues->size;
=====================================
frags.c
=====================================
@@ -24,6 +24,7 @@
#include "pherror.h"
#include "qseqs.h"
#include "threader.h"
+#include "tmp.h"
FILE * printFrags(Frag **alignFrags, int DB_size) {
@@ -31,7 +32,7 @@ FILE * printFrags(Frag **alignFrags, int DB_size) {
FILE *OUT;
Frag *alignFrag, *next;
- if(!(OUT = tmpfile())) {
+ if(!(OUT = tmpF(0))) {
fprintf(stderr, "Could not create tmp files.\n");
ERROR();
}
=====================================
index.c
=====================================
@@ -497,9 +497,13 @@ int index_main(int argc, char *argv[]) {
if(templatefilename != 0) {
/* load */
fprintf(stderr, "# Loading database: %s\n", outputfilename);
- finalDB = load_DBs(templatefilename, outputfilename, &template_lengths, &template_ulengths, &template_slengths);
+ finalDB = smalloc(sizeof(HashMapKMA));
+ kmerindex = load_DBs(templatefilename, outputfilename, &template_lengths, &template_ulengths, &template_slengths, finalDB);
kmersize = finalDB->kmersize;
- kmerindex = *template_lengths;
+ mask = 0;
+ mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
+ prefix = finalDB->prefix;
+ prefix_len = finalDB->prefix_len;
/* determine params based on loaded DB */
if(prefix_len == 0 && prefix == 0) {
@@ -598,7 +602,6 @@ int index_main(int argc, char *argv[]) {
*template_lengths = 1024;
}
}
-
fprintf(stderr, "# Indexing databases.\n");
t0 = clock();
makeDB(templates, kmerindex, inputfiles, filecount, outputfilename, appender, to2Bit, MinLen, MinKlen, homQ, homT, &template_lengths, &template_ulengths, &template_slengths);
=====================================
kma.c
=====================================
@@ -41,6 +41,7 @@
#include "sparse.h"
#include "spltdb.h"
#include "stdstat.h"
+#include "tmp.h"
#include "vcf.h"
#include "version.h"
@@ -137,6 +138,7 @@ static void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-apm\t\tSets both pm and fpm\t\tu\n");
fprintf(helpOut, "#\t-shm\t\tUse shared DB made by kma_shm\t0 (lvl)\n");
fprintf(helpOut, "#\t-mmap\t\tMemory map *.comp.by\n");
+ fprintf(helpOut, "#\t-tmp\t\tSet directory for temporary files.\n");
fprintf(helpOut, "#\t-1t1\t\tForce end to end mapping\tFalse\n");
fprintf(helpOut, "#\t-ck\t\tCount kmers instead of\n#\t\t\tpseudo alignment\t\tFalse\n");
fprintf(helpOut, "#\t-ca\t\tMake circular alignments\tFalse\n");
@@ -167,13 +169,13 @@ int kma_main(int argc, char *argv[]) {
static int minPhred, fiveClip, sparse_run, mem_mode, Mt1, ConClave, bcd;
static int fileCounter, fileCounter_PE, fileCounter_INT, targetNum, vcf;
static int extendedFeatures, spltDB, mq, thread_num, kmersize, one2one;
- static int ref_fsa, print_matrix, print_all, sam, **d, W1, U, M, MM, PE;
+ static int ref_fsa, print_matrix, print_all, sam, Ts, Tv, **d;
static unsigned nc, nf, shm, exhaustive;
static char *outputfilename, *templatefilename, **templatefilenames;
static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
static double ID_t, scoreT, evalue;
static Penalties *rewards;
- int i, j, args, exe_len, status, size, escape, step1, step2;
+ int i, j, args, exe_len, status, size, escape, tmp, step1, step2;
unsigned totFrags;
char *to2Bit, *exeBasic, *myTemplatefilename;
double support;
@@ -223,16 +225,22 @@ int kma_main(int argc, char *argv[]) {
one2one = 0;
ss = 'q';
mem_mode = 0;
- M = 1;
- MM = -2;
- W1 = -3;
- U = -1;
- PE = 7;
+ rewards = smalloc(sizeof(Penalties));
+ rewards->M = 1;
+ rewards->MM = -2;
+ rewards->U = -1;
+ rewards->W1 = -3;
+ rewards->Wl = -6;
+ rewards->Mn = -1;
+ rewards->PE = 7;
+ Tv = -2;
+ Ts = -2;
thread_num = 1;
inputfiles_PE = 0;
inputfiles_INT = 0;
inputfiles = 0;
templatefilenames = 0;
+ tmp = 0;
Mt1 = 0;
inputfiles = 0;
deConPrintPtr = printPtr;
@@ -618,8 +626,8 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-reward") == 0) {
++args;
if(args < argc) {
- M = strtol(argv[args], &exeBasic, 10);
- M = abs(M);
+ rewards->M = strtol(argv[args], &exeBasic, 10);
+ rewards->M = abs(rewards->M);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-reward\".\n");
exit(4);
@@ -628,8 +636,8 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-penalty") == 0) {
++args;
if(args < argc) {
- MM = strtol(argv[args], &exeBasic, 10);
- MM = MIN(-MM, MM);
+ rewards->MM = strtol(argv[args], &exeBasic, 10);
+ rewards->MM = MIN(-rewards->MM, rewards->MM);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-penalty\".\n");
exit(4);
@@ -638,8 +646,8 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-gapopen") == 0) {
++args;
if(args < argc) {
- W1 = strtol(argv[args], &exeBasic, 10);
- W1 = MIN(-W1, W1);
+ rewards->W1 = strtol(argv[args], &exeBasic, 10);
+ rewards->W1 = MIN(-rewards->W1, rewards->W1);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-gapopen\".\n");
exit(4);
@@ -648,23 +656,71 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-gapextend") == 0) {
++args;
if(args < argc) {
- U = strtol(argv[args], &exeBasic, 10);
- U = MIN(-U, U);
+ rewards->U = strtol(argv[args], &exeBasic, 10);
+ rewards->U = MIN(-rewards->U, rewards->U);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-gapextend\".\n");
exit(4);
}
}
+ } else if(strcmp(argv[args], "-localopen") == 0) {
+ /* here */
+ /* add to help */
+ ++args;
+ if(args < argc) {
+ rewards->Wl = strtol(argv[args], &exeBasic, 10);
+ rewards->Wl = MIN(-rewards->Wl, rewards->Wl);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+ exit(4);
+ }
+ }
+ } else if(strcmp(argv[args], "-Npenalty") == 0) {
+ /* here */
+ /* add to help */
+ ++args;
+ if(args < argc) {
+ rewards->Mn = strtol(argv[args], &exeBasic, 10);
+ rewards->Mn = MIN(-rewards->Mn, rewards->Mn);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+ exit(4);
+ }
+ }
} else if(strcmp(argv[args], "-per") == 0) {
++args;
if(args < argc) {
- PE = strtol(argv[args], &exeBasic, 10);
- PE = abs(PE);
+ rewards->PE = strtol(argv[args], &exeBasic, 10);
+ rewards->PE = abs(rewards->PE);
if(*exeBasic != 0) {
fprintf(stderr, "Invalid argument at \"-per\".\n");
exit(4);
}
}
+ } else if(strcmp(argv[args], "-transition") == 0) {
+ /* here */
+ /* add to help */
+ ++args;
+ if(args < argc) {
+ Ts = strtol(argv[args], &exeBasic, 10);
+ Ts = MIN(-Ts, Ts);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+ exit(4);
+ }
+ }
+ } else if(strcmp(argv[args], "-transversion") == 0) {
+ /* here */
+ /* add to help */
+ ++args;
+ if(args < argc) {
+ Tv = strtol(argv[args], &exeBasic, 10);
+ Tv = MIN(-Tv, Tv);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "Invalid argument at \"-localopen\".\n");
+ exit(4);
+ }
+ }
} else if(strcmp(argv[args], "-and") == 0) {
cmp = &cmp_and;
} else if(strcmp(argv[args], "-boot") == 0) {
@@ -727,11 +783,21 @@ int kma_main(int argc, char *argv[]) {
nf = 1;
} else if(strcmp(argv[args], "-cge") == 0) {
scoreT = 0.75;
- M = 1;
- MM = -3;
- W1 = -5;
- U = -1;
- PE = 17;
+ rewards->M = 1;
+ rewards->MM = -3;
+ rewards->W1 = -5;
+ rewards->U = -1;
+ rewards->PE = 17;
+ } else if(strcmp(argv[args], "-tmp") == 0) {
+ tmp = 1;
+ if(++args < argc) {
+ if(argv[args][0] != '-') {
+ tmpF(argv[args]);
+ tmp = 0;
+ } else {
+ --args;
+ }
+ }
} else if(strcmp(argv[args], "-spltDB") == 0) {
spltDB = 1;
} else if(strcmp(argv[args], "-status") == 0) {
@@ -782,40 +848,40 @@ int kma_main(int argc, char *argv[]) {
fprintf(stderr, " Too few arguments handed\n");
fprintf(stderr, " Printing help message:\n");
helpMessage(1);
+ } else if(tmp) {
+ /* set tmp files */
+ tmpF(outputfilename);
}
if(fileCounter == 0 && fileCounter_PE == 0 && fileCounter_INT == 0) {
- inputfiles = malloc(sizeof(char*));
- if(!inputfiles) {
- ERROR();
- }
+ inputfiles = smalloc(sizeof(char*));
inputfiles[0] = "--";
fileCounter = 1;
}
status = 0;
/* set scoring matrix */
- d = smalloc(5 * sizeof(int *));
- for(i = 0; i < 4; ++i) {
- d[i] = smalloc(5 * sizeof(int));
- for(j = 0; j < 4; ++j) {
- d[i][j] = MM;
+ rewards->MM = (Ts + Tv - 1) / 2; /* avg. of transition and transversion, rounded down */
+ d = smalloc(5 * sizeof(int *) + 25 * sizeof(int));
+ *d = (int *) (d + 5);
+ i = 0;
+ while(i < 4) {
+ j = 4;
+ d[i][j] = rewards->Mn;
+ while(j--) {
+ d[i][j] = Tv;
}
- d[i][i] = M;
+ d[i][(i - 2) < 0 ? (i + 2) : (i - 2)] = Ts;
+ d[i][i] = rewards->M;
+ j = i++;
+ d[i] = d[j] + 5;
}
- d[4] = smalloc(5 * sizeof(int));
- for(i = 0; i < 5; ++i) {
- d[4][i] = U;
- d[i][4] = U;
+ i = 5;
+ while(i--) {
+ d[4][i] = rewards->Mn;
}
d[4][4] = 0;
- rewards = smalloc(sizeof(Penalties));
rewards->d = (int **) d;
- rewards->W1 = W1;
- rewards->U = U;
- rewards->M = M;
- rewards->MM = MM;
- rewards->PE = PE;
if(spltDB && targetNum != 1) {
/* allocate space for commands */
@@ -827,8 +893,8 @@ int kma_main(int argc, char *argv[]) {
} else if(escape) {
size += 2;
}
- size += strlen(argv[i]);
- if(strncmp(argv[i], "-i", 2) == 0) {
+ size += strlen(argv[args]);
+ if(strncmp(argv[args], "-i", 2) == 0) {
escape = 1;
}
}
=====================================
loadupdate.c
=====================================
@@ -148,17 +148,15 @@ unsigned ** hashMapKMA_openValues(HashMapKMA *src) {
return Values;
}
-HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths) {
+unsigned load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths, HashMapKMA *finalDB) {
- int file_len, out_len, DB_size;
+ int file_len, out_len, DB_size, kmerindex;
FILE *infile;
- HashMapKMA *finalDB;
file_len = strlen(templatefilename);
out_len = strlen(outputfilename);
/* load hash */
- finalDB = smalloc(sizeof(HashMapKMA));
strcat(templatefilename, ".comp.b");
infile = sfopen(templatefilename, "rb");
if(hashMapKMAload(finalDB, infile)) {
@@ -178,13 +176,14 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
}
templatefilename[file_len] = 0;
fread(&DB_size, sizeof(unsigned), 1, infile);
- if(finalDB->prefix_len) {
+ if(finalDB->prefix) {
*template_lengths = smalloc((DB_size << 1) * sizeof(unsigned));
*template_slengths = smalloc((DB_size << 1) * sizeof(unsigned));
*template_ulengths = smalloc((DB_size << 1) * sizeof(unsigned));
+ fread(*template_lengths, sizeof(unsigned), DB_size, infile);
fread(*template_slengths, sizeof(unsigned), DB_size, infile);
fread(*template_ulengths, sizeof(unsigned), DB_size, infile);
- fread(*template_lengths, sizeof(unsigned), DB_size, infile);
+ kmerindex = **template_slengths;
**template_ulengths = DB_size << 1;
**template_slengths = DB_size << 1;
} else {
@@ -192,6 +191,7 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
*template_slengths = 0;
*template_ulengths = 0;
fread(*template_lengths, sizeof(unsigned), DB_size, infile);
+ kmerindex = **template_lengths;
**template_lengths = DB_size << 1;
}
fclose(infile);
@@ -222,5 +222,5 @@ HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **t
}
templatefilename[file_len] = 0;
- return finalDB;
+ return kmerindex;
}
=====================================
loadupdate.h
=====================================
@@ -24,4 +24,4 @@ void * memdup(const void * src, size_t size);
int CP(char *templatefilename, char *outputfilename);
HashMap * hashMapKMA_openChains(HashMapKMA *src);
unsigned ** hashMapKMA_openValues(HashMapKMA *src);
-HashMapKMA * load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths);
+unsigned load_DBs(char *templatefilename, char *outputfilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned **template_slengths, HashMapKMA *finalDB);
=====================================
makeindex.c
=====================================
@@ -71,7 +71,6 @@ void makeDB(HashMap *templates, int kmerindex, char **inputfiles, int fileCount,
seq_out = sfopen(outputfilename, "wb");
outputfilename[file_len] = 0;
}
-
if(dumpIndex == &makeIndexing) {
if(appender) {
strcat(outputfilename, ".index.b");
@@ -109,16 +108,16 @@ void makeDB(HashMap *templates, int kmerindex, char **inputfiles, int fileCount,
while(isspace(*--seq)) {
*seq = 0;
}
-
if(bias > 0) {
fprintf(name_out, "%s B%d\n", header->seq + 1, bias);
} else {
fprintf(name_out, "%s\n", header->seq + 1);
}
updateAnnotsPtr(compressor, templates->DB_size, kmerindex, seq_out, index_out, template_lengths, template_ulengths, template_slengths);
+ /* here */
fprintf(stderr, "# Added:\t%s\n", header->seq + 1);
- if(++templates->DB_size == USHRT_MAX) {
+ if(++(templates->DB_size) == USHRT_MAX) {
/* convert values to unsigned */
convertToU(templates);
}
=====================================
penalties.h
=====================================
@@ -21,10 +21,12 @@
typedef struct penalties Penalties;
struct penalties {
int **d;
- int W1;
- int U;
int M;
int MM;
+ int U;
+ int W1;
+ int Wl;
+ int Mn;
int PE;
};
#endif
=====================================
runkma.c
=====================================
@@ -43,6 +43,7 @@
#include "sam.h"
#include "stdnuc.h"
#include "stdstat.h"
+#include "tmp.h"
#include "updatescores.h"
#include "vcf.h"
#ifndef _WIN32
@@ -254,7 +255,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
alignment_out = 0;
consensus_out = 0;
}
- frag_out_raw = tmpfile();
+ frag_out_raw = tmpF(0);
if(!frag_out_raw) {
ERROR();
}
@@ -1394,7 +1395,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
alignment_out = 0;
consensus_out = 0;
}
- frag_out_raw = tmpfile();
+ frag_out_raw = tmpF(0);
if(!frag_out_raw) {
ERROR();
}
=====================================
savekmers.c
=====================================
@@ -393,17 +393,13 @@ int getProxiMatchSparse(int *bestTemplates, int *Score, int kmersize, int n_kmer
return bestScore;
}
-int clearScore(int *bestTemplates, int *Score) {
+static int clearScore(int *bestTemplates, int *Score) {
int i;
i = *bestTemplates + 1;
while(--i) {
Score[*++bestTemplates] = 0;
- if(*bestTemplates < 0) {
- fprintf(stderr, "Moher fucker.\n");
- exit(1);
- }
}
return 0;
@@ -4759,3 +4755,299 @@ void ankerAndClean_MEM(int *regionTemplates, int *Score, int *Score_r, char *inc
header->seq[l] = 0;
header->len = l;
}
+
+int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out) {
+
+ /* return 0 for match, 1 otherwise */
+ static int *Sizes = 0;
+ static KmerAnker **tVF_scores, **tVR_scores;
+
+ int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, maxS;
+ int *bests;
+ unsigned i, j, rc, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
+ unsigned hitCounter, hitCounter_r;
+ unsigned *values, *last;
+ short unsigned *values_s;
+ char *include;
+ CompDNA *qseqP;
+ KmerAnker *V_score, *V_scores, *VF_scores, *VR_scores, *tmp_score;
+
+ if(qseq == 0) {
+ if(Sizes) {
+ free(Sizes);
+ Sizes = 0;
+ i = *bestTemplates;
+ while(i--) {
+ free(tVF_scores[i]);
+ }
+ free(tVF_scores);
+ } else {
+ i = *bestTemplates;
+ Sizes = smalloc(i * sizeof(int));
+ tVF_scores = smalloc(2 * i * sizeof(KmerAnker *));
+ tVR_scores = tVF_scores + i;
+ while(i--) {
+ Sizes[i] = 256;
+ tVF_scores[i] = calloc(512, sizeof(KmerAnker));
+ if(!tVF_scores[i]) {
+ ERROR();
+ } else {
+ tVR_scores[i] = tVF_scores[i] + 256;
+ }
+ }
+ }
+ return 0;
+ } else if(qseq->seqlen < (kmersize = templates->kmersize)) {
+ return 1;
+ } else if((DB_size = templates->DB_size) < USHRT_MAX) {
+ SU = 1;
+ } else {
+ SU = 0;
+ }
+ shifter = sizeof(long unsigned) * sizeof(long unsigned) - (templates->kmersize << 1);
+ M = rewards->M;
+ MM = rewards->MM;
+ U = rewards->U;
+ W1 = rewards->W1;
+ Wl = rewards->Wl;
+ include = (char *) (extendScore + (templates->DB_size + 1));
+ hitCounter = 0;
+ hitCounter_r = 0;
+ values = 0;
+ values_s = 0;
+
+ if(Sizes[*Score] < qseq->size) {
+ Sizes[*Score] = qseq->size << 1;
+ free(tVF_scores[*Score]);
+ tVF_scores[*Score] = calloc(qseq->size, sizeof(KmerAnker));
+ if(!tVF_scores[*Score]) {
+ ERROR();
+ } else {
+ tVR_scores[*Score] = tVF_scores[*Score] + qseq->size;
+ }
+ }
+ VF_scores = tVF_scores[*Score];
+ VR_scores = tVR_scores[*Score];
+
+ V_scores = VF_scores;
+ qseqP = qseq;
+ rc = 2;
+ while(rc--) {
+ if(rc == 0) {
+ V_scores = VR_scores;
+ qseqP = qseq_r;
+ rc_comp(qseq, qseq_r);
+ hitCounter = hitCounter_r;
+ hitCounter_r = 0;
+ }
+
+ HIT = exhaustive;
+ j = 0;
+ ++*(qseqP->N);
+ qseqP->N[*(qseqP->N)] = qseqP->seqlen;
+ for(i = 1; i <= *(qseqP->N) && !HIT; ++i) {
+ end = qseqP->N[i] - kmersize + 1;
+ while(j < end && hashMap_get(templates, getKmer(qseqP->seq, j, shifter)) == 0) {
+ j += kmersize;
+ }
+ if(j < end) {
+ HIT = 1;
+ } else {
+ j = qseqP->N[i] + 1;
+ }
+ }
+
+ if(HIT) {
+ V_score = V_scores;
+ V_score->start = (j = 0);
+ V_score->values = (last = 0);
+ V_score->next = 0;
+ Ms = 0;
+ MMs = 0;
+ Us = 0;
+ W1s = 0;
+ gaps = 0;
+ for(i = 1; i <= *(qseqP->N); ++i) {
+ end = qseqP->N[i] - kmersize + 1;
+ while(j < end) {
+ if((values = hashMap_get(templates, getKmer(qseqP->seq, j, shifter)))) {
+ if(values == last) {
+ if(kmersize < gaps) {
+ Ms += kmersize;
+ gaps -= kmersize;
+ if(gaps) {
+ /* go for best scenario */
+ if(gaps == 1) {
+ MMs += 2;
+ } else {
+ gaps -= 2;
+ if((MM << 1) + gaps * M < 0) {
+ Ms += gaps;
+ MMs += 2;
+ }
+ }
+ } else {
+ ++MMs;
+ }
+ } else if(gaps) {
+ --gaps;
+ ++W1s;
+ Us += gaps;
+ } else {
+ ++Ms;
+ }
+ } else {
+ if(last) {
+ /* update and link between ankers */
+ V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ V_score->end = j - gaps + kmersize;
+ ++V_score;
+ }
+ V_score->start = j;
+ V_score->values = (last = values);
+ Ms = 0;
+ MMs = 0;
+ Us = 0;
+ W1s = 0;
+ ++hitCounter_r;
+ }
+ gaps = 0;
+ } else {
+ ++gaps;
+ }
+ j += kmersize;
+ }
+ j = qseqP->N[i] + 1;
+ }
+ if(last) {
+ /* update anker */
+ V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ }
+ }
+ --*(qseqP->N);
+ }
+
+ /* here */
+ /* chaining */
+
+ /*
+ while(chain is good) {
+ 1. make chain. OK
+ 2. choose best.
+ 3. silence "used" ankers.
+ 4. redo chaining if next best hits a silenced anker.
+ }
+ */
+
+ /*
+ for each anker
+ mark last anker w.r.t. highest score on anker
+ */
+
+ /* make chains */
+ maxS = 0;
+ V_score = VF_scores;
+ HIT = hitCounter + 1;
+ bests = bestTemplates;
+ rc = 2;
+ while(rc--) {
+ if(rc == 0) {
+ V_score = VR_scores;
+ HIT = hitCounter_r + 1;
+ bests = bestTemplates_r;
+ }
+ *bests = 0;
+ while(--HIT) {
+ /*
+ Save pos of best score, and chain it (0 if local).
+ Score is always current / w.r.t. the anker you are on.
+ extendScore contains endpos of last hit anker.
+ */
+ start = V_score->start;
+ end = V_score->end;
+
+ /* chain anker */
+ V_score->score = 0;
+ if(SU) {
+ values_s = (short unsigned *) V_score->values;
+ i = *values_s + 1;
+ values_s += i;
+ } else {
+ values = V_score->values;
+ i = *values + 1;
+ values += i;
+ }
+ while(--i) {
+ template = SU ? *--values_s : *--values;
+ if(!include[template]) {
+ include[template] = 1;
+ bests[++*bests] = template;
+ }
+ score = Score[template];
+ gaps = start - (pos = extendScore[template]);
+ if(gaps == 0) {
+ /* continued match */
+ score += M;
+ } else if(gaps < 0) {
+ /* deletion */
+ score += (W1 + abs(gaps + 1) * U);
+ } else if(gaps == 1) {
+ /* one unmatched between */
+ score += MM;
+ } else {
+ /* unmatched bases between */
+ if((gaps = (gaps - 2) * M) < 0) {
+ score += gaps;
+ }
+ score += (MM << 1);
+ }
+
+ /* check local chain is needed */
+ if(score < Wl) {
+ score = Wl + V_score->weight;
+ /* mark as terminating */
+ V_score->next = 0;
+ extendScore[template] = 0;
+ } else {
+ score += V_score->weight;
+ /* mark descendant */
+ tmp_score = V_score;
+ while((--tmp_score)->end != pos);
+ V_score->next = tmp_score;
+ extendScore[template] = end;
+ }
+
+ /* update Scores */
+ if(V_score->score < score) {
+ V_score->score = score;
+ /* here */
+ /* consider several bests */
+ if(maxS < score) {
+ maxS = score;
+ }
+ }
+ Score[template] = score;
+ }
+ ++V_score;
+ }
+
+ /* clear Score */
+ i = *bests + 1;
+ while(--i) {
+ include[*++bests] = 0;
+ }
+ clearScore(bests, Score);
+ clearScore(bests, extendScore);
+ }
+
+
+
+
+
+
+
+
+
+
+ return 1;
+}
=====================================
savekmers.h
=====================================
@@ -25,6 +25,8 @@
#ifndef SAVEKMERS
typedef struct kmerScan_thread KmerScan_thread;
+typedef struct kmerAnker KmerAnker;
+
struct kmerScan_thread {
pthread_t id;
int num;
@@ -40,6 +42,16 @@ struct kmerScan_thread {
Penalties *rewards;
struct kmerScan_thread *next;
};
+
+struct kmerAnker {
+ int score;
+ int weight;
+ unsigned start;
+ unsigned end;
+ unsigned *values;
+ struct kmerAnker *next;
+};
+
#define SAVEKMERS 1;
#endif
@@ -84,3 +96,4 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
int save_kmers_HMM(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);
void ankerAndClean(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
void ankerAndClean_MEM(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
+int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);
=====================================
seq2fasta.c
=====================================
@@ -80,8 +80,7 @@ void printFastas(char *filename, int *template_lengths) {
if(!seq || !compseq) {
ERROR();
}
-
- for(i = 1; i < DB_size; i++) {
+ for(i = 1; i < DB_size; ++i) {
fread(compseq, sizeof(long unsigned), (template_lengths[i] >> 5) + 1, seqfile);
j = template_lengths[i];
@@ -103,7 +102,7 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
const char bases[6] = "ACGTN-";
int i, j, n, max, DB_size, file_len;
- long unsigned *compseq;
+ long unsigned skip, *compseq;
char *seq;
FILE *seqfile, *namefile;
Qseqs *template_name;
@@ -139,11 +138,13 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
if(!seq || !compseq) {
ERROR();
}
-
- for(i = 1; n && i < DB_size; i++) {
+ skip = 0;
+ for(i = 1; n && i < DB_size; ++i) {
if(i == *seqlist) {
/* get seq */
+ fseek(seqfile, skip, SEEK_CUR);
fread(compseq, sizeof(long unsigned), (template_lengths[i] >> 5) + 1, seqfile);
+
j = template_lengths[i];
*(seq += j) = '\n';
while(j--) {
@@ -156,8 +157,10 @@ void printFastaList(char *filename, int *template_lengths, int *seqlist) {
/* get next target */
while(--n && i == *++seqlist);
+ skip = 0;
} else {
nameSkip(namefile, max);
+ skip += (((template_lengths[i] >> 5) + 1) * sizeof(long unsigned));
}
}
}
=====================================
spltdb.c
=====================================
@@ -42,6 +42,7 @@
#include "spltdb.h"
#include "stdnuc.h"
#include "stdstat.h"
+#include "tmp.h"
#include "updatescores.h"
#include "version.h"
#include "vcf.h"
@@ -522,7 +523,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
alignment_out = 0;
consensus_out = 0;
}
- frag_out_raw = tmpfile();
+ frag_out_raw = tmpF(0);
if(!frag_out_raw) {
ERROR();
}
=====================================
tmp.c
=====================================
@@ -0,0 +1,91 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "pherror.h"
+#include "threader.h"
+#include "tmp.h"
+
+FILE * tmpF(const char *location) {
+
+ static int tmpNum = 0;
+ static char *tmpname, *dirname = 0, *filename = 0;
+ static volatile int lock[1] = {0};
+ int fd;
+ FILE *file;
+
+ lock(lock);
+ if(location) {
+ /* set location for tmpfiles */
+ if(filename) {
+ free(filename);
+ filename = 0;
+ } else if(dirname) {
+ free(dirname);
+ dirname = 0;
+ }
+ if(*location) {
+ if(location[strlen(location) - 1] != '/') {
+ filename = smalloc(strlen(location) + 15);
+ tmpname = filename + sprintf(filename, "%s.tmp", location);
+ dirname = 0;
+ } else {
+ dirname = smalloc(strlen(location) + 12);
+ tmpname = dirname + sprintf(dirname, "%s.kma-", location);
+ strcpy(tmpname, "XXXXXX");
+ filename = 0;
+ }
+ }
+ file = 0;
+ } else if(dirname) {
+ if((fd = mkstemp(dirname)) == -1) {
+ ERROR();
+ }
+ if((file = fdopen(fd, "wb+"))) {
+ unlink(dirname);
+ }
+ strcpy(tmpname, "XXXXXX");
+ } else if(filename) {
+ /* open tmpfile on previous location */
+ sprintf(tmpname, "%d", tmpNum++);
+ if((file = fopen(filename, "wb+"))) {
+ unlink(filename);
+ }
+ *tmpname = 0;
+ } else {
+ /* open a "normal" tmp file */
+ file = tmpfile();
+ }
+ unlock(lock);
+
+ return file;
+}
+
+
+
+
+
+
+
+
+
+
=====================================
tmp.h
=====================================
@@ -0,0 +1,22 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <stdio.h>
+
+FILE * tmpF(const char *location);
=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
* limitations under the License.
*/
-#define KMA_VERSION "1.2.6"
+#define KMA_VERSION "1.2.10a"
View it on GitLab: https://salsa.debian.org/med-team/kma/commit/f5137c3d39949889292033da254f861b4981ebbf
--
View it on GitLab: https://salsa.debian.org/med-team/kma/commit/f5137c3d39949889292033da254f861b4981ebbf
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190801/ca540f83/attachment-0001.html>
More information about the debian-med-commit
mailing list