[med-svn] [Git][med-team/kma][upstream] New upstream version 1.3.13
Nilesh Patra
gitlab at salsa.debian.org
Sat Mar 20 18:57:56 GMT 2021
Nilesh Patra pushed to branch upstream at Debian Med / kma
Commits:
45823d7f by Nilesh Patra at 2021-03-21T00:23:19+05:30
New upstream version 1.3.13
- - - - -
15 changed files:
- assembly.c
- frags.c
- index.c
- kma.c
- mt1.c
- mt1.h
- nw.c
- runkma.c
- runkma.h
- savekmers.c
- spltdb.c
- spltdb.h
- vcf.c
- vcf.h
- version.h
Changes:
=====================================
assembly.c
=====================================
@@ -685,16 +685,16 @@ void * assemble_KMA_threaded(void *arg) {
while(i < asm_len) {
/* call template */
if(pos < t_len) {
- aligned_assem->t[i] = bases[getNuc(template_index->seq, pos)];
+ bestNuc = getNuc(template_index->seq, pos);
} else {
- aligned_assem->t[i] = '-';
+ bestNuc = 5;
}
+ aligned_assem->t[i] = bases[bestNuc];
/* call query */
- bestNuc = 0;
- bestScore = assembly[pos].counts[0];
- depthUpdate = bestScore;
- for(j = 1; j < 6; ++j) {
+ bestScore = assembly[pos].counts[bestNuc];
+ depthUpdate = 0;
+ for(j = 0; j < 6; ++j) {
if(bestScore < assembly[pos].counts[j]) {
bestScore = assembly[pos].counts[j];
bestNuc = j;
@@ -704,7 +704,9 @@ void * assemble_KMA_threaded(void *arg) {
bestNuc = bases[bestNuc];
/* check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = assembly[pos].counts[4];
bestNuc = 4;
@@ -719,14 +721,20 @@ void * assemble_KMA_threaded(void *arg) {
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[pos].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
/* determine base at current position */
+ /*
if(bcd <= depthUpdate) {
bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
} else {
bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[pos]);
}
+ */
+ bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
aligned_assem->q[i] = bestNuc;
if(bestNuc != '-') {
@@ -1128,8 +1136,7 @@ void * assemble_KMA_dense_threaded(void *arg) {
aligned_assem->t[i] = bases[bestNuc];
/* call query */
- bestNuc = 5;
- bestScore = 0;
+ bestScore = assembly[i].counts[bestNuc];
depthUpdate = 0;
for(j = 0; j < 6; ++j) {
if(bestScore < assembly[i].counts[j]) {
@@ -1141,7 +1148,9 @@ void * assemble_KMA_dense_threaded(void *arg) {
bestNuc = bases[bestNuc];
/* Check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = 0;
bestNuc = 4;
@@ -1156,14 +1165,20 @@ void * assemble_KMA_dense_threaded(void *arg) {
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[i].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
/* determine base at current position */
+ /*
if(bcd <= depthUpdate) {
bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[i]);
} else {
bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[i]);
}
+ */
+ bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[i]);
aligned_assem->q[i] = bestNuc;
if(bestNuc != '-') {
@@ -1476,16 +1491,16 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
while(i < end) {
/* call template */
if(pos < t_len) {
- aligned_assem->t[i] = bases[getNuc(seq, pos)];
+ bestNuc = getNuc(seq, pos);
} else {
- aligned_assem->t[i] = '-';
+ bestNuc = 5;
}
+ aligned_assem->t[i] = bases[bestNuc];
/* call query */
- bestNuc = 0;
- bestScore = assembly[pos].counts[0];
- depthUpdate = bestScore;
- for(j = 1; j < 6; ++j) {
+ bestScore = assembly[pos].counts[bestNuc];
+ depthUpdate = 0;
+ for(j = 0; j < 6; ++j) {
if(bestScore < assembly[pos].counts[j]) {
bestScore = assembly[pos].counts[j];
bestNuc = j;
@@ -1495,7 +1510,9 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
bestNuc = bases[bestNuc];
/* check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = assembly[pos].counts[4];
bestNuc = 4;
@@ -1510,14 +1527,20 @@ void callConsensus(AssemInfo *matrix, Assem *aligned_assem, long unsigned *seq,
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[pos].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
/* determine base at current position */
+ /*
if(bcd <= depthUpdate) {
bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
} else {
bestNuc = baseCall('-', aligned_assem->t[i], 0, 0, evalue, &assembly[pos]);
}
+ */
+ bestNuc = baseCall(bestNuc, aligned_assem->t[i], bestScore, depthUpdate, evalue, &assembly[pos]);
aligned_assem->q[i] = bestNuc;
if(bestNuc != '-') {
@@ -1616,11 +1639,11 @@ void * assemble_KMA(void *arg) {
static volatile int excludeIn[1] = {0}, excludeOut[1] = {0}, excludeMatrix[1] = {0};
static volatile int thread_wait = 0, thread_init = 0, thread_begin = 0;
static volatile int mainTemplate = -2, next;
- static int t_len, load;
+ static int t_len, load, seq_in;
static char *template_name;
static HashMapCCI *template_index;
Assemble_thread *thread = arg;
- int i, minlen, aln_len, kmersize, sam, chunk, seq_in, ef, template;
+ int i, minlen, aln_len, kmersize, sam, chunk, ef, template;
int read_score, asm_len, nextTemplate, file_i, file_count, delta, status;
int thread_num, mq, bcd, start, end, q_start, q_end;
int stats[5], buffer[8], *qBoundPtr;
=====================================
frags.c
=====================================
@@ -85,13 +85,15 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
/* copy seq */
update = (char *) dest->next;
++q_len;
+ --qseq;
while(--q_len) {
- *update++ = bases[*qseq++];
+ *update++ = bases[*++qseq];
}
avail -= check;
check = 33;
if(avail < check) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -102,6 +104,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
for(i = 1; i < bestHits; ++i) {
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -112,6 +115,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
}
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -121,6 +125,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
update += check;
for(i = 1; i < bestHits; ++i) {
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -131,6 +136,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
}
if(avail < 11) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -139,7 +145,8 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
avail -= check;
update += check;
for(i = 1; i < bestHits; ++i) {
- if(avail < 11) {
+ if(avail < 12) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
@@ -151,6 +158,7 @@ void updateAllFrag(unsigned char *qseq, int q_len, int bestHits, int best_read_s
check = header->len + 1;
if(avail < check) {
+ dest->bytes = avail;
writeGzFileBuff(dest);
avail = dest->bytes;
update = (char *) dest->next;
=====================================
index.c
=====================================
@@ -231,8 +231,9 @@ int index_main(int argc, char *argv[]) {
} else if(kmersize == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmersize = 16;
- } else if(kmersize > 32) {
- kmersize = 32;
+ } else if(kmersize > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
}
kmerindex = kmersize;
}
@@ -246,8 +247,9 @@ int index_main(int argc, char *argv[]) {
} else if(kmersize == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmersize = 16;
- } else if(kmersize > 32) {
- kmersize = 32;
+ } else if(kmersize > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
}
}
} else if(strcmp(argv[args], "-k_i") == 0) {
@@ -260,8 +262,9 @@ int index_main(int argc, char *argv[]) {
} else if(kmerindex == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmerindex = 16;
- } else if(kmerindex > 32) {
- kmerindex = 32;
+ } else if(kmerindex > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
}
}
} else if(strcmp(argv[args], "-CS") == 0) {
=====================================
kma.c
=====================================
@@ -207,12 +207,11 @@ int kma_main(int argc, char *argv[]) {
static unsigned xml, nc, nf, shm, exhaustive, verbose;
static char *outputfilename, *templatefilename, **templatefilenames;
static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
- static double ID_t, scoreT, coverT, mrc, evalue;
+ static double ID_t, scoreT, coverT, mrc, evalue, minFrac, support;
static Penalties *rewards;
int i, j, args, exe_len, fileCount, size, escape, tmp, step1, step2;
unsigned totFrags;
char *to2Bit, *exeBasic, *myTemplatefilename;
- double support;
FILE *templatefile, *ioStream;
time_t t0, t1;
Qseqs qseq;
@@ -254,6 +253,8 @@ int kma_main(int argc, char *argv[]) {
kmersize = 0;
minlen = 16;
evalue = 0.05;
+ support = 0.0;
+ minFrac = 1.0;
exhaustive = 0;
shm = 0;
mq = 0;
@@ -498,8 +499,9 @@ int kma_main(int argc, char *argv[]) {
} else if(kmersize == 0) {
fprintf(stderr, "# Invalid kmersize parsed, using default\n");
kmersize = 16;
- } else if(kmersize > 32) {
- kmersize = 32;
+ } else if(kmersize > 31) {
+ fprintf(stderr, "# Invalid kmersize parsed, max size is 31\n");
+ exit(1);
}
}
} else if(strcmp(argv[args], "-ml") == 0) {
@@ -590,8 +592,8 @@ int kma_main(int argc, char *argv[]) {
one2one = 0;
} else if(strcmp(argv[args], "-proxi") == 0) {
if(++args < argc) {
- support = strtod(argv[args], &exeBasic);
- if(*exeBasic != 0 || support < 0 || 1 < support) {
+ minFrac = strtod(argv[args], &exeBasic);
+ if(*exeBasic != 0 || minFrac < 0 || 1 < minFrac) {
fprintf(stderr, "Invalid argument at \"-proxi\".\n");
exit(1);
} else {
@@ -603,13 +605,13 @@ int kma_main(int argc, char *argv[]) {
getF = &getF_Proxi;
getR = &getR_Proxi;
getChainTemplates = &getProxiChainTemplates;
- getMatch((int *)(&support), 0);
- getMatchSparse((int *)(&support), 0, 0, 0, 0, 0);
- getSecondPen((int *)(&support), 0, 0, 0, 0, 0, 0, 0);
- getF((int *)(&support), 0, 0, 0, 0);
- ankerAndClean((int *)(&support), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- ankerAndClean_MEM((int *)(&support), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- getProxiChainTemplates(0, (const Penalties *)(&support), 0, 0, 0, 0, 0);
+ getMatch((int *)(&minFrac), 0);
+ getMatchSparse((int *)(&minFrac), 0, 0, 0, 0, 0);
+ getSecondPen((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0);
+ getF((int *)(&minFrac), 0, 0, 0, 0);
+ ankerAndClean((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ ankerAndClean_MEM((int *)(&minFrac), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ getProxiChainTemplates(0, (const Penalties *)(&minFrac), 0, 0, 0, 0, 0);
}
} else {
fprintf(stderr, "Need argument at: \"-proxi\".\n");
@@ -633,7 +635,7 @@ int kma_main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-p") == 0 || strcmp(argv[args], "-e") == 0) {
if(++args < argc) {
evalue = strtod(argv[args], &exeBasic);
- if(*exeBasic != 0) {
+ if(*exeBasic != 0 || evalue < 0 || 1.0 < evalue) {
fprintf(stderr, "Invalid argument at \"%s\".\n", argv[--args]);
exit(1);
}
@@ -642,7 +644,7 @@ int kma_main(int argc, char *argv[]) {
if(++args < argc && argv[args][0] != '-') {
significantBase = &significantAndSupport;
support = strtod(argv[args], &exeBasic);
- if(*exeBasic != 0 || 1 < support) {
+ if(*exeBasic != 0 || support < 0 || 1 < support) {
fprintf(stderr, "Invalid argument at \"-bc\".\n");
exit(1);
} else {
@@ -1285,7 +1287,7 @@ int kma_main(int argc, char *argv[]) {
} else if(Mt1) {
myTemplatefilename = smalloc(strlen(templatefilename) + 64);
strcpy(myTemplatefilename, templatefilename);
- runKMA_Mt1(myTemplatefilename, outputfilename, strjoin(argv, argc), kmersize, minlen, rewards, ID_t, mq, scoreT, mrc, evalue, bcd, Mt1, ref_fsa, print_matrix, vcf, xml, sam, nc, nf, thread_num);
+ runKMA_Mt1(myTemplatefilename, outputfilename, strjoin(argv, argc), kmersize, minlen, rewards, ID_t, mq, scoreT, mrc, evalue, support, bcd, Mt1, ref_fsa, print_matrix, vcf, xml, sam, nc, nf, thread_num);
free(myTemplatefilename);
fprintf(stderr, "# Closing files\n");
} else if(step2) {
@@ -1304,11 +1306,11 @@ int kma_main(int argc, char *argv[]) {
myTemplatefilename = smalloc(strlen(templatefilename) + 64);
strcpy(myTemplatefilename, templatefilename);
if(spltDB == 0 && targetNum != 1) {
- status |= runKMA_spltDB(templatefilenames, targetNum, outputfilename, argc, argv, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+ status |= runKMA_spltDB(templatefilenames, targetNum, outputfilename, argc, argv, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
} else if(mem_mode) {
- status |= runKMA_MEM(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+ status |= runKMA_MEM(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
} else {
- status |= runKMA(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
+ status |= runKMA(myTemplatefilename, outputfilename, exeBasic, ConClave, kmersize, minlen, rewards, extendedFeatures, ID_t, mq, scoreT, mrc, evalue, support, bcd, ref_fsa, print_matrix, print_all, vcf, xml, sam, nc, nf, shm, thread_num, verbose);
}
free(myTemplatefilename);
fprintf(stderr, "# Closing files\n");
=====================================
mt1.c
=====================================
@@ -17,6 +17,7 @@
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -80,9 +81,9 @@ void printFsa_pairMt1(Qseqs *header, Qseqs *qseq, Qseqs *header_r, Qseqs *qseq_r
}
-void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num) {
+void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num) {
- int i, j, aln_len, t_len, coverScore, file_len, DB_size, delta;
+ int i, j, aln_len, t_len, coverScore, file_len, DB_size, delta, seq_in;
int *template_lengths;
long unsigned read_score, seeker;
double p_value, id, q_id, cover, q_cover;
@@ -213,10 +214,13 @@ void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int
}
strcat(templatefilename, ".seq.b");
- DB_file = sfopen(templatefilename, "rb");
+ seq_in = open(templatefilename, O_RDONLY);
+ if(seq_in == -1) {
+ ERROR();
+ }
templatefilename[file_len] = 0;
- template_index = alignLoad_fly(0, fileno(DB_file), *template_lengths, kmersize, seeker);
- fclose(DB_file);
+ template_index = alignLoad_fly(0, seq_in, *template_lengths, kmersize, seeker);
+ close(seq_in);
/* get name */
strcat(templatefilename, ".name");
@@ -427,7 +431,7 @@ void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int
updateMatrix(matrix_out, thread->template_name, template_index->seq, matrix, t_len);
}
if(vcf) {
- updateVcf(thread->template_name, template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
/* destroy this DB index */
=====================================
mt1.h
=====================================
@@ -24,4 +24,4 @@
void printFsaMt1(Qseqs *header, Qseqs *qseq, CompDNA *compressor, FILE *out);
void printFsa_pairMt1(Qseqs *header, Qseqs *qseq, Qseqs *header_r, Qseqs *qseq_r, CompDNA *compressor, FILE *out);
-void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num);
+void runKMA_Mt1(char *templatefilename, char *outputfilename, char *exePrev, int kmersize, int minlen, Penalties *rewards, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int Mt1, int ref_fsa, int print_matrix, int vcf, int xml, int sam, int nc, int nf, int thread_num);
=====================================
nw.c
=====================================
@@ -45,6 +45,7 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
t_len += aligned->pos;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
@@ -109,7 +110,6 @@ AlnScore NW(const long unsigned *template, const unsigned char *queryOrg, int k,
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
if(0 < k) {
E_ptr = E;
for(m = 0; m < t_len; ++m) {
@@ -331,6 +331,7 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
t_len += template_length;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
@@ -402,7 +403,6 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
E_ptr = E + (t_len * (bq_len + 1));
c_pos = (t_len + q_len) >> 1;
@@ -661,6 +661,7 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
t_len += template_length;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
@@ -709,7 +710,6 @@ AlnScore NW_score(const long unsigned *template, const unsigned char *queryOrg,
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
if(0 < k) {
E_ptr = E;
for(m = 0; m < t_len; ++m) {
@@ -912,6 +912,7 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
t_len += template_length;
}
query = (unsigned char*)(queryOrg + q_s);
+ Stat.pos = 0;
if(t_len == 0 || q_len == 0) {
if(t_len == q_len) {
@@ -967,7 +968,6 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
E = matrices->E;
thisScore = (t_len + q_len) * (MM + U + W1);
Stat.score = thisScore;
- Stat.pos = 0;
E_ptr = E + (t_len * (bq_len + 1));
c_pos = (t_len + q_len) >> 1;
=====================================
runkma.c
=====================================
@@ -17,6 +17,7 @@
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
+#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
@@ -135,7 +136,7 @@ char * nameLoad(Qseqs *name, FILE *infile) {
return (char *) name->seq;
}
-int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
int i, j, tmp_template, tmp_tmp_template, file_len, bestTemplate, tot;
int template, bestHits, t_len, start, end, aln_len, status, rand, sparse;
@@ -149,7 +150,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
long unsigned *w_scores, *uniq_alignment_scores, *alignment_scores;
double tmp_score, bestScore, id, q_id, cover, q_cover, p_value;
long double depth, expected, q_value;
- FILE *inputfile, *frag_in_raw, *seq_in, *res_out, *name_file;
+ FILE *inputfile, *frag_in_raw, *res_out, *name_file;
FILE *alignment_out, *consensus_out, *frag_out_raw, **template_fragments;
FILE *extendedFeatures_out, *xml_out;
time_t t0, t1;
@@ -191,11 +192,14 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
/* load databases */
strcat(templatefilename, ".seq.b");
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
+ if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
+ ERROR();
+ }
templatefilename[file_len] = 0;
templates_index = calloc(DB_size, sizeof(HashMapCCI*));
if(!templates_index) {
@@ -1254,7 +1258,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
}
if(vcf) {
- updateVcf(thread->template_name, templates_index[template]->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
} else {
@@ -1326,7 +1330,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
return status;
}
-int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
/* runKMA_MEM is a memory saving version of runKMA,
at the cost it chooses best templates based on kmers
@@ -1344,7 +1348,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
long unsigned *w_scores, *uniq_alignment_scores, *alignment_scores;
double tmp_score, bestScore, id, cover, q_id, q_cover, p_value;
long double depth, q_value, expected;
- FILE *inputfile, *frag_in_raw, *seq_in, *res_out, *name_file;
+ FILE *inputfile, *frag_in_raw, *res_out, *name_file;
FILE *alignment_out, *consensus_out, *frag_out_raw, **template_fragments;
FILE *extendedFeatures_out, *xml_out;
time_t t0, t1;
@@ -1395,11 +1399,11 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
kmersize = 16;
}
strcat(templatefilename, ".seq.b");
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
ERROR();
}
@@ -1502,6 +1506,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
while((rc_flag = get_ankers(matched_templates, qseq_comp, header, &flag, inputfile)) != 0) {
if(*matched_templates) { // SE
read_score = 0;
+ qseq_r->len = 0;
} else { // PE
read_score = get_ankers(matched_templates, qseq_r_comp, header_r, &flag_r, inputfile);
read_score = labs(read_score);
@@ -1536,7 +1541,6 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
if(rc_flag < 0 && 0 < matched_templates[*matched_templates]) {
bestHits = -bestHits;
}
-
if(read_score && kmersize <= qseq_r->len) {
unCompDNA(qseq_r_comp, qseq_r->seq);
update_Scores_pe(qseq->seq, qseq->len, qseq_r->seq, qseq_r->len, bestHits, best_read_score + read_score, best_start_pos, best_end_pos, bestTemplates, header, header_r, flag, flag_r, alignment_scores, uniq_alignment_scores, frag_out_raw);
@@ -2420,7 +2424,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
}
if(vcf) {
- updateVcf(thread->template_name, thread->template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
} else {
@@ -2475,7 +2479,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
}
/* Close files */
- fclose(seq_in);
+ close(seq_in_no);
fclose(res_out);
if(alignment_out) {
fclose(alignment_out);
=====================================
runkma.h
=====================================
@@ -26,5 +26,5 @@
unsigned char * ustrdup(unsigned char *src, size_t n);
int load_DBs_KMA(char *templatefilename, long unsigned **alignment_scores, long unsigned **uniq_alignment_scores, int **template_lengths, unsigned shm);
char * nameLoad(Qseqs *name, FILE *infile);
-int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
-int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
=====================================
savekmers.c
=====================================
@@ -1885,7 +1885,8 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
}
/* get best match(es) */
- if(hitCounter * kmersize > (n_kmers - hitCounter)) {
+ /*if(hitCounter * kmersize > (n_kmers - hitCounter)) {*/
+ if(hitCounter) {
bestScore = getMatchSparse(bestTemplates, Score, kmersize, n_kmers, M, MM);
/*
@@ -2007,7 +2008,8 @@ int save_kmers_Sparse(const HashMapKMA *templates, const Penalties *rewards, int
qseq->N[0]--;
/* get best match(es) */
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
} else {
*bestTemplates = clearScore(bestTemplates, Score);
@@ -2258,8 +2260,9 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
qseq->N[0]--;
/* get best match(es) */
- //clearScore(bestTemplates, extendScore);
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*clearScore(bestTemplates, extendScore);*/
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
/*
bestHits = 0;
@@ -2549,8 +2552,9 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
}
/* get best match(es) */
- //clearScore(bestTemplates, extendScore);
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /* clearScore(bestTemplates, extendScore); */
+ /* if(hitCounter * kmersize > (end - hitCounter - kmersize)) { */
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
/*
bestHits = 0;
@@ -2791,8 +2795,9 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
}
/* get best match(es) */
- //clearScore(bestTemplates_r, extendScore);
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*clearScore(bestTemplates_r, extendScore);*/
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore_r = getMatch(bestTemplates_r, Score_r);
/*
@@ -2953,7 +2958,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
reps = 0;
/* get best match(es) */
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore = getMatch(bestTemplates, Score);
@@ -3064,7 +3070,8 @@ int save_kmers_count(const HashMapKMA *templates, const Penalties *rewards, int
reps = 0;
/* get best match(es) */
- if(hitCounter * kmersize > (end - hitCounter + kmersize)) {
+ /*if(hitCounter * kmersize > (end - hitCounter + kmersize)) {*/
+ if(hitCounter) {
bestScore_r = getMatch(bestTemplates_r, Score_r);
/*
@@ -3135,7 +3142,8 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
}
/* get forward */
- if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive)) && (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+ /*if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive)) && (qseq->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {*/
+ if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq, extendScore, exhaustive))) {
/* got hits */
bestScore = getF(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
@@ -3160,7 +3168,8 @@ int save_kmers_unionPair(const HashMapKMA *templates, const Penalties *rewards,
*extendScore = 1;
/* get reverse */
- if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive)) && (qseq_r->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {
+ /*if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive)) && (qseq_r->seqlen - hitCounter - kmersize) < hitCounter * kmersize) {*/
+ if((hitCounter = get_kmers_for_pair_ptr(templates, rewards, bestTemplates, bestTemplates_r, Score, Score_r, qseq_r, extendScore, exhaustive))) {
if(bestScore) {
bestScore_r = getR(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates);
} else {
@@ -3562,8 +3571,10 @@ int save_kmers_forcePair(const HashMapKMA *templates, const Penalties *rewards,
*extendScore = 1;
/* get reverse */
- if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) &&
+ /*if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) &&
(qseq->seqlen + qseq_r->seqlen - hitCounter - hitCounter_r - (kmersize << 1)) < (hitCounter + hitCounter_r) * kmersize &&
+ (bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {*/
+ if((hitCounter_r = get_kmers_for_pair_ptr(templates, rewards, bestTemplates_r, bestTemplates, Score_r, Score, qseq_r, extendScore, exhaustive)) &&
(bestScore = getSecondForce(bestTemplates, bestTemplates_r, Score, Score_r, regionTemplates, regionScores))) {
if((qseq->seqlen + qseq_r->seqlen - bestScore) < bestScore * kmersize) {
@@ -3952,8 +3963,9 @@ int save_kmers_HMM(const HashMapKMA *templates, const Penalties *rewards, int *b
stop = j + kmersize - 1;
/* evaluate hit */
- if(hitCounter > 0 && (hitCounter * kmersize > (stop - start - hitCounter + kmersize))
- && ((stop - start) > minLen || start == 0 || stop == seqlen)) {
+ /*if(hitCounter > 0 && (hitCounter * kmersize > (stop - start - hitCounter + kmersize))
+ && ((stop - start) > minLen || start == 0 || stop == seqlen)) {*/
+ if(hitCounter > 0 && ((stop - start) > minLen || start == 0 || stop == seqlen)) {
if(deCon) {
if(SU) {
for(k = start; k < j; ++k) {
=====================================
spltdb.c
=====================================
@@ -17,6 +17,7 @@
* limitations under the License.
*/
#define _XOPEN_SOURCE 600
+#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
@@ -395,7 +396,7 @@ unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *he
return num;
}
-int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
+int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose) {
/* https://www.youtube.com/watch?v=LtXEMwSG5-8 */
@@ -414,7 +415,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
double tmp_score, bestScore, id, cover, q_id, q_cover, p_value;
long double depth, q_value, expected;
char *templatefilename, Date[11];
- FILE **inputfiles, *inputfile, *frag_in_raw, *seq_in;
+ FILE **inputfiles, *inputfile, *frag_in_raw;
FILE *res_out, *alignment_out, *consensus_out, *frag_out_raw, *xml_out;
FILE *extendedFeatures_out, *name_file, **template_fragments;
time_t t0, t1;
@@ -1376,7 +1377,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
templatefilename = 0;
initialiseVcf(vcf_out, templatefilename);
}
- seq_in = 0;
+ seq_in_no = 0;
/* preallocate assembly matrices */
matrix = smalloc(sizeof(AssemInfo));
@@ -1434,7 +1435,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
thread->bcd = bcd;
thread->sam = sam;
thread->ef = extendedFeatures;
- thread->seq_in = fileno(seq_in);
+ thread->seq_in = seq_in_no;
thread->kmersize = kmersize;
thread->template = -2;
thread->file_count = fileCount;
@@ -1499,7 +1500,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
thread->bcd = bcd;
thread->sam = sam;
thread->ef = extendedFeatures;
- thread->seq_in = fileno(seq_in);
+ thread->seq_in = seq_in_no;
thread->kmersize = kmersize;
thread->template = 0;
thread->file_count = fileCount;
@@ -1538,11 +1539,11 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
templatefilename = *templatefilenames++;
file_len = strlen(templatefilename);
strcat(templatefilename, ".seq.b");
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
ERROR();
}
@@ -1573,12 +1574,16 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
name_file = sfopen(templatefilename, "rb");
templatefilename[file_len] = 0;
strcat(templatefilename, ".seq.b");
- fclose(seq_in);
- seq_in = sfopen(templatefilename, "rb");
- fseek(seq_in, 0, SEEK_END);
- seqin_size = 4 * ftell(seq_in);
- fseek(seq_in, 0, SEEK_SET);
- seq_in_no = fileno(seq_in);
+ close(seq_in_no);
+ seq_in_no = open(templatefilename, O_RDONLY);
+ if(seq_in_no == -1) {
+ ERROR();
+ }
+ seqin_size = 4 * lseek(seq_in_no, 0, SEEK_END);
+ if(lseek(seq_in_no, 0, SEEK_SET) != 0) {
+ ERROR();
+ }
+ thread->seq_in = seq_in_no;
templatefilename[file_len] = 0;
seq_seeker = 0;
if(extendedFeatures == 2) {
@@ -1665,7 +1670,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
printExtendedFeatures(thread->template_name, aligned_assem, fragmentCounts[template], readCounts[template], extendedFeatures_out);
}
if(vcf) {
- updateVcf(thread->template_name, thread->template_index->seq, evalue, t_len, matrix, vcf, vcf_out);
+ updateVcf(thread->template_name, aligned_assem->t, evalue, support, bcd, t_len, matrix, vcf, vcf_out);
}
}
} else {
@@ -1714,7 +1719,7 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
hashMapCCI_destroy(thread->template_index);
/* Close files */
- fclose(seq_in);
+ close(seq_in_no);
fclose(res_out);
if(alignment_out) {
fclose(alignment_out);
=====================================
spltdb.h
=====================================
@@ -35,4 +35,4 @@ struct spltDBbuff {
int print_ankers_spltDB(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
int print_ankers_Sparse_spltDB(int *out_Tem, CompDNA *qseq, int rc_flag, const Qseqs *header, const int flag, FILE *out);
unsigned get_ankers_spltDB(int *infoSize, int *out_Tem, CompDNA *qseq, Qseqs *header, int *flag, FILE *inputfile);
-int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
+int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename, int argc, char **argv, int ConClave, int kmersize, int minlen, Penalties *rewards, int extendedFeatures, double ID_t, int mq, double scoreT, double mrc, double evalue, double support, int bcd, int ref_fsa, int print_matrix, int print_all, int vcf, int xml, int sam, int nc, int nf, unsigned shm, int thread_num, int verbose);
=====================================
vcf.c
=====================================
@@ -94,10 +94,19 @@ void initialiseVcf(FileBuff *fileP, char *templateFilename) {
}
-void updateVcf(char *template_name, long unsigned *template_seq, double evalue, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP) {
+void updateVcf(char *template_name, unsigned char *template_seq, double evalue, double support, int bcd, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP) {
- static const char *PASS = "PASS", *FAIL = "FAIL", *LowQual = "LowQual", *UNKNOWN = ".";
- int i, j, pos, bestScore, depthUpdate, bestBaseScore, nucNum;
+ static const char nuc2num[256] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 0, 8, 1, 8, 8, 8, 2, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 0, 8, 1, 8, 8, 8, 2, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
+ const char *PASS = "PASS", *FAIL = "FAIL", *LowQual = "LowQual", *UNKNOWN = ".";
+ int i, pos, nextPos, bestScore, depthUpdate, bestBaseScore, nucNum;
int template_name_length, check, avail, DP, AD, DEL, QUAL;
double AF, RAF, Q, P;
const double lnConst = -10 / log(10);
@@ -115,79 +124,84 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
update = (char *) fileP->next;
avail = fileP->bytes;
assembly = matrix->assmb;
- pos = 0;
- i = 0;
+ nextPos = 0;
do {
- /* does not handle insertions yet */
+ pos = nextPos;
+ nextPos = assembly[pos].next;
+
+ /* handle insertions now */
+ nuc = *template_seq++;
if(pos < t_len) {
- nuc = bases[getNuc(template_seq, pos)];
++i;
- } else {
+ } else if(nuc != '-') {
+ --template_seq;
nuc = '-';
}
+
/* call base */
- nucNum = 0;
- bestNuc = 5;
- bestScore = 0;
+ bestNuc = nuc2num[nuc];
+ bestScore = assembly[pos].counts[bestNuc];
depthUpdate = 0;
- for(j = 0; j < 5; ++j) {
- if(bestScore < assembly[pos].counts[j]) {
- bestScore = assembly[pos].counts[j];
- bestNuc = j;
+ for(i = 0; i < 6; ++i) {
+ if(bestScore < assembly[pos].counts[i]) {
+ bestScore = assembly[pos].counts[i];
+ bestNuc = i;
}
- depthUpdate += assembly[pos].counts[j];
- }
- if(bestScore < assembly[pos].counts[j]) {
- bestScore = assembly[pos].counts[j];
- bestNuc = j;
+ depthUpdate += assembly[pos].counts[i];
}
- depthUpdate += assembly[pos].counts[j];
nucNum = bestNuc;
bestNuc = bases[bestNuc];
/* Check for minor base call */
- if((bestScore << 1) < depthUpdate) {
+ if(!depthUpdate) {
+ nucNum = 5;
+ bestNuc = '-';
+ } else if((bestScore << 1) < depthUpdate) {
if(bestNuc == '-') {
bestBaseScore = 0;
bestNuc = 4;
- for(j = 0; j < 5; ++j) {
- if(bestBaseScore < assembly[pos].counts[j]) {
- bestBaseScore = assembly[pos].counts[j];
- bestNuc = j;
+ for(i = 0; i < 5; ++i) {
+ if(bestBaseScore < assembly[pos].counts[i]) {
+ bestBaseScore = assembly[pos].counts[i];
+ bestNuc = i;
}
}
+ nucNum = bestNuc;
bestNuc = tolower(bases[bestNuc]);
} else {
bestNuc = tolower(bestNuc);
}
bestScore = depthUpdate - assembly[pos].counts[5];
+ } else if(depthUpdate < bcd) {
+ /* too low depth */
+ bestNuc = tolower(bestNuc);
}
if(bestScore) {
/* determine base at current position */
bestNuc = baseCall(bestNuc, nuc, bestScore, depthUpdate, evalue, &assembly[pos]);
+ nucNum = nuc2num[bestNuc];
+
+ /* INFO */
+ DP = depthUpdate;
+ AD = assembly[pos].counts[nucNum];
+ AF = (double) AD / DP;
+ RAF = (double) bestScore / DP;
+ DEL = assembly[pos].counts[5];
+ Q = pow(depthUpdate - (bestScore << 1), 2) / depthUpdate;
+ P = p_chisqr(Q);
/* discard unimportant changes */
- if(nuc != toupper(bestNuc)) {
- /* INFO */
- DP = depthUpdate;
- AD = assembly[pos].counts[nucNum];
- AF = (double) AD / DP;
- RAF = (double) bestScore / DP;
- DEL = assembly[pos].counts[5];
-
- /* FORMAT */
- Q = pow(depthUpdate - (bestScore << 1), 2) / depthUpdate;
- P = p_chisqr(Q);
+ if(nuc != bestNuc || (t_len <= nextPos && *template_seq == '-') || DP < bcd || evalue < P || AD < support * DP) {
/* QUAL */
//QUAL = lnConst * log(P);
QUAL = lnConst * log(binP(DP, AD, 0.25));
QUAL = (QUAL < 0 || 3079 < QUAL) ? 3079 : QUAL;
/* FILTER */
- if(isupper(bestNuc)) {
+ if(bcd <= DP && P <= evalue && support * DP <= AD) {
FILTER = (char *) PASS;
- } else if(P <= evalue || (9 * depthUpdate) <= (10 * bestScore)) {
+ } else if(bcd <= DP || P <= evalue || support * DP <= AD) {
FILTER = (char *) LowQual;
} else {
FILTER = (char *) FAIL;
@@ -204,8 +218,8 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
update += template_name_length; avail -= template_name_length;
*update++ = '\t'; --avail;
- if(nuc != '-') {
- check = sprintf(update, "%d", i);
+ if(pos < t_len) {
+ check = sprintf(update, "%d", pos + 1);
update += check; avail -= check;
} else {
*update++ = '0';
@@ -227,7 +241,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
}
*update++ = '\t';
--avail;
- if(nuc != '-' && bestNuc == '-') {
+ if(bestNuc == '-') {
*update++ = '<';
*update++ = bestNuc;
*update++ = '>';
@@ -244,7 +258,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
check = sprintf(update, "Q:P:FT\t%.2f:%4.1e:%s\n", Q, P, FILTER);
update += check; avail -= check;
}
- } else if(nuc != '-') {
+ } else if(pos < t_len) {
FILTER = (char *) FAIL;
if(avail < template_name_length + 105) {
fileP->bytes = avail;
@@ -252,7 +266,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
avail = fileP->bytes;
update = (char *) fileP->next;
}
- check = sprintf(update, "%s\t%d\t.\t%c\t%c\t%d\t%s\t", template_name, i, nuc, '.', 0, *FILTER_ptr);
+ check = sprintf(update, "%s\t%d\t.\t%c\t%c\t%d\t%s\t", template_name, pos + 1, nuc, '.', 0, *FILTER_ptr);
update += check; avail -= check;
check = sprintf(update, "DP=%d;AD=%d;AF=%.2f;RAF=%.2f;DEL=%d;AD6=%d,%d,%d,%d,%d,%d\t", 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0, 0);
update += check; avail -= check;
@@ -260,7 +274,7 @@ void updateVcf(char *template_name, long unsigned *template_seq, double evalue,
update += check; avail -= check;
}
- } while((pos = assembly[pos].next) != 0);
+ } while(nextPos != 0);
fileP->next = (unsigned char *) update;
fileP->bytes = avail;
=====================================
vcf.h
=====================================
@@ -22,4 +22,4 @@
char * noFolder(const char *src);
void initialiseVcf(FileBuff *fileP, char *templateFilename);
-void updateVcf(char *template_name, long unsigned *template_seq, double evalue, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP);
+void updateVcf(char *template_name, unsigned char *template_seq, double evalue, double support, int bcd, int t_len, AssemInfo *matrix, int filter, FileBuff *fileP);
=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
* limitations under the License.
*/
-#define KMA_VERSION "1.3.11"
+#define KMA_VERSION "1.3.13"
View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/45823d7f759ee458373b2656515b9020148d2885
--
View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/45823d7f759ee458373b2656515b9020148d2885
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210320/caeef102/attachment-0001.htm>
More information about the debian-med-commit
mailing list