[med-svn] [Git][med-team/kma][master] 6 commits: New upstream version 1.2.12
Steffen Möller
gitlab at salsa.debian.org
Mon Sep 23 09:21:02 BST 2019
Steffen Möller pushed to branch master at Debian Med / kma
Commits:
d16e92cc by Steffen Moeller at 2019-09-23T07:58:31Z
New upstream version 1.2.12
- - - - -
f4ebf671 by Steffen Moeller at 2019-09-23T07:58:31Z
New upstream version
- - - - -
4aa98414 by Steffen Moeller at 2019-09-23T07:58:32Z
Update upstream source from tag 'upstream/1.2.12'
Update to upstream version '1.2.12'
with Debian dir f59f664288f0a32189f90019cc41d9a12f3c4332
- - - - -
8131deef by Steffen Moeller at 2019-09-23T08:14:47Z
Updated patch and created upstream PR
https://bitbucket.org/genomicepidemiology/kma/pull-requests/5
- - - - -
4b4b45d1 by Steffen Moeller at 2019-09-23T08:17:11Z
Added ref to conda
- - - - -
3b8fcded by Steffen Moeller at 2019-09-23T08:18:43Z
Uploaded to unstable
- - - - -
25 changed files:
- Makefile
- align.c
- assembly.c
- compress.c
- debian/changelog
- debian/patches/hardening.patch
- debian/upstream/metadata
- hashmapindex.c
- hashmapindex.h
- kma.c
- kmapipe.c
- kmeranker.c
- kmeranker.h
- + kmerlink.c
- + kmerlink.h
- kmers.c
- kmers.h
- nw.c
- qseqs.c
- runkma.c
- sam.c
- savekmers.c
- savekmers.h
- seqmenttree.c
- version.h
Changes:
=====================================
Makefile
=====================================
@@ -1,5 +1,5 @@
CFLAGS = -Wall -O3 -std=c99
-LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
+LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmerlink.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
.c .o:
@@ -46,6 +46,7 @@ index.o: index.h compress.h decon.h hashmap.h hashmapkma.h loadupdate.h makeinde
kma.o: kma.h ankers.h assembly.h chain.h hashmapkma.h kmers.h mt1.h penalties.h pherror.h qseqs.h runinput.h runkma.h savekmers.h sparse.h spltdb.h tmp.h version.h
kmapipe.o: kmapipe.h pherror.h
kmeranker.o: kmeranker.h penalties.h
+kmerlink.o: kmerlink.h kmeranker.h
kmers.o: kmers.h ankers.h compdna.h hashmapkma.h kmapipe.h pherror.h qseqs.h savekmers.h spltdb.h
kmmap.o: kmmap.h hashmapkma.h
loadupdate.o: loadupdate.h pherror.h hashmap.h hashmapkma.h updateindex.h
=====================================
align.c
=====================================
@@ -210,7 +210,6 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
start = chainSeedsPtr(points, q_len, t_len, kmersize, &aligned->mapQ);
score = points->score[start];
- //fprintf(stderr, "%d\t%d\t%d\t%d\n", t_len, mem_count, aligned->mapQ, score);
if(aligned->mapQ < mq || score < kmersize) {
Stat.score = 0;
Stat.len = 1;
@@ -250,6 +249,8 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
NWstat = NW(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices);
} else {
NWstat = NW_band(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, band, matrices);
+ /* here */
+ //NWstat = NW(template_index->seq, qseq, -1 - (t_s == 0), t_s, t_e, q_s, q_e, Frag_align, matrices);
}
/* trim leading gaps */
@@ -343,7 +344,7 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices);
} else {
NWstat = NW_band(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, band, matrices);
- //NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align);
+ //NWstat = NW(template_index->seq, qseq, 0, t_s, t_e, q_s, q_e, Frag_align, matrices);
}
memcpy(aligned->t + Stat.len, Frag_align->t, NWstat.len);
@@ -377,7 +378,7 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
NWstat = NW(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices);
} else {
NWstat = NW_band(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, band, matrices);
- //NWstat = NW(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align);
+ //NWstat = NW(template_index->seq, qseq, 1 + (t_e == t_len), t_s, t_e, q_s, q_e, Frag_align, matrices);
}
/* trim trailing gaps */
if(t_e == t_len) {
@@ -390,12 +391,12 @@ AlnScore KMA(const HashMap_index *template_index, const unsigned char *qseq, int
--bias;
}
++bias;
- /*
+
+
if(bias != NWstat.len) {
- NWstat.score -= (W1 + (NWstat.len - bias) * U);
+ //NWstat.score -= (W1 + (NWstat.len - bias) * U);
NWstat.len = bias;
}
- */
}
memcpy(aligned->t + Stat.len, Frag_align->t, NWstat.len);
=====================================
assembly.c
=====================================
@@ -274,7 +274,6 @@ void * assemble_KMA_threaded(void *arg) {
static volatile int excludeIn[1] = {0}, excludeOut[1] = {0}, excludeMatrix[1] = {0}, mainTemplate = -2, thread_wait = 0;
static char *template_name;
- static HashMap_index *template_index;
Assemble_thread *thread = arg;
int i, j, t_len, aln_len, start, end, bias, myBias, gaps, pos, spin, sam;
int read_score, depthUpdate, bestBaseScore, bestScore, template, asm_len;
@@ -295,6 +294,7 @@ void * assemble_KMA_threaded(void *arg) {
AssemInfo *matrix;
AlnPoints *points;
NWmat *NWmatrices;
+ HashMap_index *template_index;
/* get input */
template = thread->template;
@@ -317,6 +317,7 @@ void * assemble_KMA_threaded(void *arg) {
sam = thread->sam;
spin = thread->spin;
thread_num = thread->thread_num;
+ template_index = thread->template_index;
if(template != -2) {
/* all assemblies done,
@@ -331,7 +332,6 @@ void * assemble_KMA_threaded(void *arg) {
/* Allocate assembly arrays */
lock(excludeMatrix);
template_name = thread->template_name;
- template_index = thread->template_index;
t_len = template_index->len;
matrix->len = t_len;
if(matrix->size < (t_len << 1)) {
@@ -552,6 +552,7 @@ void * assemble_KMA_threaded(void *arg) {
header->seq[header->len - 1] = 0;
samwrite(qseq, header, 0, template_name, aligned, stats);
}
+
} else if(sam) {
stats[1] = read_score;
stats[2] = start;
@@ -617,7 +618,6 @@ void * assemble_KMA_threaded(void *arg) {
return NULL;
}
-
/* diff */
/* pre on dense */
/* Pepare and make alignment on consensus */
@@ -697,6 +697,7 @@ void * assemble_KMA_threaded(void *arg) {
pos = assembly[pos].next;
}
+
/* Trim alignment on consensus */
coverScore = 0;
bias = 0;
=====================================
compress.c
=====================================
@@ -20,6 +20,7 @@
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
+#include <sys/param.h>
#include "compress.h"
#include "hashmap.h"
#include "hashmapkma.h"
@@ -44,6 +45,7 @@ static void mmapinit(HashMapKMA *finalDB, long unsigned size, FILE *out) {
if(finalDB->exist == MAP_FAILED) {
ERROR();
}
+ posix_madvise(finalDB->exist, size, POSIX_MADV_SEQUENTIAL);
*finalDB->exist++ = finalDB->DB_size;
*finalDB->exist++ = finalDB->kmersize;
*finalDB->exist++ = finalDB->prefix_len;
@@ -72,8 +74,8 @@ HashMapKMA * compressKMA_DB(HashMap *templates, FILE *out) {
void *data;
long unsigned i, j, check, size, v_size;
long unsigned index, t_index, v_index, new_index, null_index;
- unsigned swap, *values;
- short unsigned *values_s;
+ unsigned swap, *values, *finalV;
+ short unsigned *values_s, *finalV_s;
HashMapKMA *finalDB;
ValuesHash *shmValues;
ValuesTable *node, *next, *table;
@@ -227,6 +229,7 @@ HashMapKMA * compressKMA_DB(HashMap *templates, FILE *out) {
if(finalDB->exist == MAP_FAILED) {
ERROR();
}
+ posix_madvise(finalDB->exist, size, POSIX_MADV_SEQUENTIAL);
finalDB->exist_l = (long unsigned *) (finalDB->exist + 3);
finalDB->exist_l += 5;
@@ -470,6 +473,17 @@ HashMapKMA * compressKMA_DB(HashMap *templates, FILE *out) {
/* move values */
if(finalDB->DB_size < USHRT_MAX) {
for(node = table; node != 0; node = next) {
+ next = node->next;
+ values_s = (short unsigned *)(node->values);
+ finalV_s = finalDB->values_s + node->v_index - 1;
+ i = 2 + *values_s--;
+ while(--i) {
+ *++finalV_s = *++values_s;
+ }
+ free(node->values);
+ free(node);
+
+ /*
next = node->next;
values_s = (short unsigned *)(node->values);
for(i = node->v_index, j = 0; j <= *values_s; ++i, ++j) {
@@ -477,9 +491,21 @@ HashMapKMA * compressKMA_DB(HashMap *templates, FILE *out) {
}
free(values_s);
free(node);
+ */
}
} else {
for(node = table; node != 0; node = next) {
+ next = node->next;
+ values = node->values;
+ finalV = finalDB->values + node->v_index - 1;
+ i = 2 + *values--;
+ while(--i) {
+ *++finalV = *++values;
+ }
+ free(node->values);
+ free(node);
+
+ /*
next = node->next;
values = node->values;
for(i = node->v_index, j = 0; j <= *values; ++i, ++j) {
@@ -487,6 +513,7 @@ HashMapKMA * compressKMA_DB(HashMap *templates, FILE *out) {
}
free(values);
free(node);
+ */
}
}
@@ -537,8 +564,8 @@ HashMapKMA * compressKMA_DB(HashMap *templates, FILE *out) {
HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
long unsigned i, j, v_index, new_index, null_index, size, v_size;
- unsigned check, swap, *values;
- short unsigned *values_s;
+ unsigned check, swap, *values, *finalV;
+ short unsigned *values_s, *finalV_s;
HashMapKMA *finalDB;
ValuesHash *shmValues;
ValuesTable *node, *next, *table;
@@ -653,6 +680,7 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
if(finalDB->exist == MAP_FAILED) {
ERROR();
}
+ posix_madvise(finalDB->exist, size, POSIX_MADV_SEQUENTIAL);
finalDB->exist_l = (long unsigned *) (finalDB->exist + 3);
finalDB->exist_l += 5;
} else {
@@ -810,6 +838,7 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
if(finalDB->values == MAP_FAILED) {
ERROR();
}
+ posix_madvise(finalDB->values, size, POSIX_MADV_SEQUENTIAL);
finalDB->values = ((void *) finalDB->values) + v_size;
if(finalDB->DB_size < USHRT_MAX) {
finalDB->values_s = (short unsigned *) finalDB->values;
@@ -819,6 +848,17 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
/* move values */
if(finalDB->DB_size < USHRT_MAX) {
for(node = table; node != 0; node = next) {
+ next = node->next;
+ values_s = (short unsigned *)(node->values);
+ finalV_s = finalDB->values_s + node->v_index - 1;
+ i = 2 + *values_s--;
+ while(--i) {
+ *++finalV_s = *++values_s;
+ }
+ free(node->values);
+ free(node);
+
+ /*
next = node->next;
values_s = (short unsigned *)(node->values);
for(i = node->v_index, j = 0; j <= *values_s; ++i, ++j) {
@@ -826,9 +866,21 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
}
free(values_s);
free(node);
+ */
}
} else {
for(node = table; node != 0; node = next) {
+ next = node->next;
+ values = node->values;
+ finalV = finalDB->values + node->v_index - 1;
+ i = 2 + *values--;
+ while(--i) {
+ *++finalV = *++values;
+ }
+ free(node->values);
+ free(node);
+
+ /*
next = node->next;
values = node->values;
for(i = node->v_index, j = 0; j <= *values; ++i, ++j) {
@@ -836,6 +888,7 @@ HashMapKMA * compressKMA_megaDB(HashMap *templates, FILE *out) {
}
free(values);
free(node);
+ */
}
}
/* dump final DB */
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+kma (1.2.12-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version
+
+ -- Steffen Moeller <moeller at debian.org> Mon, 23 Sep 2019 09:58:31 +0200
+
kma (1.2.11-1) unstable; urgency=medium
* Team upload.
=====================================
debian/patches/hardening.patch
=====================================
@@ -1,6 +1,7 @@
Author: Andreas Tille <tille at debian.org>
Last-Update: Thu, 07 Jun 2018 21:55:00 +0200
Description: Propagate hardening options
+ Brought upstream as https://bitbucket.org/genomicepidemiology/kma/pull-requests/5
Index: kma/Makefile
===================================================================
@@ -9,7 +10,7 @@ Index: kma/Makefile
@@ -1,4 +1,4 @@
-CFLAGS = -Wall -O3 -std=c99
+CFLAGS += -Wall -O3 -std=c99
- LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
+ LIBS = align.o alnfrags.o ankers.o assembly.o chain.o compdna.o compkmers.o compress.o decon.o ef.o filebuff.o frags.o hashmap.o hashmapindex.o hashmapkma.o hashmapkmers.o hashtable.o index.o kma.o kmapipe.o kmeranker.o kmerlink.o kmers.o kmmap.o loadupdate.o makeindex.o mt1.o nw.o pherror.o printconsensus.o qseqs.o qualcheck.o runinput.o runkma.o sam.o savekmers.o seq2fasta.o seqmenttree.o seqparse.o shm.o sparse.o spltdb.o stdnuc.o stdstat.o tmp.o update.o updateindex.o updatescores.o valueshash.o vcf.o
PROGS = kma kma_index kma_shm kma_update
@@ -8,16 +8,16 @@ PROGS = kma kma_index kma_shm kma_update
=====================================
debian/upstream/metadata
=====================================
@@ -1,6 +1,8 @@
Reference:
Author: Philip T. L. C. Clausen and Frank M. Aarestrup and Ole Lund
- Title: Rapid and precise alignment of raw reads against redundant databases with KMA
+ Title: >
+ Rapid and precise alignment of raw reads against redundant databases
+ with KMA
Journal: BMC Bioinformatics
Year: 2018
Volume: 19
@@ -15,3 +17,5 @@ Registry:
Entry: OMICS_31606
- Name: bio.tools
Entry: NA
+ - Name: conda:bioconda
+ Entry: kma
=====================================
hashmapindex.c
=====================================
@@ -37,17 +37,26 @@ typedef int key_t;
void (*destroyPtr)(HashMap_index *) = &alignClean;
HashMap_index * (*alignLoadPtr)(HashMap_index *, int, int, int, int, long unsigned, long unsigned) = &alignLoad_fly;
-void hashMap_index_initialize(HashMap_index *dest, int len, int kmerindex) {
+int hashMap_index_initialize(HashMap_index *dest, int len, int kmerindex) {
dest->len = len;
- dest->size = len << 1;
dest->kmerindex = kmerindex;
-
- dest->index = calloc(dest->size, sizeof(int));
- dest->seq = malloc(((len >> 5) + 1) * sizeof(long unsigned));
- if(!dest->index || !dest->seq) {
- ERROR();
+ if(dest->size < (len << 1)) {
+ if(dest->size) {
+ free(dest->index);
+ free(dest->seq);
+ }
+ dest->size = len << 1;
+ dest->index = calloc(dest->size, sizeof(int));
+ dest->seq = malloc(((len >> 5) + 1) * sizeof(long unsigned));
+ if(!dest->index || !dest->seq) {
+ ERROR();
+ }
+
+ return 0;
}
+
+ return 1;
}
void hashMap_index_set(HashMap_index *dest) {
@@ -248,8 +257,13 @@ HashMap_index * hashMap_index_load(HashMap_index *src, int seq, int index, int l
if(src == 0) {
src = smalloc(sizeof(HashMap_index));
+ src->size = 0;
+ }
+ if(hashMap_index_initialize(src, len, kmersize)) {
+ src->size = len << 1;
+ memset(src->index, 0, src->size * sizeof(int));
}
- hashMap_index_initialize(src, len, kmersize);
+
read(seq, src->seq, ((src->len >> 5) + 1) * sizeof(long unsigned));
read(index, src->index, src->size * sizeof(int));
@@ -267,8 +281,13 @@ HashMap_index * hashMap_index_build(HashMap_index *src, int seq, int len, int km
if(src == 0) {
src = smalloc(sizeof(HashMap_index));
+ src->size = 0;
+ }
+
+ if(hashMap_index_initialize(src, len, kmersize)) {
+ memset(src->index, 0, src->size * sizeof(int));
}
- hashMap_index_initialize(src, len, kmersize);
+
shifter = sizeof(long unsigned) * sizeof(long unsigned) - (src->kmerindex << 1);
read(seq, src->seq, ((src->len >> 5) + 1) * sizeof(long unsigned));
@@ -375,10 +394,12 @@ HashMap_index * alignLoad_shm_initial(char *templatefilename, int file_len, int
}
void alignClean(HashMap_index *template_index) {
-
+ /* Overwrite later */
+ /*
if(template_index) {
hashMap_index_destroy(template_index);
}
+ */
}
void alignClean_shm(HashMap_index *template_index) {
=====================================
hashmapindex.h
=====================================
@@ -35,7 +35,7 @@ struct hashMap_index {
extern void (*destroyPtr)(HashMap_index *);
extern HashMap_index * (*alignLoadPtr)(HashMap_index *, int, int, int, int, long unsigned, long unsigned);
-void hashMap_index_initialize(HashMap_index *dest, int len, int kmerindex);
+int hashMap_index_initialize(HashMap_index *dest, int len, int kmerindex);
void hashMap_index_set(HashMap_index *dest);
void hashMap_index_destroy(HashMap_index *dest);
int hashMap_index_get(const HashMap_index *dest, long unsigned key, unsigned shifter);
=====================================
kma.c
=====================================
@@ -173,7 +173,7 @@ int kma_main(int argc, char *argv[]) {
static unsigned nc, nf, shm, exhaustive;
static char *outputfilename, *templatefilename, **templatefilenames;
static char **inputfiles, **inputfiles_PE, **inputfiles_INT, ss;
- static double ID_t, scoreT, evalue;
+ static double ID_t, scoreT, coverT, evalue;
static Penalties *rewards;
int i, j, args, exe_len, status, size, escape, tmp, step1, step2;
unsigned totFrags;
@@ -221,6 +221,7 @@ int kma_main(int argc, char *argv[]) {
mq = 0;
bcd = 1;
scoreT = 0.5;
+ coverT = 0.5;
ID_t = 1.0;
one2one = 0;
ss = 'q';
@@ -264,7 +265,6 @@ int kma_main(int argc, char *argv[]) {
deConPrintPtr = printPtr;
*/
-
/* PARSE COMMAND LINE OPTIONS */
args = 1;
while(args < argc) {
@@ -1137,7 +1137,7 @@ int kma_main(int argc, char *argv[]) {
} else if(step2) {
myTemplatefilename = smalloc(strlen(templatefilename) + 64);
strcpy(myTemplatefilename, templatefilename);
- status = save_kmers_batch(myTemplatefilename, "-s1", shm, thread_num, exhaustive, rewards, ioStream, sam);
+ status = save_kmers_batch(myTemplatefilename, "-s1", shm, thread_num, exhaustive, rewards, ioStream, sam, scoreT, coverT);
free(myTemplatefilename);
} else if(sparse_run) {
myTemplatefilename = smalloc(strlen(templatefilename) + 64);
=====================================
kmapipe.c
=====================================
@@ -226,13 +226,14 @@ FILE * kmaPipeFork(const char *cmd, const char *type, FILE *ioStream, int *statu
for (last = 0, src = pidlist; src->fp != ioStream; last = src, src = src->next) {
if(!src) {
*status = 1;
+ unlock(lock);
return 0;
}
}
unlock(lock);
/* close stream and get exit status */
- *status = 1;
+ *status = 0;
#ifndef _WIN32
while ((pid = waitpid(src->pid, status, 0)) == -1 && errno == EINTR) {
usleep(100);
@@ -240,7 +241,6 @@ FILE * kmaPipeFork(const char *cmd, const char *type, FILE *ioStream, int *statu
#else
WaitForSingleObject(src->pid, INFINITE);
#endif
- *status = 0;
fclose(ioStream);
/* Remove the entry from the linked list. */
=====================================
kmeranker.c
=====================================
@@ -18,14 +18,15 @@
*/
#define _XOPEN_SOURCE 600
#include <stdlib.h>
+#include <math.h>
#include "kmeranker.h"
#include "penalties.h"
-KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int *bests, int *Score, int *extendScore, char *include) {
+KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int kmersize, int *bests, int *Score, int *extendScore, char *include) {
/* get set of best templates and silences the chain, except for the initial anker */
- int template, score, bestScore, Wl, W1, U, M, MM, gaps;
- unsigned i, j, start, end, SU;
+ int i, j, template, score, bestScore, Wl, W1, U, M, MM, gaps;
+ int start, end, pos, SU;
unsigned *values;
short unsigned *values_s;
KmerAnker *node, *prev;
@@ -35,8 +36,9 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int
return 0;
} else if((SU = *include)) {
values_s = (short unsigned *) src->values;
- i = *values_s + 1;
- *bests += i;
+ i = *values_s;
+ *bests = i;
+ ++i;
values_s += i;
bests += i;
while(--i) {
@@ -44,23 +46,27 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int
}
} else {
values = src->values;
- i = *values + 1;
- *bests += i;
+ i = *values;
+ *bests = i;
+ ++i;
values += i;
bests += i;
while(--i) {
include[(*--bests = *--values)] ^= 1;
}
}
+ --bests;
M = rewards->M;
MM = rewards->MM;
U = rewards->U;
W1 = rewards->W1;
Wl = rewards->Wl;
bestScore = src->score;
+ values_s = 0;
+ values = 0;
/* get chaining scores */
- for(node = src; node != 0; node = node->next) {
+ for(node = src; node; node = node->next) {
if(SU) {
values_s = (short unsigned *) node->values;
i = *values_s + 1;
@@ -73,41 +79,44 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int
start = node->start;
end = node->end;
-
while(--i) {
template = SU ? *--values_s : *--values;
- if(!include[template]) {
+ if(include[template]) {
score = Score[template];
- gaps = start - extendScore[template];
- if(gaps == 0) {
- /* continued match */
- score += M;
- } else if(gaps < 0) {
- /* deletion */
- score += (W1 + abs(gaps + 1) * U);
- } else if(gaps == 1) {
- /* one unmatched between */
- score += MM;
- } else {
- /* unmatched bases between */
- if((gaps = (gaps - 2) * M) < 0) {
- score += gaps;
- }
- score += (MM << 1);
- }
+ pos = extendScore[template];
+ gaps = pos - end;
- /* check local chain is needed */
- if(score < Wl) {
- score = Wl + node->weight;
- /* mark as terminating */
- extendScore[template] = 0;
+ /* extend chain */
+ if(pos == 0) {
+ if(node->cStart) {
+ score = W1 + (node->cStart - 1) * U;
+ score = node->weight + (Wl < score ? score : Wl);
+ } else {
+ score = node->weight;
+ }
} else {
- score += node->weight;
- /* mark descendant */
- extendScore[template] = end;
+ if(gaps < 0) {
+ if(gaps == -kmersize) {
+ score += node->weight - (kmersize - 1) * M;
+ } else {
+ score += (W1 + (-gaps - 1) * U) + node->weight + gaps * M;
+ }
+ } else if(gaps == 0) {
+ score += node->weight + W1;
+ } else if(gaps <= 2) {
+ score += node->weight + gaps * MM;
+ } else if((MM * 2 + (gaps - 2) * M) < 0) {
+ score += node->weight + (MM * 2 + (gaps - 2) * M);
+ } else {
+ score += node->weight - log(gaps * M);
+ }
+
+ /* verify extension */
+ if(score < 0 || score == bestScore) {
+ include[template] = 0;
+ }
}
-
- /* update Scores */
+ extendScore[template] = start;
Score[template] = score;
}
}
@@ -124,6 +133,7 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int
if(Score[(template = bests[i])] == bestScore) {
bests[++j] = template;
}
+
/* clear Score arrays */
Score[template] = 0;
extendScore[template] = 0;
@@ -134,18 +144,22 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int
return prev;
}
-KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize) {
+KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize, double mrs) {
KmerAnker *node, *prev;
- while(V_score->score < kmersize) {
- ++V_score;
+ if(!V_score || !V_score->score) {
+ return 0;
}
+ while((V_score->score < kmersize || V_score->score < mrs * (V_score->end - V_score->cStart)) && (V_score = V_score->descend));
+ if(!V_score) {
+ return 0;
+ }
prev = V_score;
node = V_score->descend;
while(node) {
- if(kmersize <= node->score) {
+ if(kmersize <= node->score && mrs * (node->end - node->cStart) <= node->score) {
prev->descend = node;
prev = node;
}
@@ -156,14 +170,6 @@ KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize) {
return V_score;
}
-unsigned getStartAnker(KmerAnker *src) {
-
- while(src->next) {
- src = src->next;
- }
- return src->start;
-}
-
KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties) {
KmerAnker *node, *prev, *best;
@@ -174,18 +180,19 @@ KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties) {
prev = prev->descend;
}
*src = prev;
- best = prev;
+ if(!(best = prev)) {
+ return 0;
+ }
node = prev->descend;
-
while(node) {
if(best->score < node->score) {
best = node;
*ties = 0;
} else if(best->score == node->score) {
- if((node->end - node->start) < (best->end - best->start)) {
+ if((node->end - node->start) < (best->end - best->cStart)) {
best = node;
*ties = 0;
- } else if((node->end - node->start) == (best->end - best->start)) {
+ } else if((node->end - node->start) == (best->end - best->cStart)) {
best = node;
++*ties;
}
@@ -200,11 +207,15 @@ KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties) {
return best;
}
-KmerAnker * getTieAnker(KmerAnker *stop, KmerAnker *src, int score, int len) {
+KmerAnker * getTieAnker(int stop, KmerAnker *src, int score, int len) {
+
+ if(!src || src->start == stop) {
+ return 0;
+ }
/* search downwards */
- while(--src != stop) {
- if(src->score == score && (src->end - src->start) == len) {
+ while((--src)->start != stop) {
+ if(src->score == score && (src->end - src->cStart) == len) {
return src;
}
}
=====================================
kmeranker.h
=====================================
@@ -27,15 +27,15 @@ struct kmerAnker {
int weight;
unsigned start;
unsigned end;
- unsigned flag;
+ unsigned cStart;
+ unsigned ties;
unsigned *values;
struct kmerAnker *descend; /* descending anker */
- struct kmerAnker *next; /* next anker in chain */
+ struct kmerAnker *next; /* next ankers in chain */
};
#endif
-KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int *bests, int *Score, int *extendScore, char *include);
-KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize);
-unsigned getStartAnker(KmerAnker *src);
+KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, int kmersize, int *bests, int *Score, int *extendScore, char *include);
+KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize, double mrs);
KmerAnker * getBestAnker(KmerAnker **src, unsigned *ties);
-KmerAnker * getTieAnker(KmerAnker *stop, KmerAnker *src, int score, int len);
+KmerAnker * getTieAnker(int stop, KmerAnker *src, int score, int len);
=====================================
kmerlink.c
=====================================
@@ -0,0 +1,73 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+#include <stdlib.h>
+#include "kmerlink.h"
+#include "pherror.h"
+
+KmerLink * initKmerLink(unsigned size) {
+
+ KmerLink *dest;
+
+ dest = smalloc(sizeof(KmerLink));
+ dest->size = size;
+ dest->n = 0;
+ dest->list = smalloc(size * sizeof(KmerAnker));
+
+ return dest;
+}
+
+void reallocKmerLink(KmerLink *src, unsigned newSize) {
+
+ src->size = newSize;
+ src->list = realloc(src->list, newSize * sizeof(KmerAnker));
+ if(!src->list) {
+ ERROR();
+ }
+}
+
+KmerAnker * pushKmerLink(KmerLink *src, KmerAnker *node) {
+
+ KmerAnker *dest;
+
+ if(src->size == ++src->n) {
+ reallocKmerLink(src, src->size << 1);
+ }
+ dest = src->list + (src->n - 1);
+ *dest = *node;
+
+ return dest;
+}
+
+KmerAnker * popKmerLink(KmerLink *src, int n) {
+
+ if(n < src->n) {
+ src->n -= n;
+ } else {
+ src->n = 0;
+ }
+
+ return src->list + src->n;
+}
+
+void destroyKmerLink(KmerLink *src) {
+
+ free(src->list);
+ free(src);
+}
=====================================
kmerlink.h
=====================================
@@ -0,0 +1,36 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+#include "kmeranker.h"
+
+#ifndef KMERLINK
+#define KMERLINK 1;
+typedef struct kmerLink KmerLink;
+struct kmerLink {
+ unsigned size;
+ unsigned n;
+ KmerAnker *list;
+};
+#endif
+
+KmerLink * initKmerLink(unsigned size);
+void reallocKmerLink(KmerLink *src, unsigned newSize);
+KmerAnker * pushKmerLink(KmerLink *src, KmerAnker *node);
+KmerAnker * popKmerLink(KmerLink *src, int n);
+void destroyKmerLink(KmerLink *src);
=====================================
kmers.c
=====================================
@@ -47,7 +47,7 @@ typedef int key_t;
#define shmctl(shmid, cmd, buf) fprintf(stderr, "sysV not available on Windows.\n")
#endif
-int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int thread_num, const int exhaustive, Penalties *rewards, FILE *out, int sam) {
+int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int thread_num, const int exhaustive, Penalties *rewards, FILE *out, int sam, double mrs, double coverT) {
int i, file_len, shmid, deCon, *bestTemplates, *template_lengths;
FILE *inputfile, *templatefile;
@@ -116,6 +116,7 @@ int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int th
if(printPtr == &print_ankers_spltDB || printPtr == &print_ankers_Sparse_spltDB) {
printPtr(0, 0, thread_num, 0, 0, 0);
}
+ template_lengths = 0;
if(kmerScan == &save_kmers_HMM) {
/* load lengths */
strcat(templatefilename, ".length.b");
@@ -138,8 +139,8 @@ int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int th
templatefilename[file_len] = 0;
fclose(templatefile);
save_kmers_HMM(templates, 0, &(int){thread_num}, template_lengths, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- } else {
- template_lengths = 0;
+ } else if(kmerScan == &save_kmers_chain || kmerScan == &save_kmers_sparse_chain) {
+ kmerScan(0, 0, &(int){thread_num}, (int *)(&mrs), (int *)(&coverT), 0, 0, 0, 0, 0, 0, 0, 0);
}
t1 = clock();
@@ -234,6 +235,8 @@ int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int th
}
if(kmerScan == &save_kmers_HMM && (shm & 4) == 0) {
free(template_lengths);
+ } else if(kmerScan == &save_kmers_chain || kmerScan == &save_kmers_sparse_chain) {
+ kmerScan(0, 0, &(int){thread_num}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
for(thread = threads; thread; thread = threads) {
threads = thread->next;
=====================================
kmers.h
=====================================
@@ -19,4 +19,4 @@
#define _XOPEN_SOURCE 600
#include "penalties.h"
-int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int thread_num, const int exhaustive, Penalties *rewards, FILE *out, int sam);
+int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int thread_num, const int exhaustive, Penalties *rewards, FILE *out, int sam, double mrs, double coverT);
=====================================
nw.c
=====================================
@@ -414,6 +414,7 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
/* Perform banded NW */
pos[0] = 0;
+ pos[1] = 0;
en = 0;
c_pos = (t_len + q_len) >> 1;
for(m = t_len - 1, nuc_pos = t_e - 1; m >= 0; --m, --nuc_pos, --c_pos) {
@@ -532,6 +533,7 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
if(k < 0 && Stat.score <= D_ptr[n]) {
Stat.score = D_ptr[n];
pos[0] = m;
+ pos[1] = n;
}
tmp = D_ptr;
@@ -546,12 +548,15 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
/* get start position of alignment */
if(k < 0) {
- pos[1] = en;
- q_pos = 0;
+ q_pos = pos[0];
+ if(pos[0] == 0) {
+ pos[1] = en;
+ }
if(k == -2) {
for(n = en; n < bq_len; ++n) {
if(D_prev[n] > Stat.score) {
Stat.score = D_prev[n];
+ q_pos = 0;
pos[0] = 0;
pos[1] = (aligned->start = n);
}
@@ -563,7 +568,7 @@ AlnScore NW_band(const long unsigned *template, const unsigned char *queryOrg, i
pos[0] = 0;
pos[1] = en;
}
-
+ aligned->start = q_pos;
/* back tracking */
m = pos[0];
@@ -960,6 +965,7 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
/* Perform banded NW */
pos[0] = 0;
+ pos[1] = 0;
en = 0;
c_pos = (t_len + q_len) >> 1;
for(m = t_len - 1, nuc_pos = t_e - 1; m >= 0; --m, --nuc_pos, --c_pos) {
@@ -1078,6 +1084,7 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
if(k < 0 && Stat.score <= D_ptr[n]) {
Stat.score = D_ptr[n];
pos[0] = m;
+ pos[1] = n;
}
tmp = D_ptr;
@@ -1092,12 +1099,15 @@ AlnScore NW_band_score(const long unsigned *template, const unsigned char *query
/* get start position of alignment */
if(k < 0) {
- pos[1] = en;
- q_pos = 0;
+ q_pos = pos[0];
+ if(pos[0] == 0) {
+ pos[1] = en;
+ }
if(k == -2) {
for(n = en; n < bq_len; ++n) {
if(D_prev[n] > Stat.score) {
Stat.score = D_prev[n];
+ q_pos = 0;
pos[0] = 0;
pos[1] = n;
}
=====================================
qseqs.c
=====================================
@@ -42,8 +42,8 @@ void insertKmerBound(Qseqs *header, int start, int end) {
int *seq;
- if((header->len + 3 * sizeof(int)) < header->size) {
- header->size = (header->len + 3 * sizeof(int)) << 1;
+ if((header->len + 2 * sizeof(int)) < header->size) {
+ header->size = (header->len + 2 * sizeof(int)) << 1;
if(!(header->seq = realloc(header->seq, header->size))) {
ERROR();
}
@@ -52,7 +52,8 @@ void insertKmerBound(Qseqs *header, int start, int end) {
seq = (int *) (header->seq + header->len + 1);
*seq = start;
*++seq = end;
- *++seq = 0;
- header->len += (2 * sizeof(int) + 1);
+ /* here */
+ /* uncomment */
+ //header->len += (2 * sizeof(int));
}
=====================================
runkma.c
=====================================
@@ -215,6 +215,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
index_in_no = fileno(index_in);
read(index_in_no, &kmersize, sizeof(int));
}
+
templatefilename[file_len] = 0;
/* allocate stuff */
@@ -1311,6 +1312,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
AlnPoints *points;
NWmat *NWmatrices;
Assemble_thread *threads, *thread;
+ HashMap_index *template_index;
/* get lengths and names */
file_len = strlen(templatefilename);
@@ -2095,6 +2097,16 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
matrix->size = template_lengths[i];
}
}
+
+ template_index = smalloc(sizeof(HashMap_index));
+ template_index->size = 0;
+ if(alignLoadPtr != alignLoad_fly_shm) {
+ hashMap_index_initialize(template_index, matrix->size, kmersize);
+ } else {
+ template_index->seq = 0;
+ template_index->index = 0;
+ }
+
if(assembly_KMA_Ptr == &assemble_KMA_threaded) {
matrix->size <<= 1;
} else {
@@ -2153,7 +2165,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
thread->points = seedPoint_init(delta, rewards);
thread->points->len = 0;
thread->spin = (sparse < 0) ? 10 : 100;
-
+ thread->template_index = template_index;
thread->next = threads;
threads = thread;
@@ -2211,7 +2223,7 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
thread->header = header;
thread->points = points;
thread->points->len = 0;
- thread->template_index = 0;
+ thread->template_index = template_index;
thread->next = 0;
thread->spin = (sparse < 0) ? 10 : 100;
@@ -2228,7 +2240,6 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
fprintf(stderr, "# Progress:\t%3d%%\r", 0);
fflush(stderr);
}
-
for(template = 1; template < DB_size; ++template) {
if(w_scores[template] > 0) {
if(progress) {
@@ -2269,7 +2280,6 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
//status |= assemblyPtr(aligned_assem, template, template_fragments, fileCount, frag_out, aligned, gap_align, qseq, header, matrix, points, NWmatrices);
thread->template = template;
assembly_KMA_Ptr(thread);
-
/* Depth, ID and coverage */
if(aligned_assem->cover > 0) {
coverScore = aligned_assem->cover;
@@ -2333,7 +2343,6 @@ int runKMA_MEM(char *templatefilename, char *outputfilename, char *exePrev, int
seq_seeker += ((template_lengths[template] >> 5) + 1);
}
}
-
if(progress) {
fprintf(stderr, "\n");
}
=====================================
sam.c
=====================================
@@ -51,6 +51,7 @@ char * makeCigar(Qseqs *Cigar, const Aln *aligned) {
} else {
cLen = 0;
}
+
rep = 1;
if(*s == '|') {
pop = '=';
@@ -86,6 +87,7 @@ char * makeCigar(Qseqs *Cigar, const Aln *aligned) {
++q;
}
cLen += sprintf(cigar + cLen, "%d%c", rep, pop);
+
if(aligned->end) {
cLen += sprintf(cigar + cLen, "%dS", aligned->end);
}
@@ -106,7 +108,7 @@ int samwrite(const Qseqs *qseq, const Qseqs *header, const Qseqs *Qual, char *rn
static volatile int lock[1] = {0};
static Qseqs *Cigar = 0;
- int flag, pos, mapQ, pnext, tlen, size, et, score;
+ int flag, pos, mapQ, pnext, tlen, size, et, score, tab;
char *qname, *cigar, *rnext, *qual;
unsigned char *seq;
@@ -154,10 +156,6 @@ int samwrite(const Qseqs *qseq, const Qseqs *header, const Qseqs *Qual, char *rn
pos = stats[2] + 1;
tlen = stats[3] - pos;
flag = stats[4];
- if(Cigar == 0) {
- Cigar = setQseqs(256);
- }
- cigar = makeCigar(Cigar, aligned);
} else {
mapQ = 0;
et = 0;
@@ -174,9 +172,32 @@ int samwrite(const Qseqs *qseq, const Qseqs *header, const Qseqs *Qual, char *rn
rnext = "*";
pnext = 0;
+ tab = 0;
+ if(qname) {
+ while(*qname) {
+ if(*qname == '\t') {
+ *qname = 0;
+ } else {
+ ++qname;
+ ++tab;
+ }
+ }
+ qname = (char *) header->seq;
+ }
+
lock(lock);
+ if(aligned) {
+ if(Cigar == 0) {
+ Cigar = setQseqs(256);
+ }
+ cigar = makeCigar(Cigar, aligned);
+ }
size = fprintf(stdout, "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\tET:i:%d\tAS:i:%d\n", qname, flag, rname, pos, mapQ, cigar, rnext, pnext, tlen, (char *) seq, qual, et, score);
unlock(lock);
+ if(tab) {
+ qname[tab] = '\t';
+ }
+
return size;
}
=====================================
savekmers.c
=====================================
@@ -26,6 +26,7 @@
#include "compdna.h"
#include "hashmapkma.h"
#include "kmeranker.h"
+#include "kmerlink.h"
#include "penalties.h"
#include "pherror.h"
#include "qseqs.h"
@@ -37,7 +38,7 @@
#include "threader.h"
void (*ankerPtr)(int*, int*, int*, char*, int*, unsigned**, unsigned**, int*, CompDNA*, int, int, int, int, Qseqs*, volatile int*, FILE*) = &ankerAndClean;
-int (*kmerScan)(const HashMapKMA *, const Penalties *, int*, int*, int*, int*, CompDNA*, CompDNA*, const Qseqs*, int*, const int, volatile int*, FILE*) = &save_kmers_HMM;
+int (*kmerScan)(const HashMapKMA *, const Penalties *, int*, int*, int*, int*, CompDNA*, CompDNA*, const Qseqs*, int*, const int, volatile int*, FILE*) = &save_kmers_HMM; //&save_kmers_chain;
int (*save_kmers_pair)(const HashMapKMA *, const Penalties *, int*, int*, int*, int*, int*, int*, CompDNA*, CompDNA*, const Qseqs*, const Qseqs*, int*, const int, volatile int*, FILE*) = &save_kmers_unionPair;
int (*get_kmers_for_pair_ptr)(const HashMapKMA *, const Penalties *, int *, int *, int *, int *, CompDNA *, int *, int) = &get_kmers_for_pair;
int (*getMatch)(int*, int*) = &getBestMatch;
@@ -2334,6 +2335,16 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
qseq->N[0]++;
qseq->N[qseq->N[0]] = qseq->seqlen;
for(i = 1; i <= qseq->N[0] && !HIT; ++i) {
+ end = qseq->N[i] - kmersize + 1;
+ while(j < end && hashMap_get(templates, getKmer(qseq->seq, j, shifter)) == 0) {
+ j += kmersize;
+ }
+ if(j < end) {
+ HIT = 1;
+ } else {
+ j = qseq->N[i] + 1;
+ }
+ /*
end = qseq->N[i] - kmersize + 1;
for(;j < end && !HIT; j += kmersize) {
if(hashMap_get(templates, getKmer(qseq->seq, j, shifter))) {
@@ -2341,6 +2352,7 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
}
}
j = qseq->N[i] + 1;
+ */
}
/* If deltamer qseq hits, then continue */
@@ -4764,16 +4776,17 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
static int *Sizes = 0;
static double mrs = 0.5, coverT = 0.5;
static KmerAnker **tVF_scores, **tVR_scores;
+ static KmerLink **AnkerLists;
static SeqmentTree **tSeqments;
-
- int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, *bests;
- unsigned i, j, rc, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
- unsigned hitCounter, hitCounter_r, ties, len, *values, *last;
+ int i, j, rc, Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, SU, HIT;
+ int start, end, pos, shifter, kmersize, cover, len, template, *bests;
+ unsigned DB_size, hitCounter, hitCounter_r, ties, *values, *last;
short unsigned *values_s;
char *include;
CompDNA *qseqP;
KmerAnker *V_score, *V_scores, *VF_scores, *VR_scores, *tmp_score;
KmerAnker *best_score, *best_score_r;
+ KmerLink *AnkerList;
SeqmentTree *chainSeqments;
if(qseq == 0) {
@@ -4786,26 +4799,27 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
free(tVF_scores);
} else {
- /* here */
/* set mrs and coverT */
-
-
+ mrs = *((double *)(bestTemplates_r));
+ coverT = *((double *)(Score));
i = *bestTemplates;
Sizes = smalloc(i * sizeof(int));
tVF_scores = smalloc(2 * i * sizeof(KmerAnker *));
tVR_scores = tVF_scores + i;
+ AnkerLists = smalloc(i * sizeof(KmerLink *));
tSeqments = calloc(i, sizeof(SeqmentTree *));
if(!tSeqments) {
ERROR();
}
while(i--) {
- Sizes[i] = 256;
- tVF_scores[i] = calloc(512, sizeof(KmerAnker));
+ Sizes[i] = 1024;
+ tVF_scores[i] = calloc(2048, sizeof(KmerAnker));
if(!tVF_scores[i]) {
ERROR();
- } else {
- tVR_scores[i] = tVF_scores[i] + 256;
}
+ tVR_scores[i] = tVF_scores[i] + 1024;
+ AnkerLists[i] = initKmerLink(64);
+ tSeqments[i] = initializeSeqmentTree(64);
}
}
return 0;
@@ -4827,11 +4841,12 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
hitCounter_r = 0;
values = 0;
values_s = 0;
+ rc_comp(qseq, qseq_r);
if(Sizes[*Score] < qseq->size) {
- Sizes[*Score] = qseq->size << 1;
+ Sizes[*Score] = qseq->size;
free(tVF_scores[*Score]);
- tVF_scores[*Score] = calloc(qseq->size, sizeof(KmerAnker));
+ tVF_scores[*Score] = calloc(2 * qseq->size, sizeof(KmerAnker));
if(!tVF_scores[*Score]) {
ERROR();
} else {
@@ -4840,8 +4855,9 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
VF_scores = tVF_scores[*Score];
VR_scores = tVR_scores[*Score];
+ AnkerList = AnkerLists[*Score];
chainSeqments = tSeqments[*Score];
-
+ chainSeqments->n = 0;
V_scores = VF_scores;
qseqP = qseq;
rc = 2;
@@ -4849,11 +4865,9 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
if(rc == 0) {
V_scores = VR_scores;
qseqP = qseq_r;
- rc_comp(qseq, qseq_r);
hitCounter = hitCounter_r;
hitCounter_r = 0;
}
-
HIT = exhaustive;
j = 0;
++*(qseqP->N);
@@ -4870,13 +4884,13 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
}
- V_score = V_scores;
- V_score->start = 0;
- V_score->end = 0;
- V_score->values = 0;
- V_score->next = 0;
- V_score->descend = 0;
if(HIT) {
+ V_score = V_scores;
+ V_score->start = 0;
+ V_score->end = 0;
+ V_score->values = 0;
+ V_score->next = 0;
+ V_score->descend = 0;
Ms = 0;
MMs = 0;
Us = 0;
@@ -4917,14 +4931,17 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
if(last) {
/* update and link between ankers */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ /* here */
V_score->end = j - gaps + kmersize;
V_score->descend = V_score + 1;
++V_score;
}
V_score->start = j;
- V_score->values = (last = values);
+ V_score->cStart = j;
+ V_score->values = values;
V_score->descend = 0;
- Ms = 0;
+ last = values;
+ Ms = kmersize;
MMs = 0;
Us = 0;
W1s = 0;
@@ -4934,13 +4951,14 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
} else {
++gaps;
}
- j += kmersize;
+ ++j;
}
j = qseqP->N[i] + 1;
}
if(last) {
/* update anker */
V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ V_score->end = j - gaps + kmersize;
}
}
--*(qseqP->N);
@@ -4953,21 +4971,28 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
/* make chains */
- V_score = VF_scores;
+ V_scores = VF_scores;
+ V_score = V_scores;
+ V_score->score = 0;
best_score = 0;
best_score_r = V_score;
HIT = hitCounter + 1;
bests = bestTemplates;
+ *bestTemplates = 0;
+ *bestTemplates_r = 0;
ties = 0;
rc = 2;
while(rc--) {
if(rc == 0) {
- V_score = VR_scores;
+ V_scores = VR_scores;
+ V_score = V_scores;
+ V_score->score = 0;
HIT = hitCounter_r + 1;
bests = bestTemplates_r;
best_score = best_score_r;
best_score_r = V_score;
}
+ len = 0;
*bests = 0;
while(--HIT) {
/*
@@ -4980,6 +5005,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
/* chain anker */
V_score->score = 0;
+ V_score->next = 0;
if(SU) {
values_s = (short unsigned *) V_score->values;
i = *values_s + 1;
@@ -4991,52 +5017,93 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
while(--i) {
template = SU ? *--values_s : *--values;
- if(!include[template]) {
- include[template] = 1;
- bests[++*bests] = template;
- }
score = Score[template];
- gaps = start - (pos = extendScore[template]);
- if(gaps == 0) {
- /* continued match */
- score += M;
- } else if(gaps < 0) {
- /* deletion */
- score += (W1 + abs(gaps + 1) * U);
- } else if(gaps == 1) {
- /* one unmatched between */
- score += MM;
- } else {
- /* unmatched bases between */
- if((gaps = (gaps - 2) * M) < 0) {
- score += gaps;
- }
- score += (MM << 1);
- }
+ pos = extendScore[template];
+ gaps = start - pos;
+ tmp_score = 0;
- /* check local chain is needed */
- if(score < Wl) {
- score = Wl + V_score->weight;
- /* mark as terminating */
+ /* extend chain */
+ if(!Score_r[template]) {
+ bests[++*bests] = template;
+ if(start) {
+ score = W1 + (start - 1) * U;
+ score = V_score->weight + MAX(Wl, score);
+ } else {
+ score = V_score->weight;
+ }
tmp_score = 0;
- extendScore[template] = 0;
} else {
- score += V_score->weight;
- /* mark descendant */
- tmp_score = V_score;
- extendScore[template] = end;
+ if(gaps < 0) {
+ if(gaps == -kmersize) {
+ score += V_score->weight - (kmersize - 1) * M;
+ } else {
+ score += (W1 + (-gaps - 1) * U) + V_score->weight + gaps * M;
+ }
+ } else if(gaps == 0) {
+ score += V_score->weight + W1;
+ } else if(gaps <= 2) {
+ score += V_score->weight + gaps * MM;
+ } else if((MM * 2 + (gaps - 2) * M) < 0) {
+ score += V_score->weight + (MM * 2 + (gaps - 2) * M);
+ } else {
+ score += V_score->weight - log(gaps * M);
+ }
+
+ /* verify extension */
+ if(score <= Wl + V_score->weight) {
+ score = Wl + V_score->weight;
+ tmp_score = 0;
+ } else {
+ tmp_score = V_scores + Score_r[template];
+ }
}
/* update Scores */
if(V_score->score < score) {
V_score->score = score;
/* find link chain */
- if(tmp_score) {
- while((--tmp_score)->end != pos);
+ if(tmp_score && pos) {
+ /* update start of chain */
+ V_score->cStart = tmp_score->cStart;
+ V_score->next = tmp_score;
+ } else {
+ V_score->cStart = V_score->start;
+ V_score->next = 0;
+ }
+ if(V_score->ties) {
+ popKmerLink(AnkerList, V_score->ties);
+ V_score->ties = 0;
+ }
+ } else if(V_score->score == score && V_score->next) {
+ /* check length of equal chains */
+ if(tmp_score && pos) {
+ /* update start of chain */
+ if(V_score->cStart < tmp_score->cStart) {
+ V_score->cStart = tmp_score->cStart;
+ V_score->next = tmp_score;
+ if(V_score->ties) {
+ popKmerLink(AnkerList, V_score->ties);
+ V_score->ties = 0;
+ }
+ } else if(V_score->cStart == tmp_score->cStart && V_score->next != tmp_score) {
+ /* equal paths through the N-dimensional space */
+ if(!V_score->ties++) {
+ V_score->next = pushKmerLink(AnkerList, V_score->next);
+ }
+ pushKmerLink(AnkerList, tmp_score);
+ }
+ } else {
+ V_score->cStart = V_score->start;
+ V_score->next = 0;
+ if(V_score->ties) {
+ popKmerLink(AnkerList, V_score->ties);
+ V_score->ties = 0;
+ }
}
- V_score->next = tmp_score;
}
Score[template] = score;
+ Score_r[template] = len;
+ extendScore[template] = end;
}
/* Mark (first) best hit */
@@ -5045,10 +5112,10 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
ties = 0;
} else if(best_score_r->score == V_score->score) {
/* lower uncertainty in shorter chains */
- if((V_score->end - V_score->start) < (best_score_r->end - best_score_r->start)) {
+ if((V_score->end - V_score->cStart) < (best_score_r->end - best_score_r->cStart)) {
best_score_r = V_score;
ties = 0;
- } else if((V_score->end - V_score->start) == (best_score_r->end - best_score_r->start)) {
+ } else if((V_score->end - V_score->cStart) == (best_score_r->end - best_score_r->cStart)) {
/* equals */
++ties;
/* first hit on rc is likely last hit on forward */
@@ -5057,140 +5124,251 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
}
}
}
-
- V_score->flag = 0;
++V_score;
+ ++len;
}
/* clear Score arrays */
clearScore(bests, Score);
+ clearScore(bests, Score_r);
clearScore(bests, extendScore);
- i = *bests + 1;
- while(--i) {
- include[*++bests] = 0;
- }
}
/* no good hits */
- if(best_score->score < kmersize && best_score_r->score < kmersize) {
+ if((best_score->score < kmersize && best_score_r->score < kmersize) ||
+ (best_score->score < mrs * (best_score->end - best_score->cStart) && (best_score_r->score < mrs * (best_score_r->end - best_score_r->cStart)))) {
*include = 0;
return 1;
}
+ /* prune hits */
+ VF_scores = pruneAnkers(VF_scores, kmersize, mrs);
+ VR_scores = pruneAnkers(VR_scores, kmersize, mrs);
+
/* get best chain,
start/end, score, template(s)
and zero out ankers */
*include = SU;
*bestTemplates = 0;
- if(best_score_r->score < best_score->score ||
- (best_score_r->score < best_score->score &&
- (best_score->end - best_score->start) <= (best_score_r->end - best_score->start))) {
- tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
+ *bestTemplates_r = 0;
+ if(best_score_r->score < best_score->score) {
+ tmp_score = getBestChainTemplates(best_score, rewards, kmersize, bestTemplates, Score, extendScore, include);
score = best_score->score;
start = tmp_score->start;
len = best_score->end - start;
- rc = 0;
+ rc = 1;
+ } else if(best_score_r->score == best_score->score) {
+ if((best_score->end - best_score->cStart) < (best_score_r->end - best_score->cStart)) {
+ tmp_score = getBestChainTemplates(best_score, rewards, kmersize, bestTemplates, Score, extendScore, include);
+ score = best_score->score;
+ start = tmp_score->start;
+ len = best_score->end - start;
+ rc = 1;
+ } else if((best_score->end - best_score->cStart) == (best_score_r->end - best_score->cStart)) {
+ tmp_score = getBestChainTemplates(best_score, rewards, kmersize, bestTemplates, Score, extendScore, include);
+ getBestChainTemplates(best_score_r, rewards, kmersize, bestTemplates_r, Score, extendScore, include);
+ score = best_score->score;
+ start = tmp_score->start;
+ len = best_score->end - start;
+ rc = 3;
+ } else {
+ tmp_score = getBestChainTemplates(best_score_r, rewards, kmersize, bestTemplates_r, Score, extendScore, include);
+ score = best_score_r->score;
+ start = tmp_score->start;
+ len = best_score_r->end - start;
+ rc = 2;
+ }
} else {
- tmp_score = getBestChainTemplates(best_score_r, rewards, bestTemplates, Score, extendScore, include);
+ tmp_score = getBestChainTemplates(best_score_r, rewards, kmersize, bestTemplates_r, Score, extendScore, include);
score = best_score_r->score;
start = tmp_score->start;
len = best_score_r->end - start;
- rc = 1;
+ rc = 2;
}
- if(score < mrs * len) {
- *include = 0;
- return 1;
- }
- /* prune hits */
- VF_scores = pruneAnkers(VF_scores, kmersize);
- VR_scores = pruneAnkers(VR_scores, kmersize);
+ /* here */
+ *include = 0;
+ return 1;
+
/* get best chains */
while(best_score || best_score_r) {
/* get equal ankers inside chain,
that lies under the threshold */
- /* here */
- if(ties && best_score != tmp_score) {
- V_score = best_score;
- while(V_score && (V_score = getTieAnker(tmp_score, V_score, best_score->score, len))) {
- /*
- 1. tie anker -> tie_len == best_len
- 2. Best match is last on seq -> tie_start < best_start
- 1. && 2. -> cover = |tie_start - best_end| / len
- */
- if((V_score->end - start) < coverT * len ) {
- /* not a co-match / overlap is insufficient ->
- no more equal matches */
- V_score = 0;
- } else { /* Anker is equal, update with templates */
- /* Mark current templates to avoid double hitting */
- i = *bestTemplates + 1;
- bests = bestTemplates;
- while(--i) {
- include[*++bests] = 1;
+ if(ties) {
+ if(rc & 1) {
+ score = best_score->score;
+ start = best_score->cStart;
+ len = best_score->end - start;
+ V_score = best_score;
+ while((V_score = getTieAnker(best_score->cStart, V_score, best_score->score, len))) {
+ /*
+ 1. tie anker -> tie_len == best_len
+ 2. Best match is last on seq -> tie_start < best_start
+ 1. && 2. -> cover = |tie_start - best_end| / len
+ */
+ if((V_score->end - start) < coverT * len ) {
+ /* not a co-match / overlap is insufficient ->
+ no more equal matches */
+ V_score = 0;
+ } else { /* Anker is equal, update with templates */
+ /* Mark current templates to avoid double hitting */
+ i = *bestTemplates + 1;
+ bests = bestTemplates;
+ while(--i) {
+ include[*++bests] = 1;
+ }
+ /* update best template candidates */
+ template = *bests;
+ *bests = 0;
+ getBestChainTemplates(V_score, rewards, kmersize, bests, Score, extendScore, include);
+ *bestTemplates += *bests;
+ *bests = template;
+ start = V_score->end - len;
+ }
+ }
+ }
+ if(rc & 2) {
+ score = best_score_r->score;
+ start = best_score_r->cStart;
+ len = best_score_r->end - start;
+ V_score = best_score_r;
+ while((V_score = getTieAnker(best_score_r->cStart, V_score, best_score_r->score, len))) {
+ /*
+ 1. tie anker -> tie_len == best_len
+ 2. Best match is last on seq -> tie_start < best_start
+ 1. && 2. -> cover = |tie_start - best_end| / len
+ */
+ if((V_score->end - start) < coverT * len ) {
+ /* not a co-match / overlap is insufficient ->
+ no more equal matches */
+ V_score = 0;
+ } else { /* Anker is equal, update with templates */
+ /* Mark current templates to avoid double hitting */
+ i = *bestTemplates_r + 1;
+ bests = bestTemplates_r;
+ while(--i) {
+ include[*++bests] = 1;
+ }
+ /* update best template candidates */
+ template = *bests;
+ *bests = 0;
+ getBestChainTemplates(V_score, rewards, kmersize, bests, Score, extendScore, include);
+ *bestTemplates_r += *bests;
+ *bests = template;
+ start = V_score->end - len;
}
- /* update best template candidates */
- template = *bests;
- *bests = 0;
- getBestChainTemplates(V_score, rewards, bests, Score, extendScore, include);
- *bestTemplates += *bests;
- *bests = template;
- start = V_score->end - len;
}
}
-
-
}
+ /* use segment trees to mark "used" regions of query.
+ Allow for quick check of intersection with chosen chains.
+ And insert anker-boundaries in header*/
+ if(rc & 1) {
+ growSeqmentTree(chainSeqments, start, best_score->end);
+ insertKmerBound((Qseqs *) header, start, best_score->end);
+ } else {
+ growSeqmentTree(chainSeqments, qseq->seqlen - best_score_r->end, qseq->seqlen - start);
+ insertKmerBound((Qseqs *) header, start, best_score_r->end);
+ }
+ /* print match */
+ if(rc & 1) {
+ if(rc & 2) {
+ i = *bestTemplates_r;
+ j = (*bestTemplates += i) + 1;
+ while(--i) {
+ bestTemplates[--j] = -bestTemplates_r[i];
+ }
+ best_score->score = -best_score->score;
+ best_score_r->score = 0;
+ *bestTemplates_r = 0;
+ }
+ lock(excludeOut);
+ i = deConPrintPtr(bestTemplates, qseq, best_score->score, header, 0, out);
+ unlock(excludeOut);
+ best_score->score = 0;
+ *bestTemplates = 0;
+ } else {
+ lock(excludeOut);
+ i = deConPrintPtr(bestTemplates_r, qseq_r, best_score_r->score, header, 0, out);
+ unlock(excludeOut);
+ best_score_r->score = 0;
+ *bestTemplates_r = 0;
+ }
+ /* get next match */
+ ties = 0;
+ rc = 0;
+ while(best_score && best_score->score == 0) {
+ /* find best anker */
+ if((best_score = getBestAnker(&VF_scores, &ties))) {
+ if(kmersize < best_score->score && mrs * (best_score->end - best_score->cStart) <= best_score->score) {
+ /* check chain */
+ start = best_score->cStart;
+ cover = queSeqmentTree(chainSeqments->root, start, best_score->end);
+
+ /* verify chain */
+ if(cover < coverT * (best_score->end - start)) {
+ /* get chain */
+ getBestChainTemplates(best_score, rewards, kmersize, bestTemplates, Score, extendScore, include);
+ rc = 1;
+ } else {
+ /* silence anker */
+ best_score->score = 0;
+ }
+ } else {
+ /* silence anker */
+ best_score->score = 0;
+ }
+ }
+ }
+
+ while(best_score_r && best_score_r->score == 0) {
+ /* find best anker */
+ if((best_score_r = getBestAnker(&VR_scores, &ties))) {
+ if(kmersize < best_score_r->score && mrs * (best_score_r->end - best_score_r->cStart) <= best_score_r->score) {
+ /* check chain */
+ start = best_score_r->cStart;
+ cover = queSeqmentTree(chainSeqments->root, qseq_r->seqlen - best_score_r->end, qseq_r->seqlen - start);
+
+ /* verify chain */
+ if(cover < coverT * (best_score_r->end - start)) {
+ /* get chain */
+ getBestChainTemplates(best_score_r, rewards, kmersize, bestTemplates_r, Score, extendScore, include);
+ rc |= 2;
+ } else {
+ /* silence anker */
+ best_score_r->score = 0;
+ }
+ } else {
+ /* silence anker */
+ best_score_r->score = 0;
+ }
+ }
+ }
+
+ if(!best_score && !best_score_r) {
+ *include = 0;
+ return 0;
+ } else if(best_score && best_score_r) {
+ if(best_score_r->score < best_score->score) {
+ rc = 1;
+ } else if(best_score_r->score == best_score->score) {
+ if((best_score->end - best_score->cStart) < (best_score_r->end - best_score->cStart)) {
+ rc = 1;
+ } else if((best_score->end - best_score->cStart) == (best_score_r->end - best_score->cStart)) {
+ rc = 3;
+ } else {
+ rc = 2;
+ }
+ } else {
+ rc = 2;
+ }
+ }
}
-
-
-
-
-
-
- /*
- Best template(s) must in values on best anker,
- remove non-union of A and B, while backtracking.
- Where A and B are templates on ankerA and ankerB.
- For proxi scoring the score has to be imputed while tracking.
- */
-
- /* here */
- /* what happens when two equal chains overlap */
- /*
- ------------- ------------- -------------
- ------------- -------------
-
- ------------- ------------- -------------
- ------------- -------------
- */
-
-
-
-
- /* no rechaining */
-
- /*
- Q = set of best chains
-
- while(score is good)
- 1. if in Q.
- if(<50%)
- add to Q
- else if(scores equal)
- add with match
- else
- skip chain
- else
- add to Q.
- 2. Silence it.
- 3. find best chain.
- */
*include = 0;
return 1;
@@ -5203,10 +5381,11 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
static double mrs = 0.5, coverT = 0.5;
static KmerAnker **tVF_scores;
static SeqmentTree **tSeqments;
- int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, score, gaps, template, *bests;
- unsigned i, j, shifter, kmersize, DB_size, SU, HIT, start, end, pos;
- unsigned len, cover, hitCounter, ties, *values, *last;
+ int i, j, shifter, prefix_shifter, kmersize, DB_size, pos, start, score;
+ int Wl, W1, U, M, MM, Ms, MMs, Us, W1s, end, len, gaps, template, *bests;
+ unsigned SU, HIT, cover, hitCounter, ties, prefix_len, *values, *last;
short unsigned *values_s;
+ long unsigned prefix;
char *include;
KmerAnker *V_score, *VF_scores, *tmp_score, *best_score;
SeqmentTree *chainSeqments;
@@ -5221,10 +5400,9 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
}
free(tVF_scores);
} else {
- /* here */
/* set mrs and coverT */
-
-
+ mrs = *((double *)(bestTemplates_r));
+ coverT = *((double *)(Score));
i = *bestTemplates;
Sizes = smalloc(i * sizeof(int));
tVF_scores = smalloc(i * sizeof(KmerAnker *));
@@ -5233,8 +5411,8 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
ERROR();
}
while(i--) {
- Sizes[i] = 256;
- tVF_scores[i] = calloc(512, sizeof(KmerAnker));
+ Sizes[i] = 1024;
+ tVF_scores[i] = calloc(1024, sizeof(KmerAnker));
if(!tVF_scores[i]) {
ERROR();
}
@@ -5249,6 +5427,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
SU = 0;
}
shifter = sizeof(long unsigned) * sizeof(long unsigned) - (templates->kmersize << 1);
+ prefix_shifter = sizeof(long unsigned) * sizeof(long unsigned) - (templates->prefix_len << 1);
M = rewards->M;
MM = rewards->MM;
U = rewards->U;
@@ -5258,6 +5437,8 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
hitCounter = 0;
values = 0;
values_s = 0;
+ last = 0;
+ start = 0;
if(Sizes[*Score] < qseq->size) {
Sizes[*Score] = qseq->size << 1;
@@ -5269,93 +5450,180 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
}
VF_scores = tVF_scores[*Score];
chainSeqments = tSeqments[*Score];
-
- HIT = exhaustive;
- j = 0;
- ++*(qseq->N);
- qseq->N[*(qseq->N)] = qseq->seqlen;
- for(i = 1; i <= *(qseq->N) && !HIT; ++i) {
- end = qseq->N[i] - kmersize + 1;
- while(j < end && hashMap_get(templates, getKmer(qseq->seq, j, shifter)) == 0) {
- j += kmersize;
+ V_score = VF_scores;
+ V_score->start = 0;
+ V_score->values = 0;
+ V_score->next = 0;
+
+ if((prefix_len = templates->prefix_len)) {
+ prefix = templates->prefix;
+ rc_comp(qseq, qseq_r);
+ ++*(qseq->N);
+ qseq->N[*(qseq->N)] = qseq->seqlen;
+ i = 0;
+ j = qseq->seqlen - kmersize - prefix_len;
+ for(gaps = 1; gaps <= *(qseq->N); ++gaps) {
+ end = qseq->N[gaps] - kmersize - prefix_len + 1;
+ while(i < end) {
+ if(getKmer(qseq->seq, i, prefix_shifter) == prefix) {
+ if((values = hashMap_get(templates, getKmer(qseq->seq, i + prefix_len, shifter)))) {
+ if(last) {
+ /* update and link between ankers */
+ V_score->end = (V_score->end + i) / 2;
+ start = V_score->end + 1;
+ V_score->weight = V_score->end - V_score->start;
+ V_score->descend = V_score + 1;
+ ++V_score;
+ } else if(start) {
+ start = (start + i) / 2;
+ }
+
+ V_score->start = start;
+ V_score->end = i + kmersize;
+ V_score->cStart = start;
+ V_score->values = (last = values);
+ V_score->descend = 0;
+ } else {
+ if(last) {
+ /* update and link between ankers */
+ V_score->end = (V_score->end + i) / 2;
+ V_score->weight = V_score->end - V_score->start;
+ V_score->descend = V_score + 1;
+ ++V_score;
+ }
+ start = i + kmersize;
+ last = 0;
+ }
+ } else if(getKmer(qseq_r->seq, j, prefix_shifter) == prefix) {
+ if((values = hashMap_get(templates, getKmer(qseq_r->seq, j + prefix_len, shifter)))) {
+ if(last) {
+ /* update and link between ankers */
+ V_score->end = (V_score->end + i) / 2;
+ start = V_score->end + 1;
+ V_score->weight = V_score->end - V_score->start;
+ V_score->descend = V_score + 1;
+ ++V_score;
+ } else if(start) {
+ start = (start + i) / 2;
+ }
+
+ V_score->start = start;
+ V_score->end = i + kmersize;
+ V_score->cStart = start;
+ V_score->values = (last = values);
+ V_score->descend = 0;
+ } else {
+ if(last) {
+ /* update and link between ankers */
+ V_score->end = (V_score->end + i) / 2;
+ V_score->weight = V_score->end - V_score->start;
+ V_score->descend = V_score + 1;
+ ++V_score;
+ }
+ start = i + kmersize;
+ last = 0;
+ }
+ }
+ ++i;
+ --j;
+ }
+ i = qseq->N[gaps] + 1;
+ j -= (kmersize + prefix_len);
}
- if(j < end) {
- HIT = 1;
+ if(last) {
+ V_score->end = qseq->seqlen;
+ V_score->weight = V_score->end - V_score->start;
} else {
- j = qseq->N[i] + 1;
+ (--V_score)->descend = 0;
}
- }
-
- if(HIT) {
- V_score = VF_scores;
- V_score->start = (j = 0);
- V_score->values = (last = 0);
- V_score->next = 0;
- Ms = 0;
- MMs = 0;
- Us = 0;
- W1s = 0;
- gaps = 0;
- for(i = 1; i <= *(qseq->N); ++i) {
+ --*(qseq->N);
+ } else {
+ HIT = exhaustive;
+ j = 0;
+ ++*(qseq->N);
+ qseq->N[*(qseq->N)] = qseq->seqlen;
+ for(i = 1; i <= *(qseq->N) && !HIT; ++i) {
end = qseq->N[i] - kmersize + 1;
- while(j < end) {
- if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
- if(values == last) {
- if(kmersize < gaps) {
- Ms += kmersize;
- gaps -= kmersize;
- if(gaps) {
- /* go for best scenario */
- if(gaps == 1) {
- MMs += 2;
- } else {
- gaps -= 2;
- if((MM << 1) + gaps * M < 0) {
- Ms += gaps;
+ while(j < end && hashMap_get(templates, getKmer(qseq->seq, j, shifter)) == 0) {
+ j += kmersize;
+ }
+ if(j < end) {
+ HIT = 1;
+ } else {
+ j = qseq->N[i] + 1;
+ }
+ }
+
+ if(HIT) {
+ Ms = 0;
+ MMs = 0;
+ Us = 0;
+ W1s = 0;
+ gaps = 0;
+ j = 0;
+ for(i = 1; i <= *(qseq->N); ++i) {
+ end = qseq->N[i] - kmersize + 1;
+ while(j < end) {
+ if((values = hashMap_get(templates, getKmer(qseq->seq, j, shifter)))) {
+ if(values == last) {
+ if(kmersize < gaps) {
+ Ms += kmersize;
+ gaps -= kmersize;
+ if(gaps) {
+ /* go for best scenario */
+ if(gaps == 1) {
MMs += 2;
+ } else {
+ gaps -= 2;
+ if((MM << 1) + gaps * M < 0) {
+ Ms += gaps;
+ MMs += 2;
+ }
}
+ } else {
+ ++MMs;
}
+ } else if(gaps) {
+ --gaps;
+ ++W1s;
+ Us += gaps;
} else {
- ++MMs;
+ ++Ms;
}
- } else if(gaps) {
- --gaps;
- ++W1s;
- Us += gaps;
} else {
- ++Ms;
+ if(last) {
+ /* update and link between ankers */
+ V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ V_score->end = j - gaps + kmersize;
+ V_score->descend = V_score + 1;
+ ++V_score;
+ }
+ V_score->start = j;
+ V_score->cStart = j;
+ V_score->values = (last = values);
+ V_score->descend = 0;
+ Ms = 0;
+ MMs = 0;
+ Us = 0;
+ W1s = 0;
+ ++hitCounter;
}
+ gaps = 0;
} else {
- if(last) {
- /* update and link between ankers */
- V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
- V_score->end = j - gaps + kmersize;
- V_score->descend = V_score + 1;
- ++V_score;
- }
- V_score->start = j;
- V_score->values = (last = values);
- V_score->descend = 0;
- Ms = 0;
- MMs = 0;
- Us = 0;
- W1s = 0;
- ++hitCounter;
+ ++gaps;
}
- gaps = 0;
- } else {
- ++gaps;
+ j += kmersize;
}
- j += kmersize;
+ j = qseq->N[i] + 1;
+ }
+ if(last) {
+ /* update anker */
+ V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
+ V_score->end = j - gaps + kmersize;
}
- j = qseq->N[i] + 1;
- }
- if(last) {
- /* update anker */
- V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
}
+ --*(qseq->N);
}
- --*(qseq->N);
/* no matches */
if(!hitCounter) {
@@ -5432,8 +5700,10 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
/* find link chain */
if(tmp_score) {
while((--tmp_score)->end != pos);
+ /* update start of chain */
+ V_score->cStart = tmp_score->start;
}
- V_score->next = tmp_score;
+ V_score->next = tmp_score;
}
Score[template] = score;
}
@@ -5443,18 +5713,17 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
best_score = V_score;
ties = 0;
} else if(best_score->score == V_score->score) {
- if((V_score->end - V_score->start) < (best_score->end - best_score->start)) {
+ if((V_score->end - V_score->start) < (best_score->end - best_score->cStart)) {
/* lower uncertainty in shorter chains */
best_score = V_score;
ties = 0;
- } else if((V_score->end - V_score->start) == (best_score->end - best_score->start)) {
+ } else if((V_score->end - V_score->start) == (best_score->end - best_score->cStart)) {
/* equals */
best_score = V_score;
++ties;
}
}
- V_score->flag = 0;
++V_score;
}
@@ -5467,7 +5736,10 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
}
/* no good hits */
- if((score = best_score->score) < kmersize) {
+ score = best_score->score;
+ start = best_score->cStart;
+ len = best_score->end - start;
+ if(score < kmersize || score < mrs * len) {
*include = 0;
return 1;
}
@@ -5477,16 +5749,16 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
and zero out ankers */
*include = SU;
*bestTemplates = 0;
- tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
+ tmp_score = getBestChainTemplates(best_score, rewards, kmersize, bestTemplates, Score, extendScore, include);
+ /* here */
+ /* check consistency */
+ /*
start = tmp_score->start;
len = best_score->end - start;
- if(score < mrs * len) {
- *include = 0;
- return 1;
- }
+ */
/* prune hits */
- VF_scores = pruneAnkers(VF_scores, kmersize);
+ VF_scores = pruneAnkers(VF_scores, kmersize, mrs);
/* get best chains */
while(best_score) {
@@ -5494,7 +5766,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
that lies under the threshold */
if(ties && best_score != tmp_score) {
V_score = best_score;
- while(V_score && (V_score = getTieAnker(tmp_score, V_score, best_score->score, len))) {
+ while((V_score = getTieAnker(best_score->cStart, V_score, best_score->score, len))) {
/*
1. tie anker -> tie_len == best_len
2. Best match is last on seq -> tie_start < best_start
@@ -5514,7 +5786,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
/* update best template candidates */
template = *bests;
*bests = 0;
- getBestChainTemplates(V_score, rewards, bests, Score, extendScore, include);
+ getBestChainTemplates(V_score, rewards, kmersize, bests, Score, extendScore, include);
*bestTemplates += *bests;
*bests = template;
start = V_score->end - len;
@@ -5535,6 +5807,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
unlock(excludeOut);
/* get next match */
+ ties = 0;
best_score->score = 0;
*bestTemplates = 0;
while(best_score->score == 0) {
@@ -5545,13 +5818,13 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
}
/* check chain */
- start = getStartAnker(best_score);
+ start = best_score->cStart;
cover = queSeqmentTree(chainSeqments->root, start, best_score->end);
/* verify chain */
if(cover < coverT * (best_score->end - start)) {
/* get chain */
- tmp_score = getBestChainTemplates(best_score, rewards, bestTemplates, Score, extendScore, include);
+ tmp_score = getBestChainTemplates(best_score, rewards, kmersize, bestTemplates, Score, extendScore, include);
len = best_score->end - start;
if(best_score->score < mrs * len) {
/* silence anker */
@@ -5567,7 +5840,3 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
return 1;
}
-
-
-
-
=====================================
savekmers.h
=====================================
@@ -87,3 +87,4 @@ int save_kmers_HMM(const HashMapKMA *templates, const Penalties *rewards, int *b
void ankerAndClean(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
void ankerAndClean_MEM(int *regionTemplates, int *Score, int *Score_r, char *include, int *template_lengths, unsigned **VF_scores, unsigned **VR_scores, int *tmpNs, CompDNA *qseq, int HIT, int bestScore, int start_cut, int end_cut, Qseqs *header, volatile int *excludeOut, FILE *out);
int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);
+int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *rewards, int *bestTemplates, int *bestTemplates_r, int *Score, int *Score_r, CompDNA *qseq, CompDNA *qseq_r, const Qseqs *header, int *extendScore, const int exhaustive, volatile int *excludeOut, FILE *out);
=====================================
seqmenttree.c
=====================================
@@ -140,10 +140,13 @@ unsigned addSeqmentTrees(SeqmentTrees *root, SeqmentTrees *node) {
void growSeqmentTree(SeqmentTree *src, const unsigned start, const unsigned end) {
SeqmentTrees *node;
-
+
/* make room for new anker */
if(src->size <= src->n + 2) {
reallocSeqmentTree(src);
+ } else if(src->n == 0) {
+ initSeqmentTree(src, start, end);
+ return;
}
/* make new leaf */
=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
* limitations under the License.
*/
-#define KMA_VERSION "1.2.11"
+#define KMA_VERSION "1.2.12"
View it on GitLab: https://salsa.debian.org/med-team/kma/compare/b761f76a9d868b91d7f02423972569522c738968...3b8fcdeddbd318fc94d0f957fbf28231ee90c5c9
--
View it on GitLab: https://salsa.debian.org/med-team/kma/compare/b761f76a9d868b91d7f02423972569522c738968...3b8fcdeddbd318fc94d0f957fbf28231ee90c5c9
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190923/72392d29/attachment-0001.html>
More information about the debian-med-commit
mailing list