[med-svn] [Git][med-team/kma][upstream] New upstream version 1.4.14

Lance Lin (@linqigang) gitlab at salsa.debian.org
Wed Aug 23 14:29:04 BST 2023



Lance Lin pushed to branch upstream at Debian Med / kma


Commits:
149fa09e by Lance Lin at 2023-08-22T22:12:39+07:00
New upstream version 1.4.14
- - - - -


29 changed files:

- + .github/workflows/conda-linux-release.yml
- + .github/workflows/conda-osx-release.yml
- + .github/workflows/release.yml
- + .idea/.gitignore
- + .idea/kma.iml
- + .idea/misc.xml
- + .idea/modules.xml
- + .idea/vcs.xml
- README.md
- + bitbucket-pipelines.yml
- + conda/build.sh
- + conda/meta.yaml
- dist.c
- + envs/osx-build.yml
- kma.c
- kmeranker.c
- kmeranker.h
- kmers.c
- runinput.c
- runinput.h
- runkma.c
- savekmers.c
- seqparse.c
- sparse.c
- sparse.h
- spltdb.c
- stdstat.h
- trim.c
- version.h


Changes:

=====================================
.github/workflows/conda-linux-release.yml
=====================================
@@ -0,0 +1,19 @@
+name: conda-linux-release
+on:
+  push:
+    # Sequence of patterns matched against refs/tags
+    tags:
+      - '*' # Push events to matching *, i.e. 1.0, 20.15.10
+jobs:
+  conda-linux:
+    name: Linux Upload
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout at v1
+      - name: publish-to-conda-linux
+        uses: BEFH/anaconda-publish at v1.5.1
+        with:
+          subDir: 'conda'
+          channels: 'conda-forge -c bioconda -c defaults'
+          AnacondaToken: ${{ secrets.GENOMICEPIDEMIOLOGY_CONDA_AUTH_TOKEN }}
+          publish: true


=====================================
.github/workflows/conda-osx-release.yml
=====================================
@@ -0,0 +1,43 @@
+name: conda-osx-release
+on:
+  push:
+    # Sequence of patterns matched against refs/tags
+    tags:
+      - '*' # Push events to matching *, i.e. 1.0, 20.15.10
+jobs:
+  conda-osx:
+    name: ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest]
+
+    steps:
+      - name: checkout repository
+        uses: actions/checkout at v2
+
+      - name: create environment with mamba
+        uses: conda-incubator/setup-miniconda at v2
+        with:
+          mamba-version: "*"
+          channels: conda-forge,bioconda,defaults
+          auto-activate-base: true
+          activate-environment: osx-build
+          environment-file: envs/osx-build.yml
+
+      - name: check solution
+        run: |
+          mamba env export
+
+      - name: build conda package
+        run: |
+          conda mambabuild conda -c conda-forge --output-folder .
+
+      - name: upload conda package
+        run: |
+          anaconda -t ${{ secrets.GENOMICEPIDEMIOLOGY_CONDA_AUTH_TOKEN }} upload --label main osx-64/*.tar.bz2


=====================================
.github/workflows/release.yml
=====================================
@@ -0,0 +1,29 @@
+on:
+  push:
+    # Sequence of patterns matched against refs/tags
+    tags:
+      - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10
+
+name: Create Release
+
+jobs:
+  auto-tag-release:
+    name: Create Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout at v2
+      - name: Create Release
+        id: create_release
+        uses: actions/create-release at v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.PERSONAL_TOKEN }} # This token is provided by Actions, you do not need to create your own token
+        with:
+          tag_name: ${{ github.ref }}
+          release_name: Release ${{ github.ref }}
+          body: |
+            Changes in this Release
+            - First Change
+            - Second Change
+          draft: false
+          prerelease: false
\ No newline at end of file


=====================================
.idea/.gitignore
=====================================
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml


=====================================
.idea/kma.iml
=====================================
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file


=====================================
.idea/misc.xml
=====================================
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>
\ No newline at end of file


=====================================
.idea/modules.xml
=====================================
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/kma.iml" filepath="$PROJECT_DIR$/.idea/kma.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file


=====================================
.idea/vcs.xml
=====================================
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file


=====================================
README.md
=====================================
@@ -1,3 +1,4 @@
+[![install with bioconda](https://img.shields.io/badge/install%20with-genomicepidemiology-brightgreen.svg?style=flat)](https://anaconda.org/genomicepidemiology/kma)
 [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/kma/README.html)
 # Getting Started #
 


=====================================
bitbucket-pipelines.yml
=====================================
@@ -0,0 +1,8 @@
+clone:
+  depth: full
+pipelines:
+  custom:
+    mirror-to-github:
+      - step:
+          script:
+            - git push --mirror git at github.com:genomicepidemiology/kma
\ No newline at end of file


=====================================
conda/build.sh
=====================================
@@ -0,0 +1,7 @@
+BINARIES="kma kma_index kma_shm kma_update"
+make CFLAGS="-w -O3 -I$PREFIX/include -L$PREFIX/lib"
+
+mkdir -p ${PREFIX}/bin
+cp $BINARIES $PREFIX/bin
+mkdir -p $PREFIX/doc/kma
+cp README.md $PREFIX/doc/kma/
\ No newline at end of file


=====================================
conda/meta.yaml
=====================================
@@ -0,0 +1,30 @@
+package:
+  name: kma
+  version: 1.4.9
+
+source:
+  url: https://bitbucket.org/genomicepidemiology/kma/get/1.4.9.tar.gz
+
+build:
+  number: 0
+
+requirements:
+  build:
+    - make
+    - {{ compiler('c') }}
+  host:
+    - zlib
+  run:
+    - zlib
+
+test:
+  commands:
+    - kma -h | grep KMA >/dev/null
+
+about:
+  home: https://bitbucket.org/genomicepidemiology/kma
+  summary: 'KMA is mapping a method designed to map raw reads directly against redundant databases, in an ultra-fast manner using seed and extend.'
+  license: Apache-2.0
+extra:
+  identifiers:
+    - doi:10.1186/s12859-018-2336-6
\ No newline at end of file


=====================================
dist.c
=====================================
@@ -80,8 +80,8 @@ HashMapKMA * loadValues(const char *filename) {
 		dest->exist = smalloc(size);
 		check = fread(dest->exist, 1, size, file);
 		if(check != size) {
-			free(dest);
 			free(dest->exist);
+			free(dest);
 			fclose(file);
 			return 0;
 		}
@@ -110,9 +110,9 @@ HashMapKMA * loadValues(const char *filename) {
 	dest->values = smalloc(size);
 	check = fread(dest->values, 1, size, file);
 	if(check != size) {
-		free(dest);
 		free(dest->exist);
 		free(dest->values);
+		free(dest);
 		fclose(file);
 		return 0;
 	}
@@ -141,9 +141,9 @@ HashMapKMA * loadValues(const char *filename) {
 		dest->exist = smalloc(size);
 		check = fread(dest->exist, 1, size, file);
 		if(check != size) {
-			free(dest);
 			free(dest->exist);
 			free(dest->values);
+			free(dest);
 			fclose(file);
 			return 0;
 		}


=====================================
envs/osx-build.yml
=====================================
@@ -0,0 +1,9 @@
+name: osx-build
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - mamba
+  - python
+  - boa
+  - anaconda-client
\ No newline at end of file


=====================================
kma.c
=====================================
@@ -192,6 +192,7 @@ static void helpMessage(int exitStatus) {
 	
 	fprintf(out, "#\n# Trimming:\n");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-mp", "Minimum phred score", "20");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mi", "Minimum internal phred score", "0");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-eq", "Minimum avg. quality score", "0");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-5p", "Trim 5 prime", "0");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-3p", "Trim 3 prime", "0");
@@ -230,11 +231,11 @@ int kma_main(int argc, char *argv[]) {
 		0.0000000000398107, 0.0000000000316228, 0.0000000000251189, 0.0000000000199526, 0.0000000000158489, 0.0000000000125893, 0.0000000000100000, 0.0000000000079433,
 		0.0000000000063096, 0.0000000000050119, 0.0000000000039811, 0.0000000000031623, 0.0000000000025119, 0.0000000000019953, 0.0000000000015849, 0.0000000000012589,
 		0.0000000000010000, 0.0000000000007943, 0.0000000000006310, 0.0000000000005012, 0.0000000000003981, 0.0000000000003162, 0.0000000000002512, 0.0000000000001995};
-	static int minPhred, minQ, fiveClip, threeClip, ConClave, mem_mode;
-	static int fileCounter, fileCounter_PE, fileCounter_INT, Ts, Tv, minlen;
+	static int minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, maxlen;
+	static int fileCounter, fileCounter_PE, fileCounter_INT, Ts, Tv, mem_mode;
 	static int extendedFeatures, spltDB, thread_num, kmersize, targetNum, mq;
 	static int ref_fsa, print_matrix, print_all, sam, vcf, Mt1, bcd, one2one;
-	static int sparse_run, ts, maxlen, **d, status = 0;
+	static int ConClave, sparse_run, ts, **d, status = 0;
 	static unsigned xml, nc, nf, shm, exhaustive, verbose;
 	static long unsigned tsv;
 	static char *outputfilename, *templatefilename, **templatefilenames;
@@ -271,6 +272,7 @@ int kma_main(int argc, char *argv[]) {
 		spltDB = 0;
 		extendedFeatures = 0;
 		minPhred = 20;
+		minmaskQ = 0;
 		minQ = 0;
 		fiveClip = 0;
 		threeClip = 0;
@@ -580,6 +582,15 @@ int kma_main(int argc, char *argv[]) {
 						exit(1);
 					}
 				}
+			} else if(strcmp(argv[args], "-mi") == 0) {
+				++args;
+				if(args < argc) {
+					minmaskQ = strtoul(argv[args], &exeBasic, 10);
+					if(*exeBasic != 0) {
+						fprintf(stderr, "# Invalid internal minimum phred score parsed\n");
+						exit(1);
+					}
+				}
 			} else if(strcmp(argv[args], "-eq") == 0) {
 				++args;
 				if(args < argc) {
@@ -1430,8 +1441,11 @@ int kma_main(int argc, char *argv[]) {
 				free(inputfiles_INT);
 				fprintf(stderr, "Interleaved information is not considered in Sparse mode.\n");
 			}
+			if(minPhred < minmaskQ) {
+				minPhred = minmaskQ;
+			}
 			
-			run_input_sparse(templates, inputfiles, fileCounter, minPhred, minQ, fiveClip, threeClip, kmersize, to2Bit, prob, ioStream);
+			run_input_sparse(templates, inputfiles, fileCounter, minPhred, minmaskQ, minQ, fiveClip, threeClip, kmersize, to2Bit, prob, ioStream);
 			hashMapKMA_destroy(templates);
 			free(myTemplatefilename);
 		} else {
@@ -1449,20 +1463,23 @@ int kma_main(int argc, char *argv[]) {
 				myTemplatefilename = 0;
 			}
 			totFrags = 0;
+			if(minPhred < minmaskQ) {
+				minPhred = minmaskQ;
+			}
 			
 			/* SE */
 			if(fileCounter > 0) {
-				totFrags += run_input(inputfiles, fileCounter, minPhred, minQ, fiveClip, threeClip, minlen, maxlen, to2Bit, prob, ioStream);
+				totFrags += run_input(inputfiles, fileCounter, minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, maxlen, to2Bit, prob, ioStream);
 			}
 			
 			/* PE */
 			if(fileCounter_PE > 0) {
-				totFrags += run_input_PE(inputfiles_PE, fileCounter_PE, minPhred, minQ, fiveClip, threeClip, minlen, to2Bit, prob, ioStream);
+				totFrags += run_input_PE(inputfiles_PE, fileCounter_PE, minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, to2Bit, prob, ioStream);
 			}
 			
 			/* INT */
 			if(fileCounter_INT > 0) {
-				totFrags += run_input_INT(inputfiles_INT, fileCounter_INT, minPhred, minQ, fiveClip, threeClip, minlen, to2Bit, prob, ioStream);
+				totFrags += run_input_INT(inputfiles_INT, fileCounter_INT, minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, to2Bit, prob, ioStream);
 			}
 			
 			fprintf(stderr, "#\n# Total number of query fragment after trimming:\t%lu\n", totFrags);


=====================================
kmeranker.c
=====================================
@@ -22,7 +22,7 @@
 #include "kmeranker.h"
 #include "penalties.h"
 
-KmerAnker * (*getChainTemplates)(KmerAnker*, const Penalties*, const int*, const int, const int, int*, int*, int*, char*) = &getBestChainTemplates;
+KmerAnker * (*getChainTemplates)(KmerAnker*, const Penalties*, const int*, const int, const int, const int, int*, int*, int*, char*) = &getBestChainTemplates;
 int (*kmerAnkerScore)(KmerAnker*) = &ankerScore;
 const int (*testExtension)(const int, const int, const int) = &testExtensionScore;
 const int (*proxiTestBest)(const double, const int, const int, const int, const int) = &proxiTestBestScore;
@@ -80,7 +80,7 @@ const int mrchain(int *bestTemaples, const int *template_lengths, const int q_le
 	return 1;
 }
 
-KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, int *bests, int *Score, int *extendScore, char *include) {
+KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, const int mlen, int *bests, int *Score, int *extendScore, char *include) {
 	
 	/* get set of best templates and silences the chain, except for the initial anker */
 	int i, j, template, score, tmpScore, bestScore, Wl, W1, U, M, MM, Ms, MMs;
@@ -160,6 +160,8 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, cons
 					if(gaps == -kmersize) {
 						/* Perfect match */
 						score += node->weight - (kmersize - 1) * M;
+					} else if(gaps == 0) {
+						score += node->weight + MM;
 					} else if(0 < gaps) {
 						/* mismatch or insersion */
 						if(gaps <= 2) {
@@ -178,9 +180,12 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, cons
 						} else {
 							score += node->weight + (W1 + (gaps - 1) * U);
 						}
-					} else {
+					} else if(mlen != kmersize) {
 						/* snp */
-						score += node->weight + (gaps + 1) * M + MM;
+						score += node->weight + gaps * M + MM;
+					} else {
+						/* indel */
+						score += node->weight + gaps * M - (gaps + 1) * U + W1;
 					}
 					
 					/* mark as used */
@@ -227,7 +232,7 @@ KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, cons
 	return j ? prev : 0;
 }
 
-KmerAnker * getProxiChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, int *bests, int *Score, int *extendScore, char *include) {
+KmerAnker * getProxiChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, const int mlen, int *bests, int *Score, int *extendScore, char *include) {
 	
 	/* get set of best templates and silences the chain, except for the initial anker */
 	static long unsigned *softProxi = 0;
@@ -290,6 +295,8 @@ KmerAnker * getProxiChainTemplates(KmerAnker *src, const Penalties *rewards, con
 				if(gaps == -kmersize) {
 					/* Perfect match */
 					score += node->weight - (kmersize - 1) * M;
+				} else if(gaps == 0) {
+					score += node->weight + MM;
 				} else if(0 < gaps) {
 					/* mismatch or insersion */
 					if(gaps <= 2) {
@@ -308,9 +315,12 @@ KmerAnker * getProxiChainTemplates(KmerAnker *src, const Penalties *rewards, con
 					} else {
 						score += node->weight + (W1 + (gaps - 1) * U);
 					}
-				} else {
+				} else if(mlen != kmersize) {
 					/* snp */
-					score += node->weight + (gaps + 1) * M + MM;
+					score += node->weight + gaps * M + MM;
+				} else {
+					/* indel */
+					score += node->weight + gaps * M - (gaps + 1) * U + W1;
 				}
 				
 				/* mark as used */


=====================================
kmeranker.h
=====================================
@@ -34,7 +34,7 @@ struct kmerAnker {
 };
 #endif
 
-extern KmerAnker * (*getChainTemplates)(KmerAnker*, const Penalties*, const int*, const int, const int, int*, int*, int*, char*);
+extern KmerAnker * (*getChainTemplates)(KmerAnker*, const Penalties*, const int*, const int, const int, const int, int*, int*, int*, char*);
 extern int (*kmerAnkerScore)(KmerAnker*);
 extern const int (*testExtension)(const int, const int, const int);
 extern const int (*proxiTestBest)(const double, const int, const int, const int, const int);
@@ -47,8 +47,8 @@ const int testExtensionScoreLen(const int q_len, const int t_len, const int best
 const int proxiTestBestScore(const double proxiScore, const int score, const int q_len, const int t_len, const int best_len);
 const int proxiTestBestScoreLen(const double proxiScore, const int score, const int q_len, const int t_len, const int best_len);
 const int mrchain(int *bestTemaples, const int *template_lengths, const int q_len, const int maplen);
-KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, int *bests, int *Score, int *extendScore, char *include);
-KmerAnker * getProxiChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, int *bests, int *Score, int *extendScore, char *include);
+KmerAnker * getBestChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, const int mlen, int *bests, int *Score, int *extendScore, char *include);
+KmerAnker * getProxiChainTemplates(KmerAnker *src, const Penalties *rewards, const int *template_lengths, const int q_len, const int kmersize, const int mlen, int *bests, int *Score, int *extendScore, char *include);
 KmerAnker * pruneAnkers(KmerAnker *V_score, int kmersize);
 KmerAnker * getBestAnkerScore(KmerAnker **src, unsigned *ties, const int *template_lengths);
 KmerAnker * getBestAnkerScoreLen(KmerAnker **src, unsigned *ties, const int *template_lengths);


=====================================
kmers.c
=====================================
@@ -147,7 +147,7 @@ int save_kmers_batch(char *templatefilename, char *exePrev, unsigned shm, int th
 		getF((int *)(&minFrac), (int *)(softProxi), 0, 0, 0);
 		ankerAndClean((int *)(&minFrac), (int *)(softProxi), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 		ankerAndClean_MEM((int *)(&minFrac), (int *)(softProxi), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-		getProxiChainTemplates(0, (const Penalties *)(&minFrac), (int *)(softProxi), 0, 0, 0, 0, 0, 0);
+		getProxiChainTemplates(0, (const Penalties *)(&minFrac), (int *)(softProxi), 0, 0, 0, 0, 0, 0, 0);
 	}
 	template_lengths = 0;
 	if(kmerScan == &save_kmers_HMM || kmerScan == &save_kmers_chain || kmerScan == &save_kmers_sparse_chain) {


=====================================
runinput.c
=====================================
@@ -29,7 +29,29 @@
 void (*printFsa_ptr)(Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*) = &printFsa;
 void (*printFsa_pair_ptr)(Qseqs*, Qseqs*, Qseqs*, Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*) = &printFsa_pair;
 
-long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out) {
+unsigned hardmaskQ(unsigned char *seq, unsigned char *qual, int len, const int phredScale, int minQ) {
+	
+	unsigned n;
+	
+	if(!len || !minQ) {
+		return 0;
+	}
+	
+	minQ += phredScale;
+	n = 0;
+	--qual;
+	do {
+		if(*++qual < minQ) {
+			*seq = 4;
+			++n;
+		}
+		++seq;
+	} while(--len);
+	
+	return n;
+}
+
+long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out) {
 	
 	int fileCounter, phredScale, phredCut, start, end;
 	unsigned FASTQ;
@@ -89,7 +111,7 @@ long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ
 					qual->len = end - start;
 					
 					/* print */
-					if(minlen <= qseq->len && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
+					if(minlen <= qseq->len && qseq->len != hardmaskQ(qseq->seq + start, seq + start, qseq->len, phredScale, minmaskQ) && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
 						/* dump seq */
 						qseq->seq += start;
 						qual->seq += start;
@@ -144,7 +166,7 @@ long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ
 	return count;
 }
 
-long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out) {
+long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out) {
 	
 	int fileCounter, phredScale, phredCut, start, start2, end;
 	unsigned FASTQ, FASTQ2;
@@ -185,6 +207,7 @@ long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int m
 		} else {
 			fprintf(stderr, "Inputfiles:\t%s %s\nAre in different format.\n", inputfiles[fileCounter-1], filename);
 			FASTQ = 0;
+			errno = 1;
 		}
 		
 		/* parse the file */
@@ -222,6 +245,10 @@ long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int m
 				*/
 				qseq->len = end - start;
 				qual->len = end - start;
+				if(qseq->len == hardmaskQ(qseq->seq + start, seq + start, qseq->len, phredScale, minmaskQ)) {
+					qseq->len = 0;
+					qual->len = 0;
+				}
 				eq1 = eQual(seq + start, qseq->len, minQ, prob - phredScale);
 				
 				/* trim reverse */
@@ -245,6 +272,10 @@ long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int m
 				*/
 				qseq2->len = end - start2;
 				qual2->len = end - start2;
+				if(qseq2->len == hardmaskQ(qseq2->seq + start, seq + start, qseq2->len, phredScale, minmaskQ)) {
+					qseq2->len = 0;
+					qual2->len = 0;
+				}
 				eq2 = eQual(seq + start2, qseq2->len, minQ, prob - phredScale);
 				
 				/* print */
@@ -344,7 +375,7 @@ long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int m
 	return count;
 }
 
-long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out) {
+long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out) {
 	
 	int fileCounter, phredScale, phredCut, start, start2, end;
 	unsigned FASTQ;
@@ -409,6 +440,10 @@ long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int
 				*/
 				qseq->len = end - start;
 				qual->len = end - start;
+				if(qseq->len == hardmaskQ(qseq->seq + start, seq + start, qseq->len, phredScale, minmaskQ)) {
+					qseq->len = 0;
+					qual->len = 0;
+				}
 				eq1 = eQual(seq + start, qseq->len, minQ, prob - phredScale);
 				
 				/* trim reverse */
@@ -431,6 +466,10 @@ long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int
 				*/
 				qseq2->len = end - start2;
 				qual2->len = end - start2;
+				if(qseq2->len == hardmaskQ(qseq2->seq + start, seq + start, qseq2->len, phredScale, minmaskQ)) {
+					qseq2->len = 0;
+					qual2->len = 0;
+				}
 				eq2 = eQual(seq + start2, qseq2->len, minQ, prob - phredScale);
 				
 				/* print */


=====================================
runinput.h
=====================================
@@ -23,9 +23,10 @@
 /* pointers determining how to deliver the input */
 extern void (*printFsa_ptr)(Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*);
 extern void (*printFsa_pair_ptr)(Qseqs*, Qseqs*, Qseqs*, Qseqs*, Qseqs*, Qseqs*, CompDNA*, FILE*);
-long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out);
-long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
-long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
+unsigned hardmaskQ(unsigned char *seq, unsigned char *qual, int len, const int phredScale, int minQ);
+long unsigned run_input(char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int minlen, int maxlen, char *trans, const double *prob, FILE *out);
+long unsigned run_input_PE(char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
+long unsigned run_input_INT(char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int minlen, char *trans, const double *prob, FILE *out);
 void bootFsa(Qseqs *header, Qseqs *qseq, Qseqs *qual, CompDNA *compressor, FILE *out);
 void printFsa(Qseqs *header, Qseqs *qseq, Qseqs *qual, CompDNA *compressor, FILE *out);
 void printFsa_pair(Qseqs *header, Qseqs *qseq, Qseqs *qual, Qseqs *header_r, Qseqs *qseq_r, Qseqs *qual_r, CompDNA *compressor, FILE *out);


=====================================
runkma.c
=====================================
@@ -152,7 +152,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 	AssemInfo *matrix;
 	AlnPoints *points;
 	NWmat *NWmatrices;
-	Assemble_thread *threads, *thread;
+	Assemble_thread *threads, *thread, *next;
 	Aln_thread *alnThreads, *alnThread;
 	HashMapCCI **templates_index;
 	
@@ -650,7 +650,12 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 	aligned_assem->q = smalloc(aligned_assem->size);
 	
 	/* allocate matrcies for NW */
-	for(thread = threads; thread != 0; thread = thread->next) {
+	i = 1;
+	thread = threads;
+	threads = 0;
+	while(thread) {
+		next = thread->next;
+		
 		/* get remaining thread info */
 		aligned = smalloc(sizeof(Aln));
 		gap_align = smalloc(sizeof(Aln));
@@ -672,9 +677,36 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 		if((errno = pthread_create(&thread->id, NULL, assembly_KMA_Ptr, thread))) {
 			fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
 			fprintf(stderr, "Will continue with %d threads.\n", i);
-			threads = thread->next;
-			free(thread);
+			
+			/* free tracebacks */
+			free(aligned->t);
+			free(aligned->s);
+			free(aligned->q);
+			free(aligned);
+			free(gap_align->t);
+			free(gap_align->s);
+			free(gap_align->q);
+			free(gap_align);
+			
+			/* free remaining nodes */
+			while(thread) {
+				next = thread->next;
+				NWmatrices = thread->NWmatrices;
+				free(NWmatrices->E);
+				free(NWmatrices->D[0]);
+				free(NWmatrices->P[0]);
+				free(NWmatrices);
+				destroyQseqs(thread->qseq);
+				destroyQseqs(thread->header);
+				seedPoint_free(thread->points);
+				thread = next;
+			}
+		} else {
+			thread->next = threads;
+			threads = thread;
+			++i;
 		}
+		thread = next;
 	}
 	
 	/* start main thread */
@@ -835,7 +867,7 @@ int runKMA(char *templatefilename, char *outputfilename, char *exePrev, int ConC
 	assembly_KMA_Ptr(thread);
 	for(thread = threads; thread != 0; thread = thread->next) {
 		/* join thread */
-		if((errno = pthread_join(thread->id, NULL))) {
+		if(thread->id && (errno = pthread_join(thread->id, NULL))) {
 			ERROR();
 		}
 	}


=====================================
savekmers.c
=====================================
@@ -546,12 +546,17 @@ int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, in
 									++W1s;
 									Us += (gaps -1);
 								}
-							} else {
-								Ms += (gaps - 1);
+							} else if(mlen != kmersize) {
+								Ms += gaps;
 								++MMs;
 								/* unlikely deletion or random k-mer mismatch, 
 								unless minimizers or homopolymer compression,
 								assume better and go random zero score else. */
+							} else {
+								/* indel */
+								Ms += gaps;
+								++W1s;
+								Us += (kmersize - gaps);
 							}
 							
 							HIT = j;
@@ -568,20 +573,19 @@ int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, in
 								}
 								HIT = j - 1;
 								last = values;
-								score = kmersize * M;
 								values_s = (short unsigned *) values;
 								n = SU ? *values_s : *values;
 								l = n + 1;
 								while(--l) {
 									template = SU ? *++values_s : *++values;
-									if(Scores[template] != 0) {
+									if(include[template]) {
 										gaps = HIT - extendScore[template];
 										if(gaps == 0) {
 											/* match */
-											Scores[template] += M;
+											score = M;
 										} else if(gaps == kmersize) {
 											/* snp */
-											Scores[template] += score + MM;
+											score = kmersize * M + MM;
 										} else if(kmersize < gaps) {
 											/* mismatch or insersion */
 											gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
@@ -597,18 +601,22 @@ int get_kmers_for_pair(const HashMapKMA *templates, const Penalties *rewards, in
 											
 											/* evaluate best option */
 											if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
-												Scores[template] += score + (mm * MM + m * M);
+												score = kmersize * M + (mm * MM + m * M);
 											} else {
-												Scores[template] += score + (W1 + (gaps - 1) * U);
+												score = kmersize * M + (W1 + (gaps - 1) * U);
 											}
-										} else {
-											Scores[template] += (gaps + 1) * M + MM;
+										} else if(mlen != kmersize) {
+											score = gaps * M + MM;
 											/* unlikely deletion or random k-mer mismatch, 
 											unless minimizers or homopolymer compression,
 											assume better and go random zero score else. */
+										} else {
+											/* indel */
+											score = gaps * M + (kmersize - gaps) * U + W1;
 										}
+										Score[template] += score;
 									} else {
-										Scores[template] = score;
+										Scores[template] = kmersize * M;
 										if(include[template] == 0) {
 											include[template] = 1;
 											bests[++*bests] = template;
@@ -1107,12 +1115,17 @@ int get_kmers_for_pair_pseoudoSparse(const HashMapKMA *templates, const Penaltie
 								++W1s;
 								Us += (gaps -1);
 							}
-						} else {
-							Ms += (gaps - 1);
+						} else if(mlen != kmersize) {
+							Ms += gaps;
 							++MMs;
 							/* unlikely deletion or random k-mer mismatch, 
 							unless minimizers or homopolymer compression,
 							assume better and go random zero score else. */
+						} else {
+							/* indel */
+							Ms += gaps;
+							++W1s;
+							Us += (kmersize - gaps);
 						}
 						
 						HIT = j;
@@ -1129,20 +1142,19 @@ int get_kmers_for_pair_pseoudoSparse(const HashMapKMA *templates, const Penaltie
 							}
 							HIT = j - 1;
 							last = values;
-							score = kmersize * M;
 							values_s = (short unsigned *) values;
 							n = SU ? *values_s : *values;
 							l = n + 1;
 							while(--l) {
 								template = SU ? *++values_s : *++values;
-								if(Scores[template] != 0) {
+								if(include[template]) {
 									gaps = HIT - extendScore[template];
 									if(gaps == 0) {
 										/* match */
-										Scores[template] += M;
+										score = M;
 									} else if(gaps == kmersize) {
 										/* snp */
-										Scores[template] += score + MM;
+										score = kmersize * M + MM;
 									} else if(kmersize < gaps) {
 										/* mismatch or insersion */
 										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
@@ -1158,18 +1170,22 @@ int get_kmers_for_pair_pseoudoSparse(const HashMapKMA *templates, const Penaltie
 										
 										/* evaluate best option */
 										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
-											Scores[template] += score + (mm * MM + m * M);
+											score = kmersize * M + (mm * MM + m * M);
 										} else {
-											Scores[template] += score + (W1 + (gaps - 1) * U);
+											score = kmersize * M + (W1 + (gaps - 1) * U);
 										}
-									} else {
-										Scores[template] += (gaps + 1) * M + MM;
+									} else if(mlen != kmersize) {
+										score = gaps * M + MM;
 										/* unlikely deletion or random k-mer mismatch, 
 										unless minimizers or homopolymer compression,
 										assume better and go random zero score else. */
+									} else {
+										/* indel */
+										score = gaps * M + (kmersize - gaps) * U + W1;
 									}
+									Scores[template] += score;
 								} else {
-									Scores[template] = score;
+									Scores[template] = kmersize * M;
 									if(include[template] == 0) {
 										include[template] = 1;
 										bests[++*bests] = template;
@@ -2248,12 +2264,17 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
 								++W1s;
 								Us += (gaps -1);
 							}
-						} else {
-							Ms += (gaps - 1);
+						} else if(mlen != kmersize) {
+							Ms += gaps;
 							++MMs;
 							/* unlikely deletion or random k-mer mismatch, 
 							unless minimizers or homopolymer compression,
 							assume better and go random zero score else. */
+						} else {
+							/* indel */
+							Ms += gaps;
+							++W1s;
+							Us += (kmersize - gaps);
 						}
 						
 						HIT = j;
@@ -2270,20 +2291,19 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
 							}
 							HIT = j - 1;
 							last = values;
-							score = kmersize * M;
 							values_s = (short unsigned *) values;
 							n = SU ? *values_s : *values;
 							l = n + 1;
 							while(--l) {
 								template = SU ? *++values_s : *++values;
-								if(Score[template] != 0) {
+								if(include[template]) {
 									gaps = HIT - extendScore[template];
 									if(gaps == 0) {
 										/* match */
-										Score[template] += M;
+										score = M;
 									} else if(gaps == kmersize) {
 										/* snp */
-										Score[template] += score + MM;
+										score = kmersize * M + MM;
 									} else if(kmersize < gaps) {
 										/* mismatch or insersion */
 										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
@@ -2299,18 +2319,22 @@ int save_kmers_pseuodeSparse(const HashMapKMA *templates, const Penalties *rewar
 										
 										/* evaluate best option */
 										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
-											Score[template] += score + (mm * MM + m * M);
+											score = kmersize * M + (mm * MM + m * M);
 										} else {
-											Score[template] += score + (W1 + (gaps - 1) * U);
+											score = kmersize * M + (W1 + (gaps - 1) * U);
 										}
-									} else {
-										Score[template] += (gaps + 1) * M + MM;
+									} else if(mlen != kmersize) {
+										score = gaps * M + MM;
 										/* unlikely deletion or random k-mer mismatch, 
 										unless minimizers or homopolymer compression,
 										assume better and go random zero score else. */
+									} else {
+										/* indel */
+										score = gaps * M + (kmersize - gaps) * U + W1;
 									}
+									Score[template] += score;
 								} else {
-									Score[template] = score;
+									Score[template] = kmersize * M;
 									if(include[template] == 0) {
 										include[template] = 1;
 										bestTemplates[++*bestTemplates] = template;
@@ -2419,8 +2443,8 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 	
 	int i, j, j_u, l, end, HIT, gaps, score, Ms, MMs, Us, W1s, W1, U, M, MM;
 	int template, hitCounter, bestScore, bestScore_r, kmersize, mPos, hLen;
-	int seqend, m, mm;
-	unsigned *values, *last, n, SU, shifter, cPos, iPos, mlen, flag;
+	int seqend, m, mm, mlen;
+	unsigned *values, *last, n, SU, shifter, cPos, iPos, flag;
 	short unsigned *values_s;
 	long unsigned mask, mmask, kmer, cmer, hmer, *seq;
 	char *include;
@@ -2529,14 +2553,19 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 								Ms += m;
 							} else {
 								++W1s;
-								Us += (gaps -1);
+								Us += (gaps - 1);
 							}
-						} else {
-							Ms += (gaps - 1);
+						} else if(mlen != kmersize) {
+							Ms += gaps;
 							++MMs;
 							/* unlikely deletion or random k-mer mismatch, 
 							unless minimizers or homopolymer compression,
 							assume better and go random zero score else. */
+						} else {
+							/* indel */
+							Ms += gaps;
+							++W1s;
+							Us += (kmersize - gaps);
 						}
 						
 						HIT = j;
@@ -2553,20 +2582,19 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 							}
 							HIT = j - 1;
 							last = values;
-							score = kmersize * M;
 							values_s = (short unsigned *) values;
 							n = SU ? *values_s : *values;
 							l = n + 1;
 							while(--l) {
 								template = SU ? *++values_s : *++values;
-								if(Score[template] != 0) {
+								if(include[template]) {
 									gaps = HIT - extendScore[template];
 									if(gaps == 0) {
 										/* match */
-										Score[template] += M;
-									} else if(gaps == kmersize) {
+										score = M;
+									} else if(mlen <= gaps && gaps <= kmersize) {
 										/* snp */
-										Score[template] += score + MM;
+										score = gaps * M + MM;
 									} else if(kmersize < gaps) {
 										/* mismatch or insersion */
 										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
@@ -2582,18 +2610,43 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 										
 										/* evaluate best option */
 										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
-											Score[template] += score + (mm * MM + m * M);
+											score = kmersize * M + (mm * MM + m * M);
 										} else {
-											Score[template] += score + (W1 + (gaps - 1) * U);
+											score = kmersize * M + (W1 + (gaps - 1) * U);
 										}
-									} else {
-										Score[template] += (gaps + 1) * M + MM;
+									} else if(mlen != kmersize) {
+										score = gaps * M + MM;
 										/* unlikely deletion or random k-mer mismatch, 
 										unless minimizers or homopolymer compression,
 										assume better and go random zero score else. */
+									} else {
+										/* indel */
+										score = gaps * M + (kmersize - gaps) * U + W1;
 									}
+									Score[template] += score;
 								} else {
-									Score[template] = score;
+									/* consider opening penalty */
+									/*
+									gaps = j;
+									if(gaps == 0) {
+										Score[template] = score;
+									} else {
+										if(gaps <= 2) {
+											mm = gaps;
+											m = 0;
+										} else {
+											mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+											mm = MAX(2, mm);
+											m = MIN(gaps - mm, kmersize);
+											m = MIN(m, mm);
+										}
+										// evaluate best option
+										open = ((W1 + (gaps - 1) * U) < Wl) ? Wl : (W1 + (gaps - 1) * U);
+										open = open < (mm * MM + m * M) ? (mm * MM + m * M) : open;
+										Score[template] = score + open;
+									}
+									*/
+									Score[template] = kmersize * M;
 									if(include[template] == 0) {
 										include[template] = 1;
 										bestTemplates[++*bestTemplates] = template;
@@ -2604,10 +2657,31 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 							last = values;
 							values_s = (short unsigned *) values;
 							n = SU ? *values_s : *values;
-							Ms = kmersize * M;
+							score = kmersize * M;
+							
+							/* consider opening penalty */
+							/*
+							gaps = j;
+							if(gaps > 0) {
+								if(gaps <= 2) {
+									mm = gaps;
+									m = 0;
+								} else {
+									mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+									mm = MAX(2, mm);
+									m = MIN(gaps - mm, kmersize);
+									m = MIN(m, mm);
+								}
+								// evaluate best option
+								open = ((W1 + (gaps - 1) * U) < Wl) ? Wl : (W1 + (gaps - 1) * U);
+								open = open < (mm * MM + m * M) ? (mm * MM + m * M) : open;
+								score += open;
+							}
+							*/
+							
 							for(l = 1; l <= n; ++l) {
 								template = SU ? *++values_s : *++values;
-								Score[template] = Ms;
+								Score[template] = score;
 								include[template] = 1;
 								bestTemplates[l] = template;
 							}
@@ -2636,16 +2710,42 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 				values_s = (short unsigned *) last;
 				l = (*values_s) + 1;
 				while(--l) {
-					Score[values_s[l]] += score;
+					Score[(template = values_s[l])] += score;
+					extendScore[template] = HIT;
 				}
 			} else {
 				l = (*last) + 1;
 				while(--l) {
-					Score[last[l]] += score;
+					Score[(template = last[l])] += score;
+					extendScore[template] = HIT;
 				}
 			}
+			
 			for(l = *bestTemplates; l != 0; --l) {
-				extendScore[(template = bestTemplates[l])] = 0;
+				template = bestTemplates[l];
+				/* consider closing penalties */
+				/*
+				HIT = qseq->seqlen - kmersize;
+				gaps = HIT - extendScore[template];
+				if(gaps > 0) {
+					if(gaps <= 2) {
+						mm = gaps;
+						m = 0;
+					} else {
+						mm = gaps / kmersize + (gaps % kmersize ? 1 : 0);
+						mm = MAX(2, mm);
+						m = MIN(gaps - mm, kmersize);
+						m = MIN(m, mm);
+					}
+					// evaluate best option
+					open = ((W1 + (gaps - 1) * U) < Wl) ? Wl : (W1 + (gaps - 1) * U);
+					open = open < (mm * MM + m * M) ? (mm * MM + m * M) : open;
+					Score[template] += open;
+				}
+				*/
+				
+				/* clear up */
+				extendScore[template] = 0;
 				include[template] = 0;
 				if(Score[template] < 0) {
 					Score[template] = 0;
@@ -2764,14 +2864,19 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 								Ms += m;
 							} else {
 								++W1s;
-								Us += (gaps -1);
+								Us += (gaps - 1);
 							}
-						} else {
-							Ms += (gaps - 1);
+						} else if(mlen != kmersize) {
+							Ms += gaps;
 							++MMs;
 							/* unlikely deletion or random k-mer mismatch, 
 							unless minimizers or homopolymer compression,
 							assume better and go random zero score else. */
+						} else {
+							/* indel */
+							Ms += gaps;
+							++W1s;
+							Us += (kmersize - gaps);
 						}
 						
 						HIT = j;
@@ -2788,20 +2893,19 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 							}
 							HIT = j - 1;
 							last = values;
-							score = kmersize * M;
 							values_s = (short unsigned *) values;
 							n = SU ? *values_s : *values;
 							l = n + 1;
 							while(--l) {
 								template = SU ? *++values_s : *++values;
-								if(Score_r[template] != 0) {
+								if(include[template]) {
 									gaps = HIT - extendScore[template];
 									if(gaps == 0) {
 										/* match */
-										Score_r[template] += M;
+										score = M;
 									} else if(gaps == kmersize) {
 										/* snp */
-										Score_r[template] += score + MM;
+										score = kmersize * M + MM;
 									} else if(kmersize < gaps) {
 										/* mismatch or insersion */
 										gaps -= (kmersize - 1); /* adjust for consecutive k-mer mismatches */
@@ -2817,18 +2921,22 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 										
 										/* evaluate best option */
 										if((W1 + (gaps - 1) * U) <= (mm * MM + m * M)) {
-											Score_r[template] += score + (mm * MM + m * M);
+											score = kmersize * M + (mm * MM + m * M);
 										} else {
-											Score_r[template] += score + (W1 + (gaps - 1) * U);
+											score = kmersize * M + (W1 + (gaps - 1) * U);
 										}
-									} else {
-										Score_r[template] += (gaps + 1) * M + MM;
+									} else if(mlen != kmersize) {
+										score = gaps * M + MM;
 										/* unlikely deletion or random k-mer mismatch, 
 										unless minimizers or homopolymer compression,
 										assume better and go random zero score else. */
+									} else {
+										/* indel */
+										score = gaps * M + (kmersize - gaps) * U + W1;
 									}
+									Score_r[template] += score;
 								} else {
-									Score_r[template] = score;
+									Score_r[template] = kmersize * M;
 									if(include[template] == 0) {
 										include[template] = 1;
 										bestTemplates_r[++*bestTemplates_r] = template;
@@ -2839,10 +2947,10 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 							last = values;
 							values_s = (short unsigned *) values;
 							n = SU ? *values_s : *values;
-							Ms = kmersize * M;
+							score = kmersize * M;
 							for(l = 1; l <= n; ++l) {
 								template = SU ? *++values_s : *++values;
-								Score_r[template] = Ms;
+								Score_r[template] = score;
 								include[template] = 1;
 								bestTemplates_r[l] = template;
 							}
@@ -2879,8 +2987,12 @@ int save_kmers(const HashMapKMA *templates, const Penalties *rewards, int *bestT
 					Score_r[last[l]] += score;
 				}
 			}
+			
 			for(l = *bestTemplates_r; l != 0; --l) {
-				extendScore[(template = bestTemplates_r[l])] = 0;
+				template = bestTemplates_r[l];
+				
+				/* clear up */
+				extendScore[template] = 0;
 				include[template] = 0;
 				if(Score_r[template] < 0) {
 					Score_r[template] = 0;
@@ -5423,6 +5535,8 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 					if(gaps == -kmersize) {
 						/* Perfect match */
 						score += V_score->weight - (kmersize - 1) * M;
+					} else if(gaps == 0) {
+						score += V_score->weight + MM;
 					} else if(0 < gaps) {
 						/* mismatch or insersion */
 						if(gaps <= 2) {
@@ -5441,9 +5555,12 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 						} else {
 							score += V_score->weight + (W1 + (gaps - 1) * U);
 						}
-					} else {
+					} else if(mlen != kmersize) {
 						/* snp */
-						score += V_score->weight + (gaps + 1) * M + MM;
+						score += V_score->weight + gaps * M + MM;
+					} else {
+						/* indel */
+						score += V_score->weight + gaps * M - (gaps + 1) * U + W1;
 					}
 					
 					/* verify extension */
@@ -5559,22 +5676,22 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 	cStart_r = -1;
 	if(!best_score->score || !best_score_r->score) {
 		if(best_score->score) {
-			tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates, Score, extendScore, include);
+			tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates, Score, extendScore, include);
 			cStart = tmp_score->start;
 			start = cStart;
 			len = best_score->end - start;
 			rc = 1;
 		} else {
-			tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates_r, Score, extendScore, include);
+			tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates_r, Score, extendScore, include);
 			cStart_r = tmp_score->start;
 			start = cStart_r;
 			len = best_score_r->end - start;
 			rc = 2;
 		}
 	} else {
-		tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates, Score, extendScore, include);
+		tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates, Score, extendScore, include);
 		cStart = tmp_score->start;
-		tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates_r, Score, extendScore, include);
+		tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates_r, Score, extendScore, include);
 		cStart_r = tmp_score->start;;
 		rc = chooseChain(best_score, best_score_r, cStart, cStart_r, &start, &len);
 	}
@@ -5615,7 +5732,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 						/* update best template candidates */
 						template = *bests;
 						*bests = 0;
-						getChainTemplates(V_score, rewards, template_lengths, qseq->seqlen, kmersize, bests, Score, extendScore, include);
+						getChainTemplates(V_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bests, Score, extendScore, include);
 						/* update bests */
 						*bestTemplates += *bests;
 						*bests = template;
@@ -5655,7 +5772,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 						/* update best template candidates */
 						template = *bests;
 						*bests = 0;
-						getChainTemplates(V_score, rewards, template_lengths, qseq->seqlen, kmersize, bests, Score, extendScore, include);
+						getChainTemplates(V_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bests, Score, extendScore, include);
 						/* update bests */
 						*bestTemplates_r += *bests;
 						*bests = template;
@@ -5729,7 +5846,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 		ties = 0;
 		rc = 0;
 		if(best_score) {
-			if(best_score->score && (tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates, Score, extendScore, include))) {
+			if(best_score->score && (tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates, Score, extendScore, include))) {
 				cStart = tmp_score->start;
 				cover = queSeqmentTree(chainSeqments->root, cStart, best_score->end);
 				
@@ -5748,7 +5865,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 			while(best_score && best_score->score == 0) {
 				/* find best anker */
 				if((best_score = getBestAnker(&VF_scores, &ties, template_lengths))) {
-					if(kmersize < best_score->score && (tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates, Score, extendScore, include))) {
+					if(kmersize < best_score->score && (tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates, Score, extendScore, include))) {
 						/* check chain */
 						cStart = tmp_score->start;
 						cover = queSeqmentTree(chainSeqments->root, cStart, best_score->end);
@@ -5771,7 +5888,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 		}
 		
 		if(best_score_r) {
-			if(best_score_r->score && (tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates_r, Score, extendScore, include))) {
+			if(best_score_r->score && (tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates_r, Score, extendScore, include))) {
 				cStart_r = tmp_score->start;
 				cover = queSeqmentTree(chainSeqments->root, cStart_r, best_score_r->end);
 				//cover = queSeqmentTree(chainSeqments->root, qseq->seqlen - best_score_r->end, qseq->seqlen - cStart_r);
@@ -5791,7 +5908,7 @@ int save_kmers_chain(const HashMapKMA *templates, const Penalties *rewards, int
 			while(best_score_r && best_score_r->score == 0) {
 				/* find best anker */
 				if((best_score_r = getBestAnker(&VR_scores, &ties, template_lengths))) {
-					if(kmersize < best_score_r->score && (tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates_r, Score, extendScore, include))) {
+					if(kmersize < best_score_r->score && (tmp_score = getChainTemplates(best_score_r, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates_r, Score, extendScore, include))) {
 						/* check chain */
 						cStart_r = tmp_score->start;
 						cover = queSeqmentTree(chainSeqments->root, cStart_r, best_score_r->end);
@@ -6124,7 +6241,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
 								/* snp */
 								Ms += kmersize;
 								++MMs;
-							} else {//if(kmersize < gaps) {
+							} else {
 								if(last) {
 									/* update and link between ankers */
 									V_score->weight = Ms * M + MMs * MM + Us * U + W1s * W1;
@@ -6141,10 +6258,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
 								Us = 0;
 								W1s = 0;
 								++hitCounter;
-							} /*else {
-								// unlikely deletion or random k-mer mismatch, 
-								// assume better and go random zero score
-							}*/
+							}
 						} else {
 							if(last) {
 								/* update and link between ankers */
@@ -6258,9 +6372,12 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
 					} else {
 						score += V_score->weight + (W1 + (gaps - 1) * U);
 					}
-				} else {
+				} else if(mlen != kmersize) {
 					/* snp */
-					score += V_score->weight + (gaps + 1) * M + MM;
+					score += V_score->weight + gaps * M + MM;
+				} else {
+					/* indel */
+					score += V_score->weight + gaps * M - (gaps + 1) * U + W1;
 				}
 				
 				/* verify extension */
@@ -6357,7 +6474,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
 	and zero out ankers */
 	*include = SU;
 	*bestTemplates = 0;
-	tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates, Score, extendScore, include);
+	tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates, Score, extendScore, include);
 	score = best_score->score;
 	start = tmp_score->start;
 	len = best_score->end - start;
@@ -6401,7 +6518,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
 					/* update best template candidates */
 					template = *bests;
 					*bests = 0;
-					getChainTemplates(V_score, rewards, template_lengths, qseq->seqlen, kmersize, bests, Score, extendScore, include);
+					getChainTemplates(V_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bests, Score, extendScore, include);
 					/* update */
 					*bestTemplates += *bests;
 					*bests = template;
@@ -6442,7 +6559,7 @@ int save_kmers_sparse_chain(const HashMapKMA *templates, const Penalties *reward
 		while(best_score->score == 0) {
 			/* find best anker */
 			if((best_score = getBestAnker(&VF_scores, &ties, template_lengths))) {
-				if(kmersize < best_score->score && (tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, bestTemplates, Score, extendScore, include))) {
+				if(kmersize < best_score->score && (tmp_score = getChainTemplates(best_score, rewards, template_lengths, qseq->seqlen, kmersize, mlen, bestTemplates, Score, extendScore, include))) {
 					/* check chain */
 					start = tmp_score->start;
 					cover = queSeqmentTree(chainSeqments->root, start, best_score->end);


=====================================
seqparse.c
=====================================
@@ -38,6 +38,7 @@ int openAndDetermine(FileBuff *inputfile, char *filename) {
 	} else {
 		openFileBuff(inputfile, filename, "rb");
 	}
+	inputfile->buffer[0] = 0;
 	if(buff_FileBuff(inputfile)) {
 		check = (short unsigned *) inputfile->buffer;
 		if(*check == 35615) {
@@ -47,14 +48,13 @@ int openAndDetermine(FileBuff *inputfile, char *filename) {
 		} else {
 			buffFileBuff = &buff_FileBuff;
 		}
-	} else {
-		inputfile->buffer[0] = 0;
 	}
-	
 	if(inputfile->buffer[0] == '@') { //FASTQ
 		FASTQ |= 1;
 	} else if(inputfile->buffer[0] == '>') { //FASTA
 		FASTQ |= 2;
+	} else if(inputfile->buffer[0] == 0) {
+		fprintf(stderr, "Empty file:\t%s\n", filename);
 	} else {
 		fprintf(stderr, "Cannot determine format of file:\t%s\n", filename);
 		errno |= 1;


=====================================
sparse.c
=====================================
@@ -241,7 +241,7 @@ void save_kmers_sparse(const HashMapKMA *templates, HashMap_kmers *foundKmers, C
 	}
 }
 
-void run_input_sparse(const HashMapKMA *templates, char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int kmersize, char *trans, const double *prob, FILE *out) {
+void run_input_sparse(const HashMapKMA *templates, char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int kmersize, char *trans, const double *prob, FILE *out) {
 	
 	int FASTQ, fileCounter, phredScale, phredCut, start, end;
 	char *filename;
@@ -289,7 +289,7 @@ void run_input_sparse(const HashMapKMA *templates, char **inputfiles, int fileCo
 				qseq->len = end - start;
 				
 				/* print */
-				if(qseq->len > kmersize && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
+				if(qseq->len > kmersize && qseq->len != hardmaskQ(qseq->seq + start, seq + start, qseq->len, phredScale, minmaskQ) && minQ <= eQual(seq + start, qseq->len, minQ, prob - phredScale)) {
 					/* translate to kmers */
 					Kmers->n = translateToKmersAndDump(Kmers->kmers, Kmers->n, Kmers->size, qseq->seq + start, qseq->len, templates, out);
 				}
@@ -588,13 +588,13 @@ int save_kmers_sparse_batch(char *templatefilename, char *outputfilename, char *
 							score_tot_add += node->key;
 							if(prev == 0) {
 								deConTable = node->next;
-								free(node);
 								free(node->value);
+								free(node);
 								node = deConTable;
 							} else {
 								prev->next = node->next;
-								free(node);
 								free(node->value);
+								free(node);
 								node = prev->next;
 							}
 						} else {


=====================================
sparse.h
=====================================
@@ -24,5 +24,5 @@
 int translateToKmersAndDump(long unsigned *Kmers, int n, int max, unsigned char *qseq, int seqlen, const HashMapKMA *templates, FILE *out);
 char ** load_DBs_Sparse(char *templatefilename, unsigned **template_lengths, unsigned **template_ulengths, unsigned shm);
 void save_kmers_sparse(const HashMapKMA *templates, HashMap_kmers *foundKmers, CompKmers *compressor);
-void run_input_sparse(const HashMapKMA *templates, char **inputfiles, int fileCount, int minPhred, int minQ, int fiveClip, int threeClip, int kmersize, char *trans, const double *prob, FILE *out);
+void run_input_sparse(const HashMapKMA *templates, char **inputfiles, int fileCount, int minPhred, int minmaskQ, int minQ, int fiveClip, int threeClip, int kmersize, char *trans, const double *prob, FILE *out);
 int save_kmers_sparse_batch(char *templatefilename, char *outputfilename, char *exePrev, double ID_t, double Depth_t, double evalue, char ss, unsigned shm);


=====================================
spltdb.c
=====================================
@@ -739,9 +739,6 @@ int runKMA_spltDB(char **templatefilenames, int targetNum, char *outputfilename,
 							--uPtr;
 							--i;
 						} else {
-							if(targetInfo[i] == 0) {
-								*uPtr = get_ankers_spltDB(targetInfo[i], matched_templates, qseq_comp, header, &flag, inputfiles[i]);
-							}
 							*uPtr = get_ankers_spltDB(targetInfo[i], matched_templates, qseq_comp, header, &flag, inputfiles[i]);
 							--uPtr;
 							--i;


=====================================
stdstat.h
=====================================
@@ -20,6 +20,7 @@
 #define MIN(X, Y) ((X < Y) ? X : Y)
 #define MAX(X, Y) ((X < Y) ? Y : X)
 #define murmur(index, kmer) index = (3323198485ul ^ kmer) * 0x5bd1e995; index ^= index >> 15;
+#define murmur3(index, kmer) index = kmer * 0xcc9e2d51; index = (index << 15) | (index >> 17); index *= 0x1b873593; index = (index << 13) | (index >> 19); index ^= index >> 16; index *= 0x85ebca6b; index ^= index >> 13; index *= 0xc2b2ae35; index ^= index >> 16; 
 
 extern int (*cmp)(int, int);
 int cmp_or(int t, int q);


=====================================
trim.c
=====================================
@@ -136,6 +136,7 @@ static int helpMessage(FILE *out) {
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-ml", "Minimum length", "16");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-xl", "Maximum length", "2147483647");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-mp", "Minimum phred", "20");
+	fprintf(out, "# %16s\t%-32s\t%s\n", "-mi", "Minimum internal phred score", "0");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-eq", "Minimum average quality", "0");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-5p", "Trim 5 prime", "0");
 	fprintf(out, "# %16s\t%-32s\t%s\n", "-3p", "Trim 3 prime", "0");
@@ -163,8 +164,8 @@ int trim_main(int argc, char *argv[]) {
 		0.0000000000398107, 0.0000000000316228, 0.0000000000251189, 0.0000000000199526, 0.0000000000158489, 0.0000000000125893, 0.0000000000100000, 0.0000000000079433,
 		0.0000000000063096, 0.0000000000050119, 0.0000000000039811, 0.0000000000031623, 0.0000000000025119, 0.0000000000019953, 0.0000000000015849, 0.0000000000012589,
 		0.0000000000010000, 0.0000000000007943, 0.0000000000006310, 0.0000000000005012, 0.0000000000003981, 0.0000000000003162, 0.0000000000002512, 0.0000000000001995};
-	int i, args, minPhred, minQ, fiveClip, threeClip, minlen, maxlen;
-	int fileCounter, fileCounter_PE, fileCounter_INT, fileCount;
+	int i, args, fileCounter, fileCounter_PE, fileCounter_INT, fileCount;
+	int minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, maxlen;
 	long unsigned totFrags;
 	char **inputfiles, **inputfiles_PE, **inputfiles_INT;
 	char *to2Bit, *outputfilename, *outputfilename_int, *exeBasic;
@@ -172,6 +173,7 @@ int trim_main(int argc, char *argv[]) {
 	
 	/* set defaults */
 	minPhred = 20;
+	minmaskQ = 0;
 	minQ = 0;
 	fiveClip = 0;
 	threeClip = 0;
@@ -291,6 +293,15 @@ int trim_main(int argc, char *argv[]) {
 					exit(1);
 				}
 			}
+		} else if(strcmp(argv[args], "-mi") == 0) {
+			++args;
+			if(args < argc) {
+				minmaskQ = strtoul(argv[args], &exeBasic, 10);
+				if(*exeBasic != 0) {
+					fprintf(stderr, "# Invalid internal minimum phred score parsed\n");
+					exit(1);
+				}
+			}
 		} else if(strcmp(argv[args], "-eq") == 0) {
 			++args;
 			if(args < argc) {
@@ -389,20 +400,23 @@ int trim_main(int argc, char *argv[]) {
 		inputfiles = smalloc(sizeof(char *));
 		*inputfiles = "--";
 	}
+	if(minPhred < minmaskQ) {
+		minPhred = minmaskQ;
+	}
 	
 	/* SE */
 	if(fileCounter > 0) {
-		totFrags += run_input(inputfiles, fileCounter, minPhred, minQ, fiveClip, threeClip, minlen, maxlen, to2Bit, prob, out);
+		totFrags += run_input(inputfiles, fileCounter, minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, maxlen, to2Bit, prob, out);
 	}
 	
 	/* PE */
 	if(fileCounter_PE > 0) {
-		totFrags += run_input_PE(inputfiles_PE, fileCounter_PE, minPhred, minQ, fiveClip, threeClip, minlen, to2Bit, prob, out);
+		totFrags += run_input_PE(inputfiles_PE, fileCounter_PE, minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, to2Bit, prob, out);
 	}
 	
 	/* INT */
 	if(fileCounter_INT > 0) {
-		totFrags += run_input_INT(inputfiles_INT, fileCounter_INT, minPhred, minQ, fiveClip, threeClip, minlen, to2Bit, prob, out);
+		totFrags += run_input_INT(inputfiles_INT, fileCounter_INT, minPhred, minmaskQ, minQ, fiveClip, threeClip, minlen, to2Bit, prob, out);
 	}
 	
 	fprintf(stderr, "#\n# Total number of query fragment after trimming:\t%lu\n", totFrags);


=====================================
version.h
=====================================
@@ -17,4 +17,4 @@
  * limitations under the License.
 */
 
-#define KMA_VERSION "1.4.11"
+#define KMA_VERSION "1.4.14"



View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/149fa09e1c71a0fcbc6851116e4ea40a00dcee2a

-- 
View it on GitLab: https://salsa.debian.org/med-team/kma/-/commit/149fa09e1c71a0fcbc6851116e4ea40a00dcee2a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230823/0dc76e60/attachment-0001.htm>


More information about the debian-med-commit mailing list