[med-svn] [Git][med-team/seqtk][upstream] New upstream version 1.3

Andreas Tille gitlab at salsa.debian.org
Thu Sep 20 20:42:51 BST 2018


Andreas Tille pushed to branch upstream at Debian Med / seqtk


Commits:
ba538534 by Andreas Tille at 2018-09-20T19:34:08Z
New upstream version 1.3
- - - - -


6 changed files:

- Makefile
- kseq.h
- − ksort.h
- − kstring.h
- − kvec.h
- seqtk.c


Changes:

=====================================
Makefile
=====================================
@@ -1,10 +1,14 @@
 CC=gcc
 CFLAGS=-g -Wall -O2 -Wno-unused-function
+BINDIR=/usr/local/bin
 
 all:seqtk
 
 seqtk:seqtk.c khash.h kseq.h
 		$(CC) $(CFLAGS) seqtk.c -o $@ -lz -lm
 
+install:all
+		install seqtk $(BINDIR)
+
 clean:
 		rm -fr gmon.out *.o ext/*.o a.out seqtk trimadap *~ *.a *.dSYM session*


=====================================
kseq.h
=====================================
@@ -23,7 +23,7 @@
    SOFTWARE.
 */
 
-/* Last Modified: 05MAR2012 */
+/* Last Modified: 2017-02-11 */
 
 #ifndef AC_KSEQ_H
 #define AC_KSEQ_H
@@ -37,42 +37,45 @@
 #define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
 #define KS_SEP_MAX   2
 
-#define __KS_TYPE(type_t)						\
-	typedef struct __kstream_t {				\
-		unsigned char *buf;						\
-		int begin, end, is_eof;					\
-		type_t f;								\
+#define __KS_TYPE(type_t) \
+	typedef struct __kstream_t { \
+		unsigned char *buf; \
+		int begin, end, is_eof; \
+		type_t f; \
 	} kstream_t;
 
+#define ks_err(ks) ((ks)->end < 0)
 #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
 #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
 
-#define __KS_BASIC(type_t, __bufsize)								\
-	static inline kstream_t *ks_init(type_t f)						\
-	{																\
-		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));	\
-		ks->f = f;													\
-		ks->buf = (unsigned char*)malloc(__bufsize);				\
-		return ks;													\
-	}																\
-	static inline void ks_destroy(kstream_t *ks)					\
-	{																\
-		if (ks) {													\
-			free(ks->buf);											\
-			free(ks);												\
-		}															\
+#define __KS_BASIC(type_t, __bufsize) \
+	static inline kstream_t *ks_init(type_t f) \
+	{ \
+		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
+		ks->f = f; \
+		ks->buf = (unsigned char*)malloc(__bufsize); \
+		return ks; \
+	} \
+	static inline void ks_destroy(kstream_t *ks) \
+	{ \
+		if (ks) { \
+			free(ks->buf); \
+			free(ks); \
+		} \
 	}
 
-#define __KS_GETC(__read, __bufsize)						\
-	static inline int ks_getc(kstream_t *ks)				\
-	{														\
-		if (ks->is_eof && ks->begin >= ks->end) return -1;	\
-		if (ks->begin >= ks->end) {							\
-			ks->begin = 0;									\
-			ks->end = __read(ks->f, ks->buf, __bufsize);	\
-			if (ks->end == 0) { ks->is_eof = 1; return -1;}	\
-		}													\
-		return (int)ks->buf[ks->begin++];					\
+#define __KS_GETC(__read, __bufsize) \
+	static inline int ks_getc(kstream_t *ks) \
+	{ \
+		if (ks_err(ks)) return -3; \
+		if (ks_eof(ks)) return -1; \
+		if (ks->begin >= ks->end) { \
+			ks->begin = 0; \
+			ks->end = __read(ks->f, ks->buf, __bufsize); \
+			if (ks->end == 0) { ks->is_eof = 1; return -1; } \
+			else if (ks->end < 0) { ks->is_eof = 1; return -3; } \
+		} \
+		return (int)ks->buf[ks->begin++]; \
 	}
 
 #ifndef KSTRING_T
@@ -87,140 +90,145 @@ typedef struct __kstring_t {
 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
 #endif
 
-#define __KS_GETUNTIL(__read, __bufsize)								\
+#define __KS_GETUNTIL(__read, __bufsize) \
 	static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
-	{																	\
-		int gotany = 0;													\
-		if (dret) *dret = 0;											\
-		str->l = append? str->l : 0;									\
-		for (;;) {														\
-			int i;														\
-			if (ks->begin >= ks->end) {									\
-				if (!ks->is_eof) {										\
-					ks->begin = 0;										\
-					ks->end = __read(ks->f, ks->buf, __bufsize);		\
-					if (ks->end == 0) { ks->is_eof = 1; break; }		\
-				} else break;											\
-			}															\
+	{ \
+		int gotany = 0; \
+		if (dret) *dret = 0; \
+		str->l = append? str->l : 0; \
+		for (;;) { \
+			int i; \
+			if (ks_err(ks)) return -3; \
+			if (ks->begin >= ks->end) { \
+				if (!ks->is_eof) { \
+					ks->begin = 0; \
+					ks->end = __read(ks->f, ks->buf, __bufsize); \
+					if (ks->end == 0) { ks->is_eof = 1; break; } \
+					if (ks->end == -1) { ks->is_eof = 1; return -3; } \
+				} else break; \
+			} \
 			if (delimiter == KS_SEP_LINE) { \
 				for (i = ks->begin; i < ks->end; ++i) \
 					if (ks->buf[i] == '\n') break; \
-			} else if (delimiter > KS_SEP_MAX) {						\
-				for (i = ks->begin; i < ks->end; ++i)					\
-					if (ks->buf[i] == delimiter) break;					\
-			} else if (delimiter == KS_SEP_SPACE) {						\
-				for (i = ks->begin; i < ks->end; ++i)					\
-					if (isspace(ks->buf[i])) break;						\
-			} else if (delimiter == KS_SEP_TAB) {						\
-				for (i = ks->begin; i < ks->end; ++i)					\
+			} else if (delimiter > KS_SEP_MAX) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (ks->buf[i] == delimiter) break; \
+			} else if (delimiter == KS_SEP_SPACE) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (isspace(ks->buf[i])) break; \
+			} else if (delimiter == KS_SEP_TAB) { \
+				for (i = ks->begin; i < ks->end; ++i) \
 					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
-			} else i = 0; /* never come to here! */						\
-			if (str->m - str->l < (size_t)(i - ks->begin + 1)) {		\
-				str->m = str->l + (i - ks->begin) + 1;					\
-				kroundup32(str->m);										\
-				str->s = (char*)realloc(str->s, str->m);				\
-			}															\
-			gotany = 1;													\
+			} else i = 0; /* never come to here! */ \
+			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
+				str->m = str->l + (i - ks->begin) + 1; \
+				kroundup32(str->m); \
+				str->s = (char*)realloc(str->s, str->m); \
+			} \
+			gotany = 1; \
 			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
-			str->l = str->l + (i - ks->begin);							\
-			ks->begin = i + 1;											\
-			if (i < ks->end) {											\
-				if (dret) *dret = ks->buf[i];							\
-				break;													\
-			}															\
-		}																\
-		if (!gotany && ks_eof(ks)) return -1;							\
-		if (str->s == 0) {												\
-			str->m = 1;													\
-			str->s = (char*)calloc(1, 1);								\
+			str->l = str->l + (i - ks->begin); \
+			ks->begin = i + 1; \
+			if (i < ks->end) { \
+				if (dret) *dret = ks->buf[i]; \
+				break; \
+			} \
+		} \
+		if (!gotany && ks_eof(ks)) return -1; \
+		if (str->s == 0) { \
+			str->m = 1; \
+			str->s = (char*)calloc(1, 1); \
 		} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
-		str->s[str->l] = '\0';											\
-		return str->l;													\
+		str->s[str->l] = '\0'; \
+		return str->l; \
 	} \
 	static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
 	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
 
 #define KSTREAM_INIT(type_t, __read, __bufsize) \
-	__KS_TYPE(type_t)							\
-	__KS_BASIC(type_t, __bufsize)				\
-	__KS_GETC(__read, __bufsize)				\
+	__KS_TYPE(type_t) \
+	__KS_BASIC(type_t, __bufsize) \
+	__KS_GETC(__read, __bufsize) \
 	__KS_GETUNTIL(__read, __bufsize)
 
 #define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
 
-#define __KSEQ_BASIC(SCOPE, type_t)										\
-	SCOPE kseq_t *kseq_init(type_t fd)									\
-	{																	\
-		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
-		s->f = ks_init(fd);												\
-		return s;														\
-	}																	\
-	SCOPE void kseq_destroy(kseq_t *ks)									\
-	{																	\
-		if (!ks) return;												\
+#define __KSEQ_BASIC(SCOPE, type_t) \
+	SCOPE kseq_t *kseq_init(type_t fd) \
+	{ \
+		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
+		s->f = ks_init(fd); \
+		return s; \
+	} \
+	SCOPE void kseq_destroy(kseq_t *ks) \
+	{ \
+		if (!ks) return; \
 		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
-		ks_destroy(ks->f);												\
-		free(ks);														\
+		ks_destroy(ks->f); \
+		free(ks); \
 	}
 
 /* Return value:
    >=0  length of the sequence (normal)
    -1   end-of-file
    -2   truncated quality string
+   -3   error reading stream
  */
 #define __KSEQ_READ(SCOPE) \
 	SCOPE int kseq_read(kseq_t *seq) \
 	{ \
-		int c; \
+		int c,r; \
 		kstream_t *ks = seq->f; \
 		if (seq->last_char == 0) { /* then jump to the next header line */ \
-			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
-			if (c == -1) return -1; /* end of file */ \
+			while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \
+			if (c < 0) return c; /* end of file (-1) or stream error (-3) */ \
 			seq->last_char = c; \
 		} /* else: the first header char has been read in the previous call */ \
 		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
-		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
+		if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r;  /* normal exit: EOF or error */ \
 		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
 		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
 			seq->seq.m = 256; \
 			seq->seq.s = (char*)malloc(seq->seq.m); \
 		} \
-		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+		while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \
 			if (c == '\n') continue; /* skip empty lines */ \
 			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
 			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
 		} \
-		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
+		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
 		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
 			seq->seq.m = seq->seq.l + 2; \
 			kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
 			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
 		} \
 		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */ \
-		if (c != '+') return seq->seq.l; /* FASTA */ \
+		seq->is_fastq = (c == '+'); \
+		if (!seq->is_fastq) return seq->seq.l; /* FASTA */ \
 		if (seq->qual.m < seq->seq.m) {	/* allocate memory for qual in case insufficient */ \
 			seq->qual.m = seq->seq.m; \
 			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
 		} \
-		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+		while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \
 		if (c == -1) return -2; /* error: no quality string */ \
-		while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
+		while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1)) >= 0 && seq->qual.l < seq->seq.l); /* c keeps the return code, not the loop test */ \
+		if (c == -3) return -3; /* stream error */ \
 		seq->last_char = 0;	/* we have not come to the next header line */ \
 		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
 		return seq->seq.l; \
 	}
 
-#define __KSEQ_TYPE(type_t)						\
-	typedef struct {							\
-		kstring_t name, comment, seq, qual;		\
-		int last_char;							\
-		kstream_t *f;							\
+#define __KSEQ_TYPE(type_t) \
+	typedef struct { \
+		kstring_t name, comment, seq, qual; \
+		int last_char, is_fastq; \
+		kstream_t *f; \
 	} kseq_t;
 
-#define KSEQ_INIT2(SCOPE, type_t, __read)		\
-	KSTREAM_INIT(type_t, __read, 16384)			\
-	__KSEQ_TYPE(type_t)							\
-	__KSEQ_BASIC(SCOPE, type_t)					\
+#define KSEQ_INIT2(SCOPE, type_t, __read) \
+	KSTREAM_INIT(type_t, __read, 16384) \
+	__KSEQ_TYPE(type_t) \
+	__KSEQ_BASIC(SCOPE, type_t) \
 	__KSEQ_READ(SCOPE)
 
 #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)


=====================================
ksort.h deleted
=====================================
@@ -1,298 +0,0 @@
-/* The MIT License
-
-   Copyright (c) 2008, 2011 Attractive Chaos <attractor at live.co.uk>
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-/*
-  2011-04-10 (0.1.6):
-
-  	* Added sample
-
-  2011-03 (0.1.5):
-
-	* Added shuffle/permutation
-
-  2008-11-16 (0.1.4):
-
-    * Fixed a bug in introsort() that happens in rare cases.
-
-  2008-11-05 (0.1.3):
-
-    * Fixed a bug in introsort() for complex comparisons.
-
-	* Fixed a bug in mergesort(). The previous version is not stable.
-
-  2008-09-15 (0.1.2):
-
-	* Accelerated introsort. On my Mac (not on another Linux machine),
-	  my implementation is as fast as std::sort on random input.
-
-	* Added combsort and in introsort, switch to combsort if the
-	  recursion is too deep.
-
-  2008-09-13 (0.1.1):
-
-	* Added k-small algorithm
-
-  2008-09-05 (0.1.0):
-
-	* Initial version
-
-*/
-
-#ifndef AC_KSORT_H
-#define AC_KSORT_H
-
-#include <stdlib.h>
-#include <string.h>
-
-typedef struct {
-	void *left, *right;
-	int depth;
-} ks_isort_stack_t;
-
-#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
-
-#define KSORT_INIT(name, type_t, __sort_lt)								\
-	void ks_mergesort_##name(size_t n, type_t array[], type_t temp[])	\
-	{																	\
-		type_t *a2[2], *a, *b;											\
-		int curr, shift;												\
-																		\
-		a2[0] = array;													\
-		a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);		\
-		for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {			\
-			a = a2[curr]; b = a2[1-curr];								\
-			if (shift == 0) {											\
-				type_t *p = b, *i, *eb = a + n;							\
-				for (i = a; i < eb; i += 2) {							\
-					if (i == eb - 1) *p++ = *i;							\
-					else {												\
-						if (__sort_lt(*(i+1), *i)) {					\
-							*p++ = *(i+1); *p++ = *i;					\
-						} else {										\
-							*p++ = *i; *p++ = *(i+1);					\
-						}												\
-					}													\
-				}														\
-			} else {													\
-				size_t i, step = 1ul<<shift;							\
-				for (i = 0; i < n; i += step<<1) {						\
-					type_t *p, *j, *k, *ea, *eb;						\
-					if (n < i + step) {									\
-						ea = a + n; eb = a;								\
-					} else {											\
-						ea = a + i + step;								\
-						eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
-					}													\
-					j = a + i; k = a + i + step; p = b + i;				\
-					while (j < ea && k < eb) {							\
-						if (__sort_lt(*k, *j)) *p++ = *k++;				\
-						else *p++ = *j++;								\
-					}													\
-					while (j < ea) *p++ = *j++;							\
-					while (k < eb) *p++ = *k++;							\
-				}														\
-			}															\
-			curr = 1 - curr;											\
-		}																\
-		if (curr == 1) {												\
-			type_t *p = a2[0], *i = a2[1], *eb = array + n;				\
-			for (; p < eb; ++i) *p++ = *i;								\
-		}																\
-		if (temp == 0) free(a2[1]);										\
-	}																	\
-	void ks_heapadjust_##name(size_t i, size_t n, type_t l[])			\
-	{																	\
-		size_t k = i;													\
-		type_t tmp = l[i];												\
-		while ((k = (k << 1) + 1) < n) {								\
-			if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k;				\
-			if (__sort_lt(l[k], tmp)) break;							\
-			l[i] = l[k]; i = k;											\
-		}																\
-		l[i] = tmp;														\
-	}																	\
-	void ks_heapmake_##name(size_t lsize, type_t l[])					\
-	{																	\
-		size_t i;														\
-		for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i)				\
-			ks_heapadjust_##name(i, lsize, l);							\
-	}																	\
-	void ks_heapsort_##name(size_t lsize, type_t l[])					\
-	{																	\
-		size_t i;														\
-		for (i = lsize - 1; i > 0; --i) {								\
-			type_t tmp;													\
-			tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
-		}																\
-	}																	\
-	static inline void __ks_insertsort_##name(type_t *s, type_t *t)			\
-	{																	\
-		type_t *i, *j, swap_tmp;										\
-		for (i = s + 1; i < t; ++i)										\
-			for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) {			\
-				swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp;			\
-			}															\
-	}																	\
-	void ks_combsort_##name(size_t n, type_t a[])						\
-	{																	\
-		const double shrink_factor = 1.2473309501039786540366528676643; \
-		int do_swap;													\
-		size_t gap = n;													\
-		type_t tmp, *i, *j;												\
-		do {															\
-			if (gap > 2) {												\
-				gap = (size_t)(gap / shrink_factor);					\
-				if (gap == 9 || gap == 10) gap = 11;					\
-			}															\
-			do_swap = 0;												\
-			for (i = a; i < a + n - gap; ++i) {							\
-				j = i + gap;											\
-				if (__sort_lt(*j, *i)) {								\
-					tmp = *i; *i = *j; *j = tmp;						\
-					do_swap = 1;										\
-				}														\
-			}															\
-		} while (do_swap || gap > 2);									\
-		if (gap != 1) __ks_insertsort_##name(a, a + n);					\
-	}																	\
-	void ks_introsort_##name(size_t n, type_t a[])						\
-	{																	\
-		int d;															\
-		ks_isort_stack_t *top, *stack;									\
-		type_t rp, swap_tmp;											\
-		type_t *s, *t, *i, *j, *k;										\
-																		\
-		if (n < 1) return;												\
-		else if (n == 2) {												\
-			if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
-			return;														\
-		}																\
-		for (d = 2; 1ul<<d < n; ++d);									\
-		stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
-		top = stack; s = a; t = a + (n-1); d <<= 1;						\
-		while (1) {														\
-			if (s < t) {												\
-				if (--d == 0) {											\
-					ks_combsort_##name(t - s + 1, s);					\
-					t = s;												\
-					continue;											\
-				}														\
-				i = s; j = t; k = i + ((j-i)>>1) + 1;					\
-				if (__sort_lt(*k, *i)) {								\
-					if (__sort_lt(*k, *j)) k = j;						\
-				} else k = __sort_lt(*j, *i)? i : j;					\
-				rp = *k;												\
-				if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }	\
-				for (;;) {												\
-					do ++i; while (__sort_lt(*i, rp));					\
-					do --j; while (i <= j && __sort_lt(rp, *j));		\
-					if (j <= i) break;									\
-					swap_tmp = *i; *i = *j; *j = swap_tmp;				\
-				}														\
-				swap_tmp = *i; *i = *t; *t = swap_tmp;					\
-				if (i-s > t-i) {										\
-					if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
-					s = t-i > 16? i+1 : t;								\
-				} else {												\
-					if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
-					t = i-s > 16? i-1 : s;								\
-				}														\
-			} else {													\
-				if (top == stack) {										\
-					free(stack);										\
-					__ks_insertsort_##name(a, a+n);						\
-					return;												\
-				} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
-			}															\
-		}																\
-	}																	\
-	/* This function is adapted from: http://ndevilla.free.fr/median/ */ \
-	/* 0 <= kk < n */													\
-	type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)			\
-	{																	\
-		type_t *low, *high, *k, *ll, *hh, *mid;							\
-		low = arr; high = arr + n - 1; k = arr + kk;					\
-		for (;;) {														\
-			if (high <= low) return *k;									\
-			if (high == low + 1) {										\
-				if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
-				return *k;												\
-			}															\
-			mid = low + (high - low) / 2;								\
-			if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
-			if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
-			if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);	\
-			KSORT_SWAP(type_t, *mid, *(low+1));							\
-			ll = low + 1; hh = high;									\
-			for (;;) {													\
-				do ++ll; while (__sort_lt(*ll, *low));					\
-				do --hh; while (__sort_lt(*low, *hh));					\
-				if (hh < ll) break;										\
-				KSORT_SWAP(type_t, *ll, *hh);							\
-			}															\
-			KSORT_SWAP(type_t, *low, *hh);								\
-			if (hh <= k) low = ll;										\
-			if (hh >= k) high = hh - 1;									\
-		}																\
-	}																	\
-	void ks_shuffle_##name(size_t n, type_t a[])						\
-	{																	\
-		int i, j;														\
-		for (i = n; i > 1; --i) {										\
-			type_t tmp;													\
-			j = (int)(drand48() * i);									\
-			tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp;					\
-		}																\
-	}																	\
-	void ks_sample_##name(size_t n, size_t r, type_t a[]) /* FIXME: NOT TESTED!!! */ \
-	{ /* reference: http://code.activestate.com/recipes/272884/ */ \
-		int i, k, pop = n; \
-		for (i = (int)r, k = 0; i >= 0; --i) { \
-			double z = 1., x = drand48(); \
-			type_t tmp; \
-			while (x < z) z -= z * i / (pop--); \
-			if (k != n - pop - 1) tmp = a[k], a[k] = a[n-pop-1], a[n-pop-1] = tmp; \
-			++k; \
-		} \
-	}
-
-#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
-#define ks_introsort(name, n, a) ks_introsort_##name(n, a)
-#define ks_combsort(name, n, a) ks_combsort_##name(n, a)
-#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
-#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
-#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
-#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
-#define ks_shuffle(name, n, a) ks_shuffle_##name(n, a)
-
-#define ks_lt_generic(a, b) ((a) < (b))
-#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
-
-typedef const char *ksstr_t;
-
-#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
-#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
-
-#endif


=====================================
kstring.h deleted
=====================================
@@ -1,169 +0,0 @@
-/* The MIT License
-
-   Copyright (c) by Attractive Chaos <attractor at live.co.uk> 
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-#ifndef KSTRING_H
-#define KSTRING_H
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#ifndef KSTRING_T
-#define KSTRING_T kstring_t
-typedef struct __kstring_t {
-	uint32_t l, m;
-	char *s;
-} kstring_t;
-#endif
-
-typedef struct {
-	uint64_t tab[4];
-	int sep, finished;
-	const char *p; // end of the current token
-} ks_tokaux_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-	int ksprintf(kstring_t *s, const char *fmt, ...);
-	int ksprintf_fast(kstring_t *s, const char *fmt, ...);
-	int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
-	char *kstrstr(const char *str, const char *pat, int **_prep);
-	char *kstrnstr(const char *str, const char *pat, int n, int **_prep);
-	void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep);
-
-	/* kstrtok() is similar to strtok_r() except that str is not
-	 * modified and both str and sep can be NULL. For efficiency, it is
-	 * actually recommended to set both to NULL in the subsequent calls
-	 * if sep is not changed. */
-	char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux);
-
-#ifdef __cplusplus
-}
-#endif
-
-static inline void ks_resize(kstring_t *s, size_t size)
-{
-	if (s->m < size) {
-		s->m = size;
-		kroundup32(s->m);
-		s->s = (char*)realloc(s->s, s->m);
-	}
-}
-
-static inline int kputsn(const char *p, int l, kstring_t *s)
-{
-	if (s->l + l + 1 >= s->m) {
-		s->m = s->l + l + 2;
-		kroundup32(s->m);
-		s->s = (char*)realloc(s->s, s->m);
-	}
-	memcpy(s->s + s->l, p, l);
-	s->l += l;
-	s->s[s->l] = 0;
-	return l;
-}
-
-static inline int kputs(const char *p, kstring_t *s)
-{
-	return kputsn(p, strlen(p), s);
-}
-
-static inline int kputc(int c, kstring_t *s)
-{
-	if (s->l + 1 >= s->m) {
-		s->m = s->l + 2;
-		kroundup32(s->m);
-		s->s = (char*)realloc(s->s, s->m);
-	}
-	s->s[s->l++] = c;
-	s->s[s->l] = 0;
-	return c;
-}
-
-static inline int kputw(int c, kstring_t *s)
-{
-	char buf[16];
-	int l, x;
-	if (c == 0) return kputc('0', s);
-	for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
-	if (c < 0) buf[l++] = '-';
-	if (s->l + l + 1 >= s->m) {
-		s->m = s->l + l + 2;
-		kroundup32(s->m);
-		s->s = (char*)realloc(s->s, s->m);
-	}
-	for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
-	s->s[s->l] = 0;
-	return 0;
-}
-
-static inline int kputuw(unsigned c, kstring_t *s)
-{
-	char buf[16];
-	int l, i;
-	unsigned x;
-	if (c == 0) return kputc('0', s);
-	for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0';
-	if (s->l + l + 1 >= s->m) {
-		s->m = s->l + l + 2;
-		kroundup32(s->m);
-		s->s = (char*)realloc(s->s, s->m);
-	}
-	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
-	s->s[s->l] = 0;
-	return 0;
-}
-
-static inline int kputl(long c, kstring_t *s)
-{
-	char buf[32];
-	long l, x;
-	if (c == 0) return kputc('0', s);
-	for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
-	if (c < 0) buf[l++] = '-';
-	if (s->l + l + 1 >= s->m) {
-		s->m = s->l + l + 2;
-		kroundup32(s->m);
-		s->s = (char*)realloc(s->s, s->m);
-	}
-	for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
-	s->s[s->l] = 0;
-	return 0;
-}
-
-static inline int *ksplit(kstring_t *s, int delimiter, int *n)
-{
-	int max = 0, *offsets = 0;
-	*n = ksplit_core(s->s, delimiter, &max, &offsets);
-	return offsets;
-}
-
-#endif


=====================================
kvec.h deleted
=====================================
@@ -1,90 +0,0 @@
-/* The MIT License
-
-   Copyright (c) 2008, by Attractive Chaos <attractor at live.co.uk>
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-/*
-  An example:
-
-#include "kvec.h"
-int main() {
-	kvec_t(int) array;
-	kv_init(array);
-	kv_push(int, array, 10); // append
-	kv_a(int, array, 20) = 5; // dynamic
-	kv_A(array, 20) = 4; // static
-	kv_destroy(array);
-	return 0;
-}
-*/
-
-/*
-  2008-09-22 (0.1.0):
-
-	* The initial version.
-
-*/
-
-#ifndef AC_KVEC_H
-#define AC_KVEC_H
-
-#include <stdlib.h>
-
-#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-
-#define kvec_t(type) struct { size_t n, m; type *a; }
-#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
-#define kv_destroy(v) free((v).a)
-#define kv_A(v, i) ((v).a[(i)])
-#define kv_pop(v) ((v).a[--(v).n])
-#define kv_size(v) ((v).n)
-#define kv_max(v) ((v).m)
-
-#define kv_resize(type, v, s)  ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m))
-
-#define kv_copy(type, v1, v0) do {							\
-		if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n);	\
-		(v1).n = (v0).n;									\
-		memcpy((v1).a, (v0).a, sizeof(type) * (v0).n);		\
-	} while (0)												\
-
-#define kv_push(type, v, x) do {									\
-		if ((v).n == (v).m) {										\
-			(v).m = (v).m? (v).m<<1 : 2;							\
-			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m);	\
-		}															\
-		(v).a[(v).n++] = (x);										\
-	} while (0)
-
-#define kv_pushp(type, v) (((v).n == (v).m)?							\
-						   ((v).m = ((v).m? (v).m<<1 : 2),				\
-							(v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0)	\
-						   : 0), ((v).a + ((v).n++))
-
-#define kv_a(type, v, i) (((v).m <= (size_t)(i)? \
-						  ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
-						   (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
-						  : (v).n <= (size_t)(i)? (v).n = (i) + 1 \
-						  : 0), (v).a[(i)])
-
-#endif


=====================================
seqtk.c
=====================================
@@ -1,6 +1,6 @@
 /* The MIT License
 
-   Copyright (c) 20082-2012 by Heng Li <lh3 at me.com>
+   Copyright (c) 2008-2016 Broad Institute
 
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
@@ -57,9 +57,10 @@ reghash_t *stk_reg_read(const char *fn)
 	int dret;
 	kstring_t *str;
 	// read the list
-	str = calloc(1, sizeof(kstring_t));
 	fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) return 0;
 	ks = ks_init(fp);
+	str = calloc(1, sizeof(kstring_t));
 	while (ks_getuntil(ks, 0, str, &dret) >= 0) {
 		int beg = -1, end = -1;
 		reglist_t *p;
@@ -298,10 +299,15 @@ int stk_trimfq(int argc, char *argv[])
 		fprintf(stderr, "         -b INT      trim INT bp from left (non-zero to disable -q/-l) [0]\n");
 		fprintf(stderr, "         -e INT      trim INT bp from right (non-zero to disable -q/-l) [0]\n");
 		fprintf(stderr, "         -L INT      retain at most INT bp from the 5'-end (non-zero to disable -q/-l) [0]\n");
+		fprintf(stderr, "         -Q          force FASTQ output\n");
 		fprintf(stderr, "\n");
 		return 1;
 	}
 	fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	for (i = 0; i < 128; ++i)
 		q_int2real[i] = pow(10., -(i - 33) / 10.);
@@ -336,12 +342,12 @@ int stk_trimfq(int argc, char *argv[])
 				end = beg + min_len;
 			}
 		} else beg = 0, end = seq->seq.l;
-		putchar(seq->qual.l? '@' : '>'); fputs(seq->name.s, stdout); 
+		putchar(seq->is_fastq? '@' : '>'); fputs(seq->name.s, stdout); 
 		if (seq->comment.l) {
 			putchar(' '); puts(seq->comment.s);
 		} else putchar('\n');
 		fwrite(seq->seq.s + beg, 1, end - beg, stdout); putchar('\n');
-		if (seq->qual.l) {
+		if (seq->is_fastq) {
 			puts("+");
 			fwrite(seq->qual.s + beg, 1, end - beg, stdout); putchar('\n');
 		}
@@ -372,6 +378,10 @@ int stk_comp(int argc, char *argv[])
 		return 1;
 	}
 	fp = optind < argc && strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	dummy.n= dummy.m = 1; dummy.a = calloc(1, 8);
 	while ((l = kseq_read(seq)) >= 0) {
@@ -437,6 +447,10 @@ int stk_randbase(int argc, char *argv[])
 		return 1;
 	}
 	fp = (strcmp(argv[1], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[1], "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	while ((l = kseq_read(seq)) >= 0) {
 		int i;
@@ -489,6 +503,10 @@ int stk_hety(int argc, char *argv[])
 		}
 	}
 	fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	win_step = win_size / n_start;
 	buf = calloc(win_size, 1);
@@ -547,8 +565,16 @@ int stk_subseq(int argc, char *argv[])
 		return 1;
 	}
 	h = stk_reg_read(argv[optind+1]);
+	if (h == 0) {
+		fprintf(stderr, "[E::%s] failed to read the list of regions in file '%s'\n", __func__, argv[optind+1]);
+		return 1;
+	}
 	// subseq
 	fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	while ((l = kseq_read(seq)) >= 0) {
 		reglist_t *p;
@@ -626,6 +652,10 @@ int stk_mergefa(int argc, char *argv[])
 		fp[i] = strcmp(argv[optind+i], "-")? gzopen(argv[optind+i], "r") : gzdopen(fileno(stdin), "r");
 		seq[i] = kseq_init(fp[i]);
 	}
+	if (fp[0] == 0 || fp[1] == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	cnt[0] = cnt[1] = cnt[2] = cnt[3] = cnt[4] = 0;
 	srand48(11);
 	while (kseq_read(seq[0]) >= 0) {
@@ -707,6 +737,10 @@ int stk_famask(int argc, char *argv[])
 		fp[i] = strcmp(argv[optind+i], "-")? gzopen(argv[optind+i], "r") : gzdopen(fileno(stdin), "r");
 		seq[i] = kseq_init(fp[i]);
 	}
+	if (fp[0] == 0 || fp[1] == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	while (kseq_read(seq[0]) >= 0) {
 		int min_l, c[2];
 		kseq_read(seq[1]);
@@ -746,6 +780,10 @@ int stk_mutfa(int argc, char *argv[])
 	// read the list
 	str = calloc(1, sizeof(kstring_t));
 	fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	ks = ks_init(fp);
 	while (ks_getuntil(ks, 0, str, &dret) >= 0) {
 		char *s = strdup(str->s);
@@ -775,6 +813,10 @@ int stk_mutfa(int argc, char *argv[])
 	free(str->s); free(str);
 	// mutfa
 	fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	while ((l = kseq_read(seq)) >= 0) {
 		reglist_t *p;
@@ -817,6 +859,10 @@ int stk_listhet(int argc, char *argv[])
 		return 1;
 	}
 	fp = (strcmp(argv[1], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[1], "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	while ((l = kseq_read(seq)) >= 0) {
 		for (i = 0; i < l; ++i) {
@@ -902,6 +948,10 @@ int stk_cutN(int argc, char *argv[])
 		return 1;
 	}
 	fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	ks = kseq_init(fp);
 	while ((l = kseq_read(ks)) >= 0) {
 		int k = 0, begin = 0, end = 0;
@@ -930,6 +980,10 @@ int stk_hrun(int argc, char *argv[])
 	}
 	if (argc == optind + 2) min_len = atoi(argv[optind+1]);
 	fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	ks = kseq_init(fp);
 	while (kseq_read(ks) >= 0) {
 		c = ks->seq.s[0]; l = 1; beg = 0;
@@ -989,7 +1043,7 @@ int stk_sample(int argc, char *argv[])
 		return 1;
 	}
 	frac = atof(argv[optind+1]);
-	if (frac > 1.) num = (uint64_t)(frac + .499), frac = 0.;
+	if (frac >= 1.0) num = (uint64_t)(frac + .499), frac = 0.;
 	else if (twopass) {
 		fprintf(stderr, "[W::%s] when sampling a fraction, option -2 is ignored.", __func__);
 		twopass = 0;
@@ -1006,6 +1060,10 @@ int stk_sample(int argc, char *argv[])
 		}
 
 		fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+		if (fp == 0) {
+			fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+			return 1;
+		}
 		seq = kseq_init(fp);
 		n_seqs = 0;
 		while (kseq_read(seq) >= 0) {
@@ -1037,6 +1095,10 @@ int stk_sample(int argc, char *argv[])
 		buf = malloc(num * 8);
 		for (i = 0; i < num; ++i) buf[i] = UINT64_MAX;
 		fp = gzopen(argv[optind], "r");
+		if (fp == 0) {
+			fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+			return 1;
+		}
 		seq = kseq_init(fp);
 		n_seqs = 0;
 		while (kseq_read(seq) >= 0) {
@@ -1117,14 +1179,14 @@ int stk_seq(int argc, char *argv[])
 {
 	gzFile fp;
 	kseq_t *seq;
-	int c, qual_thres = 0, flag = 0, qual_shift = 33, mask_chr = 0, min_len = 0, max_q = 255;
+	int c, qual_thres = 0, flag = 0, qual_shift = 33, mask_chr = 0, min_len = 0, max_q = 255, fake_qual = -1;
 	unsigned i, line_len = 0;
 	int64_t n_seqs = 0;
 	double frac = 1.;
 	khash_t(reg) *h = 0;
 	krand_t *kr = 0;
 
-	while ((c = getopt(argc, argv, "N12q:l:Q:aACrn:s:f:M:L:cVUX:S")) >= 0) {
+	while ((c = getopt(argc, argv, "N12q:l:Q:aACrn:s:f:M:L:cVUX:SF:")) >= 0) {
 		switch (c) {
 			case 'a':
 			case 'A': flag |= 1; break;
@@ -1146,6 +1208,7 @@ int stk_seq(int argc, char *argv[])
 			case 'L': min_len = atoi(optarg); break;
 			case 's': kr = kr_srand(atol(optarg)); break;
 			case 'f': frac = atof(optarg); break;
+			case 'F': fake_qual = *optarg; break;
 		}
 	}
 	if (kr == 0) kr = kr_srand(11);
@@ -1161,6 +1224,7 @@ int stk_seq(int argc, char *argv[])
 		fprintf(stderr, "         -f FLOAT  sample FLOAT fraction of sequences [1]\n");
 		fprintf(stderr, "         -M FILE   mask regions in BED or name list FILE [null]\n");
 		fprintf(stderr, "         -L INT    drop sequences with length shorter than INT [0]\n");
+		fprintf(stderr, "         -F CHAR   fake FASTQ quality []\n");
 		fprintf(stderr, "         -c        mask complement region (effective with -M)\n");
 		fprintf(stderr, "         -r        reverse complement\n");
 		fprintf(stderr, "         -A        force FASTA output (discard quality)\n");
@@ -1177,6 +1241,10 @@ int stk_seq(int argc, char *argv[])
 	}
 	if (line_len == 0) line_len = UINT_MAX;
 	fp = optind < argc && strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	qual_thres += qual_shift;
 	while (kseq_read(seq) >= 0) {
@@ -1215,6 +1283,15 @@ int stk_seq(int argc, char *argv[])
 			for (i = 0; i < seq->seq.l; ++i)
 				seq->seq.s[i] = toupper(seq->seq.s[i]);
 		if (flag & 1) seq->qual.l = 0; // option -a: fastq -> fasta
+		else if (fake_qual >= 33 && fake_qual <= 127) {
+			if (seq->qual.m < seq->seq.m) {
+				seq->qual.m = seq->seq.m;
+				seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);
+			}
+			seq->qual.l = seq->seq.l;
+			memset(seq->qual.s, fake_qual, seq->qual.l);
+			seq->qual.s[seq->qual.l] = 0;
+		}
 		if (flag & 2) seq->comment.l = 0; // option -C: drop fasta/q comments
 		if (h) stk_mask(seq, h, flag&8, mask_chr); // masking
 		if (flag & 4) { // option -r: reverse complement
@@ -1274,6 +1351,10 @@ int stk_gc(int argc, char *argv[])
 	q = (1.0f - frac) / frac;
 
 	fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	while (kseq_read(seq) >= 0) {
 		int i, start = 0, max_i = 0, n_hits = 0, start_hits = 0, max_hits = 0;
@@ -1317,6 +1398,10 @@ int stk_mergepe(int argc, char *argv[])
 	}
 	fp1 = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
 	fp2 = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
+	if (fp1 == 0 || fp2 == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq[0] = kseq_init(fp1);
 	seq[1] = kseq_init(fp2);
 	while (kseq_read(seq[0]) >= 0) {
@@ -1344,6 +1429,10 @@ int stk_dropse(int argc, char *argv[])
 		return 1;
 	}
 	fp = argc > 1 && strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 
 	memset(&last, 0, sizeof(kseq_t));
@@ -1381,6 +1470,10 @@ int stk_rename(int argc, char *argv[])
 		return 1;
 	}
 	fp = argc > 1 && strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	if (argc > 2) prefix = argv[2];
 
@@ -1444,6 +1537,10 @@ int stk_kfreq(int argc, char *argv[])
 	}
 
 	fp = argc == 2 || strcmp(argv[2], "-") == 0? gzdopen(fileno(stdin), "r") : gzopen(argv[2], "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	ks = kseq_init(fp);
 	while (kseq_read(ks) >= 0) {
 		int k, x[2], cnt[2], cnt_nei[2], which;
@@ -1517,6 +1614,10 @@ int stk_fqchk(int argc, char *argv[])
 		return 1;
 	}
 	fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+	if (fp == 0) {
+		fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+		return 1;
+	}
 	seq = kseq_init(fp);
 	for (k = 0; k <= 93; ++k)
 		perr[k] = pow(10., -.1 * k);
@@ -1574,7 +1675,7 @@ static int usage()
 {
 	fprintf(stderr, "\n");
 	fprintf(stderr, "Usage:   seqtk <command> <arguments>\n");
-	fprintf(stderr, "Version: 1.2-r94\n\n");
+	fprintf(stderr, "Version: 1.3-r106\n\n");
 	fprintf(stderr, "Command: seq       common transformation of FASTA/Q\n");
 	fprintf(stderr, "         comp      get the nucleotide composition of FASTA/Q\n");
 	fprintf(stderr, "         sample    subsample sequences\n");
@@ -1599,28 +1700,27 @@ static int usage()
 int main(int argc, char *argv[])
 {
 	if (argc == 1) return usage();
-	if (strcmp(argv[1], "comp") == 0) stk_comp(argc-1, argv+1);
-	else if (strcmp(argv[1], "fqchk") == 0) stk_fqchk(argc-1, argv+1);
-	else if (strcmp(argv[1], "hety") == 0) stk_hety(argc-1, argv+1);
-	else if (strcmp(argv[1], "gc") == 0) stk_gc(argc-1, argv+1);
-	else if (strcmp(argv[1], "subseq") == 0) stk_subseq(argc-1, argv+1);
-	else if (strcmp(argv[1], "mutfa") == 0) stk_mutfa(argc-1, argv+1);
-	else if (strcmp(argv[1], "mergefa") == 0) stk_mergefa(argc-1, argv+1);
-	else if (strcmp(argv[1], "mergepe") == 0) stk_mergepe(argc-1, argv+1);
-	else if (strcmp(argv[1], "dropse") == 0) stk_dropse(argc-1, argv+1);
-	else if (strcmp(argv[1], "randbase") == 0) stk_randbase(argc-1, argv+1);
-	else if (strcmp(argv[1], "cutN") == 0) stk_cutN(argc-1, argv+1);
-	else if (strcmp(argv[1], "listhet") == 0) stk_listhet(argc-1, argv+1);
-	else if (strcmp(argv[1], "famask") == 0) stk_famask(argc-1, argv+1);
-	else if (strcmp(argv[1], "trimfq") == 0) stk_trimfq(argc-1, argv+1);
-	else if (strcmp(argv[1], "hrun") == 0) stk_hrun(argc-1, argv+1);
-	else if (strcmp(argv[1], "sample") == 0) stk_sample(argc-1, argv+1);
-	else if (strcmp(argv[1], "seq") == 0) stk_seq(argc-1, argv+1);
-	else if (strcmp(argv[1], "kfreq") == 0) stk_kfreq(argc-1, argv+1);
-	else if (strcmp(argv[1], "rename") == 0) stk_rename(argc-1, argv+1);
+	if (strcmp(argv[1], "comp") == 0) return stk_comp(argc-1, argv+1);
+	else if (strcmp(argv[1], "fqchk") == 0) return stk_fqchk(argc-1, argv+1);
+	else if (strcmp(argv[1], "hety") == 0) return stk_hety(argc-1, argv+1);
+	else if (strcmp(argv[1], "gc") == 0) return stk_gc(argc-1, argv+1);
+	else if (strcmp(argv[1], "subseq") == 0) return stk_subseq(argc-1, argv+1);
+	else if (strcmp(argv[1], "mutfa") == 0) return stk_mutfa(argc-1, argv+1);
+	else if (strcmp(argv[1], "mergefa") == 0) return stk_mergefa(argc-1, argv+1);
+	else if (strcmp(argv[1], "mergepe") == 0) return stk_mergepe(argc-1, argv+1);
+	else if (strcmp(argv[1], "dropse") == 0) return stk_dropse(argc-1, argv+1);
+	else if (strcmp(argv[1], "randbase") == 0) return stk_randbase(argc-1, argv+1);
+	else if (strcmp(argv[1], "cutN") == 0) return stk_cutN(argc-1, argv+1);
+	else if (strcmp(argv[1], "listhet") == 0) return stk_listhet(argc-1, argv+1);
+	else if (strcmp(argv[1], "famask") == 0) return stk_famask(argc-1, argv+1);
+	else if (strcmp(argv[1], "trimfq") == 0) return stk_trimfq(argc-1, argv+1);
+	else if (strcmp(argv[1], "hrun") == 0) return stk_hrun(argc-1, argv+1);
+	else if (strcmp(argv[1], "sample") == 0) return stk_sample(argc-1, argv+1);
+	else if (strcmp(argv[1], "seq") == 0) return stk_seq(argc-1, argv+1);
+	else if (strcmp(argv[1], "kfreq") == 0) return stk_kfreq(argc-1, argv+1);
+	else if (strcmp(argv[1], "rename") == 0) return stk_rename(argc-1, argv+1);
 	else {
 		fprintf(stderr, "[main] unrecognized command '%s'. Abort!\n", argv[1]);
 		return 1;
 	}
-	return 0;
 }



View it on GitLab: https://salsa.debian.org/med-team/seqtk/commit/ba538534001ad07ceffd5ca1157bc37df5f2ece2

-- 
View it on GitLab: https://salsa.debian.org/med-team/seqtk/commit/ba538534001ad07ceffd5ca1157bc37df5f2ece2
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180920/ed518564/attachment-0001.html>


More information about the debian-med-commit mailing list