[med-svn] [Git][med-team/seqtk][upstream] New upstream version 1.3
Andreas Tille
gitlab at salsa.debian.org
Thu Sep 20 20:42:51 BST 2018
Andreas Tille pushed to branch upstream at Debian Med / seqtk
Commits:
ba538534 by Andreas Tille at 2018-09-20T19:34:08Z
New upstream version 1.3
- - - - -
6 changed files:
- Makefile
- kseq.h
- − ksort.h
- − kstring.h
- − kvec.h
- seqtk.c
Changes:
=====================================
Makefile
=====================================
@@ -1,10 +1,14 @@
CC=gcc
CFLAGS=-g -Wall -O2 -Wno-unused-function
+BINDIR=/usr/local/bin
all:seqtk
seqtk:seqtk.c khash.h kseq.h
$(CC) $(CFLAGS) seqtk.c -o $@ -lz -lm
+install:all
+ install seqtk $(BINDIR)
+
clean:
rm -fr gmon.out *.o ext/*.o a.out seqtk trimadap *~ *.a *.dSYM session*
=====================================
kseq.h
=====================================
@@ -23,7 +23,7 @@
SOFTWARE.
*/
-/* Last Modified: 05MAR2012 */
+/* Last Modified: 2017-02-11 */
#ifndef AC_KSEQ_H
#define AC_KSEQ_H
@@ -37,42 +37,45 @@
#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
#define KS_SEP_MAX 2
-#define __KS_TYPE(type_t) \
- typedef struct __kstream_t { \
- unsigned char *buf; \
- int begin, end, is_eof; \
- type_t f; \
+#define __KS_TYPE(type_t) \
+ typedef struct __kstream_t { \
+ unsigned char *buf; \
+ int begin, end, is_eof; \
+ type_t f; \
} kstream_t;
+#define ks_err(ks) ((ks)->end < 0)
#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
-#define __KS_BASIC(type_t, __bufsize) \
- static inline kstream_t *ks_init(type_t f) \
- { \
- kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
- ks->f = f; \
- ks->buf = (unsigned char*)malloc(__bufsize); \
- return ks; \
- } \
- static inline void ks_destroy(kstream_t *ks) \
- { \
- if (ks) { \
- free(ks->buf); \
- free(ks); \
- } \
+#define __KS_BASIC(type_t, __bufsize) \
+ static inline kstream_t *ks_init(type_t f) \
+ { \
+ kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
+ ks->f = f; \
+ ks->buf = (unsigned char*)malloc(__bufsize); \
+ return ks; \
+ } \
+ static inline void ks_destroy(kstream_t *ks) \
+ { \
+ if (ks) { \
+ free(ks->buf); \
+ free(ks); \
+ } \
}
-#define __KS_GETC(__read, __bufsize) \
- static inline int ks_getc(kstream_t *ks) \
- { \
- if (ks->is_eof && ks->begin >= ks->end) return -1; \
- if (ks->begin >= ks->end) { \
- ks->begin = 0; \
- ks->end = __read(ks->f, ks->buf, __bufsize); \
- if (ks->end == 0) { ks->is_eof = 1; return -1;} \
- } \
- return (int)ks->buf[ks->begin++]; \
+#define __KS_GETC(__read, __bufsize) \
+ static inline int ks_getc(kstream_t *ks) \
+ { \
+ if (ks_err(ks)) return -3; \
+ if (ks_eof(ks)) return -1; \
+ if (ks->begin >= ks->end) { \
+ ks->begin = 0; \
+ ks->end = __read(ks->f, ks->buf, __bufsize); \
+ if (ks->end == 0) { ks->is_eof = 1; return -1; } \
+ else if (ks->end < 0) { ks->is_eof = 1; return -3; } \
+ } \
+ return (int)ks->buf[ks->begin++]; \
}
#ifndef KSTRING_T
@@ -87,140 +90,145 @@ typedef struct __kstring_t {
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
-#define __KS_GETUNTIL(__read, __bufsize) \
+#define __KS_GETUNTIL(__read, __bufsize) \
static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
- { \
- int gotany = 0; \
- if (dret) *dret = 0; \
- str->l = append? str->l : 0; \
- for (;;) { \
- int i; \
- if (ks->begin >= ks->end) { \
- if (!ks->is_eof) { \
- ks->begin = 0; \
- ks->end = __read(ks->f, ks->buf, __bufsize); \
- if (ks->end == 0) { ks->is_eof = 1; break; } \
- } else break; \
- } \
+ { \
+ int gotany = 0; \
+ if (dret) *dret = 0; \
+ str->l = append? str->l : 0; \
+ for (;;) { \
+ int i; \
+ if (ks_err(ks)) return -3; \
+ if (ks->begin >= ks->end) { \
+ if (!ks->is_eof) { \
+ ks->begin = 0; \
+ ks->end = __read(ks->f, ks->buf, __bufsize); \
+ if (ks->end == 0) { ks->is_eof = 1; break; } \
+ if (ks->end == -1) { ks->is_eof = 1; return -3; } \
+ } else break; \
+ } \
if (delimiter == KS_SEP_LINE) { \
for (i = ks->begin; i < ks->end; ++i) \
if (ks->buf[i] == '\n') break; \
- } else if (delimiter > KS_SEP_MAX) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (ks->buf[i] == delimiter) break; \
- } else if (delimiter == KS_SEP_SPACE) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (isspace(ks->buf[i])) break; \
- } else if (delimiter == KS_SEP_TAB) { \
- for (i = ks->begin; i < ks->end; ++i) \
+ } else if (delimiter > KS_SEP_MAX) { \
+ for (i = ks->begin; i < ks->end; ++i) \
+ if (ks->buf[i] == delimiter) break; \
+ } else if (delimiter == KS_SEP_SPACE) { \
+ for (i = ks->begin; i < ks->end; ++i) \
+ if (isspace(ks->buf[i])) break; \
+ } else if (delimiter == KS_SEP_TAB) { \
+ for (i = ks->begin; i < ks->end; ++i) \
if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
- } else i = 0; /* never come to here! */ \
- if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
- str->m = str->l + (i - ks->begin) + 1; \
- kroundup32(str->m); \
- str->s = (char*)realloc(str->s, str->m); \
- } \
- gotany = 1; \
+ } else i = 0; /* never come to here! */ \
+ if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
+ str->m = str->l + (i - ks->begin) + 1; \
+ kroundup32(str->m); \
+ str->s = (char*)realloc(str->s, str->m); \
+ } \
+ gotany = 1; \
memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
- str->l = str->l + (i - ks->begin); \
- ks->begin = i + 1; \
- if (i < ks->end) { \
- if (dret) *dret = ks->buf[i]; \
- break; \
- } \
- } \
- if (!gotany && ks_eof(ks)) return -1; \
- if (str->s == 0) { \
- str->m = 1; \
- str->s = (char*)calloc(1, 1); \
+ str->l = str->l + (i - ks->begin); \
+ ks->begin = i + 1; \
+ if (i < ks->end) { \
+ if (dret) *dret = ks->buf[i]; \
+ break; \
+ } \
+ } \
+ if (!gotany && ks_eof(ks)) return -1; \
+ if (str->s == 0) { \
+ str->m = 1; \
+ str->s = (char*)calloc(1, 1); \
} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
- str->s[str->l] = '\0'; \
- return str->l; \
+ str->s[str->l] = '\0'; \
+ return str->l; \
} \
static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
#define KSTREAM_INIT(type_t, __read, __bufsize) \
- __KS_TYPE(type_t) \
- __KS_BASIC(type_t, __bufsize) \
- __KS_GETC(__read, __bufsize) \
+ __KS_TYPE(type_t) \
+ __KS_BASIC(type_t, __bufsize) \
+ __KS_GETC(__read, __bufsize) \
__KS_GETUNTIL(__read, __bufsize)
#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
-#define __KSEQ_BASIC(SCOPE, type_t) \
- SCOPE kseq_t *kseq_init(type_t fd) \
- { \
- kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
- s->f = ks_init(fd); \
- return s; \
- } \
- SCOPE void kseq_destroy(kseq_t *ks) \
- { \
- if (!ks) return; \
+#define __KSEQ_BASIC(SCOPE, type_t) \
+ SCOPE kseq_t *kseq_init(type_t fd) \
+ { \
+ kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
+ s->f = ks_init(fd); \
+ return s; \
+ } \
+ SCOPE void kseq_destroy(kseq_t *ks) \
+ { \
+ if (!ks) return; \
free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
- ks_destroy(ks->f); \
- free(ks); \
+ ks_destroy(ks->f); \
+ free(ks); \
}
/* Return value:
>=0 length of the sequence (normal)
-1 end-of-file
-2 truncated quality string
+ -3 error reading stream
*/
#define __KSEQ_READ(SCOPE) \
SCOPE int kseq_read(kseq_t *seq) \
{ \
- int c; \
+ int c,r; \
kstream_t *ks = seq->f; \
if (seq->last_char == 0) { /* then jump to the next header line */ \
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
- if (c == -1) return -1; /* end of file */ \
+ while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \
+ if (c < 0) return c; /* end of file or error*/ \
seq->last_char = c; \
} /* else: the first header char has been read in the previous call */ \
seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
- if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
+ if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r; /* normal exit: EOF or error */ \
if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
seq->seq.m = 256; \
seq->seq.s = (char*)malloc(seq->seq.m); \
} \
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+ while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \
if (c == '\n') continue; /* skip empty lines */ \
seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
} \
- if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
+ if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
seq->seq.m = seq->seq.l + 2; \
kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
} \
seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
- if (c != '+') return seq->seq.l; /* FASTA */ \
+ seq->is_fastq = (c == '+'); \
+ if (!seq->is_fastq) return seq->seq.l; /* FASTA */ \
if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \
seq->qual.m = seq->seq.m; \
seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
} \
- while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+ while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \
if (c == -1) return -2; /* error: no quality string */ \
- while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
+ while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l)); \
+ if (c == -3) return -3; /* stream error */ \
seq->last_char = 0; /* we have not come to the next header line */ \
if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
return seq->seq.l; \
}
-#define __KSEQ_TYPE(type_t) \
- typedef struct { \
- kstring_t name, comment, seq, qual; \
- int last_char; \
- kstream_t *f; \
+#define __KSEQ_TYPE(type_t) \
+ typedef struct { \
+ kstring_t name, comment, seq, qual; \
+ int last_char, is_fastq; \
+ kstream_t *f; \
} kseq_t;
-#define KSEQ_INIT2(SCOPE, type_t, __read) \
- KSTREAM_INIT(type_t, __read, 16384) \
- __KSEQ_TYPE(type_t) \
- __KSEQ_BASIC(SCOPE, type_t) \
+#define KSEQ_INIT2(SCOPE, type_t, __read) \
+ KSTREAM_INIT(type_t, __read, 16384) \
+ __KSEQ_TYPE(type_t) \
+ __KSEQ_BASIC(SCOPE, type_t) \
__KSEQ_READ(SCOPE)
#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
=====================================
ksort.h deleted
=====================================
@@ -1,298 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008, 2011 Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/*
- 2011-04-10 (0.1.6):
-
- * Added sample
-
- 2011-03 (0.1.5):
-
- * Added shuffle/permutation
-
- 2008-11-16 (0.1.4):
-
- * Fixed a bug in introsort() that happens in rare cases.
-
- 2008-11-05 (0.1.3):
-
- * Fixed a bug in introsort() for complex comparisons.
-
- * Fixed a bug in mergesort(). The previous version is not stable.
-
- 2008-09-15 (0.1.2):
-
- * Accelerated introsort. On my Mac (not on another Linux machine),
- my implementation is as fast as std::sort on random input.
-
- * Added combsort and in introsort, switch to combsort if the
- recursion is too deep.
-
- 2008-09-13 (0.1.1):
-
- * Added k-small algorithm
-
- 2008-09-05 (0.1.0):
-
- * Initial version
-
-*/
-
-#ifndef AC_KSORT_H
-#define AC_KSORT_H
-
-#include <stdlib.h>
-#include <string.h>
-
-typedef struct {
- void *left, *right;
- int depth;
-} ks_isort_stack_t;
-
-#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
-
-#define KSORT_INIT(name, type_t, __sort_lt) \
- void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \
- { \
- type_t *a2[2], *a, *b; \
- int curr, shift; \
- \
- a2[0] = array; \
- a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
- for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { \
- a = a2[curr]; b = a2[1-curr]; \
- if (shift == 0) { \
- type_t *p = b, *i, *eb = a + n; \
- for (i = a; i < eb; i += 2) { \
- if (i == eb - 1) *p++ = *i; \
- else { \
- if (__sort_lt(*(i+1), *i)) { \
- *p++ = *(i+1); *p++ = *i; \
- } else { \
- *p++ = *i; *p++ = *(i+1); \
- } \
- } \
- } \
- } else { \
- size_t i, step = 1ul<<shift; \
- for (i = 0; i < n; i += step<<1) { \
- type_t *p, *j, *k, *ea, *eb; \
- if (n < i + step) { \
- ea = a + n; eb = a; \
- } else { \
- ea = a + i + step; \
- eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
- } \
- j = a + i; k = a + i + step; p = b + i; \
- while (j < ea && k < eb) { \
- if (__sort_lt(*k, *j)) *p++ = *k++; \
- else *p++ = *j++; \
- } \
- while (j < ea) *p++ = *j++; \
- while (k < eb) *p++ = *k++; \
- } \
- } \
- curr = 1 - curr; \
- } \
- if (curr == 1) { \
- type_t *p = a2[0], *i = a2[1], *eb = array + n; \
- for (; p < eb; ++i) *p++ = *i; \
- } \
- if (temp == 0) free(a2[1]); \
- } \
- void ks_heapadjust_##name(size_t i, size_t n, type_t l[]) \
- { \
- size_t k = i; \
- type_t tmp = l[i]; \
- while ((k = (k << 1) + 1) < n) { \
- if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \
- if (__sort_lt(l[k], tmp)) break; \
- l[i] = l[k]; i = k; \
- } \
- l[i] = tmp; \
- } \
- void ks_heapmake_##name(size_t lsize, type_t l[]) \
- { \
- size_t i; \
- for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \
- ks_heapadjust_##name(i, lsize, l); \
- } \
- void ks_heapsort_##name(size_t lsize, type_t l[]) \
- { \
- size_t i; \
- for (i = lsize - 1; i > 0; --i) { \
- type_t tmp; \
- tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
- } \
- } \
- static inline void __ks_insertsort_##name(type_t *s, type_t *t) \
- { \
- type_t *i, *j, swap_tmp; \
- for (i = s + 1; i < t; ++i) \
- for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \
- swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \
- } \
- } \
- void ks_combsort_##name(size_t n, type_t a[]) \
- { \
- const double shrink_factor = 1.2473309501039786540366528676643; \
- int do_swap; \
- size_t gap = n; \
- type_t tmp, *i, *j; \
- do { \
- if (gap > 2) { \
- gap = (size_t)(gap / shrink_factor); \
- if (gap == 9 || gap == 10) gap = 11; \
- } \
- do_swap = 0; \
- for (i = a; i < a + n - gap; ++i) { \
- j = i + gap; \
- if (__sort_lt(*j, *i)) { \
- tmp = *i; *i = *j; *j = tmp; \
- do_swap = 1; \
- } \
- } \
- } while (do_swap || gap > 2); \
- if (gap != 1) __ks_insertsort_##name(a, a + n); \
- } \
- void ks_introsort_##name(size_t n, type_t a[]) \
- { \
- int d; \
- ks_isort_stack_t *top, *stack; \
- type_t rp, swap_tmp; \
- type_t *s, *t, *i, *j, *k; \
- \
- if (n < 1) return; \
- else if (n == 2) { \
- if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
- return; \
- } \
- for (d = 2; 1ul<<d < n; ++d); \
- stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
- top = stack; s = a; t = a + (n-1); d <<= 1; \
- while (1) { \
- if (s < t) { \
- if (--d == 0) { \
- ks_combsort_##name(t - s + 1, s); \
- t = s; \
- continue; \
- } \
- i = s; j = t; k = i + ((j-i)>>1) + 1; \
- if (__sort_lt(*k, *i)) { \
- if (__sort_lt(*k, *j)) k = j; \
- } else k = __sort_lt(*j, *i)? i : j; \
- rp = *k; \
- if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \
- for (;;) { \
- do ++i; while (__sort_lt(*i, rp)); \
- do --j; while (i <= j && __sort_lt(rp, *j)); \
- if (j <= i) break; \
- swap_tmp = *i; *i = *j; *j = swap_tmp; \
- } \
- swap_tmp = *i; *i = *t; *t = swap_tmp; \
- if (i-s > t-i) { \
- if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
- s = t-i > 16? i+1 : t; \
- } else { \
- if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
- t = i-s > 16? i-1 : s; \
- } \
- } else { \
- if (top == stack) { \
- free(stack); \
- __ks_insertsort_##name(a, a+n); \
- return; \
- } else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
- } \
- } \
- } \
- /* This function is adapted from: http://ndevilla.free.fr/median/ */ \
- /* 0 <= kk < n */ \
- type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
- { \
- type_t *low, *high, *k, *ll, *hh, *mid; \
- low = arr; high = arr + n - 1; k = arr + kk; \
- for (;;) { \
- if (high <= low) return *k; \
- if (high == low + 1) { \
- if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
- return *k; \
- } \
- mid = low + (high - low) / 2; \
- if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
- if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
- if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \
- KSORT_SWAP(type_t, *mid, *(low+1)); \
- ll = low + 1; hh = high; \
- for (;;) { \
- do ++ll; while (__sort_lt(*ll, *low)); \
- do --hh; while (__sort_lt(*low, *hh)); \
- if (hh < ll) break; \
- KSORT_SWAP(type_t, *ll, *hh); \
- } \
- KSORT_SWAP(type_t, *low, *hh); \
- if (hh <= k) low = ll; \
- if (hh >= k) high = hh - 1; \
- } \
- } \
- void ks_shuffle_##name(size_t n, type_t a[]) \
- { \
- int i, j; \
- for (i = n; i > 1; --i) { \
- type_t tmp; \
- j = (int)(drand48() * i); \
- tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \
- } \
- } \
- void ks_sample_##name(size_t n, size_t r, type_t a[]) /* FIXME: NOT TESTED!!! */ \
- { /* reference: http://code.activestate.com/recipes/272884/ */ \
- int i, k, pop = n; \
- for (i = (int)r, k = 0; i >= 0; --i) { \
- double z = 1., x = drand48(); \
- type_t tmp; \
- while (x < z) z -= z * i / (pop--); \
- if (k != n - pop - 1) tmp = a[k], a[k] = a[n-pop-1], a[n-pop-1] = tmp; \
- ++k; \
- } \
- }
-
-#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
-#define ks_introsort(name, n, a) ks_introsort_##name(n, a)
-#define ks_combsort(name, n, a) ks_combsort_##name(n, a)
-#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
-#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
-#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
-#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
-#define ks_shuffle(name, n, a) ks_shuffle_##name(n, a)
-
-#define ks_lt_generic(a, b) ((a) < (b))
-#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
-
-typedef const char *ksstr_t;
-
-#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
-#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
-
-#endif
=====================================
kstring.h deleted
=====================================
@@ -1,169 +0,0 @@
-/* The MIT License
-
- Copyright (c) by Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-#ifndef KSTRING_H
-#define KSTRING_H
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#ifndef KSTRING_T
-#define KSTRING_T kstring_t
-typedef struct __kstring_t {
- uint32_t l, m;
- char *s;
-} kstring_t;
-#endif
-
-typedef struct {
- uint64_t tab[4];
- int sep, finished;
- const char *p; // end of the current token
-} ks_tokaux_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- int ksprintf(kstring_t *s, const char *fmt, ...);
- int ksprintf_fast(kstring_t *s, const char *fmt, ...);
- int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
- char *kstrstr(const char *str, const char *pat, int **_prep);
- char *kstrnstr(const char *str, const char *pat, int n, int **_prep);
- void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep);
-
- /* kstrtok() is similar to strtok_r() except that str is not
- * modified and both str and sep can be NULL. For efficiency, it is
- * actually recommended to set both to NULL in the subsequent calls
- * if sep is not changed. */
- char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux);
-
-#ifdef __cplusplus
-}
-#endif
-
-static inline void ks_resize(kstring_t *s, size_t size)
-{
- if (s->m < size) {
- s->m = size;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
-}
-
-static inline int kputsn(const char *p, int l, kstring_t *s)
-{
- if (s->l + l + 1 >= s->m) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- memcpy(s->s + s->l, p, l);
- s->l += l;
- s->s[s->l] = 0;
- return l;
-}
-
-static inline int kputs(const char *p, kstring_t *s)
-{
- return kputsn(p, strlen(p), s);
-}
-
-static inline int kputc(int c, kstring_t *s)
-{
- if (s->l + 1 >= s->m) {
- s->m = s->l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- s->s[s->l++] = c;
- s->s[s->l] = 0;
- return c;
-}
-
-static inline int kputw(int c, kstring_t *s)
-{
- char buf[16];
- int l, x;
- if (c == 0) return kputc('0', s);
- for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
- if (c < 0) buf[l++] = '-';
- if (s->l + l + 1 >= s->m) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
- s->s[s->l] = 0;
- return 0;
-}
-
-static inline int kputuw(unsigned c, kstring_t *s)
-{
- char buf[16];
- int l, i;
- unsigned x;
- if (c == 0) return kputc('0', s);
- for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0';
- if (s->l + l + 1 >= s->m) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
- s->s[s->l] = 0;
- return 0;
-}
-
-static inline int kputl(long c, kstring_t *s)
-{
- char buf[32];
- long l, x;
- if (c == 0) return kputc('0', s);
- for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
- if (c < 0) buf[l++] = '-';
- if (s->l + l + 1 >= s->m) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
- s->s[s->l] = 0;
- return 0;
-}
-
-static inline int *ksplit(kstring_t *s, int delimiter, int *n)
-{
- int max = 0, *offsets = 0;
- *n = ksplit_core(s->s, delimiter, &max, &offsets);
- return offsets;
-}
-
-#endif
=====================================
kvec.h deleted
=====================================
@@ -1,90 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008, by Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/*
- An example:
-
-#include "kvec.h"
-int main() {
- kvec_t(int) array;
- kv_init(array);
- kv_push(int, array, 10); // append
- kv_a(int, array, 20) = 5; // dynamic
- kv_A(array, 20) = 4; // static
- kv_destroy(array);
- return 0;
-}
-*/
-
-/*
- 2008-09-22 (0.1.0):
-
- * The initial version.
-
-*/
-
-#ifndef AC_KVEC_H
-#define AC_KVEC_H
-
-#include <stdlib.h>
-
-#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-
-#define kvec_t(type) struct { size_t n, m; type *a; }
-#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
-#define kv_destroy(v) free((v).a)
-#define kv_A(v, i) ((v).a[(i)])
-#define kv_pop(v) ((v).a[--(v).n])
-#define kv_size(v) ((v).n)
-#define kv_max(v) ((v).m)
-
-#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m))
-
-#define kv_copy(type, v1, v0) do { \
- if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \
- (v1).n = (v0).n; \
- memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \
- } while (0) \
-
-#define kv_push(type, v, x) do { \
- if ((v).n == (v).m) { \
- (v).m = (v).m? (v).m<<1 : 2; \
- (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
- } \
- (v).a[(v).n++] = (x); \
- } while (0)
-
-#define kv_pushp(type, v) (((v).n == (v).m)? \
- ((v).m = ((v).m? (v).m<<1 : 2), \
- (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
- : 0), ((v).a + ((v).n++))
-
-#define kv_a(type, v, i) (((v).m <= (size_t)(i)? \
- ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
- (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
- : (v).n <= (size_t)(i)? (v).n = (i) + 1 \
- : 0), (v).a[(i)])
-
-#endif
=====================================
seqtk.c
=====================================
@@ -1,6 +1,6 @@
/* The MIT License
- Copyright (c) 20082-2012 by Heng Li <lh3 at me.com>
+ Copyright (c) 2008-2016 Broad Institute
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -57,9 +57,10 @@ reghash_t *stk_reg_read(const char *fn)
int dret;
kstring_t *str;
// read the list
- str = calloc(1, sizeof(kstring_t));
fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) return 0;
ks = ks_init(fp);
+ str = calloc(1, sizeof(kstring_t));
while (ks_getuntil(ks, 0, str, &dret) >= 0) {
int beg = -1, end = -1;
reglist_t *p;
@@ -298,10 +299,15 @@ int stk_trimfq(int argc, char *argv[])
fprintf(stderr, " -b INT trim INT bp from left (non-zero to disable -q/-l) [0]\n");
fprintf(stderr, " -e INT trim INT bp from right (non-zero to disable -q/-l) [0]\n");
fprintf(stderr, " -L INT retain at most INT bp from the 5'-end (non-zero to disable -q/-l) [0]\n");
+ fprintf(stderr, " -Q force FASTQ output\n");
fprintf(stderr, "\n");
return 1;
}
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
for (i = 0; i < 128; ++i)
q_int2real[i] = pow(10., -(i - 33) / 10.);
@@ -336,12 +342,12 @@ int stk_trimfq(int argc, char *argv[])
end = beg + min_len;
}
} else beg = 0, end = seq->seq.l;
- putchar(seq->qual.l? '@' : '>'); fputs(seq->name.s, stdout);
+ putchar(seq->is_fastq? '@' : '>'); fputs(seq->name.s, stdout);
if (seq->comment.l) {
putchar(' '); puts(seq->comment.s);
} else putchar('\n');
fwrite(seq->seq.s + beg, 1, end - beg, stdout); putchar('\n');
- if (seq->qual.l) {
+ if (seq->is_fastq) {
puts("+");
fwrite(seq->qual.s + beg, 1, end - beg, stdout); putchar('\n');
}
@@ -372,6 +378,10 @@ int stk_comp(int argc, char *argv[])
return 1;
}
fp = optind < argc && strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
dummy.n= dummy.m = 1; dummy.a = calloc(1, 8);
while ((l = kseq_read(seq)) >= 0) {
@@ -437,6 +447,10 @@ int stk_randbase(int argc, char *argv[])
return 1;
}
fp = (strcmp(argv[1], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[1], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
while ((l = kseq_read(seq)) >= 0) {
int i;
@@ -489,6 +503,10 @@ int stk_hety(int argc, char *argv[])
}
}
fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
win_step = win_size / n_start;
buf = calloc(win_size, 1);
@@ -547,8 +565,16 @@ int stk_subseq(int argc, char *argv[])
return 1;
}
h = stk_reg_read(argv[optind+1]);
+ if (h == 0) {
+ fprintf(stderr, "[E::%s] failed to read the list of regions in file '%s'\n", __func__, argv[optind+1]);
+ return 1;
+ }
// subseq
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
while ((l = kseq_read(seq)) >= 0) {
reglist_t *p;
@@ -626,6 +652,10 @@ int stk_mergefa(int argc, char *argv[])
fp[i] = strcmp(argv[optind+i], "-")? gzopen(argv[optind+i], "r") : gzdopen(fileno(stdin), "r");
seq[i] = kseq_init(fp[i]);
}
+ if (fp[0] == 0 || fp[1] == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
cnt[0] = cnt[1] = cnt[2] = cnt[3] = cnt[4] = 0;
srand48(11);
while (kseq_read(seq[0]) >= 0) {
@@ -707,6 +737,10 @@ int stk_famask(int argc, char *argv[])
fp[i] = strcmp(argv[optind+i], "-")? gzopen(argv[optind+i], "r") : gzdopen(fileno(stdin), "r");
seq[i] = kseq_init(fp[i]);
}
+ if (fp[0] == 0 || fp[1] == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
while (kseq_read(seq[0]) >= 0) {
int min_l, c[2];
kseq_read(seq[1]);
@@ -746,6 +780,10 @@ int stk_mutfa(int argc, char *argv[])
// read the list
str = calloc(1, sizeof(kstring_t));
fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
ks = ks_init(fp);
while (ks_getuntil(ks, 0, str, &dret) >= 0) {
char *s = strdup(str->s);
@@ -775,6 +813,10 @@ int stk_mutfa(int argc, char *argv[])
free(str->s); free(str);
// mutfa
fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
while ((l = kseq_read(seq)) >= 0) {
reglist_t *p;
@@ -817,6 +859,10 @@ int stk_listhet(int argc, char *argv[])
return 1;
}
fp = (strcmp(argv[1], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[1], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
while ((l = kseq_read(seq)) >= 0) {
for (i = 0; i < l; ++i) {
@@ -902,6 +948,10 @@ int stk_cutN(int argc, char *argv[])
return 1;
}
fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
ks = kseq_init(fp);
while ((l = kseq_read(ks)) >= 0) {
int k = 0, begin = 0, end = 0;
@@ -930,6 +980,10 @@ int stk_hrun(int argc, char *argv[])
}
if (argc == optind + 2) min_len = atoi(argv[optind+1]);
fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
ks = kseq_init(fp);
while (kseq_read(ks) >= 0) {
c = ks->seq.s[0]; l = 1; beg = 0;
@@ -989,7 +1043,7 @@ int stk_sample(int argc, char *argv[])
return 1;
}
frac = atof(argv[optind+1]);
- if (frac > 1.) num = (uint64_t)(frac + .499), frac = 0.;
+ if (frac >= 1.0) num = (uint64_t)(frac + .499), frac = 0.;
else if (twopass) {
fprintf(stderr, "[W::%s] when sampling a fraction, option -2 is ignored.", __func__);
twopass = 0;
@@ -1006,6 +1060,10 @@ int stk_sample(int argc, char *argv[])
}
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
n_seqs = 0;
while (kseq_read(seq) >= 0) {
@@ -1037,6 +1095,10 @@ int stk_sample(int argc, char *argv[])
buf = malloc(num * 8);
for (i = 0; i < num; ++i) buf[i] = UINT64_MAX;
fp = gzopen(argv[optind], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
n_seqs = 0;
while (kseq_read(seq) >= 0) {
@@ -1117,14 +1179,14 @@ int stk_seq(int argc, char *argv[])
{
gzFile fp;
kseq_t *seq;
- int c, qual_thres = 0, flag = 0, qual_shift = 33, mask_chr = 0, min_len = 0, max_q = 255;
+ int c, qual_thres = 0, flag = 0, qual_shift = 33, mask_chr = 0, min_len = 0, max_q = 255, fake_qual = -1;
unsigned i, line_len = 0;
int64_t n_seqs = 0;
double frac = 1.;
khash_t(reg) *h = 0;
krand_t *kr = 0;
- while ((c = getopt(argc, argv, "N12q:l:Q:aACrn:s:f:M:L:cVUX:S")) >= 0) {
+ while ((c = getopt(argc, argv, "N12q:l:Q:aACrn:s:f:M:L:cVUX:SF:")) >= 0) {
switch (c) {
case 'a':
case 'A': flag |= 1; break;
@@ -1146,6 +1208,7 @@ int stk_seq(int argc, char *argv[])
case 'L': min_len = atoi(optarg); break;
case 's': kr = kr_srand(atol(optarg)); break;
case 'f': frac = atof(optarg); break;
+ case 'F': fake_qual = *optarg; break;
}
}
if (kr == 0) kr = kr_srand(11);
@@ -1161,6 +1224,7 @@ int stk_seq(int argc, char *argv[])
fprintf(stderr, " -f FLOAT sample FLOAT fraction of sequences [1]\n");
fprintf(stderr, " -M FILE mask regions in BED or name list FILE [null]\n");
fprintf(stderr, " -L INT drop sequences with length shorter than INT [0]\n");
+ fprintf(stderr, " -F CHAR fake FASTQ quality []\n");
fprintf(stderr, " -c mask complement region (effective with -M)\n");
fprintf(stderr, " -r reverse complement\n");
fprintf(stderr, " -A force FASTA output (discard quality)\n");
@@ -1177,6 +1241,10 @@ int stk_seq(int argc, char *argv[])
}
if (line_len == 0) line_len = UINT_MAX;
fp = optind < argc && strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
qual_thres += qual_shift;
while (kseq_read(seq) >= 0) {
@@ -1215,6 +1283,15 @@ int stk_seq(int argc, char *argv[])
for (i = 0; i < seq->seq.l; ++i)
seq->seq.s[i] = toupper(seq->seq.s[i]);
if (flag & 1) seq->qual.l = 0; // option -a: fastq -> fasta
+ else if (fake_qual >= 33 && fake_qual <= 127) {
+ if (seq->qual.m < seq->seq.m) {
+ seq->qual.m = seq->seq.m;
+ seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);
+ }
+ seq->qual.l = seq->seq.l;
+ memset(seq->qual.s, fake_qual, seq->qual.l);
+ seq->qual.s[seq->qual.l] = 0;
+ }
if (flag & 2) seq->comment.l = 0; // option -C: drop fasta/q comments
if (h) stk_mask(seq, h, flag&8, mask_chr); // masking
if (flag & 4) { // option -r: reverse complement
@@ -1274,6 +1351,10 @@ int stk_gc(int argc, char *argv[])
q = (1.0f - frac) / frac;
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
while (kseq_read(seq) >= 0) {
int i, start = 0, max_i = 0, n_hits = 0, start_hits = 0, max_hits = 0;
@@ -1317,6 +1398,10 @@ int stk_mergepe(int argc, char *argv[])
}
fp1 = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
fp2 = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
+ if (fp1 == 0 || fp2 == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq[0] = kseq_init(fp1);
seq[1] = kseq_init(fp2);
while (kseq_read(seq[0]) >= 0) {
@@ -1344,6 +1429,10 @@ int stk_dropse(int argc, char *argv[])
return 1;
}
fp = argc > 1 && strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
memset(&last, 0, sizeof(kseq_t));
@@ -1381,6 +1470,10 @@ int stk_rename(int argc, char *argv[])
return 1;
}
fp = argc > 1 && strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
if (argc > 2) prefix = argv[2];
@@ -1444,6 +1537,10 @@ int stk_kfreq(int argc, char *argv[])
}
fp = argc == 2 || strcmp(argv[2], "-") == 0? gzdopen(fileno(stdin), "r") : gzopen(argv[2], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
ks = kseq_init(fp);
while (kseq_read(ks) >= 0) {
int k, x[2], cnt[2], cnt_nei[2], which;
@@ -1517,6 +1614,10 @@ int stk_fqchk(int argc, char *argv[])
return 1;
}
fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
+ if (fp == 0) {
+ fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
+ return 1;
+ }
seq = kseq_init(fp);
for (k = 0; k <= 93; ++k)
perr[k] = pow(10., -.1 * k);
@@ -1574,7 +1675,7 @@ static int usage()
{
fprintf(stderr, "\n");
fprintf(stderr, "Usage: seqtk <command> <arguments>\n");
- fprintf(stderr, "Version: 1.2-r94\n\n");
+ fprintf(stderr, "Version: 1.3-r106\n\n");
fprintf(stderr, "Command: seq common transformation of FASTA/Q\n");
fprintf(stderr, " comp get the nucleotide composition of FASTA/Q\n");
fprintf(stderr, " sample subsample sequences\n");
@@ -1599,28 +1700,27 @@ static int usage()
int main(int argc, char *argv[])
{
if (argc == 1) return usage();
- if (strcmp(argv[1], "comp") == 0) stk_comp(argc-1, argv+1);
- else if (strcmp(argv[1], "fqchk") == 0) stk_fqchk(argc-1, argv+1);
- else if (strcmp(argv[1], "hety") == 0) stk_hety(argc-1, argv+1);
- else if (strcmp(argv[1], "gc") == 0) stk_gc(argc-1, argv+1);
- else if (strcmp(argv[1], "subseq") == 0) stk_subseq(argc-1, argv+1);
- else if (strcmp(argv[1], "mutfa") == 0) stk_mutfa(argc-1, argv+1);
- else if (strcmp(argv[1], "mergefa") == 0) stk_mergefa(argc-1, argv+1);
- else if (strcmp(argv[1], "mergepe") == 0) stk_mergepe(argc-1, argv+1);
- else if (strcmp(argv[1], "dropse") == 0) stk_dropse(argc-1, argv+1);
- else if (strcmp(argv[1], "randbase") == 0) stk_randbase(argc-1, argv+1);
- else if (strcmp(argv[1], "cutN") == 0) stk_cutN(argc-1, argv+1);
- else if (strcmp(argv[1], "listhet") == 0) stk_listhet(argc-1, argv+1);
- else if (strcmp(argv[1], "famask") == 0) stk_famask(argc-1, argv+1);
- else if (strcmp(argv[1], "trimfq") == 0) stk_trimfq(argc-1, argv+1);
- else if (strcmp(argv[1], "hrun") == 0) stk_hrun(argc-1, argv+1);
- else if (strcmp(argv[1], "sample") == 0) stk_sample(argc-1, argv+1);
- else if (strcmp(argv[1], "seq") == 0) stk_seq(argc-1, argv+1);
- else if (strcmp(argv[1], "kfreq") == 0) stk_kfreq(argc-1, argv+1);
- else if (strcmp(argv[1], "rename") == 0) stk_rename(argc-1, argv+1);
+ if (strcmp(argv[1], "comp") == 0) return stk_comp(argc-1, argv+1);
+ else if (strcmp(argv[1], "fqchk") == 0) return stk_fqchk(argc-1, argv+1);
+ else if (strcmp(argv[1], "hety") == 0) return stk_hety(argc-1, argv+1);
+ else if (strcmp(argv[1], "gc") == 0) return stk_gc(argc-1, argv+1);
+ else if (strcmp(argv[1], "subseq") == 0) return stk_subseq(argc-1, argv+1);
+ else if (strcmp(argv[1], "mutfa") == 0) return stk_mutfa(argc-1, argv+1);
+ else if (strcmp(argv[1], "mergefa") == 0) return stk_mergefa(argc-1, argv+1);
+ else if (strcmp(argv[1], "mergepe") == 0) return stk_mergepe(argc-1, argv+1);
+ else if (strcmp(argv[1], "dropse") == 0) return stk_dropse(argc-1, argv+1);
+ else if (strcmp(argv[1], "randbase") == 0) return stk_randbase(argc-1, argv+1);
+ else if (strcmp(argv[1], "cutN") == 0) return stk_cutN(argc-1, argv+1);
+ else if (strcmp(argv[1], "listhet") == 0) return stk_listhet(argc-1, argv+1);
+ else if (strcmp(argv[1], "famask") == 0) return stk_famask(argc-1, argv+1);
+ else if (strcmp(argv[1], "trimfq") == 0) return stk_trimfq(argc-1, argv+1);
+ else if (strcmp(argv[1], "hrun") == 0) return stk_hrun(argc-1, argv+1);
+ else if (strcmp(argv[1], "sample") == 0) return stk_sample(argc-1, argv+1);
+ else if (strcmp(argv[1], "seq") == 0) return stk_seq(argc-1, argv+1);
+ else if (strcmp(argv[1], "kfreq") == 0) return stk_kfreq(argc-1, argv+1);
+ else if (strcmp(argv[1], "rename") == 0) return stk_rename(argc-1, argv+1);
else {
fprintf(stderr, "[main] unrecognized command '%s'. Abort!\n", argv[1]);
return 1;
}
- return 0;
}
View it on GitLab: https://salsa.debian.org/med-team/seqtk/commit/ba538534001ad07ceffd5ca1157bc37df5f2ece2
--
View it on GitLab: https://salsa.debian.org/med-team/seqtk/commit/ba538534001ad07ceffd5ca1157bc37df5f2ece2
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180920/ed518564/attachment-0001.html>
More information about the debian-med-commit mailing list