[med-svn] samtools 02/03: New upstream version 1.6
Andreas Tille
tille at debian.org
Mon Dec 11 14:22:01 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository samtools.
commit 8abd692651ac07ea7138fe705c83b3253081f3be
Author: Andreas Tille <tille at debian.org>
Date: Mon Dec 11 15:18:48 2017 +0100
New upstream version 1.6
---
.appveyor.yml | 52 ++
INSTALL | 36 ++
Makefile | 34 +-
NEWS | 27 +
README | 4 +-
bam.h | 2 +-
bam_lpileup.c | 1 +
bam_markdup.c | 844 +++++++++++++++++++++++++++++
bam_mate.c | 50 +-
bam_plcmd.c | 22 +
bam_sort.c | 494 ++++++++++++-----
bamtk.c | 15 +
bedidx.c | 4 -
config.h.in | 50 --
configure.ac | 34 +-
debian/NEWS | 9 -
debian/README.Debian | 20 -
debian/README.source | 4 -
debian/ax_with_curses.m4 | 578 --------------------
debian/ax_with_htslib.m4 | 140 -----
debian/changelog | 362 -------------
debian/compat | 1 -
debian/control | 46 --
debian/copyright | 75 ---
debian/gbp.conf | 11 -
debian/mans/ace2sam.1 | 24 -
debian/mans/samtools.pl.1 | 19 -
debian/mans/wgsim.1 | 44 --
debian/patches/dynamic-build.patch | 17 -
debian/patches/literal_version.patch | 16 -
debian/patches/mayhem.patch | 69 ---
debian/patches/series | 3 -
debian/patches/spelling.patch | 15 -
debian/reference | 12 -
debian/rules | 33 --
debian/samtools-Dockerfile | 84 ---
debian/samtools-docker.yml | 5 -
debian/samtools-faidx.cwl | 78 ---
debian/samtools-index.cwl | 86 ---
debian/samtools-metadata.yaml | 73 ---
debian/samtools-rmdup.cwl | 75 ---
debian/samtools-sort.cwl | 106 ----
debian/samtools-test.install | 1 -
debian/samtools-view.cwl | 242 ---------
debian/samtools.bash-completion | 21 -
debian/samtools.docs | 2 -
debian/samtools.install | 4 -
debian/samtools.lintian-overrides | 10 -
debian/source/format | 1 -
debian/source/options | 2 -
debian/tests/control | 3 -
debian/tests/samtools-test | 7 -
debian/upstream/metadata | 29 -
debian/watch | 3 -
dict.c | 4 +
misc/blast2sam.pl | 2 +-
misc/maq2sam.c | 5 +-
misc/wgsim.1 | 2 +-
misc/wgsim.c | 6 +-
padding.c | 31 +-
phase.c | 1 +
sam_view.c | 33 +-
samtools.1 | 60 +-
test/bam2fq/9.1.fq.expected | 28 +
test/bam2fq/9.2.fq.expected | 12 +
test/dat/bam2fq.703.sam | 15 +
test/markdup/1_name_sort.expected.sam | 0
test/markdup/1_name_sort.expected.sam.err | 1 +
test/markdup/1_name_sort.sam | 18 +
test/markdup/2_bad_order.expected.sam | 7 +
test/markdup/2_bad_order.expected.sam.err | 1 +
test/markdup/2_bad_order.sam | 19 +
test/markdup/3_missing_mc.expected.sam | 2 +
test/markdup/3_missing_mc.expected.sam.err | 2 +
test/markdup/3_missing_mc.sam | 18 +
test/markdup/4_missing_ms.expected.sam | 2 +
test/markdup/4_missing_ms.expected.sam.err | 2 +
test/markdup/4_missing_ms.sam | 18 +
test/markdup/5_markdup.expected.sam | 18 +
test/markdup/5_markdup.sam | 18 +
test/markdup/6_remove_dups.expected.sam | 12 +
test/markdup/6_remove_dups.sam | 18 +
test/merge/tag.pg.merge.expected.sam | 24 +-
test/mpileup/regression.sh | 3 +-
test/test.pl | 237 +++++---
version.sh | 13 +
86 files changed, 1985 insertions(+), 2646 deletions(-)
diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 0000000..30ee338
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,52 @@
+# version format.
+# you can use {branch} name in version format too
+# version: 1.0.{build}-{branch}
+version: 'vers.{build}'
+
+# branches to build
+branches:
+ # Whitelist
+ only:
+ - develop
+
+ # Blacklist
+ except:
+ - gh-pages
+
+# Do not build on tags (GitHub and BitBucket)
+skip_tags: true
+
+# Skipping commits affecting specific files (GitHub only). More details here: /docs/appveyor-yml
+#skip_commits:
+# files:
+# - docs/*
+# - '**/*.html'
+
+# We use Mingw/Msys, so use pacman for installs
+install:
+ - set HOME=.
+ - set MSYSTEM=MINGW64
+ - set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
+ - set MINGWPREFIX=x86_64-w64-mingw32
+ - "sh -lc \"pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 mingw-w64-x86_64-xz mingw-w64-x86_64-curl\""
+
+# The user may have e.g. jkbonfield/samtools branch FOO and an associated
+# jkbonfield/htslib branch FOO. If so use that related htslib, obtained by
+# munging $APPVEYOR_REPO_NAME. Otherwise we assume this is a PR only to
+# samtools and should be linked against samtools(org)/htslib develop branch.
+clone_script:
+ - "sh -lc \"git clone --branch=$APPVEYOR_REPO_BRANCH https://github.com/$APPVEYOR_REPO_NAME $APPVEYOR_BUILD_FOLDER\""
+ - "sh -lc \"git clone --branch=$APPVEYOR_REPO_BRANCH https://github.com/`echo $APPVEYOR_REPO_NAME|sed 's#/samtools#/htslib#'`.git $APPVEYOR_BUILD_FOLDER/htslib || git clone https://github.com/samtools/htslib.git $APPVEYOR_BUILD_FOLDER/htslib \""
+
+build_script:
+ - set HOME=.
+ - set MSYSTEM=MINGW64
+ - set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
+ - "sh -lc \"(cd htslib; aclocal && autoheader && autoconf)\""
+ - "sh -lc \"aclocal && autoheader && autoconf && ./configure && make -j2\""
+
+test_script:
+ - set HOME=.
+ - set MSYSTEM=MINGW64
+ - set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
+ - "sh -lc \"make test\""
diff --git a/INSTALL b/INSTALL
index dd74794..b9742d3 100644
--- a/INSTALL
+++ b/INSTALL
@@ -7,10 +7,46 @@ a curses or GNU ncurses library <http://www.gnu.org/software/ncurses/>.
The bzip2 and liblzma dependencies can be removed if full CRAM support
is not needed - see HTSlib's INSTALL file for details.
+The following programs are required:
+
+ GNU make
+ C compiler (e.g. gcc or clang)
+
+In addition, building the configure script requires:
+
+ autoheader
+ autoconf
+
+Running the configure script uses awk, along with a number of
+standard UNIX tools (cat, cp, grep, mv, rm, sed, among others). Almost
+all installations will have these already.
+
+Running the test harness (make test) uses:
+
+ bash
+ perl
+
If you are unsure about this, be sure to use './configure' to determine
whether you have these libraries and to help diagnose which packages may
need to be installed on your build machine to provide them.
+Building Configure
+==================
+
+This step is only needed if configure.ac has been changed, or if configure
+does not exist (for example, when building from a git clone). The
+configure script and config.h.in can be built by running:
+
+ autoheader
+ autoconf -Wno-syntax
+
+If you have a full GNU autotools install, you can alternatively run:
+
+ autoreconf
+
+When running these tools, you may see warnings about AC_CONFIG_SUBDIRS.
+These are expected, and should be ignored.
+
Basic Installation
==================
diff --git a/Makefile b/Makefile
index 36be11d..59def9a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
# Makefile for samtools, utilities for the Sequence Alignment/Map format.
#
-# Copyright (C) 2008-2016 Genome Research Ltd.
+# Copyright (C) 2008-2017 Genome Research Ltd.
# Portions copyright (C) 2010-2012 Broad Institute.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -38,7 +38,7 @@ AOBJS= bam_index.o bam_plcmd.o sam_view.o \
cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o \
faidx.o dict.o stats.o stats_isize.o bam_flags.o bam_split.o \
bam_tview.o bam_tview_curses.o bam_tview_html.o bam_lpileup.o \
- bam_quickcheck.o bam_addrprg.o
+ bam_quickcheck.o bam_addrprg.o bam_markdup.o
prefix = /usr/local
exec_prefix = $(prefix)
@@ -102,20 +102,12 @@ config.h:
include config.mk
-
-PACKAGE_VERSION = 1.5
-
-# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
-# description of the working tree: either a release tag with the same value
-# as $(PACKAGE_VERSION) above, or an exact description likely based on a tag.
-# $(shell), :=, etc are GNU Make-specific. If you don't have GNU Make,
-# comment out this conditional.
-ifneq "$(wildcard .git)" ""
-PACKAGE_VERSION := $(shell git describe --always --dirty)
+# If not using GNU make, you need to copy the version number from version.sh
+# into here.
+PACKAGE_VERSION = $(shell ./version.sh)
# Force version.h to be remade if $(PACKAGE_VERSION) has changed.
version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force))
-endif
# If you don't have GNU Make but are building from a Git repository, you may
# wish to replace this with a rule that always rebuilds version.h:
@@ -203,6 +195,7 @@ sam_view.o: sam_view.c config.h $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstri
sample.o: sample.c config.h $(sample_h) $(htslib_khash_h)
stats_isize.o: stats_isize.c config.h stats_isize.h $(htslib_khash_h)
stats.o: stats.c config.h $(htslib_faidx_h) $(htslib_sam_h) $(htslib_hts_h) sam_header.h $(htslib_khash_str2int_h) samtools.h $(htslib_khash_h) $(htslib_kstring_h) stats_isize.h $(sam_opts_h)
+bam_markdup.o: bam_markdup.c config.h $(htslib_sam_h) $(sam_opts_h) samtools.h $(bam_h) $(htslib_khash_h)
# test programs
@@ -210,8 +203,11 @@ stats.o: stats.c config.h $(htslib_faidx_h) $(htslib_sam_h) $(htslib_hts_h) sam_
# For tests that might use it, set $REF_PATH explicitly to use only reference
# areas within the test suite (or set it to ':' to use no reference areas).
# (regression.sh sets $REF_PATH to a subdirectory itself.)
+#
+# If using MSYS, avoid poor shell expansion via:
+# MSYS2_ARG_CONV_EXCL="*" make check
check test: samtools $(BGZIP) $(TEST_PROGRAMS)
- REF_PATH=: test/test.pl --exec bgzip=$(BGZIP)
+ REF_PATH=: test/test.pl --exec bgzip=$(BGZIP) $${TEST_OPTS:-}
test/merge/test_bam_translate test/merge/test_bam_translate.tmp
test/merge/test_rtrans_build
test/merge/test_trans_tbl_init
@@ -277,18 +273,18 @@ misc/md5fa: misc/md5fa.o $(HTSLIB)
misc/md5sum-lite: misc/md5sum-lite.o $(HTSLIB)
$(CC) $(ALL_LDFLAGS) -o $@ misc/md5sum-lite.o $(HTSLIB_LIB) $(ALL_LIBS)
-misc/wgsim: misc/wgsim.o
- $(CC) $(LDFLAGS) -o $@ misc/wgsim.o -lm $(ALL_LIBS)
+misc/wgsim: misc/wgsim.o $(HTSLIB)
+ $(CC) $(ALL_LDFLAGS) -o $@ misc/wgsim.o -lm $(HTSLIB_LIB) $(ALL_LIBS)
misc/ace2sam.o: misc/ace2sam.c config.h $(htslib_kstring_h) $(htslib_kseq_h)
misc/md5fa.o: misc/md5fa.c config.h $(htslib_kseq_h) $(htslib_hts_h)
misc/md5sum-lite.o: misc/md5sum-lite.c config.h $(htslib_hts_h)
-misc/wgsim.o: misc/wgsim.c config.h $(htslib_kseq_h)
+misc/wgsim.o: misc/wgsim.c config.h version.h $(htslib_kseq_h)
-misc/maq2sam-short.o: misc/maq2sam.c config.h
+misc/maq2sam-short.o: misc/maq2sam.c config.h version.h
$(CC) $(CFLAGS) $(ALL_CPPFLAGS) -c -o $@ misc/maq2sam.c
-misc/maq2sam-long.o: misc/maq2sam.c config.h
+misc/maq2sam-long.o: misc/maq2sam.c config.h version.h
$(CC) $(CFLAGS) -DMAQ_LONGREADS $(ALL_CPPFLAGS) -c -o $@ misc/maq2sam.c
diff --git a/NEWS b/NEWS
index 742755f..f674dab 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,30 @@
+Release 1.6 (28th September 2017)
+--------------------
+
+* Added new markdup sub-command and '-m' option for fixmate. Used together,
+ they allow duplicates to be marked and optionally removed. This
+ fixes a number of problems with the old 'rmdup' sub-command, for
+ example samtools issue #497. 'rmdup' is kept for backwards compatibility
+ but 'markdup' should be used in preference.
+
+* Sort is now much better at keeping within the requested memory limit. It
+ should also be slightly faster and need fewer temporary files when the file
+ to be sorted does not fit in memory. (#593; thanks to Nathan Weeks.)
+
+* Sort no longer rewrites the header when merging from files. It can also
+ now merge from memory, so fewer temporary files need to be written and
+ it is better at sorting in parallel when everything fits in memory.
+
+* Both sort and merge now resolve ties when merging based on the position
+ in the input file(s). This makes them fully stable for all ordering
+ options. (Previously position sort was stable, but name and by tag
+ sorts were not).
+
+* New --output-qname option for mpileup.
+
+* Support for building on Windows using msys2/mingw64 or cygwin has
+ been improved.
+
Release 1.5 [Solstice Release] (21st June 2017)
--------------------
diff --git a/README b/README
index bc5e50f..f111fa5 100644
--- a/README
+++ b/README
@@ -9,7 +9,7 @@ Building samtools
The typical simple case of building Samtools using the HTSlib bundled within
this Samtools release tarball is done as follows:
- cd .../samtools-1.5 # Within the unpacked release directory
+ cd .../samtools-1.6 # Within the unpacked release directory
./configure
make
@@ -21,7 +21,7 @@ install samtools etc properly into a directory of your choosing. Building for
installation using the HTSlib bundled within this Samtools release tarball,
and building the various HTSlib utilities such as bgzip is done as follows:
- cd .../samtools-1.5 # Within the unpacked release directory
+ cd .../samtools-1.6 # Within the unpacked release directory
./configure --prefix=/path/to/location
make all all-htslib
make install install-htslib
diff --git a/bam.h b/bam.h
index 48388b7..2120875 100644
--- a/bam.h
+++ b/bam.h
@@ -38,7 +38,7 @@ DEALINGS IN THE SOFTWARE. */
@copyright Genome Research Ltd.
*/
-#define BAM_VERSION "1.5"
+#define BAM_VERSION "1.6"
#include <stdint.h>
#include <stdlib.h>
diff --git a/bam_lpileup.c b/bam_lpileup.c
index e20cc92..cc7a75b 100644
--- a/bam_lpileup.c
+++ b/bam_lpileup.c
@@ -29,6 +29,7 @@ DEALINGS IN THE SOFTWARE. */
#include <assert.h>
#include "bam_plbuf.h"
#include "bam_lpileup.h"
+#include "samtools.h"
#include <htslib/ksort.h>
#define TV_GAP 2
diff --git a/bam_markdup.c b/bam_markdup.c
new file mode 100644
index 0000000..cf6a82a
--- /dev/null
+++ b/bam_markdup.c
@@ -0,0 +1,844 @@
+/* bam_markdup.c -- Mark duplicates from a coord sorted file that has gone
+ through fixmates with the mate scoring option on.
+
+ Copyright (C) 2017 Genome Research Ltd.
+
+ Author: Andrew Whitwham <aw7 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+*/
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <zlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include "htslib/thread_pool.h"
+#include "htslib/sam.h"
+#include "sam_opts.h"
+#include "samtools.h"
+#include "htslib/khash.h"
+#include "htslib/klist.h"
+
+typedef struct {
+ int32_t single;
+ int32_t this_ref;
+ int32_t this_coord;
+ int32_t other_ref;
+ int32_t other_coord;
+ int32_t leftmost;
+ int32_t orientation;
+} key_data_t;
+
+typedef struct {
+ bam1_t *p;
+} in_hash_t;
+
+typedef struct {
+ bam1_t *b;
+ int32_t pos;
+ key_data_t pair_key;
+ key_data_t single_key;
+} read_queue_t;
+
+
+
+static khint32_t do_hash(unsigned char *key, khint32_t len);
+
+static khint_t hash_key(key_data_t key) {
+ int i = 0;
+ khint_t hash;
+
+ if (key.single) {
+ unsigned char sig[12];
+
+ memcpy(sig + i, &key.this_ref, 4); i += 4;
+ memcpy(sig + i, &key.this_coord, 4); i += 4;
+ memcpy(sig + i, &key.orientation, 4); i += 4;
+
+ hash = do_hash(sig, i);
+ } else {
+ unsigned char sig[24];
+
+ memcpy(sig + i, &key.this_ref, 4); i += 4;
+ memcpy(sig + i, &key.this_coord, 4); i += 4;
+ memcpy(sig + i, &key.other_ref, 4); i += 4;
+ memcpy(sig + i, &key.other_coord, 4); i += 4;
+ memcpy(sig + i, &key.leftmost, 4); i += 4;
+ memcpy(sig + i, &key.orientation, 4); i += 4;
+
+ hash = do_hash(sig, i);
+ }
+
+ return hash;
+}
+
+
+static int key_equal(key_data_t a, key_data_t b) {
+ int match = 1;
+
+ if (a.this_coord != b.this_coord)
+ match = 0;
+ else if (a.orientation != b.orientation)
+ match = 0;
+ else if (a.this_ref != b.this_ref)
+ match = 0;
+ else if (a.single != b.single)
+ match = 0;
+
+ if (!a.single) {
+ if (a.other_coord != b.other_coord)
+ match = 0;
+ else if (a.leftmost != b.leftmost)
+ match = 0;
+ else if (a.other_ref != b.other_ref)
+ match = 0;
+ }
+
+ return match;
+}
+
+
+#define __free_queue_element(p)
+#define O_FF 2
+#define O_RR 3
+#define O_FR 5
+#define O_RF 7
+
+KHASH_INIT(reads, key_data_t, in_hash_t, 1, hash_key, key_equal) // read map hash
+KLIST_INIT(read_queue, read_queue_t, __free_queue_element) // the reads buffer
+
+
+/* Calculate the mate's unclipped start from the mate position (op) and the
+   cigar string taken from the MC tag: op minus the leading S/H clips, plus one. */
+static int32_t unclipped_other_start(int32_t op, char *cigar) {
+    char *c = cigar;
+    int32_t clipped = 0;
+
+    while (*c && *c != '*') {
+        long num = 0;
+        // isdigit() needs an unsigned char value; passing a negative char is undefined
+        if (isdigit((unsigned char)*c)) {
+            num = strtol(c, &c, 10);
+        } else {
+            num = 1; // an operator with no explicit length counts as one
+        }
+
+        if (*c == 'S' || *c == 'H') { // clips
+            clipped += num;
+        } else {
+            break; // the first non-clip operator ends the leading clip run
+        }
+
+        c++;
+    }
+
+    return op - clipped + 1;
+}
+
+
+/* Calculate the current read's start based on the stored cigar string. */
+
+static int32_t unclipped_start(bam1_t *b) {
+ uint32_t *cigar = bam_get_cigar(b);
+ int32_t clipped = 0;
+ uint32_t i;
+
+ for (i = 0; i < b->core.n_cigar; i++) {
+ char c = bam_cigar_opchr(cigar[i]);
+
+ if (c == 'S' || c == 'H') { // clips
+ clipped += bam_cigar_oplen(cigar[i]);
+ } else {
+ break;
+ }
+ }
+
+ return b->core.pos - clipped + 1;
+}
+
+
+/* Calculate the mate's unclipped end from the mate position (op) and the cigar
+   string taken from the MC tag: op plus reference-consuming ops and trailing clips. */
+static int32_t unclipped_other_end(int32_t op, char *cigar) {
+    char *c = cigar;
+    int32_t refpos = 0;
+    int skip = 1;
+
+    while (*c && *c != '*') {
+        long num = 0;
+        // isdigit() needs an unsigned char value; passing a negative char is undefined
+        if (isdigit((unsigned char)*c)) {
+            num = strtol(c, &c, 10);
+        } else {
+            num = 1; // an operator with no explicit length counts as one
+        }
+
+        switch (*c) {
+            case 'M':
+            case 'D':
+            case 'N':
+            case '=':
+            case 'X':
+                refpos += num;
+                skip = 0; // ignore initial clips
+                break;
+
+            case 'S':
+            case 'H':
+                if (!skip) {
+                    refpos += num;
+                }
+                break;
+        }
+
+        c++;
+    }
+
+    return op + refpos;
+}
+
+
+/* Calculate the current read's end based on the stored cigar string. */
+
+static int32_t unclipped_end(bam1_t *b) {
+ uint32_t *cigar = bam_get_cigar(b);
+ int32_t end_pos, clipped = 0;
+ int32_t i;
+
+ end_pos = bam_endpos(b);
+
+ // now get the clipped end bases (if any)
+ // if we get to the beginning of the cigar string
+ // without hitting a non-clip then the results are meaningless
+ for (i = b->core.n_cigar - 1; i >= 0; i--) {
+ char c = bam_cigar_opchr(cigar[i]);
+
+ if (c == 'S' || c == 'H') { // clips
+ clipped += bam_cigar_oplen(cigar[i]);
+ } else {
+ break;
+ }
+ }
+
+ return end_pos + clipped;
+}
+
+
+/* The Bob Jenkins one_at_a_time hash to reduce the key to a 32 bit value. */
+
+static khint32_t do_hash(unsigned char *key, khint32_t len) {
+ khint32_t hash, i;
+
+ for (hash = 0, i = 0; i < len; ++i) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash;
+}
+
+
+/* Get mate score from tag. */
+
+static int64_t get_mate_score(bam1_t *b) {
+ uint8_t *data;
+ int64_t score;
+
+ if ((data = bam_aux_get(b, "ms"))) {
+ score = bam_aux2i(data);
+ } else {
+ fprintf(stderr, "[markdup] error: no ms score tag.\n");
+ return -1;
+ }
+
+ return score;
+}
+
+
+/* Calc current score from quality. */
+
+static int64_t calc_score(bam1_t *b)
+{
+ int64_t score = 0;
+ uint8_t *qual = bam_get_qual(b);
+ int i;
+
+ for (i = 0; i < b->core.l_qseq; i++) {
+ if (qual[i] >= 15) score += qual[i];
+ }
+
+ return score;
+}
+
+
+/* Create a signature hash of the current read and its pair.
+ Uses the unclipped start (or end depending on orientation),
+ the reference id, orientation and whether the current
+ read is leftmost of the pair. */
+
+static int make_pair_key(key_data_t *key, bam1_t *bam) {
+ int32_t this_ref, this_coord, this_end;
+ int32_t other_ref, other_coord, other_end;
+ int32_t orientation, leftmost;
+ uint8_t *data;
+ char *cig;
+
+ this_ref = bam->core.tid + 1; // avoid a 0 being put into the hash
+ other_ref = bam->core.mtid + 1;
+
+ this_coord = unclipped_start(bam);
+ this_end = unclipped_end(bam);
+
+ if ((data = bam_aux_get(bam, "MC"))) {
+ cig = bam_aux2Z(data);
+ other_end = unclipped_other_end(bam->core.mpos, cig);
+ other_coord = unclipped_other_start(bam->core.mpos, cig);
+ } else {
+ fprintf(stderr, "[markdup] error: no MC tag.\n");
+ return 1;
+ }
+
+ // work out orientations
+ if (this_ref != other_ref) {
+ leftmost = this_ref < other_ref;
+ } else {
+ if (bam_is_rev(bam) == bam_is_mrev(bam)) {
+ if (!bam_is_rev(bam)) {
+ leftmost = this_coord <= other_coord;
+ } else {
+ leftmost = this_end <= other_end;
+ }
+ } else {
+ if (bam_is_rev(bam)) {
+ leftmost = this_end <= other_coord;
+ } else {
+ leftmost = this_coord <= other_end;
+ }
+ }
+ }
+
+ // pair orientation
+ if (leftmost) {
+ if (bam_is_rev(bam) == bam_is_mrev(bam)) {
+ other_coord = other_end;
+
+ if (!bam_is_rev(bam)) {
+ if (bam->core.flag & BAM_FREAD1) {
+ orientation = O_FF;
+ } else {
+ orientation = O_RR;
+ }
+ } else {
+ if (bam->core.flag & BAM_FREAD1) {
+ orientation = O_RR;
+ } else {
+ orientation = O_FF;
+ }
+ }
+ } else {
+ if (!bam_is_rev(bam)) {
+ orientation = O_FR;
+ other_coord = other_end;
+ } else {
+ orientation = O_RF;
+ this_coord = this_end;
+ }
+ }
+ } else {
+ if (bam_is_rev(bam) == bam_is_mrev(bam)) {
+ this_coord = this_end;
+
+ if (!bam_is_rev(bam)) {
+ if (bam->core.flag & BAM_FREAD1) {
+ orientation = O_RR;
+ } else {
+ orientation = O_FF;
+ }
+ } else {
+ if (bam->core.flag & BAM_FREAD1) {
+ orientation = O_FF;
+ } else {
+ orientation = O_RR;
+ }
+ }
+ } else {
+ if (!bam_is_rev(bam)) {
+ orientation = O_RF;
+ other_coord = other_end;
+ } else {
+ orientation = O_FR;
+ this_coord = this_end;
+ }
+ }
+ }
+
+ if (!leftmost)
+ leftmost = 13;
+ else
+ leftmost = 11;
+
+ key->single = 0;
+ key->this_ref = this_ref;
+ key->this_coord = this_coord;
+ key->other_ref = other_ref;
+ key->other_coord = other_coord;
+ key->leftmost = leftmost;
+ key->orientation = orientation;
+
+ return 0;
+}
+
+
+/* Create a signature hash of single read (or read with an unmatched pair).
+ Uses unclipped start (or end depending on orientation), reference id,
+ and orientation. */
+
+static void make_single_key(key_data_t *key, bam1_t *bam) {
+ int32_t this_ref, this_coord;
+ int32_t orientation;
+
+ this_ref = bam->core.tid + 1; // avoid a 0 being put into the hash
+
+ if (bam_is_rev(bam)) {
+ this_coord = unclipped_end(bam);
+ orientation = O_RR;
+ } else {
+ this_coord = unclipped_start(bam);
+ orientation = O_FF;
+ }
+
+ key->single = 1;
+ key->this_ref = this_ref;
+ key->this_coord = this_coord;
+ key->orientation = orientation;
+}
+
+
+/* Compare the reads near each other (coordinate sorted) and try to spot the duplicates.
+   Generally the highest quality scoring is chosen as the original and all others the duplicates.
+   The score is based on the sum of the quality values (>= 15) of the read and its mate (if any).
+   While single reads are compared to only one read of a pair, the pair will be chosen as the original.
+   The comparison is done on position and orientation, see above for details.
+   Returns 0 on success, or 1 after printing a message to stderr on any error. */
+static int bam_mark_duplicates(samFile *in, samFile *out, int remove_dups, int32_t max_length, int do_stats) {
+    bam_hdr_t *header;
+    khiter_t k;
+    khash_t(reads) *pair_hash = kh_init(reads);
+    khash_t(reads) *single_hash = kh_init(reads);
+    klist_t(read_queue) *read_buffer = kl_init(read_queue);
+    kliter_t(read_queue) *rq;
+    int32_t prev_tid, prev_coord;
+    read_queue_t *in_read;
+    int ret;
+    int reading, writing, excluded, duplicate, single, pair, single_dup, examined;
+
+    if ((header = sam_hdr_read(in)) == NULL) {
+        fprintf(stderr, "[markdup] error reading header\n");
+        return 1;
+    }
+
+    // accept unknown, unsorted or coordinate sort order, but error on queryname sorted.
+    // only really works on coordinate sorted files.
+    if ((header->l_text > 3) && (strncmp(header->text, "@HD", 3) == 0)) {
+        char *p, *q;
+
+        p = strstr(header->text, "\tSO:queryname");
+        q = strchr(header->text, '\n');
+
+        // looking for SO:queryname within @HD only
+        // (e.g. must ignore in a @CO comment line later in header)
+        if ((p != NULL) && ((q == NULL) || (p < q))) { // no newline: @HD is the whole header
+            fprintf(stderr, "[markdup] error: queryname sorted, must be sorted by coordinate.\n");
+            return 1;
+        }
+    }
+
+    if (sam_hdr_write(out, header) < 0) {
+        fprintf(stderr, "[markdup] error writing header.\n");
+        return 1;
+    }
+
+    // used for coordinate order checks
+    prev_tid = prev_coord = 0;
+
+    // get the buffer going
+    in_read = kl_pushp(read_queue, read_buffer);
+
+
+    if ((in_read->b = bam_init1()) == NULL) {
+        fprintf(stderr, "[markdup] error: unable to allocate memory for alignment.\n");
+        return 1;
+    }
+
+    reading = writing = excluded = single_dup = duplicate = examined = pair = single = 0;
+
+    while ((ret = sam_read1(in, header, in_read->b)) >= 0) {
+
+        // do some basic coordinate order checks
+        if (in_read->b->core.tid >= 0) { // -1 for unmapped reads
+            if (in_read->b->core.tid < prev_tid ||
+               ((in_read->b->core.tid == prev_tid) && (in_read->b->core.pos < prev_coord))) {
+                fprintf(stderr, "[markdup] error: bad coordinate order.\n");
+                return 1;
+            }
+        }
+
+        prev_coord = in_read->pos = in_read->b->core.pos;
+        prev_tid = in_read->b->core.tid;
+        in_read->pair_key.single = 1;
+        in_read->single_key.single = 0;
+
+        reading++;
+
+        // read must not be secondary, supplementary, unmapped or failed QC
+        if (!(in_read->b->core.flag & (BAM_FSECONDARY | BAM_FSUPPLEMENTARY | BAM_FUNMAP | BAM_FQCFAIL))) {
+            examined++;
+
+            // look at the pairs first
+            if ((in_read->b->core.flag & BAM_FPAIRED) && !(in_read->b->core.flag & BAM_FMUNMAP)) {
+                int ret;
+                int64_t mate_tmp; // same type as get_mate_score() returns
+                key_data_t pair_key, single_key;
+                in_hash_t *bp;
+
+                if (make_pair_key(&pair_key, in_read->b)) {
+                    fprintf(stderr, "[markdup] error: unable to assign pair hash key.\n");
+                    return 1;
+                }
+
+                make_single_key(&single_key, in_read->b);
+
+                pair++;
+                in_read->pos = single_key.this_coord; // cigar/orientation modified pos
+
+                // put in singles hash for checking against non paired reads
+                k = kh_put(reads, single_hash, single_key, &ret);
+
+                if (ret > 0) { // new
+                    // add to single duplicate hash
+                    bp = &kh_val(single_hash, k);
+                    bp->p = in_read->b;
+                    in_read->single_key = single_key;
+                } else if (ret == 0) { // exists
+                    // look at singles only for duplication marking
+                    bp = &kh_val(single_hash, k);
+
+                    if (!(bp->p->core.flag & BAM_FPAIRED) || (bp->p->core.flag & BAM_FMUNMAP)) {
+                        bam1_t *dup = bp->p;
+
+                        // singleton will always be marked duplicate even if
+                        // scores more than one read of the pair
+
+                        bp->p = in_read->b;
+                        dup->core.flag |= BAM_FDUP;
+                        single_dup++;
+                    }
+                } else {
+                    fprintf(stderr, "[markdup] error: single hashing failure.\n");
+                    return 1;
+                }
+
+                // now do the pair
+                k = kh_put(reads, pair_hash, pair_key, &ret);
+
+                if (ret > 0) { // new
+                    // add to the pair hash
+                    bp = &kh_val(pair_hash, k);
+                    bp->p = in_read->b;
+                    in_read->pair_key = pair_key;
+                } else if (ret == 0) {
+                    int64_t old_score, new_score, tie_add = 0;
+                    bam1_t *dup;
+
+                    bp = &kh_val(pair_hash, k);
+
+                    if ((mate_tmp = get_mate_score(bp->p)) == -1) {
+                        fprintf(stderr, "[markdup] error: no ms score tag.\n");
+                        return 1;
+                    } else {
+                        old_score = calc_score(bp->p) + mate_tmp;
+                    }
+
+                    if ((mate_tmp = get_mate_score(in_read->b)) == -1) {
+                        fprintf(stderr, "[markdup] error: no ms score tag.\n");
+                        return 1;
+                    } else {
+                        new_score = calc_score(in_read->b) + mate_tmp;
+                    }
+
+                    // choose the highest score as the original
+                    // and add it to the pair hash, mark the other as duplicate
+
+                    if (new_score == old_score) {
+                        if (strcmp(bam_get_qname(in_read->b), bam_get_qname(bp->p)) < 0) {
+                            tie_add = 1;
+                        } else {
+                            tie_add = -1;
+                        }
+                    }
+
+                    if (new_score + tie_add > old_score) { // swap reads
+                        dup = bp->p;
+                        bp->p = in_read->b;
+                    } else {
+                        dup = in_read->b;
+                    }
+
+                    dup->core.flag |= BAM_FDUP;
+
+                    duplicate++;
+                } else {
+                    fprintf(stderr, "[markdup] error: pair hashing failure.\n");
+                    return 1;
+                }
+            } else { // do the single (or effectively single) reads
+                int ret;
+                key_data_t single_key;
+                in_hash_t *bp;
+
+                make_single_key(&single_key, in_read->b);
+
+                single++;
+                in_read->pos = single_key.this_coord; // cigar/orientation modified pos
+
+                k = kh_put(reads, single_hash, single_key, &ret);
+
+                if (ret > 0) { // new
+                    bp = &kh_val(single_hash, k);
+                    bp->p = in_read->b;
+                    in_read->single_key = single_key;
+                } else if (ret == 0) { // exists
+                    bp = &kh_val(single_hash, k);
+
+                    if ((bp->p->core.flag & BAM_FPAIRED) && !(bp->p->core.flag & BAM_FMUNMAP)) {
+                        // if matched against one of a pair just mark as duplicate
+                        in_read->b->core.flag |= BAM_FDUP;
+                    } else {
+                        int64_t old_score, new_score;
+                        bam1_t *dup;
+
+                        old_score = calc_score(bp->p);
+                        new_score = calc_score(in_read->b);
+
+                        // choose the highest score as the original, add it
+                        // to the single hash and mark the other as duplicate
+                        if (new_score > old_score) { // swap reads
+                            dup = bp->p;
+                            bp->p = in_read->b;
+                        } else {
+                            dup = in_read->b;
+                        }
+
+                        dup->core.flag |= BAM_FDUP;
+                    }
+
+                    single_dup++;
+                } else {
+                    fprintf(stderr, "[markdup] error: single hashing failure.\n");
+                    return 1;
+                }
+            }
+        } else {
+            excluded++;
+        }
+
+        // loop through the stored reads and write out those we
+        // no longer need
+        rq = kl_begin(read_buffer);
+        while (rq != kl_end(read_buffer)) {
+            in_read = &kl_val(rq);
+
+            /* keep a moving window of reads based on coordinates and max read length. Any unaligned reads
+               should just be written as they cannot be matched as duplicates. */
+            if (in_read->pos + max_length > prev_coord && in_read->b->core.tid == prev_tid && (prev_tid != -1 || prev_coord != -1)) {
+                break;
+            }
+
+            if (!remove_dups || !(in_read->b->core.flag & BAM_FDUP)) {
+                if (sam_write1(out, header, in_read->b) < 0) {
+                    fprintf(stderr, "[markdup] error: writing output failed.\n");
+                    return 1;
+                }
+
+                writing++;
+            }
+
+            // remove from hash
+            if (in_read->pair_key.single == 0) {
+                k = kh_get(reads, pair_hash, in_read->pair_key);
+                kh_del(reads, pair_hash, k);
+            }
+
+            if (in_read->single_key.single == 1) {
+                k = kh_get(reads, single_hash, in_read->single_key);
+                kh_del(reads, single_hash, k);
+            }
+
+            kl_shift(read_queue, read_buffer, NULL);
+            bam_destroy1(in_read->b);
+            rq = kl_begin(read_buffer);
+        }
+
+        // set the next one up for reading
+        in_read = kl_pushp(read_queue, read_buffer);
+
+        if ((in_read->b = bam_init1()) == NULL) {
+            fprintf(stderr, "[markdup] error: unable to allocate memory for alignment.\n");
+            return 1;
+        }
+    }
+
+    if (ret < -1) {
+        fprintf(stderr, "[markdup] error: truncated input file.\n");
+        return 1;
+    }
+
+    // write out the end of the list
+    rq = kl_begin(read_buffer);
+    while (rq != kl_end(read_buffer)) {
+        in_read = &kl_val(rq);
+
+        if (bam_get_qname(in_read->b)) { // last entry will be blank
+            if (!remove_dups || !(in_read->b->core.flag & BAM_FDUP)) {
+                if (sam_write1(out, header, in_read->b) < 0) {
+                    fprintf(stderr, "[markdup] error: writing final output failed.\n");
+                    return 1;
+                }
+
+                writing++;
+            }
+        }
+
+        kl_shift(read_queue, read_buffer, NULL);
+        bam_destroy1(in_read->b);
+        rq = kl_begin(read_buffer);
+    }
+
+    if (do_stats) {
+        fprintf(stderr, "READ %d WRITTEN %d \n"
+            "EXCLUDED %d EXAMINED %d\n"
+            "PAIRED %d SINGLE %d\n"
+            "DUPLICATE PAIR %d DUPLICATE SINGLE %d\n"
+            "DUPLICATE TOTAL %d\n", reading, writing, excluded, examined, pair, single,
+            duplicate, single_dup, single_dup + duplicate);
+    }
+
+    kh_destroy(reads, pair_hash);
+    kh_destroy(reads, single_hash);
+    kl_destroy(read_queue, read_buffer);
+    bam_hdr_destroy(header);
+
+    return 0;
+}
+
+
+static int markdup_usage(void) {
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Usage: samtools markdup <input.bam> <output.bam>\n\n");
+ fprintf(stderr, "Option: \n");
+ fprintf(stderr, " -r Remove duplicate reads\n");
+ fprintf(stderr, " -l Max read length (default 300 bases)\n");
+ fprintf(stderr, " -s Report stats.\n");
+
+ sam_global_opt_help(stderr, "-.O..@");
+
+ fprintf(stderr, "\nThe input file must be coordinate sorted and must have gone"
+ " through fixmates with the mate scoring option on.\n");
+
+ return 1;
+}
+
+
+int bam_markdup(int argc, char **argv) {
+ int c, ret, remove_dups = 0, report_stats = 0;
+ int32_t max_length = 300;
+ samFile *in = NULL, *out = NULL;
+ char wmode[3] = {'w', 'b', 0};
+ sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+ htsThreadPool p = {NULL, 0};
+
+ static const struct option lopts[] = {
+ SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
+ {NULL, 0, NULL, 0}
+ };
+
+ while ((c = getopt_long(argc, argv, "rsl:O:@:", lopts, NULL)) >= 0) {
+ switch (c) {
+ case 'r': remove_dups = 1; break;
+ case 'l': max_length = atoi(optarg); break;
+ case 's': report_stats = 1; break;
+ default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+ /* else fall-through */
+ case '?': return markdup_usage();
+ }
+ }
+
+ if (optind + 2 > argc)
+ return markdup_usage();
+
+ in = sam_open_format(argv[optind], "r", &ga.in);
+
+ if (!in) {
+ print_error_errno("markdup", "failed to open \"%s\" for input", argv[optind]);
+ return 1;
+ }
+
+ sam_open_mode(wmode + 1, argv[optind + 1], NULL);
+ out = sam_open_format(argv[optind + 1], wmode, &ga.out);
+
+ if (!out) {
+ print_error_errno("markdup", "failed to open \"%s\" for output", argv[optind + 1]);
+ return 1;
+ }
+
+ if (ga.nthreads > 0) {
+ if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+ fprintf(stderr, "[markdup] error creating thread pool\n");
+ return 1;
+ }
+
+ hts_set_opt(in, HTS_OPT_THREAD_POOL, &p);
+ hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
+ }
+
+ // actual stuff happens here
+ ret = bam_mark_duplicates(in, out, remove_dups, max_length, report_stats);
+
+ sam_close(in);
+
+ if (sam_close(out) < 0) {
+ fprintf(stderr, "[markdup] error closing output file\n");
+ ret = 1;
+ }
+
+ if (p.pool) hts_tpool_destroy(p.pool);
+
+ sam_global_args_free(&ga);
+
+ return ret;
+}
diff --git a/bam_mate.c b/bam_mate.c
index 75c2f51..1d6c55f 100644
--- a/bam_mate.c
+++ b/bam_mate.c
@@ -218,8 +218,39 @@ static int sync_mate(bam1_t* a, bam1_t* b)
return 0;
}
+
+/* Sum the base qualities of record b, counting only values that are at
+ * least 15.  Returns the total as an unsigned 32-bit value.
+ */
+static uint32_t calc_mate_score(bam1_t *b)
+{
+    const uint8_t *base_qual = bam_get_qual(b);
+    uint32_t total = 0;
+    int pos;
+
+    for (pos = 0; pos < b->core.l_qseq; ++pos) {
+        // Qualities below the threshold contribute nothing.
+        total += (base_qual[pos] >= 15) ? base_qual[pos] : 0;
+    }
+
+    return total;
+}
+
+
+/* Store the quality score of src (see calc_mate_score()) in the "ms" aux
+ * tag of dest, replacing any "ms" tag dest already carries.
+ *
+ * Returns 0 on success, -1 if the tag could not be appended.
+ *
+ * NOTE(review): the value is handed to bam_aux_append() as the raw bytes of
+ * a host uint32_t under type code 'i' - confirm this is what readers of the
+ * tag expect.
+ */
+static int add_mate_score(bam1_t *src, bam1_t *dest)
+{
+    uint32_t mate_score = calc_mate_score(src);
+    uint8_t *existing = bam_aux_get(dest, "ms");
+
+    // Drop a stale tag first so the record never holds two "ms" entries.
+    if (existing != NULL)
+        bam_aux_del(dest, existing);
+
+    return (bam_aux_append(dest, "ms", 'i', sizeof(uint32_t), (uint8_t*)&mate_score) == -1) ? -1 : 0;
+}
+
// currently, this function ONLY works if each read has one hit
-static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
+static int bam_mating_core(samFile *in, samFile *out, int remove_reads, int proper_pair_check, int add_ct, int do_mate_scoring)
{
bam_hdr_t *header;
bam1_t *b[2] = { NULL, NULL };
@@ -295,6 +326,13 @@ static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int prop
cur->core.flag &= ~BAM_FPROPER_PAIR;
}
+ if (do_mate_scoring) {
+ if ((add_mate_score(pre, cur) == -1) || (add_mate_score(cur, pre) == -1)) {
+ fprintf(stderr, "[bam_mating_core] ERROR: unable to add mate score.\n");
+ goto fail;
+ }
+ }
+
// Write out result
if ( !remove_reads ) {
if (sam_write1(out, header, pre) < 0) goto write_fail;
@@ -361,7 +399,8 @@ void usage(FILE* where)
"Options:\n"
" -r Remove unmapped reads and secondary alignments\n"
" -p Disable FR proper pair check\n"
-" -c Add template cigar ct tag\n");
+" -c Add template cigar ct tag\n"
+" -m Add mate score tag\n");
sam_global_opt_help(where, "-.O..@");
@@ -376,7 +415,7 @@ int bam_mating(int argc, char *argv[])
{
htsThreadPool p = {NULL, 0};
samFile *in = NULL, *out = NULL;
- int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1;
+ int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1, mate_score = 0;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
char wmode[3] = {'w', 'b', 0};
static const struct option lopts[] = {
@@ -386,11 +425,12 @@ int bam_mating(int argc, char *argv[])
// parse args
if (argc == 1) { usage(stdout); return 0; }
- while ((c = getopt_long(argc, argv, "rpcO:@:", lopts, NULL)) >= 0) {
+ while ((c = getopt_long(argc, argv, "rpcmO:@:", lopts, NULL)) >= 0) {
switch (c) {
case 'r': remove_reads = 1; break;
case 'p': proper_pair_check = 0; break;
case 'c': add_ct = 1; break;
+ case 'm': mate_score = 1; break;
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
/* else fall-through */
case '?': usage(stderr); goto fail;
@@ -419,7 +459,7 @@ int bam_mating(int argc, char *argv[])
}
// run
- res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
+ res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct, mate_score);
// cleanup
sam_close(in);
diff --git a/bam_plcmd.c b/bam_plcmd.c
index d17e9d6..d451ffd 100644
--- a/bam_plcmd.c
+++ b/bam_plcmd.c
@@ -113,6 +113,7 @@ static inline void pileup_seq(FILE *fp, const bam_pileup1_t *p, int pos, int ref
#define MPLP_PRINT_MAPQ (1<<10)
#define MPLP_PER_SAMPLE (1<<11)
#define MPLP_SMART_OVERLAPS (1<<12)
+#define MPLP_PRINT_QNAME (1<<13)
void *bed_read(const char *fn);
void bed_destroy(void *_h);
@@ -220,6 +221,7 @@ print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
fputs("\t0\t*\t*", fp);
if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", fp);
if (conf->flag & MPLP_PRINT_POS) fputs("\t*", fp);
+ if (conf->flag & MPLP_PRINT_QNAME) fputs("\t*", fp);
}
putc('\n', fp);
}
@@ -642,6 +644,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
fputs("*\t*", pileup_fp);
if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", pileup_fp);
if (conf->flag & MPLP_PRINT_POS) fputs("\t*", pileup_fp);
+ if (conf->flag & MPLP_PRINT_QNAME) fputs("\t*", pileup_fp);
} else {
int n = 0;
for (j = 0; j < n_plp[i]; ++j) {
@@ -698,6 +701,21 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
}
if (!n) putc('*', pileup_fp);
}
+
+ if (conf->flag & MPLP_PRINT_QNAME) {
+ n = 0;
+ putc('\t', pileup_fp);
+ for (j = 0; j < n_plp[i]; ++j) {
+ const bam_pileup1_t *p = &plp[i][j];
+ int c = bam_get_qual(p->b)[p->qpos];
+ if ( c < conf->min_baseQ ) continue;
+
+ if (n > 0) putc(',', pileup_fp);
+ fputs(bam_get_qname(p->b), pileup_fp);
+ n++;
+ }
+ if (!n) putc('*', pileup_fp);
+ }
}
}
putc('\n', pileup_fp);
@@ -898,6 +916,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
"Output options for mpileup format (without -g/-v):\n"
" -O, --output-BP output base positions on reads\n"
" -s, --output-MQ output mapping quality\n"
+" --output-QNAME output read names\n"
" -a output all positions (including zero depth)\n"
" -a -a (or -aa) output absolutely all positions, including unused ref. sequences\n"
"\n"
@@ -960,6 +979,8 @@ int bam_mpileup(int argc, char *argv[])
{"excl-flags", required_argument, NULL, 2},
{"output", required_argument, NULL, 3},
{"open-prob", required_argument, NULL, 4},
+ {"output-QNAME", no_argument, NULL, 5},
+ {"output-qname", no_argument, NULL, 5},
{"illumina1.3+", no_argument, NULL, '6'},
{"count-orphans", no_argument, NULL, 'A'},
{"bam-list", required_argument, NULL, 'b'},
@@ -1016,6 +1037,7 @@ int bam_mpileup(int argc, char *argv[])
break;
case 3 : mplp.output_fname = optarg; break;
case 4 : mplp.openQ = atoi(optarg); break;
+ case 5 : mplp.flag |= MPLP_PRINT_QNAME; break;
case 'f':
mplp.fai = fai_load(optarg);
if (mplp.fai == NULL) return 1;
diff --git a/bam_sort.c b/bam_sort.c
index d32a241..b1d5898 100644
--- a/bam_sort.c
+++ b/bam_sort.c
@@ -38,7 +38,9 @@ DEALINGS IN THE SOFTWARE. */
#include <getopt.h>
#include <assert.h>
#include <pthread.h>
+#include "htslib/bgzf.h"
#include "htslib/ksort.h"
+#include "htslib/hts_os.h"
#include "htslib/khash.h"
#include "htslib/klist.h"
#include "htslib/kstring.h"
@@ -49,10 +51,10 @@ DEALINGS IN THE SOFTWARE. */
// Struct which contains the a record, and the pointer to the sort tag (if any)
// Used to speed up sort-by-tag.
-typedef struct bam1_p {
- bam1_t *b;
+typedef struct bam1_tag {
+ bam1_t *bam_record;
const uint8_t *tag;
-} bam1_p;
+} bam1_tag;
/* Minimum memory required in megabytes before sort will attempt to run. This
is to prevent accidents where failing to use the -m option correctly results
@@ -122,29 +124,36 @@ static int strnum_cmp(const char *_a, const char *_b)
typedef struct {
int i;
uint64_t pos, idx;
- bam1_p b;
+ bam1_tag entry;
} heap1_t;
-#define __pos_cmp(a, b) ((a).pos > (b).pos || ((a).pos == (b).pos && ((a).i > (b).i || ((a).i == (b).i && (a).idx > (b).idx))))
-
-static inline int bam1_lt_by_tag(const bam1_p a, const bam1_p b);
+static inline int bam1_cmp_by_tag(const bam1_tag a, const bam1_tag b);
// Function to compare reads in the heap and determine which one is < the other
static inline int heap_lt(const heap1_t a, const heap1_t b)
{
+ if (!a.entry.bam_record)
+ return 1;
+ if (!b.entry.bam_record)
+ return 0;
+
if (g_is_by_tag) {
int t;
- if (a.b.b == NULL || b.b.b == NULL) return a.b.b == NULL? 1 : 0;
- t = bam1_lt_by_tag(b.b,a.b);
- return t;
+ t = bam1_cmp_by_tag(a.entry, b.entry);
+ if (t != 0) return t > 0;
} else if (g_is_by_qname) {
- int t;
- if (a.b.b == NULL || b.b.b == NULL) return a.b.b == NULL? 1 : 0;
- t = strnum_cmp(bam_get_qname(a.b.b), bam_get_qname(b.b.b));
- return (t > 0 || (t == 0 && (a.b.b->core.flag&0xc0) > (b.b.b->core.flag&0xc0)));
+ int t, fa, fb;
+ t = strnum_cmp(bam_get_qname(a.entry.bam_record), bam_get_qname(b.entry.bam_record));
+ if (t != 0) return t > 0;
+ fa = a.entry.bam_record->core.flag & 0xc0;
+ fb = b.entry.bam_record->core.flag & 0xc0;
+ if (fa != fb) return fa > fb;
} else {
- return __pos_cmp(a, b);
+ if (a.pos != b.pos) return a.pos > b.pos;
}
+ // This compares by position in the input file(s)
+ if (a.i != b.i) return a.i > b.i;
+ return a.idx > b.idx;
}
KSORT_INIT(heap, heap1_t, heap_lt)
@@ -1351,25 +1360,25 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
heap1_t *h = heap + i;
int res;
h->i = i;
- h->b.b = bam_init1();
- h->b.tag = NULL;
- if (!h->b.b) goto mem_fail;
- res = iter[i] ? sam_itr_next(fp[i], iter[i], h->b.b) : sam_read1(fp[i], hdr[i], h->b.b);
+ h->entry.bam_record = bam_init1();
+ h->entry.tag = NULL;
+ if (!h->entry.bam_record) goto mem_fail;
+ res = iter[i] ? sam_itr_next(fp[i], iter[i], h->entry.bam_record) : sam_read1(fp[i], hdr[i], h->entry.bam_record);
if (res >= 0) {
- bam_translate(h->b.b, translation_tbl + i);
- h->pos = ((uint64_t)h->b.b->core.tid<<32) | (uint32_t)((int32_t)h->b.b->core.pos+1)<<1 | bam_is_rev(h->b.b);
+ bam_translate(h->entry.bam_record, translation_tbl + i);
+ h->pos = ((uint64_t)h->entry.bam_record->core.tid<<32) | (uint32_t)((int32_t)h->entry.bam_record->core.pos+1)<<1 | bam_is_rev(h->entry.bam_record);
h->idx = idx++;
if (g_is_by_tag) {
- h->b.tag = bam_aux_get(h->b.b, g_sort_tag);
+ h->entry.tag = bam_aux_get(h->entry.bam_record, g_sort_tag);
} else {
- h->b.tag = NULL;
+ h->entry.tag = NULL;
}
}
else if (res == -1 && (!iter[i] || iter[i]->finished)) {
h->pos = HEAP_EMPTY;
- bam_destroy1(h->b.b);
- h->b.b = NULL;
- h->b.tag = NULL;
+ bam_destroy1(h->entry.bam_record);
+ h->entry.bam_record = NULL;
+ h->entry.tag = NULL;
} else {
print_error(cmd, "failed to read first record from \"%s\"", fn[i]);
goto fail;
@@ -1391,7 +1400,7 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
// Begin the actual merge
ks_heapmake(heap, n, heap);
while (heap->pos != HEAP_EMPTY) {
- bam1_t *b = heap->b.b;
+ bam1_t *b = heap->entry.bam_record;
if (flag & MERGE_RG) {
uint8_t *rg = bam_aux_get(b, "RG");
if (rg) bam_aux_del(b, rg);
@@ -1407,15 +1416,15 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam_is_rev(b);
heap->idx = idx++;
if (g_is_by_tag) {
- heap->b.tag = bam_aux_get(heap->b.b, g_sort_tag);
+ heap->entry.tag = bam_aux_get(heap->entry.bam_record, g_sort_tag);
} else {
- heap->b.tag = NULL;
+ heap->entry.tag = NULL;
}
} else if (j == -1 && (!iter[heap->i] || iter[heap->i]->finished)) {
heap->pos = HEAP_EMPTY;
- bam_destroy1(heap->b.b);
- heap->b.b = NULL;
- heap->b.tag = NULL;
+ bam_destroy1(heap->entry.bam_record);
+ heap->entry.bam_record = NULL;
+ heap->entry.tag = NULL;
} else {
print_error(cmd, "\"%s\" is truncated", fn[heap->i]);
goto fail;
@@ -1459,7 +1468,7 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
if (iter && iter[i]) hts_itr_destroy(iter[i]);
if (hdr && hdr[i]) bam_hdr_destroy(hdr[i]);
if (fp && fp[i]) sam_close(fp[i]);
- if (heap && heap[i].b.b) bam_destroy1(heap[i].b.b);
+ if (heap && heap[i].entry.bam_record) bam_destroy1(heap[i].entry.bam_record);
}
if (hout) bam_hdr_destroy(hout);
free(RG);
@@ -1615,6 +1624,169 @@ end:
* BAM sorting *
***************/
+/* A half-open range [from, to) of indexes into the in-memory bam1_tag
+ * buffer.  sort_blocks() fills one of these per sorted slice, and
+ * heap_add_read() consumes records from it by advancing 'from'. */
+typedef struct {
+ size_t from; // index of the next record to hand out
+ size_t to; // one past the last record of the slice
+} buf_region;
+
+/* Simplified version of bam_merge_core2() for merging part-sorted
+ temporary files. No need for header merging or translation,
+ it just needs to read data into the heap and push it out again. */
+
+/* Refill one heap slot with the next record from the source it belongs to.
+ *
+ * heap->i selects the source: values below nfiles read from fp[heap->i]
+ * into the slot's own bam1_t; larger values take the next record pointer
+ * from the in-memory region in_mem[heap->i - nfiles] of buf.
+ *
+ * On a successful read the slot's pos key, idx (taken from *idx, which is
+ * then incremented) and sort-tag pointer are refreshed.  When the source is
+ * exhausted the slot is marked HEAP_EMPTY; a file-backed slot's bam1_t is
+ * destroyed, and the record pointer is cleared.
+ *
+ * num_in_mem is accepted but not used inside this function.
+ *
+ * Returns 0 on success or end of data, -1 on a read error.
+ */
+static inline int heap_add_read(heap1_t *heap, int nfiles, samFile **fp,
+                                int num_in_mem, buf_region *in_mem,
+                                bam1_tag *buf, uint64_t *idx, bam_hdr_t *hout) {
+    const int src = heap->i;
+    const int from_file = src < nfiles;
+    bam1_t *rec;
+    int res;
+
+    if (from_file) {
+        res = sam_read1(fp[src], hout, heap->entry.bam_record);
+    } else {
+        buf_region *region = &in_mem[src - nfiles];
+        if (region->from < region->to) {
+            heap->entry.bam_record = buf[region->from++].bam_record;
+            res = 0;
+        } else {
+            res = -1;
+        }
+    }
+
+    // Anything below -1 is a genuine read failure.
+    if (res < -1)
+        return -1;
+
+    if (res == -1) {
+        // Source exhausted: retire this slot.
+        heap->pos = HEAP_EMPTY;
+        if (from_file) bam_destroy1(heap->entry.bam_record);
+        heap->entry.bam_record = NULL;
+        heap->entry.tag = NULL;
+        return 0;
+    }
+
+    rec = heap->entry.bam_record;
+    heap->pos = (((uint64_t)rec->core.tid<<32)
+                 | (uint32_t)((int32_t)rec->core.pos+1)<<1
+                 | bam_is_rev(rec));
+    heap->idx = (*idx)++;
+    heap->entry.tag = g_is_by_tag ? bam_aux_get(rec, g_sort_tag) : NULL;
+    return 0;
+}
+
+/* Merge n part-sorted temporary files and num_in_mem sorted in-memory
+ * regions of buf into a single output file.
+ *
+ *   by_qname, sort_tag  select the ordering (stored in the g_is_by_qname /
+ *                       g_is_by_tag / g_sort_tag globals used by heap_lt)
+ *   out, mode, out_fmt  output file name, open mode and format
+ *   hout                header written to the output and used for reading
+ *   n, fn, in_fmt       number, names and format of the temporary files
+ *   num_in_mem, in_mem  number of in-memory regions and their index ranges
+ *   buf                 record array the in-memory regions index into
+ *   n_threads           passed to hts_set_threads() for the output file
+ *   cmd                 command name used as the prefix of error messages
+ *
+ * Heap slots whose source index (heap[].i) is below n own a bam1_t
+ * allocated here; slots for in-memory regions only borrow pointers from
+ * buf and must never be passed to bam_destroy1().
+ *
+ * Returns 0 on success, -1 on any failure.  All failures leave through the
+ * fail label so that input files, the heap and the output handle are
+ * released.
+ */
+static int bam_merge_simple(int by_qname, char *sort_tag, const char *out,
+                            const char *mode, bam_hdr_t *hout,
+                            int n, char * const *fn, int num_in_mem,
+                            buf_region *in_mem, bam1_tag *buf, int n_threads,
+                            const char *cmd, const htsFormat *in_fmt,
+                            const htsFormat *out_fmt) {
+    samFile *fpout = NULL, **fp = NULL;
+    heap1_t *heap = NULL;
+    uint64_t idx = 0;
+    int i, heap_size = n + num_in_mem;
+
+    g_is_by_qname = by_qname;
+    if (sort_tag) {
+        g_is_by_tag = 1;
+        g_sort_tag[0] = sort_tag[0];
+        g_sort_tag[1] = sort_tag[1];
+    }
+    if (n > 0) {
+        fp = (samFile**)calloc(n, sizeof(samFile*));
+        if (!fp) goto mem_fail;
+    }
+    heap = (heap1_t*)calloc(heap_size, sizeof(heap1_t));
+    if (!heap) goto mem_fail;
+
+    // Open each file, read the header and put the first read into the heap
+    for (i = 0; i < heap_size; i++) {
+        bam_hdr_t *hin;
+        heap1_t *h = &heap[i];
+
+        if (i < n) {
+            fp[i] = sam_open_format(fn[i], "r", in_fmt);
+            if (fp[i] == NULL) {
+                print_error_errno(cmd, "fail to open \"%s\"", fn[i]);
+                goto fail;
+            }
+
+            // Read header ...
+            hin = sam_hdr_read(fp[i]);
+            if (hin == NULL) {
+                print_error(cmd, "failed to read header from \"%s\"", fn[i]);
+                goto fail;
+            }
+            // ... and throw it away as we don't really need it
+            bam_hdr_destroy(hin);
+        }
+
+        // Get a read into the heap
+        h->i = i;
+        h->entry.tag = NULL;
+        if (i < n) {
+            h->entry.bam_record = bam_init1();
+            if (!h->entry.bam_record) goto mem_fail;
+        }
+        if (heap_add_read(h, n, fp, num_in_mem, in_mem, buf, &idx, hout) < 0) {
+            assert(i < n);
+            print_error(cmd, "failed to read first record from \"%s\"", fn[i]);
+            goto fail;
+        }
+    }
+
+    // Open output file and write header
+    if ((fpout = sam_open_format(out, mode, out_fmt)) == 0) {
+        print_error_errno(cmd, "failed to create \"%s\"", out);
+        goto fail;
+    }
+
+    hts_set_threads(fpout, n_threads);
+
+    if (sam_hdr_write(fpout, hout) != 0) {
+        print_error_errno(cmd, "failed to write header to \"%s\"", out);
+        goto fail;
+    }
+
+    // Now do the merge
+    ks_heapmake(heap, heap_size, heap);
+    while (heap->pos != HEAP_EMPTY) {
+        bam1_t *b = heap->entry.bam_record;
+        if (sam_write1(fpout, hout, b) < 0) {
+            print_error_errno(cmd, "failed writing to \"%s\"", out);
+            goto fail;
+        }
+        if (heap_add_read(heap, n, fp, num_in_mem, in_mem, buf, &idx, hout) < 0) {
+            assert(heap->i < n);
+            print_error(cmd, "Error reading \"%s\" : %s",
+                        fn[heap->i], strerror(errno));
+            goto fail;
+        }
+        ks_heapadjust(heap, 0, heap_size, heap);
+    }
+    // Clean up and close
+    for (i = 0; i < n; i++) {
+        if (sam_close(fp[i]) != 0) {
+            print_error(cmd, "Error on closing \"%s\" : %s",
+                        fn[i], strerror(errno));
+        }
+    }
+    free(fp);
+    free(heap);
+    if (sam_close(fpout) < 0) {
+        print_error(cmd, "error closing output file");
+        return -1;
+    }
+    return 0;
+ mem_fail:
+    print_error(cmd, "Out of memory");
+
+ fail:
+    if (fp) {
+        for (i = 0; i < n; i++) {
+            if (fp[i]) sam_close(fp[i]);
+        }
+    }
+    if (heap) {
+        // Heap slots are reordered by ks_heapmake()/ks_heapadjust(), so the
+        // slot position says nothing about ownership.  Only records whose
+        // source index refers to a file were allocated by this function.
+        for (i = 0; i < heap_size; i++) {
+            if (heap[i].i < n && heap[i].entry.bam_record)
+                bam_destroy1(heap[i].entry.bam_record);
+        }
+    }
+    free(fp);
+    free(heap);
+    if (fpout) sam_close(fpout);
+    return -1;
+}
+
static int change_SO(bam_hdr_t *h, const char *so)
{
char *p, *q, *beg = NULL, *end = NULL, *newtext;
@@ -1635,29 +1807,41 @@ static int change_SO(bam_hdr_t *h, const char *so)
if (beg == NULL) { // no @HD
h->l_text += strlen(so) + 15;
newtext = (char*)malloc(h->l_text + 1);
- sprintf(newtext, "@HD\tVN:1.3\tSO:%s\n", so);
- strcat(newtext, h->text);
+ if (!newtext) return -1;
+ snprintf(newtext, h->l_text + 1,
+ "@HD\tVN:1.3\tSO:%s\n%s", so, h->text);
} else { // has @HD but different or no SO
h->l_text = (beg - h->text) + (4 + strlen(so)) + (h->text + h->l_text - end);
newtext = (char*)malloc(h->l_text + 1);
- strncpy(newtext, h->text, beg - h->text);
- sprintf(newtext + (beg - h->text), "\tSO:%s", so);
- strcat(newtext, end);
+ if (!newtext) return -1;
+ snprintf(newtext, h->l_text + 1, "%.*s\tSO:%s%s",
+ (int) (beg - h->text), h->text, so, end);
}
free(h->text);
h->text = newtext;
return 0;
}
-// Function to compare reads and determine which one is < the other
+// Function to compare reads and determine which one is < or > the other
// Handle sort-by-pos and sort-by-name. Used as the secondary sort in bam1_lt_by_tag, if reads are equivalent by tag.
-static inline int bam1_lt_core(const bam1_p a, const bam1_p b)
+// Returns a value less than, equal to or greater than zero if a is less than,
+// equal to or greater than b, respectively.
+static inline int bam1_cmp_core(const bam1_tag a, const bam1_tag b)
{
+ uint64_t pa, pb;
+ if (!a.bam_record)
+ return 1;
+ if (!b.bam_record)
+ return 0;
+
if (g_is_by_qname) {
- int t = strnum_cmp(bam_get_qname(a.b), bam_get_qname(b.b));
- return (t < 0 || (t == 0 && (a.b->core.flag&0xc0) < (b.b->core.flag&0xc0)));
+ int t = strnum_cmp(bam_get_qname(a.bam_record), bam_get_qname(b.bam_record));
+ if (t != 0) return t;
+ return (int) (a.bam_record->core.flag&0xc0) - (int) (b.bam_record->core.flag&0xc0);
} else {
- return (((uint64_t)a.b->core.tid<<32|(a.b->core.pos+1)<<1|bam_is_rev(a.b)) < ((uint64_t)b.b->core.tid<<32|(b.b->core.pos+1)<<1|bam_is_rev(b.b)));
+ pa = (uint64_t)a.bam_record->core.tid<<32|(a.bam_record->core.pos+1)<<1|bam_is_rev(a.bam_record);
+ pb = (uint64_t)b.bam_record->core.tid<<32|(b.bam_record->core.pos+1)<<1|bam_is_rev(b.bam_record);
+ return pa < pb ? -1 : (pa > pb ? 1 : 0);
}
}
@@ -1675,17 +1859,19 @@ uint8_t normalize_type(const uint8_t* aux) {
// Sort record by tag, using pos or read name as a secondary key if tags are identical. Reads not carrying the tag sort first.
// Tags are first sorted by the type character (in case the types differ), or by the appropriate comparator for that type if they agree.
-static inline int bam1_lt_by_tag(const bam1_p a, const bam1_p b)
+// Returns a value less than, equal to or greater than zero if a is less than,
+// equal to or greater than b, respectively.
+static inline int bam1_cmp_by_tag(const bam1_tag a, const bam1_tag b)
{
const uint8_t* aux_a = a.tag;
const uint8_t* aux_b = b.tag;
if (aux_a == NULL && aux_b != NULL) {
- return 1;
+ return -1;
} else if (aux_a != NULL && aux_b == NULL) {
- return 0;
+ return 1;
} else if (aux_a == NULL && aux_b == NULL) {
- return bam1_lt_core(a,b);
+ return bam1_cmp_core(a,b);
}
// 'Normalize' the letters of the datatypes to a canonical letter,
@@ -1702,57 +1888,62 @@ static inline int bam1_lt_by_tag(const bam1_p a, const bam1_p b)
b_type = 'f';
} else {
// Unfixable mismatched types
- return a_type < b_type ? 1 : 0;
+ return a_type < b_type ? -1 : 1;
}
}
if (a_type == 'c') {
int64_t va = bam_aux2i(aux_a);
int64_t vb = bam_aux2i(aux_b);
- return (va < vb || (va == vb && bam1_lt_core(a, b)));
+ if (va != vb) return va < vb ? -1 : 1;
+ return bam1_cmp_core(a, b);
} else if (a_type == 'f') {
double va = bam_aux2f(aux_a);
double vb = bam_aux2f(aux_b);
- return (va < vb || (va == vb && bam1_lt_core(a,b)));
+ if (va != vb) return va < vb ? -1 : 1;
+ return bam1_cmp_core(a, b);
} else if (a_type == 'A') {
- char va = bam_aux2A(aux_a);
- char vb = bam_aux2A(aux_b);
- return (va < vb || (va == vb && bam1_lt_core(a,b)));
+ unsigned char va = bam_aux2A(aux_a);
+ unsigned char vb = bam_aux2A(aux_b);
+ if (va != vb) return va < vb ? -1 : 1;
+ return bam1_cmp_core(a, b);
} else if (a_type == 'H') {
int t = strcmp(bam_aux2Z(aux_a), bam_aux2Z(aux_b));
- return (t < 0 || (t == 0 && bam1_lt_core(a,b)));
+ if (t) return t;
+ return bam1_cmp_core(a, b);
} else {
- return bam1_lt_core(a,b);
+ return bam1_cmp_core(a,b);
}
}
// Function to compare reads and determine which one is < the other
// Handle sort-by-pos, sort-by-name, or sort-by-tag
-static inline int bam1_lt(const bam1_p a, const bam1_p b)
+static inline int bam1_lt(const bam1_tag a, const bam1_tag b)
{
if (g_is_by_tag) {
- return bam1_lt_by_tag(a, b);
+ return bam1_cmp_by_tag(a, b) < 0;
} else {
- return bam1_lt_core(a,b);
+ return bam1_cmp_core(a,b) < 0;
}
}
-KSORT_INIT(sort, bam1_p, bam1_lt)
+KSORT_INIT(sort, bam1_tag, bam1_lt)
typedef struct {
size_t buf_len;
const char *prefix;
- bam1_p *buf;
+ bam1_tag *buf;
const bam_hdr_t *h;
int index;
int error;
+ int no_save;
} worker_t;
// Returns 0 for success
// -1 for failure
-static int write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
+static int write_buffer(const char *fn, const char *mode, size_t l, bam1_tag *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
{
size_t i;
samFile* fp;
@@ -1761,7 +1952,7 @@ static int write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf,
if (sam_hdr_write(fp, h) != 0) goto fail;
if (n_threads > 1) hts_set_threads(fp, n_threads);
for (i = 0; i < l; ++i) {
- if (sam_write1(fp, h, buf[i].b) < 0) goto fail;
+ if (sam_write1(fp, h, buf[i].bam_record) < 0) goto fail;
}
if (sam_close(fp) < 0) return -1;
return 0;
@@ -1776,6 +1967,10 @@ static void *worker(void *data)
char *name;
w->error = 0;
ks_mergesort(sort, w->buf_len, w->buf, 0);
+
+ if (w->no_save)
+ return 0;
+
name = (char*)calloc(strlen(w->prefix) + 20, 1);
if (!name) { w->error = errno; return 0; }
sprintf(name, "%s.%.4d.bam", w->prefix, w->index);
@@ -1783,7 +1978,7 @@ static void *worker(void *data)
uint32_t max_ncigar = 0;
int i;
for (i = 0; i < w->buf_len; i++) {
- uint32_t nc = w->buf[i].b->core.n_cigar;
+ uint32_t nc = w->buf[i].bam_record->core.n_cigar;
if (max_ncigar < nc)
max_ncigar = nc;
}
@@ -1808,11 +2003,11 @@ static void *worker(void *data)
return 0;
}
-static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, const bam_hdr_t *h, int n_threads)
+static int sort_blocks(int n_files, size_t k, bam1_tag *buf, const char *prefix,
+ const bam_hdr_t *h, int n_threads, buf_region *in_mem)
{
int i;
- size_t rest;
- bam1_p *b;
+ size_t pos, rest;
pthread_t *tid;
pthread_attr_t attr;
worker_t *w;
@@ -1823,15 +2018,24 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
w = (worker_t*)calloc(n_threads, sizeof(worker_t));
+ if (!w) return -1;
tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t));
- b = buf; rest = k;
+ if (!tid) { free(w); return -1; }
+ pos = 0; rest = k;
for (i = 0; i < n_threads; ++i) {
w[i].buf_len = rest / (n_threads - i);
- w[i].buf = b;
+ w[i].buf = &buf[pos];
w[i].prefix = prefix;
w[i].h = h;
w[i].index = n_files + i;
- b += w[i].buf_len; rest -= w[i].buf_len;
+ if (in_mem) {
+ w[i].no_save = 1;
+ in_mem[i].from = pos;
+ in_mem[i].to = pos + w[i].buf_len;
+ } else {
+ w[i].no_save = 0;
+ }
+ pos += w[i].buf_len; rest -= w[i].buf_len;
pthread_create(&tid[i], &attr, worker, &w[i]);
}
for (i = 0; i < n_threads; ++i) {
@@ -1843,7 +2047,9 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
}
}
free(tid); free(w);
- return (n_failed == 0)? n_files + n_threads : -1;
+ if (n_failed) return -1;
+ if (in_mem) return n_threads;
+ return n_files + n_threads;
}
/*!
@@ -1862,7 +2068,7 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
@return 0 for successful sorting, negative on errors
@discussion It may create multiple temporary subalignment files
- and then merge them by calling bam_merge_core2(). This function is
+ and then merge them by calling bam_merge_simple(). This function is
NOT thread safe.
*/
int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const char *prefix,
@@ -1870,12 +2076,22 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
size_t _max_mem, int n_threads,
const htsFormat *in_fmt, const htsFormat *out_fmt)
{
- int ret = -1, i, n_files = 0;
- size_t mem, max_k, k, max_mem;
+ int ret = -1, res, i, n_files = 0;
+ size_t max_k, k, max_mem, bam_mem_offset;
bam_hdr_t *header = NULL;
samFile *fp;
- bam1_p *buf;
- bam1_t *b;
+ bam1_tag *buf = NULL;
+ bam1_t *b = bam_init1();
+ uint8_t *bam_mem = NULL;
+ char **fns = NULL;
+ const char *new_so;
+ buf_region *in_mem = NULL;
+ int num_in_mem = 0;
+
+ if (!b) {
+ print_error("sort", "couldn't allocate memory for bam record");
+ return -1;
+ }
if (n_threads < 2) n_threads = 1;
g_is_by_qname = is_by_qname;
@@ -1884,13 +2100,12 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
strncpy(g_sort_tag, sort_by_tag, 2);
}
- max_k = k = 0; mem = 0;
max_mem = _max_mem * n_threads;
buf = NULL;
fp = sam_open_format(fn, "r", in_fmt);
if (fp == NULL) {
print_error_errno("sort", "can't open \"%s\"", fn);
- return -2;
+ goto err;
}
header = sam_hdr_read(fp);
if (header == NULL) {
@@ -1899,11 +2114,17 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
}
if (sort_by_tag != NULL)
- change_SO(header, "unknown");
+ new_so = "unknown";
else if (is_by_qname)
- change_SO(header, "queryname");
+ new_so = "queryname";
else
- change_SO(header, "coordinate");
+ new_so = "coordinate";
+
+ if (change_SO(header, new_so) != 0) {
+ print_error("sort",
+ "failed to change sort order header to '%s'\n", new_so);
+ goto err;
+ }
// No gain to using the thread pool here as the flow of this code
// is such that we are *either* reading *or* sorting. Hence a shared
@@ -1911,93 +2132,121 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
if (n_threads > 1)
hts_set_threads(fp, n_threads);
+ if ((bam_mem = malloc(max_mem)) == NULL) {
+ print_error("sort", "couldn't allocate memory for bam_mem");
+ goto err;
+ }
+
// write sub files
- for (;;) {
+ k = max_k = bam_mem_offset = 0;
+ while ((res = sam_read1(fp, header, b)) >= 0) {
+ int mem_full = 0;
+
if (k == max_k) {
- size_t kk, old_max = max_k;
+ bam1_tag *new_buf;
max_k = max_k? max_k<<1 : 0x10000;
- buf = (bam1_p*)realloc(buf, max_k * sizeof(bam1_p));
- for (kk = old_max; kk < max_k; ++kk) {
- buf[kk].b = NULL;
- buf[kk].tag = NULL;
+ if ((new_buf = realloc(buf, max_k * sizeof(bam1_tag))) == NULL) {
+ print_error("sort", "couldn't allocate memory for buf");
+ goto err;
}
+ buf = new_buf;
}
- if (buf[k].b == NULL) buf[k].b = bam_init1();
- b = buf[k].b;
- if ((ret = sam_read1(fp, header, b)) < 0) break;
- if (b->l_data < b->m_data>>2) { // shrink
- b->m_data = b->l_data;
- kroundup32(b->m_data);
- b->data = (uint8_t*)realloc(b->data, b->m_data);
+
+ // Check if the BAM record will fit in the memory limit
+ if (bam_mem_offset + sizeof(*b) + b->l_data < max_mem) {
+ // Copy record into the memory block
+ buf[k].bam_record = (bam1_t *)(bam_mem + bam_mem_offset);
+ *buf[k].bam_record = *b;
+ buf[k].bam_record->data = (uint8_t *)((char *)buf[k].bam_record + sizeof(bam1_t));
+ memcpy(buf[k].bam_record->data, b->data, b->l_data);
+ // store next BAM record in next 8-byte-aligned address after
+ // current one
+ bam_mem_offset = (bam_mem_offset + sizeof(*b) + b->l_data + 8 - 1) & ~((size_t)(8 - 1));
+ } else {
+ // Add a pointer to the remaining record
+ buf[k].bam_record = b;
+ mem_full = 1;
}
// Pull out the pointer to the sort tag if applicable
if (g_is_by_tag) {
- buf[k].tag = bam_aux_get(b, g_sort_tag);
+ buf[k].tag = bam_aux_get(buf[k].bam_record, g_sort_tag);
} else {
buf[k].tag = NULL;
}
-
- mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays
++k;
- if (mem >= max_mem) {
- n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
+
+ if (mem_full) {
+ n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads,
+ NULL);
if (n_files < 0) {
- ret = -1;
goto err;
}
- mem = k = 0;
+ k = 0;
+ bam_mem_offset = 0;
}
}
- if (ret != -1) {
+ if (res != -1) {
print_error("sort", "truncated file. Aborting");
- ret = -1;
goto err;
}
+ // Sort last records
+ if (k > 0) {
+ in_mem = calloc(n_threads > 0 ? n_threads : 1, sizeof(in_mem[0]));
+ if (!in_mem) goto err;
+ num_in_mem = sort_blocks(n_files, k, buf, prefix, header, n_threads,
+ in_mem);
+ if (num_in_mem < 0) goto err;
+ } else {
+ num_in_mem = 0;
+ }
+
// write the final output
- if (n_files == 0) { // a single block
+ if (n_files == 0 && num_in_mem < 2) { // a single block
ks_mergesort(sort, k, buf, 0);
if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt) != 0) {
print_error_errno("sort", "failed to create \"%s\"", fnout);
- ret = -1;
goto err;
}
} else { // then merge
- char **fns;
- n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
- if (n_files == -1) {
- ret = -1;
- goto err;
- }
- fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files);
+ fprintf(stderr,
+ "[bam_sort_core] merging from %d files and %d in-memory blocks...\n",
+ n_files, num_in_mem);
fns = (char**)calloc(n_files, sizeof(char*));
+ if (!fns) goto err;
for (i = 0; i < n_files; ++i) {
fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
+ if (!fns[i]) goto err;
sprintf(fns[i], "%s.%.4d.bam", prefix, i);
}
- if (bam_merge_core2(is_by_qname, sort_by_tag, fnout, modeout, NULL, n_files, fns,
- MERGE_COMBINE_RG|MERGE_COMBINE_PG|MERGE_FIRST_CO,
- NULL, n_threads, "sort", in_fmt, out_fmt) < 0) {
- // Propagate bam_merge_core2() failure; it has already emitted a
+ if (bam_merge_simple(is_by_qname, sort_by_tag, fnout, modeout, header,
+ n_files, fns, num_in_mem, in_mem, buf,
+ n_threads, "sort", in_fmt, out_fmt) < 0) {
+ // Propagate bam_merge_simple() failure; it has already emitted a
// message explaining the failure, so no further message is needed.
goto err;
}
- for (i = 0; i < n_files; ++i) {
- unlink(fns[i]);
- free(fns[i]);
- }
- free(fns);
}
ret = 0;
err:
// free
- for (k = 0; k < max_k; ++k) bam_destroy1(buf[k].b);
+ if (fns) {
+ for (i = 0; i < n_files; ++i) {
+ if (fns[i]) {
+ unlink(fns[i]);
+ free(fns[i]);
+ }
+ }
+ free(fns);
+ }
+ bam_destroy1(b);
free(buf);
+ free(bam_mem);
bam_hdr_destroy(header);
- sam_close(fp);
+ if (fp) sam_close(fp);
return ret;
}
@@ -2006,6 +2255,7 @@ int bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t ma
{
int ret;
char *fnout = calloc(strlen(prefix) + 4 + 1, 1);
+ if (!fnout) return -1;
sprintf(fnout, "%s.bam", prefix);
ret = bam_sort_core_ext(is_by_qname, NULL, fn, prefix, fnout, "wb", max_mem, 0, NULL, NULL);
free(fnout);
diff --git a/bamtk.c b/bamtk.c
index bd520b6..9316386 100644
--- a/bamtk.c
+++ b/bamtk.c
@@ -44,6 +44,7 @@ int bam_rmdup(int argc, char *argv[]);
int bam_flagstat(int argc, char *argv[]);
int bam_fillmd(int argc, char *argv[]);
int bam_idxstats(int argc, char *argv[]);
+int bam_markdup(int argc, char *argv[]);
int main_samview(int argc, char *argv[]);
int main_import(int argc, char *argv[]);
int main_reheader(int argc, char *argv[]);
@@ -92,6 +93,7 @@ static void usage(FILE *fp)
" rmdup remove PCR duplicates\n"
" targetcut cut fosmid regions (for fosmid pool only)\n"
" addreplacerg adds or replaces RG tags\n"
+" markdup mark duplicates\n"
"\n"
" -- File operations\n"
" collate shuffle and group alignments by name\n"
@@ -126,6 +128,18 @@ static void usage(FILE *fp)
#endif
}
+// This is a tricky one, but on Windows the filename wildcard expansion is done by
+// the application and not by the shell, as traditionally it never had a "shell".
+// Even now, DOS and Powershell do not do this expansion (but bash does).
+//
+// This means that Mingw/Msys implements code before main() that takes e.g. "*" and
+// expands it up to a list of matching filenames. This in turn breaks things like
+// specifying "*" as a region (all the unmapped reads). We take a hard line here -
+// filename expansion is the task of the shell, not our application!
+#ifdef _WIN32
+int _CRT_glob = 0;
+#endif
+
int main(int argc, char *argv[])
{
#ifdef _WIN32
@@ -156,6 +170,7 @@ int main(int argc, char *argv[])
else if (strcmp(argv[1], "dict") == 0) ret = dict_main(argc-1, argv+1);
else if (strcmp(argv[1], "fixmate") == 0) ret = bam_mating(argc-1, argv+1);
else if (strcmp(argv[1], "rmdup") == 0) ret = bam_rmdup(argc-1, argv+1);
+ else if (strcmp(argv[1], "markdup") == 0) ret = bam_markdup(argc-1, argv+1);
else if (strcmp(argv[1], "flagstat") == 0) ret = bam_flagstat(argc-1, argv+1);
else if (strcmp(argv[1], "calmd") == 0) ret = bam_fillmd(argc-1, argv+1);
else if (strcmp(argv[1], "fillmd") == 0) ret = bam_fillmd(argc-1, argv+1);
diff --git a/bedidx.c b/bedidx.c
index c1954ad..86d2338 100644
--- a/bedidx.c
+++ b/bedidx.c
@@ -32,10 +32,6 @@ DEALINGS IN THE SOFTWARE. */
#include <errno.h>
#include <zlib.h>
-#ifdef _WIN32
-#define drand48() ((double)rand() / RAND_MAX)
-#endif
-
#include "htslib/ksort.h"
KSORT_INIT_GENERIC(uint64_t)
diff --git a/config.h.in b/config.h.in
deleted file mode 100644
index aac2a4c..0000000
--- a/config.h.in
+++ /dev/null
@@ -1,50 +0,0 @@
-/* config.h.in -- template for config.h. If you use configure, this file
- provides #defines reflecting your configuration choices. If you don't
- run configure, suitable conservative defaults will be used.
-
- This template file can be updated with autoheader, but do so carefully
- as autoheader adds #defines such as PACKAGE_* that we don't want. */
-
-/* Define to 1 if a SysV or X/Open compatible Curses library is present */
-#undef HAVE_CURSES
-
-/* Define to 1 if library supports color (enhanced functions) */
-#undef HAVE_CURSES_COLOR
-
-/* Define to 1 if library supports X/Open Enhanced functions */
-#undef HAVE_CURSES_ENHANCED
-
-/* Define to 1 if <curses.h> is present */
-#undef HAVE_CURSES_H
-
-/* Define to 1 if library supports certain obsolete features */
-#undef HAVE_CURSES_OBSOLETE
-
-/* Define to 1 if the Ncurses library is present */
-#undef HAVE_NCURSES
-
-/* Define to 1 if the NcursesW library is present */
-#undef HAVE_NCURSESW
-
-/* Define to 1 if <ncursesw/curses.h> is present */
-#undef HAVE_NCURSESW_CURSES_H
-
-/* Define to 1 if <ncursesw.h> is present */
-#undef HAVE_NCURSESW_H
-
-/* Define to 1 if <ncurses/curses.h> is present */
-#undef HAVE_NCURSES_CURSES_H
-
-/* Define to 1 if <ncurses.h> is present */
-#undef HAVE_NCURSES_H
-
-/* Enable large inode numbers on Mac OS X 10.5. */
-#ifndef _DARWIN_USE_64_BIT_INODE
-# define _DARWIN_USE_64_BIT_INODE 1
-#endif
-
-/* Number of bits in a file offset, on hosts where this is settable. */
-#undef _FILE_OFFSET_BITS
-
-/* Define for large files, on AIX-style hosts. */
-#undef _LARGE_FILES
diff --git a/configure.ac b/configure.ac
index 33bddcf..97db6b2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
# DEALINGS IN THE SOFTWARE.
dnl Process this file with autoconf to produce a configure script
-AC_INIT([Samtools], m4_esyscmd_s([make print-version]),
+AC_INIT([Samtools], m4_esyscmd_s([./version.sh 2>/dev/null]),
[samtools-help at lists.sourceforge.net], [], [http://www.htslib.org/])
AC_PREREQ([2.63]) dnl This version introduced 4-argument AC_CHECK_HEADER
AC_CONFIG_SRCDIR([bamtk.c])
@@ -33,11 +33,22 @@ m4_include([m4/ax_with_curses.m4])
m4_include([m4/ax_with_htslib.m4])
dnl Copyright notice to be copied into the generated configure script
-AC_COPYRIGHT([Portions copyright (C) 2015 Genome Research Ltd.
+AC_COPYRIGHT([Portions copyright (C) 2015,2017 Genome Research Ltd.
This configure script is free software: you are free to change and
redistribute it. There is NO WARRANTY, to the extent permitted by law.])
+dnl Notes to be copied (by autoheader) into the generated config.h.in
+AH_TOP([/* If you use configure, this file provides @%:@defines reflecting your
+ configuration choices. If you have not run configure, suitable
+ conservative defaults will be used.
+
+ Autoheader adds a number of items to this template file that are not
+ used by samtools: STDC_HEADERS and most HAVE_*_H header file defines
+ are immaterial, as we assume standard ISO C headers and facilities;
+ the PACKAGE_* defines are unused and are overridden by the more
+ accurate PACKAGE_VERSION as computed by the Makefile. */])
+
AC_PROG_CC
AC_SYS_LARGEFILE
@@ -84,9 +95,7 @@ samtools successfully.])
fi
else
CURSES_LIB=
- CURSES_CPPFLAGS=
AC_SUBST([CURSES_LIB])
- AC_SUBST([CURSES_CPPFLAGS])
fi
save_LIBS=$LIBS
@@ -107,5 +116,22 @@ Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions) is installed.
FAILED. This error must be resolved in order to build samtools successfully.])
fi
+dnl Look for regcomp in various libraries (needed on windows/mingw).
+AC_SEARCH_LIBS(regcomp, regex, [libregex=needed], [])
+
+dnl Force POSIX mode on Windows/Mingw
+test -n "$host_alias" || host_alias=unknown-`uname -s`
+case $host_alias in
+ *-msys* | *-MSYS* | *-mingw* | *-MINGW*)
+ host_result="MSYS dll"
+ PLATFORM=MSYS
+ PLUGIN_EXT=.dll
+ # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64,
+ # %lld and %z printf formats work. It also enforces the snprintf to
+ # be C99 compliant so it returns the correct values (in kstring.c).
+ CPPFLAGS="$CPPFLAGS -D_POSIX_C_SOURCE=600"
+ ;;
+esac
+
AC_CONFIG_FILES([config.mk])
AC_OUTPUT
diff --git a/debian/NEWS b/debian/NEWS
deleted file mode 100644
index 27eab94..0000000
--- a/debian/NEWS
+++ /dev/null
@@ -1,9 +0,0 @@
-samtools (1.3-1) unstable; urgency=medium
-
- Samtools 1.3 introduces various changes to the command line tools.
- Most importantly, the legacy syntax for 'samtools sort' has been dropped,
- and the 'bamshuf' tool has been renamed to 'collate'. Please see the
- full upstream change log at:
- https://github.com/samtools/samtools/releases/tag/1.3
-
- -- Sascha Steinbiss <sascha at steinbiss.name> Fri, 05 Feb 2016 08:26:05 +0000
diff --git a/debian/README.Debian b/debian/README.Debian
deleted file mode 100644
index 7f04b3e..0000000
--- a/debian/README.Debian
+++ /dev/null
@@ -1,20 +0,0 @@
-SAMtools for Debian.
-━━━━━━━━━━━━━━━━━━━━
-
-
-Extra utilities.
-────────────────
-
-The extra utilities found in the ‘misc’ directory of the upstream archive are
-placed in ‘/usr/share/samtools’ for the Perl and Python scripts, and
-‘/usr/lib/samtools’ for the binary executables.
-
-
-Note about how this package is tested.
-──────────────────────────────────────
-
-At build time, the regression tests of the package are ran. You can consult the
-results on ‘http://buildd.debian.org/samtools’.
-
-
- -- Charles Plessy <plessy at debian.org> Thu, 15 Jul 2010 08:37:41 +0900
diff --git a/debian/README.source b/debian/README.source
deleted file mode 100644
index 2f3da8f..0000000
--- a/debian/README.source
+++ /dev/null
@@ -1,4 +0,0 @@
-This package track Upstream's GitHub repository directly; see the
-debian/README.source file from the htslib package for details.
-
- -- Charles Plessy <plessy at debian.org> Thu, 17 Sep 2015 20:17:51 +0900:
diff --git a/debian/ax_with_curses.m4 b/debian/ax_with_curses.m4
deleted file mode 100644
index 8709a80..0000000
--- a/debian/ax_with_curses.m4
+++ /dev/null
@@ -1,578 +0,0 @@
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_with_curses.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_WITH_CURSES
-#
-# DESCRIPTION
-#
-# This macro checks whether a SysV or X/Open-compatible Curses library is
-# present, along with the associated header file. The NcursesW
-# (wide-character) library is searched for first, followed by Ncurses,
-# then the system-default plain Curses. The first library found is the
-# one returned. Finding libraries will first be attempted by using
-# pkg-config, and should the pkg-config files not be available, will
-# fallback to combinations of known flags itself.
-#
-# The following options are understood: --with-ncursesw, --with-ncurses,
-# --without-ncursesw, --without-ncurses. The "--with" options force the
-# macro to use that particular library, terminating with an error if not
-# found. The "--without" options simply skip the check for that library.
-# The effect on the search pattern is:
-#
-# (no options) - NcursesW, Ncurses, Curses
-# --with-ncurses --with-ncursesw - NcursesW only [*]
-# --without-ncurses --with-ncursesw - NcursesW only [*]
-# --with-ncursesw - NcursesW only [*]
-# --with-ncurses --without-ncursesw - Ncurses only [*]
-# --with-ncurses - NcursesW, Ncurses [**]
-# --without-ncurses --without-ncursesw - Curses only
-# --without-ncursesw - Ncurses, Curses
-# --without-ncurses - NcursesW, Curses
-#
-# [*] If the library is not found, abort the configure script.
-#
-# [**] If the second library (Ncurses) is not found, abort configure.
-#
-# The following preprocessor symbols may be defined by this macro if the
-# appropriate conditions are met:
-#
-# HAVE_CURSES - if any SysV or X/Open Curses library found
-# HAVE_CURSES_ENHANCED - if library supports X/Open Enhanced functions
-# HAVE_CURSES_COLOR - if library supports color (enhanced functions)
-# HAVE_CURSES_OBSOLETE - if library supports certain obsolete features
-# HAVE_NCURSESW - if NcursesW (wide char) library is to be used
-# HAVE_NCURSES - if the Ncurses library is to be used
-#
-# HAVE_CURSES_H - if <curses.h> is present and should be used
-# HAVE_NCURSESW_H - if <ncursesw.h> should be used
-# HAVE_NCURSES_H - if <ncurses.h> should be used
-# HAVE_NCURSESW_CURSES_H - if <ncursesw/curses.h> should be used
-# HAVE_NCURSES_CURSES_H - if <ncurses/curses.h> should be used
-#
-# (These preprocessor symbols are discussed later in this document.)
-#
-# The following output variables are defined by this macro; they are
-# precious and may be overridden on the ./configure command line:
-#
-# CURSES_LIB - library to add to xxx_LDADD
-# CURSES_CPPFLAGS - include paths to add to xxx_CPPFLAGS
-#
-# Neither the library listed in CURSES_LIB, nor the flags in
-# CURSES_CPPFLAGS are added to LIBS, respectively CPPFLAGS, by default.
-# You need to add both to the appropriate xxx_LDADD/xxx_CPPFLAGS line in
-# your Makefile.am. For example:
-#
-# prog_LDADD = @CURSES_LIB@
-# prog_CPPFLAGS = @CURSES_CPPFLAGS@
-#
-# If CURSES_LIB is set on the configure command line (such as by running
-# "./configure CURSES_LIB=-lmycurses"), then the only header searched for
-# is <curses.h>. If the user needs to specify an alternative path for a
-# library (such as for a non-standard NcurseW), the user should use the
-# LDFLAGS variable.
-#
-# The following shell variables may be defined by this macro:
-#
-# ax_cv_curses - set to "yes" if any Curses library found
-# ax_cv_curses_enhanced - set to "yes" if Enhanced functions present
-# ax_cv_curses_color - set to "yes" if color functions present
-# ax_cv_curses_obsolete - set to "yes" if obsolete features present
-#
-# ax_cv_ncursesw - set to "yes" if NcursesW library found
-# ax_cv_ncurses - set to "yes" if Ncurses library found
-# ax_cv_plaincurses - set to "yes" if plain Curses library found
-# ax_cv_curses_which - set to "ncursesw", "ncurses", "plaincurses" or "no"
-#
-# These variables can be used in your configure.ac to determine the level
-# of support you need from the Curses library. For example, if you must
-# have either Ncurses or NcursesW, you could include:
-#
-# AX_WITH_CURSES
-# if test "x$ax_cv_ncursesw" != xyes && test "x$ax_cv_ncurses" != xyes; then
-# AC_MSG_ERROR([requires either NcursesW or Ncurses library])
-# fi
-#
-# If any Curses library will do (but one must be present and must support
-# color), you could use:
-#
-# AX_WITH_CURSES
-# if test "x$ax_cv_curses" != xyes || test "x$ax_cv_curses_color" != xyes; then
-# AC_MSG_ERROR([requires an X/Open-compatible Curses library with color])
-# fi
-#
-# Certain preprocessor symbols and shell variables defined by this macro
-# can be used to determine various features of the Curses library. In
-# particular, HAVE_CURSES and ax_cv_curses are defined if the Curses
-# library found conforms to the traditional SysV and/or X/Open Base Curses
-# definition. Any working Curses library conforms to this level.
-#
-# HAVE_CURSES_ENHANCED and ax_cv_curses_enhanced are defined if the
-# library supports the X/Open Enhanced Curses definition. In particular,
-# the wide-character types attr_t, cchar_t and wint_t, the functions
-# wattr_set() and wget_wch() and the macros WA_NORMAL and _XOPEN_CURSES
-# are checked. The Ncurses library does NOT conform to this definition,
-# although NcursesW does.
-#
-# HAVE_CURSES_COLOR and ax_cv_curses_color are defined if the library
-# supports color functions and macros such as COLOR_PAIR, A_COLOR,
-# COLOR_WHITE, COLOR_RED and init_pair(). These are NOT part of the
-# X/Open Base Curses definition, but are part of the Enhanced set of
-# functions. The Ncurses library DOES support these functions, as does
-# NcursesW.
-#
-# HAVE_CURSES_OBSOLETE and ax_cv_curses_obsolete are defined if the
-# library supports certain features present in SysV and BSD Curses but not
-# defined in the X/Open definition. In particular, the functions
-# getattrs(), getcurx() and getmaxx() are checked.
-#
-# To use the HAVE_xxx_H preprocessor symbols, insert the following into
-# your system.h (or equivalent) header file:
-#
-# #if defined HAVE_NCURSESW_CURSES_H
-# # include <ncursesw/curses.h>
-# #elif defined HAVE_NCURSESW_H
-# # include <ncursesw.h>
-# #elif defined HAVE_NCURSES_CURSES_H
-# # include <ncurses/curses.h>
-# #elif defined HAVE_NCURSES_H
-# # include <ncurses.h>
-# #elif defined HAVE_CURSES_H
-# # include <curses.h>
-# #else
-# # error "SysV or X/Open-compatible Curses header file required"
-# #endif
-#
-# For previous users of this macro: you should not need to change anything
-# in your configure.ac or Makefile.am, as the previous (serial 10)
-# semantics are still valid. However, you should update your system.h (or
-# equivalent) header file to the fragment shown above. You are encouraged
-# also to make use of the extended functionality provided by this version
-# of AX_WITH_CURSES, as well as in the additional macros
-# AX_WITH_CURSES_PANEL, AX_WITH_CURSES_MENU and AX_WITH_CURSES_FORM.
-#
-# LICENSE
-#
-# Copyright (c) 2009 Mark Pulford <mark at kyne.com.au>
-# Copyright (c) 2009 Damian Pietras <daper at daper.net>
-# Copyright (c) 2012 Reuben Thomas <rrt at sc3d.org>
-# Copyright (c) 2011 John Zaitseff <J.Zaitseff at zap.org.au>
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-# Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# As a special exception, the respective Autoconf Macro's copyright owner
-# gives unlimited permission to copy, distribute and modify the configure
-# scripts that are the output of Autoconf when processing the Macro. You
-# need not follow the terms of the GNU General Public License when using
-# or distributing such scripts, even though portions of the text of the
-# Macro appear in them. The GNU General Public License (GPL) does govern
-# all other use of the material that constitutes the Autoconf Macro.
-#
-# This special exception to the GPL applies to versions of the Autoconf
-# Macro released by the Autoconf Archive. When you make and distribute a
-# modified version of the Autoconf Macro, you may extend this special
-# exception to the GPL to apply to your modified version as well.
-
-#serial 16
-
-# internal function to factorize common code that is used by both ncurses
-# and ncursesw
-AC_DEFUN([_FIND_CURSES_FLAGS], [
- AC_MSG_CHECKING([for $1 via pkg-config])
-
- _PKG_CONFIG([_ax_cv_$1_lib], [libs], [$1])
- _PKG_CONFIG([_ax_cv_$1_cppflags], [cflags], [$1])
-
- AS_IF([test "x$pkg_failed" = "xyes" || test "x$pkg_failed" = "xuntried"],[
- AC_MSG_RESULT([no])
- # No suitable .pc file found, have to find flags via fallback
- AC_CACHE_CHECK([for $1 via fallback], [ax_cv_$1], [
- AS_ECHO()
- pkg_cv__ax_cv_$1_lib="-l$1"
- pkg_cv__ax_cv_$1_cppflags="$CURSES_CPPFLAGS"
- LIBS="$ax_saved_LIBS $pkg_cv__ax_cv_$1_lib"
- CPPFLAGS="$ax_saved_CPPFLAGS $pkg_cv__ax_cv_$1_cppflags"
-
- AC_MSG_CHECKING([for initscr() with $pkg_cv__ax_cv_$1_lib])
- AC_LINK_IFELSE([AC_LANG_CALL([], [initscr])],
- [
- AC_MSG_RESULT([yes])
- AC_MSG_CHECKING([for nodelay() with $pkg_cv__ax_cv_$1_lib])
- AC_LINK_IFELSE([AC_LANG_CALL([], [nodelay])],[
- ax_cv_$1=yes
- ],[
- AC_MSG_RESULT([no])
- m4_if(
- [$1],[ncursesw],[pkg_cv__ax_cv_$1_lib="$pkg_cv__ax_cv_$1_lib -ltinfow"],
- [$1],[ncurses],[pkg_cv__ax_cv_$1_lib="$pkg_cv__ax_cv_$1_lib -ltinfo"]
- )
- LIBS="$ax_saved_LIBS $pkg_cv__ax_cv_$1_lib"
-
- AC_MSG_CHECKING([for nodelay() with $pkg_cv__ax_cv_$1_lib])
- AC_LINK_IFELSE([AC_LANG_CALL([], [nodelay])],[
- ax_cv_$1=yes
- ],[
- ax_cv_$1=no
- ])
- ])
- ],[
- ax_cv_$1=no
- ])
- ])
- ],[
- AC_MSG_RESULT([yes])
- # Found .pc file, using its information
- LIBS="$ax_saved_LIBS $pkg_cv__ax_cv_$1_lib"
- CPPFLAGS="$ax_saved_CPPFLAGS $pkg_cv__ax_cv_$1_cppflags"
- ax_cv_$1=yes
- ])
-])
-
-AU_ALIAS([MP_WITH_CURSES], [AX_WITH_CURSES])
-AC_DEFUN([AX_WITH_CURSES], [
- AC_ARG_VAR([CURSES_LIB], [linker library for Curses, e.g. -lcurses])
- AC_ARG_VAR([CURSES_CPPFLAGS], [preprocessor flags for Curses, e.g. -I/usr/include/ncursesw])
- AC_ARG_WITH([ncurses], [AS_HELP_STRING([--with-ncurses],
- [force the use of Ncurses or NcursesW])],
- [], [with_ncurses=check])
- AC_ARG_WITH([ncursesw], [AS_HELP_STRING([--without-ncursesw],
- [do not use NcursesW (wide character support)])],
- [], [with_ncursesw=check])
- AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-
- ax_saved_LIBS=$LIBS
- ax_saved_CPPFLAGS=$CPPFLAGS
-
- AS_IF([test "x$with_ncurses" = xyes || test "x$with_ncursesw" = xyes],
- [ax_with_plaincurses=no], [ax_with_plaincurses=check])
-
- ax_cv_curses_which=no
-
- # Test for NcursesW
- AS_IF([test "x$CURSES_LIB" = x && test "x$with_ncursesw" != xno], [
- _FIND_CURSES_FLAGS([ncursesw])
-
- AS_IF([test "x$ax_cv_ncursesw" = xno && test "x$with_ncursesw" = xyes], [
- AC_MSG_ERROR([--with-ncursesw specified but could not find NcursesW library])
- ])
-
- AS_IF([test "x$ax_cv_ncursesw" = xyes], [
- ax_cv_curses=yes
- ax_cv_curses_which=ncursesw
- CURSES_LIB="$pkg_cv__ax_cv_ncursesw_lib"
- CURSES_CPPFLAGS="$pkg_cv__ax_cv_ncursesw_cppflags"
- AC_DEFINE([HAVE_NCURSESW], [1], [Define to 1 if the NcursesW library is present])
- AC_DEFINE([HAVE_CURSES], [1], [Define to 1 if a SysV or X/Open compatible Curses library is present])
-
- AC_CACHE_CHECK([for working ncursesw/curses.h], [ax_cv_header_ncursesw_curses_h], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@define _XOPEN_SOURCE_EXTENDED 1
- @%:@include <ncursesw/curses.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- chtype c = COLOR_PAIR(1) & A_COLOR;
- attr_t d = WA_NORMAL;
- cchar_t e;
- wint_t f;
- int g = getattrs(stdscr);
- int h = getcurx(stdscr) + getmaxx(stdscr);
- initscr();
- init_pair(1, COLOR_WHITE, COLOR_RED);
- wattr_set(stdscr, d, 0, NULL);
- wget_wch(stdscr, &f);
- ]])],
- [ax_cv_header_ncursesw_curses_h=yes],
- [ax_cv_header_ncursesw_curses_h=no])
- ])
- AS_IF([test "x$ax_cv_header_ncursesw_curses_h" = xyes], [
- ax_cv_curses_enhanced=yes
- ax_cv_curses_color=yes
- ax_cv_curses_obsolete=yes
- AC_DEFINE([HAVE_CURSES_ENHANCED], [1], [Define to 1 if library supports X/Open Enhanced functions])
- AC_DEFINE([HAVE_CURSES_COLOR], [1], [Define to 1 if library supports color (enhanced functions)])
- AC_DEFINE([HAVE_CURSES_OBSOLETE], [1], [Define to 1 if library supports certain obsolete features])
- AC_DEFINE([HAVE_NCURSESW_CURSES_H], [1], [Define to 1 if <ncursesw/curses.h> is present])
- ])
-
- AC_CACHE_CHECK([for working ncursesw.h], [ax_cv_header_ncursesw_h], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@define _XOPEN_SOURCE_EXTENDED 1
- @%:@include <ncursesw.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- chtype c = COLOR_PAIR(1) & A_COLOR;
- attr_t d = WA_NORMAL;
- cchar_t e;
- wint_t f;
- int g = getattrs(stdscr);
- int h = getcurx(stdscr) + getmaxx(stdscr);
- initscr();
- init_pair(1, COLOR_WHITE, COLOR_RED);
- wattr_set(stdscr, d, 0, NULL);
- wget_wch(stdscr, &f);
- ]])],
- [ax_cv_header_ncursesw_h=yes],
- [ax_cv_header_ncursesw_h=no])
- ])
- AS_IF([test "x$ax_cv_header_ncursesw_h" = xyes], [
- ax_cv_curses_enhanced=yes
- ax_cv_curses_color=yes
- ax_cv_curses_obsolete=yes
- AC_DEFINE([HAVE_CURSES_ENHANCED], [1], [Define to 1 if library supports X/Open Enhanced functions])
- AC_DEFINE([HAVE_CURSES_COLOR], [1], [Define to 1 if library supports color (enhanced functions)])
- AC_DEFINE([HAVE_CURSES_OBSOLETE], [1], [Define to 1 if library supports certain obsolete features])
- AC_DEFINE([HAVE_NCURSESW_H], [1], [Define to 1 if <ncursesw.h> is present])
- ])
-
- AC_CACHE_CHECK([for working ncurses.h], [ax_cv_header_ncurses_h_with_ncursesw], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@define _XOPEN_SOURCE_EXTENDED 1
- @%:@include <ncurses.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- chtype c = COLOR_PAIR(1) & A_COLOR;
- attr_t d = WA_NORMAL;
- cchar_t e;
- wint_t f;
- int g = getattrs(stdscr);
- int h = getcurx(stdscr) + getmaxx(stdscr);
- initscr();
- init_pair(1, COLOR_WHITE, COLOR_RED);
- wattr_set(stdscr, d, 0, NULL);
- wget_wch(stdscr, &f);
- ]])],
- [ax_cv_header_ncurses_h_with_ncursesw=yes],
- [ax_cv_header_ncurses_h_with_ncursesw=no])
- ])
- AS_IF([test "x$ax_cv_header_ncurses_h_with_ncursesw" = xyes], [
- ax_cv_curses_enhanced=yes
- ax_cv_curses_color=yes
- ax_cv_curses_obsolete=yes
- AC_DEFINE([HAVE_CURSES_ENHANCED], [1], [Define to 1 if library supports X/Open Enhanced functions])
- AC_DEFINE([HAVE_CURSES_COLOR], [1], [Define to 1 if library supports color (enhanced functions)])
- AC_DEFINE([HAVE_CURSES_OBSOLETE], [1], [Define to 1 if library supports certain obsolete features])
- AC_DEFINE([HAVE_NCURSES_H], [1], [Define to 1 if <ncurses.h> is present])
- ])
-
- AS_IF([test "x$ax_cv_header_ncursesw_curses_h" = xno && test "x$ax_cv_header_ncursesw_h" = xno && test "x$ax_cv_header_ncurses_h_with_ncursesw" = xno], [
- AC_MSG_WARN([could not find a working ncursesw/curses.h, ncursesw.h or ncurses.h])
- ])
- ])
- ])
- unset pkg_cv__ax_cv_ncursesw_lib
- unset pkg_cv__ax_cv_ncursesw_cppflags
-
- # Test for Ncurses
- AS_IF([test "x$CURSES_LIB" = x && test "x$with_ncurses" != xno && test "x$ax_cv_curses_which" = xno], [
- _FIND_CURSES_FLAGS([ncurses])
-
- AS_IF([test "x$ax_cv_ncurses" = xno && test "x$with_ncurses" = xyes], [
- AC_MSG_ERROR([--with-ncurses specified but could not find Ncurses library])
- ])
-
- AS_IF([test "x$ax_cv_ncurses" = xyes], [
- ax_cv_curses=yes
- ax_cv_curses_which=ncurses
- CURSES_LIB="$pkg_cv__ax_cv_ncurses_lib"
- CURSES_CPPFLAGS="$pkg_cv__ax_cv_ncurses_cppflags"
- AC_DEFINE([HAVE_NCURSES], [1], [Define to 1 if the Ncurses library is present])
- AC_DEFINE([HAVE_CURSES], [1], [Define to 1 if a SysV or X/Open compatible Curses library is present])
-
- AC_CACHE_CHECK([for working ncurses/curses.h], [ax_cv_header_ncurses_curses_h], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@include <ncurses/curses.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- chtype c = COLOR_PAIR(1) & A_COLOR;
- int g = getattrs(stdscr);
- int h = getcurx(stdscr) + getmaxx(stdscr);
- initscr();
- init_pair(1, COLOR_WHITE, COLOR_RED);
- ]])],
- [ax_cv_header_ncurses_curses_h=yes],
- [ax_cv_header_ncurses_curses_h=no])
- ])
- AS_IF([test "x$ax_cv_header_ncurses_curses_h" = xyes], [
- ax_cv_curses_color=yes
- ax_cv_curses_obsolete=yes
- AC_DEFINE([HAVE_CURSES_COLOR], [1], [Define to 1 if library supports color (enhanced functions)])
- AC_DEFINE([HAVE_CURSES_OBSOLETE], [1], [Define to 1 if library supports certain obsolete features])
- AC_DEFINE([HAVE_NCURSES_CURSES_H], [1], [Define to 1 if <ncurses/curses.h> is present])
- ])
-
- AC_CACHE_CHECK([for working ncurses.h], [ax_cv_header_ncurses_h], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@include <ncurses.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- chtype c = COLOR_PAIR(1) & A_COLOR;
- int g = getattrs(stdscr);
- int h = getcurx(stdscr) + getmaxx(stdscr);
- initscr();
- init_pair(1, COLOR_WHITE, COLOR_RED);
- ]])],
- [ax_cv_header_ncurses_h=yes],
- [ax_cv_header_ncurses_h=no])
- ])
- AS_IF([test "x$ax_cv_header_ncurses_h" = xyes], [
- ax_cv_curses_color=yes
- ax_cv_curses_obsolete=yes
- AC_DEFINE([HAVE_CURSES_COLOR], [1], [Define to 1 if library supports color (enhanced functions)])
- AC_DEFINE([HAVE_CURSES_OBSOLETE], [1], [Define to 1 if library supports certain obsolete features])
- AC_DEFINE([HAVE_NCURSES_H], [1], [Define to 1 if <ncurses.h> is present])
- ])
-
- AS_IF([test "x$ax_cv_header_ncurses_curses_h" = xno && test "x$ax_cv_header_ncurses_h" = xno], [
- AC_MSG_WARN([could not find a working ncurses/curses.h or ncurses.h])
- ])
- ])
- ])
- unset pkg_cv__ax_cv_ncurses_lib
- unset pkg_cv__ax_cv_ncurses_cppflags
-
- # Test for plain Curses (or if CURSES_LIB was set by user)
- AS_IF([test "x$with_plaincurses" != xno && test "x$ax_cv_curses_which" = xno], [
- AS_IF([test "x$CURSES_LIB" != x], [
- LIBS="$ax_saved_LIBS $CURSES_LIB"
- ], [
- LIBS="$ax_saved_LIBS -lcurses"
- ])
-
- AC_CACHE_CHECK([for Curses library], [ax_cv_plaincurses], [
- AC_LINK_IFELSE([AC_LANG_CALL([], [initscr])],
- [ax_cv_plaincurses=yes], [ax_cv_plaincurses=no])
- ])
-
- AS_IF([test "x$ax_cv_plaincurses" = xyes], [
- ax_cv_curses=yes
- ax_cv_curses_which=plaincurses
- AS_IF([test "x$CURSES_LIB" = x], [
- CURSES_LIB="-lcurses"
- ])
- AC_DEFINE([HAVE_CURSES], [1], [Define to 1 if a SysV or X/Open compatible Curses library is present])
-
- # Check for base conformance (and header file)
-
- AC_CACHE_CHECK([for working curses.h], [ax_cv_header_curses_h], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@include <curses.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- initscr();
- ]])],
- [ax_cv_header_curses_h=yes],
- [ax_cv_header_curses_h=no])
- ])
- AS_IF([test "x$ax_cv_header_curses_h" = xyes], [
- AC_DEFINE([HAVE_CURSES_H], [1], [Define to 1 if <curses.h> is present])
-
- # Check for X/Open Enhanced conformance
-
- AC_CACHE_CHECK([for X/Open Enhanced Curses conformance], [ax_cv_plaincurses_enhanced], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@define _XOPEN_SOURCE_EXTENDED 1
- @%:@include <curses.h>
- @%:@ifndef _XOPEN_CURSES
- @%:@error "this Curses library is not enhanced"
- "this Curses library is not enhanced"
- @%:@endif
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- chtype c = COLOR_PAIR(1) & A_COLOR;
- attr_t d = WA_NORMAL;
- cchar_t e;
- wint_t f;
- initscr();
- init_pair(1, COLOR_WHITE, COLOR_RED);
- wattr_set(stdscr, d, 0, NULL);
- wget_wch(stdscr, &f);
- ]])],
- [ax_cv_plaincurses_enhanced=yes],
- [ax_cv_plaincurses_enhanced=no])
- ])
- AS_IF([test "x$ax_cv_plaincurses_enhanced" = xyes], [
- ax_cv_curses_enhanced=yes
- ax_cv_curses_color=yes
- AC_DEFINE([HAVE_CURSES_ENHANCED], [1], [Define to 1 if library supports X/Open Enhanced functions])
- AC_DEFINE([HAVE_CURSES_COLOR], [1], [Define to 1 if library supports color (enhanced functions)])
- ])
-
- # Check for color functions
-
- AC_CACHE_CHECK([for Curses color functions], [ax_cv_plaincurses_color], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@define _XOPEN_SOURCE_EXTENDED 1
- @%:@include <curses.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- chtype c = COLOR_PAIR(1) & A_COLOR;
- initscr();
- init_pair(1, COLOR_WHITE, COLOR_RED);
- ]])],
- [ax_cv_plaincurses_color=yes],
- [ax_cv_plaincurses_color=no])
- ])
- AS_IF([test "x$ax_cv_plaincurses_color" = xyes], [
- ax_cv_curses_color=yes
- AC_DEFINE([HAVE_CURSES_COLOR], [1], [Define to 1 if library supports color (enhanced functions)])
- ])
-
- # Check for obsolete functions
-
- AC_CACHE_CHECK([for obsolete Curses functions], [ax_cv_plaincurses_obsolete], [
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[
- @%:@include <curses.h>
- ]], [[
- chtype a = A_BOLD;
- int b = KEY_LEFT;
- int g = getattrs(stdscr);
- int h = getcurx(stdscr) + getmaxx(stdscr);
- initscr();
- ]])],
- [ax_cv_plaincurses_obsolete=yes],
- [ax_cv_plaincurses_obsolete=no])
- ])
- AS_IF([test "x$ax_cv_plaincurses_obsolete" = xyes], [
- ax_cv_curses_obsolete=yes
- AC_DEFINE([HAVE_CURSES_OBSOLETE], [1], [Define to 1 if library supports certain obsolete features])
- ])
- ])
-
- AS_IF([test "x$ax_cv_header_curses_h" = xno], [
- AC_MSG_WARN([could not find a working curses.h])
- ])
- ])
- ])
-
- AS_IF([test "x$ax_cv_curses" != xyes], [ax_cv_curses=no])
- AS_IF([test "x$ax_cv_curses_enhanced" != xyes], [ax_cv_curses_enhanced=no])
- AS_IF([test "x$ax_cv_curses_color" != xyes], [ax_cv_curses_color=no])
- AS_IF([test "x$ax_cv_curses_obsolete" != xyes], [ax_cv_curses_obsolete=no])
-
- LIBS=$ax_saved_LIBS
- CPPFLAGS=$ax_saved_CPPFLAGS
-
- unset ax_saved_LIBS
- unset ax_saved_CPPFLAGS
-])dnl
diff --git a/debian/ax_with_htslib.m4 b/debian/ax_with_htslib.m4
deleted file mode 100644
index 33831da..0000000
--- a/debian/ax_with_htslib.m4
+++ /dev/null
@@ -1,140 +0,0 @@
-# ===========================================================================
-# http://www.gnu.org/software/autoconf-archive/ax_with_htslib.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-# AX_WITH_HTSLIB
-#
-# DESCRIPTION
-#
-# This macro checks whether HTSlib <http://www.htslib.org/> is installed
-# or nearby, and adds a --with-htslib=DIR option to the configure script
-# for specifying the location. It locates either an installation prefix
-# (with 'include' and 'lib' subdirectories) or an HTSlib source tree, as
-# HTSlib is fast-moving and users may wish to use an in-development tree.
-#
-# Different checks occur depending on the --with-htslib argument given:
-#
-# With --with-htslib=DIR, checks whether DIR is a source tree or contains
-# a working installation.
-# By default, searches for a source tree (with a name matching htslib*)
-# within or alongside $srcdir. Produces AC_MSG_ERROR if there are
-# several equally-likely candidates. If there are none, checks for
-# a working default installation.
-# With --with-htslib=system, checks for a working default installation.
-#
-# If a source tree is found or specified, it is added to AC_CONFIG_SUBDIRS
-# (which unfortunately may cause a "you should use literals" warning when
-# autoconf is run).
-#
-# The following output variables are set by this macro:
-#
-# HTSDIR Directory containing HTSlib source tree
-# HTSLIB_CPPFLAGS Preprocessor flags for compiling with HTSlib
-# HTSLIB_LDFLAGS Linker flags for linking with HTSlib
-#
-# The following shell variables may be defined:
-#
-# ax_cv_htslib Set to "yes" if HTSlib was found
-# ax_cv_htslib_which Set to "source", "install", or "none"
-#
-# LICENSE
-#
-# Copyright (C) 2015 Genome Research Ltd
-#
-# Copying and distribution of this file, with or without modification, are
-# permitted in any medium without royalty provided the copyright notice
-# and this notice are preserved. This file is offered as-is, without any
-# warranty.
-
-#serial 1
-
-AC_DEFUN([AX_WITH_HTSLIB],
-[AC_ARG_WITH([htslib],
- [AS_HELP_STRING([--with-htslib=DIR],
- [use the HTSlib source tree or installation in DIR])
-dnl Not indented, to avoid extra whitespace outwith AS_HELP_STRING()
-AS_HELP_STRING([--with-htslib=system],
- [use only a system HTSlib installation])],
- [], [with_htslib=search])
-
-case $with_htslib in
-yes|search)
- AC_MSG_CHECKING([location of HTSlib source tree])
- case $srcdir in
- .) srcp= ;;
- *) srcp=$srcdir/ ;;
- esac
- found=
- for dir in ${srcp}htslib* -- ${srcp}../htslib -- ${srcp}../htslib*
- do
- if test "$dir" = "--"; then
- test -n "$found" && break
- elif test -f "$dir/hts.c" && test -f "$dir/htslib/hts.h"; then
- found="${found}1"
- HTSDIR=$dir
- fi
- done
- if test -z "$found"; then
- AC_MSG_RESULT([none found])
- ax_cv_htslib_which=system
- elif test "$found" = 1; then
- AC_MSG_RESULT([$HTSDIR])
- ax_cv_htslib_which=source
- else
- AC_MSG_RESULT([several directories found])
- AC_MSG_ERROR([use --with-htslib=DIR to select which HTSlib to use])
- fi
- ;;
-no) ax_cv_htslib_which=none ;;
-system) ax_cv_htslib_which=system ;;
-*)
- HTSDIR=$with_htslib
- if test -f "$HTSDIR/hts.c" && test -f "$HTSDIR/htslib/hts.h"; then
- ax_cv_htslib_which=source
- else
- ax_cv_htslib_which=install
- fi
- ;;
-esac
-
-case $ax_cv_htslib_which in
-source)
- ax_cv_htslib=yes
- HTSLIB_CPPFLAGS="-I$HTSDIR"
- HTSLIB_LDFLAGS="-L$HTSDIR"
- # We can't use a literal, because $HTSDIR is user-provided and variable
- AC_CONFIG_SUBDIRS($HTSDIR)
- ;;
-system)
- AC_CHECK_HEADER([htslib/sam.h],
- [AC_CHECK_LIB(hts, hts_version, [ax_cv_htslib=yes], [ax_cv_htslib=no])],
- [ax_cv_htslib=no], [;])
- ax_cv_htslib_which=install
- HTSDIR=
- HTSLIB_CPPFLAGS=
- HTSLIB_LDFLAGS=
- ;;
-install)
- ax_saved_CPPFLAGS=$CPPFLAGS
- ax_saved_LDFLAGS=$LDFLAGS
- HTSLIB_CPPFLAGS="-I$HTSDIR/include"
- HTSLIB_LDFLAGS="-L$HTSDIR/lib"
- CPPFLAGS="$CPPFLAGS $HTSLIB_CPPFLAGS"
- LDFLAGS="$LDFLAGS $HTSLIB_LDFLAGS"
- AC_CHECK_HEADER([htslib/sam.h],
- [AC_CHECK_LIB(hts, hts_version, [ax_cv_htslib=yes], [ax_cv_htslib=no])],
- [ax_cv_htslib=no], [;])
- HTSDIR=
- CPPFLAGS=$ax_saved_CPPFLAGS
- LDFLAGS=$ax_saved_LDFLAGS
- ;;
-none)
- ax_cv_htslib=no
- ;;
-esac
-
-AC_SUBST([HTSDIR])
-AC_SUBST([HTSLIB_CPPFLAGS])
-AC_SUBST([HTSLIB_LDFLAGS])])
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index a28849d..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,362 +0,0 @@
-samtools (1.5-2) UNRELEASED; urgency=medium
-
- * debian/upstream/metadata:
- - added references to registries
- - yamllint cleanliness
-
- -- Steffen Moeller <moeller at debian.org> Sun, 10 Sep 2017 11:33:03 +0200
-
-samtools (1.5-1) unstable; urgency=medium
-
- * New upstream version
- * debhelper 10
- * Standards-Version: 4.0.0 (no changes needed)
- * Bump versioned Build-Depends: libhts-dev (>= 1.5)
- * Add recommends to cwl-tool and fix shebang line of cwl scripts
-
- -- Andreas Tille <tille at debian.org> Fri, 04 Aug 2017 17:58:11 +0200
-
-samtools (1.4.1-1) unstable; urgency=medium
-
- * New upstream version
- Closes: #865008
-
- -- Andreas Tille <tille at debian.org> Sun, 18 Jun 2017 21:11:09 +0200
-
-samtools (1.4-2) experimental; urgency=medium
-
- * Rebuild against properly named libhts which had a soname change
-
- -- Andreas Tille <tille at debian.org> Tue, 21 Mar 2017 21:42:52 +0100
-
-samtools (1.4-1) experimental; urgency=low
-
- [ Anton Khodak ]
- * CWL files added
-
- [ Andreas Tille ]
- * New upstream version
- * Replaced Dominique Belhachemi by my own ID as Uploader
- * Bump versioned Depends: libhts-dev (>= 1.4)
-
- -- Andreas Tille <tille at debian.org> Tue, 21 Mar 2017 16:24:50 +0100
-
-samtools (1.3.1-3) unstable; urgency=medium
-
- * Team upload.
- * Refresh existing patches.
- * Fix FTBFS by including upstream patches.
- Closes: #840974
-
- -- Sascha Steinbiss <satta at debian.org> Mon, 24 Oct 2016 16:33:30 +0200
-
-samtools (1.3.1-2) unstable; urgency=medium
-
- 952da10 Build-depends on htslib 1.3.1. (Closes: #822701)
-
- -- Charles Plessy <plessy at debian.org> Sun, 08 May 2016 21:11:12 +0900
-
-samtools (1.3.1-1) unstable; urgency=medium
-
- 0c961b2 Merge tag '1.3.1' into debian/unstable
- 3666b08 Conforms with Policy version 3.9.8.
-
- -- Charles Plessy <plessy at debian.org> Tue, 26 Apr 2016 21:12:05 +0900
-
-samtools (1.3-1) unstable; urgency=medium
-
- [ Charles Plessy ]
- 95c561d Merge tag '1.3' into debian/unstable
-
- [ Sascha Steinbiss ]
- * Team upload
- * Adapt to new build system
- * Use secure Vcs-Git
- * Patch autoconf not to use make to extract version
- * Clean up d/copyright
- * Fix spelling
-
- -- Sascha Steinbiss <sascha at steinbiss.name> Fri, 05 Feb 2016 08:32:40 +0000
-
-samtools (1.2-4) unstable; urgency=medium
-
- * Team upload
- * Fix synopsis
- * Fix Vcs-Browser
- * Add lintian-override to document that the language extensions are used
- internally
- * Fix spelling
- * Fix Mayhem issues caused by unlimited reads of gz input
- Closes: #716199, #716197
- * Provide some more manpages
-
- -- Andreas Tille <tille at debian.org> Mon, 21 Dec 2015 13:49:05 +0100
-
-samtools (1.2-3) unstable; urgency=medium
-
- * Team upload.
- [19de55e] Revert "Allow all scripts to be called without language extensions"
- (Closes: #799698)
- [658179c] Revert "Remove source files not present in upstream tarball"
-
- -- Afif Elghraoui <afif at ghraoui.name> Tue, 22 Sep 2015 06:42:01 -0700
-
-samtools (1.2-2) unstable; urgency=medium
-
- [ Afif Elghraoui ]
- eb56918 Add patch to fix building on i386
- 702d289 Remove source files not present in upstream tarball
- 7afd302 Allow all scripts to be called without language extensions
-
- [ Charles Plessy ]
- 876f680 Document the update workflow of the source package's repository.
-
- -- Charles Plessy <plessy at debian.org> Thu, 17 Sep 2015 20:21:15 +0900
-
-samtools (1.2-1) unstable; urgency=medium
-
- a5f28a0 Merge tag '1.2' into debian/unstable
- c542e84 Build-depend on libhts-dev 1.2.
- 6c2d9cd Build-depend and recommend libio-pty-perl.
- (Needed for the regression tests.)
- 23e44a4 Removed fix_coverage_cap.patch, obsoleted by -d option.
- 1091aab Refreshed fuzzy patches.
-
- -- Charles Plessy <plessy at debian.org> Tue, 16 Jun 2015 06:12:12 +0900
-
-samtools (1.1-1) unstable; urgency=medium
-
- c448d48 Merge tag '1.1' into debian/unstable
- d12ab1e Merge branch 'develop' into debian/unstable to solve issue
- with regression tests needing a pseudo-terminal.
- See https://github.com/samtools/samtools/issues/300
- c87f8ce Add autopkg tests.
- 7c50b7b Removed python-wrong-path-for-interpreter.patch
- (issue solved upstream).
-
- -- Charles Plessy <plessy at debian.org> Thu, 25 Sep 2014 09:04:20 +0900
-
-samtools (1.0-1) unstable; urgency=low
-
- [ Charles Plessy ]
- 54ca0c1 Merge with upstream's develop branch.
- 241a731 razip was removed from samtools.
- dc25b81 debian/watch: also track release candidates.
- 07aaf71 Indicate repository layout in git-buildpackage conf. file.
-
- * debian/control
-
- 8bc0fb7 Stop depending on CDBS and quilt.
- e613142 Drop static libbam, replaced by the dynamic libhts (Closes: #544976).
- b5f284c Build-depend on tabix for the regression tests, that use bgzip.
- 9febad6 Recommend python since samtools installs a python script in /usr/bin.
- 0446ed7 Conforms with Policy 3.5.6.
-
- * New package samtools-test
-
- 7293cca Create a test package for the test data.
- 6bd7b89 Adjust install file for two-package build.
- caff5f5 Clean before installing test files (this removes compiled files).
-
- * debian/rules
-
- c930cbb Use Debhelper instead of CDBS and rely on the new upstream Makefile.
- 845581d Adjust for dynamic linking.
- d859052 Indicate path to packaged bgzip.
- cb23517 Mimick Makefile's CPPFLAGS.
- aee1c6c Allow parallel building.
- edc09e4 Prevent Makefile from including local copy of the HTSlib.
-
- * debian/patches
-
- f73d764 d062687 Refreshed patches.
- 2c52f6a 61ac2c5 Forwarded patches on GitHub.
- b8eb378 Dropped fix_example_makefile.patch, obsolete.
- 66079b9 Correct path to python interpreter.
- 218c1c1 Modify Makefile to facilitate dynamic linking.
-
- [ Andreas Tille ]
- f69f412 Versioned Build-Depends libhts-dev >= 1.0
-
- -- Charles Plessy <plessy at debian.org> Tue, 23 Sep 2014 09:36:27 +0900
-
-samtools (0.1.19-1) unstable; urgency=low
-
- cb9bd50 Imported https://github.com/samtools/samtools/archive/0.1.19.tar.gz
- 1e6d442 new tools ace2sam, bamcheck, plot-bamcheck, r2plot.lua, vcfutils.lua.
- 2f2d5c5 debian/copyright: collapsed copyright notices from main holders.
- e194f9e Removed fix_float_tag.patch, corrected upstream.
- 9c0e543 Refreshed patches.
- 3e7e0ec Renamed no-pileup-tests.patch fix_example_makefile.patch.
- 8d8550e example/Makefile: pass -lpthread when compiling calDepth.
- fc32243 harden by concatenating Debian's LDFLAGS and CPPFLAGS in CFLAGS.
- 6f75b48 debian/control: normalised with cme.
- e13bb5e debian/control: normalised VCS URLs.
- a1b1b00 debian/control: deleted DM-Upload-Allowed field.
- ddcd54e Use NEWS as upstream changelog.
- 81ad760 Conforms to Policy version 3.9.4.
-
- -- Charles Plessy <plessy at debian.org> Sat, 30 Mar 2013 17:31:47 +0900
-
-samtools (0.1.18-4) unstable; urgency=low
-
- [Charles Plessy]
- * debian/copyright: Normalised with config-model
- * added bash completion
- * Build-depend on Debhelper 9
- * Pass build flaggs set by dpkg-buildflags via Debhelper
- * Moved DOI and PubMed to the Reference namespace
- * Do not compress the example binary alignment file
- * Mention ‘SAMtools’ in libbam-dev's description
-
- [Andreas Tille]
- * debian/upstream: replaced BibTeX entry
-
- [Dominique Belhachemi]
- * added patch to fix float tag bug (upstream bug 71)
-
- -- Dominique Belhachemi <domibel at debian.org> Fri, 21 Dec 2012 12:09:18 -0500
-
-samtools (0.1.18-3) unstable; urgency=low
-
- * upload 0.1.18-2 closed the wrong bug, correction:
- * added patch to fix segfault in mpileup (Closes: #653042)
- * added patch to fix coverage cap (Closes: #671524)
-
- -- Dominique Belhachemi <domibel at debian.org> Fri, 27 Jul 2012 11:11:41 -0400
-
-samtools (0.1.18-2) unstable; urgency=low
-
- * added patch to fix segfault in mpileup (Closes: #544976)
- * added patch to fix coverage cap (Closes: #671524)
-
- -- Dominique Belhachemi <domibel at debian.org> Thu, 26 Jul 2012 18:30:48 -0400
-
-samtools (0.1.18-1) unstable; urgency=low
-
- [b990d9d3] New upstream release.
- [d59366b9] Updated copyright statements.
- [aa08abe5] Compress binary packages with xz.
- [9a36c994] Added a DEP 3 header to debian/patches/no-pileup-tests.patch.
-
- -- Charles Plessy <plessy at debian.org> Tue, 06 Sep 2011 16:52:04 +0900
-
-samtools (0.1.17-1) unstable; urgency=low
-
- * New upstream release 0.1.17 (6 July, 2011).
- * bcftools.1 and samtools.1 merged (debian/rules, debian/samtools.manpages).
- * Patched the example makefile to not use samtools pileup, deprecated.
- (debian/control, debian/rules, debian/patches/series,
- debian/patches/no-pileup-tests.patch)
-
- -- Charles Plessy <plessy at debian.org> Mon, 11 Jul 2011 21:04:30 +0900
-
-samtools (0.1.16-1) unstable; urgency=low
-
- * New upstream release, implementing SAM Spec v1.4.
- * Distribute sam.h and the other headers in /usr/include/samtools.
- Closes: #625622, #626044, LP: #690997.
-
- -- Charles Plessy <plessy at debian.org> Thu, 19 May 2011 13:24:25 +0900
-
-samtools (0.1.15-1) unstable; urgency=low
-
- * New upstream release.
- * Incremented Standards-Version to reflect conformance with Policy 3.9.2
- (debian/control, no changes needed).
- * debian/watch directly interrogates and downloads froms SourceForge.
-
- -- Charles Plessy <plessy at debian.org> Tue, 12 Apr 2011 19:44:41 +0900
-
-samtools (0.1.14-1) unstable; urgency=low
-
- * New upstream release.
-
- -- Charles Plessy <plessy at debian.org> Tue, 29 Mar 2011 13:13:54 +0900
-
-samtools (0.1.13-1) unstable; urgency=low
-
- * New upstream release:
- - backward incompatible with VCF/BCF generated by samtools older
- than r921 inclusive (0.1.12a is r862);
- - buildable with with ld --as-needed (Closes: #606004);
- - the helper tool bcf-fix.pl was removed.
- * Install the manual page of bcftools (debian/rules,
- debian/samtools.manpages). Closes: #616025.
-
- -- Charles Plessy <plessy at debian.org> Wed, 02 Mar 2011 13:54:34 +0900
-
-samtools (0.1.12a-1) unstable; urgency=low
-
- * New upstream release.
- * Use bzip2 compression for the binary packages (debian/rules).
- * Incremented debhelper compatibility level to 8, in case ‘--‘ is not
- available in earlier versions (debian/compat, debian/control).
-
- -- Charles Plessy <plessy at debian.org> Fri, 03 Dec 2010 13:32:21 +0900
-
-samtools (0.1.10-1) unstable; urgency=low
-
- * New upstream release.
-
- -- Charles Plessy <plessy at debian.org> Wed, 17 Nov 2010 20:32:40 +0900
-
-samtools (0.1.9-1) unstable; urgency=low
-
- * New upstream release, and new program: bcftools.
- * debian/rules, debian/samtools.install: install bcftools, its manpage,
- and bcf-fix.pl and vcfutils.pl.
- * debian/samtools.examples, debian/rules: install and clean new examples.
- * Incremented Standards-Version to reflect conformance with Policy 3.9.1
- (debian/control, no changes needed).
- * Updated and reformatted debian/copyright according to latest DEP-5 draft.
-
- -- Charles Plessy <plessy at debian.org> Mon, 01 Nov 2010 20:06:32 +0900
-
-samtools (0.1.8-1) unstable; urgency=low
-
- * New upstream release.
- * Updated the VCS fields to the current Git repository (debian/control).
- * Install python scripts in usr/share/samtools, and suggest python
- (debian/samtools.install, debian/rules, debian/control).
- * Incremented Standards-Version to reflect conformance with Policy 3.9.0
- (debian/control, no changes needed).
- * Added notes on how the package is tested (debian/README.Debian).
-
- -- Charles Plessy <plessy at debian.org> Wed, 14 Jul 2010 14:08:59 +0900
-
-samtools (0.1.7a~dfsg-1) unstable; urgency=low
-
- * New upstream release: new script sam2vcf.pl, and many other changes.
- * Package converted to the format ‘3.0 (quilt)’ (debian/source/format).
- * Wrote a manual page for razip (debian/razip.1).
- * Better clean the example directory to make the source package
- buildable twice in a row (debian/rules).
-
- -- Charles Plessy <plessy at debian.org> Tue, 17 Nov 2009 21:38:24 +0900
-
-samtools (0.1.6~dfsg-1) unstable; urgency=low
-
- * New upstream release.
- * Upstream sources repackaged to remove the win32 directory:
- - The removal was done with git-import-orig in a parallel Git repository,
- and the corresponding debian/gbp.conf file was added to this package.
- - Mangled the ~dfsg string of the Debian version in debian/watch.
- - Added a debian/README.source file and explanations in debian/copyright.
-
- -- Charles Plessy <plessy at debian.org> Mon, 14 Sep 2009 21:58:36 +0900
-
-samtools (0.1.5c-2) unstable; urgency=low
-
- * Rebuilt with -fPIC (debian/rules, README.Debian).
- * Small corrections to the description from upstream (debian/changelog).
- * Incremented Standards-Version to reflect conformance with Policy 3.8.3
- (no changes needed).
-
- -- Charles Plessy <plessy at debian.org> Thu, 03 Sep 2009 19:29:40 +0900
-
-samtools (0.1.5c-1) unstable; urgency=low
-
- * Initial release (Closes: #540453)
-
- -- Charles Plessy <plessy at debian.org> Sun, 16 Aug 2009 14:38:20 +0900
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index cc4a03a..0000000
--- a/debian/control
+++ /dev/null
@@ -1,46 +0,0 @@
-Source: samtools
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Charles Plessy <plessy at debian.org>,
- Andreas Tille <tille at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 10),
- bash-completion,
-# libio-pty-perl is needed by the regression test.
- libio-pty-perl,
- libncurses5-dev,
- libhts-dev (>= 1.5),
- zlib1g-dev,
- automake,
- autoconf-archive,
- pkg-config,
- tabix (>= 1.0)
-# tabix is needed for the regression tests.
-Standards-Version: 4.0.0
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/samtools.git
-Vcs-Git: https://anonscm.debian.org/git/debian-med/samtools.git
-Homepage: http://www.htslib.org/
-
-Package: samtools
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends}
-Recommends: python,
- cwltool
-Description: processing sequence alignments in SAM and BAM formats
- Samtools is a set of utilities that manipulate nucleotide sequence alignments
- in the binary BAM format. It imports from and exports to the ascii SAM
- (Sequence Alignment/Map) format, does sorting, merging and indexing, and allows
- one to retrieve reads in any regions swiftly. It is designed to work on a
- stream, and is able to open a BAM (not SAM) file on a remote FTP or HTTP
- server.
-
-Package: samtools-test
-Architecture: all
-Depends: ${misc:Depends}
-Recommends: libio-pty-perl
-Description: test files for the samtools package
- Samtools is a set of utilities that manipulate nucleotide sequence alignments
- in the binary BAM format.
- .
- This package contains test files for the samtools package.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 05fba1c..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,75 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Source: https://github.com/samtools/samtools/archive/1.3.1.tar.gz
-
-Files: *
-Copyright: © 2008–2016, Genome Research Ltd. (GRL)
- © 2003–2006, 2008–2011, by Heng Li <lh3lh3 at live.co.uk>
- © 2009–2012 Broad Institute
- © 2010 Illumina, Inc.
- © 2012–2013 Peter Cock, The James Hutton Institute.
-License: Expat
-
-Files: win32/xcurses.h win32/libcurses.a
-Copyright: Public Domain
-License: public-domain
- This file is part of PDCurses (http://pdcurses.sourceforge.net/),
- which is in the public domain.
-
-Files: win32/z*.h win32/libz.a
-Copyright: © 1995-2005 Jean-loup Gailly and Mark Adler
-License: zlib
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any damages
- arising from the use of this software.
- .
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it
- freely, subject to the following restrictions:
- .
- 1. The origin of this software must not be misrepresented; you must not
- claim that you wrote the original software. If you use this software
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
- .
- Jean-loup Gailly Mark Adler
- jloup at gzip.org madler at alumni.caltech.edu
-
-Files: debian/*.cwl
-Copyright: the CWL project
-License: Apache-2.0
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- .
- http://www.apache.org/licenses/LICENSE-2.0
- .
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- .
- On Debian systems, the complete text of the Apache 2.0 License
- can be found in /usr/share/common-licenses/Apache-2.0 file.
-
-License: Expat
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- .
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- .
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
diff --git a/debian/gbp.conf b/debian/gbp.conf
deleted file mode 100644
index 1fc6fcc..0000000
--- a/debian/gbp.conf
+++ /dev/null
@@ -1,11 +0,0 @@
-# This package uses pristine-tar, and does not follow the canonical layout of git-buildpackage.
-
-[DEFAULT]
-upstream-branch = develop
-debian-branch = debian/unstable
-upstream-tag = %(version)s
-pristine-tar = True
-
-[dch]
-# include 7 digits of the commit id in the changelog enty:
-id-length = 7
diff --git a/debian/mans/ace2sam.1 b/debian/mans/ace2sam.1
deleted file mode 100644
index 2aa12a4..0000000
--- a/debian/mans/ace2sam.1
+++ /dev/null
@@ -1,24 +0,0 @@
-.TH ACE2SAM "1" "December 2015" "ace2sam 1.2" "User Commands"
-.SH NAME
-ace2sam \- ace to sam converter
-.SH SYNOPSIS
-.B ace2sam
-[\fI\,-pc\/\fR] \fI\,<in.ace>\/\fR
-.SH OPTIONS
-.TP
-\fB\-p\fR
-output padded SAM
-.TP
-\fB\-c\fR
-write the contig sequence in SAM
-.TP
-Notes:
-1. Fields must appear in the following order: (CO\->[BQ]\->(AF)\->(RD\->QA))
-.IP
-2. The order of reads in AF and in RD must be identical
-.IP
-3. Except in BQ, words and numbers must be separated by a single SPACE or TAB
-.IP
-4. This program writes the headerless SAM to stdout and header to stderr
-.SH AUTHOR
-This manpage was written by Andreas Tille for the Debian distribution and can be used for any other usage of the program.
diff --git a/debian/mans/samtools.pl.1 b/debian/mans/samtools.pl.1
deleted file mode 100644
index a102ba7..0000000
--- a/debian/mans/samtools.pl.1
+++ /dev/null
@@ -1,19 +0,0 @@
-.TH SAMTOOLS.PL "1" "December 2015" "samtools.pl 1.2" "User Commands"
-.SH NAME
-samtools.pl \- helper script for SAMtools
-.SH SYNOPSIS
-.B samtools.pl
-\fI\,<command> \/\fR[\fI\,<arguments>\/\fR]
-.SH DESCRIPTION
-helper script for SAMtools
-.TP
-Command:
-varFilter filtering SNPs and short indels
-.IP
-pileup2fq generate fastq from `pileup \fB\-c\fR'
-.IP
-showALEN print alignment length (ALEN) following CIGAR
-.SH AUTHOR
-THe software was written by Heng Li <lh3 at sanger.ac.uk>
-.P
-This manpage was written by Andreas Tille for the Debian distribution and can be used for any other usage of the program.
diff --git a/debian/mans/wgsim.1 b/debian/mans/wgsim.1
deleted file mode 100644
index b13f121..0000000
--- a/debian/mans/wgsim.1
+++ /dev/null
@@ -1,44 +0,0 @@
-.TH WGSIM "1" "December 2015" "wgsim 1.2" "User Commands"
-.SH NAME
-wgsim \- short read simulator
-.SH SYNOPSIS
-.B wgsim
-[\fI\,options\/\fR] \fI\,<in.ref.fa> <out.read1.fq> <out.read2.fq>\/\fR
-.SH OPTIONS
-.TP
-\fB\-e\fR FLOAT
-base error rate [0.020]
-.TP
-\fB\-d\fR INT
-outer distance between the two ends [500]
-.TP
-\fB\-s\fR INT
-standard deviation [50]
-.TP
-\fB\-N\fR INT
-number of read pairs [1000000]
-.TP
-\fB\-1\fR INT
-length of the first read [70]
-.TP
-\fB\-2\fR INT
-length of the second read [70]
-.TP
-\fB\-r\fR FLOAT
-rate of mutations [0.0010]
-.TP
-\fB\-R\fR FLOAT
-fraction of indels [0.15]
-.TP
-\fB\-X\fR FLOAT
-probability an indel is extended [0.30]
-.TP
-\fB\-S\fR INT
-seed for random generator [\-1]
-.TP
-\fB\-h\fR
-haplotype mode
-.SH AUTHOR
-The software was written by Heng Li <lh3 at sanger.ac.uk>
-.P
-This manpage was written by Andreas Tille for the Debian distribution and can be used for any other usage of the program.
diff --git a/debian/patches/dynamic-build.patch b/debian/patches/dynamic-build.patch
deleted file mode 100644
index b49b4a3..0000000
--- a/debian/patches/dynamic-build.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-Description: disable inclusion of accessory makefile needed for static build
-Forwarded: https://github.com/samtools/samtools/issues/275
-Author: Charles Plessy
-
-Index: samtools/Makefile
-===================================================================
---- samtools.orig/Makefile
-+++ samtools/Makefile
-@@ -83,7 +83,7 @@ all: $(PROGRAMS) $(BUILT_MISC_PROGRAMS)
-
- # Adjust $(HTSDIR) to point to your top-level htslib directory
- HTSDIR = ../htslib
--include $(HTSDIR)/htslib.mk
-+-include $(HTSDIR)/htslib.mk
- HTSLIB = $(HTSDIR)/libhts.a
- BGZIP = $(HTSDIR)/bgzip
-
diff --git a/debian/patches/literal_version.patch b/debian/patches/literal_version.patch
deleted file mode 100644
index a9731fb..0000000
--- a/debian/patches/literal_version.patch
+++ /dev/null
@@ -1,16 +0,0 @@
-Description: do not use make to extract version
- Without a valid config.mk, trying to call m4_esyscmd_s() to run
- make results in weird error messages. This patch avoids having
- to run make just to extract this value.
-Author: Sascha Steinbiss <sascha at steinbiss.name>
---- a/configure.ac
-+++ b/configure.ac
-@@ -23,7 +23,7 @@
- # DEALINGS IN THE SOFTWARE.
-
- dnl Process this file with autoconf to produce a configure script
--AC_INIT([Samtools], m4_esyscmd_s([make print-version]),
-+AC_INIT([Samtools], m4_esyscmd_s([grep 'PACKAGE_VERSION =' Makefile | cut -f 2 -d'=' | xargs]),
- [samtools-help at lists.sourceforge.net], [], [http://www.htslib.org/])
- AC_PREREQ([2.63]) dnl This version introduced 4-argument AC_CHECK_HEADER
- AC_CONFIG_SRCDIR([bamtk.c])
diff --git a/debian/patches/mayhem.patch b/debian/patches/mayhem.patch
deleted file mode 100644
index 537561a..0000000
--- a/debian/patches/mayhem.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Mon, 21 Dec 2015 13:20:46 +0100
-Bug-Debian: https://bugs.debian.org/716197
- https://bugs.debian.org/716199
-Description: Fix Mayhem issues caused by unlimited reads of gz input
-
-
---- a/misc/maq2sam.c
-+++ b/misc/maq2sam.c
-@@ -100,14 +100,37 @@
- }
- assert(mm->format == MAQMAP_FORMAT_NEW);
- }
-- gzread(fp, &mm->n_ref, sizeof(int));
-+ if ( gzread(fp, &mm->n_ref, sizeof(int)) < sizeof(int)) {
-+ fprintf(stderr, "To few bytes in input file");
-+ return NULL;
-+ }
-+ if (mm->n_ref < 0) {
-+ fprintf(stderr, "Incorrect n_ref read from input file\n");
-+ return NULL;
-+ }
-+ if ( gzeof(fp) ) {
-+ fprintf(stderr, "Unexpected end of file.");
-+ return NULL;
-+ }
- mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*));
- for (k = 0; k != mm->n_ref; ++k) {
-+ if ( gzeof(fp) ) {
-+ fprintf(stderr, "Unexpected end of file.");
-+ return NULL;
-+ }
- gzread(fp, &len, sizeof(int));
- mm->ref_name[k] = (char*)malloc(len * sizeof(char));
-+ if ( gzeof(fp) ) {
-+ fprintf(stderr, "Unexpected end of file.");
-+ return NULL;
-+ }
- gzread(fp, mm->ref_name[k], len);
- }
- /* read number of mapped reads */
-+ if ( gzeof(fp) ) {
-+ fprintf(stderr, "Unexpected end of file.");
-+ return NULL;
-+ }
- gzread(fp, &mm->n_mapped_reads, sizeof(uint64_t));
- return mm;
- }
-@@ -119,6 +142,8 @@
- int ret;
- m1 = &mm1;
- mm = maqmap_read_header(fp);
-+ if (!mm) exit(1);
-+
- while ((ret = gzread(fp, m1, sizeof(maqmap1_t))) == sizeof(maqmap1_t)) {
- int j, flag = 0, se_mapq = m1->seq[MAX_READLEN-1];
- if (m1->flag) flag |= 1;
-@@ -193,6 +218,11 @@
- return 1;
- }
- fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
-+ if (!fp ) {
-+ fprintf(stderr, "Unable to open file %s", argv[1]);
-+ fprintf(stderr, "Usage: maq2sam <in.map> [<readGroup>]\n");
-+ return 1;
-+ }
- maq2tam_core(fp, argc > 2? argv[2] : 0);
- gzclose(fp);
- return 0;
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index 08bb035..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1,3 +0,0 @@
-spelling.patch
-literal_version.patch
-mayhem.patch
diff --git a/debian/patches/spelling.patch b/debian/patches/spelling.patch
deleted file mode 100644
index ad72959..0000000
--- a/debian/patches/spelling.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-Description: spelling fix
-Author: Sascha Steinbiss <sascha at steinbiss.name>
---- a/samtools.1
-+++ b/samtools.1
-@@ -93,8 +93,8 @@
- .PP
- Samtools is a set of utilities that manipulate alignments in the BAM
- format. It imports from and exports to the SAM (Sequence Alignment/Map)
--format, does sorting, merging and indexing, and allows to retrieve reads
--in any regions swiftly.
-+format, does sorting, merging and indexing, and allows one to retrieve
-+reads in any regions swiftly.
-
- Samtools is designed to work on a stream. It regards an input file `-'
- as the standard input (stdin) and an output file `-' as the standard
diff --git a/debian/reference b/debian/reference
deleted file mode 100644
index a6bf486..0000000
--- a/debian/reference
+++ /dev/null
@@ -1,12 +0,0 @@
-@article{HengLi06082009,
-author = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard and 1000 Genome Project Data Processing Subgroup, },
-title = {{The Sequence Alignment/Map (SAM) Format and SAMtools}},
-journal = {Bioinformatics},
-volume = {},
-number = {},
-pages = {btp352},
-doi = {10.1093/bioinformatics/btp352},
-year = {2009},
-URL = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btp352v1},
-eprint = {http://bioinformatics.oxfordjournals.org/cgi/reprint/btp352v1.pdf}
-}
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 36ada3d..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/make -f
-
-export DH_VERBOSE=1
-export CURSES_LIB=-lcurses
-
-%:
- dh $@
-
-override_dh_auto_clean:
- rm -f config.mk
- touch config.mk
- dh_auto_clean --
-
-override_dh_auto_configure:
- touch config.mk
- aclocal --force -I debian/ -I /usr/share/aclocal
- autoconf
- dh_auto_configure -- --with-htslib=system
-
-override_dh_auto_build:
- dh_auto_build -- \
- CFLAGS="$$(dpkg-buildflags --get CFLAGS)" \
- CPPFLAGS="\$$(DFLAGS) \$$(INCLUDES) $$(dpkg-buildflags --get CPPFLAGS)" \
- LDFLAGS="$$(dpkg-buildflags --get LDFLAGS)"
-
-override_dh_auto_test:
- dh_auto_test -- \
- BGZIP=/usr/bin/bgzip
-
-override_dh_auto_install:
- dh_auto_install -- \
- prefix=/usr
- make clean
diff --git a/debian/samtools-Dockerfile b/debian/samtools-Dockerfile
deleted file mode 100644
index 6f6b755..0000000
--- a/debian/samtools-Dockerfile
+++ /dev/null
@@ -1,84 +0,0 @@
-#################################################################
-# Dockerfile
-#
-# Software: samtools
-# Software Version: 1.2-242-4d56437
-# Description: samtools image for SciDAP
-# Website: https://samtools.github.io, http://scidap.com/
-# Provides: samtools/htslib/tabix/bgzip
-# Base Image: scidap/scidap:v0.0.1
-# Build Cmd: docker build --rm -t scidap/samtools:v1.2-242-4d56437 .
-# Pull Cmd: docker pull scidap/samtools:v1.2-242-4d56437
-# Run Cmd: docker run --rm scidap/samtools:v1.2-242-4d56437 samtools
-#################################################################
-
-### Base Image
-FROM scidap/scidap:v0.0.1
-MAINTAINER Andrey V Kartashov "porter at porter.st"
-ENV DEBIAN_FRONTEND noninteractive
-
-################## BEGIN INSTALLATION ######################
-
-WORKDIR /tmp
-
-### Install required packages (samtools)
-
-RUN apt-get clean all &&\
- apt-get update &&\
- apt-get install -y \
- libncurses5-dev && \
- apt-get clean && \
- apt-get purge && \
- rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/doc/*
-
-### Installing samtools/htslib/tabix/bgzip
-
-ENV VERSIONH 1.2.1-254-6462e34
-ENV NAMEH htslib
-ENV URLH "https://github.com/samtools/htslib/archive/${VERSIONH}.tar.gz"
-ENV SHA1H "6462e349d16e83db8647272e4b98d2a92992071f"
-
-ENV VERSION 1.2-242-4d56437
-ENV NAME "samtools"
-ENV URL "https://github.com/samtools/samtools/archive/${VERSION}.tar.gz"
-ENV SHA1 "4d56437320ad370eb0b48c204ccec7c73f653393"
-
-RUN git clone https://github.com/samtools/htslib.git && \
-cd ${NAMEH} && \
-git reset --hard ${SHA1H} && \
-make -j 4 && \
-cd .. && \
-cp ./${NAMEH}/tabix /usr/local/bin/ && \
-cp ./${NAMEH}/bgzip /usr/local/bin/ && \
-cp ./${NAMEH}/htsfile /usr/local/bin/ && \
-#RUN wget -q -O - $URLH | tar -zxv && \
-#cd ${NAMEH}-${VERSIONH} && \
-#make -j 4 && \
-#cd .. && \
-#cp ./${NAMEH}-${VERSIONH}/tabix /usr/local/bin/ && \
-#cp ./${NAMEH}-${VERSIONH}/bgzip /usr/local/bin/ && \
-#cp ./${NAMEH}-${VERSIONH}/htsfile /usr/local/bin/ && \
-strip /usr/local/bin/tabix; true && \
-strip /usr/local/bin/bgzip; true && \
-strip /usr/local/bin/htsfile; true && \
-#ln -s ./${NAMEH}-${VERSIONH}/ ./${NAMEH} && \
-
-git clone https://github.com/samtools/samtools.git && \
-cd ${NAME} && \
-git reset --hard ${SHA1} && \
-make -j 4 && \
-cp ./${NAME} /usr/local/bin/ && \
-cd .. && \
-strip /usr/local/bin/${NAME}; true && \
-rm -rf ./${NAMEH}/ && \
-rm -rf ./${NAME}/ && \
-rm -rf ./${NAMEH}
-
-#wget -q -O - $URL | tar -zxv && \
-#cd ${NAME}-${VERSION} && \
-#make -j 4 && \
-#cd .. && \
-#cp ./${NAME}-${VERSION}/${NAME} /usr/local/bin/ && \
-#strip /usr/local/bin/${NAME}; true && \
-#rm -rf ./${NAMEH}-${VERSIONH}/ && \
-#rm -rf ./${NAME}-${VERSION}/
diff --git a/debian/samtools-docker.yml b/debian/samtools-docker.yml
deleted file mode 100644
index 4f4348e..0000000
--- a/debian/samtools-docker.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-class: DockerRequirement
-dockerPull: scidap/samtools:v1.2-242-4d56437
-#dockerImageId: scidap/samtools:v1.2-242-4d56437 #not yet ready
-dockerFile: >
- $import: samtools-Dockerfile
diff --git a/debian/samtools-faidx.cwl b/debian/samtools-faidx.cwl
deleted file mode 100755
index 0eb7011..0000000
--- a/debian/samtools-faidx.cwl
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/cwl-runner
-#
-# To use it as stand alone tool. The working directory should not have input .fa file
-# example: "./samtools-faidx.cwl --input=./test-files/mm10.fa"
-# As part of a workflow should be no problem at all
-
-cwlVersion: v1.0
-class: CommandLineTool
-
-requirements:
-- $import: envvar-global.yml
-- $import: samtools-docker.yml
-- class: InlineJavascriptRequirement
-- class: InitialWorkDirRequirement
- listing:
- - entry: $(inputs.input)
- entryname: $(inputs.input.path.split('/').slice(-1)[0])
-inputs:
- input:
- type: File
- doc: <file.fa|file.fa.gz>
- region:
- type: string?
- inputBinding:
- position: 2
-
-outputs:
- index:
- type: File
- outputBinding:
- glob: $(inputs.input.path.split('/').slice(-1)[0]) #+'.fai')
- secondaryFiles:
- - .fai
- - .gzi
-
-baseCommand:
-- samtools
-- faidx
-
-arguments:
-- valueFrom: $(inputs.input.path.split('/').slice(-1)[0])
- position: 1
-
-$namespaces:
- s: http://schema.org/
-
-$schemas:
-- http://schema.org/docs/schema_org_rdfa.html
-
-s:mainEntity:
- $import: samtools-metadata.yaml
-
-s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-faidx.cwl
-s:codeRepository: https://github.com/common-workflow-language/workflows
-s:license: http://www.apache.org/licenses/LICENSE-2.0
-
-s:isPartOf:
- class: s:CreativeWork
- s:name: Common Workflow Language
- s:url: http://commonwl.org/
-
-s:author:
- class: s:Person
- s:name: Andrey Kartashov
- s:email: mailto:Andrey.Kartashov@cchmc.org
- s:sameAs:
- - id: http://orcid.org/0000-0001-9102-5681
- s:worksFor:
- - class: s:Organization
- s:name: Cincinnati Children's Hospital Medical Center
- s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
- s:department:
- - class: s:Organization
- s:name: Barski Lab
-doc: |
- samtools-faidx.cwl is developed for CWL consortium
- Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]]
-
diff --git a/debian/samtools-index.cwl b/debian/samtools-index.cwl
deleted file mode 100755
index 69b4c2c..0000000
--- a/debian/samtools-index.cwl
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/cwl-runner
-
-cwlVersion: v1.0
-class: CommandLineTool
-
-requirements:
-- $import: envvar-global.yml
-- $import: samtools-docker.yml
-- class: InlineJavascriptRequirement
- expressionLib:
- - var new_ext = function() { var ext=inputs.bai?'.bai':inputs.csi?'.csi':'.bai';
- return inputs.input.path.split('/').slice(-1)[0]+ext; };
-inputs:
- input:
- type: File
- inputBinding:
- position: 2
-
- doc: |
- Input bam file.
- interval:
- type: int?
- inputBinding:
- position: 1
- prefix: -m
- doc: |
- Set minimum interval size for CSI indices to 2^INT [14]
- csi:
- type: boolean
- default: false
- doc: |
- Generate CSI-format index for BAM files
- bai:
- type: boolean
- default: false
- doc: |
- Generate BAI-format index for BAM files [default]
-outputs:
- index:
- type: File
- outputBinding:
- glob: $(new_ext())
-
- doc: The index file
-baseCommand: [samtools, index]
-arguments:
-- valueFrom: $(inputs.bai?'-b':inputs.csi?'-c':[])
- position: 1
-- valueFrom: $(new_ext())
- position: 3
-
-$namespaces:
- s: http://schema.org/
-
-$schemas:
-- http://schema.org/docs/schema_org_rdfa.html
-
-s:mainEntity:
- $import: samtools-metadata.yaml
-
-s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-index.cwl
-s:codeRepository: https://github.com/common-workflow-language/workflows
-s:license: http://www.apache.org/licenses/LICENSE-2.0
-
-s:isPartOf:
- class: s:CreativeWork
- s:name: Common Workflow Language
- s:url: http://commonwl.org/
-
-s:author:
- class: s:Person
- s:name: Andrey Kartashov
- s:email: mailto:Andrey.Kartashov@cchmc.org
- s:sameAs:
- - id: http://orcid.org/0000-0001-9102-5681
- s:worksFor:
- - class: s:Organization
- s:name: Cincinnati Children's Hospital Medical Center
- s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
- s:department:
- - class: s:Organization
- s:name: Barski Lab
-doc: |
- samtools-index.cwl is developed for CWL consortium
-
-
diff --git a/debian/samtools-metadata.yaml b/debian/samtools-metadata.yaml
deleted file mode 100644
index 64b6852..0000000
--- a/debian/samtools-metadata.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-class: s:SoftwareSourceCode
-s:name: "samtools"
-s:about: >
- A suite of programs for interacting with high-throughput sequencing data.
- It consists of three separate repositories: Samtools (Reading/writing/editing/indexing/viewing SAM/BAM/CRAM format),
- BCFtools (Reading/writing BCF2/VCF/gVCF files and calling/filtering/summarising SNP and short indel sequence variants)
- and HTSlib (A C library for reading/writing high-throughput sequencing data).
-
-s:url: http://www.htslib.org/
-s:codeRepository: https://github.com/samtools/samtools.git
-
-s:license:
-- https://opensource.org/licenses/MIT
-- https://opensource.org/licenses/BSD-3-Clause
-
-s:targetProduct:
- class: s:SoftwareApplication
- s:softwareVersion: "1.2-242-4d56437"
- s:applicationCategory: "commandline tool"
-
-s:programmingLanguage: "C, Perl"
-
-s:publication:
-- class: s:ScholarlyArticle
- id: http://dx.doi.org/10.1093/bioinformatics/btr509
- s:name: "(Li, 2011) A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics."
- s:url: http://www.ncbi.nlm.nih.gov/pubmed/21903627
-- class: s:ScholarlyArticle
- id: http://dx.doi.org/10.1093/bioinformatics/btr076
- s:name: "(Li, 2011) Improving SNP discovery by base alignment quality. Bioinformatics."
- s:url: http://www.ncbi.nlm.nih.gov/pubmed/21320865
-- class: s:ScholarlyArticle
- id: http://dx.doi.org/10.1093/bioinformatics/btp352
- s:name: "(Li et al., 2009) The Sequence Alignment/Map format and SAMtools. Bioinformatics."
- s:url: http://www.ncbi.nlm.nih.gov/pubmed/19505943
-
-s:discussionUrl:
-- https://lists.sourceforge.net/lists/listinfo/samtools-help
-- https://lists.sourceforge.net/lists/listinfo/samtools-devel
-
-s:creator:
-- class: s:Organization
- s:name: "Sanger Institute"
- s:member:
- - class: s:Person
- s:name: "Heng Li"
- s:description: "wrote most of the initial source codes of SAMtools and various converters."
-- class: s:Organization
- s:name: "Broad Institute"
- s:member:
- - class: s:Person
- s:name: "Bob Handsaker"
- s:description: |
- A major contributor to the
- SAM/BAM specification. He designed and implemented the BGZF format, the
- underlying indexable compression format for the BAM format. BGZF does
- not support arithmetic between file offsets.
-- class: s:Organization
- s:name: "Beijing Genome Institute"
- s:member:
- - class: s:Person
- s:name: "Jue Ruan"
- s:description: |
- Designed and implemented the
- RAZF format, an alternative indexable compression format. RAZF is no longer
- used by or provided with SAMtools. Source code remains available in older
- SAMtools 0.1.x releases and from the standalone branch in the repository.
-- class: s:Person
- s:name: "Colin Hercus"
- s:description: "updated novo2sam.pl to support gapped alignment by novoalign."
-- class: s:Person
- s:name: "Petr Danecek"
- s:description: "contributed the header parsing library sam_header.c and sam2vcf.pl script."
\ No newline at end of file
diff --git a/debian/samtools-rmdup.cwl b/debian/samtools-rmdup.cwl
deleted file mode 100755
index 85092a8..0000000
--- a/debian/samtools-rmdup.cwl
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/cwl-runner
-
-cwlVersion: v1.0
-class: CommandLineTool
-
-requirements:
-- $import: envvar-global.yml
-- $import: samtools-docker.yml
-- class: InlineJavascriptRequirement
-
-inputs:
- single_end:
- type: boolean
- default: false
- doc: |
- rmdup for SE reads
- input:
- type: File
- inputBinding:
- position: 2
-
- doc: |
- Input bam file.
- output_name:
- type: string
- inputBinding:
- position: 3
-
- pairend_as_se:
- type: boolean
- default: false
- doc: |
- treat PE reads as SE in rmdup (force -s)
-outputs:
- rmdup:
- type: File
- outputBinding:
- glob: $(inputs.output_name)
-
- doc: File with removed duplicates
-baseCommand: [samtools, rmdup]
-$namespaces:
- s: http://schema.org/
-
-$schemas:
-- http://schema.org/docs/schema_org_rdfa.html
-
-s:mainEntity:
- $import: samtools-metadata.yaml
-
-s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-rmdup.cwl
-s:codeRepository: https://github.com/common-workflow-language/workflows
-s:license: http://www.apache.org/licenses/LICENSE-2.0
-
-s:isPartOf:
- class: s:CreativeWork
- s:name: Common Workflow Language
- s:url: http://commonwl.org/
-
-s:author:
- class: s:Person
- s:name: Andrey Kartashov
- s:email: mailto:Andrey.Kartashov@cchmc.org
- s:sameAs:
- - id: http://orcid.org/0000-0001-9102-5681
- s:worksFor:
- - class: s:Organization
- s:name: Cincinnati Children's Hospital Medical Center
- s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
- s:department:
- - class: s:Organization
- s:name: Barski Lab
-doc: |
- samtools-rmdup.cwl is developed for CWL consortium
-
diff --git a/debian/samtools-sort.cwl b/debian/samtools-sort.cwl
deleted file mode 100755
index 4adefa0..0000000
--- a/debian/samtools-sort.cwl
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/cwl-runner
-
-cwlVersion: v1.0
-class: CommandLineTool
-
-requirements:
-- $import: envvar-global.yml
-- $import: samtools-docker.yml
-- class: InlineJavascriptRequirement
-
-inputs:
- compression_level:
- type: int?
- inputBinding:
- prefix: -l
- doc: |
- Set compression level, from 0 (uncompressed) to 9 (best)
- threads:
- type: int?
- inputBinding:
- prefix: -@
-
- doc: Set number of sorting and compression threads [1]
- memory:
- type: string?
- inputBinding:
- prefix: -m
- doc: |
- Set maximum memory per thread; suffix K/M/G recognized [768M]
- input:
- type: File
- inputBinding:
- position: 1
-
- doc: Input bam file.
- output_name:
- type: string
- inputBinding:
- position: 2
-
- doc: Desired output filename.
- sort_by_name:
- type: boolean?
- inputBinding:
- prefix: -n
-
- doc: Sort by read names (i.e., the QNAME field) rather than by chromosomal coordinates.
-outputs:
- sorted:
- type: File
- outputBinding:
- glob: $(inputs.output_name)
-
-baseCommand: [samtools, sort]
-arguments:
-- -f
-$namespaces:
- s: http://schema.org/
-
-$schemas:
-- http://schema.org/docs/schema_org_rdfa.html
-
-s:mainEntity:
- $import: samtools-metadata.yaml
-
-s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-sort.cwl
-s:codeRepository: https://github.com/common-workflow-language/workflows
-s:license: http://www.apache.org/licenses/LICENSE-2.0
-
-s:isPartOf:
- class: s:CreativeWork
- s:name: Common Workflow Language
- s:url: http://commonwl.org/
-
-s:author:
- class: s:Person
- s:name: Andrey Kartashov
- s:email: mailto:Andrey.Kartashov@cchmc.org
- s:sameAs:
- - id: http://orcid.org/0000-0001-9102-5681
- s:worksFor:
- - class: s:Organization
- s:name: Cincinnati Children's Hospital Medical Center
- s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
- s:department:
- - class: s:Organization
- s:name: Barski Lab
-doc: |
- samtools-sort.cwl is developed for CWL consortium
- Usage: samtools sort [options...] [in.bam]
- Options:
- -l INT Set compression level, from 0 (uncompressed) to 9 (best)
- -m INT Set maximum memory per thread; suffix K/M/G recognized [768M]
- -n Sort by read name
- -o FILE Write final output to FILE rather than standard output
- -O FORMAT Write output as FORMAT ('sam'/'bam'/'cram') (either -O or
- -T PREFIX Write temporary files to PREFIX.nnnn.bam -T is required)
- -@ INT Set number of sorting and compression threads [1]
-
- Legacy usage: samtools sort [options...] <in.bam> <out.prefix>
- Options:
- -f Use <out.prefix> as full final filename rather than prefix
- -o Write final output to stdout rather than <out.prefix>.bam
- -l,m,n,@ Similar to corresponding options above
-
-
diff --git a/debian/samtools-test.install b/debian/samtools-test.install
deleted file mode 100644
index 7018f18..0000000
--- a/debian/samtools-test.install
+++ /dev/null
@@ -1 +0,0 @@
-test usr/share/samtools
diff --git a/debian/samtools-view.cwl b/debian/samtools-view.cwl
deleted file mode 100755
index c204d98..0000000
--- a/debian/samtools-view.cwl
+++ /dev/null
@@ -1,242 +0,0 @@
-#!/usr/bin/cwl-runner
-
-cwlVersion: v1.0
-class: CommandLineTool
-
-requirements:
-- $import: envvar-global.yml
-- $import: samtools-docker.yml
-- class: InlineJavascriptRequirement
-
-inputs:
- isbam:
- type: boolean
- default: false
- inputBinding:
- position: 2
- prefix: -b
- doc: |
- output in BAM format
- readswithoutbits:
- type: int?
- inputBinding:
- position: 1
- prefix: -F
- doc: |
- only include reads with none of the bits set in INT set in FLAG [0]
- collapsecigar:
- type: boolean
- default: false
- inputBinding:
- position: 1
- prefix: -B
- doc: |
- collapse the backward CIGAR operation
- readsingroup:
- type: string?
- inputBinding:
- position: 1
- prefix: -r
- doc: |
- only include reads in read group STR [null]
- bedoverlap:
- type: File?
- inputBinding:
- position: 1
- prefix: -L
- doc: |
- only include reads overlapping this BED FILE [null]
- uncompressed:
- type: boolean
- default: false
- inputBinding:
- position: 1
- prefix: -u
- doc: |
- uncompressed BAM output (implies -b)
- readtagtostrip:
- type: string[]?
- inputBinding:
- position: 1
-
- doc: |
- read tag to strip (repeatable) [null]
- input:
- type: File
- inputBinding:
- position: 4
-
- doc: |
- Input bam file.
- readsquality:
- type: int?
- inputBinding:
- position: 1
- prefix: -q
- doc: |
- only include reads with mapping quality >= INT [0]
- readswithbits:
- type: int?
- inputBinding:
- position: 1
- prefix: -f
- doc: |
- only include reads with all bits set in INT set in FLAG [0]
- cigar:
- type: int?
- inputBinding:
- position: 1
- prefix: -m
- doc: |
- only include reads with number of CIGAR operations
- consuming query sequence >= INT [0]
- iscram:
- type: boolean
- default: false
- inputBinding:
- position: 2
- prefix: -C
- doc: |
- output in CRAM format
- threads:
- type: int?
- inputBinding:
- position: 1
- prefix: -@
- doc: |
- number of BAM compression threads [0]
- fastcompression:
- type: boolean
- default: false
- inputBinding:
- position: 1
- prefix: '-1'
- doc: |
- use fast BAM compression (implies -b)
- samheader:
- type: boolean
- default: false
- inputBinding:
- position: 1
- prefix: -h
- doc: |
- include header in SAM output
- count:
- type: boolean
- default: false
- inputBinding:
- position: 1
- prefix: -c
- doc: |
- print only the count of matching records
- randomseed:
- type: float?
- inputBinding:
- position: 1
- prefix: -s
- doc: |
- integer part sets seed of random number generator [0];
- rest sets fraction of templates to subsample [no subsampling]
- referencefasta:
- type: File?
- inputBinding:
- position: 1
- prefix: -T
- doc: |
- reference sequence FASTA FILE [null]
- region:
- type: string?
- inputBinding:
- position: 5
-
- doc: |
- [region ...]
- readsingroupfile:
- type: File?
- inputBinding:
- position: 1
- prefix: -R
- doc: |
- only include reads with read group listed in FILE [null]
- readsinlibrary:
- type: string?
- inputBinding:
- position: 1
- prefix: -l
- doc: |
- only include reads in library STR [null]
- output_name:
- type: string
- inputBinding:
- position: 2
- prefix: -o
-outputs:
- output:
- type: File
- outputBinding:
- glob: $(inputs.output_name)
-
-baseCommand: [samtools, view]
-$namespaces:
- s: http://schema.org/
-
-$schemas:
-- http://schema.org/docs/schema_org_rdfa.html
-
-s:mainEntity:
- $import: samtools-metadata.yaml
-
-s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl
-s:codeRepository: https://github.com/common-workflow-language/workflows
-s:license: http://www.apache.org/licenses/LICENSE-2.0
-
-s:isPartOf:
- class: s:CreativeWork
- s:name: Common Workflow Language
- s:url: http://commonwl.org/
-
-s:author:
- class: s:Person
- s:name: Andrey Kartashov
- s:email: mailto:Andrey.Kartashov@cchmc.org
- s:sameAs:
- - id: http://orcid.org/0000-0001-9102-5681
- s:worksFor:
- - class: s:Organization
- s:name: Cincinnati Children's Hospital Medical Center
- s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
- s:department:
- - class: s:Organization
- s:name: Barski Lab
-doc: |
- samtools-view.cwl is developed for CWL consortium
- Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]
-
- Options: -b output BAM
- -C output CRAM (requires -T)
- -1 use fast BAM compression (implies -b)
- -u uncompressed BAM output (implies -b)
- -h include header in SAM output
- -H print SAM header only (no alignments)
- -c print only the count of matching records
- -o FILE output file name [stdout]
- -U FILE output reads not selected by filters to FILE [null]
- -t FILE FILE listing reference names and lengths (see long help) [null]
- -T FILE reference sequence FASTA FILE [null]
- -L FILE only include reads overlapping this BED FILE [null]
- -r STR only include reads in read group STR [null]
- -R FILE only include reads with read group listed in FILE [null]
- -q INT only include reads with mapping quality >= INT [0]
- -l STR only include reads in library STR [null]
- -m INT only include reads with number of CIGAR operations
- consuming query sequence >= INT [0]
- -f INT only include reads with all bits set in INT set in FLAG [0]
- -F INT only include reads with none of the bits set in INT
- set in FLAG [0]
- -x STR read tag to strip (repeatable) [null]
- -B collapse the backward CIGAR operation
- -s FLOAT integer part sets seed of random number generator [0];
- rest sets fraction of templates to subsample [no subsampling]
- -@ INT number of BAM compression threads [0]
-
-
diff --git a/debian/samtools.bash-completion b/debian/samtools.bash-completion
deleted file mode 100644
index cae64e6..0000000
--- a/debian/samtools.bash-completion
+++ /dev/null
@@ -1,21 +0,0 @@
-# From https://raw.github.com/arq5x/bash_completion/master/samtools revision e931a8b46d9582672cc506e45ad9b4f4d6fbd743
-_samtools()
-{
- local cur prev opts
- COMPREPLY=()
- cur="${COMP_WORDS[COMP_CWORD]}"
- prev="${COMP_WORDS[COMP_CWORD-1]}"
- opts="view sort mpileup depth
- faidx tview index idxstats
- fixmate flagstat calmd merge
- rmdup reheader cat targetcut
- phase pad2unpad"
-
- case $prev in
- samtools)
- COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
- ;;
- esac
- return 0
-}
-complete -F _samtools -o default samtools
diff --git a/debian/samtools.docs b/debian/samtools.docs
deleted file mode 100644
index df44e09..0000000
--- a/debian/samtools.docs
+++ /dev/null
@@ -1,2 +0,0 @@
-AUTHORS
-debian/reference
diff --git a/debian/samtools.install b/debian/samtools.install
deleted file mode 100644
index b674c7c..0000000
--- a/debian/samtools.install
+++ /dev/null
@@ -1,4 +0,0 @@
-usr/bin
-usr/share/man/man1
-debian/mans/*.1 usr/share/man/man1
-debian/*.cwl usr/share/commonwl/
diff --git a/debian/samtools.lintian-overrides b/debian/samtools.lintian-overrides
deleted file mode 100644
index a2f1c98..0000000
--- a/debian/samtools.lintian-overrides
+++ /dev/null
@@ -1,10 +0,0 @@
-# Even if there is a conflict with Debian policy samtools internally
-# relies on these extensions (see #799698)
-samtools: script-with-language-extension usr/bin/*.pl
-samtools: script-with-language-extension usr/bin/*.py
-# /usr/bin/cwl-runner is provided by cwltool (in Recomends, which is a bit weak but should be sufficient for this case)
-samtools: unusual-interpreter usr/share/commonwl/samtools-faidx.cwl #!/usr/bin/cwl-runner
-samtools: unusual-interpreter usr/share/commonwl/samtools-index.cwl #!/usr/bin/cwl-runner
-samtools: unusual-interpreter usr/share/commonwl/samtools-rmdup.cwl #!/usr/bin/cwl-runner
-samtools: unusual-interpreter usr/share/commonwl/samtools-sort.cwl #!/usr/bin/cwl-runner
-samtools: unusual-interpreter usr/share/commonwl/samtools-view.cwl #!/usr/bin/cwl-runner
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/source/options b/debian/source/options
deleted file mode 100644
index ae4cafe..0000000
--- a/debian/source/options
+++ /dev/null
@@ -1,2 +0,0 @@
-single-debian-patch
-extend-diff-ignore = "^(\.travis.*|README.md)$"
diff --git a/debian/tests/control b/debian/tests/control
deleted file mode 100644
index 967e82b..0000000
--- a/debian/tests/control
+++ /dev/null
@@ -1,3 +0,0 @@
-Tests: samtools-test
-Depends: @, tabix
-Restrictions: allow-stderr
diff --git a/debian/tests/samtools-test b/debian/tests/samtools-test
deleted file mode 100755
index 560adf5..0000000
--- a/debian/tests/samtools-test
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/sh -e
-
-cd $ADTTMP
-cp -a /usr/share/samtools/test/ .
-ln -s /usr/bin/samtools .
-cd test
-./test.pl
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index 83329b7..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,29 +0,0 @@
-Archive: SourceForge
-Changelog: https://raw.github.com/samtools/samtools/HEAD/NEWS
-Homepage: http://www.htslib.org/
-mr:
- checkout: debcheckout -a git://git.debian.org/debian-med/samtools.git
-Name: SAM tools
-Reference:
- author: >
- Heng Li and Bob Handsaker and Alec Wysoker and Tim Fennell and Jue
- Ruan and Nils Homer and Gabor Marth and Goncalo Abecasis and Richard
- Durbin and 1000 Genome Project Data Processing Subgroup
- title: The Sequence Alignment/Map (SAM) Format and SAMtools
- journal: Bioinformatics
- volume: 25
- number: 16
- pages: 2078-2079
- doi: 10.1093/bioinformatics/btp352
- PMID: 19505943
- year: 2009
- URL: http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btp352v1
- eprint: http://bioinformatics.oxfordjournals.org/cgi/reprint/btp352v1.pdf
-Repository: https://github.com/samtools/samtools
-Registry:
- - Name: bio.tools
- Entry: NA
- - Name: RRID
- Entry: SCR_002105
- - Name: OMICtools
- Entry: OMICS_00090
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 2affb2e..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-version=3
-opts=filenamemangle=s/.+\/v?(\d\S*)\.tar\.gz/samtools-$1\.tar\.gz/,uversionmangle=s/-rc/~rc/ \
- https://github.com/samtools/samtools/tags .*/v?(\d\S*)\.tar\.gz
diff --git a/dict.c b/dict.c
index fa64a16..cb5622e 100644
--- a/dict.c
+++ b/dict.c
@@ -82,7 +82,11 @@ static void write_dict(const char *fn, args_t *args)
if (args->uri)
fprintf(out, "\tUR:%s", args->uri);
else if (strcmp(fn, "-") != 0) {
+#ifdef _WIN32
+ char *real_path = _fullpath(NULL, fn, PATH_MAX);
+#else
char *real_path = realpath(fn, NULL);
+#endif
fprintf(out, "\tUR:file://%s", real_path);
free(real_path);
}
diff --git a/misc/blast2sam.pl b/misc/blast2sam.pl
index fc46851..58066d9 100755
--- a/misc/blast2sam.pl
+++ b/misc/blast2sam.pl
@@ -171,7 +171,7 @@ Note that there is no header generated, so you will need to run
=over
-samtools -hT your_ref.fasta your_file.sam > your_file_with_header.sam
+samtools view -hT your_ref.fasta your_file.sam > your_file_with_header.sam
=back
diff --git a/misc/maq2sam.c b/misc/maq2sam.c
index 30ac2ec..c36a02c 100644
--- a/misc/maq2sam.c
+++ b/misc/maq2sam.c
@@ -30,8 +30,7 @@ DEALINGS IN THE SOFTWARE. */
#include <inttypes.h>
#include <stdlib.h>
#include <assert.h>
-
-#define PACKAGE_VERSION "r439"
+#include "version.h"
//#define MAQ_LONGREADS
@@ -188,7 +187,7 @@ int main(int argc, char *argv[])
{
gzFile fp;
if (argc == 1) {
- fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
+ fprintf(stderr, "Version: %s\n", SAMTOOLS_VERSION);
fprintf(stderr, "Usage: maq2sam <in.map> [<readGroup>]\n");
return 1;
}
diff --git a/misc/wgsim.1 b/misc/wgsim.1
index 3992c1e..21c108b 100644
--- a/misc/wgsim.1
+++ b/misc/wgsim.1
@@ -1,4 +1,4 @@
-.TH wgsim 1 "21 June 2017" "samtools-1.5" "Bioinformatics tools"
+.TH wgsim 1 "28 September 2017" "samtools-1.6" "Bioinformatics tools"
.SH NAME
wgsim \- Whole-genome sequencing read simulator
.SH SYNOPSIS
diff --git a/misc/wgsim.c b/misc/wgsim.c
index 9e7c962..28dfbd8 100644
--- a/misc/wgsim.c
+++ b/misc/wgsim.c
@@ -39,11 +39,11 @@
#include <ctype.h>
#include <string.h>
#include <zlib.h>
+#include "version.h"
#include "htslib/kseq.h"
+#include "htslib/hts_os.h"
KSEQ_INIT(gzFile, gzread)
-#define PACKAGE_VERSION "0.3.2"
-
const uint8_t nst_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
@@ -407,7 +407,7 @@ static int simu_usage(void)
{
fprintf(stderr, "\n");
fprintf(stderr, "Program: wgsim (short read simulator)\n");
- fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
+ fprintf(stderr, "Version: %s\n", SAMTOOLS_VERSION);
fprintf(stderr, "Contact: Heng Li <lh3@sanger.ac.uk>\n\n");
fprintf(stderr, "Usage: wgsim [options] <in.ref.fa> <out.read1.fq> <out.read2.fq>\n\n");
fprintf(stderr, "Options: -e FLOAT base error rate [%.3f]\n", ERR_RATE);
diff --git a/padding.c b/padding.c
index 2f10e86..650aff8 100644
--- a/padding.c
+++ b/padding.c
@@ -382,6 +382,7 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
{
int i = 0, unpadded_len = 0;
bam_hdr_t *header = 0 ;
+ unsigned short ln_found;
header = bam_hdr_dup(old);
for (i = 0; i < old->n_targets; ++i) {
@@ -418,27 +419,45 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
name += 4;
for (name_end = name; name_end != end && *name_end != '\t'; name_end++);
strcat(newtext, "@SQ");
+ ln_found = 0;
/* Parse the @SQ lines */
while (cp != end) {
- if (end-cp >= 2 && strncmp(cp, "LN", 2) == 0) {
+ if (!ln_found && end-cp >= 2 && strncmp(cp, "LN", 2) == 0) {
// Rewrite the length
char len_buf[100];
int tid;
+ unsigned int old_length, new_length;
+ const char *old_cp = cp;
+
+ ln_found = 1;
+
+ while (cp != end && *cp++ != '\t');
+ old_length = (int)(cp - old_cp);
+
for (tid = 0; tid < header->n_targets; tid++) {
// may want to hash this, but new header API incoming.
if (strncmp(name, header->target_name[tid], name_end - name) == 0) {
- sprintf(len_buf, "LN:%d", header->target_len[tid]);
- strcat(newtext, len_buf);
+ new_length = sprintf(len_buf, "LN:%d", header->target_len[tid]);
+ if (new_length <= old_length) {
+ strcat(newtext, len_buf);
+ }
+ else {
+ fprintf(stderr, "LN value of the reference is larger than the original!\n");
+ exit(1);
+ }
break;
}
}
- while (cp != end && *cp++ != '\t');
+
if (cp != end)
strcat(newtext, "\t");
} else if (end-cp >= 2 &&
- (strncmp(cp, "M5", 2) == 0 ||
- strncmp(cp, "UR", 2) == 0)) {
+ ((ln_found && strncmp(cp, "LN", 2) == 0) ||
+ strncmp(cp, "M5", 2) == 0 ||
+ strncmp(cp, "UR", 2) == 0))
+ {
+ // skip secondary LNs
// MD5 changed during depadding; ditch it.
// URLs are also invalid.
while (cp != end && *cp++ != '\t');
diff --git a/phase.c b/phase.c
index 584334d..0e00d9b 100644
--- a/phase.c
+++ b/phase.c
@@ -36,6 +36,7 @@ DEALINGS IN THE SOFTWARE. */
#include "htslib/kstring.h"
#include "sam_opts.h"
#include "samtools.h"
+#include "htslib/hts_os.h"
#include "htslib/kseq.h"
KSTREAM_INIT(gzFile, gzread, 16384)
diff --git a/sam_view.c b/sam_view.c
index ee65fcd..ceb1080 100644
--- a/sam_view.c
+++ b/sam_view.c
@@ -969,7 +969,7 @@ static bool make_fq_line(const bam1_t *rec, char *seq, char *qual, kstring_t *li
}
/*
- * Create FASTQ lines from the barcode tag using the index-format
+ * Create FASTQ lines from the barcode tag using the index-format
*/
static bool tags2fq(bam1_t *rec, bam2fq_state_t *state, const bam2fq_opts_t* opts)
{
@@ -1072,7 +1072,7 @@ static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t
if (state->use_oq) {
oq = bam_aux_get(b, "OQ");
if (oq) {
- oq++;
+ oq++;
qual = strdup(bam_aux2Z(oq));
if (!qual) goto fail;
if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
@@ -1208,6 +1208,13 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
return false;
}
+ if (nIndex==0 && opts->index_file[0]) {
+ fprintf(stderr, "index_format not specified, but index file given\n");
+ bam2fq_usage(stderr, argv[0]);
+ free_opts(opts);
+ return false;
+ }
+
if (opts->def_qual < 0 || 93 < opts->def_qual) {
fprintf(stderr, "Invalid -v default quality %i, allowed range 0 to 93\n", opts->def_qual);
bam2fq_usage(stderr, argv[0]);
@@ -1375,7 +1382,7 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
}
}
for (i = 0; i < 2; i++) {
- if (state->fpi[i] && bgzf_close(state->fpi[i])) {
+ if (state->fpi[i] && bgzf_close(state->fpi[i])) {
print_error_errno("bam2fq", "Error closing i%d file \"%s\"", i+1, opts->index_file[i]);
valid = false;
}
@@ -1435,14 +1442,22 @@ static bool bam2fq_mainloop(bam2fq_state_t *state, bam2fq_opts_t* opts)
// print linebuf[1] to fpr[1], linebuf[2] to fpr[2]
if (bgzf_write(state->fpr[1], linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
if (bgzf_write(state->fpr[2], linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
- } else if ((score[1] > 0 || score[2] > 0) && state->fpse) {
- // print whichever one exists to fpse
- if (score[1] > 0) {
- if (bgzf_write(state->fpse, linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
+ } else if (score[1] > 0 || score[2] > 0) {
+ if (state->fpse) {
+ // print whichever one exists to fpse
+ if (score[1] > 0) {
+ if (bgzf_write(state->fpse, linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
+ } else {
+ if (bgzf_write(state->fpse, linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
+ }
+ ++n_singletons;
} else {
- if (bgzf_write(state->fpse, linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
+ if (score[1] > 0) {
+ if (bgzf_write(state->fpr[1], linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
+ } else {
+ if (bgzf_write(state->fpr[2], linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
+ }
}
- ++n_singletons;
}
if (score[0]) { // TODO: check this
// print linebuf[0] to fpr[0]
diff --git a/samtools.1 b/samtools.1
index c7fb9b5..52d581a 100644
--- a/samtools.1
+++ b/samtools.1
@@ -1,5 +1,5 @@
'\" t
-.TH samtools 1 "21 June 2017" "samtools-1.5" "Bioinformatics tools"
+.TH samtools 1 "28 September 2017" "samtools-1.6" "Bioinformatics tools"
.SH NAME
samtools \- Utilities for the Sequence Alignment/Map (SAM) format
.\"
@@ -88,6 +88,8 @@ samtools addreplacerg -r 'ID:fish' -r 'LB:1334' -r 'SM:alpha' -o output.bam inpu
samtools collate aln.sorted.bam aln.name_collated.bam
.PP
samtools depad input.bam
+.PP
+samtools markdup in.algnsorted.bam out.bam
.SH DESCRIPTION
.PP
@@ -1069,7 +1071,7 @@ unless reading from stdin.
.B fixmate
.na
samtools fixmate
-.RB [ -rpc ]
+.RB [ -rpcm ]
.RB [ -O
.IR format ]
.I in.nameSrt.bam out.bam
@@ -1090,6 +1092,11 @@ Disable FR proper pair check.
.B -c
Add template cigar ct tag.
.TP
+.B -m
+Add ms (mate score) tags. These are used by
+.B markdup
+to select the best reads to keep.
+.TP
.BI "-O " FORMAT
Write the final output as
.BR sam ", " bam ", or " cram .
@@ -1284,6 +1291,9 @@ Output base positions on reads.
.B -s, --output-MQ
Output mapping quality.
.TP
+.B --output-QNAME
+Output an extra column containing comma-separated read names.
+.TP
.B -a
Output all positions, including those with zero depth.
.TP
@@ -1637,6 +1647,8 @@ to stdout.
.B rmdup
samtools rmdup [-sS] <input.srt.bam> <out.bam>
+.B This command is obsolete. Use markdup instead.
+
Remove potential PCR duplicates: if multiple read pairs have identical
external coordinates, only retain the pair with highest mapping quality.
In the paired-end mode, this command
@@ -1825,6 +1837,50 @@ be considered as mandatory.
Specifies the output filename. By default output is sent to stdout.
.RE
+.TP \"-------- markdup
+.B markdup
+.na
+samtools markdup
+.RB [ -l
+.IR length ]
+.RB [ -r ]
+.RB [ -s ]
+.I in.algsort.bam out.bam
+.ad
+
+Mark duplicate alignments from a coordinate sorted file that
+has been run through fixmate with the -m option. This program
+relies on the MC and ms tags that fixmate provides.
+
+.B
+.RS
+.TP 11
+.BI "-l " INT
+.RI "Expected maximum read length of " INT " bases."
+[300]
+.TP
+.B -r
+Remove duplicate reads.
+.TP
+.B -s
+Print some basic stats.
+.RE
+
+.EX 4
+.B EXAMPLE
+
+# The first sort can be omitted if the file is already name ordered
+samtools sort -n -o namesort.bam example.bam
+
+# Add ms and MC tags for markdup to use later
+samtools fixmate -m namesort.bam fixmate.bam
+
+# Markdup needs position order
+samtools sort -o positionsort.bam fixmate.bam
+
+# Finally mark duplicates
+samtools markdup positionsort.bam markdup.bam
+.EE
.TP \"-------- help etc
.BR help ,\ --help
Display a brief usage message listing the samtools commands available.
diff --git a/test/bam2fq/9.1.fq.expected b/test/bam2fq/9.1.fq.expected
new file mode 100644
index 0000000..2919437
--- /dev/null
+++ b/test/bam2fq/9.1.fq.expected
@@ -0,0 +1,28 @@
+@HS2000-355_269:2:1108:12969:26408
+CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCT
++
+BCBFFFFFGGHHHJJJJIIJGIIIJIJIJIJII990?G=)?0?FH<GEF=CCFEE@G2=C?EE(;CB9;6@6(5??(559<?,9?AB239<2<8<928?8
+@HS2000-355_269:2:2312:2459:22449
+CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAAACCCTAACCCTAACCGTAACTCTAACCCTAACCCTATCTCAA
++
+@@@FFBFD8CDHHIBBHIIHIFHH3811)?:)?9):B)B#############################################################
+@HS2000-355_269:1:2307:11945:73421
+TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAACCCTAACCCTAACCCTAACCATAACCCTAACCCTAACCCTAACCCTAACC
++
+@@@DFFDEHFHFHHGIJJGIJGHIGEGGHIIFEEHEGHHBBAA;?B@F(B(;;=)==66=?EE>?@7;;A##############################
+@HS2000-355_269:1:1114:7529:75240
+TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTCACCCTAACCCTAACCCTAACCCTAACCCTAACCCCAACCCTACCC
++
+@CCFFFFFDDAFB@GGHFGH@FHIFECBGAEC9CBBF?GFGDGIEGG>GCGI;=(8.6@=A)=???;?6;;66=2;;(,5<AB@5??#############
+@HS2000-355_269:2:2302:12695:6571
+CTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCACA
++
+@@@FBEFDDHHHHIIIIIFFAHGGHHGE??C8C;;FFB>;D>;@DEC@H(=FA@(==@)6=22)7;;?A>AA2;;;?@A#####################
+@HS2000-690_130:8:2104:17804:42012
+ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCCACCCCCACCC
++
+CCCFFFFFHGDFHIIJJIIJJIGFHEHIIIJIGIEGDHHIIJJJHIHI=D(BC@FG2;=CEECH@CDF################################
+@HS2000-355_269:2:2213:5874:97462
+ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTACCCCTAACCCTAACCCAAACCCTAACCCTAACCACAACCCACACCCATATCT
++
+@@@FDFADHDDBFG;CGB;BFHDHGGIG3CGGCC;9?GDF939BFG(B####################################################
diff --git a/test/bam2fq/9.2.fq.expected b/test/bam2fq/9.2.fq.expected
new file mode 100644
index 0000000..3bb2efe
--- /dev/null
+++ b/test/bam2fq/9.2.fq.expected
@@ -0,0 +1,12 @@
+@HS2000-355_269:1:2307:11945:73421
+AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTGGGGTTGGGG
++
+?@@DBDBDFAAC;CBG<CGGII@GHCHGGEHAH@?DGDG2*??DFF*=BFCH'55@;??;6;@#####################################
+@HS2000-355_269:2:1108:12969:26408
+AGGGTTGGGTTAGGGTTGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTGGGGTTTGGGGTGGGGTTGGGGGT
++
+@@@FDDFFFFFADHIEHGII@G?GIG?DAHII9B@DFG8BBFCG)=BGEHAAA=AH76?@CC','5=B59(9?A##########################
+@HS2000-690_130:8:1215:11245:58923
+ACACTAACCCGAACCCTAACACAAACCCTAACCCTAACCCTAACCCTAACCCTAACGCGAACACCAACCATAACCCTAACCAACACACTAACGCTAACCC
++
+=>;AAA+AAB6AA12?22?@*:)?1??ABB######################################################################
diff --git a/test/dat/bam2fq.703.sam b/test/dat/bam2fq.703.sam
new file mode 100644
index 0000000..2cb384a
--- /dev/null
+++ b/test/dat/bam2fq.703.sam
@@ -0,0 +1,15 @@
+@HD VN:1.5 SO:coordinate
+@RG ID:LP6005441-DNA_C11 SM:LP6005441-DNA_C11
+ at PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:/bix_tools/bwakit-0.7.15/bwa mem -p -t30 -H at RG\tID:LP6005441-DNA_C11\tSM:LP6005441-DNA_C11 -C /sbgenomics/Projects/92c61d0f-a072-4169-b5c7-767d29e937ba/f7e48782-bb07-4283-9537-0a8fa5b7584e/alt-aware-bwa-mem-workflow_Alt_aware_BWA_MEM/hs38DH.fa -
+ at PG ID:MarkDuplicates VN:1.140(a81bc82e781dae05c922d1dbcee737334612399f_1444244284) CL:picard.sam.markduplicates.MarkDuplicates INPUT=[/sbgenomics/Projects/92c61d0f-a072-4169-b5c7-767d29e937ba/f7e48782-bb07-4283-9537-0a8fa5b7584e/alt-aware-bwa-mem-workflow_Alt_aware_BWA_MEM/LP6005441-DNA_C11.hs38DH.aln.bam] OUTPUT=LP6005441-DNA_C11.hs38DH.aln.deduped.bam METRICS_FILE=LP6005441-DNA_C11.hs38DH.aln.metrics REMOVE_DUPLICATES=false ASSUME_SORTED=true VALIDATION_STRINGENCY=SILENT CREATE_IN [...]
+@SQ SN:chr1 LN:248956422
+HS2000-355_269:1:2307:11945:73421 147 chr1 10002 19 4S96M = 10007 -91 CCCCAACCCCAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCT #####################################@;6;??;@55'HCFB=*FFD??*2GDGD?@HAHEGGHCHG at IIGGC<GBC;CAAFDBDBD@@? XA:Z:chr1,-10106,7S39M1D32M1I21M,2;chr12,-10233,81M7D19M,10;chr20,+64287260,25M1I27M1D43M4S,4;chr18,-10001,24S21M1D55M,1;chr12_GL877875v1_alt,-233,81M7D19M,10; MD:Z:5T37A52 PG:Z:MarkDuplicates RG:Z:LP6005441-DNA_C11 NM:i [...]
+HS2000-355_269:2:1108:12969:26408 99 chr1 10004 0 100M = 10044 123 CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCT BCBFFFFFGGHHHJJJJIIJGIIIJIJIJIJII990?G=)?0?FH<GEF=CCFEE at G2=C?EE(;CB9;6 at 6(5??(559<?,9?AB239<2<8<928?8 MD:Z:100 PG:Z:MarkDuplicates RG:Z:LP6005441-DNA_C11 NM:i:0 AS:i:100 XS:i:100
+HS2000-355_269:2:2312:2459:22449 99 chr1 10005 0 94M6S = 10057 140 CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAAACCCTAACCCTAACCGTAACTCTAACCCTAACCCTATCTCAA @@@FFBFD8CDHHIBBHIIHIFHH3811)?:)?9):B)B############################################################# MD:Z:56T16C4C15 PG:Z:MarkDuplicates RG:Z:LP6005441-DNA_C11 NM:i:3 AS:i:79 XS:i:79
+HS2000-355_269:1:2307:11945:73421 99 chr1 10007 0 48M1D52M = 10002 91 TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAACCCTAACCCTAACCCTAACCATAACCCTAACCCTAACCCTAACCCTAACC @@@DFFDEHFHFHHGIJJGIJGHIGEGGHIIFEEHEGHHBBAA;?B at F(B(;;=)==66=?EE>?@7;;A############################## XA:Z:chr1,+10061,86M1D14M,2;chr12,+10044,48M1D52M,2;chr3,+10470,31M1I65M3S,2;chr4,-190122909,29M3D23M1D46M2S,4;chr18,+10029,66M1I33M,3;chr4,-190122830,52M1D11M1D37M,3;chr1,+10049,48M1D11M1I40M,3;chr1,-248946052,5M1D77M1D [...]
+HS2000-355_269:1:1114:7529:75240 1123 chr1 10007 0 37M1D60M3S = 10366 457 TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTCACCCTAACCCTAACCCTAACCCTAACCCTAACCCCAACCCTACCC @CCFFFFFDDAFB at GGHFGH@FHIFECBGAEC9CBBF?GFGDGIEGG>GCGI;=(8.6@=A)=???;?6;;66=2;;(,5<AB at 5??############# XA:Z:chr15,-101981029,100M,2;chr4,-190122948,100M,3;chrX,-156030570,62M1D38M,3;chr18,+10041,54M1I45M,3;chr4,+10078,100M,4;chr12,+10014,37M1D60M3S,3;chrX,-156030627,3S59M1D38M,3;chr18,-80262925,3S59M1D38M,3;chr3,+1039 [...]
+HS2000-355_269:2:2302:12695:6571 99 chr1 10012 0 40M1I56M3S = 10056 128 CTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCACA @@@FBEFDDHHHHIIIIIFFAHGGHHGE??C8C;;FFB>;D>;@DEC at H(=FA@(==@)6=22)7;;?A>AA2;;;?@A##################### MD:Z:96 PG:Z:MarkDuplicates RG:Z:LP6005441-DNA_C11 NM:i:1 AS:i:89 XS:i:89
+HS2000-690_130:8:2104:17804:42012 99 chr1 10021 0 88M12S = 10256 322 ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCCACCCCCACCC CCCFFFFFHGDFHIIJJIIJJIGFHEHIIIJIGIEGDHHIIJJJHIHI=D(BC at FG2;=CEECH at CDF################################ XA:Z:chr22,-50808379,18S82M,0;chr4,-190122711,12S77M1I10M,1;chr12,+10232,85M15S,1;chr5,+11708,77M23S,0;chr18,-80262920,23S77M,0;chr21,-46699849,23M1D77M,5;chr13,-114354165,23S77M,0;chr18,+10131,16M1D12M1D72M,5;chr1,-24894 [...]
+HS2000-355_269:2:2213:5874:97462 99 chr1 10027 0 81M19S = 10070 110 ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTACCCCTAACCCTAACCCAAACCCTAACCCTAACCACAACCCACACCCATATCT @@@FDFADHDDBFG;CGB;BFHDHGGIG3CGGCC;9?GDF939BFG(B#################################################### XA:Z:chrX,-156030354,19S81M,2;chr12,+10232,76M24S,1;chr21,-46699875,19S81M,2;chr19,-58607537,21S79M,2;chr15,-101981059,19S29M1I51M,2;chr4,-190122978,19S29M1I51M,2;chr4,+10068,48M1I32M19S,2;chr1,-248945940,19S29M1I51M,2;chr [...]
+HS2000-355_269:2:1108:12969:26408 147 chr1 10044 0 18S76M1D6M = 10004 -123 ACCCCCAACCCCACCCCAAACCCCAACCCTAACCCTACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAACCCTAACCCAACCCT ##########################A?9(95B=5','CC@?67HA=AAAHEGB=)GCFBB8GFD at B9IIHAD?GIG?G at IIGHEIHDAFFFFFDDF@@@ XA:Z:chr4,+190122818,6S76M18S,3;chr5,-10146,18S65M1D11M6S,3;chr5,-10206,18S65M1D11M6S,3;chr5,-10062,18S65M1D11M6S,3;chr5,-10134,18S65M1D11M6S,3;chr5,-10092,18S65M1D11M6S,3;chr5,-10074,18S65M1D11M6S,3;chr5,-10038,18S [...]
+HS2000-690_130:8:1215:11245:58923 163 chr1 10048 0 3S59M1I18M19S = 10162 177 ACACTAACCCGAACCCTAACACAAACCCTAACCCTAACCCTAACCCTAACCCTAACGCGAACACCAACCATAACCCTAACCAACACACTAACGCTAACCC =>;AAA+AAB6AA12?22?@*:)?1??ABB###################################################################### XA:Z:chr22,-50808004,19S78M3S,7;chr4,-190123032,44S51M5S,2;chr4,+10120,11S70M19S,6;chr12,-133265107,44S53M3S,3;chr12,-133264993,44S53M3S,3;chr12,-133264939,44S53M3S,3;chr12,-133264957,44S53M3S,3;chr12,-133265035,4 [...]
diff --git a/test/markdup/1_name_sort.expected.sam b/test/markdup/1_name_sort.expected.sam
new file mode 100644
index 0000000..e69de29
diff --git a/test/markdup/1_name_sort.expected.sam.err b/test/markdup/1_name_sort.expected.sam.err
new file mode 100644
index 0000000..4f05556
--- /dev/null
+++ b/test/markdup/1_name_sort.expected.sam.err
@@ -0,0 +1 @@
+[markdup] error: queryname sorted, must be sorted by coordinate.
diff --git a/test/markdup/1_name_sort.sam b/test/markdup/1_name_sort.sam
new file mode 100644
index 0000000..04df0aa
--- /dev/null
+++ b/test/markdup/1_name_sort.sam
@@ -0,0 +1,18 @@
+@HD VN:1.4 SO:queryname
+@SQ SN:contig_000000000 LN:11391
+entry1 83 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11DA [...]
+entry1 163 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11>2 [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry4 83 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA GGGGGGGGGGGGGGGGGFFGHHHHHGGHHGHHHHGFHHHHGHHHHHHHHHGGHHFEHHHHHHHHHGGHHHHHHHHHHHFHHGFHGHHHGFFHFHHHHHHHHHFHHEHHHHHHHGGHHGHGHHHHHHHHHFGGHHHHHHGFHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHHHHHHHHGGGGG [...]
+entry4 163 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FDH [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry6 137 contig_000000000 304 49 35M215S = 304 0 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT ABBBBFFFFFFFGGGGGGGGGGHHHHHHHHGHGHHHHHHHGHHHGHGHHHHHHFGHHGHHHIHHHHHIHHHHHHHHHGHHHHHHHHGHGGGGGHGGGGGHHHHHHHGG1EGGGGGGFHGGHHHGHGGGGGGHHHHGGCGFHHGHHHHFHEHHHHHHHGGGHHGGGHGEGHGFHGCGCFHHHHHHHGGGGD [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;.9//FFF9/GFB9FBGBC.A.FCFGFGFBHHGB:0GHFCG-AA at CHGC0FFGGC.HC>><BGFFHHGGHEHFHHFGFFGHHGF1?11<1HHG<CFGBFGEHFGFGDHHG2FGEHFHCGGDGGHGFGGFDFHHFHHGGFHGEEHFBGEHHHGGEFHHHHGAGHFE1BFHHHGHGGGFHFHFDDFFCAEF [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/markdup/2_bad_order.expected.sam b/test/markdup/2_bad_order.expected.sam
new file mode 100644
index 0000000..11409a1
--- /dev/null
+++ b/test/markdup/2_bad_order.expected.sam
@@ -0,0 +1,7 @@
+@HD VN:1.4 SO:coordinate
+@SQ SN:contig_000000000 LN:11391
+entry1 1187 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11> [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry4 1187 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FD [...]
+entry1 83 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11DA [...]
diff --git a/test/markdup/2_bad_order.expected.sam.err b/test/markdup/2_bad_order.expected.sam.err
new file mode 100644
index 0000000..8ef6d92
--- /dev/null
+++ b/test/markdup/2_bad_order.expected.sam.err
@@ -0,0 +1 @@
+[markdup] error: bad coordinate order.
diff --git a/test/markdup/2_bad_order.sam b/test/markdup/2_bad_order.sam
new file mode 100644
index 0000000..8446835
--- /dev/null
+++ b/test/markdup/2_bad_order.sam
@@ -0,0 +1,19 @@
+@HD VN:1.4 SO:coordinate
+@SQ SN:contig_000000000 LN:11391
+entry1 163 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11>2 [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry4 163 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FDH [...]
+entry1 83 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11DA [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry4 83 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA GGGGGGGGGGGGGGGGGFFGHHHHHGGHHGHHHHGFHHHHGHHHHHHHHHGGHHFEHHHHHHHHHGGHHHHHHHHHHHFHHGFHGHHHGFFHFHHHHHHHHHFHHEHHHHHHHGGHHGHGHHHHHHHHHFGGHHHHHHGFHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHHHHHHHHGGGGG [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry6 137 contig_000000000 304 49 35M215S = 304 0 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT ABBBBFFFFFFFGGGGGGGGGGHHHHHHHHGHGHHHHHHHGHHHGHGHHHHHHFGHHGHHHIHHHHHIHHHHHHHHHGHHHHHHHHGHGGGGGHGGGGGHHHHHHHGG1EGGGGGGFHGGHHHGHGGGGGGHHHHGGCGFHHGHHHHFHEHHHHHHHGGGHHGGGHGEGHGFHGCGCFHHHHHHHGGGGD [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;.9//FFF9/GFB9FBGBC.A.FCFGFGFBHHGB:0GHFCG-AA at CHGC0FFGGC.HC>><BGFFHHGGHEHFHHFGFFGHHGF1?11<1HHG<CFGBFGEHFGFGDHHG2FGEHFHCGGDGGHGFGGFDFHHFHHGGFHGEEHFBGEHHHGGEFHHHHGAGHFE1BFHHHGHGGGFHFHFDDFFCAEF [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/markdup/3_missing_mc.expected.sam b/test/markdup/3_missing_mc.expected.sam
new file mode 100644
index 0000000..0e2697e
--- /dev/null
+++ b/test/markdup/3_missing_mc.expected.sam
@@ -0,0 +1,2 @@
+@HD VN:1.4 SO:coordinate
+@SQ SN:contig_000000000 LN:11391
diff --git a/test/markdup/3_missing_mc.expected.sam.err b/test/markdup/3_missing_mc.expected.sam.err
new file mode 100644
index 0000000..f184d3e
--- /dev/null
+++ b/test/markdup/3_missing_mc.expected.sam.err
@@ -0,0 +1,2 @@
+[markdup] error: no MC tag.
+[markdup] error: unable to assign pair hash key.
diff --git a/test/markdup/3_missing_mc.sam b/test/markdup/3_missing_mc.sam
new file mode 100644
index 0000000..848139e
--- /dev/null
+++ b/test/markdup/3_missing_mc.sam
@@ -0,0 +1,18 @@
+ at HD VN:1.4 SO:coordinate
+ at SQ SN:contig_000000000 LN:11391
+entry1 163 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11>2 [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry4 163 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FDH [...]
+entry1 83 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11DA [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry4 83 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA GGGGGGGGGGGGGGGGGFFGHHHHHGGHHGHHHHGFHHHHGHHHHHHHHHGGHHFEHHHHHHHHHGGHHHHHHHHHHHFHHGFHGHHHGFFHFHHHHHHHHHFHHEHHHHHHHGGHHGHGHHHHHHHHHFGGHHHHHHGFHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHHHHHHHHGGGGG [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry6 137 contig_000000000 304 49 35M215S = 304 0 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT ABBBBFFFFFFFGGGGGGGGGGHHHHHHHHGHGHHHHHHHGHHHGHGHHHHHHFGHHGHHHIHHHHHIHHHHHHHHHGHHHHHHHHGHGGGGGHGGGGGHHHHHHHGG1EGGGGGGFHGGHHHGHGGGGGGHHHHGGCGFHHGHHHHFHEHHHHHHHGGGHHGGGHGEGHGFHGCGCFHHHHHHHGGGGD [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;.9//FFF9/GFB9FBGBC.A.FCFGFGFBHHGB:0GHFCG-AA at CHGC0FFGGC.HC>><BGFFHHGGHEHFHHFGFFGHHGF1?11<1HHG<CFGBFGEHFGFGDHHG2FGEHFHCGGDGGHGFGGFDFHHFHHGGFHGEEHFBGEHHHGGEFHHHHGAGHFE1BFHHHGHGGGFHFHFDDFFCAEF [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/markdup/4_missing_ms.expected.sam b/test/markdup/4_missing_ms.expected.sam
new file mode 100644
index 0000000..0e2697e
--- /dev/null
+++ b/test/markdup/4_missing_ms.expected.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.4 SO:coordinate
+ at SQ SN:contig_000000000 LN:11391
diff --git a/test/markdup/4_missing_ms.expected.sam.err b/test/markdup/4_missing_ms.expected.sam.err
new file mode 100644
index 0000000..ef8f57f
--- /dev/null
+++ b/test/markdup/4_missing_ms.expected.sam.err
@@ -0,0 +1,2 @@
+[markdup] error: no ms score tag.
+[markdup] error: no ms score tag.
diff --git a/test/markdup/4_missing_ms.sam b/test/markdup/4_missing_ms.sam
new file mode 100644
index 0000000..1bedb5d
--- /dev/null
+++ b/test/markdup/4_missing_ms.sam
@@ -0,0 +1,18 @@
+ at HD VN:1.4 SO:coordinate
+ at SQ SN:contig_000000000 LN:11391
+entry1 163 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11>2 [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry4 163 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FDH [...]
+entry1 83 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11DA [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry4 83 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA GGGGGGGGGGGGGGGGGFFGHHHHHGGHHGHHHHGFHHHHGHHHHHHHHHGGHHFEHHHHHHHHHGGHHHHHHHHHHHFHHGFHGHHHGFFHFHHHHHHHHHFHHEHHHHHHHGGHHGHGHHHHHHHHHFGGHHHHHHGFHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHHHHHHHHGGGGG [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry6 137 contig_000000000 304 49 35M215S = 304 0 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT ABBBBFFFFFFFGGGGGGGGGGHHHHHHHHGHGHHHHHHHGHHHGHGHHHHHHFGHHGHHHIHHHHHIHHHHHHHHHGHHHHHHHHGHGGGGGHGGGGGHHHHHHHGG1EGGGGGGFHGGHHHGHGGGGGGHHHHGGCGFHHGHHHHFHEHHHHHHHGGGHHGGGHGEGHGFHGCGCFHHHHHHHGGGGD [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;.9//FFF9/GFB9FBGBC.A.FCFGFGFBHHGB:0GHFCG-AA at CHGC0FFGGC.HC>><BGFFHHGGHEHFHHFGFFGHHGF1?11<1HHG<CFGBFGEHFGFGDHHG2FGEHFHCGGDGGHGFGGFDFHHFHHGGFHGEEHFBGEHHHGGEFHHHHGAGHFE1BFHHHGHGGGFHFHFDDFFCAEF [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/markdup/5_markdup.expected.sam b/test/markdup/5_markdup.expected.sam
new file mode 100644
index 0000000..dfb68c8
--- /dev/null
+++ b/test/markdup/5_markdup.expected.sam
@@ -0,0 +1,18 @@
+ at HD VN:1.4 SO:coordinate
+ at SQ SN:contig_000000000 LN:11391
+entry1 1187 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11> [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry4 1187 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FD [...]
+entry1 1107 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11 [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry4 1107 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA GGGGGGGGGGGGGGGGGFFGHHHHHGGHHGHHHHGFHHHHGHHHHHHHHHGGHHFEHHHHHHHHHGGHHHHHHHHHHHFHHGFHGHHHGFFHFHHHHHHHHHFHHEHHHHHHHGGHHGHGHHHHHHHHHFGGHHHHHHGFHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHHHHHHHHGGG [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry6 1161 contig_000000000 304 49 35M215S = 304 0 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT ABBBBFFFFFFFGGGGGGGGGGHHHHHHHHGHGHHHHHHHGHHHGHGHHHHHHFGHHGHHHIHHHHHIHHHHHHHHHGHHHHHHHHGHGGGGGHGGGGGHHHHHHHGG1EGGGGGGFHGGHHHGHGGGGGGHHHHGGCGFHHGHHHHFHEHHHHHHHGGGHHGGGHGEGHGFHGCGCFHHHHHHHGGGG [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 1145 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;.9//FFF9/GFB9FBGBC.A.FCFGFGFBHHGB:0GHFCG-AA at CHGC0FFGGC.HC>><BGFFHHGGHEHFHHFGFFGHHGF1?11<1HHG<CFGBFGEHFGFGDHHG2FGEHFHCGGDGGHGFGGFDFHHFHHGGFHGEEHFBGEHHHGGEFHHHHGAGHFE1BFHHHGHGGGFHFHFDDFFCAE [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/markdup/5_markdup.sam b/test/markdup/5_markdup.sam
new file mode 100644
index 0000000..5030ca7
--- /dev/null
+++ b/test/markdup/5_markdup.sam
@@ -0,0 +1,18 @@
+ at HD VN:1.4 SO:coordinate
+ at SQ SN:contig_000000000 LN:11391
+entry1 163 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11>2 [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry4 163 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FDH [...]
+entry1 83 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11DA [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry4 83 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA GGGGGGGGGGGGGGGGGFFGHHHHHGGHHGHHHHGFHHHHGHHHHHHHHHGGHHFEHHHHHHHHHGGHHHHHHHHHHHFHHGFHGHHHGFFHFHHHHHHHHHFHHEHHHHHHHGGHHGHGHHHHHHHHHFGGHHHHHHGFHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHHHHHHHHGGGGG [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry6 137 contig_000000000 304 49 35M215S = 304 0 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT ABBBBFFFFFFFGGGGGGGGGGHHHHHHHHGHGHHHHHHHGHHHGHGHHHHHHFGHHGHHHIHHHHHIHHHHHHHHHGHHHHHHHHGHGGGGGHGGGGGHHHHHHHGG1EGGGGGGFHGGHHHGHGGGGGGHHHHGGCGFHHGHHHHFHEHHHHHHHGGGHHGGGHGEGHGFHGCGCFHHHHHHHGGGGD [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;.9//FFF9/GFB9FBGBC.A.FCFGFGFBHHGB:0GHFCG-AA at CHGC0FFGGC.HC>><BGFFHHGGHEHFHHFGFFGHHGF1?11<1HHG<CFGBFGEHFGFGDHHG2FGEHFHCGGDGGHGFGGFDFHHFHHGGFHGEEHFBGEHHHGGEFHHHHGAGHFE1BFHHHGHGGGFHFHFDDFFCAEF [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/markdup/6_remove_dups.expected.sam b/test/markdup/6_remove_dups.expected.sam
new file mode 100644
index 0000000..344b471
--- /dev/null
+++ b/test/markdup/6_remove_dups.expected.sam
@@ -0,0 +1,12 @@
+@HD VN:1.4 SO:coordinate
+@SQ SN:contig_000000000 LN:11391
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/markdup/6_remove_dups.sam b/test/markdup/6_remove_dups.sam
new file mode 100644
index 0000000..5030ca7
--- /dev/null
+++ b/test/markdup/6_remove_dups.sam
@@ -0,0 +1,18 @@
+@HD VN:1.4 SO:coordinate
+@SQ SN:contig_000000000 LN:11391
+entry1 163 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2 at 2@2 at F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2 at G11>2 [...]
+entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFB [...]
+entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGH [...]
+entry4 163 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BCCCCFFFFFFFGGGGGGGGGGEHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHGHHGHHHHHEEF5FEGFGFHHGHHFHHHHFHFDEGHAEGHHHFHHFHFFHHGBGGHHFHHFHFHHHGGHGHFGEFHDGGHGHHHHGDHHFFHHGHHHDFDHGHHHHHGHGBHGHHFBDHGGF2FDH [...]
+entry1 83 contig_000000000 137 60 250M = 109 -278 TTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAGAACCTAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATTTATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCACACTGCCTGGGTTATCATTGATTCGGTACTAGAGATAGTGTTTAAATAATACG ;0C0G;0C0:0FFGDD00D==00000D=00D1D=1DDDF>11=<<1><111<?1<111?1D1??00<>@@11F//0A>2HG>B>2B22222B2FFB2BB22>>2B22BFBEEFGHHG2FGBB22FFB2DB1100F/1FG at G1BHHFGGAGB1DB@22EB00GCEADB1AFHHHGD21BEDF1AF1FA11DA [...]
+entry2 83 contig_000000000 137 60 250M = 109 -278 GTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAG 0GHHHBD0DFGHHHG<D0FD0BD0FHHHG=GGBGG1DGDGHFFGFDHDHGFGAGGHHHGHHHHHHHGGFHHHEGHHGHHGHHGHHHHDBHHHHHHGFGHHHHFHHHFHFG at EBGG4BHGGHHHHHHHHGHHHHHHGHFGBHHHHHHHHHHFHFFHHHHEHFHHHHHHGAFHHHFGHHG2HFHHHGHHHHHH [...]
+entry3 147 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGCCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA 9BGC0FFBGFFGGGGFFGFEGGHGHGCGHHHHGHGGHGHHHGHHHHGHHGGGHHHHFHGHGHHHHHHGHHFHHHHHHHHHGHHHHGHHGHHHHG2HHGFHHGHHHHHHDFHHHGGHHGHGHHHFHHDHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHFFHHHHGHHHHHHFHHHHHHHHHHHHHHGCGG [...]
+entry4 83 contig_000000000 222 60 250M = 116 -356 TAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTA GGGGGGGGGGGGGGGGGFFGHHHHHGGHHGHHHHGFHHHHGHHHHHHHHHGGHHFEHHHHHHHHHGGHHHHHHHHHHHFHHGFHGHHHGFFHFHHHHHHHHHFHHEHHHHHHHGGHHGHGHHHHHHHHHFGGHHHHHHGFHFHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHHHHHHHHGGGGG [...]
+entry5 163 contig_000000000 304 60 250M = 422 368 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAGAGATAGTGTTTAAATAATAAGTGTCCATCAAAGAGCAGAACAGCTGCGTGTTTGCGTGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAG AABBBFFFFFFFGGGFGGGGGGHHHHHHHHHGHHHHHHHGGHGHHHHHHHHHHGHHHHHHHHFHFGHHHHHHHHHGHGGHFHHHHHHHHHHGFFHHHHHEHHHGHHHHHGDEEGHGHGEGGHHHHHHHHHGHHHFGHHHHHHHFGHHHHHGG?GGGAGHHFFHHFFGG2>@FGGFGFBGGHFHGHHHHHFF [...]
+entry6 69 contig_000000000 304 0 * = 304 0 GCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAG BCCCBCBCFFDDGGGGGGGGGGGHGGHHHHHHGHHGGGHHHHHHHHHHHHGGHGGHGGGGGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHHGGGGGHHHGGGGGGDGGHHHHGHHGHHHGHHGGGGGGGHHGGGGGGGGGGAGGGGGGDGAGFGGGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFF [...]
+entry6 137 contig_000000000 304 49 35M215S = 304 0 ATTTTGTAACATTTGTCATGCCAAGTATCAAACTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT ABBBBFFFFFFFGGGGGGGGGGHHHHHHHHGHGHHHHHHHGHHHGHGHHHHHHFGHHGHHHIHHHHHIHHHHHHHHHGHHHHHHHHGHGGGGGHGGGGGHHHHHHHGG1EGGGGGGFHGGHHHGHGGGGGGHHHHGGCGFHHGHHHHFHEHHHHHHHGGGHHGGGHGEGHGFHGCGCFHHHHHHHGGGGD [...]
+entry5 83 contig_000000000 422 60 250M = 304 -368 TGTGTGTGTGTGTGAGTTTGAAAGCAATAGACAGAGGGTAAGACTGTGTAATAGAGTGTAAGAGAGTGTCAGAGTGAGTGTGTAAATGGACGCCTATCATTTAGCATGGGTCAATCTAGTGAAAGCTCGCAGCAGCTCTCTAAGTGTCTGGCATTGCAGCAAATTGAGCCGAATGCATTTCTGCACACGTAAACACGGCAGAATACAGATTAGCCAAGCCCAATCTCTCATTAAATCCACATTTAATAGA .DDFGEAGGGFGFHGHHFFHHHHHHHHHHGCHHHHHHFFHHHHHHHHHHHHHHHHHHGHHFHHHHHHGHHHHHHHHHGHHHHHHHHGGGGGHGHHHFHHHHFHHHHHHHHHGHHHHHHHHHHHEEEEGHHHHHHHGHHHHHHHHHGHGHHHHHHHHHHHHHHHHHGGGHHHHHHHHHHHHHHHHHHHGHFH [...]
+entry7 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;GFC/0FC;/AFGFC0BDA.?BGFFC0FB9G0C/HHGBFHG:.GFCGEHGHGHFHHHEFHHGBGDF1EHF1GFGGFDFHFHHHFHHG1BG1HHGHHEHGBC>2<2HHHHFHHFHHHHFEGD2EFHHGEF4EHHFFG?/GGF3GFFCE3HGHFHGAHFGF1AGHHGGHHGD5HGF3HF3HHFHHHGHG1F [...]
+entry7 181 contig_000000000 3891 0 * = 3891 0 CAGTCACTCGCCTCCCGCTAACAGTCCAACTCTTCTGGTTCATCTGCGAGTCATGGTGTACCGATGTTTTGTTCTCTAGAAAGCGAAAACATTGATATGGCTGAGGGGGGCTAGCAATTTTGGCCTGATAATGGGTGTGAAATATTCAAACTGTTATGATGCTAGCCCAAATAAAACTGTTGGGACTATCTCGGGAAGAAAAATCATGATCAGAGAAGCTAGGAAAGTGTCCTTGGTATGGTAAGCACTG ////-------;----0090;.9/0/000A//::.;.0000::.?@<=0/0=0./00..<..11>00>>0111<?111<////<20GF at 212222<1F011B?//<E0FGFB211B1?0/??01BB22210?>?1 at 222B22@211122B11E at 22112110000/B222A222ADB//0FB2DDBA///A/01D [...]
+entry8 121 contig_000000000 3891 60 250M = 3891 0 ACACCACCAGCACCAGCCTAAACCGTTGATACAAGGCAGGATGGATCCGTGCTTTCATGTTGTTGATGCTAAATTCTGACTCACATCTGAATATTCCAGCAGAAATCGAGACTCATCAGAGCAGGCAACGTTTTTACAATCTTTTATTGTCCAATTTTGGTGAGCCTGTGTGAATTGTAGTCTCAGTTTCCTGTTCTTAGCTGACAGGAGTGGCACCCGGTGTGGTCTTCTGCTGCTGTAGCCCATCCGC 99;.9//FFF9/GFB9FBGBC.A.FCFGFGFBHHGB:0GHFCG-AA at CHGC0FFGGC.HC>><BGFFHHGGHEHFHHFGFFGHHGF1?11<1HHG<CFGBFGEHFGFGDHHG2FGEHFHCGGDGGHGFGGFDFHHFHHGGFHGEEHFBGEHHHGGEFHHHHGAGHFE1BFHHHGHGGGFHFHFDDFFCAEF [...]
+entry8 181 contig_000000000 3891 0 * = 3891 0 GACGGCGCCTGGAGCGCGTAGGCAAAGCATGATCATCTGAGCACGGCGAACGAGAGTCAGACAAAGGGTTGATCGCCAGTAACGCTCGAGAAAGACACTCCCCCGCAACAACAATCAAACCAACAGTGCACTTTCATTTGCGAATCATGGAATAATGTTGGTTTGGGCTGTAGAAGGCCAAGATATATAAATTGCTGTAGAGGGTTGGGCATGTGGACCGTAAAATGGGTGGGAAGTATAAGAACTTTGT 9;-----/;;--------/;9//////////////////-9------;...C09000;00090/...90.;---./00....-..<.000=0<00..---////1</00211221/B?//122011211111B111//>//2221122222 at 11>///CE?>/>//1112211111012B22222AD22110011 [...]
diff --git a/test/merge/tag.pg.merge.expected.sam b/test/merge/tag.pg.merge.expected.sam
index de7eba7..92640f1 100644
--- a/test/merge/tag.pg.merge.expected.sam
+++ b/test/merge/tag.pg.merge.expected.sam
@@ -11,27 +11,27 @@
@PG ID:donkey
@CO Do you know?
@CO Do you know?
-x7 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * AS:i:50 FI:i:2 RG:Z:cow PG:Z:bull
x7 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow PG:Z:bull
-x8 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? AS:i:10 FI:f:1.5 RG:Z:cow PG:Z:bull
+x7 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * AS:i:50 FI:i:2 RG:Z:cow PG:Z:bull
x8 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow PG:Z:bull
-x9 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? AS:i:20 FI:i:1 RG:Z:cow PG:Z:bull
+x8 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? AS:i:10 FI:f:1.5 RG:Z:cow PG:Z:bull
x9 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow PG:Z:bull
-x10 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? AS:i:0 FI:A:b RG:Z:cow PG:Z:bull
+x9 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? AS:i:20 FI:i:1 RG:Z:cow PG:Z:bull
x10 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow PG:Z:bull
-x11 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? FI:Z:a RG:Z:cow PG:Z:bull
+x10 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? AS:i:0 FI:A:b RG:Z:cow PG:Z:bull
x11 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow PG:Z:bull
-x12 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? AS:i:65100 RG:Z:cow PG:Z:bull
+x11 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? FI:Z:a RG:Z:cow PG:Z:bull
x12 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow PG:Z:bull
-r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 AS:i:10 FI:i:5 RG:Z:colt PG:Z:donkey
+x12 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? AS:i:65100 RG:Z:cow PG:Z:bull
r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt PG:Z:donkey
-r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 AS:i:20 FI:f:4.5 RG:Z:colt PG:Z:donkey
+r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 AS:i:10 FI:i:5 RG:Z:colt PG:Z:donkey
r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt PG:Z:donkey
-r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * AS:i:1 FI:i:4 RG:Z:colt PG:Z:donkey
r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt PG:Z:donkey
-r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * AS:i:-5 FI:f:3.5 RG:Z:colt PG:Z:donkey
+r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 AS:i:20 FI:f:4.5 RG:Z:colt PG:Z:donkey
+r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * AS:i:1 FI:i:4 RG:Z:colt PG:Z:donkey
r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey
-r006 16 ref1 29 30 6H5M * 0 0 TAGGC * FI:i:3 RG:Z:colt PG:Z:donkey
+r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * AS:i:-5 FI:f:3.5 RG:Z:colt PG:Z:donkey
r006 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt PG:Z:donkey
-r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * AS:i:100 FI:f:2.5 RG:Z:colt PG:Z:donkey
+r006 16 ref1 29 30 6H5M * 0 0 TAGGC * FI:i:3 RG:Z:colt PG:Z:donkey
r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt PG:Z:donkey
+r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * AS:i:100 FI:f:2.5 RG:Z:colt PG:Z:donkey
diff --git a/test/mpileup/regression.sh b/test/mpileup/regression.sh
index bde4f26..3bfa427 100755
--- a/test/mpileup/regression.sh
+++ b/test/mpileup/regression.sh
@@ -39,7 +39,8 @@ run_test() {
e=$1; shift
test_iter=`expr $test_iter + 1`
#echo "p=$p e=$e cmd=$@"
- result=`eval ${@+"$@"} 2>/dev/null > _out`
+ # All output here is text, so blanket and naive removal of cr just about works.
+ result=`eval ${@+"$@"} 2>/dev/null | tr -d '\015' > _out`
#result=`eval ${@+"$@"} > _out`
#result=`eval valgrind --error-exitcode=1 --leak-check=full ${@+"$@"}`
if [ $? != 0 ]
diff --git a/test/test.pl b/test/test.pl
index c039167..9599608 100755
--- a/test/test.pl
+++ b/test/test.pl
@@ -29,7 +29,7 @@ use Cwd qw/ abs_path /;
use FindBin;
use lib "$FindBin::Bin";
use Getopt::Long;
-use File::Temp qw/ tempfile tempdir /;
+use File::Temp;
use IO::Handle;
my $opts = parse_params();
@@ -60,6 +60,8 @@ test_quickcheck($opts);
test_reheader($opts);
test_addrprg($opts);
test_addrprg($opts, threads=>2);
+test_markdup($opts);
+test_markdup($opts, threads=>2);
print "\nNumber of tests:\n";
@@ -92,21 +94,61 @@ sub error
"\n";
exit 1;
}
+
+sub tempfile {
+ my ($fh, $name) = File::Temp::tempfile(@_);
+ if (wantarray) {
+ if ($^O =~ /^(?:msys|MSWin32)/) {
+ $name = abs_path($name);
+ }
+ return ($fh, $name);
+ }
+ return $fh;
+}
+
+sub cygpath {
+ my ($path) = @_;
+ $path = `cygpath -m $path`;
+ $path =~ s/\r?\n//;
+ return $path
+}
+
+sub tempdir
+{
+ my $dir = File::Temp::tempdir(@_);
+ if ($^O =~ /^msys/) {
+ $dir = cygpath($dir);
+ } elsif ($^O eq 'MSWin32') {
+ $dir =~ s/\\/\//g;
+ }
+ return $dir;
+}
+
sub parse_params
{
my $opts = { bgzip=>"bgzip", keep_files=>0, nok=>0, nfailed=>0, nxfail => 0, nxpass => 0 };
my $help;
Getopt::Long::Configure('bundling');
my $ret = GetOptions (
- 'e|exec=s' => sub { my ($tool, $path) = split /=/, $_[1]; $$opts{$tool} = $path if $path },
+ 'e|exec=s' => sub {
+ my ($tool, $path) = split /=/, $_[1];
+ if ($^O eq 'MSWin32' && $path !~ /\.exe$/) {
+ $path .= '.exe';
+ }
+ $$opts{$tool} = abs_path($path) if $path;
+ },
't|temp-dir:s' => \$$opts{keep_files},
'r|redo-outputs' => \$$opts{redo_outputs},
'h|?|help' => \$help
);
if ( !$ret or $help ) { error(); }
- $$opts{tmp} = $$opts{keep_files} ? $$opts{keep_files} : tempdir(CLEANUP=>1);
+ $$opts{tmp} = $$opts{keep_files} ? $$opts{keep_files} : tempdir(CLEANUP => 1);
$$opts{path} = $FindBin::RealBin;
$$opts{bin} = $FindBin::RealBin;
+ if ($^O =~ /^msys/) {
+ $$opts{path} = cygpath($$opts{path});
+ $$opts{bin} = cygpath($$opts{bin});
+ }
$$opts{bin} =~ s{/test/?$}{};
if ( $$opts{keep_files} ) { cmd("mkdir -p $$opts{keep_files}"); }
else
@@ -123,34 +165,35 @@ sub clean_files
}
sub _cmd
{
- my ($cmd) = @_;
+ my ($cmd, $args) = @_;
my $kid_io;
- my @out;
- my @err;
+ my $out;
+ my $err;
my ($err_fh, $err_filename) = tempfile(UNLINK => 1);
-
- my $pid = open($kid_io, "-|");
+ my $pid = open($kid_io, "-|", 'bash', '-o','pipefail','-c', "($cmd) 2> $err_filename");
if ( !defined $pid ) { error("Cannot fork: $!"); }
if ($pid)
{
# parent
- @out = <$kid_io>;
+ binmode($kid_io);
+ binmode($err_fh);
+ local $/;
+ $out = <$kid_io>;
close($kid_io);
my $child_retval = $?;
- @err = <$err_fh>;
+ $err = <$err_fh>;
close ($err_fh);
- return ($child_retval, join('',@out), join('',@err));
- }
- else
- {
- # child
- exec('/bin/bash', '-o','pipefail','-c', "($cmd) 2> $err_filename") or error("Cannot execute the command [/bin/sh -o pipefail -c $cmd]: $!");
+ $out =~ s/\x0d\x0a/\x0a/g if (!$args->{binary});
+ $err =~ s/\x0d\x0a/\x0a/g if (!$args->{binary_err});
+ $out =~ s/e([-+])0(\d\d)/e$1$2/g if ($args->{exp_fix});
+
+ return ($child_retval, $out, $err);
}
}
sub cmd
{
- my ($cmd) = @_;
- my ($ret,$out,$err) = _cmd($cmd);
+ my ($cmd, $args) = @_;
+ my ($ret,$out,$err) = _cmd($cmd, $args);
if ( $ret ) { error("The command failed [$ret]: $cmd\n", "out:$out\n", "err:$err\n"); }
return $out;
}
@@ -176,12 +219,13 @@ sub test_cmd
print "$test:\n";
print "\t$args{cmd}\n";
- my ($ret,$out,$err) = _cmd("$args{cmd}");
+ my ($ret,$out,$err) = _cmd("$args{cmd}", \%args);
if ( $args{want_fail}? ($ret == 0) : ($ret != 0) ) { failed($opts,%args,msg=>$test,reason=>"ERR: $err"); return; }
if ( $$opts{redo_outputs} && -e "$$opts{path}/$args{out}" )
{
rename("$$opts{path}/$args{out}","$$opts{path}/$args{out}.old");
open(my $fh,'>',"$$opts{path}/$args{out}") or error("$$opts{path}/$args{out}: $!");
+ binmode($fh);
print $fh $out;
close($fh);
my ($ret,$out) = _cmd("diff -q $$opts{path}/$args{out} $$opts{path}/$args{out}.old");
@@ -197,6 +241,7 @@ sub test_cmd
{
rename("$$opts{path}/$args{err}","$$opts{path}/$args{err}.old");
open(my $fh,'>',"$$opts{path}/$args{err}") or error("$$opts{path}/$args{err}: $!");
+ binmode($fh);
print $fh $err;
close($fh);
my ($ret,$out) = _cmd("diff -q $$opts{path}/$args{err} $$opts{path}/$args{err}.old");
@@ -213,8 +258,9 @@ sub test_cmd
my $exp = '';
if ( open(my $fh,'<',"$$opts{path}/$args{out}") )
{
- my @exp = <$fh>;
- $exp = join('',@exp);
+ binmode($fh);
+ local $/;
+ $exp = <$fh>;
close($fh);
}
elsif ( !$$opts{redo_outputs} ) { failed($opts,%args,msg=>$test,reason=>"$$opts{path}/$args{out}: $!"); return; }
@@ -222,6 +268,7 @@ sub test_cmd
if ( $exp ne $out )
{
open(my $fh,'>',"$$opts{path}/$args{out}.new") or error("$$opts{path}/$args{out}.new");
+ binmode($fh);
print $fh $out;
close($fh);
if ( !-e "$$opts{path}/$args{out}" )
@@ -241,6 +288,7 @@ sub test_cmd
my $exp_err = '';
if ( open(my $fh,'<',"$$opts{path}/$args{err}") )
{
+ binmode($fh);
my @exp = <$fh>;
$exp_err = join('',@exp);
close($fh);
@@ -250,6 +298,7 @@ sub test_cmd
if ( $exp_err ne $err )
{
open(my $fh,'>',"$$opts{path}/$args{err}.new") or error("$$opts{path}/$args{err}.new");
+ binmode($fh);
print $fh $err;
close($fh);
if ( !-e "$$opts{path}/$args{err}" )
@@ -274,8 +323,9 @@ sub test_cmd
my $exp = '';
if ( open(my $fh,'<',"$$opts{path}/$out_expected") )
{
- my @exp = <$fh>;
- $exp = join('',@exp);
+ binmode($fh);
+ local $/;
+ $exp = <$fh>;
close($fh);
}
elsif ( !$$opts{redo_outputs} ) { failed($opts,%args,msg=>$test,reason=>"$$opts{path}/$out_expected: $!"); return; }
@@ -283,12 +333,17 @@ sub test_cmd
my $out = '';
if ( open(my $fh,'<',"$$opts{path}/$out_actual") )
{
- my @out = <$fh>;
+ binmode($fh);
if( exists($args{hskip}) ){
# Strip hskip lines to allow match to the expected output
- splice @out, 0, $args{hskip};
+ for (my $i = 0; $i < $args{hskip}; $i++) {
+ my $ignore = <$fh>;
+ }
}
- $out = join('',@out);
+ local $/;
+ $out = <$fh>;
+ $out =~ s/\x0d\x0a/\x0a/g if (!$args{binary});
+ $out =~ s/e\+0(\d\d)/e+$1/g if ($args{exp_fix});
close($fh);
}
elsif ( !$$opts{redo_outputs} ) { failed($opts,%args,msg=>$test,reason=>"$$opts{path}/$out_actual: $!"); return; }
@@ -296,6 +351,7 @@ sub test_cmd
if ( $exp ne $out )
{
open(my $fh,'>',"$$opts{path}/$out_expected.new") or error("$$opts{path}/$out_expected.new");
+ binmode($fh);
print $fh $out;
close($fh);
if ( !-e "$$opts{path}/$out_expected" )
@@ -310,7 +366,7 @@ sub test_cmd
}
return;
}
- _cmd("rm $$opts{path}/$out_actual");
+ unlink("$$opts{path}/$out_actual");
}
}
passed($opts,%args,msg=>$test);
@@ -364,6 +420,7 @@ sub test_bgzip
# Create test data: The beginning of each line gives the 0-based offset (including '\n's)
#
open(my $fh,'>',"$$opts{tmp}/bgzip.dat") or error("$$opts{tmp}/bgzip.dat: $!");
+ binmode($fh);
my $ntot = 1_000_000;
my $nwr = 0;
while ($nwr < $ntot)
@@ -505,7 +562,7 @@ sub test_index
cmd("$$opts{bin}/samtools index${threads} -c $$opts{tmp}/large_chrom.bam");
test_cmd($opts,out=>'dat/large_chrom.out',cmd=>"$$opts{bin}/samtools view${threads} $$opts{tmp}/large_chrom.bam ref2");
test_cmd($opts,out=>'dat/large_chrom.out',cmd=>"$$opts{bin}/samtools view${threads} $$opts{tmp}/large_chrom.bam ref2:1-541556283");
- test_cmd($opts,out=>'dat/test_input_1_a.bam.bai.expected',cmd=>"$$opts{bin}/samtools index${threads} $$opts{path}/dat/test_input_1_a.bam && cat $$opts{path}/dat/test_input_1_a.bam.bai");
+ test_cmd($opts,out=>'dat/test_input_1_a.bam.bai.expected',cmd=>"$$opts{bin}/samtools index${threads} $$opts{path}/dat/test_input_1_a.bam && cat $$opts{path}/dat/test_input_1_a.bam.bai",binary=>1);
}
sub test_mpileup
@@ -528,9 +585,11 @@ sub test_mpileup
cmd("$$opts{bin}/samtools index $$opts{tmp}/$file.cram");
print $fh1 "$$opts{tmp}/$file.bam\n";
print $fh2 "$$opts{tmp}/$file.cram\n";
- print $fh3 "file://", abs_path("$$opts{tmp}/$file.bam"), "\n";
- print $fh4 "file://", abs_path("$$opts{tmp}/$file.cram"), "\n";
- }
+ my $atmp = $^O =~ /^msys/ ? cygpath($$opts{tmp}) : abs_path($$opts{tmp});
+ unless ($atmp =~ /^\//) { $atmp = "/$atmp"; }
+ print $fh3 "file://$atmp/$file.bam\n";
+ print $fh4 "file://$atmp/$file.cram\n";
+ }
close($fh1);
close($fh2);
close($fh3);
@@ -615,6 +674,9 @@ sub test_usage
# now test subcommand usage as well
foreach my $subcommand (@subcommands) {
+ # Under msys the isatty function fails to recognise the terminal.
+ # Skip these tests for now.
+ next if ($^O =~ /^msys/ && $subcommand =~ /^(dict|sort|stats|view)$/);
test_usage_subcommand($opts,%args,subcmd=>$subcommand);
}
}
@@ -923,6 +985,12 @@ sub run_view_test
my $pid = fork();
unless (defined($pid)) { die "Couldn't fork : $!\n"; }
+ my $save_stdout;
+ if ($^O eq 'MSWin32') {
+ # Ensure we can restore STDOUT properly on Windows. Not doing this
+ # causes output to be lost if STDOUT is redirected below.
+ open($save_stdout, '>&STDOUT') || die "Couldn't dup STDOUT: $!\n";
+ }
unless ($pid) {
if ($args{stdin}) {
open(STDIN, '-|', 'cat', $args{stdin})
@@ -936,7 +1004,11 @@ sub run_view_test
}
my $reaped = waitpid($pid, 0);
my $res = $reaped == $pid && $? == 0 ? 0 : 1;
-
+ if ($^O eq 'MSWin32') {
+ open(STDOUT, '>&', $save_stdout)
+ || die "Couldn't restore STDOUT : $!\n";
+ }
+
if (!$res && $args{compare_sam} && $args{out}) {
# Convert output back to sam and compare
my $sam_name = "$args{out}.sam";
@@ -955,6 +1027,7 @@ sub run_view_test
} else {
open($sam_out, '-|', @cmd2)
|| die "Couldn't open pipe from @cmd2: $!\n";
+ binmode($sam_out);
}
# Hack $args so the comparison gets done
$args{compare} = $args{compare_sam};
@@ -1059,14 +1132,10 @@ sub open_bgunzip
{
my ($opts, $in) = @_;
- my @cmd = ("$$opts{bgzip}", '-c', '-d');
my $bgzip;
- my $pid = open($bgzip, '-|');
- unless (defined($pid)) { die "Couldn't fork: $!\n"; }
- unless ($pid) {
- open(STDIN, '<', $in) || die "Couldn't redirect STDIN: $!\n";
- exec(@cmd) || die "Couldn't exec @cmd: $!\n";
- }
+ open($bgzip, "$$opts{bgzip} -c -d < $in |")
+ || die "Couldn't open pipe to bgzip -c -d $!\n";
+ binmode($bgzip);
return $bgzip;
}
@@ -1115,6 +1184,7 @@ sub sam_compare
while ($l1 = <$f1>) {
$lno1++;
if (($ht1) = $l1 =~ /^(@\S+)/) {
+ $l1 =~ s/\x0d\x0a$/\x0a/;
push(@{$hdr1{$ht1}}, $l1);
} else {
last;
@@ -1130,6 +1200,7 @@ sub sam_compare
while ($l2 = <$f2>) {
$lno2++;
if (($ht2) = $l2 =~ /^(@\S+)/) {
+ $l2 =~ s/\x0d\x0a$/\x0a/;
push(@{$hdr2{$ht2}}, $l2);
} else {
last;
@@ -1170,12 +1241,15 @@ sub sam_compare
}
while ($l1 && $l2) {
- chomp($l1);
- chomp($l2);
+ chomp($l1); $l1 =~ s/\r$//;
+ chomp($l2); $l2 =~ s/\r$//;
my @sam1 = split(/\t/, $l1);
my @sam2 = split(/\t/, $l2);
my @tags1 = sort(splice(@sam1, 11));
my @tags2 = sort(splice(@sam2, 11));
+ # Windows uses e.g. 1.9e+009 vs 1.9e+09 on Linux
+ @tags1 = map {s/(e[-+])0(\d\d)/$1$2/g;$_} @tags1;
+ @tags2 = map {s/(e[-+])0(\d\d)/$1$2/g;$_} @tags2;
if (join("\t", @sam1, @tags1) ne join("\t", @sam2, @tags2)) {
last;
}
@@ -1260,6 +1334,8 @@ sub text_compare
my $l1 = <$t1>;
my $l2 = <$t2>;
last if (!defined($l1) && !defined($l2));
+ $l1 =~ s/\x0d\x0a$/\x0a/;
+ $l2 =~ s/\x0d\x0a$/\x0a/;
if (($l1 || '') ne ($l2 || '')) {
$diff = 1;
if (defined($l1)) { chomp($l1); }
@@ -1290,7 +1366,7 @@ sub count_compare
open(my $c1, '<', $count1) || die "Couldn't open $count1 : $!\n";
my $number1 = <$c1>;
- chomp($number1);
+ chomp($number1); $number1 =~ s/\r$//;
close($c1) || die "Error reading $count1 : $!\n";
unless ($number1 =~ /^\d+$/) {
@@ -1392,6 +1468,7 @@ sub gen_file
}
my $fasta = "$prefix.fa";
open(my $fa, '>', $fasta) || die "Couldn't open $fasta for writing : $!\n";
+ binmode($fa);
print $fa ">ref1\n";
for (my $i = 0; $i < $size; $i += 60) {
print $fa substr($seq, $i, 60), "\n";
@@ -1400,16 +1477,14 @@ sub gen_file
my $fai = "$prefix.fa.fai";
open($fa, '>', $fai) || die "Couldn't open $fai for writing : $!\n";
+ binmode($fa);
print $fa "ref1\t$size\t6\t60\t61\n";
close($fa) || die "Error writing to $fai : $!\n";
my $sam = "$prefix.sam.gz";
- my $pid = open(my $s, '|-');
- unless (defined($pid)) { die "Couldn't fork : $!\n"; }
- unless ($pid) {
- open(STDOUT, '>', $sam) || die "Couldn't redirect STDOUT to $sam: $!\n";
- exec("$$opts{bgzip}", '-c') || die "Couldn't exec bgzip : $!\n";
- }
+ open(my $s, "| $$opts{bgzip} -c > $sam")
+ || die "Couldn't open pipe to bgzip $!";
+ binmode($s);
print $s "\@HD\tVN:1.4\tSO:coordinate\n";
print $s "\@RG\tID:g1\tDS:Group 1\tLB:Lib1\tSM:Sample1\n";
print $s "\@SQ\tSN:ref1\tLN:$size\tUR:$fasta\n";
@@ -2319,17 +2394,19 @@ sub test_stats
{
my ($opts,%args) = @_;
- test_cmd($opts,out=>'stat/1.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/1_map_cigar.sam | tail -n+4");
- test_cmd($opts,out=>'stat/2.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/2_equal_cigar_full_seq.sam | tail -n+4");
- test_cmd($opts,out=>'stat/3.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/3_map_cigar_equal_seq.sam | tail -n+4");
- test_cmd($opts,out=>'stat/4.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/4_X_cigar_full_seq.sam | tail -n+4");
- test_cmd($opts,out=>'stat/5.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/5_insert_cigar.sam | tail -n+4");
- test_cmd($opts,out=>'stat/6.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa -i 0 $$opts{path}/stat/5_insert_cigar.sam | tail -n+4");
- test_cmd($opts,out=>'stat/7.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/7_supp.sam | tail -n+4");
- test_cmd($opts,out=>'stat/8.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/8_secondary.sam | tail -n+4");
-
- test_cmd($opts,out=>'stat/9.stats.expected',cmd=>"$$opts{bin}/samtools stats -S RG -r $$opts{path}/stat/test.fa $$opts{path}/stat/1_map_cigar.sam | tail -n+4",out_map=>{"stat/1_map_cigar.sam_s1_a_1.bamstat"=>"stat/1_map_cigar.sam_s1_a_1.expected.bamstat"},hskip=>3);
- test_cmd($opts,out=>'stat/10.stats.expected',cmd=>"$$opts{bin}/samtools stats -S RG -r $$opts{path}/stat/test.fa $$opts{path}/stat/10_map_cigar.sam | tail -n+4",out_map=>{"stat/10_map_cigar.sam_s1_a_1.bamstat"=>"stat/10_map_cigar.sam_s1_a_1.expected.bamstat", "stat/10_map_cigar.sam_s1_b_1.bamstat"=>"stat/10_map_cigar.sam_s1_b_1.expected.bamstat"},hskip=>3);
+ my $efix = ($^O =~ /^(?:msys|MSWin32)$/) ? 1 : 0;
+
+ test_cmd($opts,out=>'stat/1.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/1_map_cigar.sam | tail -n+4", exp_fix=>$efix);
+ test_cmd($opts,out=>'stat/2.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/2_equal_cigar_full_seq.sam | tail -n+4", exp_fix=>$efix);
+ test_cmd($opts,out=>'stat/3.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/3_map_cigar_equal_seq.sam | tail -n+4", exp_fix=>$efix);
+ test_cmd($opts,out=>'stat/4.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/4_X_cigar_full_seq.sam | tail -n+4", exp_fix=>$efix);
+ test_cmd($opts,out=>'stat/5.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/5_insert_cigar.sam | tail -n+4", exp_fix=>$efix);
+ test_cmd($opts,out=>'stat/6.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa -i 0 $$opts{path}/stat/5_insert_cigar.sam | tail -n+4", exp_fix=>$efix);
+ test_cmd($opts,out=>'stat/7.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/7_supp.sam | tail -n+4", exp_fix=>$efix);
+ test_cmd($opts,out=>'stat/8.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/8_secondary.sam | tail -n+4", exp_fix=>$efix);
+
+ test_cmd($opts,out=>'stat/9.stats.expected',cmd=>"$$opts{bin}/samtools stats -S RG -r $$opts{path}/stat/test.fa $$opts{path}/stat/1_map_cigar.sam | tail -n+4", exp_fix=>$efix,out_map=>{"stat/1_map_cigar.sam_s1_a_1.bamstat"=>"stat/1_map_cigar.sam_s1_a_1.expected.bamstat"},hskip=>3);
+ test_cmd($opts,out=>'stat/10.stats.expected',cmd=>"$$opts{bin}/samtools stats -S RG -r $$opts{path}/stat/test.fa $$opts{path}/stat/10_map_cigar.sam | tail -n+4", exp_fix=>$efix,out_map=>{"stat/10_map_cigar.sam_s1_a_1.bamstat"=>"stat/10_map_cigar.sam_s1_a_1.expected.bamstat", "stat/10_map_cigar.sam_s1_b_1.bamstat"=>"stat/10_map_cigar.sam_s1_b_1.expected.bamstat"},hskip=>3);
}
sub test_merge
@@ -2341,9 +2418,9 @@ sub test_merge
# Note the use of -s 1 to fix the random seed in place
# Merge 1 - Standard 3 file SAM merge all presented on the command line
- test_cmd($opts,out=>'merge/2.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_a.sam $$opts{path}/dat/test_input_1_b.sam $$opts{path}/dat/test_input_1_c.sam");
+ test_cmd($opts,out=>'merge/2.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_a.sam $$opts{path}/dat/test_input_1_b.sam $$opts{path}/dat/test_input_1_c.sam",binary=>1);
# Merge 2 - Standard 3 file BAM merge all files presented on the command line
- test_cmd($opts,out=>'merge/2.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_a.bam $$opts{path}/dat/test_input_1_b.bam $$opts{path}/dat/test_input_1_c.bam");
+ test_cmd($opts,out=>'merge/2.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_a.bam $$opts{path}/dat/test_input_1_b.bam $$opts{path}/dat/test_input_1_c.bam",binary=>1);
# Merge 3 - Standard 3 file BAM merge 2 files in fofn 1 on command line
open(my $fofn, "$$opts{path}/merge/test_3.fofn");
my ($tmpfile_fh, $tmpfile_filename) = tempfile(UNLINK => 1);
@@ -2352,15 +2429,15 @@ sub test_merge
print $tmpfile_fh "$$opts{path}/$_";
}
close($tmpfile_fh);
- test_cmd($opts,out=>'merge/3.merge.expected.bam', err=>'merge/3.merge.expected.err',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 -b $tmpfile_filename - $$opts{path}/dat/test_input_1_a.bam");
+ test_cmd($opts,out=>'merge/3.merge.expected.bam', err=>'merge/3.merge.expected.err',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 -b $tmpfile_filename - $$opts{path}/dat/test_input_1_a.bam",binary=>1);
# Merge 4 - 1 file BAM merge with file presented on the command line
- test_cmd($opts,out=>'merge/4.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_b.bam");
+ test_cmd($opts,out=>'merge/4.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_b.bam",binary=>1);
# Merge 5 - 3 file SAM merge all presented on the command line override IDs to file names
- test_cmd($opts,out=>'merge/5.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -r -s 1 - $$opts{path}/dat/test_input_1_a.sam $$opts{path}/dat/test_input_1_b.sam $$opts{path}/dat/test_input_1_c.sam");
+ test_cmd($opts,out=>'merge/5.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -r -s 1 - $$opts{path}/dat/test_input_1_a.sam $$opts{path}/dat/test_input_1_b.sam $$opts{path}/dat/test_input_1_c.sam",binary=>1);
# Merge 6 - merge all presented on the command line, combine PG and RG rather than dedup
- test_cmd($opts,out=>'merge/6.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -cp -s 1 - $$opts{path}/dat/test_input_1_a.sam $$opts{path}/dat/test_input_1_b.sam");
+ test_cmd($opts,out=>'merge/6.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -cp -s 1 - $$opts{path}/dat/test_input_1_a.sam $$opts{path}/dat/test_input_1_b.sam",binary=>1);
# Merge 7 - ID and SN with regex in them
- test_cmd($opts,out=>'merge/7.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_a_regex.sam $$opts{path}/dat/test_input_1_b_regex.sam");
+ test_cmd($opts,out=>'merge/7.merge.expected.bam',cmd=>"$$opts{bin}/samtools merge${threads} -s 1 - $$opts{path}/dat/test_input_1_a_regex.sam $$opts{path}/dat/test_input_1_b_regex.sam",binary=>1);
# Sort inputs by PG, then merge
system("$$opts{bin}/samtools sort -o $$opts{tmp}/merge.tag.1.bam -t PG $$opts{path}/dat/test_input_1_b.sam") == 0 or die "failed to create sort BAM: $?";
@@ -2487,28 +2564,33 @@ sub test_reheader
test_cmd($opts,
out=>'reheader/1_view1.sam.expected',
err=>'reheader/1_view1.sam.expected.err',
- cmd=>"$$opts{bin}/samtools reheader $$opts{path}/reheader/hdr.sam $fn.tmp.bam | $$opts{bin}/samtools view -h | perl -pe 's/\tVN:.*//'");
+ cmd=>"$$opts{bin}/samtools reheader $$opts{path}/reheader/hdr.sam $fn.tmp.bam | $$opts{bin}/samtools view -h | perl -pe 's/\tVN:.*//'",
+ exp_fix=>1);
test_cmd($opts,
out=>'reheader/2_view1.sam.expected',
err=>'reheader/2_view1.sam.expected.err',
- cmd=>"$$opts{bin}/samtools reheader $$opts{path}/reheader/hdr.sam $fn.tmp.v21.cram | $$opts{bin}/samtools view -h | perl -pe 's/\tVN:.*//'");
+ cmd=>"$$opts{bin}/samtools reheader $$opts{path}/reheader/hdr.sam $fn.tmp.v21.cram | $$opts{bin}/samtools view -h | perl -pe 's/\tVN:.*//'",
+ exp_fix=>1);
test_cmd($opts,
out=>'reheader/2_view1.sam.expected',
err=>'reheader/2_view1.sam.expected.err',
- cmd=>"$$opts{bin}/samtools reheader $$opts{path}/reheader/hdr.sam $fn.tmp.v30.cram | $$opts{bin}/samtools view -h | perl -pe 's/\tVN:.*//'");
+ cmd=>"$$opts{bin}/samtools reheader $$opts{path}/reheader/hdr.sam $fn.tmp.v30.cram | $$opts{bin}/samtools view -h | perl -pe 's/\tVN:.*//'",
+ exp_fix=>1);
# In-place testing
test_cmd($opts,
out=>'reheader/3_view1.sam.expected',
err=>'reheader/3_view1.sam.expected.err',
- cmd=>"$$opts{bin}/samtools reheader --in-place $$opts{path}/reheader/hdr.sam $fn.tmp.v21.cram && $$opts{bin}/samtools view -h $fn.tmp.v21.cram | perl -pe 's/\tVN:.*//'");
+ cmd=>"$$opts{bin}/samtools reheader --in-place $$opts{path}/reheader/hdr.sam $fn.tmp.v21.cram && $$opts{bin}/samtools view -h $fn.tmp.v21.cram | perl -pe 's/\tVN:.*//'",
+ exp_fix=>1);
test_cmd($opts,
out=>'reheader/3_view1.sam.expected',
err=>'reheader/3_view1.sam.expected.err',
- cmd=>"$$opts{bin}/samtools reheader --in-place $$opts{path}/reheader/hdr.sam $fn.tmp.v30.cram && $$opts{bin}/samtools view -h $fn.tmp.v30.cram | perl -pe 's/\tVN:.*//'");
+ cmd=>"$$opts{bin}/samtools reheader --in-place $$opts{path}/reheader/hdr.sam $fn.tmp.v30.cram && $$opts{bin}/samtools view -h $fn.tmp.v30.cram | perl -pe 's/\tVN:.*//'",
+ exp_fix=>1);
}
sub test_addrprg
@@ -2523,3 +2605,16 @@ sub test_addrprg
test_cmd($opts,out=>'addrprg/1_fixup.sam.expected', err=>'addrprg/1_fixup.sam.expected.err', cmd=>"$$opts{bin}/samtools addreplacerg${threads} -O sam -m overwrite_all -R '1#8' $$opts{path}/addrprg/1_fixup.sam");
test_cmd($opts,out=>'addrprg/4_fixup_norg.sam.expected', err=>'addrprg/4_fixup_norg.sam.expected.err', cmd=>"$$opts{bin}/samtools addreplacerg${threads} -O sam -r 'ID:1#8' -r 'CN:SC' $$opts{path}/addrprg/4_fixup_norg.sam");
}
+
+sub test_markdup
+{
+ my ($opts,%args) = @_;
+
+ my $threads = exists($args{threads}) ? " -@ $args{threads}" : "";
+ test_cmd($opts, out=>'markdup/1_name_sort.expected.sam', err=>'1_name_sort.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/1_name_sort.sam -", expect_fail=>1);
+ test_cmd($opts, out=>'markdup/2_bad_order.expected.sam', err=>'2_bad_order.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/2_bad_order.sam -", expect_fail=>1);
+ test_cmd($opts, out=>'markdup/3_missing_mc.expected.sam', err=>'3_missing_mc.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/3_missing_mc.sam -", expect_fail=>1);
+ test_cmd($opts, out=>'markdup/4_missing_ms.expected.sam', err=>'4_missing_ms.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/4_missing_ms.sam -", expect_fail=>1);
+ test_cmd($opts, out=>'markdup/5_markdup.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/5_markdup.sam -");
+ test_cmd($opts, out=>'markdup/6_remove_dups.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam -r $$opts{path}/markdup/6_remove_dups.sam -");
+}
diff --git a/version.sh b/version.sh
new file mode 100755
index 0000000..68bd205
--- /dev/null
+++ b/version.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+# Master version, for use in tarballs or non-git source copies
+VERSION=1.6
+
+# If we have a git clone, then check against the current tag
+if [ -e .git ]
+then
+ # If we ever get to 10.x this will need to be more liberal
+ VERSION=`git describe --match '[0-9].[0-9]*' --dirty`
+fi
+
+echo $VERSION
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/samtools.git
More information about the debian-med-commit
mailing list