[med-svn] [htslib] 05/11: New upstream version 1.6
Andreas Tille
tille at debian.org
Mon Dec 11 13:58:52 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository htslib.
commit 0b1efffb5e9c447681992014800392e378e67a5f
Author: Andreas Tille <tille at debian.org>
Date: Mon Dec 11 14:18:07 2017 +0100
New upstream version 1.6
---
.appveyor.yml | 43 +++++
INSTALL | 6 +
Makefile | 66 ++++---
NEWS | 37 +++-
bcf_sr_sort.c | 38 ++--
bcf_sr_sort.h | 6 +-
bgzf.c | 15 ++
bgzip.c | 18 +-
configure.ac | 24 ++-
cram/cram_codecs.c | 75 ++++----
cram/cram_decode.c | 70 ++++---
cram/cram_encode.c | 62 +++----
cram/cram_external.c | 3 +-
cram/cram_index.c | 6 +-
cram/cram_io.c | 107 ++++++-----
cram/cram_stats.c | 10 +-
cram/cram_structs.h | 3 +-
cram/mFILE.c | 3 +-
cram/mFILE.h | 5 +
cram/os.h | 24 +--
cram/sam_header.c | 6 +-
errmod.c | 2 +
hfile.c | 27 ++-
hfile_internal.h | 9 +-
hfile_libcurl.c | 433 ++++++++++++++++++++++++++++++++++++--------
hfile_s3.c | 191 ++++++++++++-------
hts.c | 121 ++++++++-----
hts_internal.h | 2 +-
hts_os.c | 40 ++++
htsfile.1 | 2 +-
htslib/bgzf.h | 4 +
htslib/hfile.h | 4 +
htslib/hts.h | 5 +-
htslib/hts_defs.h | 18 ++
htslib/hts_endian.h | 6 +-
htslib/hts_os.h | 59 ++++++
htslib/knetfile.h | 2 +-
htslib/ksort.h | 8 +-
htslib/regidx.h | 10 +-
htslib/sam.h | 6 +-
htslib/vcfutils.h | 4 +-
knetfile.c | 4 +-
kstring.c | 2 +-
multipart.c | 31 +++-
realn.c | 2 +-
regidx.c | 24 +--
sam.c | 27 ++-
synced_bcf_reader.c | 4 +-
tabix.1 | 2 +-
tabix.c | 4 +-
test/compare_sam.pl | 2 +
test/hfile.c | 4 +-
test/sam.c | 2 +-
test/tabix/test-tabix.sh | 7 +-
test/test-bcf-sr.c | 12 +-
test/test-bcf-sr.pl | 6 +-
test/test-bcf-translate.c | 192 ++++++++++++++++++++
test/test-bcf-translate.out | 18 ++
test/test-regidx.c | 8 +-
test/test.pl | 92 +++++++---
test/test_bgzf.c | 19 +-
test/test_view.c | 55 ++++--
vcf.c | 43 +++--
vcfutils.c | 2 +-
version.sh | 31 ++++
win/rand.c | 98 ++++++++++
win/rand.h | 24 +++
67 files changed, 1726 insertions(+), 569 deletions(-)
diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 0000000..f90c61d
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,43 @@
+# version format.
+# you can use {branch} name in version format too
+# version: 1.0.{build}-{branch}
+version: 'vers.{build}'
+
+# branches to build
+branches:
+ # Whitelist
+ only:
+ - develop
+
+ # Blacklist
+ except:
+ - gh-pages
+
+# Do not build on tags (GitHub and BitBucket)
+skip_tags: true
+
+# Skipping commits affecting specific files (GitHub only). More details here: /docs/appveyor-yml
+#skip_commits:
+# files:
+# - docs/*
+# - '**/*.html'
+
+# We use Mingw/Msys, so use pacman for installs
+install:
+ - set HOME=.
+ - set MSYSTEM=MINGW64
+ - set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
+ - set MINGWPREFIX=x86_64-w64-mingw32
+ - "sh -lc \"pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 mingw-w64-x86_64-xz mingw-w64-x86_64-curl\""
+
+build_script:
+ - set HOME=.
+ - set MSYSTEM=MINGW64
+ - set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
+ - "sh -lc \"aclocal && autoheader && autoconf && ./configure && make -j2\""
+
+#build_script:
+# - make
+
+test_script:
+ - "sh -lc \"make test\""
diff --git a/INSTALL b/INSTALL
index 2413ba6..8d9c3b6 100644
--- a/INSTALL
+++ b/INSTALL
@@ -48,6 +48,12 @@ library is used. Systems that do not have CChmac will get this from
libcrypto. libcrypto is part of OpenSSL or one of its derivatives (LibreSSL
or BoringSSL).
+On Microsoft Windows we recommend use of Mingw64/Msys2. Note that
+currently for the test harness to work you will need to override the
+test temporary directory with e.g.: make check TEST_OPTS="-t C:/msys64/tmp/_"
+Whilst the code may work on Windows with other environments, these have
+not been verified.
+
Building Configure
==================
diff --git a/Makefile b/Makefile
index 9a44b62..1b5e2ad 100644
--- a/Makefile
+++ b/Makefile
@@ -76,7 +76,8 @@ BUILT_TEST_PROGRAMS = \
test/test_view \
test/test-vcf-api \
test/test-vcf-sweep \
- test/test-bcf-sr
+ test/test-bcf-sr \
+ test/test-bcf-translate
BUILT_THRASH_PROGRAMS = \
test/thrash_threads1 \
@@ -91,35 +92,17 @@ all: lib-static lib-shared $(BUILT_PROGRAMS) plugins $(BUILT_TEST_PROGRAMS)
HTSPREFIX =
include htslib_vars.mk
-
-PACKAGE_VERSION = 1.5
+# If not using GNU make, you need to copy the version number from version.sh
+# into here.
+PACKAGE_VERSION := $(shell ./version.sh)
LIBHTS_SOVERSION = 2
-
# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string
# even if this is a dirty or untagged Git working tree.
-NUMERIC_VERSION = $(PACKAGE_VERSION)
-
-# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
-# description of the working tree: either a release tag with the same value
-# as $(PACKAGE_VERSION) above, or an exact description likely based on a tag.
-# Much of this is also GNU Make-specific. If you don't have GNU Make and/or
-# are not building from a Git repository, comment out this conditional.
-ifneq "$(wildcard .git)" ""
-original_version := $(PACKAGE_VERSION)
-PACKAGE_VERSION := $(shell git describe --always --dirty)
-
-# Unless the Git description matches /\d*\.\d*(\.\d*)?/, i.e., is exactly a tag
-# with a numeric name, revert $(NUMERIC_VERSION) to the original version number
-# written above, but with the patchlevel field bumped to 255.
-ifneq "$(subst ..,.,$(subst 0,,$(subst 1,,$(subst 2,,$(subst 3,,$(subst 4,,$(subst 5,,$(subst 6,,$(subst 7,,$(subst 8,,$(subst 9,,$(PACKAGE_VERSION))))))))))))" "."
-empty :=
-NUMERIC_VERSION := $(subst $(empty) ,.,$(wordlist 1,2,$(subst ., ,$(original_version))) 255)
-endif
+NUMERIC_VERSION := $(shell ./version.sh numeric)
# Force version.h to be remade if $(PACKAGE_VERSION) has changed.
version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force))
-endif
version.h:
echo '#define HTS_VERSION "$(PACKAGE_VERSION)"' > $@
@@ -127,6 +110,9 @@ version.h:
print-version:
@echo $(PACKAGE_VERSION)
+show-version:
+ @echo PACKAGE_VERSION = $(PACKAGE_VERSION)
+ @echo NUMERIC_VERSION = $(NUMERIC_VERSION)
.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so
@@ -148,6 +134,7 @@ LIBHTS_OBJS = \
hfile.o \
hfile_net.o \
hts.o \
+ hts_os.o\
md5.o \
multipart.o \
probaln.o \
@@ -208,6 +195,8 @@ config.h:
echo '/* Default config.h generated by Makefile */' > $@
echo '#define HAVE_LIBBZ2 1' >> $@
echo '#define HAVE_LIBLZMA 1' >> $@
+ echo '#define HAVE_FSEEKO 1' >> $@
+ echo '#define HAVE_DRAND48 1' >> $@
# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency
# on htslib.pc.in listed, as if that file is newer the usual way to regenerate
@@ -235,6 +224,9 @@ lib-shared: libhts.dylib
else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN"
SHLIB_FLAVOUR = cygdll
lib-shared: cyghts-$(LIBHTS_SOVERSION).dll
+else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS"
+SHLIB_FLAVOUR = dll
+lib-shared: hts-$(LIBHTS_SOVERSION).dll
else
SHLIB_FLAVOUR = so
lib-shared: libhts.so
@@ -276,6 +268,9 @@ libhts.dylib: $(LIBHTS_OBJS)
cyghts-$(LIBHTS_SOVERSION).dll: $(LIBHTS_OBJS)
$(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--export-all-symbols -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread
+hts-$(LIBHTS_SOVERSION).dll: $(LIBHTS_OBJS)
+ $(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--export-all-symbols -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread
+
.pico.so:
$(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< $(LIBS) -lpthread
@@ -286,6 +281,9 @@ cyghts-$(LIBHTS_SOVERSION).dll: $(LIBHTS_OBJS)
.o.cygdll:
$(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS)
+.o.dll:
+ $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS)
+
bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) cram/pooled_alloc.h $(htslib_khash_h)
errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h)
@@ -348,6 +346,9 @@ tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) $(htslib_vcf_h) $(htsl
# For tests that might use it, set $REF_PATH explicitly to use only reference
# areas within the test suite (or set it to ':' to use no reference areas).
+#
+# If using MSYS, avoid poor shell expansion via:
+# MSYS2_ARG_CONV_EXCL="*" make check
check test: $(BUILT_PROGRAMS) $(BUILT_TEST_PROGRAMS)
test/hts_endian
test/fieldarith test/fieldarith.sam
@@ -356,7 +357,7 @@ check test: $(BUILT_PROGRAMS) $(BUILT_TEST_PROGRAMS)
cd test/tabix && ./test-tabix.sh tabix.tst
REF_PATH=: test/sam test/ce.fa test/faidx.fa
test/test-regidx
- cd test && REF_PATH=: ./test.pl
+ cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-}
test/hts_endian: test/hts_endian.o
$(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS)
@@ -388,6 +389,9 @@ test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a
test/test-bcf-sr: test/test-bcf-sr.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test-bcf-sr.o libhts.a -lz $(LIBS) -lpthread
+test/test-bcf-translate: test/test-bcf-translate.o libhts.a
+ $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread
+
test/hts_endian.o: test/hts_endian.c $(htslib_hts_endian_h)
test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h)
test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h)
@@ -398,6 +402,7 @@ test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h)
test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h)
test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h)
test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_vcf_sweep_h) bcf_sr_sort.h
+test/test-bcf-translate.o: test/test-bcf-translate.c config.h
test/thrash_threads1: test/thrash_threads1.o libhts.a
@@ -446,6 +451,10 @@ install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs
$(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll
$(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a
+install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs
+ $(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll
+ $(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a
+
install-dylib: libhts.dylib installdirs
$(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib
ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib
@@ -483,6 +492,9 @@ clean-so:
clean-cygdll:
-rm -f cyghts-*.dll libhts.dll.a
+clean-dll:
+ -rm -f hts-*.dll hts.dll.a
+
clean-dylib:
-rm -f libhts.dylib libhts.*.dylib
@@ -498,7 +510,8 @@ tags TAGS:
# (The wildcards attempt to omit non-exported files (.git*, README.md,
# etc) and other detritus that might be in the top-level directory.)
distdir:
- tar -c *.[ch15] [ILMNRcht]*[ELSbcekmnt] | (cd $(distdir) && tar -x)
+ @if [ -z "$(distdir)" ]; then echo "Please supply a distdir=DIR argument."; false; fi
+ tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x)
+cd $(distdir) && $(MAKE) distclean
force:
@@ -507,7 +520,8 @@ force:
.PHONY: all check clean distclean distdir force
.PHONY: install install-pkgconfig installdirs lib-shared lib-static
.PHONY: maintainer-clean mostlyclean plugins print-config print-version
-.PHONY: tags test testclean
+.PHONY: show-version tags test testclean
.PHONY: clean-so install-so
.PHONY: clean-cygdll install-cygdll
+.PHONY: clean-dll install-dll
.PHONY: clean-dylib install-dylib
diff --git a/NEWS b/NEWS
index 47546d0..8342a5d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,38 @@
+Noteworthy changes in release 1.6 (28th September 2017)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Fixed bug where iterators on CRAM files did not propagate error return
+ values to the caller correctly. Thanks go to Chris Saunders.
+
+* Overhauled Windows builds. Building with msys2/mingw64 now works
+ correctly and passes all tests.
+
+* More improvements to logging output (thanks again to Anders Kaplan).
+
+* Return codes from sam_read1() when reading cram have been made
+ consistent with those returned when reading sam/bam. Thanks to
+ Chris Saunders (#575).
+
+* BGZF CRC32 checksums are now always verified.
+
+* It's now possible to set nthreads = 1 for cram files.
+
+* hfile_libcurl has been modified to make it thread-safe. It's also
+ better at handling web servers that do not honour byte range requests
+ when attempting to seek - it now sets errno to ESPIPE and keeps
+ the existing connection open so callers can revert to streaming mode
+ if they want to.
+
+* hfile_s3 now recalculates access tokens if they have become stale. This
+ fixes a reported problem where authentication failed after a file
+ had been in use for more than 15 minutes.
+
+* Fixed bug where remote index fetches would fail to notice errors when
+ writing files.
+
+* bam_read1() now checks that the query sequence length derived from the
+ CIGAR alignment matches the sequence length in the BAM record.
+
Noteworthy changes in release 1.5 (21st June 2017)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -18,7 +53,7 @@ Noteworthy changes in release 1.4.1 (8th May 2017)
This is primarily a security bug fix update.
-* Fixed SECURITY issue with buffer overruns with malicious data. (#514).
+* Fixed SECURITY (CVE-2017-1000206) issue with buffer overruns with malicious data. (#514).
* S3 support for non Amazon AWS endpoints. (#506)
diff --git a/bcf_sr_sort.c b/bcf_sr_sort.c
index 5ab46ce..a48e2aa 100644
--- a/bcf_sr_sort.c
+++ b/bcf_sr_sort.c
@@ -1,4 +1,4 @@
-/*
+/*
Copyright (C) 2017 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -9,10 +9,10 @@
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -77,12 +77,12 @@ static void bcf_sr_init_scores(sr_sort_t *srt)
if ( srt->pair & BCF_SR_PAIR_ANY ) srt->pair |= (BCF_SR_PAIR_SNPS | BCF_SR_PAIR_INDELS | BCF_SR_PAIR_SNP_REF | BCF_SR_PAIR_INDEL_REF);
if ( srt->pair & BCF_SR_PAIR_SNPS ) SR_SCORE(srt,SR_SNP,SR_SNP) = 3;
if ( srt->pair & BCF_SR_PAIR_INDELS ) SR_SCORE(srt,SR_INDEL,SR_INDEL) = 3;
- if ( srt->pair & BCF_SR_PAIR_SNP_REF )
+ if ( srt->pair & BCF_SR_PAIR_SNP_REF )
{
SR_SCORE(srt,SR_SNP,SR_REF) = 2;
SR_SCORE(srt,SR_REF,SR_SNP) = 2;
}
- if ( srt->pair & BCF_SR_PAIR_INDEL_REF )
+ if ( srt->pair & BCF_SR_PAIR_INDEL_REF )
{
SR_SCORE(srt,SR_INDEL,SR_REF) = 2;
SR_SCORE(srt,SR_REF,SR_INDEL) = 2;
@@ -267,6 +267,8 @@ static int cmpstringp(const void *p1, const void *p2)
{
return strcmp(* (char * const *) p1, * (char * const *) p2);
}
+
+#if DEBUG_VSETS
void debug_vsets(sr_sort_t *srt)
{
int i,j,k;
@@ -285,6 +287,9 @@ void debug_vsets(sr_sort_t *srt)
fprintf(stderr,"\n");
}
}
+#endif
+
+#if DEBUG_VBUF
void debug_vbuf(sr_sort_t *srt)
{
int i, j;
@@ -299,6 +304,8 @@ void debug_vbuf(sr_sort_t *srt)
fprintf(stderr,"\n");
}
}
+#endif
+
char *grp_create_key(sr_sort_t *srt)
{
if ( !srt->str.l ) return strdup("");
@@ -365,7 +372,7 @@ static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
{
bcf1_t *line = reader->buffer[irec];
if ( line->rid!=rid || line->pos!=min_pos ) break;
-
+
if ( srt->str.l ) kputc(';',&srt->str);
srt->off[srt->noff++] = srt->str.l;
size_t beg = srt->str.l;
@@ -459,7 +466,7 @@ static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
}
// create the initial list of variant sets
- for (ivar=0; ivar<srt->nvar; ivar++)
+ for (ivar=0; ivar<srt->nvar; ivar++)
{
ivset = srt->nvset++;
hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset);
@@ -485,7 +492,9 @@ static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
}
var->type = type;
}
- // debug_vsets(srt);
+#if DEBUG_VSETS
+ debug_vsets(srt);
+#endif
// initialize the pairing matrix
hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat);
@@ -501,7 +510,10 @@ static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
// pair the lines
while ( srt->nvset )
{
- // fprintf(stderr,"\n"); debug_vsets(srt);
+#if DEBUG_VSETS
+ fprintf(stderr,"\n");
+ debug_vsets(srt);
+#endif
int imax = 0;
for (ivset=1; ivset<srt->nvset; ivset++)
@@ -567,14 +579,16 @@ int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int mi
if ( !srt->vcf_buf[0].nrec ) return 0;
- // debug_vbuf(srt);
+#if DEBUG_VBUF
+ debug_vbuf(srt);
+#endif
int nret = 0;
for (i=0; i<srt->sr->nreaders; i++)
{
vcf_buf_t *buf = &srt->vcf_buf[i];
- if ( buf->rec[0] )
+ if ( buf->rec[0] )
{
bcf_sr_t *reader = &srt->sr->readers[i];
for (j=1; j<=reader->nbuffer; j++)
@@ -592,7 +606,7 @@ int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int mi
srt->sr->has_line[i] = 1;
}
else
- srt->sr->has_line[i] = 0;
+ srt->sr->has_line[i] = 0;
buf->nrec--;
if ( buf->nrec > 0 )
diff --git a/bcf_sr_sort.h b/bcf_sr_sort.h
index b3b4e56..b51b679 100644
--- a/bcf_sr_sort.h
+++ b/bcf_sr_sort.h
@@ -1,4 +1,4 @@
-/*
+/*
Copyright (C) 2017 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -9,10 +9,10 @@
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
diff --git a/bgzf.c b/bgzf.c
index 7e80b8c..935ae6d 100644
--- a/bgzf.c
+++ b/bgzf.c
@@ -39,6 +39,7 @@
#include "htslib/bgzf.h"
#include "htslib/hfile.h"
#include "htslib/thread_pool.h"
+#include "htslib/hts_endian.h"
#include "cram/pooled_alloc.h"
#define BGZF_CACHE
@@ -463,6 +464,16 @@ static int inflate_block(BGZF* fp, int block_length)
return -1;
}
+ // Check CRC of uncompressed block matches the gzip header.
+ // NB: we may wish to switch out the zlib crc32 for something more performant.
+ // See PR#361 and issue#467
+ uint32_t c1 = crc32(0L, (unsigned char *)fp->uncompressed_block, dlen);
+ uint32_t c2 = le_to_u32((uint8_t *)fp->compressed_block + block_length-8);
+ if (c1 != c2) {
+ fp->errcode |= BGZF_ERR_CRC;
+ return -1;
+ }
+
return dlen;
}
@@ -1038,6 +1049,9 @@ static int bgzf_check_EOF_common(BGZF *fp)
off_t offset = htell(fp->fp);
if (hseek(fp->fp, -28, SEEK_END) < 0) {
if (errno == ESPIPE) { hclearerr(fp->fp); return 2; }
+#ifdef _WIN32
+ if (errno == EINVAL) { hclearerr(fp->fp); return 2; }
+#endif
else return -1;
}
if ( hread(fp->fp, buf, 28) != 28 ) return -1;
@@ -1179,6 +1193,7 @@ restart:
pthread_exit(NULL);
}
}
+ return NULL;
}
int bgzf_thread_pool(BGZF *fp, hts_tpool *pool, int qsize) {
diff --git a/bgzip.c b/bgzip.c
index e078185..931e7a0 100644
--- a/bgzip.c
+++ b/bgzip.c
@@ -36,6 +36,11 @@
#include "htslib/bgzf.h"
#include "htslib/hts.h"
+#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#endif
+
static const int WINDOW_SIZE = 64 * 1024;
static void error(const char *format, ...)
@@ -198,6 +203,9 @@ int main(int argc, char **argv)
if ( index ) bgzf_index_build_init(fp);
buffer = malloc(WINDOW_SIZE);
+#ifdef _WIN32
+ _setmode(f_src, O_BINARY);
+#endif
if (rebgzip){
if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
@@ -319,13 +327,21 @@ int main(int argc, char **argv)
if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start);
}
+#ifdef _WIN32
+ _setmode(f_dst, O_BINARY);
+#endif
while (1) {
if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
if (c == 0) break;
if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
start += c;
- if ( write(f_dst, buffer, c) != c ) error("Could not write %d bytes\n", c);
+ if ( write(f_dst, buffer, c) != c ) {
+#ifdef _WIN32
+ if (GetLastError() != ERROR_NO_DATA)
+#endif
+ error("Could not write %d bytes\n", c);
+ }
if (end >= 0 && start >= end) break;
}
free(buffer);
diff --git a/configure.ac b/configure.ac
index 308a5c1..9221d42 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
# DEALINGS IN THE SOFTWARE.
dnl Process this file with autoconf to produce a configure script
-AC_INIT([HTSlib], m4_esyscmd_s([make print-version]),
+AC_INIT([HTSlib], m4_esyscmd_s([./version.sh 2>/dev/null]),
[samtools-help at lists.sourceforge.net], [], [http://www.htslib.org/])
AC_PREREQ(2.63) dnl This version introduced 4-argument AC_CHECK_HEADER
AC_CONFIG_SRCDIR(hts.c)
@@ -71,6 +71,7 @@ AC_ARG_ENABLE([gcs],
[], [enable_gcs=check])
AC_SYS_LARGEFILE
+AC_FUNC_FSEEKO
AC_ARG_ENABLE([libcurl],
[AS_HELP_STRING([--enable-libcurl],
@@ -112,8 +113,8 @@ AC_ARG_ENABLE([s3],
[support Amazon AWS S3 URLs])],
[], [enable_s3=check])
-AC_MSG_CHECKING([shared library type])
test -n "$host_alias" || host_alias=unknown-`uname -s`
+AC_MSG_CHECKING([shared library type for $host_alias])
case $host_alias in
*-cygwin* | *-CYGWIN*)
host_result="Cygwin DLL"
@@ -125,6 +126,15 @@ case $host_alias in
PLATFORM=Darwin
PLUGIN_EXT=.bundle
;;
+ *-msys* | *-MSYS* | *-mingw* | *-MINGW*)
+ host_result="MSYS dll"
+ PLATFORM=MSYS
+ PLUGIN_EXT=.dll
+ # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64,
+ # %lld and %z printf formats work. It also enforces the snprintf to
+ # be C99 compliant so it returns the correct values (in kstring.c).
+ CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600"
+ ;;
*)
host_result="plain .so"
PLATFORM=default
@@ -136,7 +146,7 @@ AC_SUBST([PLATFORM])
dnl FIXME This pulls in dozens of standard header checks
AC_FUNC_MMAP
-AC_CHECK_FUNCS(gmtime_r)
+AC_CHECK_FUNCS([gmtime_r fsync drand48])
# Darwin has a dubious fdatasync() symbol, but no declaration in <unistd.h>
AC_CHECK_DECL([fdatasync(int)], [AC_CHECK_FUNCS(fdatasync)])
@@ -183,9 +193,11 @@ FAILED. This error must be resolved in order to build HTSlib successfully.])
fi
dnl connect() etc. fns are in libc on linux, but libsocket on illumos/Solaris
-libsocket=unneeded
-AC_SEARCH_LIBS(connect, socket, [libsocket=needed], [])
-
+AC_SEARCH_LIBS([recv], [socket ws2_32], [
+if test "$ac_cv_search_recv" != "none required"
+then
+ static_LIBS="$static_LIBS $ac_cv_search_recv"
+fi], [AC_MSG_ERROR([unable to find the recv() function])])
if test "$enable_bz2" != no; then
bz2_devel=ok
diff --git a/cram/cram_codecs.c b/cram/cram_codecs.c
index 332085a..76fa731 100644
--- a/cram/cram_codecs.c
+++ b/cram/cram_codecs.c
@@ -239,7 +239,7 @@ static inline unsigned int get_bits_MSB(cram_block *block, int nbits) {
* for it elsewhere.)
*/
static int store_bits_MSB(cram_block *block, unsigned int val, int nbits) {
- /* fprintf(stderr, " store_bits: %02x %d\n", val, nbits); */
+ //fprintf(stderr, " store_bits: %02x %d\n", val, nbits);
/*
* Use slow mode until we tweak the huffman generator to never generate
@@ -558,7 +558,7 @@ cram_codec *cram_beta_decode_init(char *data, int size,
else if (option == E_BYTE_ARRAY || option == E_BYTE)
c->decode = cram_beta_decode_char;
else {
- fprintf(stderr, "BYTE_ARRAYs not supported by this codec\n");
+ hts_log_error("BYTE_ARRAYs not supported by this codec");
return NULL;
}
c->free = cram_beta_decode_free;
@@ -748,7 +748,7 @@ cram_codec *cram_subexp_decode_init(char *data, int size,
char *cp = data;
if (option != E_INT) {
- fprintf(stderr, "This codec only supports INT encodings\n");
+ hts_log_error("This codec only supports INT encodings");
return NULL;
}
@@ -764,7 +764,7 @@ cram_codec *cram_subexp_decode_init(char *data, int size,
cp += safe_itf8_get(cp, data + size, &c->subexp.k);
if (cp - data != size || c->subexp.k < 0) {
- fprintf(stderr, "Malformed subexp header stream\n");
+ hts_log_error("Malformed subexp header stream");
free(c);
return NULL;
}
@@ -814,7 +814,7 @@ cram_codec *cram_gamma_decode_init(char *data, int size,
char *cp = data;
if (option != E_INT) {
- fprintf(stderr, "This codec only supports INT encodings\n");
+ hts_log_error("This codec only supports INT encodings");
return NULL;
}
@@ -855,7 +855,7 @@ static int code_sort(const void *vp1, const void *vp2) {
if (c1->len != c2->len)
return c1->len - c2->len;
else
- return c1->symbol - c2->symbol;
+ return c1->symbol < c2->symbol ? -1 : (c1->symbol > c2->symbol ? 1 : 0);
}
void cram_huffman_decode_free(cram_codec *c) {
@@ -980,18 +980,20 @@ cram_codec *cram_huffman_decode_init(char *data, int size,
int32_t ncodes = 0, i, j;
char *cp = data, *data_end = &data[size];
cram_codec *h;
- cram_huffman_code *codes;
+ cram_huffman_code *codes = NULL;
int32_t val, last_len, max_len = 0;
+ uint32_t max_val; // needs one more bit than val
+ const int max_code_bits = sizeof(val) * 8 - 1;
int l;
if (option == E_BYTE_ARRAY_BLOCK) {
- fprintf(stderr, "BYTE_ARRAYs not supported by this codec\n");
+ hts_log_error("BYTE_ARRAYs not supported by this codec");
return NULL;
}
cp += safe_itf8_get(cp, data_end, &ncodes);
if (ncodes < 0) {
- fprintf(stderr, "Invalid number of symbols in huffman stream\n");
+ hts_log_error("Invalid number of symbols in huffman stream");
return NULL;
}
if (ncodes >= SIZE_MAX / sizeof(*codes)) {
@@ -1022,17 +1024,12 @@ cram_codec *cram_huffman_decode_init(char *data, int size,
l = safe_itf8_get(cp, data_end, &codes[i].symbol);
}
- if (l < 1) {
- fprintf(stderr, "Malformed huffman header stream\n");
- free(h);
- return NULL;
- }
+ if (l < 1)
+ goto malformed;
+
cp += safe_itf8_get(cp, data_end, &i);
- if (i != ncodes) {
- fprintf(stderr, "Malformed huffman header stream\n");
- free(h);
- return NULL;
- }
+ if (i != ncodes)
+ goto malformed;
h->reset = cram_nop_decode_reset;
@@ -1050,24 +1047,32 @@ cram_codec *cram_huffman_decode_init(char *data, int size,
if (max_len < codes[i].len)
max_len = codes[i].len;
}
- if (l < 1 || cp - data != size || max_len >= ncodes) {
- fprintf(stderr, "Malformed huffman header stream\n");
- free(h);
- return NULL;
+ if (l < 1 || cp - data != size || max_len >= ncodes)
+ goto malformed;
+
+ /* 31 is max. bits available in val */
+ if (max_len > max_code_bits) {
+ hts_log_error("Huffman code length (%d) is greater "
+ "than maximum supported (%d)", max_len, max_code_bits);
+ free(h);
+ free(codes);
+ return NULL;
}
/* Sort by bit length and then by symbol value */
qsort(codes, ncodes, sizeof(*codes), code_sort);
/* Assign canonical codes */
- val = -1, last_len = 0;
+ val = -1, last_len = 0, max_val = 0;
for (i = 0; i < ncodes; i++) {
val++;
+ if (val > max_val)
+ goto malformed;
+
if (codes[i].len > last_len) {
- while (codes[i].len > last_len) {
- val <<= 1;
- last_len++;
- }
+ val <<= (codes[i].len - last_len);
+ last_len = codes[i].len;
+ max_val = (1U << codes[i].len) - 1;
}
codes[i].code = val;
}
@@ -1116,6 +1121,12 @@ cram_codec *cram_huffman_decode_init(char *data, int size,
}
return (cram_codec *)h;
+
+ malformed:
+ hts_log_error("Malformed huffman header stream");
+ free(codes);
+ free(h);
+ return NULL;
}
int cram_huffman_encode_char0(cram_slice *slice, cram_codec *c,
@@ -1511,7 +1522,7 @@ cram_codec *cram_byte_array_len_decode_init(char *data, int size,
return c;
malformed:
- fprintf(stderr, "Malformed byte_array_len header stream\n");
+ hts_log_error("Malformed byte_array_len header stream");
no_codec:
free(c);
return NULL;
@@ -1855,7 +1866,7 @@ cram_codec *cram_decoder_init(enum cram_encoding codec,
if (codec >= E_NULL && codec < E_NUM_CODECS && decode_init[codec]) {
return decode_init[codec](data, size, option, version);
} else {
- fprintf(stderr, "Unimplemented codec of type %s\n", cram_encoding2str(codec));
+ hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec));
return NULL;
}
}
@@ -1890,7 +1901,7 @@ cram_codec *cram_encoder_init(enum cram_encoding codec,
r->out = NULL;
return r;
} else {
- fprintf(stderr, "Unimplemented codec of type %s\n", cram_encoding2str(codec));
+ hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec));
abort();
}
}
@@ -1928,7 +1939,7 @@ int cram_codec_to_id(cram_codec *c, int *id2) {
bnum1 = -2;
break;
default:
- fprintf(stderr, "Unknown codec type %d\n", c->codec);
+ hts_log_error("Unknown codec type %d", c->codec);
bnum1 = -1;
}
diff --git a/cram/cram_decode.c b/cram/cram_decode.c
index bb7bf48..9571326 100644
--- a/cram/cram_decode.c
+++ b/cram/cram_decode.c
@@ -334,8 +334,7 @@ cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,
}
default:
- fprintf(stderr, "Unrecognised preservation map key %c%c\n",
- cp[-2], cp[-1]);
+ hts_log_warning("Unrecognised preservation map key %c%c", cp[-2], cp[-1]);
// guess byte;
cp++;
break;
@@ -605,8 +604,9 @@ cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,
}
} else if (key[0] == 'T' && key[1] == 'M') {
} else if (key[0] == 'T' && key[1] == 'V') {
- } else
- fprintf(stderr, "Unrecognised key: %.2s\n", key);
+ } else {
+ hts_log_warning("Unrecognised key: %.2s", key);
+ }
cp += size;
@@ -1274,8 +1274,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
pos += prev_pos;
if (pos <= 0) {
- fprintf(stderr, "Error: feature position %d before start of read.\n",
- pos);
+ hts_log_error("Feature position %d before start of read", pos);
return -1;
}
@@ -1288,8 +1287,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
static int whinged = 0;
int rlen;
if (!whinged)
- fprintf(stderr, "Ref pos outside of ref "
- "sequence boundary\n");
+ hts_log_warning("Ref pos outside of ref sequence boundary");
whinged = 1;
rlen = bfd->ref[cr->ref_id].len - ref_pos;
// May miss MD/NM cases where both seq/ref are N, but this is a
@@ -1743,7 +1741,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
}
default:
- fprintf(stderr, "Error: Unknown feature code '%c'\n", op);
+ hts_log_error("Unknown feature code '%c'", op);
return -1;
}
}
@@ -1758,7 +1756,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
static int whinged = 0;
int rlen;
if (!whinged)
- fprintf(stderr, "Ref pos outside of ref sequence boundary\n");
+ hts_log_warning("Ref pos outside of ref sequence boundary");
whinged = 1;
rlen = bfd->ref[cr->ref_id].len - ref_pos;
// May miss MD/NM cases where both seq/ref are N, but this is a
@@ -2142,7 +2140,7 @@ static int cram_decode_slice_xref(cram_slice *s, int required_fields) {
if (s->crecs[cr->mate_line].flags & BAM_FREVERSE)
cr->flags |= BAM_FMREVERSE;
} else {
- fprintf(stderr, "Mate line out of bounds: %d vs [0, %d]\n",
+ hts_log_error("Mate line out of bounds: %d vs [0, %d]",
cr->mate_line, s->hdr->num_records-1);
}
@@ -2267,8 +2265,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
if (embed_ref) {
cram_block *b;
if (s->hdr->ref_base_id < 0) {
- fprintf(stderr, "No reference specified and "
- "no embedded reference is available.\n");
+ hts_log_error("No reference specified and no embedded reference is available");
return -1;
}
b = cram_get_block_by_id(s, s->hdr->ref_base_id);
@@ -2280,7 +2277,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
s->ref_start = s->hdr->ref_seq_start;
s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1;
if (s->ref_end - s->ref_start > b->uncomp_size) {
- fprintf(stderr, "Embedded reference is too small.\n");
+ hts_log_error("Embedded reference is too small");
return -1;
}
} else if (!fd->no_ref) {
@@ -2298,7 +2295,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
/* Sanity check */
if (s->ref_start < 0) {
- fprintf(stderr, "Slice starts before base 1.\n");
+ hts_log_warning("Slice starts before base 1");
s->ref_start = 0;
}
pthread_mutex_lock(&fd->ref_lock);
@@ -2315,7 +2312,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
if ((fd->required_fields & SAM_SEQ) &&
s->ref == NULL && s->hdr->ref_seq_id >= 0 && !fd->no_ref) {
- fprintf(stderr, "Unable to fetch reference #%d %d..%d\n",
+ hts_log_error("Unable to fetch reference #%d %d..%d",
s->hdr->ref_seq_id, s->hdr->ref_seq_start,
s->hdr->ref_seq_start + s->hdr->ref_seq_span-1);
return -1;
@@ -2335,14 +2332,14 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
if (s->hdr->ref_seq_start >= s->ref_start) {
start = s->hdr->ref_seq_start - s->ref_start;
} else {
- fprintf(stderr, "Slice starts before base 1.\n");
+ hts_log_warning("Slice starts before base 1");
start = 0;
}
if (s->hdr->ref_seq_span <= s->ref_end - s->ref_start + 1) {
len = s->hdr->ref_seq_span;
} else {
- fprintf(stderr, "Slice ends beyond reference end.\n");
+ hts_log_warning("Slice ends beyond reference end");
len = s->ref_end - s->ref_start + 1;
}
@@ -2368,10 +2365,10 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
if ((!s->ref && s->hdr->ref_base_id < 0)
|| memcmp(digest, s->hdr->md5, 16) != 0) {
char M[33];
- fprintf(stderr, "ERROR: md5sum reference mismatch for ref "
- "%d pos %d..%d\n", ref_id, s->ref_start, s->ref_end);
- fprintf(stderr, "CRAM: %s\n", md5_print(s->hdr->md5, M));
- fprintf(stderr, "Ref : %s\n", md5_print(digest, M));
+ hts_log_error("MD5 checksum reference mismatch for ref %d pos %d..%d",
+ ref_id, s->ref_start, s->ref_end);
+ hts_log_error("CRAM: %s", md5_print(s->hdr->md5, M));
+ hts_log_error("Ref : %s", md5_print(digest, M));
return -1;
}
}
@@ -2468,7 +2465,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
cr->ref_id = ref_id; // Forced constant in CRAM 1.0
}
if (cr->ref_id < -1 || cr->ref_id >= bfd->nref) {
- fprintf(stderr, "Requested unknown reference ID %d\n", cr->ref_id);
+ hts_log_error("Requested unknown reference ID %d", cr->ref_id);
return -1;
}
@@ -2479,7 +2476,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
(char *)&cr->len, &out_sz);
if (r) return r;
if (cr->len < 0) {
- fprintf(stderr, "Read has negative length\n");
+ hts_log_error("Read has negative length");
return -1;
}
}
@@ -2666,8 +2663,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
if (!(bf & BAM_FUNMAP)) {
if ((ds & CRAM_AP) && cr->apos <= 0) {
- fprintf(stderr,
- "Read has alignment position %d but no unmapped flag\n",
+ hts_log_error("Read has alignment position %d but no unmapped flag",
cr->apos);
return -1;
}
@@ -2745,11 +2741,11 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
// As we grow blocks we overallocate by up to 50%. So shrink
// back to their final sizes here.
//
-// fprintf(stderr, "%d %d // %d %d // %d %d // %d %d\n",
-// (int)s->seqs_blk->byte, (int)s->seqs_blk->alloc,
-// (int)s->qual_blk->byte, (int)s->qual_blk->alloc,
-// (int)s->name_blk->byte, (int)s->name_blk->alloc,
-// (int)s->aux_blk->byte, (int)s->aux_blk->alloc);
+ //fprintf(stderr, "%d %d // %d %d // %d %d // %d %d\n",
+ // (int)s->seqs_blk->byte, (int)s->seqs_blk->alloc,
+ // (int)s->qual_blk->byte, (int)s->qual_blk->alloc,
+ // (int)s->name_blk->byte, (int)s->name_blk->alloc,
+ // (int)s->aux_blk->byte, (int)s->aux_blk->alloc);
BLOCK_RESIZE_EXACT(s->seqs_blk, BLOCK_SIZE(s->seqs_blk)+1);
BLOCK_RESIZE_EXACT(s->qual_blk, BLOCK_SIZE(s->qual_blk)+1);
BLOCK_RESIZE_EXACT(s->name_blk, BLOCK_SIZE(s->name_blk)+1);
@@ -3106,7 +3102,7 @@ static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) {
if (cram_decode_slice_mt(fd, c, s, fd->header) != 0) {
// if (cram_decode_slice(fd, c, s, fd->header) != 0) {
- fprintf(stderr, "Failure to decode slice\n");
+ hts_log_error("Failure to decode slice");
cram_free_slice(s);
c->slice = NULL;
return NULL;
@@ -3125,9 +3121,9 @@ static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) {
hts_tpool_result *res;
cram_decode_job *j;
-// fprintf(stderr, "Thread pool len = %d, %d\n",
-// hts_tpool_results_queue_len(fd->rqueue),
-// hts_tpool_results_queue_sz(fd->rqueue));
+ //fprintf(stderr, "Thread pool len = %d, %d\n",
+ // hts_tpool_results_queue_len(fd->rqueue),
+ // hts_tpool_results_queue_sz(fd->rqueue));
if (fd->ooc && hts_tpool_process_empty(fd->rqueue))
return NULL;
@@ -3135,7 +3131,7 @@ static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) {
res = hts_tpool_next_result_wait(fd->rqueue);
if (!res || !hts_tpool_result_data(res)) {
- fprintf(stderr, "hts_tpool_next_result failure\n");
+ hts_log_error("Call to hts_tpool_next_result failed");
return NULL;
}
@@ -3144,7 +3140,7 @@ static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) {
s = j->s;
if (j->exit_code != 0) {
- fprintf(stderr, "Slice decode failure\n");
+ hts_log_error("Slice decode failure");
fd->eof = 0;
hts_tpool_delete_result(res, 1);
return NULL;
diff --git a/cram/cram_encode.c b/cram/cram_encode.c
index d7f08f1..2b06b58 100644
--- a/cram/cram_encode.c
+++ b/cram/cram_encode.c
@@ -239,7 +239,7 @@ cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c,
}
default:
- fprintf(stderr, "Unknown preservation key '%.2s'\n", key);
+ hts_log_warning("Unknown preservation key '%.2s'", key);
break;
}
@@ -499,9 +499,7 @@ cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c,
itf8_put_blk(cb, mc);
BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map));
- if (fd->verbose)
- fprintf(stderr, "Wrote compression block header in %d bytes\n",
- (int)BLOCK_SIZE(cb));
+ hts_log_info("Wrote compression block header in %d bytes", (int)BLOCK_SIZE(cb));
BLOCK_UPLEN(cb);
@@ -746,8 +744,7 @@ static int cram_encode_slice_read(cram_fd *fd,
default:
- fprintf(stderr, "unhandled feature code %c\n",
- f->X.code);
+ hts_log_error("Unhandled feature code %c", f->X.code);
return -1;
}
}
@@ -1278,7 +1275,7 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
ref = cram_get_ref(fd, bam_ref(b), 1, 0);
if (!ref && bam_ref(b) >= 0) {
- fprintf(stderr, "Failed to load reference #%d\n", bam_ref(b));
+ hts_log_error("Failed to load reference #%d", bam_ref(b));
return -1;
}
if ((c->ref_id = bam_ref(b)) >= 0) {
@@ -1322,8 +1319,7 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
cram_ref_decr(fd->refs, c->ref_seq_id);
if (!cram_get_ref(fd, bam_ref(b), 1, 0)) {
- fprintf(stderr, "Failed to load reference #%d\n",
- bam_ref(b));
+ hts_log_error("Failed to load reference #%d", bam_ref(b));
return -1;
}
@@ -1407,8 +1403,7 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
multi_ref = c->stats[DS_RI]->nvals > 1;
if (multi_ref) {
- if (fd->verbose)
- fprintf(stderr, "Multi-ref container\n");
+ hts_log_info("Multi-ref container");
c->ref_seq_id = -2;
c->ref_seq_start = 0;
c->ref_seq_span = 0;
@@ -1448,7 +1443,7 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
h->codecs[DS_CF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_CF]),
c->stats[DS_CF], E_INT, NULL,
fd->version);
-// fprintf(stderr, "=== RN ===\n");
+ //fprintf(stderr, "=== RN ===\n");
// h->codecs[DS_RN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RN]),
// c->stats[DS_RN], E_BYTE_ARRAY, NULL,
// fd->version);
@@ -1642,8 +1637,7 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
/* Encode slices */
for (i = 0; i < c->curr_slice; i++) {
- if (fd->verbose)
- fprintf(stderr, "Encode slice %d\n", i);
+ hts_log_info("Encode slice %d", i);
if (cram_encode_slice(fd, c, h, c->slices[i]) != 0)
return -1;
@@ -1935,7 +1929,7 @@ static int cram_add_insertion(cram_container *c, cram_slice *s, cram_record *r,
}
/*
- * Encodes auxiliary data.
+ * Encodes auxiliary data, CRAM 1.0 format.
* Returns the read-group parsed out of the BAM aux fields on success
* NULL on failure or no rg present (FIXME)
*/
@@ -1970,7 +1964,7 @@ static char *cram_encode_aux_1_0(cram_fd *fd, bam_seq_t *b, cram_container *c,
case 'A': case 'C': case 'c': aux+=4; break;
case 'I': case 'i': case 'f': aux+=7; break;
default:
- fprintf(stderr, "Unhandled type code for NM tag\n");
+ hts_log_error("Unhandled type code for NM tag");
return NULL;
}
continue;
@@ -2040,10 +2034,8 @@ static char *cram_encode_aux_1_0(cram_fd *fd, bam_seq_t *b, cram_container *c,
blen = 4*count;
break;
default:
- fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
- type);
+ hts_log_error("Unknown sub-type '%c' for aux type 'B'", type);
return NULL;
-
}
tmp += itf8_put(tmp, blen+5);
@@ -2058,7 +2050,7 @@ static char *cram_encode_aux_1_0(cram_fd *fd, bam_seq_t *b, cram_container *c,
break;
}
default:
- fprintf(stderr, "Unknown aux type '%c'\n", aux[2]);
+ hts_log_error("Unknown aux type '%c'", aux[2]);
return NULL;
}
}
@@ -2117,7 +2109,7 @@ static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c,
case 'S': case 's': aux+=5; break;
case 'I': case 'i': case 'f': aux+=7; break;
default:
- fprintf(stderr, "Unhandled type code for NM tag\n");
+ hts_log_error("Unhandled type code for NM tag");
return NULL;
}
continue;
@@ -2243,8 +2235,7 @@ static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c,
}
default:
- fprintf(stderr, "Unsupported SAM aux type '%c'\n",
- aux[2]);
+ hts_log_error("Unsupported SAM aux type '%c'", aux[2]);
c = NULL;
}
@@ -2355,10 +2346,8 @@ static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c,
blen = 4*count;
break;
default:
- fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
- type);
+ hts_log_error("Unknown sub-type '%c' for aux type 'B'", type);
return NULL;
-
}
blen += 5; // sub-type & length
@@ -2368,7 +2357,7 @@ static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c,
break;
}
default:
- fprintf(stderr, "Unknown aux type '%c'\n", aux[2]);
+ hts_log_error("Unknown aux type '%c'", aux[2]);
return NULL;
}
tm->blk->m = tm->m;
@@ -2452,10 +2441,9 @@ static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) {
if (c->curr_slice == c->max_slice ||
(bam_ref(b) != c->curr_ref && !c->multi_seq)) {
c->ref_seq_span = fd->last_base - c->ref_seq_start + 1;
- if (fd->verbose)
- fprintf(stderr, "Flush container %d/%d..%d\n",
- c->ref_seq_id, c->ref_seq_start,
- c->ref_seq_start + c->ref_seq_span -1);
+ hts_log_info("Flush container %d/%d..%d",
+ c->ref_seq_id, c->ref_seq_start,
+ c->ref_seq_start + c->ref_seq_span -1);
/* Encode slices */
if (fd->pool) {
@@ -2697,8 +2685,7 @@ static int process_one_read(cram_fd *fd, cram_container *c,
char *rp = &ref[apos];
char *qp = &qual[spos];
if (end > cr->len) {
- fprintf(stderr, "CIGAR and query sequence are of "
- "different length\n");
+ hts_log_error("CIGAR and query sequence are of different length");
return -1;
}
for (l = 0; l < end; l++) {
@@ -2822,13 +2809,12 @@ static int process_one_read(cram_fd *fd, cram_container *c,
break;
default:
- fprintf(stderr, "Unknown CIGAR op code %d\n", cig_op);
+ hts_log_error("Unknown CIGAR op code %d", cig_op);
return -1;
}
}
if (cr->len && spos != cr->len) {
- fprintf(stderr, "CIGAR and query sequence are of different "
- "length\n");
+ hts_log_error("CIGAR and query sequence are of different length");
return -1;
}
fake_qual = spos;
@@ -3094,8 +3080,8 @@ int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) {
if (fd->multi_seq == -1 && c->curr_rec < c->max_rec/4+10 &&
fd->last_slice && fd->last_slice < c->max_rec/4+10 &&
!fd->embed_ref) {
- if (fd->verbose && !c->multi_seq)
- fprintf(stderr, "Multi-ref enabled for this container\n");
+ if (!c->multi_seq)
+ hts_log_info("Multi-ref enabled for this container");
multi_seq = 1;
}
diff --git a/cram/cram_external.c b/cram/cram_external.c
index be1dc7b..8d87f58 100644
--- a/cram/cram_external.c
+++ b/cram/cram_external.c
@@ -301,8 +301,7 @@ int cram_transcode_rg(cram_fd *in, cram_fd *out,
cram_block_compression_hdr *ch;
if (nrg != 1) {
- fprintf(stderr, "[%s] ERROR: not implemented for nrg != 1\n",
- __func__);
+ hts_log_error("CRAM transcode supports only a single RG");
return -2;
}
diff --git a/cram/cram_index.c b/cram/cram_index.c
index 53f1788..9f65046 100644
--- a/cram/cram_index.c
+++ b/cram/cram_index.c
@@ -208,7 +208,7 @@ int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx) {
// Parse it line at a time
- do {
+ while (pos < kstr.l) {
/* 1.1 layout */
if (kget_int32(&kstr, &pos, &e.refid) == -1)
goto fail;
@@ -232,7 +232,7 @@ int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx) {
//printf("%d/%d..%d\n", e.refid, e.start, e.end);
if (e.refid < -1) {
- fprintf(stderr, "Malformed index file, refid %d\n", e.refid);
+ hts_log_error("Malformed index file, refid %d", e.refid);
goto fail;
}
@@ -292,7 +292,7 @@ int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx) {
while (pos < kstr.l && kstr.s[pos] != '\n')
pos++;
pos++;
- } while (pos < kstr.l);
+ }
free(idx_stack);
free(kstr.s);
diff --git a/cram/cram_io.c b/cram/cram_io.c
index 8e2f1ea..d512734 100644
--- a/cram/cram_io.c
+++ b/cram/cram_io.c
@@ -554,7 +554,7 @@ char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) {
//err = inflateInit(&s);
err = inflateInit2(&s, 15 + 32);
if (err != Z_OK) {
- fprintf(stderr, "zlib inflateInit error: %s\n", s.msg);
+ hts_log_error("Call to zlib inflateInit failed: %s", s.msg);
free(data);
return NULL;
}
@@ -570,7 +570,7 @@ char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) {
break;
if (err != Z_OK) {
- fprintf(stderr, "zlib inflate error: %s\n", s.msg);
+ hts_log_error("Call to zlib inflate failed: %s", s.msg);
if (data)
free(data);
return NULL;
@@ -618,7 +618,7 @@ static char *zlib_mem_deflate(char *data, size_t size, size_t *cdata_size,
err = deflateInit2(&s, level, Z_DEFLATED, 15|16, 9, strat);
if (err != Z_OK) {
- fprintf(stderr, "zlib deflateInit2 error: %s\n", s.msg);
+ hts_log_error("Call to zlib deflateInit2 failed: %s", s.msg);
return NULL;
}
@@ -627,23 +627,23 @@ static char *zlib_mem_deflate(char *data, size_t size, size_t *cdata_size,
s.next_out = &cdata[cdata_pos];
s.avail_out = cdata_alloc - cdata_pos;
if (cdata_alloc - cdata_pos <= 0) {
- fprintf(stderr, "Deflate produced larger output than expected. Abort\n");
+ hts_log_error("Deflate produced larger output than expected");
return NULL;
}
err = deflate(&s, Z_NO_FLUSH);
cdata_pos = cdata_alloc - s.avail_out;
if (err != Z_OK) {
- fprintf(stderr, "zlib deflate error: %s\n", s.msg);
+ hts_log_error("Call to zlib deflate failed: %s", s.msg);
break;
}
}
if (deflate(&s, Z_FINISH) != Z_STREAM_END) {
- fprintf(stderr, "zlib deflate error: %s\n", s.msg);
+ hts_log_error("Call to zlib deflate failed: %s", s.msg);
}
*cdata_size = s.total_out;
if (deflateEnd(&s) != Z_OK) {
- fprintf(stderr, "zlib deflate error: %s\n", s.msg);
+ hts_log_error("Call to zlib deflate failed: %s", s.msg);
}
return (char *)cdata;
}
@@ -705,7 +705,7 @@ static char *lzma_mem_inflate(char *cdata, size_t csize, size_t *size) {
r = lzma_code(&strm, LZMA_RUN);
if (LZMA_OK != r && LZMA_STREAM_END != r) {
- fprintf(stderr, "[E::%s] LZMA decode failure (error %d)\n", __func__, r);
+ hts_log_error("LZMA decode failure (error %d)", r);
return NULL;
}
@@ -718,7 +718,7 @@ static char *lzma_mem_inflate(char *cdata, size_t csize, size_t *size) {
/* finish up any unflushed data; necessary? */
r = lzma_code(&strm, LZMA_FINISH);
if (r != LZMA_OK && r != LZMA_STREAM_END) {
- fprintf(stderr, "r=%d\n", r);
+ hts_log_error("Call to lzma_code failed with error %d", r);
return NULL;
}
@@ -786,11 +786,14 @@ cram_block *cram_read_block(cram_fd *fd) {
if (-1 == itf8_decode_crc(fd, &b->comp_size, &crc)) { free(b); return NULL; }
if (-1 == itf8_decode_crc(fd, &b->uncomp_size, &crc)) { free(b); return NULL; }
- // fprintf(stderr, " method %d, ctype %d, cid %d, csize %d, ucsize %d\n",
+ //fprintf(stderr, " method %d, ctype %d, cid %d, csize %d, ucsize %d\n",
// b->method, b->content_type, b->content_id, b->comp_size, b->uncomp_size);
if (b->method == RAW) {
- if (b->uncomp_size < 0) { free(b); return NULL; }
+ if (b->uncomp_size < 0 || b->comp_size != b->uncomp_size) {
+ free(b);
+ return NULL;
+ }
b->alloc = b->uncomp_size;
if (!(b->data = malloc(b->uncomp_size))){ free(b); return NULL; }
if (b->uncomp_size != hread(fd->fp, b->data, b->uncomp_size)) {
@@ -817,7 +820,7 @@ cram_block *cram_read_block(cram_fd *fd) {
crc = crc32(crc, b->data ? b->data : (uc *)"", b->alloc);
if (crc != b->crc32) {
- fprintf(stderr, "Block CRC32 failure\n");
+ hts_log_error("Block CRC32 failure");
free(b->data);
free(b);
return NULL;
@@ -966,8 +969,7 @@ int cram_uncompress_block(cram_block *b) {
}
#else
case BZIP2:
- fprintf(stderr, "Bzip2 compression is not compiled into this "
- "version.\nPlease rebuild and try again.\n");
+ hts_log_error("Bzip2 compression is not compiled into this version. Please rebuild and try again");
return -1;
#endif
@@ -985,8 +987,7 @@ int cram_uncompress_block(cram_block *b) {
break;
#else
case LZMA:
- fprintf(stderr, "Lzma compression is not compiled into this "
- "version.\nPlease rebuild and try again.\n");
+ hts_log_error("Lzma compression is not compiled into this version. Please rebuild and try again");
return -1;
break;
#endif
@@ -1404,7 +1405,7 @@ int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
comp = cram_compress_by_method((char *)b->data, b->uncomp_size,
&comp_size, GZIP, level, Z_FILTERED);
if (!comp) {
- fprintf(stderr, "Compression failed!\n");
+ hts_log_error("Compression failed");
return -1;
}
free(b->data);
@@ -1413,10 +1414,9 @@ int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
b->method = GZIP;
}
- if (fd->verbose)
- fprintf(stderr, "Compressed block ID %d from %d to %d by method %s\n",
- b->content_id, b->uncomp_size, b->comp_size,
- cram_block_method2str(b->method));
+ hts_log_info("Compressed block ID %d from %d to %d by method %s",
+ b->content_id, b->uncomp_size, b->comp_size,
+ cram_block_method2str(b->method));
if (b->method == RANS1)
b->method = RANS0; // Spec just has RANS (not 0/1) with auto-sensing
@@ -1446,7 +1446,7 @@ char *cram_block_method2str(enum cram_block_method m) {
case RANS0: return "RANS0";
case RANS1: return "RANS1";
case GZIP_RLE: return "GZIP_RLE";
- case ERROR: break;
+ case BM_ERROR: break;
}
return "?";
}
@@ -1594,7 +1594,7 @@ static BGZF *bgzf_open_ref(char *fn, char *mode, int is_md5) {
}
if (fp->is_compressed == 1 && bgzf_index_load(fp, fn, ".gzi") < 0) {
- fprintf(stderr, "Unable to load .gzi index '%s.gzi'\n", fn);
+ hts_log_error("Unable to load .gzi index '%s.gzi'", fn);
bgzf_close(fp);
return NULL;
}
@@ -1780,8 +1780,7 @@ static void sanitise_SQ_lines(cram_fd *fd) {
// Should we also check MD5sums here to ensure the correct
// reference was given?
- fprintf(stderr, "WARNING: Header @SQ length mismatch for "
- "ref %s, %d vs %d\n",
+ hts_log_warning("Header @SQ length mismatch for ref %s, %d vs %d",
r->name, fd->header->ref[i].len, (int)r->length);
// Fixing the parsed @SQ header will make MD:Z: strings work
@@ -1816,8 +1815,7 @@ int refs2id(refs_t *r, SAM_hdr *h) {
if (k != kh_end(r->h_meta)) {
r->ref_id[i] = kh_val(r->h_meta, k);
} else {
- fprintf(stderr, "Unable to find ref name '%s'\n",
- h->ref[i].name);
+ hts_log_warning("Unable to find ref name '%s'", h->ref[i].name);
}
}
@@ -2025,8 +2023,7 @@ static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) {
mFILE *mf;
int local_path = 0;
- if (fd->verbose)
- fprintf(stderr, "cram_populate_ref on fd %p, id %d\n", (void *)fd, id);
+ hts_log_info("Running cram_populate_ref on fd %p, id %d", (void *)fd, id);
cache_root[0] = '\0';
@@ -2042,8 +2039,7 @@ static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) {
snprintf(cache_root, PATH_MAX, "%s%s/hts-ref", base, extra);
snprintf(cache,PATH_MAX, "%s%s/hts-ref/%%2s/%%2s/%%s", base, extra);
local_cache = cache;
- if (fd->verbose)
- fprintf(stderr, "Populating local cache: %s\n", local_cache);
+ hts_log_info("Populating local cache: %s", local_cache);
}
}
@@ -2056,8 +2052,7 @@ static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) {
if (!(tag = sam_hdr_find_key(fd->header, ty, "M5", NULL)))
goto no_M5;
- if (fd->verbose)
- fprintf(stderr, "Querying ref %s\n", tag->str+3);
+ hts_log_info("Querying ref %s", tag->str+3);
/* Use cache if available */
if (local_cache && *local_cache) {
@@ -2168,8 +2163,7 @@ static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) {
}
expand_cache_path(path, local_cache, tag->str+3);
- if (fd->verbose)
- fprintf(stderr, "Writing cache file '%s'\n", path);
+ hts_log_info("Writing cache file '%s'", path);
mkdir_prefix(path, 01777);
do {
@@ -2203,7 +2197,7 @@ static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) {
hts_md5_hex(md5_buf2, md5_buf1);
if (strncmp(tag->str+3, md5_buf2, 32) != 0) {
- fprintf(stderr, "[E::%s] mismatching md5sum for downloaded reference.\n", __func__);
+ hts_log_error("Mismatching md5sum for downloaded reference");
hclose_abruptly(fp);
unlink(path_tmp);
return -1;
@@ -2330,7 +2324,7 @@ static char *load_ref_portion(BGZF *fp, ref_entry *e, int start, int end) {
cp_to = cp+j;
if (cp_to - seq != end-start+1) {
- fprintf(stderr, "Malformed reference file?\n");
+ hts_log_error("Malformed reference file");
free(seq);
return NULL;
}
@@ -2463,19 +2457,19 @@ char *cram_get_ref(cram_fd *fd, int id, int start, int end) {
/* Sanity checking: does this ID exist? */
if (id >= fd->refs->nref) {
- fprintf(stderr, "No reference found for id %d\n", id);
+ hts_log_error("No reference found for id %d", id);
pthread_mutex_unlock(&fd->ref_lock);
return NULL;
}
if (!fd->refs || !fd->refs->ref_id[id]) {
- fprintf(stderr, "No reference found for id %d\n", id);
+ hts_log_error("No reference found for id %d", id);
pthread_mutex_unlock(&fd->ref_lock);
return NULL;
}
if (!(r = fd->refs->ref_id[id])) {
- fprintf(stderr, "No reference found for id %d\n", id);
+ hts_log_error("No reference found for id %d", id);
pthread_mutex_unlock(&fd->ref_lock);
return NULL;
}
@@ -2495,7 +2489,7 @@ char *cram_get_ref(cram_fd *fd, int id, int start, int end) {
pthread_mutex_lock(&fd->refs->lock);
if (r->length == 0) {
if (cram_populate_ref(fd, id, r) == -1) {
- fprintf(stderr, "Failed to populate reference for id %d\n", id);
+ hts_log_error("Failed to populate reference for id %d", id);
pthread_mutex_unlock(&fd->refs->lock);
pthread_mutex_unlock(&fd->ref_lock);
return NULL;
@@ -2874,7 +2868,7 @@ cram_container *cram_read_container(cram_fd *fd) {
rd+=4;
if (crc != c->crc32) {
- fprintf(stderr, "Container header CRC32 failure\n");
+ hts_log_error("Container header CRC32 failure");
cram_free_container(c);
return NULL;
}
@@ -3094,7 +3088,7 @@ void *cram_flush_thread(void *arg) {
/* Encode the container blocks and generate compression header */
if (0 != cram_encode_container(j->fd, j->c)) {
- fprintf(stderr, "cram_encode_container failed\n");
+ hts_log_error("Call to cram_encode_container failed");
return NULL;
}
@@ -3424,13 +3418,13 @@ cram_slice *cram_read_slice(cram_fd *fd) {
break;
default:
- fprintf(stderr, "Unexpected block of type %s\n",
+ hts_log_error("Unexpected block of type %s",
cram_content_type2str(b->content_type));
goto err;
}
if (s->hdr->num_blocks < 1) {
- fprintf(stderr, "Slice does not include any data blocks.\n");
+ hts_log_error("Slice does not include any data blocks");
goto err;
}
@@ -3514,8 +3508,7 @@ cram_file_def *cram_read_file_def(cram_fd *fd) {
}
if (def->major_version > 3) {
- fprintf(stderr, "CRAM version number mismatch\n"
- "Expected 1.x, 2.x or 3.x, got %d.%d\n",
+ hts_log_error("CRAM version number mismatch. Expected 1.x, 2.x or 3.x, got %d.%d",
def->major_version, def->minor_version);
free(def);
return NULL;
@@ -3576,7 +3569,8 @@ SAM_hdr *cram_read_SAM_hdr(cram_fd *fd) {
} else {
cram_container *c = cram_read_container(fd);
cram_block *b;
- int i, len;
+ int i;
+ int64_t len;
if (!c)
return NULL;
@@ -3663,7 +3657,11 @@ SAM_hdr *cram_read_SAM_hdr(cram_fd *fd) {
* Out must be at least PATH_MAX bytes long.
*/
static void full_path(char *out, char *in) {
- if (*in == '/') {
+ size_t in_l = strlen(in);
+ if (*in == '/' ||
+ // Windows paths
+ (in_l > 3 && toupper(*in) >= 'A' && toupper(*in) <= 'Z' &&
+ in[1] == ':' && (in[2] == '/' || in[2] == '\\'))) {
strncpy(out, in, PATH_MAX);
out[PATH_MAX-1] = 0;
} else {
@@ -4073,7 +4071,6 @@ cram_fd *cram_dopen(hFILE *fp, const char *filename, const char *mode) {
fd->ref = NULL;
fd->decode_md = 0;
- fd->verbose = 0;
fd->seqs_per_slice = SEQS_PER_SLICE;
fd->bases_per_slice = BASES_PER_SLICE;
fd->slices_per_container = SLICE_PER_CNT;
@@ -4341,7 +4338,6 @@ int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) {
break;
case CRAM_OPT_VERBOSITY:
- fd->verbose = va_arg(args, int);
break;
case CRAM_OPT_SEQS_PER_SLICE:
@@ -4412,14 +4408,13 @@ int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) {
int major, minor;
char *s = va_arg(args, char *);
if (2 != sscanf(s, "%d.%d", &major, &minor)) {
- fprintf(stderr, "Malformed version string %s\n", s);
+ hts_log_error("Malformed version string %s", s);
return -1;
}
if (!((major == 1 && minor == 0) ||
(major == 2 && (minor == 0 || minor == 1)) ||
(major == 3 && minor == 0))) {
- fprintf(stderr, "Unknown version string; "
- "use 1.0, 2.0, 2.1 or 3.0\n");
+ hts_log_error("Unknown version string; use 1.0, 2.0, 2.1 or 3.0");
errno = EINVAL;
return -1;
}
@@ -4436,7 +4431,7 @@ int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) {
case CRAM_OPT_NTHREADS: {
int nthreads = va_arg(args, int);
- if (nthreads > 1) {
+ if (nthreads >= 1) {
if (!(fd->pool = hts_tpool_init(nthreads)))
return -1;
@@ -4479,7 +4474,7 @@ int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) {
break;
default:
- fprintf(stderr, "Unknown CRAM option code %d\n", opt);
+ hts_log_error("Unknown CRAM option code %d", opt);
errno = EINVAL;
return -1;
}
diff --git a/cram/cram_stats.c b/cram/cram_stats.c
index e913055..5fb63d4 100644
--- a/cram/cram_stats.c
+++ b/cram/cram_stats.c
@@ -87,15 +87,16 @@ void cram_stats_del(cram_stats *st, int32_t val) {
if (--kh_val(st->h, k) == 0)
kh_del(m_i2i, st->h, k);
} else {
- fprintf(stderr, "Failed to remove val %d from cram_stats\n", val);
+ hts_log_warning("Failed to remove val %d from cram_stats", val);
st->nsamp++;
}
} else {
- fprintf(stderr, "Failed to remove val %d from cram_stats\n", val);
+ hts_log_warning("Failed to remove val %d from cram_stats", val);
st->nsamp++;
}
}
+#if DEBUG_CRAM_STATS
void cram_stats_dump(cram_stats *st) {
int i;
fprintf(stderr, "cram_stats:\n");
@@ -114,6 +115,7 @@ void cram_stats_dump(cram_stats *st) {
}
}
}
+#endif
/*
* Computes entropy from integer frequencies for various encoding methods and
@@ -128,7 +130,9 @@ enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) {
int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX;
int *vals = NULL, *freqs = NULL, vals_alloc = 0;
- //cram_stats_dump(st);
+#if DEBUG_CRAM_STATS
+ cram_stats_dump(st);
+#endif
/* Count number of unique symbols */
for (nvals = i = 0; i < MAX_STAT_VAL; i++) {
diff --git a/cram/cram_structs.h b/cram/cram_structs.h
index 5165605..a50507e 100644
--- a/cram/cram_structs.h
+++ b/cram/cram_structs.h
@@ -192,7 +192,7 @@ typedef struct cram_file_def {
struct cram_slice;
enum cram_block_method {
- ERROR = -1,
+ BM_ERROR = -1,
RAW = 0,
GZIP = 1,
BZIP2 = 2,
@@ -706,7 +706,6 @@ typedef struct cram_fd {
// options
int decode_md; // Whether to export MD and NM tags
- int verbose;
int seqs_per_slice;
int bases_per_slice;
int slices_per_container;
diff --git a/cram/mFILE.c b/cram/mFILE.c
index 77368b4..dbb8b0a 100644
--- a/cram/mFILE.c
+++ b/cram/mFILE.c
@@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <stdarg.h>
+#include "hts_internal.h"
#include "cram/os.h"
#include "cram/mFILE.h"
@@ -307,7 +308,7 @@ mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
mf = mfcreate(NULL, 0);
if (NULL == mf) return NULL;
} else {
- fprintf(stderr, "Must specify either r, w or a for mode\n");
+ hts_log_error("Must specify either r, w or a for mode");
return NULL;
}
mf->fp = fp;
diff --git a/cram/mFILE.h b/cram/mFILE.h
index 05a3a88..a6b067a 100644
--- a/cram/mFILE.h
+++ b/cram/mFILE.h
@@ -48,6 +48,11 @@ typedef struct {
size_t flush_pos;
} mFILE;
+// Work around a clash with winuser.h
+#ifdef MF_APPEND
+# undef MF_APPEND
+#endif
+
#define MF_READ 1
#define MF_WRITE 2
#define MF_APPEND 4
diff --git a/cram/os.h b/cram/os.h
index b1fdca6..ba0a207 100644
--- a/cram/os.h
+++ b/cram/os.h
@@ -205,27 +205,13 @@ static inline uint16_t le_int2(uint16_t x) {
* Microsoft Windows running MinGW
*/
#if defined(__MINGW32__)
-/* #define mkdir(filename,mode) mkdir((filename)) */
+#include <io.h>
+#define mkdir(filename,mode) mkdir((filename))
#define sysconf(x) 512
-#define ftruncate(fd,len) _chsize(fd,len)
+#ifndef ftruncate
+# define ftruncate(fd,len) _chsize(fd,len)
+#endif
#endif
-
-/* Generic WIN32 API issues */
-#ifdef _WIN32
-# ifndef HAVE_FSEEKO
-# if __MSVCRT_VERSION__ >= 0x800
- /* if you have MSVCR80 installed then you can use these definitions: */
-# define off_t __int64
-# define fseeko _fseeki64
-# define ftello _ftelli64
-# else
- /* otherwise we're stuck with 32-bit file support */
-# define off_t long
-# define fseeko fseek
-# define ftello ftell
-# endif
-# endif /* !HAVE_FSEEKO */
-#endif /* _WIN32 */
#ifdef __cplusplus
}
diff --git a/cram/sam_header.c b/cram/sam_header.c
index e2cb55d..288a356 100644
--- a/cram/sam_header.c
+++ b/cram/sam_header.c
@@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string.h>
#include <assert.h>
+#include "hts_internal.h"
#include "cram/sam_header.h"
#include "cram/string_alloc.h"
@@ -41,7 +42,7 @@ static void sam_hdr_error(char *msg, char *line, int len, int lno) {
for (j = 0; j < len && line[j] != '\n'; j++)
;
- fprintf(stderr, "%s at line %d: \"%.*s\"\n", msg, lno, j, line);
+ hts_log_error("%s at line %d: \"%.*s\"", msg, lno, j, line);
}
void sam_hdr_dump(SAM_hdr *hdr) {
@@ -775,8 +776,7 @@ static enum sam_sort_order sam_hdr_parse_sort_order(SAM_hdr *hdr) {
else if (strcmp(tag->str+3, "coordinate") == 0)
so = ORDER_COORD;
else if (strcmp(tag->str+3, "unknown") != 0)
- fprintf(stderr, "Unknown sort order field: %s\n",
- tag->str+3);
+ hts_log_error("Unknown sort order field: %s", tag->str+3);
}
}
}
diff --git a/errmod.c b/errmod.c
index ee4823b..bb9fc28 100644
--- a/errmod.c
+++ b/errmod.c
@@ -28,6 +28,8 @@ DEALINGS IN THE SOFTWARE. */
#include <math.h>
#include "htslib/hts.h"
#include "htslib/ksort.h"
+#include "htslib/hts_os.h" // for drand48
+
KSORT_INIT_GENERIC(uint16_t)
struct errmod_t {
diff --git a/hfile.c b/hfile.c
index 57e2b89..b0c5eba 100644
--- a/hfile.c
+++ b/hfile.c
@@ -526,6 +526,18 @@ static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
n = fp->is_socket? send(fp->fd, buffer, nbytes, 0)
: write(fp->fd, buffer, nbytes);
} while (n < 0 && errno == EINTR);
+#ifdef _WIN32
+ // On windows we have no SIGPIPE. Instead write returns
+ // EINVAL. We check for this and our fd being a pipe.
+ // If so, we raise SIGTERM instead of SIGPIPE. It's not
+ // ideal, but I think the only alternative is extra checking
+ // in every single piece of code.
+ if (n < 0 && errno == EINVAL &&
+ GetLastError() == ERROR_NO_DATA &&
+ GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) {
+ raise(SIGTERM);
+ }
+#endif
return n;
}
@@ -537,12 +549,13 @@ static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
static int fd_flush(hFILE *fpv)
{
- hFILE_fd *fp = (hFILE_fd *) fpv;
- int ret;
+ int ret = 0;
do {
#ifdef HAVE_FDATASYNC
+ hFILE_fd *fp = (hFILE_fd *) fpv;
ret = fdatasync(fp->fd);
-#else
+#elif defined(HAVE_FSYNC)
+ hFILE_fd *fp = (hFILE_fd *) fpv;
ret = fsync(fp->fd);
#endif
// Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
@@ -619,6 +632,11 @@ static hFILE *hopen_fd_fileuri(const char *url, const char *mode)
else if (strncmp(url, "file:///", 8) == 0) url += 7;
else { errno = EPROTONOSUPPORT; return NULL; }
+#ifdef _WIN32
+ // For cases like C:/foo
+ if (url[0] == '/' && url[2] == ':' && url[3] == '/') url++;
+#endif
+
return hopen_fd(url, mode);
}
@@ -876,7 +894,8 @@ static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
else if (s[i] == ':') break;
else return NULL;
- if (i == 0 || i >= sizeof scheme) return NULL;
+ // 1 byte schemes are likely windows C:/foo pathnames
+ if (i <= 1 || i >= sizeof scheme) return NULL;
scheme[i] = '\0';
pthread_mutex_lock(&plugins_lock);
diff --git a/hfile_internal.h b/hfile_internal.h
index 8ca7b57..b405746 100644
--- a/hfile_internal.h
+++ b/hfile_internal.h
@@ -42,11 +42,11 @@ extern "C" {
the buffer is beyond the new capacity.
@param fp The file stream
- @param bufsiz The size of the new bufsiz
+ @param bufsiz The size of the new buffer
@return Returns 0 on success, -1 on failure.
*/
-int hfile_set_blksize(hFILE *fp, size_t capacity);
+int hfile_set_blksize(hFILE *fp, size_t bufsiz);
struct BGZF;
/*!
@@ -172,6 +172,11 @@ extern int hfile_plugin_init_s3(struct hFILE_plugin *self);
/* This one is never built as a separate plugin. */
extern int hfile_plugin_init_net(struct hFILE_plugin *self);
+// Callback to allow headers to be set in http connections. Currently used
+// to allow s3 to renew tokens when seeking. Kept internal for now,
+// although we may consider exposing it in the API later.
+typedef int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
+
#ifdef __cplusplus
}
#endif
diff --git a/hfile_libcurl.c b/hfile_libcurl.c
index ea99aa7..9bab53f 100644
--- a/hfile_libcurl.c
+++ b/hfile_libcurl.c
@@ -28,7 +28,10 @@ DEALINGS IN THE SOFTWARE. */
#include <stdlib.h>
#include <string.h>
#include <errno.h>
-#include <sys/select.h>
+#include <pthread.h>
+#ifndef _WIN32
+# include <sys/select.h>
+#endif
#include "hfile_internal.h"
#ifdef ENABLE_PLUGINS
@@ -39,10 +42,24 @@ DEALINGS IN THE SOFTWARE. */
#include <curl/curl.h>
+// Curl-compatible header linked list
+typedef struct {
+ struct curl_slist *list; // Array of nodes, chained via their next pointers
+ unsigned int num; // Number of nodes in list currently in use
+ unsigned int size; // Number of nodes list has room for
+} hdrlist;
+
+typedef struct {
+ hdrlist fixed; // List of headers supplied at hopen()
+ hdrlist extra; // List of headers from callback
+ hts_httphdr_callback callback; // Callback to get more headers
+ void *callback_data; // Data to pass to callback
+} http_headers;
+
typedef struct {
hFILE base;
CURL *easy;
- struct curl_slist *headers;
+ CURLM *multi;
off_t file_size;
struct {
union { char *rd; const char *wr; } ptr;
@@ -53,6 +70,11 @@ typedef struct {
unsigned paused : 1; // callback tells us that it has paused transfer
unsigned closing : 1; // informs callback that hclose() has been invoked
unsigned finished : 1; // wait_perform() tells us transfer is complete
+ unsigned perform_again : 1;
+ unsigned is_read : 1; // Opened in read mode
+ unsigned can_seek : 1; // Can (attempt to) seek on this handle
+ int nrunning;
+ http_headers headers;
} hFILE_libcurl;
static int http_status_errno(int status)
@@ -178,16 +200,25 @@ static int multi_errno(CURLMcode errm)
static struct {
- CURLM *multi;
kstring_t useragent;
- int nrunning;
- unsigned perform_again : 1;
-} curl = { NULL, { 0, 0, NULL }, 0, 0 };
+ CURLSH *share;
+ pthread_mutex_t lock;
+} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER };
+
+static void share_lock(CURL *handle, curl_lock_data data,
+ curl_lock_access access, void *userptr) {
+ pthread_mutex_lock(&curl.lock); // Arguments are ignored: one global mutex covers all shared data
+}
+
+static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
+ pthread_mutex_unlock(&curl.lock); // Releases the same global mutex that share_lock() takes
+}
+
static void libcurl_exit()
{
- (void) curl_multi_cleanup(curl.multi);
- curl.multi = NULL;
+ if (curl_share_cleanup(curl.share) == CURLSHE_OK)
+ curl.share = NULL;
free(curl.useragent.s);
curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
@@ -195,15 +226,98 @@ static void libcurl_exit()
curl_global_cleanup();
}
+// Add one header string to the end of hdrs, growing the node array when it
+// is full. If dup is non-zero the string is copied with strdup(); otherwise
+// the pointer itself is stored. Returns 0 on success, -1 if memory could
+// not be allocated (hdrs->num is left unchanged in that case).
+static int append_header(hdrlist *hdrs, const char *data, int dup) {
+ struct curl_slist *entry;
+ if (hdrs->num == hdrs->size) {
+ unsigned int grown = hdrs->size ? hdrs->size * 2 : 4, k;
+ struct curl_slist *moved = realloc(hdrs->list, grown * sizeof(*moved));
+ if (!moved) return -1;
+ hdrs->list = moved;
+ hdrs->size = grown;
+ // The array may have moved, so re-chain the nodes already in use
+ for (k = 0; k + 1 < hdrs->num; k++) moved[k].next = &moved[k + 1];
+ }
+ entry = &hdrs->list[hdrs->num];
+ // Annoyingly, libcurl doesn't declare the char * as const...
+ entry->data = dup ? strdup(data) : (char *) data;
+ if (!entry->data) return -1;
+ entry->next = NULL;
+ if (hdrs->num > 0) entry[-1].next = entry;
+ hdrs->num++;
+ return 0;
+}
+
+// Free every header string held in hdrs and mark the list as empty. When
+// completely is non-zero the node array itself is released as well;
+// otherwise it is kept so that it can be refilled.
+static void free_headers(hdrlist *hdrs, int completely) {
+ struct curl_slist *node = hdrs->list;
+ unsigned int left;
+ for (left = hdrs->num; left > 0; left--, node++) {
+ free(node->data);
+ node->data = NULL;
+ node->next = NULL;
+ }
+ hdrs->num = 0;
+ if (!completely) return;
+ free(hdrs->list);
+ hdrs->list = NULL;
+ hdrs->size = 0;
+}
+
+// Return the first node of the header chain for fp: the start of the fixed
+// list if it has entries, else the start of the extra list, else NULL.
+static struct curl_slist * get_header_list(hFILE_libcurl *fp) {
+ const hdrlist *fixed = &fp->headers.fixed, *extra = &fp->headers.extra;
+ if (fixed->num > 0) return fixed->list;
+ return extra->num > 0 ? extra->list : NULL;
+}
+
+// Ask the header callback (if one is set) for a fresh set of headers and
+// install them as the "extra" list, replacing whatever the callback supplied
+// previously. The strings returned by the callback are taken over without
+// copying; the array holding them stays with the callback and has its
+// entries set to NULL once they have been consumed.
+// Returns 0 on success (including "no callback" and "no change"), -1 if the
+// callback fails or a header cannot be stored.
+static int add_callback_headers(hFILE_libcurl *fp) {
+ char **hdrs = NULL, **hdr;
+
+ if (!fp->headers.callback)
+ return 0;
+
+ // Get the headers from the callback
+ if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
+ return -1;
+ }
+
+ if (!hdrs) // No change
+ return 0;
+
+ // Remove any old callback headers
+ if (fp->headers.fixed.num > 0) {
+ // Unlink lists
+ fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
+ }
+ free_headers(&fp->headers.extra, 0);
+
+ // Convert to libcurl-suitable form
+ for (hdr = hdrs; *hdr; hdr++) {
+ if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
+ goto cleanup;
+ }
+ }
+ for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;
-static void process_messages()
+ if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
+ // Relink lists
+ fp->headers.fixed.list[fp->headers.fixed.num - 1].next
+ = &fp->headers.extra.list[0];
+ }
+ return 0;
+
+ cleanup:
+ // hdr points at the string that could not be appended. It and all the
+ // strings after it were never taken over by the extra list, so release
+ // every one of them, advancing through the array as we go.
+ for (; *hdr; hdr++) {
+ free(*hdr);
+ *hdr = NULL;
+ }
+ return -1;
+}
+
+static void process_messages(hFILE_libcurl *fp)
{
CURLMsg *msg;
int remaining;
- while ((msg = curl_multi_info_read(curl.multi, &remaining)) != NULL) {
- hFILE_libcurl *fp = NULL;
- curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, (char **) &fp);
+ while ((msg = curl_multi_info_read(fp->multi, &remaining)) != NULL) {
switch (msg->msg) {
case CURLMSG_DONE:
fp->finished = 1;
@@ -216,7 +330,7 @@ static void process_messages()
}
}
-static int wait_perform()
+static int wait_perform(hFILE_libcurl *fp)
{
fd_set rd, wr, ex;
int maxfd, nrunning;
@@ -226,18 +340,18 @@ static int wait_perform()
FD_ZERO(&rd);
FD_ZERO(&wr);
FD_ZERO(&ex);
- if (curl_multi_fdset(curl.multi, &rd, &wr, &ex, &maxfd) != CURLM_OK)
+ if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK)
maxfd = -1, timeout = 1000;
else if (maxfd < 0)
timeout = 100; // as recommended by curl_multi_fdset(3)
else {
- if (curl_multi_timeout(curl.multi, &timeout) != CURLM_OK)
+ if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK)
timeout = 1000;
else if (timeout < 0)
timeout = 10000; // as recommended by curl_multi_timeout(3)
}
- if (timeout > 0 && ! curl.perform_again) {
+ if (timeout > 0 && ! fp->perform_again) {
struct timeval tval;
tval.tv_sec = (timeout / 1000);
tval.tv_usec = (timeout % 1000) * 1000;
@@ -245,12 +359,12 @@ static int wait_perform()
if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0) return -1;
}
- errm = curl_multi_perform(curl.multi, &nrunning);
- curl.perform_again = 0;
- if (errm == CURLM_CALL_MULTI_PERFORM) curl.perform_again = 1;
+ errm = curl_multi_perform(fp->multi, &nrunning);
+ fp->perform_again = 0;
+ if (errm == CURLM_CALL_MULTI_PERFORM) fp->perform_again = 1;
else if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; }
- if (nrunning < curl.nrunning) process_messages();
+ if (nrunning < fp->nrunning) process_messages(fp);
return 0;
}
@@ -282,7 +396,7 @@ static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
while (! fp->paused && ! fp->finished)
- if (wait_perform() < 0) return -1;
+ if (wait_perform(fp) < 0) return -1;
nbytes = fp->buffer.ptr.rd - buffer;
fp->buffer.ptr.rd = NULL;
@@ -327,7 +441,7 @@ static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
while (! fp->paused && ! fp->finished)
- if (wait_perform() < 0) return -1;
+ if (wait_perform(fp) < 0) return -1;
nbytes = fp->buffer.ptr.wr - buffer;
fp->buffer.ptr.wr = NULL;
@@ -344,11 +458,17 @@ static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
-
+ hFILE_libcurl temp_fp;
CURLcode err;
CURLMcode errm;
off_t origin, pos;
+ if (!fp->is_read || !fp->can_seek) {
+ // Cowardly refuse to seek when writing or a previous seek failed.
+ errno = ESPIPE;
+ return -1;
+ }
+
switch (whence) {
case SEEK_SET:
origin = 0;
@@ -374,36 +494,116 @@ static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
pos = origin + offset;
- errm = curl_multi_remove_handle(curl.multi, fp->easy);
- if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; }
- curl.nrunning--;
-
// TODO If we seem to be doing random access, use CURLOPT_RANGE to do
// limited reads (e.g. about a BAM block!) so seeking can reuse the
// existing connection more often.
- err = curl_easy_setopt(fp->easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
- if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
+ // Get new headers from the callback (if defined). This changes the
+ // headers in fp before it gets duplicated, but they should have been
+ // sent by now.
- fp->buffer.len = 0;
- fp->paused = fp->finished = 0;
+ if (fp->headers.callback) {
+ struct curl_slist *list;
+ if (add_callback_headers(fp) != 0)
+ return -1;
+ list = get_header_list(fp);
+ if (list) {
+ err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
+ if (err != CURLE_OK) {
+ errno = easy_errno(fp->easy,err);
+ return -1;
+ }
+ }
+ }
- errm = curl_multi_add_handle(curl.multi, fp->easy);
+ /*
+ Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
+ a new request to the server, reading from the location that we want
+ to seek to. If the new request works and returns the correct data,
+ the original easy handle in *fp is closed and replaced with the new
+ one. If not, we close the new handle, leave *fp unchanged, set
+ errno to ESPIPE and return -1 so that the caller knows we can't seek.
+ This allows the caller to decide if it wants to continue reading from
+ fp, in the same way as it would if reading from a pipe.
+ */
+
+ memcpy(&temp_fp, fp, sizeof(temp_fp));
+ temp_fp.buffer.len = 0;
+ temp_fp.buffer.ptr.rd = NULL;
+ temp_fp.easy = curl_easy_duphandle(fp->easy);
+ if (!temp_fp.easy)
+ goto early_error;
+
+ err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
+ err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
+ err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
+ if (err != CURLE_OK)
+ goto error;
+
+ temp_fp.buffer.len = 0; // Ensures we only read the response headers
+ temp_fp.paused = temp_fp.finished = 0;
+
+ // fp->multi and temp_fp.multi are the same.
+ errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; }
- curl.nrunning++;
+ temp_fp.nrunning = ++fp->nrunning;
- err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
- if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }
+ err = curl_easy_pause(temp_fp.easy, CURLPAUSE_CONT);
+ if (err != CURLE_OK)
+ goto error_remove;
- while (! fp->paused && ! fp->finished)
- if (wait_perform() < 0) return -1;
+ while (! temp_fp.paused && ! temp_fp.finished)
+ if (wait_perform(&temp_fp) < 0) goto error_remove;
- if (fp->finished && fp->final_result != CURLE_OK) {
- errno = easy_errno(fp->easy, fp->final_result);
+ if (temp_fp.finished && temp_fp.final_result != CURLE_OK)
+ goto error_remove;
+
+ // We've got a good response, close the original connection and
+ // replace it with the new one.
+
+ errm = curl_multi_remove_handle(fp->multi, fp->easy);
+ if (errm != CURLM_OK) {
+ curl_easy_reset(temp_fp.easy);
+ curl_multi_remove_handle(fp->multi, temp_fp.easy);
+ errno = multi_errno(errm);
return -1;
}
+ fp->nrunning--;
+
+ curl_easy_cleanup(fp->easy);
+ fp->easy = temp_fp.easy;
+ err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
+ err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
+ if (err != CURLE_OK) {
+ int save_errno = easy_errno(fp->easy, err);
+ curl_easy_reset(fp->easy);
+ errno = save_errno;
+ return -1;
+ }
+ fp->buffer.len = 0;
+ fp->paused = temp_fp.paused;
+ fp->finished = temp_fp.finished;
+ fp->perform_again = temp_fp.perform_again;
+ fp->final_result = temp_fp.final_result;
return pos;
+
+ error_remove:
+ curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
+ errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
+ if (errm != CURLM_OK) {
+ errno = multi_errno(errm);
+ return -1;
+ }
+ fp->nrunning--;
+ error:
+ curl_easy_cleanup(temp_fp.easy);
+ early_error:
+ fp->can_seek = 0; // Don't try to seek again
+ /* This value for errno may not be entirely true, but the caller may be
+ able to carry on with the existing handle. */
+ errno = ESPIPE;
+ return -1;
}
static int libcurl_close(hFILE *fpv)
@@ -423,16 +623,22 @@ static int libcurl_close(hFILE *fpv)
if (err != CURLE_OK) save_errno = easy_errno(fp->easy, err);
while (save_errno == 0 && ! fp->paused && ! fp->finished)
- if (wait_perform() < 0) save_errno = errno;
+ if (wait_perform(fp) < 0) save_errno = errno;
if (fp->finished && fp->final_result != CURLE_OK)
save_errno = easy_errno(fp->easy, fp->final_result);
- errm = curl_multi_remove_handle(curl.multi, fp->easy);
+ errm = curl_multi_remove_handle(fp->multi, fp->easy);
if (errm != CURLM_OK && save_errno == 0) save_errno = multi_errno(errm);
- curl.nrunning--;
+ fp->nrunning--;
curl_easy_cleanup(fp->easy);
+ curl_multi_cleanup(fp->multi);
+
+ if (fp->headers.callback) // Tell callback to free any data it needs to
+ fp->headers.callback(fp->headers.callback_data, NULL);
+ free_headers(&fp->headers.fixed, 1);
+ free_headers(&fp->headers.extra, 1);
if (save_errno) { errno = save_errno; return -1; }
else return 0;
@@ -444,9 +650,10 @@ static const struct hFILE_backend libcurl_backend =
};
static hFILE *
-libcurl_open(const char *url, const char *modes, struct curl_slist *headers)
+libcurl_open(const char *url, const char *modes, http_headers *headers)
{
hFILE_libcurl *fp;
+ struct curl_slist *list;
char mode;
const char *s;
CURLcode err;
@@ -464,12 +671,22 @@ libcurl_open(const char *url, const char *modes, struct curl_slist *headers)
fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
if (fp == NULL) goto early_error;
- fp->headers = headers;
+ if (headers) {
+ fp->headers = *headers;
+ } else {
+ memset(&fp->headers, 0, sizeof(fp->headers));
+ }
fp->file_size = -1;
fp->buffer.ptr.rd = NULL;
fp->buffer.len = 0;
fp->final_result = (CURLcode) -1;
- fp->paused = fp->closing = fp->finished = 0;
+ fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
+ fp->can_seek = 1;
+ fp->nrunning = 0;
+ fp->easy = NULL;
+
+ fp->multi = curl_multi_init();
+ if (fp->multi == NULL) { errno = ENOMEM; goto error; }
fp->easy = curl_easy_init();
if (fp->easy == NULL) { errno = ENOMEM; goto error; }
@@ -480,22 +697,26 @@ libcurl_open(const char *url, const char *modes, struct curl_slist *headers)
if (mode == 'r') {
err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
+ fp->is_read = 1;
}
else {
- struct curl_slist *list;
-
err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
-
- list = curl_slist_append(fp->headers, "Transfer-Encoding: chunked");
- if (list) fp->headers = list; else goto error;
+ if (append_header(&fp->headers.fixed,
+ "Transfer-Encoding: chunked", 1) < 0)
+ goto error;
+ fp->is_read = 0;
}
+ err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
- if (fp->headers)
- err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, fp->headers);
+ if (fp->headers.callback) {
+ if (add_callback_headers(fp) != 0) goto error;
+ }
+ if ((list = get_header_list(fp)) != NULL)
+ err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
if (hts_verbose <= 8)
err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
@@ -504,12 +725,12 @@ libcurl_open(const char *url, const char *modes, struct curl_slist *headers)
if (err != 0) { errno = ENOSYS; goto error; }
- errm = curl_multi_add_handle(curl.multi, fp->easy);
+ errm = curl_multi_add_handle(fp->multi, fp->easy);
if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
- curl.nrunning++;
+ fp->nrunning++;
while (! fp->paused && ! fp->finished)
- if (wait_perform() < 0) goto error_remove;
+ if (wait_perform(fp) < 0) goto error_remove;
if (fp->finished && fp->final_result != CURLE_OK) {
errno = easy_errno(fp->easy, fp->final_result);
@@ -528,21 +749,22 @@ libcurl_open(const char *url, const char *modes, struct curl_slist *headers)
error_remove:
save = errno;
- (void) curl_multi_remove_handle(curl.multi, fp->easy);
- curl.nrunning--;
+ (void) curl_multi_remove_handle(fp->multi, fp->easy);
+ fp->nrunning--;
errno = save;
error:
save = errno;
if (fp->easy) curl_easy_cleanup(fp->easy);
- if (fp->headers) curl_slist_free_all(fp->headers);
+ if (fp->multi) curl_multi_cleanup(fp->multi);
+ free_headers(&fp->headers.fixed, 1);
+ free_headers(&fp->headers.extra, 1);
hfile_destroy((hFILE *) fp);
errno = save;
return NULL;
early_error:
save = errno;
- if (headers) curl_slist_free_all(headers);
errno = save;
return NULL;
}
@@ -552,7 +774,7 @@ static hFILE *hopen_libcurl(const char *url, const char *modes)
return libcurl_open(url, modes, NULL);
}
-static int parse_va_list(struct curl_slist **headers, va_list args)
+static int parse_va_list(http_headers *headers, va_list args)
{
const char *argtype;
@@ -560,24 +782,30 @@ static int parse_va_list(struct curl_slist **headers, va_list args)
if (strcmp(argtype, "httphdr:v") == 0) {
const char **hdr;
for (hdr = va_arg(args, const char **); *hdr; hdr++) {
- struct curl_slist *list = curl_slist_append(*headers, *hdr);
- if (list) *headers = list; else return -1;
+ if (append_header(&headers->fixed, *hdr, 1) < 0)
+ return -1;
}
}
else if (strcmp(argtype, "httphdr:l") == 0) {
const char *hdr;
while ((hdr = va_arg(args, const char *)) != NULL) {
- struct curl_slist *list = curl_slist_append(*headers, hdr);
- if (list) *headers = list; else return -1;
+ if (append_header(&headers->fixed, hdr, 1) < 0)
+ return -1;
}
}
else if (strcmp(argtype, "httphdr") == 0) {
const char *hdr = va_arg(args, const char *);
if (hdr) {
- struct curl_slist *list = curl_slist_append(*headers, hdr);
- if (list) *headers = list; else return -1;
+ if (append_header(&headers->fixed, hdr, 1) < 0)
+ return -1;
}
}
+ else if (strcmp(argtype, "httphdr_callback") == 0) {
+ headers->callback = va_arg(args, const hts_httphdr_callback);
+ }
+ else if (strcmp(argtype, "httphdr_callback_data") == 0) {
+ headers->callback_data = va_arg(args, void *);
+ }
else if (strcmp(argtype, "va_list") == 0) {
va_list *args2 = va_arg(args, va_list *);
if (args2) {
@@ -589,15 +817,62 @@ static int parse_va_list(struct curl_slist **headers, va_list args)
return 0;
}
+/*
+ HTTP headers to be added to the request can be passed in as extra
+ arguments to hopen(). The headers can be specified as follows:
+
+ * Single header:
+ hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
+
+ * Multiple headers in the argument list:
+ hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
+
+ * Multiple headers in a char* array:
+ hopen(url, mode, "httphdr:v", hdrs, NULL);
+ where `hdrs` is a char **. The list ends with a NULL pointer.
+
+ * A callback function
+ hopen(url, mode, "httphdr_callback", func,
+ "httphdr_callback_data", arg, NULL);
+ `func` has type
+ int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
+ `arg` is passed to the callback as a void *.
+
+ The function is called at file open, and when attempting to seek (which
+ opens a new HTTP request). This allows, for example, access tokens
+ that may have gone stale to be regenerated. The function is also
+ called (with `hdrs` == NULL) on file close so that the callback can
+ free any memory that it needs to.
+
+ The callback should return 0 on success, non-zero on failure. It should
+ return in *hdrs a list of strings containing the new headers (terminated
+ with a NULL pointer). These will replace any headers previously supplied
+ by the callback. If no changes are necessary, it can return NULL
+ in *hdrs, in which case the previous headers will be left unchanged.
+
+ Ownership of the strings in the header list passes to hfile_libcurl,
+ so the callback should not attempt to use or free them itself. The memory
+ containing the array belongs to the callback and will not be freed by
+ hfile_libcurl.
+
+ Headers supplied by the callback are appended after any specified
+ using the "httphdr", "httphdr:l" or "httphdr:v" methods. No attempt
+ is made to replace these headers (even if a key is repeated) so anything
+ that is expected to vary needs to come from the callback.
+ */
+
static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
{
- struct curl_slist *headers = NULL;
- if (parse_va_list(&headers, args) < 0) {
- if (headers) curl_slist_free_all(headers);
- return NULL;
+ hFILE *fp = NULL;
+ http_headers headers = { { NULL, 0, 0 }, { NULL, 0, 0 }, NULL, NULL };
+ if (parse_va_list(&headers, args) == 0) {
+ fp = libcurl_open(url, modes, &headers);
}
- return libcurl_open(url, modes, headers);
+ if (!fp) {
+ free_headers(&headers.fixed, 1);
+ }
+ return fp;
}
int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
@@ -617,18 +892,26 @@ int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
const curl_version_info_data *info;
const char * const *protocol;
CURLcode err;
+ CURLSHcode errsh;
err = curl_global_init(CURL_GLOBAL_ALL);
if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
- curl.multi = curl_multi_init();
- if (curl.multi == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
+ curl.share = curl_share_init();
+ if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
+ errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
+ errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
+ errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
+ if (errsh != 0) {
+ curl_share_cleanup(curl.share);
+ curl_global_cleanup();
+ errno = EIO;
+ return -1;
+ }
info = curl_version_info(CURLVERSION_NOW);
ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
- curl.nrunning = 0;
- curl.perform_again = 0;
self->name = "libcurl";
self->destroy = libcurl_exit;
diff --git a/hfile_s3.c b/hfile_s3.c
index 9b23b43..e09e54c 100644
--- a/hfile_s3.c
+++ b/hfile_s3.c
@@ -38,6 +38,20 @@ DEALINGS IN THE SOFTWARE. */
#include "htslib/hts.h" // for hts_version() and hts_verbose
#include "htslib/kstring.h"
+typedef struct {
+ kstring_t id;
+ kstring_t token;
+ kstring_t secret;
+ char *bucket;
+ kstring_t auth_hdr;
+ time_t auth_time;
+ char date[40];
+ char mode;
+ char *headers[3];
+} s3_auth_data;
+
+#define AUTH_LIFETIME 60
+
#if defined HAVE_COMMONCRYPTO
#include <CommonCrypto/CommonHMAC.h>
@@ -215,37 +229,100 @@ static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
free(text.s);
}
+// Fill ad->headers with freshly allocated copies of the Date header and,
+// when one has been built, the Authorization header, followed by a NULL
+// terminator. On success *hdrs is pointed at that array and 0 is returned.
+// On allocation failure -1 is returned, *hdrs is left untouched and no
+// freed pointer is left behind in ad->headers.
+static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) {
+ char **hdr = &ad->headers[0];
+ *hdr = strdup(ad->date);
+ if (!*hdr) return -1;
+ hdr++;
+ if (ad->auth_hdr.l) {
+ *hdr = strdup(ad->auth_hdr.s);
+ if (!*hdr) {
+ free(ad->headers[0]);
+ ad->headers[0] = NULL; // Don't keep a pointer to freed memory
+ return -1;
+ }
+ hdr++;
+ }
+ *hdr = NULL;
+ *hdrs = ad->headers; // Only publish the array once it is complete
+ return 0;
+}
+
+// Release an s3_auth_data structure together with the credential, bucket
+// and authorization strings stored in it.
+static void free_auth_data(s3_auth_data *ad) {
+ char *strings[] = {
+ ad->id.s, ad->token.s, ad->secret.s, ad->bucket, ad->auth_hdr.s
+ };
+ size_t i;
+ for (i = 0; i < sizeof strings / sizeof strings[0]; i++)
+ free(strings[i]);
+ free(ad);
+}
+
+// Header callback handed to hopen() for S3 URLs.
+// ctx is the s3_auth_data for the connection.
+// hdrs == NULL means the connection is closing: the auth data is freed.
+// Otherwise, if the last headers are younger than AUTH_LIFETIME seconds,
+// *hdrs is set to NULL (no change). If not, a new Date header is built,
+// the request is re-signed when both an id and a secret are available, and
+// the new headers are returned in *hdrs.
+// Returns 0 on success, -1 on failure.
+static int auth_header_callback(void *ctx, char ***hdrs) {
+ s3_auth_data *ad = (s3_auth_data *) ctx;
+
+ time_t now = time(NULL);
+#ifdef HAVE_GMTIME_R
+ struct tm tm_buffer;
+ struct tm *tm = gmtime_r(&now, &tm_buffer);
+#else
+ struct tm *tm = gmtime(&now);
+#endif
+ kstring_t message = { 0, 0, NULL };
+ unsigned char digest[DIGEST_BUFSIZ];
+ size_t digest_len;
+
+ if (!hdrs) { // Closing connection
+ free_auth_data(ad);
+ return 0;
+ }
+
+ if (now - ad->auth_time < AUTH_LIFETIME) {
+ // Last auth string should still be valid
+ *hdrs = NULL;
+ return 0;
+ }
+
+ // The time conversion can fail; strftime() must not be given NULL
+ if (!tm)
+ return -1;
+
+ // A zero return means the header did not fit and ad->date is undefined
+ if (strftime(ad->date, sizeof(ad->date),
+ "Date: %a, %d %b %Y %H:%M:%S GMT", tm) == 0)
+ return -1;
+
+ if (!ad->id.l || !ad->secret.l) {
+ // Nothing to sign with, so only the Date header is sent
+ ad->auth_time = now;
+ return copy_auth_headers(ad, hdrs);
+ }
+
+ // ad->date + 6 skips the leading "Date: " so only the timestamp is signed
+ if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s/%s",
+ ad->mode == 'r' ? "GET" : "PUT", ad->date + 6,
+ ad->token.l ? "x-amz-security-token:" : "",
+ ad->token.l ? ad->token.s : "",
+ ad->token.l ? "\n" : "",
+ ad->bucket) < 0) {
+ goto fail; // message.s may already have been allocated
+ }
+
+ digest_len = s3_sign(digest, &ad->secret, &message);
+ ad->auth_hdr.l = 0;
+ if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0)
+ goto fail;
+ base64_kput(digest, digest_len, &ad->auth_hdr);
+
+ free(message.s);
+ ad->auth_time = now;
+ return copy_auth_headers(ad, hdrs);
+
+ fail:
+ free(message.s);
+ return -1;
+}
+
static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
{
const char *bucket, *path;
- char date_hdr[40];
char *header_list[4], **header = header_list;
- kstring_t message = { 0, 0, NULL };
kstring_t url = { 0, 0, NULL };
kstring_t profile = { 0, 0, NULL };
- kstring_t id = { 0, 0, NULL };
- kstring_t secret = { 0, 0, NULL };
kstring_t host_base = { 0, 0, NULL };
- kstring_t token = { 0, 0, NULL };
kstring_t token_hdr = { 0, 0, NULL };
- kstring_t auth_hdr = { 0, 0, NULL };
- time_t now = time(NULL);
-#ifdef HAVE_GMTIME_R
- struct tm tm_buffer;
- struct tm *tm = gmtime_r(&now, &tm_buffer);
-#else
- struct tm *tm = gmtime(&now);
-#endif
+ s3_auth_data *ad = calloc(1, sizeof(*ad));
- kputs(strchr(mode, 'r')? "GET\n" : "PUT\n", &message);
- kputc('\n', &message);
- kputc('\n', &message);
- strftime(date_hdr, sizeof date_hdr, "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
- *header++ = date_hdr;
- kputs(&date_hdr[6], &message);
- kputc('\n', &message);
+ if (!ad)
+ return NULL;
+ ad->mode = strchr(mode, 'r') ? 'r' : 'w';
// Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
@@ -267,10 +344,10 @@ static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
}
else {
const char *colon2 = strpbrk(&colon[1], ":@");
- urldecode_kput(bucket, colon - bucket, &id);
- urldecode_kput(&colon[1], colon2 - &colon[1], &secret);
+ urldecode_kput(bucket, colon - bucket, &ad->id);
+ urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret);
if (*colon2 == ':')
- urldecode_kput(&colon2[1], path - &colon2[1], &token);
+ urldecode_kput(&colon2[1], path - &colon2[1], &ad->token);
}
bucket = &path[1];
@@ -279,27 +356,28 @@ static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
else {
// If the URL has no ID[:SECRET]@, consider environment variables.
const char *v;
- if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &id);
- if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &secret);
- if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &token);
+ if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id);
+ if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret);
+ if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token);
if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &profile);
else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &profile);
else kputs("default", &profile);
}
- if (id.l == 0) {
+ if (ad->id.l == 0) {
const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
parse_ini(v? v : "~/.aws/credentials", profile.s,
- "aws_access_key_id", &id, "aws_secret_access_key", &secret,
- "aws_session_token", &token, NULL);
+ "aws_access_key_id", &ad->id,
+ "aws_secret_access_key", &ad->secret,
+ "aws_session_token", &ad->token, NULL);
}
- if (id.l == 0)
- parse_ini("~/.s3cfg", profile.s, "access_key", &id,
- "secret_key", &secret, "access_token", &token,
+ if (ad->id.l == 0)
+ parse_ini("~/.s3cfg", profile.s, "access_key", &ad->id,
+ "secret_key", &ad->secret, "access_token", &ad->token,
"host_base", &host_base, NULL);
- if (id.l == 0)
- parse_simple("~/.awssecret", &id, &secret);
+ if (ad->id.l == 0)
+ parse_simple("~/.awssecret", &ad->id, &ad->secret);
if (host_base.l == 0)
kputs("s3.amazonaws.com", &host_base);
@@ -316,46 +394,35 @@ static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
}
kputs(path, &url);
- if (token.l > 0) {
- kputs("x-amz-security-token:", &message);
- kputs(token.s, &message);
- kputc('\n', &message);
-
+ if (ad->token.l > 0) {
kputs("X-Amz-Security-Token: ", &token_hdr);
- kputs(token.s, &token_hdr);
+ kputs(ad->token.s, &token_hdr);
*header++ = token_hdr.s;
}
- kputc('/', &message);
- kputs(bucket, &message); // CanonicalizedResource is '/' + bucket + path
-
- // If we have no id/secret, we can't sign the request but will
- // still be able to access public data sets.
- if (id.l > 0 && secret.l > 0) {
- unsigned char digest[DIGEST_BUFSIZ];
- size_t digest_len = s3_sign(digest, &secret, &message);
-
- kputs("Authorization: AWS ", &auth_hdr);
- kputs(id.s, &auth_hdr);
- kputc(':', &auth_hdr);
- base64_kput(digest, digest_len, &auth_hdr);
-
- *header++ = auth_hdr.s;
- }
+ ad->bucket = strdup(bucket);
+ if (!ad->bucket)
+ goto fail;
*header = NULL;
hFILE *fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
- NULL);
- free(message.s);
+ "httphdr_callback", auth_header_callback,
+ "httphdr_callback_data", ad, NULL);
+ if (!fp) goto fail;
+
free(url.s);
free(profile.s);
- free(id.s);
- free(secret.s);
free(host_base.s);
- free(token.s);
free(token_hdr.s);
- free(auth_hdr.s);
return fp;
+
+ fail:
+ free(url.s);
+ free(profile.s);
+ free(host_base.s);
+ free(token_hdr.s);
+ free_auth_data(ad);
+ return NULL;
}
static hFILE *s3_open(const char *url, const char *mode)
diff --git a/hts.c b/hts.c
index 1c6ca8a..22ab44e 100644
--- a/hts.c
+++ b/hts.c
@@ -44,6 +44,7 @@ DEALINGS IN THE SOFTWARE. */
#include "version.h"
#include "hts_internal.h"
#include "hfile_internal.h"
+#include "htslib/hts_os.h" // drand48
#include "htslib/khash.h"
#include "htslib/kseq.h"
@@ -108,7 +109,7 @@ static enum htsFormatCategory format_category(enum htsExactFormat fmt)
case bed:
return region_list;
- case json:
+ case htsget:
return unknown_category;
case unknown_format:
@@ -196,7 +197,7 @@ cmp_nonblank(const char *key, const unsigned char *u, const unsigned char *ulim)
int hts_detect_format(hFILE *hfile, htsFormat *fmt)
{
- unsigned char s[21];
+ unsigned char s[32];
ssize_t len = hpeek(hfile, s, 18);
if (len < 0) return -1;
@@ -285,9 +286,9 @@ int hts_detect_format(hFILE *hfile, htsFormat *fmt)
fmt->version.major = 1, fmt->version.minor = -1;
return 0;
}
- else if (cmp_nonblank("{\"", s, &s[len]) == 0) {
+ else if (cmp_nonblank("{\"htsget\":", s, &s[len]) == 0) {
fmt->category = unknown_category;
- fmt->format = json;
+ fmt->format = htsget;
fmt->version.major = fmt->version.minor = -1;
return 0;
}
@@ -328,7 +329,7 @@ char *hts_format_description(const htsFormat *format)
case crai: kputs("CRAI", &str); break;
case csi: kputs("CSI", &str); break;
case tbi: kputs("Tabix", &str); break;
- case json: kputs("JSON", &str); break;
+ case htsget: kputs("htsget", &str); break;
default: kputs("unknown", &str); break;
}
@@ -375,7 +376,7 @@ char *hts_format_description(const htsFormat *format)
case crai:
case vcf:
case bed:
- case json:
+ case htsget:
kputs(" text", &str);
break;
@@ -796,8 +797,8 @@ htsFile *hts_hopen(hFILE *hfile, const char *fn, const char *mode)
if (strchr(simple_mode, 'r')) {
if (hts_detect_format(hfile, &fp->format) < 0) goto error;
- if (fp->format.format == json) {
- hFILE *hfile2 = hopen_json_redirect(hfile, simple_mode);
+ if (fp->format.format == htsget) {
+ hFILE *hfile2 = hopen_htsget_redirect(hfile, simple_mode);
if (hfile2 == NULL) goto error;
// Build fp against the result of the redirection
@@ -993,13 +994,14 @@ int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...) {
if (hf) {
va_start(args, opt);
- if (hfile_set_blksize(hf, va_arg(args, int)) != 0 && hts_verbose >= 2)
- fprintf(stderr, "[W::%s] Failed to change block size\n", __func__);
+ if (hfile_set_blksize(hf, va_arg(args, int)) != 0)
+ hts_log_warning("Failed to change block size");
va_end(args);
- } else if (hts_verbose >= 2)
+ }
+ else {
// To do - implement for vcf/bcf.
- fprintf(stderr, "[W::%s] cannot change block size for this format\n", __func__);
-
+ hts_log_warning("Cannot change block size for this format");
+ }
return 0;
}
@@ -2252,68 +2254,101 @@ int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data)
/**********************
*** Retrieve index ***
**********************/
-
-static char *test_and_fetch(const char *fn)
+// Returns -1 if index couldn't be opened.
+// -2 on other errors
+static int test_and_fetch(const char *fn, const char **local_fn)
{
+ hFILE *remote_hfp;
+ FILE *local_fp = NULL;
+ uint8_t *buf = NULL;
+ int save_errno;
+
if (hisremote(fn)) {
const int buf_size = 1 * 1024 * 1024;
- hFILE *fp_remote;
- FILE *fp;
- uint8_t *buf;
int l;
const char *p;
for (p = fn + strlen(fn) - 1; p >= fn; --p)
if (*p == '/') break;
++p; // p now points to the local file name
// Attempt to open local file first
- if ((fp = fopen((char*)p, "rb")) != 0)
+ if ((local_fp = fopen((char*)p, "rb")) != 0)
{
- fclose(fp);
- return (char*)p;
+ fclose(local_fp);
+ *local_fn = p;
+ return 0;
}
// Attempt to open remote file. Stay quiet on failure, it is OK to fail when trying first .csi then .tbi index.
- if ((fp_remote = hopen(fn, "r")) == 0) return 0;
- if ((fp = fopen(p, "w")) == 0) {
+ if ((remote_hfp = hopen(fn, "r")) == 0) return -1;
+ if ((local_fp = fopen(p, "w")) == 0) {
hts_log_error("Failed to create file %s in the working directory", p);
- hclose_abruptly(fp_remote);
- return 0;
+ goto fail;
}
hts_log_info("Downloading file %s to local directory", fn);
buf = (uint8_t*)calloc(buf_size, 1);
- while ((l = hread(fp_remote, buf, buf_size)) > 0) fwrite(buf, 1, l, fp);
+ if (!buf) {
+ hts_log_error("%s", strerror(errno));
+ goto fail;
+ }
+ while ((l = hread(remote_hfp, buf, buf_size)) > 0) {
+ if (fwrite(buf, 1, l, local_fp) != l) {
+ hts_log_error("Failed to write data to %s : %s",
+ fn, strerror(errno));
+ goto fail;
+ }
+ }
free(buf);
- fclose(fp);
- if (hclose(fp_remote) != 0) {
+ if (fclose(local_fp) < 0) {
+ hts_log_error("Error closing %s : %s", fn, strerror(errno));
+ local_fp = NULL;
+ goto fail;
+ }
+ if (hclose(remote_hfp) != 0) {
hts_log_error("Failed to close remote file %s", fn);
}
- return (char*)p;
+ *local_fn = p;
+ return 0;
} else {
- hFILE *fp;
- if ((fp = hopen(fn, "r")) == 0) return 0;
- hclose_abruptly(fp);
- return (char*)fn;
+ hFILE *local_hfp;
+ if ((local_hfp = hopen(fn, "r")) == 0) return -1;
+ hclose_abruptly(local_hfp);
+ *local_fn = fn;
+ return 0;
}
+
+ fail:
+ save_errno = errno;
+ hclose_abruptly(remote_hfp);
+ if (local_fp) fclose(local_fp);
+ free(buf);
+ errno = save_errno;
+ return -2;
}
char *hts_idx_getfn(const char *fn, const char *ext)
{
- int i, l_fn, l_ext;
- char *fnidx, *ret;
+ int i, l_fn, l_ext, ret;
+ char *fnidx;
+ const char *local_fn = NULL;
l_fn = strlen(fn); l_ext = strlen(ext);
fnidx = (char*)calloc(l_fn + l_ext + 1, 1);
+ if (!fnidx) return NULL;
+ // First try : append `ext` to `fn`
strcpy(fnidx, fn); strcpy(fnidx + l_fn, ext);
- if ((ret = test_and_fetch(fnidx)) == 0) {
+ if ((ret = test_and_fetch(fnidx, &local_fn)) == -1) {
+ // Second try : replace suffix of `fn` with `ext`
for (i = l_fn - 1; i > 0; --i)
- if (fnidx[i] == '.') break;
- strcpy(fnidx + i, ext);
- ret = test_and_fetch(fnidx);
+ if (fnidx[i] == '.' || fnidx[i] == '/') break;
+ if (fnidx[i] == '.') {
+ strcpy(fnidx + i, ext);
+ ret = test_and_fetch(fnidx, &local_fn);
+ }
}
- if (ret == 0) {
+ if (ret < 0) {
free(fnidx);
- return 0;
+ return NULL;
}
- l_fn = strlen(ret);
- memmove(fnidx, ret, l_fn + 1);
+ l_fn = strlen(local_fn);
+ memmove(fnidx, local_fn, l_fn + 1);
return fnidx;
}
diff --git a/hts_internal.h b/hts_internal.h
index fd38ef6..99ebaf5 100644
--- a/hts_internal.h
+++ b/hts_internal.h
@@ -159,7 +159,7 @@ typedef struct hts_cram_idx_t {
// Entry point to hFILE_multipart backend.
-struct hFILE *hopen_json_redirect(struct hFILE *hfile, const char *mode);
+struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode);
struct hts_path_itr {
diff --git a/hts_os.c b/hts_os.c
new file mode 100644
index 0000000..dcad3e7
--- /dev/null
+++ b/hts_os.c
@@ -0,0 +1,40 @@
+/// @file hts_os.c
+/// Operating System specific tweaks, for compatibility with POSIX.
+/*
+ Copyright (C) 2017 Genome Research Ltd.
+
+ Author: James Bonfield <jkb at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE. */
+
+// Windows (maybe more) lack a drand48 implementation.
+#ifndef HAVE_DRAND48
+#include "win/rand.c"
+#else
+void hts_srand48(long seed) { srand48(seed); } // void wrapper: no value to return
+double hts_erand48(unsigned short xseed[3]) { return erand48(xseed); }
+double hts_drand48(void) { return drand48(); }
+long hts_lrand48(void) { return lrand48(); } // long, matching the htslib/hts_os.h prototype
+#endif
+
+// // On Windows when using the MSYS or Cygwin terminals, isatty fails
+// #ifdef _WIN32
+// #define USE_FILEEXTD
+// #include "win/iscygpty.c"
+// #endif
diff --git a/htsfile.1 b/htsfile.1
index 732f057..34bef05 100644
--- a/htsfile.1
+++ b/htsfile.1
@@ -1,4 +1,4 @@
-.TH htsfile 1 "21 June 2017" "htslib-1.5" "Bioinformatics tools"
+.TH htsfile 1 "28 September 2017" "htslib-1.6" "Bioinformatics tools"
.SH NAME
htsfile \- identify high-throughput sequencing data files
.\"
diff --git a/htslib/bgzf.h b/htslib/bgzf.h
index 15c76cd..95c30a5 100644
--- a/htslib/bgzf.h
+++ b/htslib/bgzf.h
@@ -49,6 +49,7 @@ extern "C" {
#define BGZF_ERR_IO 4
#define BGZF_ERR_MISUSE 8
#define BGZF_ERR_MT 16 // stream cannot be multi-threaded
+#define BGZF_ERR_CRC 32
struct hFILE;
struct hts_tpool;
@@ -92,6 +93,9 @@ typedef struct __kstring_t {
* Open an existing file descriptor for reading or writing.
*
* @param fd file descriptor
+ * Note that the file must be opened in binary mode, or else
+ * there will be problems on platforms that make a difference
+ * between text and binary mode.
* @param mode mode matching /[rwag][u0-9]+/: 'r' for reading, 'w' for
* writing, 'a' for appending, 'g' for gzip rather than BGZF
* compression (with 'w' only), and digit specifies the zlib
diff --git a/htslib/hfile.h b/htslib/hfile.h
index d07a755..fa89718 100644
--- a/htslib/hfile.h
+++ b/htslib/hfile.h
@@ -66,6 +66,10 @@ hFILE *hopen(const char *filename, const char *mode, ...) HTS_RESULT_USED;
/// Associate a stream with an existing open file descriptor
/** @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred.
+Note that the file must be opened in binary mode, or else
+there will be problems on platforms that make a difference
+between text and binary mode.
+
For socket descriptors (on Windows), _mode_ should contain `s`.
*/
hFILE *hdopen(int fd, const char *mode) HTS_RESULT_USED;
diff --git a/htslib/hts.h b/htslib/hts.h
index e67a80b..1af566f 100644
--- a/htslib/hts.h
+++ b/htslib/hts.h
@@ -138,7 +138,8 @@ enum htsExactFormat {
unknown_format,
binary_format, text_format,
sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed,
- json,
+ htsget,
+ json HTS_DEPRECATED_ENUM("Use htsExactFormat 'htsget' instead") = htsget,
format_maximum = 32767
};
@@ -210,7 +211,7 @@ enum hts_fmt_option {
// CRAM specific
CRAM_OPT_DECODE_MD,
CRAM_OPT_PREFIX,
- CRAM_OPT_VERBOSITY, // make general
+ CRAM_OPT_VERBOSITY, // obsolete, use hts_set_log_level() instead
CRAM_OPT_SEQS_PER_SLICE,
CRAM_OPT_SLICES_PER_CONTAINER,
CRAM_OPT_RANGE,
diff --git a/htslib/hts_defs.h b/htslib/hts_defs.h
index 1602303..3bf4a46 100644
--- a/htslib/hts_defs.h
+++ b/htslib/hts_defs.h
@@ -69,6 +69,24 @@ DEALINGS IN THE SOFTWARE. */
#define HTS_DEPRECATED(message)
#endif
+#if HTS_COMPILER_HAS(__deprecated__) || HTS_GCC_AT_LEAST(6,4)
+#define HTS_DEPRECATED_ENUM(message) __attribute__ ((__deprecated__ (message)))
+#else
+#define HTS_DEPRECATED_ENUM(message)
+#endif
+
+// On mingw the "printf" format type doesn't work. It needs "gnu_printf"
+// in order to check %lld and %z, otherwise it defaults to checking against
+// the Microsoft library printf format options despite linking against the
+// GNU posix implementation of printf. The __MINGW_PRINTF_FORMAT macro
+// expands to printf or gnu_printf as required, but obviously may not
+// exist
+#ifdef __MINGW_PRINTF_FORMAT
+#define HTS_PRINTF_FMT __MINGW_PRINTF_FORMAT
+#else
+#define HTS_PRINTF_FMT printf
+#endif
+
#if HTS_COMPILER_HAS(__format__) || HTS_GCC_AT_LEAST(3,0)
#define HTS_FORMAT(type, idx, first) __attribute__((__format__ (type, idx, first)))
#else
diff --git a/htslib/hts_endian.h b/htslib/hts_endian.h
index 06827c0..9a843d5 100644
--- a/htslib/hts_endian.h
+++ b/htslib/hts_endian.h
@@ -82,7 +82,7 @@ DEALINGS IN THE SOFTWARE. */
/** @def HTS_ALLOW_UNALIGNED
* @brief Control use of unaligned memory access.
- *
+ *
* Defining HTS_ALLOW_UNALIGNED=1 converts shift-and-or to simple casts on
* little-endian platforms that can tolerate unaligned access (notably Intel
* x86).
@@ -136,7 +136,7 @@ static inline uint32_t le_to_u32(const uint8_t *buf) {
return *((uint32_u *) buf);
#else
return ((uint32_t) buf[0] |
- ((uint32_t) buf[1] << 8) |
+ ((uint32_t) buf[1] << 8) |
((uint32_t) buf[2] << 16) |
((uint32_t) buf[3] << 24));
#endif
@@ -285,7 +285,7 @@ static inline void i64_to_le(int64_t val, uint8_t *buf) {
/* Floating point. Assumptions:
* Platform uses IEEE 754 format
- * sizeof(float) == sizeof(uint32_t)
+ * sizeof(float) == sizeof(uint32_t)
* sizeof(double) == sizeof(uint64_t)
* Endian-ness is the same for both floating point and integer
* Type-punning via a union is allowed
diff --git a/htslib/hts_os.h b/htslib/hts_os.h
new file mode 100644
index 0000000..3a671d4
--- /dev/null
+++ b/htslib/hts_os.h
@@ -0,0 +1,59 @@
+/// @file hts_os.h
+/// Operating System specific tweaks, for compatibility with POSIX.
+/*
+ Copyright (C) 2017 Genome Research Ltd.
+
+ Author: James Bonfield <jkb at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE. */
+
+#ifndef HTSLIB_HTS_OS_H
+#define HTSLIB_HTS_OS_H
+
+extern void hts_srand48(long seed);
+extern double hts_erand48(unsigned short xseed[3]);
+extern double hts_drand48(void);
+extern long hts_lrand48(void);
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+// Windows usually lacks *rand48(), but cygwin provides them.
+#define srand48(S) hts_srand48((S))
+#define erand48(X) hts_erand48((X))
+#define drand48() hts_drand48()
+#define lrand48() hts_lrand48()
+#endif
+
+#if 0 /* def _WIN32 - disabled for now, not currently used */
+/* Check if the fd is a cygwin/msys's pty. */
+extern int is_cygpty(int fd);
+#endif
+
+
+#if defined(__MINGW32__)
+#include <io.h>
+#define mkdir(filename,mode) mkdir((filename))
+#endif
+
+#ifdef _WIN32
+#include <stdlib.h>
+#define srandom srand
+#define random rand
+#endif
+
+#endif
diff --git a/htslib/knetfile.h b/htslib/knetfile.h
index 1249a7c..8633596 100644
--- a/htslib/knetfile.h
+++ b/htslib/knetfile.h
@@ -77,7 +77,7 @@ extern "C" {
knetFile *knet_open(const char *fn, const char *mode);
- /*
+ /*
This only works with local files.
*/
knetFile *knet_dopen(int fd, const char *mode);
diff --git a/htslib/ksort.h b/htslib/ksort.h
index 331b11d..f50aa71 100644
--- a/htslib/ksort.h
+++ b/htslib/ksort.h
@@ -65,6 +65,12 @@
#include <stdlib.h>
#include <string.h>
+// Use our own drand48() symbol (used by ks_shuffle) to avoid portability
+// problems on Windows. Don't include htslib/hts_os.h for this as it
+// may not get on with older attempts to fix this in code that includes
+// this file.
+extern double hts_drand48(void);
+
typedef struct {
void *left, *right;
int depth;
@@ -261,7 +267,7 @@ typedef struct {
int i, j; \
for (i = n; i > 1; --i) { \
type_t tmp; \
- j = (int)(drand48() * i); \
+ j = (int)(hts_drand48() * i); \
tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \
} \
}
diff --git a/htslib/regidx.h b/htslib/regidx.h
index e432f73..f2e0e00 100644
--- a/htslib/regidx.h
+++ b/htslib/regidx.h
@@ -1,6 +1,6 @@
/// @file htslib/regidx.h
/// Region indexing.
-/*
+/*
Copyright (C) 2014 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -11,10 +11,10 @@
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -42,7 +42,7 @@
while ( REGITR_OVERLAP(itr,from,to) )
{
- printf("[%d,%d] overlaps with [%d,%d], payload=%s\n", from,to,
+ printf("[%d,%d] overlaps with [%d,%d], payload=%s\n", from,to,
REGITR_START(itr), REGITR_END(itr), REGITR_PAYLOAD(itr,char*));
itr.i++;
}
@@ -128,7 +128,7 @@ void regidx_destroy(regidx_t *idx);
int regidx_overlap(regidx_t *idx, const char *chr, uint32_t start, uint32_t end, regitr_t *itr);
/*
- * regidx_insert() - add a new region.
+ * regidx_insert() - add a new region.
*
* After last region has been added, call regidx_insert(idx,NULL) to
* build the index.
diff --git a/htslib/sam.h b/htslib/sam.h
index 4055e0b..8852496 100644
--- a/htslib/sam.h
+++ b/htslib/sam.h
@@ -379,6 +379,10 @@ int sam_index_build3(const char *fn, const char *fnidx, int min_shift, int nthre
int sam_parse1(kstring_t *s, bam_hdr_t *h, bam1_t *b) HTS_RESULT_USED;
int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) HTS_RESULT_USED;
+
+ /*!
+ * @return >= 0 on successfully reading a new record, -1 on end of stream, < -1 on error
+ **/
int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b) HTS_RESULT_USED;
int sam_write1(samFile *fp, const bam_hdr_t *h, const bam1_t *b) HTS_RESULT_USED;
@@ -440,7 +444,7 @@ uint32_t bam_auxB_len(const uint8_t *s);
@return The idx'th value, or 0 on error.
If the array is not an integer type, errno is set to EINVAL. If idx
is greater than or equal to the value returned by bam_auxB_len(s),
- errno is set to ERANGE. In both cases, 0 will be returned.
+ errno is set to ERANGE. In both cases, 0 will be returned.
*/
int64_t bam_auxB2i(const uint8_t *s, uint32_t idx);
diff --git a/htslib/vcfutils.h b/htslib/vcfutils.h
index 0c5828e..4999df4 100644
--- a/htslib/vcfutils.h
+++ b/htslib/vcfutils.h
@@ -51,7 +51,7 @@ int bcf_trim_alleles(const bcf_hdr_t *header, bcf1_t *line);
* @header: for access to BCF_DT_ID dictionary
* @line: VCF line obtained from vcf_parse1
* @mask: alleles to remove
- *
+ *
* If you have more than 31 alleles, then the integer bit mask will
* overflow, so use bcf_remove_allele_set instead
*/
@@ -63,7 +63,7 @@ void bcf_remove_alleles(const bcf_hdr_t *header, bcf1_t *line, int mask) HTS_DEP
* @line: VCF line obtained from vcf_parse1
* @rm_set: pointer to kbitset_t object with bits set for allele
* indexes to remove
- *
+ *
* Returns 0 on success or -1 on failure
*
* Number=A,R,G INFO and FORMAT fields will be updated accordingly.
diff --git a/knetfile.c b/knetfile.c
index dc20782..6bbc9e2 100644
--- a/knetfile.c
+++ b/knetfile.c
@@ -395,7 +395,7 @@ knetFile *khttp_parse_url(const char *fn, const char *mode)
} else {
fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
for (q = fp->host; *q && *q != ':'; ++q);
- if (*q == ':') *q++ = 0;
+ if (*q == ':') *q++ = 0;
fp->port = strdup(*q? q : "80");
fp->path = strdup(fn);
}
@@ -489,7 +489,7 @@ knetFile *knet_open(const char *fn, const char *mode)
* be undefined on some systems, although it is defined on my
* Mac and the Linux I have tested on. */
int fd = open(fn, O_RDONLY | O_BINARY);
-#else
+#else
int fd = open(fn, O_RDONLY);
#endif
if (fd == -1) {
diff --git a/kstring.c b/kstring.c
index 06e8cdf..b911902 100644
--- a/kstring.c
+++ b/kstring.c
@@ -206,7 +206,7 @@ int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
int i, n, max, last_char, last_start, *offsets, l;
n = 0; max = *_max; offsets = *_offsets;
l = strlen(s);
-
+
#define __ksplit_aux do { \
if (_offsets) { \
s[i] = 0; \
diff --git a/multipart.c b/multipart.c
index 66d5707..3ae568c 100644
--- a/multipart.c
+++ b/multipart.c
@@ -144,8 +144,8 @@ static const struct hFILE_backend multipart_backend =
// not the type expected for a particular GA4GH field, or it may be '?' or
// '\0' which should be propagated.
static char
-parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json,
- kstring_t *b, kstring_t *header)
+parse_ga4gh_body_json(hFILE_multipart *fp, hFILE *json,
+ kstring_t *b, kstring_t *header)
{
hts_json_token t;
@@ -205,12 +205,37 @@ parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json,
else if (hts_json_fskip_value(json, '\0') != 'v') return '?';
}
+ return 'v';
+}
+
+// Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing),
+// or upon encountering an unexpected token, that token's type.
+// Explicit `return '?'` means a JSON parsing error, typically a member key
+// that is not a string. An unexpected token may be a valid token that was
+// not the type expected for a particular GA4GH field, or it may be '?' or
+// '\0' which should be propagated.
+static char
+parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json,
+ kstring_t *b, kstring_t *header) {
+ hts_json_token t;
+
+ if (hts_json_fnext(json, &t, b) != '{') return t.type;
+ while (hts_json_fnext(json, &t, b) != '}') {
+ if (t.type != 's') return '?';
+
+ if (strcmp(t.str, "htsget") == 0) {
+ char ret = parse_ga4gh_body_json(fp, json, b, header);
+ if (ret != 'v') return ret;
+ }
+ else return '?';
+ }
+
if (hts_json_fnext(json, &t, b) != '\0') return '?';
return 'v';
}
-hFILE *hopen_json_redirect(hFILE *hfile, const char *mode)
+hFILE *hopen_htsget_redirect(hFILE *hfile, const char *mode)
{
hFILE_multipart *fp;
kstring_t s1 = { 0, 0, NULL }, s2 = { 0, 0, NULL };
diff --git a/realn.c b/realn.c
index d856f3c..a33acde 100644
--- a/realn.c
+++ b/realn.c
@@ -78,7 +78,7 @@ int sam_cap_mapq(bam1_t *b, const char *ref, int ref_len, int thres)
if (t > thres) return -1;
if (t < 0) t = 0;
t = sqrt((thres - t) / thres) * thres;
-// fprintf(stderr, "%s %lf %d\n", bam_get_qname(b), t, q);
+ //fprintf(stderr, "%s %lf %d\n", bam_get_qname(b), t, q);
return (int)(t + .499);
}
diff --git a/regidx.c b/regidx.c
index 5c4c77d..874f9c9 100644
--- a/regidx.c
+++ b/regidx.c
@@ -1,4 +1,4 @@
-/*
+/*
Copyright (C) 2014 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -9,10 +9,10 @@
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -95,7 +95,7 @@ int _regidx_build_index(regidx_t *idx)
int iend = list->regs[j].end >> LIDX_SHIFT;
if ( imax < iend + 1 )
{
- int old_imax = imax;
+ int old_imax = imax;
imax = iend + 1;
kroundup32(imax);
list->idx = (int*) realloc(list->idx, imax*sizeof(int));
@@ -153,7 +153,7 @@ int regidx_insert(regidx_t *idx, char *line)
if ( idx->rid_prev==rid )
{
- if ( idx->start_prev > reg.start || (idx->start_prev==reg.start && idx->end_prev>reg.end) )
+ if ( idx->start_prev > reg.start || (idx->start_prev==reg.start && idx->end_prev>reg.end) )
{
hts_log_error("The regions are not sorted: %s:%d-%d is before %s:%d-%d",
idx->str.s,idx->start_prev+1,idx->end_prev+1,idx->str.s,reg.start+1,reg.end+1);
@@ -197,7 +197,7 @@ regidx_t *regidx_init(const char *fname, regidx_parse_f parser, regidx_free_f fr
if ( payload_size ) idx->payload = malloc(payload_size);
if ( !fname ) return idx;
-
+
kstring_t str = {0,0,0};
htsFile *fp = hts_open(fname,"r");
@@ -208,7 +208,7 @@ regidx_t *regidx_init(const char *fname, regidx_parse_f parser, regidx_free_f fr
if ( regidx_insert(idx, str.s) ) goto error;
}
regidx_insert(idx, NULL);
-
+
free(str.s);
hts_close(fp);
return idx;
@@ -253,7 +253,7 @@ int regidx_overlap(regidx_t *idx, const char *chr, uint32_t from, uint32_t to, r
reglist_t *list = &idx->seq[iseq];
if ( !list->nregs ) return 0;
- int i, ibeg = from>>LIDX_SHIFT;
+ int i, ibeg = from>>LIDX_SHIFT;
int ireg = ibeg < list->nidx ? list->idx[ibeg] : list->idx[ list->nidx - 1 ];
if ( ireg < 0 )
{
@@ -290,7 +290,7 @@ int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, reg_t *re
while ( *ss && isspace_c(*ss) ) ss++;
if ( !*ss ) return -1; // skip blank lines
if ( *ss=='#' ) return -1; // skip comments
-
+
char *se = ss;
while ( *se && !isspace_c(*se) ) se++;
if ( !*se ) { hts_log_error("Could not parse bed line: %s", line); return -2; }
@@ -305,7 +305,7 @@ int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, reg_t *re
ss = se+1;
reg->end = hts_parse_decimal(ss, &se, 0) - 1;
if ( ss==se ) { hts_log_error("Could not parse bed line: %s", line); return -2; }
-
+
return 0;
}
@@ -315,7 +315,7 @@ int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, reg_t *re
while ( *ss && isspace_c(*ss) ) ss++;
if ( !*ss ) return -1; // skip blank lines
if ( *ss=='#' ) return -1; // skip comments
-
+
char *se = ss;
while ( *se && !isspace_c(*se) ) se++;
if ( !*se ) { hts_log_error("Could not parse bed line: %s", line); return -2; }
@@ -336,7 +336,7 @@ int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, reg_t *re
if ( ss==se ) reg->end = reg->start;
else reg->end--;
}
-
+
return 0;
}
diff --git a/sam.c b/sam.c
index 5e9c20d..ac18ba8 100644
--- a/sam.c
+++ b/sam.c
@@ -386,9 +386,11 @@ int bam_read1(BGZF *fp, bam1_t *b)
if (ret == 0) return -1; // normal end-of-file
else return -2; // truncated
}
+ if (fp->is_be)
+ ed_swap_4p(&block_len);
+ if (block_len < 32) return -4; // block_len includes core data
if (bgzf_read(fp, x, 32) != 32) return -3;
if (fp->is_be) {
- ed_swap_4p(&block_len);
for (i = 0; i < 8; ++i) ed_swap_4p(x + i);
}
c->tid = x[0]; c->pos = x[1];
@@ -421,6 +423,15 @@ int bam_read1(BGZF *fp, bam1_t *b)
bgzf_read(fp, b->data + c->l_qname, b->l_data - c->l_qname) != b->l_data - c->l_qname)
return -4;
if (fp->is_be) swap_data(c, b->l_data, b->data, 0);
+
+ // Sanity check for broken CIGAR alignments
+ if (c->n_cigar > 0 && c->l_qseq > 0 && !(c->flag & BAM_FUNMAP)
+ && bam_cigar2qlen(c->n_cigar, bam_get_cigar(b)) != c->l_qseq) {
+ hts_log_error("CIGAR and query sequence lengths differ for %s",
+ bam_get_qname(b));
+ return -4;
+ }
+
return 4 + block_len;
}
@@ -565,7 +576,10 @@ static int cram_readrec(BGZF *ignored, void *fpv, void *bv, int *tid, int *beg,
{
htsFile *fp = fpv;
bam1_t *b = bv;
- return cram_get_bam_seq(fp->fp.cram, &b);
+ int ret = cram_get_bam_seq(fp->fp.cram, &b);
+ return ret >= 0
+ ? ret
+ : (cram_eof(fp->fp.cram) ? -1 : -2);
}
// This is used only with read_rest=1 iterators, so need not set tid/beg/end.
@@ -575,7 +589,12 @@ static int sam_bam_cram_readrec(BGZF *bgzfp, void *fpv, void *bv, int *tid, int
bam1_t *b = bv;
switch (fp->format.format) {
case bam: return bam_read1(bgzfp, b);
- case cram: return cram_get_bam_seq(fp->fp.cram, &b);
+ case cram: {
+ int ret = cram_get_bam_seq(fp->fp.cram, &b);
+ return ret >= 0
+ ? ret
+ : (cram_eof(fp->fp.cram) ? -1 : -2);
+ }
default:
// TODO Need headers available to implement this for SAM files
hts_log_error("Not implemented for SAM files");
@@ -1852,7 +1871,7 @@ static inline int resolve_cigar2(bam_pileup1_t *p, int32_t pos, cstate_t *s)
uint32_t *cigar = bam_get_cigar(b);
int k;
// determine the current CIGAR operation
-// fprintf(stderr, "%s\tpos=%d\tend=%d\t(%d,%d,%d)\n", bam_get_qname(b), pos, s->end, s->k, s->x, s->y);
+ //fprintf(stderr, "%s\tpos=%d\tend=%d\t(%d,%d,%d)\n", bam_get_qname(b), pos, s->end, s->k, s->x, s->y);
if (s->k == -1) { // never processed
if (c->n_cigar == 1) { // just one operation, save a loop
if (_cop(cigar[0]) == BAM_CMATCH || _cop(cigar[0]) == BAM_CEQUAL || _cop(cigar[0]) == BAM_CDIFF) s->k = 0, s->x = c->pos, s->y = 0;
diff --git a/synced_bcf_reader.c b/synced_bcf_reader.c
index 47715da..f5ba8e6 100644
--- a/synced_bcf_reader.c
+++ b/synced_bcf_reader.c
@@ -95,13 +95,13 @@ char *bcf_sr_strerror(int errnum)
int bcf_sr_set_opt(bcf_srs_t *readers, bcf_sr_opt_t opt, ...)
{
va_list args;
- switch (opt)
+ switch (opt)
{
case BCF_SR_REQUIRE_IDX:
readers->require_index = 1;
return 0;
- case BCF_SR_PAIR_LOGIC:
+ case BCF_SR_PAIR_LOGIC:
va_start(args, opt);
BCF_SR_AUX(readers)->sort.pair = va_arg(args, int);
return 0;
diff --git a/tabix.1 b/tabix.1
index ea5d5e5..b4990ac 100644
--- a/tabix.1
+++ b/tabix.1
@@ -1,4 +1,4 @@
-.TH tabix 1 "21 June 2017" "htslib-1.5" "Bioinformatics tools"
+.TH tabix 1 "28 September 2017" "htslib-1.6" "Bioinformatics tools"
.SH NAME
.PP
bgzip \- Block compression/decompression utility
diff --git a/tabix.c b/tabix.c
index 681f8a0..d802e11 100644
--- a/tabix.c
+++ b/tabix.c
@@ -29,6 +29,7 @@ DEALINGS IN THE SOFTWARE. */
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
+#include <strings.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -69,7 +70,6 @@ static void error(const char *format, ...)
int file_type(const char *fname)
{
int l = strlen(fname);
- int strcasecmp(const char *s1, const char *s2);
if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF;
else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED;
else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM;
@@ -171,7 +171,7 @@ static int query_regions(args_t *args, char *fname, char **regs, int nregs)
hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]);
while ( bcf_itr_next(fp, itr, rec) >=0 )
{
- if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
+ if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
bcf_write(out,hdr,rec);
}
tbx_itr_destroy(itr);
diff --git a/test/compare_sam.pl b/test/compare_sam.pl
index a241c6b..6860c91 100755
--- a/test/compare_sam.pl
+++ b/test/compare_sam.pl
@@ -64,6 +64,8 @@ while (<$fd2>) {
# Compare lines
while ($ln1 && $ln2) {
+ $ln1 =~ s/\015?\012/\n/;
+ $ln2 =~ s/\015?\012/\n/;
chomp($ln1);
chomp($ln2);
diff --git a/test/hfile.c b/test/hfile.c
index 16ad670..577b817 100644
--- a/test/hfile.c
+++ b/test/hfile.c
@@ -62,8 +62,8 @@ char *slurp(const char *filename)
char *text;
struct stat sbuf;
size_t filesize;
- FILE *f = fopen(filename, "r");
- if (f == NULL) fail("fopen(\"%s\", \"r\")", filename);
+ FILE *f = fopen(filename, "rb");
+ if (f == NULL) fail("fopen(\"%s\", \"rb\")", filename);
if (fstat(fileno(f), &sbuf) != 0) fail("fstat(\"%s\")", filename);
filesize = sbuf.st_size;
diff --git a/test/sam.c b/test/sam.c
index 736281d..dd83336 100644
--- a/test/sam.c
+++ b/test/sam.c
@@ -43,7 +43,7 @@ DEALINGS IN THE SOFTWARE. */
int status;
-static void HTS_FORMAT(printf, 1, 2) fail(const char *fmt, ...)
+static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) fail(const char *fmt, ...)
{
va_list args;
diff --git a/test/tabix/test-tabix.sh b/test/tabix/test-tabix.sh
index 358a3dd..4a4b045 100755
--- a/test/tabix/test-tabix.sh
+++ b/test/tabix/test-tabix.sh
@@ -57,11 +57,12 @@ run_test() {
y="exit_code"
elif [ "x$e" != "x" -a "$e" != "." ]
then
- if cmp -s _out.tmp "$e"
+ sed -n 's/.*/&/p' _out.tmp > _out.tmp2
+ if cmp -s _out.tmp2 "$e"
then
# Output was as expected
r="P"
- rm -f _out.tmp _err.tmp
+ rm -f _out.tmp _out.tmp2 _err.tmp
else
# Output differed
r="F"
@@ -70,7 +71,7 @@ run_test() {
else
# Expected zero exit code and got it.
r="P"
- rm -f _out.tmp _err.tmp
+ rm -f _out.tmp _out.tmp2 _err.tmp
fi
if [ "$r" = "F" ]
diff --git a/test/test-bcf-sr.c b/test/test-bcf-sr.c
index a2e6539..ebe9390 100644
--- a/test/test-bcf-sr.c
+++ b/test/test-bcf-sr.c
@@ -1,4 +1,4 @@
-/*
+/*
Copyright (C) 2017 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -9,10 +9,10 @@
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -61,11 +61,11 @@ int main(int argc, char *argv[])
};
int c, pair = 0;
- while ((c = getopt_long(argc, argv, "p:h", loptions, NULL)) >= 0)
+ while ((c = getopt_long(argc, argv, "p:h", loptions, NULL)) >= 0)
{
switch (c)
{
- case 'p':
+ case 'p':
if ( !strcmp(optarg,"snps") ) pair |= BCF_SR_PAIR_SNPS;
else if ( !strcmp(optarg,"snp+ref") ) pair |= BCF_SR_PAIR_SNPS|BCF_SR_PAIR_SNP_REF;
else if ( !strcmp(optarg,"snps+ref") ) pair |= BCF_SR_PAIR_SNPS|BCF_SR_PAIR_SNP_REF;
@@ -120,7 +120,7 @@ int main(int argc, char *argv[])
str.l = 0;
bcf1_t *rec = bcf_sr_get_line(sr, i);
kputs(rec->n_allele > 1 ? rec->d.allele[1] : ".", &str);
- for (j=2; j<rec->n_allele; j++)
+ for (j=2; j<rec->n_allele; j++)
{
kputc(',', &str);
kputs(rec->d.allele[j], &str);
diff --git a/test/test-bcf-sr.pl b/test/test-bcf-sr.pl
index 8f78acb..3102cdd 100755
--- a/test/test-bcf-sr.pl
+++ b/test/test-bcf-sr.pl
@@ -156,6 +156,7 @@ sub check_outputs
{
my ($pos,@vals) = split(/\t/,$line);
chomp($vals[-1]);
+ $vals[-1] =~ s/\r$//;
push @{$out{$pos}},join("\t",@vals);
}
close($fh) or error("close failed: $fname_bin");
@@ -173,6 +174,7 @@ sub check_outputs
while (my $line=<$fh>)
{
chomp($line);
+ $line =~ s/\r$//;
push @plines,$line;
}
close($fh) or error("close failed: $fname_perl");
@@ -181,8 +183,8 @@ sub check_outputs
@plines = sort @plines;
for (my $i=0; $i<@plines; $i++)
{
- if ( $blines[$i] ne $plines[$i] )
- {
+ if ( $blines[$i] ne $plines[$i] )
+ {
#error("Different lines in $fname_bin vs $fname_perl:\n\t$blines[$i].\nvs\n\t$plines[$i].\n");
error("Different lines in $fname_bin vs $fname_perl:\n\t".join("\n\t",@blines)."\nvs\n\t".join("\n\t",@plines)."\n");
}
diff --git a/test/test-bcf-translate.c b/test/test-bcf-translate.c
new file mode 100644
index 0000000..f799c34
--- /dev/null
+++ b/test/test-bcf-translate.c
@@ -0,0 +1,192 @@
+/* test/test-bcf-translate.c
+
+ Copyright (C) 2017 Genome Research Ltd.
+
+ Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include <config.h>
+#include <stdio.h>
+#include <htslib/vcf.h>
+
+int main(int argc, char **argv)
+{
+ char *fname = argc>1 ? argv[1] : "/dev/null";
+ htsFile *fp = hts_open(fname, "w");
+ bcf_hdr_t *hdr1, *hdr2;
+
+ hdr1 = bcf_hdr_init("w");
+ hdr2 = bcf_hdr_init("w");
+
+ // Add two shared and two private annotations
+ bcf_hdr_append(hdr1, "##contig=<ID=1>");
+ bcf_hdr_append(hdr1, "##contig=<ID=2>");
+ bcf_hdr_append(hdr2, "##contig=<ID=2>");
+ bcf_hdr_append(hdr2, "##contig=<ID=1>");
+ bcf_hdr_append(hdr1, "##FILTER=<ID=FLT1,Description=\"Filter 1\">");
+ bcf_hdr_append(hdr1, "##FILTER=<ID=FLT2,Description=\"Filter 2\">");
+ bcf_hdr_append(hdr1, "##FILTER=<ID=FLT3,Description=\"Filter 3\">");
+ bcf_hdr_append(hdr2, "##FILTER=<ID=FLT4,Description=\"Filter 4\">");
+ bcf_hdr_append(hdr2, "##FILTER=<ID=FLT3,Description=\"Filter 3\">");
+ bcf_hdr_append(hdr2, "##FILTER=<ID=FLT2,Description=\"Filter 2\">");
+ bcf_hdr_append(hdr1, "##INFO=<ID=INF1,Number=.,Type=Integer,Description=\"Info 1\">");
+ bcf_hdr_append(hdr1, "##INFO=<ID=INF2,Number=.,Type=Integer,Description=\"Info 2\">");
+ bcf_hdr_append(hdr1, "##INFO=<ID=INF3,Number=.,Type=Integer,Description=\"Info 3\">");
+ bcf_hdr_append(hdr2, "##INFO=<ID=INF4,Number=.,Type=Integer,Description=\"Info 4\">");
+ bcf_hdr_append(hdr2, "##INFO=<ID=INF3,Number=.,Type=Integer,Description=\"Info 3\">");
+ bcf_hdr_append(hdr2, "##INFO=<ID=INF2,Number=.,Type=Integer,Description=\"Info 2\">");
+ bcf_hdr_append(hdr1, "##FORMAT=<ID=FMT1,Number=.,Type=Integer,Description=\"FMT 1\">");
+ bcf_hdr_append(hdr1, "##FORMAT=<ID=FMT2,Number=.,Type=Integer,Description=\"FMT 2\">");
+ bcf_hdr_append(hdr1, "##FORMAT=<ID=FMT3,Number=.,Type=Integer,Description=\"FMT 3\">");
+ bcf_hdr_append(hdr2, "##FORMAT=<ID=FMT4,Number=.,Type=Integer,Description=\"FMT 4\">");
+ bcf_hdr_append(hdr2, "##FORMAT=<ID=FMT3,Number=.,Type=Integer,Description=\"FMT 3\">");
+ bcf_hdr_append(hdr2, "##FORMAT=<ID=FMT2,Number=.,Type=Integer,Description=\"FMT 2\">");
+ bcf_hdr_add_sample(hdr1,"SMPL1");
+ bcf_hdr_add_sample(hdr1,"SMPL2");
+ bcf_hdr_add_sample(hdr2,"SMPL1");
+ bcf_hdr_add_sample(hdr2,"SMPL2");
+ bcf_hdr_sync(hdr1);
+ bcf_hdr_sync(hdr2);
+
+ hdr2 = bcf_hdr_merge(hdr2,hdr1);
+ bcf_hdr_sync(hdr2);
+ bcf_hdr_write(fp, hdr2);
+
+ bcf1_t *rec = bcf_init1();
+ rec->rid = bcf_hdr_name2id(hdr1, "1");
+ rec->pos = 0;
+ bcf_update_alleles_str(hdr1, rec, "G,A");
+ int32_t tmpi[3];
+ tmpi[0] = bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT1");
+ tmpi[1] = bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT2");
+ tmpi[2] = bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT3");
+ bcf_update_filter(hdr1, rec, tmpi, 3);
+ tmpi[0] = 1; bcf_update_info_int32(hdr1, rec, "INF1", tmpi, 1);
+ tmpi[0] = 2; bcf_update_info_int32(hdr1, rec, "INF2", tmpi, 1);
+ tmpi[0] = 3; bcf_update_info_int32(hdr1, rec, "INF3", tmpi, 1);
+ tmpi[0] = tmpi[1] = 1; bcf_update_format_int32(hdr1, rec, "FMT1", tmpi, 2);
+ tmpi[0] = tmpi[1] = 2; bcf_update_format_int32(hdr1, rec, "FMT2", tmpi, 2);
+ tmpi[0] = tmpi[1] = 3; bcf_update_format_int32(hdr1, rec, "FMT3", tmpi, 2);
+
+ bcf_remove_filter(hdr1, rec, bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT2"), 0);
+ bcf_update_info_int32(hdr1, rec, "INF2", NULL, 0);
+ bcf_update_format_int32(hdr1, rec, "FMT2", NULL, 0);
+
+ bcf_translate(hdr2, hdr1, rec);
+ bcf_write(fp, hdr2, rec);
+
+ // Clean
+ bcf_destroy1(rec);
+ bcf_hdr_destroy(hdr1);
+ bcf_hdr_destroy(hdr2);
+ int ret;
+ if ( (ret=hts_close(fp)) )
+ {
+ fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
+ exit(ret);
+ }
+ return 0;
+}
+
+
+
+ // // Create VCF header
+ // kstring_t str = {0,0,0};
+ // bcf_hdr_add_sample(hdr, "NA00003");
+ // bcf_hdr_add_sample(hdr, NULL); // to update internal structures
+ // bcf_hdr_write(fp, hdr);
+ // // Add a record
+ // // 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
+ // // .. CHROM
+ // rec->rid = bcf_hdr_name2id(hdr, "20");
+ // // .. POS
+ // rec->pos = 14369;
+ // // .. ID
+ // bcf_update_id(hdr, rec, "rs6054257");
+ // // .. REF and ALT
+ // bcf_update_alleles_str(hdr, rec, "G,A");
+ // // .. QUAL
+ // rec->qual = 29;
+ // // .. FILTER
+ // int32_t tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS");
+ // bcf_update_filter(hdr, rec, &tmpi, 1);
+ // // .. INFO
+ // tmpi = 3;
+ // bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1);
+ // tmpi = 14;
+ // bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1);
+ // float tmpf = 0.5;
+ // bcf_update_info_float(hdr, rec, "AF", &tmpf, 1);
+ // bcf_update_info_flag(hdr, rec, "DB", NULL, 1);
+ // bcf_update_info_flag(hdr, rec, "H2", NULL, 1);
+ // // .. FORMAT
+ // int32_t *tmpia = (int*)malloc(bcf_hdr_nsamples(hdr)*2*sizeof(int));
+ // tmpia[0] = bcf_gt_phased(0);
+ // tmpia[1] = bcf_gt_phased(0);
+ // tmpia[2] = bcf_gt_phased(1);
+ // tmpia[3] = bcf_gt_phased(0);
+ // tmpia[4] = bcf_gt_unphased(1);
+ // tmpia[5] = bcf_gt_unphased(1);
+ // bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2);
+ // tmpia[0] = 48;
+ // tmpia[1] = 48;
+ // tmpia[2] = 43;
+ // bcf_update_format_int32(hdr, rec, "GQ", tmpia, bcf_hdr_nsamples(hdr));
+ // tmpia[0] = 1;
+ // tmpia[1] = 8;
+ // tmpia[2] = 5;
+ // bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr));
+ // tmpia[0] = 51;
+ // tmpia[1] = 51;
+ // tmpia[2] = 51;
+ // tmpia[3] = 51;
+ // tmpia[4] = bcf_int32_missing;
+ // tmpia[5] = bcf_int32_missing;
+ // bcf_update_format_int32(hdr, rec, "HQ", tmpia, bcf_hdr_nsamples(hdr)*2);
+ // char *tmp_str[] = {"String1","SomeOtherString2","YetAnotherString3"};
+ // bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3);
+ // bcf_write1(fp, hdr, rec);
+ // // 20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./.
+ // bcf_clear1(rec);
+ // rec->rid = bcf_hdr_name2id(hdr, "20");
+ // rec->pos = 1110695;
+ // bcf_update_alleles_str(hdr, rec, "A,G,T");
+ // rec->qual = 67;
+ // tmpi = 2;
+ // bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1);
+ // tmpi = 10;
+ // bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1);
+ // float *tmpfa = (float*)malloc(2*sizeof(float));
+ // tmpfa[0] = 0.333;
+ // bcf_float_set_missing(tmpfa[1]);
+ // bcf_update_info_float(hdr, rec, "AF", tmpfa, 2);
+ // bcf_update_info_string(hdr, rec, "AA", "T");
+ // bcf_update_info_flag(hdr, rec, "DB", NULL, 1);
+ // tmpia[0] = bcf_gt_phased(2);
+ // tmpia[1] = bcf_int32_vector_end;
+ // tmpia[2] = bcf_gt_phased(1);
+ // tmpia[3] = bcf_int32_vector_end;
+ // tmpia[4] = bcf_gt_missing;
+ // tmpia[5] = bcf_gt_missing;
+ // bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2);
+ // bcf_write1(fp, hdr, rec);
+ // free(tmpia);
+ // free(tmpfa);
diff --git a/test/test-bcf-translate.out b/test/test-bcf-translate.out
new file mode 100644
index 0000000..e021f43
--- /dev/null
+++ b/test/test-bcf-translate.out
@@ -0,0 +1,18 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=2>
+##contig=<ID=1>
+##FILTER=<ID=FLT4,Description="Filter 4">
+##FILTER=<ID=FLT3,Description="Filter 3">
+##FILTER=<ID=FLT2,Description="Filter 2">
+##INFO=<ID=INF4,Number=.,Type=Integer,Description="Info 4">
+##INFO=<ID=INF3,Number=.,Type=Integer,Description="Info 3">
+##INFO=<ID=INF2,Number=.,Type=Integer,Description="Info 2">
+##FORMAT=<ID=FMT4,Number=.,Type=Integer,Description="FMT 4">
+##FORMAT=<ID=FMT3,Number=.,Type=Integer,Description="FMT 3">
+##FORMAT=<ID=FMT2,Number=.,Type=Integer,Description="FMT 2">
+##FILTER=<ID=FLT1,Description="Filter 1">
+##INFO=<ID=INF1,Number=.,Type=Integer,Description="Info 1">
+##FORMAT=<ID=FMT1,Number=.,Type=Integer,Description="FMT 1">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SMPL1 SMPL2
+1 1 . G A 0 FLT1;FLT3 INF1=1;INF3=3 FMT1:FMT3 1:3 1:3
diff --git a/test/test-regidx.c b/test/test-regidx.c
index 0aabc2d..30844a6 100644
--- a/test/test-regidx.c
+++ b/test/test-regidx.c
@@ -10,10 +10,10 @@
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
// Finish initialization
regidx_insert(idx,NULL);
- // Test
+ // Test
regitr_t itr;
int from, to;
@@ -111,7 +111,7 @@ int main(int argc, char **argv)
// Clean up
regidx_destroy(idx);
-
+
return 0;
}
diff --git a/test/test.pl b/test/test.pl
index 3ce6e67..776e125 100755
--- a/test/test.pl
+++ b/test/test.pl
@@ -40,6 +40,7 @@ test_vcf_api($opts,out=>'test-vcf-api.out');
test_vcf_sweep($opts,out=>'test-vcf-sweep.out');
test_vcf_various($opts);
test_bcf_sr_sort($opts);
+test_command($opts,cmd=>'test-bcf-translate -',out=>'test-bcf-translate.out');
test_convert_padded_header($opts);
test_rebgzip($opts);
test_logging($opts);
@@ -64,10 +65,28 @@ sub error
"Options:\n",
" -r, --redo-outputs Recreate expected output files.\n",
" -t, --temp-dir <path> When given, temporary files will not be removed.\n",
+ " -f, --fail-fast Fail-fast mode: exit as soon as a test fails.\n",
" -h, -?, --help This help message.\n",
"\n";
exit 1;
}
+
+sub cygpath {
+ my ($path) = @_;
+ $path = `cygpath -m $path`;
+ $path =~ s/\r?\n//;
+ return $path
+}
+
+sub safe_tempdir
+{
+ my $dir = tempdir(CLEANUP=>1);
+ if ($^O =~ /^msys/) {
+ $dir = cygpath($dir);
+ }
+ return $dir;
+}
+
sub parse_params
{
my $opts = { keep_files=>0, nok=>0, nfailed=>0 };
@@ -76,14 +95,20 @@ sub parse_params
my $ret = GetOptions (
't|temp-dir:s' => \$$opts{keep_files},
'r|redo-outputs' => \$$opts{redo_outputs},
+ 'f|fail-fast' => \$$opts{fail_fast},
'h|?|help' => \$help
);
if ( !$ret or $help ) { error(); }
- $$opts{tmp} = $$opts{keep_files} ? $$opts{keep_files} : tempdir(CLEANUP=>1);
+ $$opts{tmp} = $$opts{keep_files} ? $$opts{keep_files} : safe_tempdir();
if ( $$opts{keep_files} ) { cmd("mkdir -p $$opts{keep_files}"); }
$$opts{path} = $FindBin::RealBin;
$$opts{bin} = $FindBin::RealBin;
$$opts{bin} =~ s{/test/?$}{};
+ if ($^O =~ /^msys/) {
+ $$opts{path} = cygpath($$opts{path});
+ $$opts{bin} = cygpath($$opts{bin});
+ }
+
return $opts;
}
sub _cmd
@@ -149,11 +174,13 @@ sub test_cmd
{
my @exp = <$fh>;
$exp = join('',@exp);
+ $exp =~ s/\015?\012/\n/g;
close($fh);
}
elsif ( !$$opts{redo_outputs} ) { failed($opts,$test,"$$opts{path}/$args{out}: $!"); return; }
- if ( $exp ne $out )
+ (my $out_lf = $out) =~ s/\015?\012/\n/g;
+ if ( $exp ne $out_lf )
{
open(my $fh,'>',"$$opts{path}/$args{out}.new") or error("$$opts{path}/$args{out}.new");
print $fh $out;
@@ -181,6 +208,9 @@ sub failed
if ( defined $reason ) { print STDERR "\t$reason\n"; }
print STDERR ".. failed ...\n\n";
STDERR->flush();
+ if ($$opts{fail_fast}) {
+ die "\n";
+ }
}
sub passed
{
@@ -202,7 +232,7 @@ sub is_file_newer
my $test_view_failures;
sub testv {
- my ($cmd) = @_;
+ my ($opts, $cmd) = @_;
print " $cmd\n";
my ($ret, $out) = _cmd($cmd);
if ($ret != 0) {
@@ -210,6 +240,9 @@ sub testv {
print STDERR "FAILED\n$out\n";
STDERR->flush();
$test_view_failures++;
+ if ($$opts{fail_fast}) {
+ die "\n";
+ }
}
}
@@ -234,50 +267,50 @@ sub test_view
$test_view_failures = 0;
# SAM -> BAM -> SAM
- testv "./test_view $tv_args -S -b $sam > $bam";
- testv "./test_view $tv_args $bam > $bam.sam_";
- testv "./compare_sam.pl $sam $bam.sam_";
+ testv $opts, "./test_view $tv_args -S -b $sam > $bam";
+ testv $opts, "./test_view $tv_args $bam > $bam.sam_";
+ testv $opts, "./compare_sam.pl $sam $bam.sam_";
# SAM -> CRAM -> SAM
- testv "./test_view $tv_args -t $ref -S -C $sam > $cram";
- testv "./test_view $tv_args -D $cram > $cram.sam_";
- testv "./compare_sam.pl $md $sam $cram.sam_";
+ testv $opts, "./test_view $tv_args -t $ref -S -C $sam > $cram";
+ testv $opts, "./test_view $tv_args -D $cram > $cram.sam_";
+ testv $opts, "./compare_sam.pl $md $sam $cram.sam_";
# BAM -> CRAM -> BAM -> SAM
$cram = "$bam.cram";
- testv "./test_view $tv_args -t $ref -C $bam > $cram";
- testv "./test_view $tv_args -b -D $cram > $cram.bam";
- testv "./test_view $tv_args $cram.bam > $cram.bam.sam_";
- testv "./compare_sam.pl $md $sam $cram.bam.sam_";
+ testv $opts, "./test_view $tv_args -t $ref -C $bam > $cram";
+ testv $opts, "./test_view $tv_args -b -D $cram > $cram.bam";
+ testv $opts, "./test_view $tv_args $cram.bam > $cram.bam.sam_";
+ testv $opts, "./compare_sam.pl $md $sam $cram.bam.sam_";
# SAM -> CRAM3 -> SAM
$cram = "$base.tmp.cram";
- testv "./test_view $tv_args -t $ref -S -C -o VERSION=3.0 $sam > $cram";
- testv "./test_view $tv_args -D $cram > $cram.sam_";
- testv "./compare_sam.pl $md $sam $cram.sam_";
+ testv $opts, "./test_view $tv_args -t $ref -S -C -o VERSION=3.0 $sam > $cram";
+ testv $opts, "./test_view $tv_args -D $cram > $cram.sam_";
+ testv $opts, "./compare_sam.pl $md $sam $cram.sam_";
# BAM -> CRAM3 -> BAM -> SAM
$cram = "$bam.cram";
- testv "./test_view $tv_args -t $ref -C -o VERSION=3.0 $bam > $cram";
- testv "./test_view $tv_args -b -D $cram > $cram.bam";
- testv "./test_view $tv_args $cram.bam > $cram.bam.sam_";
- testv "./compare_sam.pl $md $sam $cram.bam.sam_";
+ testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 $bam > $cram";
+ testv $opts, "./test_view $tv_args -b -D $cram > $cram.bam";
+ testv $opts, "./test_view $tv_args $cram.bam > $cram.bam.sam_";
+ testv $opts, "./compare_sam.pl $md $sam $cram.bam.sam_";
# CRAM3 -> CRAM2
$cram = "$base.tmp.cram";
- testv "./test_view $tv_args -t $ref -C -o VERSION=2.1 $cram > $cram.cram";
+ testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=2.1 $cram > $cram.cram";
# CRAM2 -> CRAM3
- testv "./test_view $tv_args -t $ref -C -o VERSION=3.0 $cram.cram > $cram";
- testv "./test_view $tv_args $cram > $cram.sam_";
- testv "./compare_sam.pl $md $sam $cram.sam_";
+ testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 $cram.cram > $cram";
+ testv $opts, "./test_view $tv_args $cram > $cram.sam_";
+ testv $opts, "./compare_sam.pl $md $sam $cram.sam_";
# Java pre-made CRAM -> SAM
my $jcram = "${base}_java.cram";
if (-e $jcram) {
my $jsam = "${base}_java.tmp.sam_";
- testv "./test_view $tv_args -i reference=$ref $jcram > $jsam";
- testv "./compare_sam.pl -Baux $md $sam $jsam";
+ testv $opts, "./test_view $tv_args -i reference=$ref $jcram > $jsam";
+ testv $opts, "./compare_sam.pl -Baux $md $sam $jsam";
}
if ($test_view_failures == 0)
@@ -359,6 +392,13 @@ sub test_bcf_sr_sort
}
}
+sub test_command
+{
+ my ($opts, %args) = @_;
+ my $cmd = "$$opts{path}/$args{cmd}";
+ test_cmd($opts, %args, cmd=>$cmd);
+}
+
sub test_logging
{
my ($opts) = @_;
diff --git a/test/test_bgzf.c b/test/test_bgzf.c
index e34b22f..52c348f 100644
--- a/test/test_bgzf.c
+++ b/test/test_bgzf.c
@@ -23,6 +23,8 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
+#include <config.h>
+
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE.
#include <fcntl.h>
#include "htslib/bgzf.h"
#include "htslib/hfile.h"
+#include "hfile_internal.h"
const char *bgzf_suffix = ".gz";
const char *idx_suffix = ".gzi";
@@ -113,17 +116,7 @@ static BGZF * try_bgzf_open(const char *name, const char *mode,
static BGZF * try_bgzf_dopen(const char *name, const char *mode,
const char *func) {
BGZF *bgz = NULL;
- int fd = -1;
- if (strchr(mode, 'r')) {
- fd = open(name, O_RDONLY);
- } else if (strchr(mode, 'w')) {
- fd = open(name, O_WRONLY | O_CREAT | O_TRUNC, 0666);
- } else if (strchr(mode, 'a')) {
- fd = open(name, O_WRONLY | O_CREAT | O_APPEND, 0666);
- } else {
- errno = EINVAL;
- }
-
+ int fd = open(name, hfile_oflags(mode), 0666);
if (fd < 0) {
fprintf(stderr, "%s : Failed to open %s with mode %s : %s\n",
func, name, mode, strerror(errno));
@@ -309,7 +302,7 @@ static int setup(const char *src, Files *f) {
const unsigned int max = 50000;
unsigned int i;
size_t text_sz = max * 8 + 1;
-
+
mem = calloc(5, len);
if (mem == NULL) {
perror(__func__);
@@ -684,7 +677,7 @@ static int test_bgzf_getline(Files *f, const char *mode, int nthreads) {
bg_put = try_bgzf_write(bgz, f->text, f->ltext, f->tmp_bgzf, __func__);
if (bg_put < 0) goto fail;
-
+
if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__) != 0) goto fail;
bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__);
diff --git a/test/test_view.c b/test/test_view.c
index 7f173fd..9d2678a 100644
--- a/test/test_view.c
+++ b/test/test_view.c
@@ -35,6 +35,13 @@ DEALINGS IN THE SOFTWARE. */
#include "htslib/sam.h"
+enum test_op {
+ READ_COMPRESSED = 1,
+ WRITE_COMPRESSED = 2,
+ READ_CRAM = 4,
+ WRITE_CRAM = 8
+};
+
int main(int argc, char *argv[])
{
samFile *in;
@@ -52,30 +59,46 @@ int main(int argc, char *argv[])
int benchmark = 0;
int nthreads = 0; // shared pool
- while ((c = getopt(argc, argv, "IbDCSl:t:i:o:N:BZ:@:")) >= 0) {
+ while ((c = getopt(argc, argv, "DSIt:i:bCl:o:N:BZ:@:")) >= 0) {
switch (c) {
- case 'S': flag |= 1; break;
- case 'b': flag |= 2; break;
- case 'D': flag |= 4; break;
- case 'C': flag |= 8; break;
- case 'B': benchmark = 1; break;
- case 'l': clevel = atoi(optarg); flag |= 2; break;
- case 't': fn_ref = optarg; break;
+ case 'D': flag |= READ_CRAM; break;
+ case 'S': flag |= READ_COMPRESSED; break;
case 'I': ignore_sam_err = 1; break;
- case 'i': if (hts_opt_add(&in_opts, optarg)) return 1; break;
+ case 't': fn_ref = optarg; break;
+ case 'i': if (hts_opt_add(&in_opts, optarg)) return 1; break;
+ case 'b': flag |= WRITE_COMPRESSED; break;
+ case 'C': flag |= WRITE_CRAM; break;
+ case 'l': clevel = atoi(optarg); flag |= WRITE_COMPRESSED; break;
case 'o': if (hts_opt_add(&out_opts, optarg)) return 1; break;
case 'N': nreads = atoi(optarg); break;
+ case 'B': benchmark = 1; break;
case 'Z': extra_hdr_nuls = atoi(optarg); break;
case '@': nthreads = atoi(optarg); break;
}
}
if (argc == optind) {
- fprintf(stderr, "Usage: samview [-bSCSIB] [-N num_reads] [-l level] [-o option=value] [-Z hdr_nuls] <in.bam>|<in.sam>|<in.cram> [region]\n");
+ fprintf(stderr, "Usage: test_view [-DSI] [-t fn_ref] [-i option=value] [-bC] [-l level] [-o option=value] [-N num_reads] [-B] [-Z hdr_nuls] [-@ num_threads] <in.bam>|<in.sam>|<in.cram> [region]\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "-D: read CRAM format (mode 'c')\n");
+ fprintf(stderr, "-S: read compressed BCF, BAM, FAI (mode 'b')\n");
+ fprintf(stderr, "-I: ignore SAM parsing errors\n");
+ fprintf(stderr, "-t: fn_ref: load CRAM references from the specificed fasta file instead of @SQ headers when writing a CRAM file\n");
+ fprintf(stderr, "-i: option=value: set an option for CRAM input\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "-b: write compressed BCF, BAM, FAI (mode 'b')\n");
+ fprintf(stderr, "-C: write CRAM format (mode 'c')\n");
+ fprintf(stderr, "-l 0-9: set zlib compression level\n");
+ fprintf(stderr, "-o option=value: set an option for CRAM output\n");
+ fprintf(stderr, "-N: num_reads: limit the output to the first num_reads reads\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "-B: enable benchmarking\n");
+ fprintf(stderr, "-Z hdr_nuls: append specified number of null bytes to the SAM header\n");
+ fprintf(stderr, "-@ num_threads: use thread pool with specified number of threads\n");
return 1;
}
strcpy(moder, "r");
- if (flag&4) strcat(moder, "c");
- else if ((flag&1) == 0) strcat(moder, "b");
+ if (flag & READ_CRAM) strcat(moder, "c");
+ else if ((flag & READ_COMPRESSED) == 0) strcat(moder, "b");
in = sam_open(argv[optind], moder);
if (in == NULL) {
@@ -103,8 +126,8 @@ int main(int argc, char *argv[])
strcpy(modew, "w");
if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
- if (flag&8) strcat(modew, "c");
- else if (flag&2) strcat(modew, "b");
+ if (flag & WRITE_CRAM) strcat(modew, "c");
+ else if (flag & WRITE_COMPRESSED) strcat(modew, "b");
out = hts_open("-", modew);
if (out == NULL) {
fprintf(stderr, "Error opening standard output\n");
@@ -112,7 +135,7 @@ int main(int argc, char *argv[])
}
/* CRAM output */
- if (flag & 8) {
+ if (flag & WRITE_CRAM) {
int ret;
// Parse input header and use for CRAM output
@@ -155,7 +178,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "Error writing output header.\n");
exit_code = 1;
}
- if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
+ if (optind + 1 < argc && !(flag & READ_COMPRESSED)) { // BAM input and has a region
int i;
hts_idx_t *idx;
if ((idx = sam_index_load(in, argv[optind])) == 0) {
diff --git a/vcf.c b/vcf.c
index 6a7f16a..604344f 100644
--- a/vcf.c
+++ b/vcf.c
@@ -517,7 +517,7 @@ int bcf_hdr_register_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec)
vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_ID];
k = kh_get(vdict, d, str);
- if ( k != kh_end(d) )
+ if ( k != kh_end(d) )
{
// already present
free(str);
@@ -1345,9 +1345,9 @@ static int bcf1_sync(bcf1_t *line)
else if ( line->d.shared_dirty )
{
// The line was edited, update the BCF data block.
-
+
if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line,BCF_UN_STR);
-
+
// ptr_ori points to the original unchanged BCF data.
uint8_t *ptr_ori = (uint8_t *) line->shared.s;
@@ -1962,14 +1962,14 @@ static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, char *p
if (fmt[j].max_l < l) fmt[j].max_l = l;
if (fmt[j].is_gt && fmt[j].max_g < g) fmt[j].max_g = g;
l = 0, m = g = 1;
- if ( *r==':' )
+ if ( *r==':' )
{
j++;
- if ( j>=v->n_fmt )
+ if ( j>=v->n_fmt )
{
hts_log_error("Incorrect number of FORMAT fields at %s:%d",
h->id[BCF_DT_CTG][v->rid].key, v->pos+1);
- exit(1);
+ exit(1);
}
}
else break;
@@ -2161,6 +2161,9 @@ int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v)
int32_t *flt_a = NULL, *val_a = NULL;
int ret = -1;
+ if (!s || !h || !v || !(s->s))
+ return ret;
+
// Assumed in lots of places, but we may as well spot this early
assert(sizeof(float) == sizeof(int32_t));
@@ -2528,9 +2531,9 @@ int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s)
kputs(h->id[BCF_DT_ID][z->key].key, s);
if (z->len <= 0) continue;
kputc('=', s);
- if (z->len == 1)
+ if (z->len == 1)
{
- switch (z->type)
+ switch (z->type)
{
case BCF_BT_INT8: if ( z->v1.i==bcf_int8_missing ) kputc('.', s); else kputw(z->v1.i, s); break;
case BCF_BT_INT16: if ( z->v1.i==bcf_int16_missing ) kputc('.', s); else kputw(z->v1.i, s); break;
@@ -2929,11 +2932,12 @@ int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *line)
int src_id = line->d.info[i].key;
int dst_id = src_hdr->transl[BCF_DT_ID][src_id];
if ( dst_id<0 ) continue;
+ line->d.info[i].key = dst_id;
+ if ( !line->d.info[i].vptr ) continue; // skip deleted
int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8;
int dst_size = dst_id>>7 ? ( dst_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8;
if ( src_size==dst_size ) // can overwrite
{
- line->d.info[i].key = dst_id;
uint8_t *vptr = line->d.info[i].vptr - line->d.info[i].vptr_off;
if ( dst_size==BCF_BT_INT8 ) { vptr[1] = (uint8_t)dst_id; }
else if ( dst_size==BCF_BT_INT16 ) { *(uint16_t*)vptr = (uint16_t)dst_id; }
@@ -2942,15 +2946,15 @@ int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *line)
else // must realloc
{
bcf_info_t *info = &line->d.info[i];
- assert( !info->vptr_free );
kstring_t str = {0,0,0};
bcf_enc_int1(&str, dst_id);
bcf_enc_size(&str, info->len,info->type);
- info->vptr_off = str.l;
+ uint32_t vptr_off = str.l;
kputsn((char*)info->vptr, info->vptr_len, &str);
+ if( info->vptr_free ) free(info->vptr - info->vptr_off);
+ info->vptr_off = vptr_off;
info->vptr = (uint8_t*)str.s + info->vptr_off;
info->vptr_free = 1;
- info->key = dst_id;
line->d.shared_dirty |= BCF1_DIRTY_INF;
}
}
@@ -2961,11 +2965,12 @@ int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *line)
int src_id = line->d.fmt[i].id;
int dst_id = src_hdr->transl[BCF_DT_ID][src_id];
if ( dst_id<0 ) continue;
+ line->d.fmt[i].id = dst_id;
+ if( !line->d.fmt[i].p ) continue; // skip deleted
int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8;
int dst_size = dst_id>>7 ? ( dst_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8;
if ( src_size==dst_size ) // can overwrite
{
- line->d.fmt[i].id = dst_id;
uint8_t *p = line->d.fmt[i].p - line->d.fmt[i].p_off; // pointer to the vector size (4bits) and BT type (4bits)
if ( dst_size==BCF_BT_INT8 ) { p[1] = dst_id; }
else if ( dst_size==BCF_BT_INT16 ) { uint8_t *x = (uint8_t*) &dst_id; p[1] = x[0]; p[2] = x[1]; }
@@ -2974,15 +2979,15 @@ int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *line)
else // must realloc
{
bcf_fmt_t *fmt = &line->d.fmt[i];
- assert( !fmt->p_free );
kstring_t str = {0,0,0};
bcf_enc_int1(&str, dst_id);
bcf_enc_size(&str, fmt->n, fmt->type);
- fmt->p_off = str.l;
+ uint32_t p_off = str.l;
kputsn((char*)fmt->p, fmt->p_len, &str);
+ if( fmt->p_free ) free(fmt->p - fmt->p_off);
+ fmt->p_off = p_off;
fmt->p = (uint8_t*)str.s + fmt->p_off;
fmt->p_free = 1;
- fmt->id = dst_id;
line->d.indiv_dirty = 1;
}
}
@@ -3176,7 +3181,7 @@ static void bcf_set_variant_type(const char *ref, const char *alt, variant_t *va
{
if ( *alt == '.' || *ref==*alt ) { var->n = 0; var->type = VCF_REF; return; }
if ( *alt == 'X' ) { var->n = 0; var->type = VCF_REF; return; } // mpileup's X allele shouldn't be treated as variant
- if ( *alt == '*' ) { var->n = 0; var->type = VCF_REF; return; }
+ if ( *alt == '*' ) { var->n = 0; var->type = VCF_REF; return; }
var->n = 1; var->type = VCF_SNP; return;
}
if ( alt[0]=='<' )
@@ -3659,7 +3664,7 @@ bcf_info_t *bcf_get_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key)
return bcf_get_info_id(line, id);
}
-bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id)
+bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id)
{
int i;
if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);
@@ -3670,7 +3675,7 @@ bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id)
return NULL;
}
-bcf_info_t *bcf_get_info_id(bcf1_t *line, const int id)
+bcf_info_t *bcf_get_info_id(bcf1_t *line, const int id)
{
int i;
if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO);
diff --git a/vcfutils.c b/vcfutils.c
index 4e1a6c9..39fc695 100644
--- a/vcfutils.c
+++ b/vcfutils.c
@@ -70,7 +70,7 @@ int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which)
if ( an<nac )
{
hts_log_error("Incorrect AN/AC counts at %s:%d", header->id[BCF_DT_CTG][line->rid].key, line->pos+1);
- exit(1);
+ exit(1);
}
ac[0] = an - nac;
return 1;
diff --git a/version.sh b/version.sh
new file mode 100755
index 0000000..57d0dab
--- /dev/null
+++ b/version.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+# Master version, for use in tarballs or non-git source copies
+VERSION=1.6
+
+# If we have a git clone, then check against the current tag
+if [ -e .git ]
+then
+ # If we ever get to 10.x this will need to be more liberal
+ VERSION=`git describe --match '[0-9].[0-9]*' --dirty`
+fi
+
+# Numeric version is for use in .dylib or .so libraries
+#
+# Follows the same logic from the Makefile commit c2e93911
+# as non-numeric versions get bumped to patch level 255 to indicate
+# an unknown value.
+if [ "$1" = "numeric" ]
+then
+ v1=`expr "$VERSION" : '\([0-9]*\)'`
+ v2=`expr "$VERSION" : '[0-9]*.\([0-9]*\)'`
+ v3=`expr "$VERSION" : '[0-9]*.[0-9]*.\([0-9]*\)'`
+ if [ -z "`expr "$VERSION" : '^\([0-9.]*\)$'`" ]
+ then
+ VERSION="$v1.$v2.255"
+ else
+ VERSION="$v1.$v2${v3:+.}$v3"
+ fi
+fi
+
+echo $VERSION
diff --git a/win/rand.c b/win/rand.c
new file mode 100644
index 0000000..85de95a
--- /dev/null
+++ b/win/rand.c
@@ -0,0 +1,134 @@
+/* rand.c -- drand48 implementation from the FreeBSD source tree. */
+
+// This file is an amalgamation of the many small files in FreeBSD to do with
+// drand48 and friends implementations.
+// It comprises _rand48.c, rand48.h, srand48.c, drand48.c, erand48.c, lrand48.c
+
+/*
+ * Copyright (c) 1993 Martin Birgmeier
+ * All rights reserved.
+ *
+ * You may redistribute unmodified or modified versions of this source
+ * code provided that the above copyright notice and this and the
+ * following conditions are retained.
+ *
+ * This software is provided ``as is'', and comes with no warranties
+ * of any kind. I shall in no event be liable for anything that happens
+ * to anyone/anything when using this software.
+ */
+
+//#include <sys/cdefs.h>
+//__FBSDID("$FreeBSD: src/lib/libc/gen/_rand48.c,v 1.2 2002/03/22 21:52:05 obrien Exp $");
+
+#include <math.h>
+#include "win/rand.h"
+
+/* Default 48-bit state as 16-bit words, least significant word first. */
+#define RAND48_SEED_0 (0x330e)
+#define RAND48_SEED_1 (0xabcd)
+#define RAND48_SEED_2 (0x1234)
+/* 48-bit multiplier 0x5deece66d, least significant word first. */
+#define RAND48_MULT_0 (0xe66d)
+#define RAND48_MULT_1 (0xdeec)
+#define RAND48_MULT_2 (0x0005)
+/* Additive constant of the recurrence. */
+#define RAND48_ADD (0x000b)
+
+/* State shared by hts_drand48() and hts_lrand48(). */
+static unsigned short _rand48_seed[3] = {
+    RAND48_SEED_0,
+    RAND48_SEED_1,
+    RAND48_SEED_2
+};
+/* Multiplier and addend; hts_srand48() resets them to the defaults. */
+static unsigned short _rand48_mult[3] = {
+    RAND48_MULT_0,
+    RAND48_MULT_1,
+    RAND48_MULT_2
+};
+static unsigned short _rand48_add = RAND48_ADD;
+
+/*
+ * Advance xseed in place by one step of the recurrence
+ *     X' = (mult * X + add) mod 2^48
+ * computed 16 bits at a time; xseed[0] is the least significant word.
+ */
+static void
+_dorand48(unsigned short xseed[3])
+{
+    unsigned long accu;
+    unsigned short temp[2];
+
+    accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0] +
+        (unsigned long) _rand48_add;
+    temp[0] = (unsigned short) accu; /* lower 16 bits */
+    accu >>= sizeof(unsigned short) * 8;
+    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1] +
+        (unsigned long) _rand48_mult[1] * (unsigned long) xseed[0];
+    temp[1] = (unsigned short) accu; /* middle 16 bits */
+    accu >>= sizeof(unsigned short) * 8;
+    /*
+     * Multiply as unsigned long: unsigned short operands would otherwise
+     * be promoted to int, and products up to 0xffff * 0xffff overflow a
+     * 32-bit signed int (undefined behaviour).  Unsigned wrap-around is
+     * harmless because only the low 16 bits of the sum are kept.
+     */
+    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[2] +
+        (unsigned long) _rand48_mult[1] * (unsigned long) xseed[1] +
+        (unsigned long) _rand48_mult[2] * (unsigned long) xseed[0];
+    xseed[0] = temp[0];
+    xseed[1] = temp[1];
+    xseed[2] = (unsigned short) accu; /* upper 16 bits */
+}
+
+/*
+ * Seed the shared generator: the low word becomes RAND48_SEED_0 and the
+ * upper two words take the low 32 bits of seed.  The multiplier and
+ * addend are reset to their defaults.
+ */
+void
+hts_srand48(long seed)
+{
+    /* Shift as unsigned: right-shifting a negative long is
+     * implementation-defined. */
+    unsigned long useed = (unsigned long) seed;
+
+    _rand48_seed[0] = RAND48_SEED_0;
+    _rand48_seed[1] = (unsigned short) useed;
+    _rand48_seed[2] = (unsigned short) (useed >> 16);
+    _rand48_mult[0] = RAND48_MULT_0;
+    _rand48_mult[1] = RAND48_MULT_1;
+    _rand48_mult[2] = RAND48_MULT_2;
+    _rand48_add = RAND48_ADD;
+}
+
+/*
+ * Advance the caller-supplied state xseed and return the new 48-bit
+ * value scaled by 2^-48, i.e. a double in [0, 1).
+ */
+double
+hts_erand48(unsigned short xseed[3])
+{
+    _dorand48(xseed);
+    return ldexp((double) xseed[0], -48) +
+        ldexp((double) xseed[1], -32) +
+        ldexp((double) xseed[2], -16);
+}
+
+/* As hts_erand48(), but operating on the file-level shared state. */
+double
+hts_drand48(void)
+{
+    return hts_erand48(_rand48_seed);
+}
+
+/*
+ * Advance the shared state and return its top 31 bits as a
+ * non-negative long in [0, 2^31).
+ */
+long
+hts_lrand48(void)
+{
+    _dorand48(_rand48_seed);
+    return ((long) _rand48_seed[2] << 15) + ((long) _rand48_seed[1] >> 1);
+}
diff --git a/win/rand.h b/win/rand.h
new file mode 100644
index 0000000..a8fdb6a
--- /dev/null
+++ b/win/rand.h
@@ -0,0 +1,24 @@
+/* rand.h -- drand48 implementation from the FreeBSD source tree. */
+
+/*
+ * Copyright (c) 1993 Martin Birgmeier
+ * All rights reserved.
+ *
+ * You may redistribute unmodified or modified versions of this source
+ * code provided that the above copyright notice and this and the
+ * following conditions are retained.
+ *
+ * This software is provided ``as is'', and comes with no warranties
+ * of any kind. I shall in no event be liable for anything that happens
+ * to anyone/anything when using this software.
+ */
+
+#ifndef HTSLIB_HTS_RAND_H
+#define HTSLIB_HTS_RAND_H
+
+void hts_srand48(long seed); /* seed the shared generator state from the low 32 bits of seed */
+double hts_erand48(unsigned short xseed[3]); /* advance caller-owned state xseed; return a double in [0, 1) */
+double hts_drand48(void); /* as hts_erand48(), but using the shared generator state */
+long hts_lrand48(void); /* advance the shared state; return a non-negative long below 2^31 */
+
+#endif /* HTSLIB_HTS_RAND_H */
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htslib.git
More information about the debian-med-commit
mailing list