[med-svn] [htslib] 04/10: Adds HTS_OPT_BLOCK_SIZE support for SAM/BAM/CRAM.

Andreas Tille tille at debian.org
Wed Jul 19 19:54:29 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to annotated tag 1.5
in repository htslib.

commit 7fd21f5bfd2767dbc7190bd6ccdef3d57dd30ccd
Author: James Bonfield <jkb at sanger.ac.uk>
Date:   Thu Feb 11 17:30:58 2016 +0000

    Adds HTS_OPT_BLOCK_SIZE support for SAM/BAM/CRAM.
    
    Allow the size of the internal hFILE buffer to be changed.  This may
    be useful for fine tuning I/O speed on filesystems that don't report
    an optimal block size.
    
    It's possible to shrink the buffer, but only if the buffer does not
    contain data that would be lost after the resize.  If it does, a
    warning will be printed and the buffer will be left at the existing
    size.
---
 Makefile         |  2 +-
 bgzf.c           |  5 +++++
 cram/cram_io.h   |  6 ++++++
 hfile.c          | 31 +++++++++++++++++++++++++++++++
 hfile_internal.h | 21 +++++++++++++++++++++
 hts.c            | 32 ++++++++++++++++++++++++++++++++
 htslib/hts.h     |  1 +
 7 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c2a4670..9d81ced 100644
--- a/Makefile
+++ b/Makefile
@@ -296,7 +296,7 @@ hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstrin
 hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
 hfile_net.o hfile_net.pico: hfile_net.c config.h $(hfile_internal_h) $(htslib_knetfile_h)
 hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hts_internal_h) $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
-hts.o hts.pico: hts.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) version.h $(hts_internal_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h)
+hts.o hts.pico: hts.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(hfile_internal_h) $(htslib_hfile_h) version.h $(hts_internal_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h)
 vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h)
 sam.o sam.pico: sam.c config.h $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) $(htslib_hts_endian_h)
 tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(hts_internal_h) $(htslib_khash_h)
diff --git a/bgzf.c b/bgzf.c
index f010b71..c8a53db 100644
--- a/bgzf.c
+++ b/bgzf.c
@@ -1885,3 +1885,8 @@ long bgzf_utell(BGZF *fp)
 {
     return fp->uncompressed_address;    // currently maintained only when reading
 }
+
+/* Returns the hFILE connected to a BGZF; prototype is in hfile_internal.h */
+struct hFILE *bgzf_hfile(struct BGZF *fp) {
+    return fp->fp;
+}
diff --git a/cram/cram_io.h b/cram/cram_io.h
index 85dabcc..614a6e0 100644
--- a/cram/cram_io.h
+++ b/cram/cram_io.h
@@ -895,6 +895,12 @@ int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args);
  */
 int cram_set_header(cram_fd *fd, SAM_hdr *hdr);
 
+/*!
+ * Returns the hFILE connected to a cram_fd (fd is dereferenced unchecked).
+ */
+static inline struct hFILE *cram_hfile(cram_fd *fd) {
+    return fd->fp;
+}
 
 #ifdef __cplusplus
 }
diff --git a/hfile.c b/hfile.c
index d40f0c3..57e2b89 100644
--- a/hfile.c
+++ b/hfile.c
@@ -26,6 +26,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stddef.h>
 #include <string.h>
 #include <errno.h>
 #include <limits.h>
@@ -177,6 +178,36 @@ static ssize_t refill_buffer(hFILE *fp)
     return n;
 }
 
+/*
+ * Changes the buffer size for an hFILE; a bufsiz of 0 selects 32768.
+ * Ideally this is done immediately after opening.  If performed later,
+ * this function fails when the new size is smaller than the furthest
+ * offset in use within the buffer (the greater of begin and end).
+ *
+ * Returns 0 on success;
+ *        -1 on failure (NULL fp, size too small, or realloc failure).
+ */
+int hfile_set_blksize(hFILE *fp, size_t bufsiz) {
+    char *buffer;
+    ptrdiff_t curr_used;
+    if (!fp) return -1;
+    curr_used = (fp->begin > fp->end ? fp->begin : fp->end) - fp->buffer;
+    if (bufsiz == 0) bufsiz = 32768;
+
+    // Refuse a resize that would cut off data already held in the buffer
+    if (bufsiz < curr_used)
+        return -1;
+
+    if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1;
+    // Rebase begin/end by offset, since realloc may have moved the buffer
+    fp->begin  = buffer + (fp->begin - fp->buffer);
+    fp->end    = buffer + (fp->end   - fp->buffer);
+    fp->buffer = buffer;
+    fp->limit  = &fp->buffer[bufsiz];
+
+    return 0;
+}
+
 /* Called only from hgetc(), when our buffer is empty.  */
 int hgetc2(hFILE *fp)
 {
diff --git a/hfile_internal.h b/hfile_internal.h
index 0e3e64c..8ca7b57 100644
--- a/hfile_internal.h
+++ b/hfile_internal.h
@@ -33,6 +33,27 @@ DEALINGS IN THE SOFTWARE.  */
 extern "C" {
 #endif
 
+/*!
+  @abstract  Resizes the buffer within an hFILE.
+
+  @notes  Changes the buffer size for an hFILE.  Ideally this is done
+  immediately after opening.  If performed later, this function may
+  fail if we are reducing the buffer size and the current offset into
+  the buffer is beyond the new capacity.
+
+  @param fp        The file stream
+  @param capacity  The size of the new buffer in bytes; 0 selects a default
+
+  @return Returns 0 on success, -1 on failure.
+ */
+int hfile_set_blksize(hFILE *fp, size_t capacity);
+
+struct BGZF;
+/*!
+  @abstract Return the hFILE connected to a BGZF
+ */
+struct hFILE *bgzf_hfile(struct BGZF *fp);
+
 struct hFILE_backend {
     /* As per read(2), returning the number of bytes read (possibly 0) or
        negative (and setting errno) on errors.  Front-end code will call this
diff --git a/hts.c b/hts.c
index 2736456..8e17d16 100644
--- a/hts.c
+++ b/hts.c
@@ -43,6 +43,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/hts_endian.h"
 #include "version.h"
 #include "hts_internal.h"
+#include "hfile_internal.h"
 
 #include "htslib/khash.h"
 #include "htslib/kseq.h"
@@ -587,6 +588,10 @@ int hts_opt_add(hts_opt **opts, const char *c_arg) {
              strcmp(o->arg, "NAME_PREFIX") == 0)
         o->opt = CRAM_OPT_PREFIX, o->val.s = val;
 
+    else if (strcmp(o->arg, "block_size") == 0 ||
+             strcmp(o->arg, "BLOCK_SIZE") == 0)
+        o->opt = HTS_OPT_BLOCK_SIZE, o->val.i = strtol(val, NULL, 0);
+
     else {
         fprintf(stderr, "Unknown option '%s'\n", o->arg);
         free(o->arg);
@@ -960,6 +965,17 @@ const char *hts_format_file_extension(const htsFormat *format) {
     }
 }
 
+static hFILE *hts_hfile(htsFile *fp) { // hFILE behind fp, or NULL if unhandled
+    switch (fp->format.format) {
+    case binary_format: // fall through; NOTE(review): confirm valid for bcf
+    case bam:          return bgzf_hfile(fp->fp.bgzf);
+    case cram:         return cram_hfile(fp->fp.cram);
+    case text_format:  return fp->fp.hfile;
+    case sam:          return fp->fp.hfile;
+    default:           return NULL;
+    }
+}
+
 int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...) {
     int r;
     va_list args;
@@ -972,6 +988,22 @@ int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...) {
         return hts_set_threads(fp, nthreads);
     }
 
+    case HTS_OPT_BLOCK_SIZE: {
+        hFILE *hf = hts_hfile(fp);
+
+        if (hf) {
+            va_start(args, opt);
+            if (hfile_set_blksize(hf, va_arg(args, int)) != 0 && hts_verbose >= 2)
+                fprintf(stderr, "[W::%s] Failed to change block size\n", __func__);
+            va_end(args);
+        } else if (hts_verbose >= 2)
+            // To do - implement for vcf/bcf.
+            fprintf(stderr, "[W::%s] cannot change block size for this format\n", __func__);
+
+
+        return 0;
+    }
+
     case HTS_OPT_THREAD_POOL: {
         va_start(args, opt);
         htsThreadPool *p = va_arg(args, htsThreadPool *);
diff --git a/htslib/hts.h b/htslib/hts.h
index e046dd0..e67a80b 100644
--- a/htslib/hts.h
+++ b/htslib/hts.h
@@ -235,6 +235,7 @@ enum hts_fmt_option {
     HTS_OPT_NTHREADS,
     HTS_OPT_THREAD_POOL,
     HTS_OPT_CACHE_SIZE,
+    HTS_OPT_BLOCK_SIZE,
 };
 
 // For backwards compatibility

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htslib.git



More information about the debian-med-commit mailing list